Port over Icing c++ code from upstream
Change-Id: Ia3981fed7e0e70589efc027d4123f306cdfbe990
diff --git a/METADATA b/METADATA
index 2af1aa1..d350608 100644
--- a/METADATA
+++ b/METADATA
@@ -12,6 +12,6 @@
type: PIPER
value: "http://google3/third_party/icing/"
}
- last_upgrade_date { year: 2019 month: 11 day: 25 }
+ last_upgrade_date { year: 2019 month: 12 day: 20 }
license_type: NOTICE
}
diff --git a/icing/absl_ports/annotate.cc b/icing/absl_ports/annotate.cc
new file mode 100644
index 0000000..f73c432
--- /dev/null
+++ b/icing/absl_ports/annotate.cc
@@ -0,0 +1,43 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/absl_ports/annotate.h"
+
+#include "utils/base/status.h"
+#include "icing/absl_ports/str_cat.h"
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+namespace {
+constexpr std::string_view kErrorSeparator = ";";
+} // namespace
+
+libtextclassifier3::Status Annotate(const libtextclassifier3::Status& s,
+ std::string_view msg) {
+ if (s.ok() || msg.empty()) {
+ return s;
+ }
+
+ std::string new_msg =
+ (!s.error_message().empty())
+ ? absl_ports::StrCat(s.error_message(), kErrorSeparator, msg)
+ : std::string(msg);
+ return libtextclassifier3::Status(s.CanonicalCode(), new_msg);
+}
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
diff --git a/icing/absl_ports/annotate.h b/icing/absl_ports/annotate.h
new file mode 100644
index 0000000..81adce0
--- /dev/null
+++ b/icing/absl_ports/annotate.h
@@ -0,0 +1,49 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_ABSL_PORTS_ANNOTATE_H_
+#define ICING_ABSL_PORTS_ANNOTATE_H_
+
+#include <string_view>
+
+#include "utils/base/status.h"
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+// Returns a Status that is identical to `s` except that the error_message()
+// has been augmented by adding `msg` to the end of the original error message.
+//
+// Annotate should be used to add higher-level information to a Status. E.g.,
+//
+// libtextclassifier3::Status s = file::GetContents(...);
+// if (!s.ok()) {
+// return Annotate(s, "loading blacklist");
+// }
+//
+// Annotate() adds the appropriate separators, so callers should not include a
+// separator in `msg`. The exact formatting is subject to change, so you should
+// not depend on it in your tests.
+//
+// OK status values have no error message and therefore if `s` is OK, the result
+// is unchanged.
+libtextclassifier3::Status Annotate(const libtextclassifier3::Status& s,
+ std::string_view msg);
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_ABSL_PORTS_ANNOTATE_H_
diff --git a/icing/absl_ports/canonical_errors.cc b/icing/absl_ports/canonical_errors.cc
new file mode 100644
index 0000000..03b2c61
--- /dev/null
+++ b/icing/absl_ports/canonical_errors.cc
@@ -0,0 +1,176 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/absl_ports/canonical_errors.h"
+
+#include "utils/base/status.h"
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+libtextclassifier3::Status CancelledError(std::string_view error_message) {
+ return libtextclassifier3::Status(libtextclassifier3::StatusCode::CANCELLED,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status UnknownError(std::string_view error_message) {
+ return libtextclassifier3::Status(libtextclassifier3::StatusCode::UNKNOWN,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status InvalidArgumentError(
+ std::string_view error_message) {
+ return libtextclassifier3::Status(
+ libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status DeadlineExceededError(
+ std::string_view error_message) {
+ return libtextclassifier3::Status(
+ libtextclassifier3::StatusCode::DEADLINE_EXCEEDED,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status NotFoundError(std::string_view error_message) {
+ return libtextclassifier3::Status(libtextclassifier3::StatusCode::NOT_FOUND,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status AlreadyExistsError(std::string_view error_message) {
+ return libtextclassifier3::Status(
+ libtextclassifier3::StatusCode::ALREADY_EXISTS,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status PermissionDeniedError(
+ std::string_view error_message) {
+ return libtextclassifier3::Status(
+ libtextclassifier3::StatusCode::PERMISSION_DENIED,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status ResourceExhaustedError(
+ std::string_view error_message) {
+ return libtextclassifier3::Status(
+ libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status FailedPreconditionError(
+ std::string_view error_message) {
+ return libtextclassifier3::Status(
+ libtextclassifier3::StatusCode::FAILED_PRECONDITION,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status AbortedError(std::string_view error_message) {
+ return libtextclassifier3::Status(libtextclassifier3::StatusCode::ABORTED,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status OutOfRangeError(std::string_view error_message) {
+ return libtextclassifier3::Status(
+ libtextclassifier3::StatusCode::OUT_OF_RANGE, std::string(error_message));
+}
+
+libtextclassifier3::Status UnimplementedError(std::string_view error_message) {
+ return libtextclassifier3::Status(
+ libtextclassifier3::StatusCode::UNIMPLEMENTED,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status InternalError(std::string_view error_message) {
+ return libtextclassifier3::Status(libtextclassifier3::StatusCode::INTERNAL,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status UnavailableError(std::string_view error_message) {
+ return libtextclassifier3::Status(libtextclassifier3::StatusCode::UNAVAILABLE,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status DataLossError(std::string_view error_message) {
+ return libtextclassifier3::Status(libtextclassifier3::StatusCode::DATA_LOSS,
+ std::string(error_message));
+}
+
+libtextclassifier3::Status UnauthenticatedError(
+ std::string_view error_message) {
+ return libtextclassifier3::Status(
+ libtextclassifier3::StatusCode::UNAUTHENTICATED,
+ std::string(error_message));
+}
+
+bool IsCancelled(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() == libtextclassifier3::StatusCode::CANCELLED;
+}
+bool IsUnknown(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() == libtextclassifier3::StatusCode::UNKNOWN;
+}
+bool IsInvalidArgument(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() ==
+ libtextclassifier3::StatusCode::INVALID_ARGUMENT;
+}
+bool IsDeadlineExceeded(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() ==
+ libtextclassifier3::StatusCode::DEADLINE_EXCEEDED;
+}
+bool IsNotFound(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() == libtextclassifier3::StatusCode::NOT_FOUND;
+}
+bool IsAlreadyExists(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() ==
+ libtextclassifier3::StatusCode::ALREADY_EXISTS;
+}
+bool IsPermissionDenied(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() ==
+ libtextclassifier3::StatusCode::PERMISSION_DENIED;
+}
+bool IsResourceExhausted(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() ==
+ libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED;
+}
+bool IsFailedPrecondition(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() ==
+ libtextclassifier3::StatusCode::FAILED_PRECONDITION;
+}
+bool IsAborted(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() == libtextclassifier3::StatusCode::ABORTED;
+}
+bool IsOutOfRange(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() == libtextclassifier3::StatusCode::OUT_OF_RANGE;
+}
+bool IsUnimplemented(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() ==
+ libtextclassifier3::StatusCode::UNIMPLEMENTED;
+}
+bool IsInternal(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() == libtextclassifier3::StatusCode::INTERNAL;
+}
+bool IsUnavailable(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() == libtextclassifier3::StatusCode::UNAVAILABLE;
+}
+bool IsDataLoss(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() == libtextclassifier3::StatusCode::DATA_LOSS;
+}
+bool IsUnauthenticated(const libtextclassifier3::Status& status) {
+ return status.CanonicalCode() ==
+ libtextclassifier3::StatusCode::UNAUTHENTICATED;
+}
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
diff --git a/icing/absl_ports/canonical_errors.h b/icing/absl_ports/canonical_errors.h
new file mode 100644
index 0000000..c2d7784
--- /dev/null
+++ b/icing/absl_ports/canonical_errors.h
@@ -0,0 +1,68 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_ABSL_PORTS_CANONICAL_ERRORS_H_
+#define ICING_ABSL_PORTS_CANONICAL_ERRORS_H_
+
+#include <string_view>
+
+#include "utils/base/status.h"
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+libtextclassifier3::Status CancelledError(std::string_view error_message);
+libtextclassifier3::Status UnknownError(std::string_view error_message);
+libtextclassifier3::Status InvalidArgumentError(std::string_view error_message);
+libtextclassifier3::Status DeadlineExceededError(
+ std::string_view error_message);
+libtextclassifier3::Status NotFoundError(std::string_view error_message);
+libtextclassifier3::Status AlreadyExistsError(std::string_view error_message);
+libtextclassifier3::Status PermissionDeniedError(
+ std::string_view error_message);
+libtextclassifier3::Status ResourceExhaustedError(
+ std::string_view error_message);
+libtextclassifier3::Status FailedPreconditionError(
+ std::string_view error_message);
+libtextclassifier3::Status AbortedError(std::string_view error_message);
+libtextclassifier3::Status OutOfRangeError(std::string_view error_message);
+libtextclassifier3::Status UnimplementedError(std::string_view error_message);
+libtextclassifier3::Status InternalError(std::string_view error_message);
+libtextclassifier3::Status UnavailableError(std::string_view error_message);
+libtextclassifier3::Status DataLossError(std::string_view error_message);
+libtextclassifier3::Status UnauthenticatedError(std::string_view error_message);
+
+bool IsCancelled(const libtextclassifier3::Status& status);
+bool IsUnknown(const libtextclassifier3::Status& status);
+bool IsInvalidArgument(const libtextclassifier3::Status& status);
+bool IsDeadlineExceeded(const libtextclassifier3::Status& status);
+bool IsNotFound(const libtextclassifier3::Status& status);
+bool IsAlreadyExists(const libtextclassifier3::Status& status);
+bool IsPermissionDenied(const libtextclassifier3::Status& status);
+bool IsResourceExhausted(const libtextclassifier3::Status& status);
+bool IsFailedPrecondition(const libtextclassifier3::Status& status);
+bool IsAborted(const libtextclassifier3::Status& status);
+bool IsOutOfRange(const libtextclassifier3::Status& status);
+bool IsUnimplemented(const libtextclassifier3::Status& status);
+bool IsInternal(const libtextclassifier3::Status& status);
+bool IsUnavailable(const libtextclassifier3::Status& status);
+bool IsDataLoss(const libtextclassifier3::Status& status);
+bool IsUnauthenticated(const libtextclassifier3::Status& status);
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_ABSL_PORTS_CANONICAL_ERRORS_H_
diff --git a/icing/absl_ports/mutex.h b/icing/absl_ports/mutex.h
new file mode 100644
index 0000000..c49b1e1
--- /dev/null
+++ b/icing/absl_ports/mutex.h
@@ -0,0 +1,73 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_ABSL_PORTS_MUTEX_H_
+#define ICING_ABSL_PORTS_MUTEX_H_
+
+#include <mutex> // NOLINT
+#include <shared_mutex> // NOLINT
+
+#include "icing/absl_ports/thread_annotations.h"
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+// Simple wrapper around std::shared_mutex with annotations to allow thread
+// annotation checks.
+class LOCKABLE shared_mutex {
+ public:
+ void lock() EXCLUSIVE_LOCK_FUNCTION() { m_.lock(); }
+ bool try_lock() EXCLUSIVE_TRYLOCK_FUNCTION(true) { return m_.try_lock(); }
+ void unlock() UNLOCK_FUNCTION() { m_.unlock(); }
+
+ void lock_shared() SHARED_LOCK_FUNCTION() { m_.lock_shared(); }
+ bool try_lock_shared() SHARED_TRYLOCK_FUNCTION(true) {
+ return m_.try_lock_shared();
+ }
+ void unlock_shared() UNLOCK_FUNCTION() { m_.unlock_shared(); }
+
+ private:
+ std::shared_mutex m_;
+};
+
+// Simple wrapper around std::unique_lock with annotations to allow thread
+// annotation checks.
+class SCOPED_LOCKABLE unique_lock {
+ public:
+ explicit unique_lock(shared_mutex* mu) EXCLUSIVE_LOCK_FUNCTION(mu)
+ : lock_(*mu) {}
+ ~unique_lock() UNLOCK_FUNCTION() = default;
+
+ private:
+ std::unique_lock<shared_mutex> lock_;
+};
+
+// Simple wrapper around std::shared_lock with annotations to allow thread
+// annotation checks.
+class SCOPED_LOCKABLE shared_lock {
+ public:
+ explicit shared_lock(shared_mutex* mu) SHARED_LOCK_FUNCTION(mu)
+ : lock_(*mu) {}
+ ~shared_lock() UNLOCK_FUNCTION() = default;
+
+ private:
+ std::shared_lock<shared_mutex> lock_;
+};
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_ABSL_PORTS_MUTEX_H_
diff --git a/icing/absl_ports/status_imports.h b/icing/absl_ports/status_imports.h
new file mode 100644
index 0000000..fe4b6d9
--- /dev/null
+++ b/icing/absl_ports/status_imports.h
@@ -0,0 +1,32 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_ABSL_PORTS_STATUS_IMPORTS_H_
+#define ICING_ABSL_PORTS_STATUS_IMPORTS_H_
+
+#include "utils/base/status.h"
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+// TODO(b/144458732) Delete this file once visibility on TC3 Status has been
+// granted to the sample app.
+using Status = libtextclassifier3::Status;
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_ABSL_PORTS_STATUS_IMPORTS_H_
diff --git a/icing/absl_ports/status_macros.h b/icing/absl_ports/status_macros.h
new file mode 100644
index 0000000..44cffdd
--- /dev/null
+++ b/icing/absl_ports/status_macros.h
@@ -0,0 +1,117 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_ABSL_PORTS_STATUS_MACROS_H_
+#define ICING_ABSL_PORTS_STATUS_MACROS_H_
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+// TODO(b/144458732): Move the fixes included in this file over to TC Status and
+// remove this file.
+class StatusAdapter {
+ public:
+ explicit StatusAdapter(const libtextclassifier3::Status& s) : s_(s) {}
+ explicit StatusAdapter(libtextclassifier3::Status&& s) : s_(std::move(s)) {}
+ template <typename T>
+ explicit StatusAdapter(const libtextclassifier3::StatusOr<T>& s)
+ : s_(s.status()) {}
+ template <typename T>
+ explicit StatusAdapter(libtextclassifier3::StatusOr<T>&& s)
+ : s_(std::move(s).status()) {}
+
+ bool ok() const { return s_.ok(); }
+ explicit operator bool() const { return ok(); }
+
+ const libtextclassifier3::Status& status() const& { return s_; }
+ libtextclassifier3::Status status() && { return std::move(s_); }
+
+ private:
+ libtextclassifier3::Status s_;
+};
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
+
+// Evaluates an expression that produces a `libtextclassifier3::Status`. If the
+// status is not ok, returns it from the current function.
+//
+// For example:
+// libtextclassifier3::Status MultiStepFunction() {
+// ICING_RETURN_IF_ERROR(Function(args...));
+// ICING_RETURN_IF_ERROR(foo.Method(args...));
+// return libtextclassifier3::Status();
+// }
+#define ICING_RETURN_IF_ERROR(expr) ICING_RETURN_IF_ERROR_IMPL(expr)
+#define ICING_RETURN_IF_ERROR_IMPL(expr) \
+ ICING_STATUS_MACROS_IMPL_ELSE_BLOCKER_ \
+ if (::icing::lib::absl_ports::StatusAdapter adapter{expr}) { \
+ } else /* NOLINT */ \
+ return std::move(adapter).status()
+
+// The GNU compiler emits a warning for code like:
+//
+// if (foo)
+// if (bar) { } else baz;
+//
+// because it thinks you might want the else to bind to the first if. This
+// leads to problems with code like:
+//
+// if (do_expr) ICING_RETURN_IF_ERROR(expr);
+//
+// The "switch (0) case 0:" idiom is used to suppress this.
+#define ICING_STATUS_MACROS_IMPL_ELSE_BLOCKER_ \
+ switch (0) \
+ case 0: \
+ default: // NOLINT
+
+#define ICING_STATUS_MACROS_CONCAT_NAME(x, y) \
+ ICING_STATUS_MACROS_CONCAT_IMPL(x, y)
+#define ICING_STATUS_MACROS_CONCAT_IMPL(x, y) x##y
+
+// Macros that help consume libtextclassifier3::StatusOr<...> return values and
+// propagate errors. TC_STRIP These macros are inspired by the nice practice
+// from Google3:
+// https://g3doc.corp.google.com/devtools/library_club/g3doc/totw/121.md?cl=head
+// TC_END_STRIP
+#define ICING_ASSIGN_OR_RETURN(lhs, rexpr) \
+ ICING_ASSIGN_OR_RETURN_IMPL( \
+ ICING_STATUS_MACROS_CONCAT_NAME(_status_or_value, __COUNTER__), lhs, \
+ rexpr)
+
+#define ICING_ASSIGN_OR_RETURN_IMPL(statusor, lhs, rexpr) \
+ auto statusor = (rexpr); \
+ if (!statusor.ok()) { \
+ return statusor.status(); \
+ } \
+ lhs = std::move(statusor.ValueOrDie())
+
+#define ICING_ASSIGN_OR_RETURN_VAL(lhs, rexpr, val) \
+ ICING_ASSIGN_OR_RETURN_VAL_IMPL( \
+ ICING_STATUS_MACROS_CONCAT_NAME(_status_or_value, __COUNTER__), lhs, \
+ rexpr, val)
+
+#define ICING_ASSIGN_OR_RETURN_VAL_IMPL(statusor, lhs, rexpr, val) \
+ auto statusor = (rexpr); \
+ if (!statusor.ok()) { \
+ return val; \
+ } \
+ lhs = std::move(statusor.ValueOrDie())
+
+#endif // ICING_ABSL_PORTS_STATUS_MACROS_H_
diff --git a/icing/absl_ports/str_cat.cc b/icing/absl_ports/str_cat.cc
new file mode 100644
index 0000000..2cf020d
--- /dev/null
+++ b/icing/absl_ports/str_cat.cc
@@ -0,0 +1,190 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/absl_ports/str_cat.h"
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+char* Append(char* out, std::string_view s) {
+ if (!s.empty()) {
+ memcpy(out, s.data(), s.length());
+ out += s.length();
+ }
+ return out;
+}
+
+std::string StrCat(std::string_view a, std::string_view b) {
+ std::string::size_type result_size = a.length() + b.length();
+ // Create result with enough room to fit all operands.
+ std::string result;
+ // __resize_default_init is provided by libc++ >= 8.0 and allows us to
+ // allocate room for the content we're about to copy while avoiding the
+ // unnecessary zero-initialization that the normal std::string::resize will
+ // perform.
+ //
+ // The current absl implementation copies a null char to the character at
+ // previous_size after the call to resize_default_init due to implementation
+ // differences between libstdc++ and libc++. That behavior is NOT copied over
+ // here because the following lines are just about to overwrite that character
+ // anyways.
+ result.__resize_default_init(result_size);
+
+ char* out = &result[0];
+ out = Append(out, a);
+ out = Append(out, b);
+ return result;
+}
+
+std::string StrCat(std::string_view a, std::string_view b, std::string_view c) {
+ std::string::size_type result_size = a.length() + b.length() + c.length();
+ // Create result with enough room to fit all operands.
+ std::string result;
+ // __resize_default_init is provided by libc++ >= 8.0 and allows us to
+ // allocate room for the content we're about to copy while avoiding the
+ // unnecessary zero-initialization that the normal std::string::resize will
+ // perform.
+ //
+ // The current absl implementation copies a null char to the character at
+ // previous_size after the call to resize_default_init due to implementation
+ // differences between libstdc++ and libc++. That behavior is NOT copied over
+ // here because the following lines are just about to overwrite that character
+ // anyways.
+ result.__resize_default_init(result_size);
+
+ char* out = &result[0];
+ out = Append(out, a);
+ out = Append(out, b);
+ out = Append(out, c);
+ return result;
+}
+
+std::string StrCat(std::string_view a, std::string_view b, std::string_view c,
+ std::string_view d) {
+ std::string::size_type result_size =
+ a.length() + b.length() + c.length() + d.length();
+ // Create result with enough room to fit all operands.
+ std::string result;
+ // __resize_default_init is provided by libc++ >= 8.0 and allows us to
+ // allocate room for the content we're about to copy while avoiding the
+ // unnecessary zero-initialization that the normal std::string::resize will
+ // perform.
+ //
+ // The current absl implementation copies a null char to the character at
+ // previous_size after the call to resize_default_init due to implementation
+ // differences between libstdc++ and libc++. That behavior is NOT copied over
+ // here because the following lines are just about to overwrite that character
+ // anyways.
+ result.__resize_default_init(result_size);
+
+ char* out = &result[0];
+ out = Append(out, a);
+ out = Append(out, b);
+ out = Append(out, c);
+ out = Append(out, d);
+ return result;
+}
+
+std::string StrCatPieces(std::vector<std::string_view> pieces) {
+ std::string::size_type result_size = 0;
+ for (std::string_view s : pieces) {
+ result_size += s.length();
+ }
+ // Create result with enough room to fit all operands.
+ std::string result;
+ // __resize_default_init is provided by libc++ >= 8.0 and allows us to
+ // allocate room for the content we're about to copy while avoiding the
+ // unnecessary zero-initialization that the normal std::string::resize will
+ // perform.
+ //
+ // The current absl implementation copies a null char to the character at
+ // previous_size after the call to resize_default_init due to implementation
+ // differences between libstdc++ and libc++. That behavior is NOT copied over
+ // here because the following lines are just about to overwrite that character
+ // anyways.
+ result.__resize_default_init(result_size);
+
+ char* out = &result[0];
+ for (std::string_view s : pieces) {
+ out = Append(out, s);
+ }
+ return result;
+}
+
+void StrAppend(std::string* dest, std::string_view a) {
+ std::string::size_type old_size = dest->size();
+ std::string::size_type new_size = old_size + a.length();
+ dest->__resize_default_init(new_size);
+
+ char* out = &(*dest)[old_size];
+ out = Append(out, a);
+}
+
+void StrAppend(std::string* dest, std::string_view a, std::string_view b) {
+ std::string::size_type old_size = dest->size();
+ std::string::size_type new_size = old_size + a.length() + b.length();
+ dest->__resize_default_init(new_size);
+
+ char* out = &(*dest)[old_size];
+ out = Append(out, a);
+ out = Append(out, b);
+}
+
+void StrAppend(std::string* dest, std::string_view a, std::string_view b,
+ std::string_view c) {
+ std::string::size_type old_size = dest->size();
+ std::string::size_type new_size =
+ old_size + a.length() + b.length() + c.length();
+ dest->__resize_default_init(new_size);
+
+ char* out = &(*dest)[old_size];
+ out = Append(out, a);
+ out = Append(out, b);
+ out = Append(out, c);
+}
+
+void StrAppend(std::string* dest, std::string_view a, std::string_view b,
+ std::string_view c, std::string_view d) {
+ std::string::size_type old_size = dest->size();
+ std::string::size_type new_size =
+ old_size + a.length() + b.length() + c.length() + d.length();
+ dest->__resize_default_init(new_size);
+
+ char* out = &(*dest)[old_size];
+ out = Append(out, a);
+ out = Append(out, b);
+ out = Append(out, c);
+ out = Append(out, d);
+}
+
+void StrAppendPieces(std::string* dest, std::vector<std::string_view> pieces) {
+ std::string::size_type old_size = dest->size();
+ std::string::size_type result_size = old_size;
+ for (std::string_view s : pieces) {
+ result_size += s.length();
+ }
+  // Resize the destination string (not a temporary) so the writes below
+  // stay in bounds; mirrors the other StrAppend overloads above.
+  dest->__resize_default_init(result_size);
+
+ char* out = &(*dest)[old_size];
+ for (std::string_view s : pieces) {
+ out = Append(out, s);
+ }
+}
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
diff --git a/icing/absl_ports/str_cat.h b/icing/absl_ports/str_cat.h
new file mode 100644
index 0000000..b2dd63d
--- /dev/null
+++ b/icing/absl_ports/str_cat.h
@@ -0,0 +1,81 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_ABSL_PORTS_STR_CAT_H_
+#define ICING_ABSL_PORTS_STR_CAT_H_
+
+#include <cstdarg>
+#include <cstring>
+#include <string>
+#include <string_view>
+#include <vector>
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+// Appends the content of s to the char buffer starting at out and returns the
+// address of the first character after the content copied from s.
+// REQUIRES: out is large enough to hold all content from s.
+char* Append(char* out, std::string_view s);
+
+// A port of absl::StrCat.
+//
+// Merges given strings or numbers, using no delimiter(s), returning the merged
+// result as a string.
+//
+// Unlike absl::StrCat, this version only accepts string_views. For converting
+// numerics to strings, use StringPrintf.
+//
+// Separate implementations for 2-4 arguments are provided separately from the
+// variadic definition, just like absl does. This is a minor optimization to
+// avoid constructing a vector and copying all string_view params.
+std::string StrCat(std::string_view a, std::string_view b);
+std::string StrCat(std::string_view a, std::string_view b, std::string_view c);
+std::string StrCat(std::string_view a, std::string_view b, std::string_view c,
+ std::string_view d);
+
+std::string StrCatPieces(std::vector<std::string_view> pieces);
+
+template <typename... AV>
+std::string StrCat(const AV&... args) {
+ return StrCatPieces({static_cast<const std::string_view&>(args)...});
+}
+
+// A port of absl::StrAppend.
+//
+// Appends a string or set of strings to an existing string, in a similar
+// fashion to `StrCat()`.
+//
+// Unlike absl::StrAppend, this version only accepts string_views. For
+// converting numerics to strings, use StringPrintf.
+void StrAppend(std::string* dest, std::string_view a);
+void StrAppend(std::string* dest, std::string_view a, std::string_view b);
+void StrAppend(std::string* dest, std::string_view a, std::string_view b,
+ std::string_view c);
+void StrAppend(std::string* dest, std::string_view a, std::string_view b,
+ std::string_view c, std::string_view d);
+
+void StrAppendPieces(std::string* dest, std::vector<std::string_view> pieces);
+
+template <typename... AV>
+void StrAppend(std::string* dest, const AV&... args) {
+ StrAppendPieces(dest, {static_cast<const std::string_view&>(args)...});
+}
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_ABSL_PORTS_STR_CAT_H_
diff --git a/icing/absl_ports/str_join.h b/icing/absl_ports/str_join.h
new file mode 100644
index 0000000..7c8936a
--- /dev/null
+++ b/icing/absl_ports/str_join.h
@@ -0,0 +1,111 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_ABSL_PORTS_STR_JOIN_H_
+#define ICING_ABSL_PORTS_STR_JOIN_H_
+
+#include <string>
+#include <string_view>
+
+#include "icing/absl_ports/str_cat.h"
+
+namespace icing {
+namespace lib {
+namespace absl_ports {
+
+class DefaultFormatter {
+ public:
+ template <typename T>
+ std::string operator()(const T& element) {
+ return std::string(element);
+ }
+};
+
+class NumberFormatter {
+ public:
+ template <typename T>
+ std::string operator()(const T& number) {
+ return std::to_string(number);
+ }
+};
+
+// A port of absl::StrJoin.
+//
+// Joins a range of elements and returns the result as a std::string.
+// `StrJoin()` takes a range and a separator string to insert between the
+// joined elements.
+//
+// A Formatter may be supplied to convert the Iterator's elements to a
+// std::string.
+template <typename Iterator, typename Formatter>
+std::string StrJoin(Iterator first, Iterator last, std::string_view sep,
+ Formatter&& formatter) {
+ std::string::size_type result_size = 0;
+ bool add_separator_before_element = false;
+ for (Iterator current = first; current != last; ++current) {
+ if (add_separator_before_element) {
+ result_size += sep.length();
+ }
+
+ std::string formatted = formatter(*current);
+ result_size += formatted.length();
+
+ add_separator_before_element = true;
+ }
+ // Create result with enough room to fit all operands.
+ std::string result;
+ // __resize_default_init is provided by libc++ >= 8.0 and allows us to
+ // allocate room for the content we're about to copy while avoiding the
+ // unnecessary zero-initialization that the normal std::string::resize will
+ // perform.
+ //
+ // The current absl implementation copies a null char to the character at
+ // previous_size after the call to resize_default_init due to implementation
+ // differences between libstdc++ and libc++. That behavior is NOT copied over
+ // here because the following lines are just about to overwrite that character
+ // anyways.
+ result.__resize_default_init(result_size);
+
+ add_separator_before_element = false;
+ for (char* out = &result[0]; first != last; ++first) {
+ if (add_separator_before_element) {
+ out = Append(out, sep);
+ }
+
+ std::string formatted = formatter(*first);
+ out = Append(out, formatted);
+
+ add_separator_before_element = true;
+ }
+
+ return result;
+}
+
+template <typename Container, typename Formatter>
+std::string StrJoin(const Container& container, std::string_view sep,
+ Formatter&& formatter) {
+ return absl_ports::StrJoin(std::begin(container), std::end(container), sep,
+ formatter);
+}
+
+template <typename Container>
+std::string StrJoin(const Container& container, std::string_view sep) {
+ return absl_ports::StrJoin(container, sep, DefaultFormatter());
+}
+
+} // namespace absl_ports
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_ABSL_PORTS_STR_JOIN_H_
diff --git a/icing/absl_ports/thread_annotations.h b/icing/absl_ports/thread_annotations.h
new file mode 100644
index 0000000..f5de7b7
--- /dev/null
+++ b/icing/absl_ports/thread_annotations.h
@@ -0,0 +1,208 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file is a port of absl::thread_annotations.
+//
+// This header file contains macro definitions for thread safety annotations
+// that allow developers to document the locking policies of multi-threaded
+// code. The annotations can also help program analysis tools to identify
+// potential thread safety issues.
+//
+// These annotations are implemented using compiler attributes. Using the macros
+// defined here instead of raw attributes allow for portability and future
+// compatibility.
+//
+// When referring to mutexes in the arguments of the attributes, you should
+// use variable names or more complex expressions (e.g. my_object->mutex_)
+// that evaluate to a concrete mutex object whenever possible. If the mutex
+// you want to refer to is not in scope, you may use a member pointer
+// (e.g. &MyClass::mutex_) to refer to a mutex in some (unknown) object.
+
+#ifndef ICING_ABSL_PORTS_THREAD_ANNOTATIONS_H_
+#define ICING_ABSL_PORTS_THREAD_ANNOTATIONS_H_
+
+#if defined(__clang__)
+#define THREAD_ANNOTATION_ATTRIBUTE__(x) __attribute__((x))
+#else
+#define THREAD_ANNOTATION_ATTRIBUTE__(x) // no-op
+#endif // defined(__clang__)
+
+// GUARDED_BY()
+//
+// Documents if a shared field or global variable needs to be protected by a
+// mutex. GUARDED_BY() allows the user to specify a particular mutex that
+// should be held when accessing the annotated variable.
+//
+// Although this annotation (and PT_GUARDED_BY, below) cannot be applied to
+// local variables, a local variable and its associated mutex can often be
+// combined into a small class or struct, thereby allowing the annotation.
+//
+// Example:
+//
+// class Foo {
+// Mutex mu_;
+// int p1_ GUARDED_BY(mu_);
+// ...
+// };
+#define GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(guarded_by(x))
+
+// PT_GUARDED_BY()
+//
+// Documents if the memory location pointed to by a pointer should be guarded
+// by a mutex when dereferencing the pointer.
+//
+// Example:
+// class Foo {
+// Mutex mu_;
+// int *p1_ PT_GUARDED_BY(mu_);
+// ...
+// };
+//
+// Note that a pointer variable to a shared memory location could itself be a
+// shared variable.
+//
+// Example:
+//
+// // `q_`, guarded by `mu1_`, points to a shared memory location that is
+// // guarded by `mu2_`:
+// int *q_ GUARDED_BY(mu1_) PT_GUARDED_BY(mu2_);
+#define PT_GUARDED_BY(x) THREAD_ANNOTATION_ATTRIBUTE__(pt_guarded_by(x))
+
+// ACQUIRED_AFTER() / ACQUIRED_BEFORE()
+//
+// Documents the acquisition order between locks that can be held
+// simultaneously by a thread. For any two locks that need to be annotated
+// to establish an acquisition order, only one of them needs the annotation.
+// (i.e. You don't have to annotate both locks with both ACQUIRED_AFTER
+// and ACQUIRED_BEFORE.)
+//
+// As with GUARDED_BY, this is only applicable to mutexes that are shared
+// fields or global variables.
+//
+// Example:
+//
+// Mutex m1_;
+// Mutex m2_ ACQUIRED_AFTER(m1_);
+#define ACQUIRED_AFTER(...) \
+ THREAD_ANNOTATION_ATTRIBUTE__(acquired_after(__VA_ARGS__))
+
+#define ACQUIRED_BEFORE(...) \
+ THREAD_ANNOTATION_ATTRIBUTE__(acquired_before(__VA_ARGS__))
+
+// EXCLUSIVE_LOCKS_REQUIRED() / SHARED_LOCKS_REQUIRED()
+//
+// Documents a function that expects a mutex to be held prior to entry.
+// The mutex is expected to be held both on entry to, and exit from, the
+// function.
+//
+// An exclusive lock allows read-write access to the guarded data member(s), and
+// only one thread can acquire a lock exclusively at any one time. A shared lock
+// allows read-only access, and any number of threads can acquire a shared lock
+// concurrently.
+//
+// Generally, non-const methods should be annotated with
+// EXCLUSIVE_LOCKS_REQUIRED, while const methods should be annotated with
+// SHARED_LOCKS_REQUIRED.
+//
+// Example:
+//
+// Mutex mu1, mu2;
+// int a GUARDED_BY(mu1);
+// int b GUARDED_BY(mu2);
+//
+// void foo() EXCLUSIVE_LOCKS_REQUIRED(mu1, mu2) { ... }
+// void bar() const SHARED_LOCKS_REQUIRED(mu1, mu2) { ... }
+#define EXCLUSIVE_LOCKS_REQUIRED(...) \
+ THREAD_ANNOTATION_ATTRIBUTE__(exclusive_locks_required(__VA_ARGS__))
+
+#define SHARED_LOCKS_REQUIRED(...) \
+ THREAD_ANNOTATION_ATTRIBUTE__(shared_locks_required(__VA_ARGS__))
+
+// LOCKS_EXCLUDED()
+//
+// Documents the locks acquired in the body of the function. These locks
+// cannot be held when calling this function.
+#define LOCKS_EXCLUDED(...) \
+ THREAD_ANNOTATION_ATTRIBUTE__(locks_excluded(__VA_ARGS__))
+
+// LOCK_RETURNED()
+//
+// Documents a function that returns a mutex without acquiring it. For example,
+// a public getter method that returns a pointer to a private mutex should
+// be annotated with LOCK_RETURNED.
+#define LOCK_RETURNED(x) \
+ THREAD_ANNOTATION_ATTRIBUTE__(lock_returned(x))
+
+// LOCKABLE
+//
+// Documents if a class/type is a lockable type.
+#define LOCKABLE \
+ THREAD_ANNOTATION_ATTRIBUTE__(lockable)
+
+// SCOPED_LOCKABLE
+//
+// Documents if a class does RAII locking.
+// The constructor should use `LOCK_FUNCTION()` to specify the mutex that is
+// acquired, and the destructor should use `UNLOCK_FUNCTION()` with no
+// arguments; the analysis will assume that the destructor unlocks whatever the
+// constructor locked.
+#define SCOPED_LOCKABLE \
+ THREAD_ANNOTATION_ATTRIBUTE__(scoped_lockable)
+
+// EXCLUSIVE_LOCK_FUNCTION()
+//
+// Documents functions that acquire a lock in the body of a function, and do
+// not release it.
+#define EXCLUSIVE_LOCK_FUNCTION(...) \
+ THREAD_ANNOTATION_ATTRIBUTE__(exclusive_lock_function(__VA_ARGS__))
+
+// SHARED_LOCK_FUNCTION()
+//
+// Documents functions that acquire a shared (reader) lock in the body of a
+// function, and do not release it.
+#define SHARED_LOCK_FUNCTION(...) \
+ THREAD_ANNOTATION_ATTRIBUTE__(shared_lock_function(__VA_ARGS__))
+
+// UNLOCK_FUNCTION()
+//
+// Documents functions that expect a lock to be held on entry to the function,
+// and release it in the body of the function.
+#define UNLOCK_FUNCTION(...) \
+ THREAD_ANNOTATION_ATTRIBUTE__(unlock_function(__VA_ARGS__))
+
+// EXCLUSIVE_TRYLOCK_FUNCTION() / SHARED_TRYLOCK_FUNCTION()
+//
+// Documents functions that try to acquire a lock, and return success or failure
+// (or a non-boolean value that can be interpreted as a boolean).
+// The first argument should be `true` for functions that return `true` on
+// success, or `false` for functions that return `false` on success. The second
+// argument specifies the mutex that is locked on success. If unspecified, this
+// mutex is assumed to be `this`.
+#define EXCLUSIVE_TRYLOCK_FUNCTION(...) \
+ THREAD_ANNOTATION_ATTRIBUTE__(exclusive_trylock_function(__VA_ARGS__))
+
+#define SHARED_TRYLOCK_FUNCTION(...) \
+ THREAD_ANNOTATION_ATTRIBUTE__(shared_trylock_function(__VA_ARGS__))
+
+// ASSERT_EXCLUSIVE_LOCK() / ASSERT_SHARED_LOCK()
+//
+// Documents functions that dynamically check to see if a lock is held, and fail
+// if it is not held.
+#define ASSERT_EXCLUSIVE_LOCK(...) \
+ THREAD_ANNOTATION_ATTRIBUTE__(assert_exclusive_lock(__VA_ARGS__))
+
+#define ASSERT_SHARED_LOCK(...) \
+ THREAD_ANNOTATION_ATTRIBUTE__(assert_shared_lock(__VA_ARGS__))
+
+#endif // ICING_ABSL_PORTS_THREAD_ANNOTATIONS_H_
diff --git a/icing/document-builder.h b/icing/document-builder.h
new file mode 100644
index 0000000..2bbe590
--- /dev/null
+++ b/icing/document-builder.h
@@ -0,0 +1,302 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_DOCUMENT_BUILDER_H_
+#define ICING_DOCUMENT_BUILDER_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/proto/document.pb.h"
+
+namespace icing {
+namespace lib {
+
+class DocumentBuilder {
+ public:
+ DocumentBuilder() = default;
+ explicit DocumentBuilder(DocumentProto document)
+ : document_(std::move(document)) {}
+
+ DocumentBuilder& SetNamespace(std::string name_space) {
+ document_.set_namespace_(std::move(name_space));
+ return *this;
+ }
+
+ DocumentBuilder& SetUri(std::string uri) {
+ document_.set_uri(std::move(uri));
+ return *this;
+ }
+
+ DocumentBuilder& SetKey(std::string name_space, std::string uri) {
+ return SetNamespace(std::move(name_space)).SetUri(std::move(uri));
+ }
+
+ DocumentBuilder& SetSchema(std::string schema) {
+ document_.set_schema(std::move(schema));
+ return *this;
+ }
+
+ DocumentBuilder& SetCreationTimestampSecs(uint64_t creation_timestamp_secs) {
+ document_.set_creation_timestamp_secs(creation_timestamp_secs);
+ return *this;
+ }
+
+ DocumentBuilder& SetScore(int32_t score) {
+ document_.set_score(score);
+ return *this;
+ }
+
+ DocumentBuilder& SetTtlSecs(uint64_t ttl_secs) {
+ document_.set_ttl_secs(ttl_secs);
+ return *this;
+ }
+
+ DocumentBuilder& ClearProperties() {
+ document_.clear_properties();
+ return *this;
+ }
+
+ DocumentBuilder& ClearCustomProperties() {
+ document_.clear_custom_properties();
+ return *this;
+ }
+
+ // Takes a property name and any number of string values.
+ template <typename... V>
+ DocumentBuilder& AddStringProperty(std::string property_name,
+ V... string_values) {
+ return AddStringProperty(std::move(property_name), {string_values...});
+ }
+
+ // Takes a custom property name and any number of string values.
+ template <typename... V>
+ DocumentBuilder& AddCustomStringProperty(std::string property_name,
+ V... string_values) {
+ return AddCustomStringProperty(std::move(property_name),
+ {string_values...});
+ }
+
+ // Takes a property name and any number of int64_t values.
+ template <typename... V>
+ DocumentBuilder& AddInt64Property(std::string property_name,
+ V... int64_values) {
+ return AddInt64Property(std::move(property_name), {int64_values...});
+ }
+
+ // Takes a custom property name and any number of int64_t values.
+ template <typename... V>
+ DocumentBuilder& AddCustomInt64Property(std::string property_name,
+ V... int64_values) {
+ return AddCustomInt64Property(std::move(property_name), {int64_values...});
+ }
+
+ // Takes a property name and any number of double values.
+ template <typename... V>
+ DocumentBuilder& AddDoubleProperty(std::string property_name,
+ V... double_values) {
+ return AddDoubleProperty(std::move(property_name), {double_values...});
+ }
+
+ // Takes a custom property name and any number of double values.
+ template <typename... V>
+ DocumentBuilder& AddCustomDoubleProperty(std::string property_name,
+ V... double_values) {
+ return AddCustomDoubleProperty(std::move(property_name),
+ {double_values...});
+ }
+
+ // Takes a property name and any number of boolean values.
+ template <typename... V>
+ DocumentBuilder& AddBooleanProperty(std::string property_name,
+ V... boolean_values) {
+ return AddBooleanProperty(std::move(property_name), {boolean_values...});
+ }
+
+ // Takes a custom property name and any number of boolean values.
+ template <typename... V>
+ DocumentBuilder& AddCustomBooleanProperty(std::string property_name,
+ V... boolean_values) {
+ return AddCustomBooleanProperty(std::move(property_name),
+ {boolean_values...});
+ }
+
+ // Takes a property name and any number of bytes values.
+ template <typename... V>
+ DocumentBuilder& AddBytesProperty(std::string property_name,
+ V... bytes_values) {
+ return AddBytesProperty(std::move(property_name), {bytes_values...});
+ }
+
+ // Takes a custom property name and any number of bytes values.
+ template <typename... V>
+ DocumentBuilder& AddCustomBytesProperty(std::string property_name,
+ V... bytes_values) {
+ return AddCustomBytesProperty(std::move(property_name), {bytes_values...});
+ }
+
+ // Takes a property name and any number of document values.
+ template <typename... V>
+ DocumentBuilder& AddDocumentProperty(std::string property_name,
+ V&&... document_values) {
+ return AddDocumentProperty(std::move(property_name), {document_values...});
+ }
+
+ // Takes a custom property name and any number of document values.
+ template <typename... V>
+ DocumentBuilder& AddCustomDocumentProperty(std::string property_name,
+ V&&... document_values) {
+ return AddCustomDocumentProperty(std::move(property_name),
+ {document_values...});
+ }
+
+ DocumentProto Build() const { return document_; }
+
+ private:
+ DocumentProto document_;
+
+ DocumentBuilder& AddStringProperty(
+ std::string property_name,
+ std::initializer_list<std::string_view> string_values) {
+ auto property = document_.add_properties();
+ property->set_name(std::move(property_name));
+ for (std::string_view string_value : string_values) {
+ property->mutable_string_values()->Add(std::string(string_value));
+ }
+ return *this;
+ }
+
+ DocumentBuilder& AddCustomStringProperty(
+ std::string property_name,
+ std::initializer_list<std::string_view> string_values) {
+ auto custom_property = document_.add_custom_properties();
+ custom_property->set_name(std::move(property_name));
+ for (std::string_view string_value : string_values) {
+ custom_property->mutable_string_values()->Add(std::string(string_value));
+ }
+ return *this;
+ }
+
+ DocumentBuilder& AddInt64Property(
+ std::string property_name, std::initializer_list<int64_t> int64_values) {
+ auto property = document_.add_properties();
+ property->set_name(std::move(property_name));
+ for (int64_t int64_value : int64_values) {
+ property->mutable_int64_values()->Add(int64_value);
+ }
+ return *this;
+ }
+
+ DocumentBuilder& AddCustomInt64Property(
+ std::string property_name, std::initializer_list<int64_t> int64_values) {
+ auto custom_property = document_.add_custom_properties();
+ custom_property->set_name(std::move(property_name));
+ for (int64_t int64_value : int64_values) {
+ custom_property->mutable_int64_values()->Add(int64_value);
+ }
+ return *this;
+ }
+
+ DocumentBuilder& AddDoubleProperty(
+ std::string property_name, std::initializer_list<double> double_values) {
+ auto property = document_.add_properties();
+ property->set_name(std::move(property_name));
+ for (double double_value : double_values) {
+ property->mutable_double_values()->Add(double_value);
+ }
+ return *this;
+ }
+
+ DocumentBuilder& AddCustomDoubleProperty(
+ std::string property_name, std::initializer_list<double> double_values) {
+ auto custom_property = document_.add_custom_properties();
+ custom_property->set_name(std::move(property_name));
+ for (double double_value : double_values) {
+ custom_property->mutable_double_values()->Add(double_value);
+ }
+ return *this;
+ }
+
+ DocumentBuilder& AddBooleanProperty(
+ std::string property_name, std::initializer_list<bool> boolean_values) {
+ auto property = document_.add_properties();
+ property->set_name(std::move(property_name));
+ for (bool boolean_value : boolean_values) {
+ property->mutable_boolean_values()->Add(boolean_value);
+ }
+ return *this;
+ }
+
+ DocumentBuilder& AddCustomBooleanProperty(
+ std::string property_name, std::initializer_list<bool> boolean_values) {
+ auto custom_property = document_.add_custom_properties();
+ custom_property->set_name(std::move(property_name));
+ for (bool boolean_value : boolean_values) {
+ custom_property->mutable_boolean_values()->Add(boolean_value);
+ }
+ return *this;
+ }
+
+ DocumentBuilder& AddBytesProperty(
+ std::string property_name,
+ std::initializer_list<std::string> bytes_values) {
+ auto property = document_.add_properties();
+ property->set_name(std::move(property_name));
+ for (const std::string& bytes_value : bytes_values) {
+ property->mutable_bytes_values()->Add(std::string(bytes_value));
+ }
+ return *this;
+ }
+
+ DocumentBuilder& AddCustomBytesProperty(
+ std::string property_name,
+ std::initializer_list<std::string> bytes_values) {
+ auto custom_property = document_.add_custom_properties();
+ custom_property->set_name(std::move(property_name));
+ for (const std::string& bytes_value : bytes_values) {
+ custom_property->mutable_bytes_values()->Add(std::string(bytes_value));
+ }
+ return *this;
+ }
+
+ DocumentBuilder& AddDocumentProperty(
+ std::string property_name,
+ std::initializer_list<DocumentProto> document_values) {
+ auto property = document_.add_properties();
+ property->set_name(std::move(property_name));
+ for (DocumentProto document_value : document_values) {
+ property->mutable_document_values()->Add(std::move(document_value));
+ }
+ return *this;
+ }
+
+ DocumentBuilder& AddCustomDocumentProperty(
+ std::string property_name,
+ std::initializer_list<DocumentProto> document_values) {
+ auto custom_property = document_.add_custom_properties();
+ custom_property->set_name(std::move(property_name));
+ for (DocumentProto document_value : document_values) {
+ custom_property->mutable_document_values()->Add(
+ std::move(document_value));
+ }
+ return *this;
+ }
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_DOCUMENT_BUILDER_H_
diff --git a/icing/file/file-backed-bitmap.cc b/icing/file/file-backed-bitmap.cc
new file mode 100644
index 0000000..0eb9474
--- /dev/null
+++ b/icing/file/file-backed-bitmap.cc
@@ -0,0 +1,328 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/file-backed-bitmap.h"
+
+#include <cstdint>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
+#include "icing/util/math-util.h"
+
+namespace icing {
+namespace lib {
+
+int FileBackedBitmap::GetBlockCapacity(int num_blocks) {
+ // The first block has a lower capacity due to the Header.
+ const int capacity_bytes = kBlockByteSize * num_blocks - kHeaderByteSize;
+ return capacity_bytes * 8;
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<FileBackedBitmap>>
+FileBackedBitmap::Create(const Filesystem* filesystem,
+ std::string_view file_path,
+ MemoryMappedFile::Strategy mmap_strategy) {
+ if (mmap_strategy == MemoryMappedFile::Strategy::READ_WRITE_MANUAL_SYNC) {
+ return absl_ports::UnimplementedError(
+ "FileBackedBitmap currently doesn't support READ_WRITE_MANUAL_SYNC "
+ "mmap strategy.");
+ }
+
+ auto bitmap = std::unique_ptr<FileBackedBitmap>(
+ new FileBackedBitmap(filesystem, file_path, mmap_strategy));
+
+ // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // that can support error logging.
+ libtextclassifier3::Status status = bitmap->Initialize();
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << status.error_message();
+ return status;
+ }
+ return bitmap;
+}
+
+FileBackedBitmap::FileBackedBitmap(const Filesystem* filesystem,
+ std::string_view file_path,
+ MemoryMappedFile::Strategy mmap_strategy)
+ : filesystem_(filesystem),
+ file_path_(file_path),
+ mmapper_(new MemoryMappedFile(*filesystem, file_path, mmap_strategy)) {}
+
+FileBackedBitmap::~FileBackedBitmap() {
+ // Only update if we have auto_sync setup, otherwise the checksum will be
+ // updated when the client calls PersistToDisk
+ if (mmapper_->strategy() ==
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC) {
+ // Any valid, initialized file should at least have 1 block.
+ if (mmapper_->region_size() >= kBlockByteSize &&
+ header().version == kCurrentVersion &&
+ header().state == Header::ChecksumState::kStale) {
+ if (!PersistToDisk().ok()) {
+ ICING_LOG(WARNING)
+ << "Failed to persist bitmap to disk while destructing "
+ << file_path_;
+ }
+ }
+ }
+}
+
+const FileBackedBitmap::Header& FileBackedBitmap::header() const {
+ return reinterpret_cast<const Header&>(*mmapper_->region());
+}
+
+FileBackedBitmap::Header* FileBackedBitmap::mutable_header() {
+ return reinterpret_cast<Header*>(mmapper_->mutable_region());
+}
+
+libtextclassifier3::Status FileBackedBitmap::FileBackedBitmap::Initialize() {
+ ICING_VLOG(1) << "Initialize bitmap file: " << file_path_;
+
+ const bool is_new_bitmap = !filesystem_->FileExists(file_path_.c_str());
+
+ int64_t file_size = 0;
+ if (is_new_bitmap) {
+ file_size = kBlockByteSize;
+ if (!filesystem_->Grow(file_path_.c_str(), file_size)) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Unable to create a minimal bitmap; "
+ "filename: %s; target size: %lld",
+ file_path_.c_str(), static_cast<long long>(file_size)));
+ }
+
+ ICING_VLOG(1) << "Creating new bitmap in file: " << file_path_
+ << " of size: " << file_size;
+ } else {
+ file_size = filesystem_->GetFileSize(file_path_.c_str());
+ if (file_size == Filesystem::kBadFileSize) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "File corrupted; filename: %s; size: %lld.", file_path_.c_str(),
+ static_cast<long long>(file_size)));
+ }
+
+ ICING_VLOG(1) << "Loading bitmap from file: " << file_path_
+ << " of size: " << file_size;
+ }
+
+ // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // that can support error logging.
+ libtextclassifier3::Status status = mmapper_->Remap(0, file_size);
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << status.error_message();
+ return status;
+ }
+
+ if (is_new_bitmap) {
+ mutable_header()->version = kCurrentVersion;
+ mutable_header()->state = Header::ChecksumState::kStale;
+ mutable_header()->checksum = 0;
+
+ return mmapper_->PersistToDisk();
+ }
+
+ if (header().state == Header::ChecksumState::kStale) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "File corrupted, has partially flushed data; filename: ", file_path_));
+ }
+
+ if (header().checksum != ComputeChecksum()) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "File corrupted, checksum doesn't match; filename: ", file_path_));
+ }
+
+ if (header().version != kCurrentVersion) {
+ return UpgradeToCurrentVersion();
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status FileBackedBitmap::UpgradeToCurrentVersion() {
+ // Currently, only 1 format is supported.
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "File corrupted, mismatched version; filename: %s; %d vs %d.",
+ file_path_.c_str(), header().version, kCurrentVersion));
+}
+
+libtextclassifier3::Status FileBackedBitmap::SetWord(int word_index,
+ Word word) {
+ if (word_index >= NumBits() / kNumWordBits) {
+ ICING_LOG(ERROR) << "word_index: " << word_index
+ << ", number of words: " << NumBits() / kNumWordBits;
+ return absl_ports::InternalError("Trying to access invalid memory");
+ }
+
+ Word* bitmap_data =
+ reinterpret_cast<Word*>(mmapper_->mutable_region() + kHeaderByteSize);
+
+ bitmap_data[word_index] = word;
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<FileBackedBitmap::Word> FileBackedBitmap::GetWord(
+ int word_index) const {
+ if (word_index >= NumBits() / kNumWordBits) {
+ ICING_LOG(ERROR) << "word_index: " << word_index
+ << ", number of words: " << NumBits() / kNumWordBits;
+ return absl_ports::InternalError("Trying to access invalid memory");
+ }
+
+ const Word* bitmap_data = reinterpret_cast<const Word*>(
+ mmapper_->mutable_region() + kHeaderByteSize);
+ return bitmap_data[word_index];
+}
+
+int FileBackedBitmap::NumBits() const {
+ return (mmapper_->region_size() - kHeaderByteSize) * 8;
+}
+
+libtextclassifier3::Status FileBackedBitmap::Set(int bit_index,
+ bool bit_value) {
+ if (bit_index >= NumBits()) {
+ // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // that can support error logging.
+ libtextclassifier3::Status status = GrowTo(bit_index);
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << status.error_message();
+ return status;
+ }
+
+ if (!bit_value) {
+ // All newly added bits are set to false.
+ return libtextclassifier3::Status::OK;
+ }
+ }
+
+ // Figure out which word needs to be modified.
+ const int word_index = bit_index / kNumWordBits;
+ const int word_mask = 1u << (bit_index % kNumWordBits);
+
+ ICING_ASSIGN_OR_RETURN(Word old_word, GetWord(word_index));
+ Word new_word = bit_value ? (old_word | word_mask) : old_word & ~word_mask;
+ if (new_word != old_word) {
+ ICING_RETURN_IF_ERROR(SetWord(word_index, new_word));
+ mutable_header()->state = Header::ChecksumState::kStale;
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<bool> FileBackedBitmap::Get(int bit_index) const {
+ if (bit_index >= NumBits()) {
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Bitmap file %s is of size %d and can't read bit_index %d.",
+ file_path_.c_str(), NumBits(), bit_index));
+ }
+
+ const Word word_index = bit_index / kNumWordBits;
+ const Word word_mask = 1u << (bit_index % kNumWordBits);
+
+ ICING_ASSIGN_OR_RETURN(Word word, GetWord(word_index));
+ return word & word_mask;
+}
+
+size_t FileBackedBitmap::FileSizeForBits(int num_bits) {
+ const int word_index = num_bits / kNumWordBits;
+ size_t new_file_size = kHeaderByteSize + (word_index + 1) * sizeof(Word);
+ return math_util::RoundUpTo(new_file_size,
+ static_cast<size_t>(kBlockByteSize));
+}
+
+libtextclassifier3::Status FileBackedBitmap::GrowTo(int new_num_bits) {
+ if (new_num_bits > kMaxNumBits) {
+ return absl_ports::ResourceExhaustedError(IcingStringUtil::StringPrintf(
+ "Bitmap file %s has a max-capacity of %d bits and cannot fit %d bits",
+ file_path_.c_str(), kMaxNumBits, new_num_bits));
+ }
+
+ const size_t new_file_size = FileSizeForBits(new_num_bits);
+ if (!filesystem_->Grow(file_path_.c_str(), new_file_size)) {
+ return absl_ports::InternalError(
+ IcingStringUtil::StringPrintf("Growing file %s to new size %zd failed",
+ file_path_.c_str(), new_file_size));
+ }
+
+ // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // that can support error logging.
+ libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size);
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << status.error_message();
+ return status;
+ }
+
+ ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+ "Grew file %s to new size %zd", file_path_.c_str(), new_file_size);
+ mutable_header()->state = Header::ChecksumState::kStale;
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status FileBackedBitmap::TruncateTo(int new_num_bits) {
+ if (new_num_bits > NumBits()) {
+ return libtextclassifier3::Status::OK;
+ }
+
+ const size_t new_file_size = FileSizeForBits(new_num_bits);
+ // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // that can support error logging.
+ libtextclassifier3::Status status = mmapper_->Remap(0, new_file_size);
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << status.error_message();
+ return status;
+ }
+ if (!filesystem_->Truncate(file_path_.c_str(), new_file_size)) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Truncating file %s to new size %zd failed", file_path_.c_str(),
+ new_file_size));
+ }
+
+ const int word_index = new_num_bits / kNumWordBits;
+ // Mask to only keep bits <= new_num_bits and clear everything else.
+ const Word word_mask = (1u << (new_num_bits % kNumWordBits)) - 1;
+
+ ICING_ASSIGN_OR_RETURN(Word old_word, GetWord(word_index));
+ Word new_word = old_word & word_mask;
+ ICING_RETURN_IF_ERROR(SetWord(word_index, new_word));
+
+ // TODO(cassiewang) It might be worth replacing this with memset().
+ const int num_words = NumBits() / kNumWordBits;
+ for (int i = word_index + 1; i < num_words; ++i) {
+ ICING_RETURN_IF_ERROR(SetWord(i, 0));
+ }
+
+ mutable_header()->state = Header::ChecksumState::kStale;
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status FileBackedBitmap::PersistToDisk() {
+ mutable_header()->checksum = ComputeChecksum();
+ mutable_header()->state = Header::ChecksumState::kFresh;
+ return mmapper_->PersistToDisk();
+}
+
+uint32_t FileBackedBitmap::ComputeChecksum() const {
+ std::string_view bitmap_bytes(mmapper_->region() + kHeaderByteSize,
+ NumBits() / 8);
+ return Crc32().Append(bitmap_bytes);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/file-backed-bitmap.h b/icing/file/file-backed-bitmap.h
new file mode 100644
index 0000000..54d9245
--- /dev/null
+++ b/icing/file/file-backed-bitmap.h
@@ -0,0 +1,219 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// A file-backed bitmap with fast & efficient reads/writes of bits.
+// The bitmap will automatically grow in size as more bits are added, with a
+// max-capacity of 2M bits.
+//
+// Note on Performance:
+// This class internally uses mmap() without a readahead buffer. This keeps the
+// memory-usage low while also having low (amortized) read/write latency.
+// However, some reads/writes will pay the cost of page-faults.
+// In order to keep memory-mapping efficient, the bitmap always grows in
+// 4KiB sized blocks so that it is aligned with system page-size.
+//
+// This class doesn't aggressively flush/sync changes to disk and relies on the
+// system to buffer and flush changes in the background. This greatly reduces
+// disk-churn and performance of writes. However, an unexpected crash or an
+// abrupt reboot of the system could lead to data-loss. This can be mitigated
+// by manually calling PersistToDisk() when needed.
+//
+// Usage:
+// auto bitmap = RETURN_OR_ASSIGN(FileBackedBitmap::Create(...));
+//
+// bitmap.Set(100, false);
+// bitmap.Set(10, true);
+//
+// bitmap.Get(0); // Default value of 'false'.
+// bitmap.Get(10);
+//
+// bitmap.PersistToDisk(); // Optional. Immediately syncs all changes to disk.
+// bitmap.reset();
+
+#ifndef ICING_FILE_FILE_BACKED_BITMAP_H_
+#define ICING_FILE_FILE_BACKED_BITMAP_H_
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+
+namespace icing {
+namespace lib {
+
+class FileBackedBitmap {
+ public:
+  // Growth of FileBackedBitmap is in blocks of a fixed size. This helper
+  // returns the number of bits that can be fitted in the specified number of
+  // blocks.
+  //
+  // NOTE: This is meant for tests and clients shouldn't care about this.
+  static int GetBlockCapacity(int num_blocks);
+
+  // Returns an initialized instance of the bitmap that can immediately handle
+  // read/write operations.
+  //
+  // file_path : Specifies the file to persist the bitmap to; must be a path
+  //             within a directory that already exists. If the file itself
+  //             doesn't exist, a new bitmap will be created.
+  //
+  // mmap_strategy : Mmap strategy for the underlying file, see
+  //                 MemoryMappedFile::Strategy for more details.
+  //
+  // Returns an error if the file was corrupted or if any IO error was
+  // encountered. An error here implies that the old data has been lost and
+  // the file has to be deleted and re-initialized again.
+  static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedBitmap>> Create(
+      const Filesystem* filesystem, std::string_view file_path,
+      MemoryMappedFile::Strategy mmap_strategy);
+
+  // If the bitmap was created with
+  // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, then changes will be
+  // synced by the system and the checksum will be updated.
+  ~FileBackedBitmap();
+
+  // Set the bit at the specified position. The bitmap is automatically resized
+  // to at least fit 'bit_index' bits. bit_index should not be larger than
+  // 2M, which is the max-capacity of FileBackedBitmap.
+  //
+  // Returns RESOURCE_EXHAUSTED if past max-capacity, else any IO error hit.
+  //
+  // NOTE: While changes take place immediately, they may not be fully persisted
+  // to disk till PersistToDisk() is called.
+  //
+  // NOTE: The Bitmap grows in blocks of 4KiB. So, setting a specific bit
+  // beyond current capacity can lead to pre-allocating up to ~32K extra bits.
+  libtextclassifier3::Status Set(int bit_index, bool bit_value);
+
+  // Get the bit at the specified index. Unset bits default to 'false'.
+  //
+  // Returns OUT_OF_RANGE error if bit_index >= NumBits().
+  libtextclassifier3::StatusOr<bool> Get(int bit_index) const;
+
+  // Count of bits currently being stored in this bitmap.
+  //
+  // NOTE: BitMap growth happens in blocks of 4KiB. So, the smallest bitmap will
+  // automatically have ~32K bits pre-allocated. Subsequently, future
+  // growths/truncations of the bitmap will change NumBits() in multiples of
+  // 32K.
+  int NumBits() const;
+
+  // Truncates the size of the bitmap to 'new_num_bits'. Any data beyond this
+  // will be lost.
+  libtextclassifier3::Status TruncateTo(int new_num_bits);
+
+  // Syncs all the changes made to the bitmap to disk and updates the checksum.
+  //
+  // Returns any encountered IO error.
+  //
+  // NOTE: Neither Set() nor the ~FileBackedBitmap() guarantee syncing all
+  // changes to disk. This method should be explicitly called to protect the
+  // data from an abrupt system reboot.
+  libtextclassifier3::Status PersistToDisk();
+
+ private:
+  // Limit the max-size of the bitmap. Someone wanting to store more bits will
+  // likely benefit from a custom solution.
+  static constexpr int kMaxNumBits = 2 * 1024 * 1024;
+
+  // Growth of FileBackedBitmap will be in blocks of this size. This size
+  // should align with the page-size of the system so that mmapping can be
+  // most efficient.
+  static constexpr int kBlockByteSize = 4 * 1024;
+
+  // Version of the file-format used by the class. Every time the format is
+  // modified in a backwards-incompatible way, this needs to be incremented.
+  static constexpr int32_t kCurrentVersion = 1;
+
+  struct Header {
+    // Version of the file-format used by this class. This allows us to change
+    // the format and upgrade old data to the new format without losing it.
+    int32_t version;
+
+    // Checksum of the entire file when it was last persisted to disk.
+    // This is used on init to make sure that the file has not been corrupted.
+    //
+    // NOTE: The checksum is not expected to match when ChecksumState=kStale.
+    uint32_t checksum;
+
+    // As an optimization, FileBackedBitmap delays recomputation of the checksum
+    // even when some bits in the Bitmap are modified. While this improves
+    // performance, it increases the risk of losing data due to a crash.
+    // ChecksumState tracks if the changes to the bitmap have been fully
+    // reflected in the checksum stored above.
+    //
+    // NOTE: We use int32_t to store a bool info here to keep the Header
+    // aligned.
+    enum ChecksumState : int32_t { kFresh, kStale };
+    ChecksumState state;
+  };
+
+  // The size of the backing file to store the specified number of bits. This
+  // size is aligned to the page-size of the system so that it can be
+  // efficiently memory mapped.
+  static size_t FileSizeForBits(int num_bits);
+
+  static constexpr int kHeaderByteSize = sizeof(Header);  // Header prefix size.
+
+  // Helpers to read/modify the header of the bitmap file.
+  const Header& header() const;
+  Header* mutable_header();
+
+  // Use FileBackedBitmap::Create() to instantiate.
+  FileBackedBitmap(const Filesystem* filesystem, std::string_view file_path,
+                   MemoryMappedFile::Strategy mmap_strategy);
+
+  // Verify the contents of the bitmap and get ready for read/write operations.
+  //
+  // Returns an error if the file was corrupted or if any IO error was
+  // encountered. An error here implies that the old data has been lost and
+  // the file has to be deleted and re-initialized again.
+  libtextclassifier3::Status Initialize();
+
+  // Makes sure that the data on disk is upgraded to match the file-format
+  // represented by kCurrentVersion.
+  libtextclassifier3::Status UpgradeToCurrentVersion();
+
+  // Grows the size of the bitmap to match 'new_num_bits'. Any newly added bit
+  // will default to 'false'.
+  //
+  // The upper-bound for new_num_bits is kMaxNumBits. Requests to further
+  // increase the size will fail (NOTE(review): tests observe RESOURCE_EXHAUSTED).
+  libtextclassifier3::Status GrowTo(int new_num_bits);
+
+  using Word = uint32_t;
+  static constexpr int kNumWordBits = sizeof(Word) * 8;
+
+  // Helpers to perform 32bit read/write operations on the raw bitmap data.
+  // This makes it easy to use 32bit bitwise operations to modify the bitmap.
+  libtextclassifier3::StatusOr<Word> GetWord(int word_index) const;
+  libtextclassifier3::Status SetWord(int word_index, Word word);
+
+  // CRC32 based checksum of all the bits stored in the bitmap. This checksum
+  // only uses the data and not the contents of the header.
+  uint32_t ComputeChecksum() const;
+
+  const Filesystem* const filesystem_;
+  const std::string file_path_;
+  std::unique_ptr<MemoryMappedFile> mmapper_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_FILE_BACKED_BITMAP_H_
diff --git a/icing/file/file-backed-bitmap_test.cc b/icing/file/file-backed-bitmap_test.cc
new file mode 100644
index 0000000..9bfec65
--- /dev/null
+++ b/icing/file/file-backed-bitmap_test.cc
@@ -0,0 +1,395 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/file-backed-bitmap.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+namespace {
+
+using ::testing::_;
+using ::testing::HasSubstr;
+
+class FileBackedBitmapTest : public testing::Test {
+ protected:
+  static int GetBlockCapacity(int num_blocks) {  // Proxy to private helper.
+    return FileBackedBitmap::GetBlockCapacity(num_blocks);
+  }
+
+  void SetUp() override { file_path_ = GetTestTempDir() + "/bitmap"; }
+
+  void TearDown() override { filesystem_.DeleteFile(file_path_.c_str()); }
+
+  std::string file_path_;
+  Filesystem filesystem_;
+  MemoryMappedFile::Strategy mmap_strategy_ =  // All tests use auto-sync mmap.
+      MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC;
+};
+
+// TODO(cassiewang) Add tests for other corruption scenarios where the
+// file has an invalid checksum or is dirty on init.
+TEST_F(FileBackedBitmapTest, InvalidFile) {
+  ASSERT_THAT(FileBackedBitmap::Create(&filesystem_, "", mmap_strategy_)
+                  .status()
+                  .error_message(),
+              HasSubstr("Unable to create"));  // Empty path can't be created.
+
+  std::string bad_data = "Bad bitmap file content";  // Not a valid header.
+  filesystem_.Write(file_path_.c_str(), bad_data.data(), bad_data.size());
+
+  ASSERT_THAT(FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_)
+                  .status()
+                  .error_message(),
+              HasSubstr("corrupted"));  // Garbage content fails init checks.
+}
+
+TEST_F(FileBackedBitmapTest, CreateNewBitMap) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedBitmap> bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+  // Even a new bitmap will have 1 block with pre-allocated bits.
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));
+  EXPECT_THAT(bitmap->Get(0), IsOkAndHolds(false));  // Unset bits read false.
+  ICING_EXPECT_OK(bitmap->PersistToDisk());
+}
+
+TEST_F(FileBackedBitmapTest, CanReadAfterWrite) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedBitmap> bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+
+  // Can Grow bitmap to store both true and false.
+  ICING_EXPECT_OK(bitmap->Set(100000, true));
+  ICING_EXPECT_OK(bitmap->Set(200000, false));
+
+  EXPECT_THAT(bitmap->Get(0), IsOkAndHolds(false));  // Untouched bit is false.
+  EXPECT_THAT(bitmap->Get(100000), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(200000), IsOkAndHolds(false));
+
+  // Can write new data without growing the bitmap.
+  ICING_EXPECT_OK(bitmap->Set(50, true));
+  EXPECT_THAT(bitmap->Get(50), IsOkAndHolds(true));
+
+  // Can modify the value of a previously written bit.
+  ICING_EXPECT_OK(bitmap->Set(100000, false));
+  ICING_EXPECT_OK(bitmap->Set(200000, true));
+  EXPECT_THAT(bitmap->Get(100000), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(200000), IsOkAndHolds(true));
+}
+
+// Make sure that the growth of the bitmap is in multiples of 4KiB blocks.
+// This is required to keep our memory mapping efficient.
+TEST_F(FileBackedBitmapTest, BitMapGrowsInMultipleOfBlocks) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedBitmap> bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));  // Starts with 1 block.
+  EXPECT_THAT(bitmap->Get(GetBlockCapacity(1) - 1), IsOkAndHolds(false));
+
+  // 100K bits need four 4KiB sized blocks.
+  ICING_EXPECT_OK(bitmap->Set(100000, true));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(4));
+
+  // 200K bits need seven 4KiB sized blocks.
+  ICING_EXPECT_OK(bitmap->Set(200000, false));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(7));
+
+  // Reusing pre-allocated bits doesn't require any growth.
+  ICING_EXPECT_OK(bitmap->Set(0, false));
+  ICING_EXPECT_OK(bitmap->Set(1000, false));
+  ICING_EXPECT_OK(bitmap->Set(GetBlockCapacity(7) - 1, false));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(7));
+}
+
+TEST_F(FileBackedBitmapTest, CanPersistBitmapToDiskRegularly) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedBitmap> bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+
+  // Can read older data after PersistToDisk.
+  ICING_EXPECT_OK(bitmap->Set(100, true));
+  ICING_EXPECT_OK(bitmap->PersistToDisk());
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(50), IsOkAndHolds(false));
+
+  // Can grow the bitmap to write new data after PersistToDisk.
+  ICING_EXPECT_OK(bitmap->Set(50000, false));  // Forces growth to 2 blocks.
+  ICING_EXPECT_OK(bitmap->PersistToDisk());
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(2));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(50000), IsOkAndHolds(false));
+
+  // Can write new data without growing the bitmap after PersistToDisk.
+  ICING_EXPECT_OK(bitmap->Set(50, true));
+  ICING_EXPECT_OK(bitmap->PersistToDisk());
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(2));
+  EXPECT_THAT(bitmap->Get(50), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(50000), IsOkAndHolds(false));
+}
+
+TEST_F(FileBackedBitmapTest, BitmapUsableAcrossMultipleInstances) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedBitmap> bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+  ICING_EXPECT_OK(bitmap->Set(100, true));
+
+  // Persist all data and reset the bitmap instance.
+  ICING_EXPECT_OK(bitmap->PersistToDisk());
+  bitmap.reset();  // Simulates a restart; data must come back from the file.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));
+  EXPECT_THAT(bitmap->Get(0), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));  // Survived the reload.
+
+  // Reset the bitmap instance without explicitly persisting data.
+  // Even here, the system should flush the data, unless the device reboots.
+  bitmap.reset();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));
+  EXPECT_THAT(bitmap->Get(0), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));
+
+  // We can continue to read/write bits on an existing data.
+  ICING_EXPECT_OK(bitmap->Set(200, false));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));
+  EXPECT_THAT(bitmap->Get(200), IsOkAndHolds(false));
+}
+
+TEST_F(FileBackedBitmapTest, HandleOutOfRangeReads) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedBitmap> bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+  // The bitmap initially has one block's worth of bits pre-allocated.
+  EXPECT_THAT(bitmap->Get(0), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(GetBlockCapacity(1) - 1), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(GetBlockCapacity(1)),  // First index past the end.
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+  EXPECT_THAT(bitmap->Get(GetBlockCapacity(2)),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+  // Expand bitmap to use 2 blocks.
+  ICING_EXPECT_OK(bitmap->Set(GetBlockCapacity(1) + 100, true));
+  EXPECT_THAT(bitmap->Get(GetBlockCapacity(1) + 1), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(GetBlockCapacity(2) - 1), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(GetBlockCapacity(2) + 1),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+  EXPECT_THAT(bitmap->Get(GetBlockCapacity(5)),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+  ICING_EXPECT_OK(bitmap->Set(100000, true));  // Grows capacity again.
+  EXPECT_THAT(bitmap->Get(100000), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(GetBlockCapacity(5)),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(FileBackedBitmapTest, TruncateBitmap) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedBitmap> bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+
+  // NOTE: Set uses zero-based index. So, Set(99) sets the 100th bit.
+  ICING_EXPECT_OK(bitmap->Set(99, true));
+  ICING_EXPECT_OK(bitmap->Set(100, true));
+  ICING_EXPECT_OK(bitmap->Set(101, true));
+
+  // No bits are set at index >= 200. So, nothing observable changes.
+  ICING_EXPECT_OK(bitmap->TruncateTo(200));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));
+  EXPECT_THAT(bitmap->Get(99), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(101), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(199), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(200), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(201), IsOkAndHolds(false));
+
+  // The bitmap doesn't have any set bits beyond the requested size of 102.
+  // So, nothing observable changes either.
+  ICING_EXPECT_OK(bitmap->TruncateTo(102));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));
+  EXPECT_THAT(bitmap->Get(99), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(101), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(102), IsOkAndHolds(false));
+
+  // TruncateTo(100) should clear bits 100 and 101, keeping only bit 99.
+  ICING_EXPECT_OK(bitmap->TruncateTo(100));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));
+  EXPECT_THAT(bitmap->Get(99), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(101), IsOkAndHolds(false));
+}
+
+TEST_F(FileBackedBitmapTest, TruncateBitmapAcrossMultipleBlocks) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedBitmap> bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+  ICING_EXPECT_OK(bitmap->Set(100, true));
+  ICING_EXPECT_OK(bitmap->Set(1000, true));
+  ICING_EXPECT_OK(bitmap->Set(100000, true));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(4));
+
+  // Requested size exceeds current capacity. So, nothing to truncate.
+  ICING_EXPECT_OK(bitmap->TruncateTo(200000));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(1000), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(100000), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(4));
+
+  // No set bits at index >= 100001. So, no data is lost.
+  ICING_EXPECT_OK(bitmap->TruncateTo(100001));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(1000), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(100000), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(4));
+
+  // Truncate to 50K should chop off block#3 and block#4.
+  // It should also clear bits beyond 50K.
+  ICING_EXPECT_OK(bitmap->Set(49999, true));
+  ICING_EXPECT_OK(bitmap->Set(50000, true));
+  ICING_EXPECT_OK(bitmap->Set(50001, true));
+  ICING_EXPECT_OK(bitmap->TruncateTo(50000));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(2));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(1000), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(49999), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(50000), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(50001), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(100000),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+  // Truncate to 2K should chop off the 2nd block.
+  ICING_EXPECT_OK(bitmap->TruncateTo(2000));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(1000), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(50000),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+  EXPECT_THAT(bitmap->Get(100000),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+  // Truncate to 500 should not chop any blocks off, but clears later bits.
+  ICING_EXPECT_OK(bitmap->TruncateTo(500));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(1000), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(50000),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+  EXPECT_THAT(bitmap->Get(100000),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(FileBackedBitmapTest, TruncateBitmapAcrossInstances) {
+  // Instance#1
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedBitmap> bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+  ICING_EXPECT_OK(bitmap->Set(100, true));
+  ICING_EXPECT_OK(bitmap->Set(1000, true));
+  ICING_EXPECT_OK(bitmap->Set(100000, true));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(4));
+
+  // Instance#2
+  bitmap.reset();  // Truncation works via a freshly re-created instance.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+  ICING_EXPECT_OK(bitmap->TruncateTo(50000));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(2));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(1000), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(49999), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(50000), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(100000),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+  // Instance#3
+  bitmap.reset();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+  ICING_EXPECT_OK(bitmap->TruncateTo(500));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));
+  EXPECT_THAT(bitmap->Get(100), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(500), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(1000), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(100000),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+// Make sure that a bitmap can both grow and be truncated many times.
+TEST_F(FileBackedBitmapTest, TruncateAndGrowBitmap) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedBitmap> bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+
+  // Grow#1
+  ICING_EXPECT_OK(bitmap->Set(1000, true));
+  ICING_EXPECT_OK(bitmap->Set(100000, true));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(4));
+
+  // Truncate#1
+  ICING_EXPECT_OK(bitmap->TruncateTo(50000));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(2));
+  EXPECT_THAT(bitmap->Get(1000), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(100000),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+  // Grow#2: the truncated bit must stay cleared after re-growth.
+  ICING_EXPECT_OK(bitmap->Set(200000, true));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(7));
+  EXPECT_THAT(bitmap->Get(1000), IsOkAndHolds(true));
+  EXPECT_THAT(bitmap->Get(100000), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(200000), IsOkAndHolds(true));
+
+  // Truncate#2
+  ICING_EXPECT_OK(bitmap->TruncateTo(1000));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));
+  EXPECT_THAT(bitmap->Get(1000), IsOkAndHolds(false));
+  EXPECT_THAT(bitmap->Get(100000),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+  EXPECT_THAT(bitmap->Get(200000),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(FileBackedBitmapTest, BitMapCantGrowTooBigInSize) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedBitmap> bitmap,
+      FileBackedBitmap::Create(&filesystem_, file_path_, mmap_strategy_));
+  // Set a bit that is within the 2M bit limit.
+  ICING_EXPECT_OK(bitmap->Set(1000, true));
+
+  // Go beyond the 2M bit limit.
+  EXPECT_THAT(bitmap->Set(3 * 1024 * 1024, true),
+              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+  // Subsequent calls work fine; the failed Set didn't grow the bitmap.
+  ICING_EXPECT_OK(bitmap->Set(2000, true));
+  EXPECT_THAT(bitmap->NumBits(), GetBlockCapacity(1));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
new file mode 100644
index 0000000..d17757f
--- /dev/null
+++ b/icing/file/file-backed-proto-log.h
@@ -0,0 +1,867 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// File-backed log of protos with append-only writes and position based reads.
+//
+// There should only be one instance of a FileBackedProtoLog of the same file at
+// a time; using multiple instances at the same time may lead to undefined
+// behavior.
+//
+// The entire checksum is computed on initialization to verify the contents are
+// valid. On failure, the log will be truncated to the last verified state when
+// PersistToDisk() was called. If the log cannot successfully restore the last
+// state due to disk corruption or some other inconsistency, then the entire log
+// will be lost.
+//
+// Each proto written to the file will have a metadata written just before it.
+// The metadata consists of
+// {
+//    1 byte of kProtoMagic;
+// 3 bytes of the proto size
+// n bytes of the proto itself
+// }
+//
+// Example usage:
+// ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+// FileBackedProtoLog<DocumentProto>::Create(filesystem, file_path_,
+// options));
+// auto proto_log = create_result.proto_log;
+//
+// Document document;
+// document.set_namespace("com.google.android.example");
+// document.set_uri("www.google.com");
+//
+// int64_t document_offset = proto_log->WriteProto(document));
+// Document same_document = proto_log->ReadProto(document_offset));
+// proto_log->PersistToDisk();
+//
+// TODO(b/136514769): Add versioning to the header and a UpgradeToVersion
+// migration method.
+
+#ifndef ICING_FILE_FILE_BACKED_PROTO_LOG_H_
+#define ICING_FILE_FILE_BACKED_PROTO_LOG_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include <google/protobuf/io/gzip_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/portable/zlib.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+// Append-only, file-backed log of serialized ProtoT messages. Owns the
+// underlying file descriptor; see the file comment above for the on-disk
+// format and usage examples.
+template <typename ProtoT>
+class FileBackedProtoLog {
+ public:
+  struct Options {
+    // Whether to compress each proto before writing to the proto log.
+    bool compress;
+
+    // Byte-size limit for each proto written to the store. This does not
+    // include the bytes needed for the metadata of each proto.
+    //
+    // NOTE: Currently, we only support protos up to 16MiB. We store the proto
+    // size in 3 bytes within the metadata.
+    //
+    // NOTE: This limit is only enforced for future writes. If the store
+    // previously had a higher limit, then reading older entries could return
+    // larger protos.
+    //
+    // NOTE: The max_proto_size is the upper limit for input protos into the
+    // ProtoLog. Even if the proto is larger than max_proto_size, but compresses
+    // to a smaller size, ProtoLog will not accept it. Protos that result in a
+    // compressed size larger than max_proto_size are also not accepted.
+    const int32_t max_proto_size;
+
+    // Must specify values for options.
+    Options() = delete;
+    explicit Options(bool compress_in,
+                     const int32_t max_proto_size_in = kMaxProtoSize)
+        : compress(compress_in), max_proto_size(max_proto_size_in) {}
+  };
+
+  // Header stored at the beginning of the file before the rest of the log
+  // contents. Stores metadata on the log.
+  //
+  // TODO(b/139375388): Migrate the Header struct to a proto. This makes
+  // migrations easier since we don't need to worry about different size padding
+  // (which would affect the checksum) and different endians.
+  struct Header {
+    static constexpr int32_t kMagic = 0xf4c6f67a;
+
+    // Holds the magic as a quick sanity check against file corruption.
+    int32_t magic = kMagic;
+
+    // Whether to compress the protos before writing to the log.
+    bool compress = true;
+
+    // The maximum proto size that can be written to the log.
+    int32_t max_proto_size = 0;
+
+    // Checksum of the log elements, doesn't include the header fields.
+    uint32_t log_checksum = 0;
+
+    // Last known good offset at which the log and its checksum were updated.
+    // If we crash between writing to the log and updating the checksum, we can
+    // try to rewind the log to this offset and verify the checksum is still
+    // valid instead of throwing away the entire log.
+    int64_t rewind_offset = sizeof(Header);
+
+    // Must be at the end. Contains the crc checksum of the preceding fields.
+    uint32_t header_checksum = 0;
+
+    // Computes the crc over every field before header_checksum, relying on
+    // header_checksum being the last member (see comment above).
+    uint32_t CalculateHeaderChecksum() const {
+      Crc32 crc;
+      std::string_view header_str(reinterpret_cast<const char*>(this),
+                                  offsetof(Header, header_checksum));
+      crc.Append(header_str);
+      return crc.Get();
+    }
+  };
+
+  struct CreateResult {
+    // A successfully initialized log.
+    std::unique_ptr<FileBackedProtoLog<ProtoT>> proto_log;
+
+    // Whether there was some data loss while initializing from a previous
+    // state. This can happen if the file is corrupted or some previously added
+    // data was unpersisted. This may be used to signal that any derived data
+    // off of the proto log may need to be regenerated.
+    bool data_loss;
+  };
+
+  // Factory method to create, initialize, and return a FileBackedProtoLog. Will
+  // create the file if it doesn't exist.
+  //
+  // If on re-initialization the log detects disk corruption or some previously
+  // added data was unpersisted, the log will rewind to the last-good state. The
+  // log saves these checkpointed "good" states when PersistToDisk() is called
+  // or the log is safely destructed. If the log rewinds successfully to the
+  // last-good state, then the returned CreateResult.data_loss indicates
+  // there was some data loss so that any derived data may know that it
+  // needs to be updated. If the log re-initializes successfully without any
+  // data loss, the boolean will be false.
+  //
+  // Params:
+  //   filesystem: Handles system level calls
+  //   file_path: Path of the underlying file. Directory of the file should
+  //   already exist
+  //   options: Configuration options for the proto log
+  //
+  // Returns:
+  //   FileBackedProtoLog::CreateResult on success
+  //   INVALID_ARGUMENT on an invalid option
+  //   INTERNAL_ERROR on IO error
+  static libtextclassifier3::StatusOr<CreateResult> Create(
+      const Filesystem* filesystem, const std::string& file_path,
+      const Options& options);
+
+  // Not copyable
+  FileBackedProtoLog(const FileBackedProtoLog&) = delete;
+  FileBackedProtoLog& operator=(const FileBackedProtoLog&) = delete;
+
+  // This will update the checksum of the log as well.
+  ~FileBackedProtoLog();
+
+  // Writes the serialized proto to the underlying file. Writes are applied
+  // directly to the underlying file. Users do not need to sync the file after
+  // writing.
+  //
+  // Returns:
+  //   Offset of the newly appended proto in file on success
+  //   INVALID_ARGUMENT if proto is too large, as decided by
+  //   Options.max_proto_size
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<int64_t> WriteProto(const ProtoT& proto);
+
+  // Reads out a proto located at file_offset from the file.
+  //
+  // Returns:
+  //   A proto on success
+  //   OUT_OF_RANGE_ERROR if file_offset exceeds file size
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<ProtoT> ReadProto(int64_t file_offset) const;
+
+  // Calculates and returns the disk usage in bytes.
+  //
+  // Returns:
+  //   Disk usage on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+
+  // An iterator helping to find offsets of all the protos in file.
+  // Example usage:
+  //
+  // while (iterator.Advance().ok()) {
+  //   int64_t offset = iterator.GetOffset();
+  //   // Do something
+  // }
+  class Iterator {
+   public:
+    Iterator(const Filesystem& filesystem, const std::string& file_path,
+             int64_t initial_offset);
+
+    // Advances to the position of next proto.
+    //
+    // Returns:
+    //   OK on success
+    //   OUT_OF_RANGE_ERROR if it reaches the end
+    //   INTERNAL_ERROR on IO error
+    libtextclassifier3::Status Advance();
+
+    // Returns the file offset of current proto.
+    int64_t GetOffset();
+
+   private:
+    static constexpr int64_t kInvalidOffset = -1;
+    // Used to read proto metadata
+    MemoryMappedFile mmapped_file_;
+    // Offset of first proto
+    int64_t initial_offset_;
+    int64_t current_offset_;
+    int64_t file_size_;
+  };
+
+  // Returns an iterator of current proto log. The caller needs to keep the
+  // proto log unchanged while using the iterator, otherwise unexpected
+  // behaviors could happen.
+  Iterator GetIterator();
+
+  // Persists all changes since initialization or the last call to
+  // PersistToDisk(). Any changes that aren't persisted may be lost if the
+  // system fails to close safely.
+  //
+  // Example use case:
+  //
+  //    Document document;
+  //    document.set_namespace("com.google.android.example");
+  //    document.set_uri("www.google.com");
+  //
+  //    {
+  //      ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+  //          FileBackedProtoLog<DocumentProto>::Create(filesystem, file_path,
+  //          options));
+  //      auto proto_log = std::move(create_result.proto_log);
+  //
+  //      int64_t document_offset = proto_log->WriteProto(document));
+  //
+  //      // We lose the document here since it wasn't persisted.
+  //      // *SYSTEM CRASH*
+  //    }
+  //
+  //    {
+  //      // Can still successfully create after a crash since the log can
+  //      // rewind/truncate to recover into a previously good state
+  //      ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+  //          FileBackedProtoLog<DocumentProto>::Create(filesystem, file_path,
+  //          options));
+  //      auto proto_log = std::move(create_result.proto_log);
+  //
+  //      // Lost the proto since we didn't PersistToDisk before the crash
+  //      proto_log->ReadProto(document_offset)); // INVALID_ARGUMENT error
+  //
+  //      int64_t document_offset = proto_log->WriteProto(document));
+  //
+  //      // Persisted this time, so we should be ok.
+  //      ICING_ASSERT_OK(proto_log->PersistToDisk());
+  //    }
+  //
+  //    {
+  //      ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+  //          FileBackedProtoLog<DocumentProto>::Create(filesystem, file_path,
+  //          options));
+  //      auto proto_log = std::move(create_result.proto_log);
+  //
+  //      // SUCCESS
+  //      Document same_document = proto_log->ReadProto(document_offset));
+  //    }
+  //
+  // NOTE: Since all protos are already written to the file directly, this
+  // just updates the checksum and rewind position. Without these updates,
+  // future initializations will truncate the file and discard unpersisted
+  // changes.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status PersistToDisk();
+
+  // Calculates the checksum of the log contents. Excludes the header content.
+  //
+  // Returns:
+  //   Crc of the log content
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
+
+ private:
+  // Object can only be instantiated via the ::Create factory.
+  FileBackedProtoLog(const Filesystem* filesystem, const std::string& file_path,
+                     std::unique_ptr<Header> header);
+
+  // Initializes a new proto log.
+  //
+  // Returns:
+  //   std::unique_ptr<FileBackedProtoLog> that can be used immediately
+  //   INTERNAL_ERROR on IO error
+  static libtextclassifier3::StatusOr<CreateResult> InitializeNewFile(
+      const Filesystem* filesystem, const std::string& file_path,
+      const Options& options);
+
+  // Verifies that the existing proto log is in a good state. If not in a good
+  // state, then the proto log may be truncated to the last good state and
+  // content will be lost.
+  //
+  // Returns:
+  //   std::unique_ptr<FileBackedProtoLog> that can be used immediately
+  //   INTERNAL_ERROR on IO error or internal inconsistencies in the file
+  //   INVALID_ARGUMENT_ERROR if options aren't consistent with previous
+  //   instances
+  static libtextclassifier3::StatusOr<CreateResult> InitializeExistingFile(
+      const Filesystem* filesystem, const std::string& file_path,
+      const Options& options, int64_t file_size);
+
+  // Takes an initial checksum and updates it with the content between `start`
+  // and `end` offsets in the file.
+  //
+  // Returns:
+  //   Crc of the content between `start`, inclusive, and `end`, exclusive.
+  //   INTERNAL_ERROR on IO error
+  //   INVALID_ARGUMENT_ERROR if start and end aren't within the file size
+  static libtextclassifier3::StatusOr<Crc32> ComputeChecksum(
+      const Filesystem* filesystem, const std::string& file_path,
+      Crc32 initial_crc, int64_t start, int64_t end);
+
+  // Magic number added in front of every proto. Used when reading out protos
+  // as a first check for corruption in each entry in the file. Even if there is
+  // a corruption, the best we can do is roll back to our last recovery point
+  // and throw away un-flushed data. We can discard/reuse this byte if needed so
+  // that we have 4 bytes to store the size of protos, and increase the size of
+  // protos we support.
+  static constexpr uint8_t kProtoMagic = 0x5C;
+
+  // Our internal max for protos.
+  //
+  // WARNING: Changing this to a larger number may invalidate our assumption
+  // that the proto size can safely be stored in the last 3 bytes of the proto
+  // header.
+  static constexpr int kMaxProtoSize = (1 << 24) - 1;  // 16MiB
+  static_assert(kMaxProtoSize <= 0x00FFFFFF,
+                "kMaxProtoSize doesn't fit in 3 bytes");
+
+  // Level of compression, BEST_SPEED = 1, BEST_COMPRESSION = 9
+  static constexpr int kDeflateCompressionLevel = 3;
+
+  // Chunks of the file to mmap at a time, so we don't mmap the entire file.
+  static constexpr int kMmapChunkSize = 4 * 1024;
+
+  // Append-mode fd of the underlying log file; owned by this instance.
+  ScopedFd fd_;
+  const Filesystem* const filesystem_;
+  const std::string file_path_;
+
+  // Reads out the metadata of a proto located at file_offset from the file.
+  //
+  // Returns:
+  //   Proto's metadata on success
+  //   OUT_OF_RANGE_ERROR if file_offset exceeds file_size
+  //   INTERNAL_ERROR if the metadata is invalid or any IO errors happen
+  static libtextclassifier3::StatusOr<int> ReadProtoMetadata(
+      MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size);
+  // In-memory copy of the file header; written back in PersistToDisk().
+  std::unique_ptr<Header> header_;
+};
+
+// Out-of-class definition of kProtoMagic; required for ODR-use of a static
+// constexpr member before C++17 (redundant but harmless afterwards).
+template <typename ProtoT>
+constexpr uint8_t FileBackedProtoLog<ProtoT>::kProtoMagic;
+
+// Private constructor; only reachable via the ::Create factory, which has
+// already validated options and initialized/verified the header.
+template <typename ProtoT>
+FileBackedProtoLog<ProtoT>::FileBackedProtoLog(const Filesystem* filesystem,
+                                               const std::string& file_path,
+                                               std::unique_ptr<Header> header)
+    : filesystem_(filesystem),
+      file_path_(file_path),
+      header_(std::move(header)) {
+  // NOTE(review): the result of OpenForAppend is not checked here; an invalid
+  // fd_ would presumably surface as an IO error on the first Write — confirm.
+  fd_.reset(filesystem_->OpenForAppend(file_path.c_str()));
+}
+
+// Best-effort checkpoint on destruction: updates the checksum and rewind
+// offset so the next Create() doesn't treat the log tail as data loss.
+// Failure is only logged since destructors can't propagate errors.
+template <typename ProtoT>
+FileBackedProtoLog<ProtoT>::~FileBackedProtoLog() {
+  if (!PersistToDisk().ok()) {
+    ICING_LOG(WARNING)
+        << "Error persisting to disk during destruction of FileBackedProtoLog: "
+        << file_path_;
+  }
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<typename FileBackedProtoLog<ProtoT>::CreateResult>
+FileBackedProtoLog<ProtoT>::Create(const Filesystem* filesystem,
+                                   const std::string& file_path,
+                                   const Options& options) {
+  // Validate options before touching the filesystem.
+  if (options.max_proto_size <= 0) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "options.max_proto_size must be greater than 0, was %d",
+        options.max_proto_size));
+  }
+
+  // Since we store the proto_size in 3 bytes, we can only support protos of up
+  // to 16MiB.
+  if (options.max_proto_size > kMaxProtoSize) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "options.max_proto_size must be under 16MiB, was %d",
+        options.max_proto_size));
+  }
+
+  if (!filesystem->FileExists(file_path.c_str())) {
+    return InitializeNewFile(filesystem, file_path, options);
+  }
+
+  int64_t file_size = filesystem->GetFileSize(file_path.c_str());
+  if (file_size == Filesystem::kBadFileSize) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Bad file size '", file_path, "'"));
+  }
+
+  // An existing but empty file has no header to validate, so treat it the
+  // same as a brand new file.
+  if (file_size == 0) {
+    return InitializeNewFile(filesystem, file_path, options);
+  }
+
+  return InitializeExistingFile(filesystem, file_path, options, file_size);
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<typename FileBackedProtoLog<ProtoT>::CreateResult>
+FileBackedProtoLog<ProtoT>::InitializeNewFile(const Filesystem* filesystem,
+                                              const std::string& file_path,
+                                              const Options& options) {
+  // Create the header. Defaults leave log_checksum = 0 (empty log) and
+  // rewind_offset = sizeof(Header).
+  std::unique_ptr<Header> header = std::make_unique<Header>();
+  header->compress = options.compress;
+  header->max_proto_size = options.max_proto_size;
+  header->header_checksum = header->CalculateHeaderChecksum();
+
+  if (!filesystem->Write(file_path.c_str(), header.get(), sizeof(Header))) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to write header for file: ", file_path));
+  }
+
+  // A fresh file has no prior content, so data_loss is trivially false.
+  CreateResult create_result = {
+      std::unique_ptr<FileBackedProtoLog<ProtoT>>(
+          new FileBackedProtoLog<ProtoT>(filesystem, file_path,
+                                         std::move(header))),
+      /*data_loss=*/false};
+
+  return create_result;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<typename FileBackedProtoLog<ProtoT>::CreateResult>
+FileBackedProtoLog<ProtoT>::InitializeExistingFile(const Filesystem* filesystem,
+                                                   const std::string& file_path,
+                                                   const Options& options,
+                                                   int64_t file_size) {
+  // NOTE(review): signed int64_t compared against unsigned sizeof(); safe as
+  // long as callers never pass a negative file_size (Create() filters
+  // kBadFileSize) — confirm.
+  if (file_size < sizeof(Header)) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("File header too short for: ", file_path));
+  }
+
+  std::unique_ptr<Header> header = std::make_unique<Header>();
+  if (!filesystem->PRead(file_path.c_str(), header.get(), sizeof(Header),
+                         /*offset=*/0)) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to read header for file: ", file_path));
+  }
+
+  // Make sure the header is still valid before we use any of its values. This
+  // is covered by the header_checksum check below, but this is a quick check
+  // that can save us from an extra crc computation.
+  if (header->magic != Header::kMagic) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Invalid header kMagic for file: ", file_path));
+  }
+
+  if (header->header_checksum != header->CalculateHeaderChecksum()) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Invalid header checksum for: ", file_path));
+  }
+
+  if (header->compress != options.compress) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Inconsistent compress option, expected %d, actual %d",
+        header->compress, options.compress));
+  }
+
+  if (header->max_proto_size > options.max_proto_size) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Max proto size cannot be smaller than previous "
+        "instantiations, previous size %d, wanted size %d",
+        header->max_proto_size, options.max_proto_size));
+  }
+  // Record the (possibly larger) requested limit so future writes honor it.
+  header->max_proto_size = options.max_proto_size;
+
+  bool data_loss = false;
+  ICING_ASSIGN_OR_RETURN(Crc32 calculated_log_checksum,
+                         ComputeChecksum(filesystem, file_path, Crc32(),
+                                         sizeof(Header), file_size));
+  // Double check that the log checksum is the same as the one that was
+  // persisted last time. If not, we start recovery logic.
+  if (header->log_checksum != calculated_log_checksum.Get()) {
+    // Need to rewind the proto log since the checksums don't match
+    data_loss = true;
+    // Worst case, we have to rewind the entire log back to just the header
+    int64_t last_known_good = sizeof(Header);
+
+    // Calculate the checksum of the log contents just up to the last rewind
+    // offset point. This will be valid if we just appended contents to the log
+    // without updating the checksum, and we can rewind back to this point
+    // safely.
+    ICING_ASSIGN_OR_RETURN(
+        calculated_log_checksum,
+        ComputeChecksum(filesystem, file_path, Crc32(), sizeof(Header),
+                        header->rewind_offset));
+    if (header->log_checksum == calculated_log_checksum.Get()) {
+      // Check if it matches our last rewind state. If so, this becomes our last
+      // good state and we can safely truncate and recover from here.
+      last_known_good = header->rewind_offset;
+    } else {
+      // Otherwise, we're going to truncate the entire log and this resets the
+      // checksum to an empty log state.
+      header->log_checksum = 0;
+    }
+
+    if (!filesystem->Truncate(file_path.c_str(), last_known_good)) {
+      return absl_ports::InternalError(
+          absl_ports::StrCat("Error truncating file: ", file_path));
+    }
+
+    ICING_LOG(INFO) << "Truncated '" << file_path << "' to size "
+                    << last_known_good;
+  }
+
+  CreateResult create_result = {
+      std::unique_ptr<FileBackedProtoLog<ProtoT>>(
+          new FileBackedProtoLog<ProtoT>(filesystem, file_path,
+                                         std::move(header))),
+      data_loss};
+
+  return create_result;
+}
+
+// Checksums the file content in [start, end), folding it into initial_crc.
+// Content is mmapped and consumed in fixed-size chunks so we never map the
+// whole file at once. See the in-class declaration for the full contract.
+template <typename ProtoT>
+libtextclassifier3::StatusOr<Crc32> FileBackedProtoLog<ProtoT>::ComputeChecksum(
+    const Filesystem* filesystem, const std::string& file_path,
+    Crc32 initial_crc, int64_t start, int64_t end) {
+  auto mmapped_file = MemoryMappedFile(*filesystem, file_path,
+                                       MemoryMappedFile::Strategy::READ_ONLY);
+  Crc32 new_crc(initial_crc.Get());
+
+  if (start < 0) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Starting checksum offset of file '%s' must be greater than or equal "
+        "to 0, was %lld",
+        file_path.c_str(), static_cast<long long>(start)));
+  }
+
+  int64_t file_size = filesystem->GetFileSize(file_path.c_str());
+  if (end > file_size) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Ending checksum offset of file '%s' must be within "
+        "file size of %lld, was %lld",
+        file_path.c_str(), static_cast<long long>(file_size),
+        static_cast<long long>(end)));
+  }
+
+  // The loop index must be int64_t: `start`/`end` are 64-bit file offsets,
+  // and a plain `int` index would truncate/overflow for offsets past 2GiB.
+  for (int64_t i = start; i < end; i += kMmapChunkSize) {
+    // Don't read past `end`; the last chunk may be short. The chunk size
+    // always fits in an int since it is at most kMmapChunkSize.
+    int next_chunk_size = kMmapChunkSize;
+    if ((i + kMmapChunkSize) >= end) {
+      next_chunk_size = end - i;
+    }
+
+    ICING_RETURN_IF_ERROR(mmapped_file.Remap(i, next_chunk_size));
+
+    auto mmap_str = std::string_view(mmapped_file.region(), next_chunk_size);
+    new_crc.Append(mmap_str);
+  }
+
+  return new_crc;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<int64_t> FileBackedProtoLog<ProtoT>::WriteProto(
+    const ProtoT& proto) {
+  int64_t proto_size = proto.ByteSizeLong();
+  int32_t metadata;
+  int metadata_size = sizeof(metadata);
+  // Capture the append position up front; this is the offset returned to the
+  // caller for later ReadProto() calls.
+  int64_t current_position = filesystem_->GetCurrentPosition(fd_.get());
+
+  if (proto_size > header_->max_proto_size) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "proto_size, %lld, was too large to write. Max is %d",
+        static_cast<long long>(proto_size), header_->max_proto_size));
+  }
+
+  // At this point, we've guaranteed that proto_size is under kMaxProtoSize (see
+  // ::Create), so we can safely store it in an int.
+  int final_size = 0;
+
+  std::string proto_str;
+  google3_proto_compat::io::StringOutputStream proto_stream(&proto_str);
+
+  if (header_->compress) {
+    google3_proto_compat::io::GzipOutputStream::Options options;
+    options.format = google3_proto_compat::io::GzipOutputStream::ZLIB;
+    options.compression_level = kDeflateCompressionLevel;
+
+    google3_proto_compat::io::GzipOutputStream compressing_stream(&proto_stream,
+                                                                  options);
+
+    // Close() is required to flush the compressor's remaining buffered bytes
+    // into proto_str.
+    bool success = proto.SerializeToZeroCopyStream(&compressing_stream) &&
+                   compressing_stream.Close();
+
+    if (!success) {
+      return absl_ports::InternalError("Error compressing proto.");
+    }
+
+    final_size = proto_str.size();
+
+    // In case the compressed proto is larger than the original proto, we also
+    // can't write it.
+    if (final_size > header_->max_proto_size) {
+      return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+          "Compressed proto size, %d, was greater than "
+          "max_proto_size, %d",
+          final_size, header_->max_proto_size));
+    }
+  } else {
+    // Serialize the proto directly into the write buffer at an offset of the
+    // metadata.
+    proto.SerializeToZeroCopyStream(&proto_stream);
+    final_size = proto_str.size();
+  }
+
+  // 1st byte for magic, next 3 bytes for proto size.
+  metadata = (kProtoMagic << 24) | final_size;
+
+  // Actually write metadata, has to be done after we know the possibly
+  // compressed proto size
+  if (!filesystem_->Write(fd_.get(), &metadata, metadata_size)) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to write proto metadata to: ", file_path_));
+  }
+
+  // Write the serialized proto
+  if (!filesystem_->Write(fd_.get(), proto_str.data(), proto_str.size())) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to write proto to: ", file_path_));
+  }
+
+  return current_position;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<ProtoT> FileBackedProtoLog<ProtoT>::ReadProto(
+    int64_t file_offset) const {
+  int64_t file_size = filesystem_->GetFileSize(fd_.get());
+  MemoryMappedFile mmapped_file(*filesystem_, file_path_,
+                                MemoryMappedFile::Strategy::READ_ONLY);
+  if (file_offset >= file_size) {
+    // file_size points to the next byte to write at, so subtract one to get the
+    // inclusive, actual size of file.
+    return absl_ports::OutOfRangeError(
+        IcingStringUtil::StringPrintf("Trying to read from a location, %lld, "
+                                      "out of range of the file size, %lld",
+                                      static_cast<long long>(file_offset),
+                                      static_cast<long long>(file_size - 1)));
+  }
+
+  // Read out the metadata
+  ICING_ASSIGN_OR_RETURN(
+      int metadata, ReadProtoMetadata(&mmapped_file, file_offset, file_size));
+
+  // Copy out however many bytes it says the proto is; the size lives in the
+  // low 3 bytes of the metadata word (the top byte is kProtoMagic).
+  int stored_size = metadata & 0x00FFFFFF;
+
+  ICING_RETURN_IF_ERROR(
+      mmapped_file.Remap(file_offset + sizeof(metadata), stored_size));
+  google3_proto_compat::io::ArrayInputStream proto_stream(
+      mmapped_file.mutable_region(), stored_size);
+
+  // Deserialize proto
+  // NOTE(review): the ParseFromZeroCopyStream return value is ignored, so a
+  // corrupt payload would yield a default/partial proto instead of an error —
+  // confirm this is intended.
+  ProtoT proto;
+  if (header_->compress) {
+    google3_proto_compat::io::GzipInputStream decompress_stream(&proto_stream);
+    proto.ParseFromZeroCopyStream(&decompress_stream);
+  } else {
+    proto.ParseFromZeroCopyStream(&proto_stream);
+  }
+
+  return proto;
+}
+
+// Reports the log's on-disk footprint in bytes, delegating to the filesystem.
+template <typename ProtoT>
+libtextclassifier3::StatusOr<int64_t> FileBackedProtoLog<ProtoT>::GetDiskUsage()
+    const {
+  int64_t size = filesystem_->GetDiskUsage(file_path_.c_str());
+  if (size == Filesystem::kBadFileSize) {
+    return absl_ports::InternalError("Failed to get disk usage of proto log");
+  }
+  return size;
+}
+
+template <typename ProtoT>
+FileBackedProtoLog<ProtoT>::Iterator::Iterator(const Filesystem& filesystem,
+                                               const std::string& file_path,
+                                               int64_t initial_offset)
+    : mmapped_file_(filesystem, file_path,
+                    MemoryMappedFile::Strategy::READ_ONLY),
+      initial_offset_(initial_offset),
+      current_offset_(kInvalidOffset),
+      file_size_(filesystem.GetFileSize(file_path.c_str())) {
+  if (file_size_ == Filesystem::kBadFileSize) {
+    // Fails all Advance() calls: any offset is >= a zero file size, so the
+    // iterator behaves as if the log were empty.
+    file_size_ = 0;
+  }
+}
+
+template <typename ProtoT>
+libtextclassifier3::Status FileBackedProtoLog<ProtoT>::Iterator::Advance() {
+  if (current_offset_ == kInvalidOffset) {
+    // First Advance() call
+    current_offset_ = initial_offset_;
+  } else {
+    // Jumps to the next proto position by skipping over the current entry:
+    // 4 bytes of metadata plus the proto size encoded in its low 3 bytes.
+    ICING_ASSIGN_OR_RETURN(
+        int metadata,
+        ReadProtoMetadata(&mmapped_file_, current_offset_, file_size_));
+    int proto_size = metadata & 0x00FFFFFF;
+    current_offset_ += sizeof(metadata) + proto_size;
+  }
+
+  if (current_offset_ < file_size_) {
+    return libtextclassifier3::Status::OK;
+  } else {
+    return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+        "The next proto offset, %lld, is out of file range [0, %lld)",
+        static_cast<long long>(current_offset_),
+        static_cast<long long>(file_size_)));
+  }
+}
+
+// Returns the offset of the proto the iterator currently points at, or
+// kInvalidOffset (-1) if Advance() has never been called.
+template <typename ProtoT>
+int64_t FileBackedProtoLog<ProtoT>::Iterator::GetOffset() {
+  return current_offset_;
+}
+
+// Starts iteration at the first proto, which sits immediately after the
+// fixed-size file header.
+template <typename ProtoT>
+typename FileBackedProtoLog<ProtoT>::Iterator
+FileBackedProtoLog<ProtoT>::GetIterator() {
+  return Iterator(*filesystem_, file_path_, /*initial_offset=*/sizeof(Header));
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<int> FileBackedProtoLog<ProtoT>::ReadProtoMetadata(
+    MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size) {
+  // Checks file_offset
+  if (file_offset >= file_size) {
+    return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+        "offset, %lld, is out of file range [0, %lld)",
+        static_cast<long long>(file_offset),
+        static_cast<long long>(file_size)));
+  }
+  int metadata;
+  int metadata_size = sizeof(metadata);
+  // NOTE(review): `>=` also rejects metadata that ends exactly at EOF, i.e. a
+  // zero-length serialized proto as the last entry — confirm empty protos are
+  // not expected in the log.
+  if (file_offset + metadata_size >= file_size) {
+    return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+        "Wrong metadata offset %lld, metadata doesn't fit in "
+        "with file range [0, %lld)",
+        static_cast<long long>(file_offset),
+        static_cast<long long>(file_size)));
+  }
+  // Reads metadata
+  ICING_RETURN_IF_ERROR(mmapped_file->Remap(file_offset, metadata_size));
+  memcpy(&metadata, mmapped_file->region(), metadata_size);
+  // Checks magic number stored in the top byte of the metadata word.
+  uint8_t stored_k_proto_magic = metadata >> 24;
+  if (stored_k_proto_magic != kProtoMagic) {
+    return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+        "Failed to read kProtoMagic, expected %d, actual %d", kProtoMagic,
+        stored_k_proto_magic));
+  }
+  return metadata;
+}
+
+template <typename ProtoT>
+libtextclassifier3::Status FileBackedProtoLog<ProtoT>::PersistToDisk() {
+  int64_t file_size = filesystem_->GetFileSize(file_path_.c_str());
+  if (file_size == header_->rewind_offset) {
+    // No changes made, don't need to update the checksum.
+    return libtextclassifier3::Status::OK;
+  }
+
+  int64_t new_content_size = file_size - header_->rewind_offset;
+  Crc32 crc;
+  if (new_content_size < 0) {
+    // File shrunk, recalculate the entire checksum.
+    ICING_ASSIGN_OR_RETURN(
+        crc, ComputeChecksum(filesystem_, file_path_, Crc32(), sizeof(Header),
+                             file_size));
+  } else {
+    // Append new changes to the existing checksum. Seeding the crc with the
+    // stored log_checksum lets us checksum only the bytes added since the
+    // last checkpoint instead of rescanning the whole log.
+    ICING_ASSIGN_OR_RETURN(
+        crc,
+        ComputeChecksum(filesystem_, file_path_, Crc32(header_->log_checksum),
+                        header_->rewind_offset, file_size));
+  }
+
+  // Checkpoint: the current end of file becomes the new rewind point.
+  header_->log_checksum = crc.Get();
+  header_->rewind_offset = file_size;
+  header_->header_checksum = header_->CalculateHeaderChecksum();
+
+  // Overwrite the header in place at the start of the file.
+  if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(),
+                           sizeof(Header))) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to update header to: ", file_path_));
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Convenience overload: checksums the entire log content (everything after
+// the fixed-size header) starting from an empty crc.
+template <typename ProtoT>
+libtextclassifier3::StatusOr<Crc32>
+FileBackedProtoLog<ProtoT>::ComputeChecksum() {
+  return FileBackedProtoLog<ProtoT>::ComputeChecksum(
+      filesystem_, file_path_, Crc32(), /*start=*/sizeof(Header),
+      /*end=*/filesystem_->GetFileSize(file_path_.c_str()));
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_FILE_BACKED_PROTO_LOG_H_
diff --git a/icing/file/file-backed-proto-log_benchmark.cc b/icing/file/file-backed-proto-log_benchmark.cc
new file mode 100644
index 0000000..26e0fb0
--- /dev/null
+++ b/icing/file/file-backed-proto-log_benchmark.cc
@@ -0,0 +1,169 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <random>
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "icing/document-builder.h"
+#include "icing/file/file-backed-proto-log.h"
+#include "icing/file/filesystem.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/document.pb.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/tmp-directory.h"
+
+// go/microbenchmarks
+//
+// To build and run on a local machine:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// icing/file:file-backed-proto-log_benchmark
+//
+// $ blaze-bin/icing/file/file-backed-proto-log_benchmark
+// --benchmarks=all
+//
+//
+// To build and run on an Android device (must be connected and rooted):
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// icing/file:file-backed-proto-log_benchmark
+//
+// $ adb root
+//
+// $ adb push
+// blaze-bin/icing/file/file-backed-proto-log_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/file-backed-proto-log-benchmark
+// --benchmarks=all
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Benchmarks appending a DocumentProto holding one random string property of
+// state.range(0) bytes to a compressed proto log. Bytes processed is reported
+// in terms of the string payload only.
+static void BM_Write(benchmark::State& state) {
+  const Filesystem filesystem;
+  int string_length = state.range(0);
+  const std::string file_path = IcingStringUtil::StringPrintf(
+      "%s%s%d%s", GetTestTempDir().c_str(), "/proto_", string_length, ".log");
+  int max_proto_size = (1 << 24) - 1;  // 16 MiB
+  bool compress = true;
+
+  // Make sure it doesn't already exist.
+  filesystem.DeleteFile(file_path.c_str());
+
+  auto proto_log =
+      FileBackedProtoLog<DocumentProto>::Create(
+          &filesystem, file_path,
+          FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size))
+          .ValueOrDie()
+          .proto_log;
+
+  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+  std::default_random_engine random;
+  const std::string rand_str =
+      RandomString(kAlNumAlphabet, string_length, &random);
+
+  auto document_properties = document.add_properties();
+  document_properties->set_name("string property");
+  document_properties->add_string_values(rand_str);
+
+  for (auto _ : state) {
+    // DoNotOptimize keeps the compiler from discarding the returned StatusOr.
+    testing::DoNotOptimize(proto_log->WriteProto(document));
+  }
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
+                          string_length);
+
+  // Cleanup after ourselves
+  filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_Write)
+    ->Arg(1)
+    ->Arg(32)
+    ->Arg(512)
+    ->Arg(1024)
+    ->Arg(4 * 1024)
+    ->Arg(8 * 1024)
+    ->Arg(16 * 1024)
+    ->Arg(32 * 1024)
+    ->Arg(256 * 1024)
+    ->Arg(2 * 1024 * 1024)
+    ->Arg(8 * 1024 * 1024)
+    ->Arg(15 * 1024 * 1024);  // We do 15MiB here since our max proto size is
+                              // 16MiB, and we need some extra space for the
+                              // rest of the document properties
+
+// Benchmarks repeatedly reading back a single previously-written proto at a
+// fixed offset from a compressed proto log. Mirrors BM_Write's setup so the
+// two are directly comparable.
+static void BM_Read(benchmark::State& state) {
+  const Filesystem filesystem;
+  int string_length = state.range(0);
+  const std::string file_path = IcingStringUtil::StringPrintf(
+      "%s%s%d%s", GetTestTempDir().c_str(), "/proto_", string_length, ".log");
+  int max_proto_size = (1 << 24) - 1;  // 16 MiB
+  bool compress = true;
+
+  // Make sure it doesn't already exist.
+  filesystem.DeleteFile(file_path.c_str());
+
+  auto proto_log =
+      FileBackedProtoLog<DocumentProto>::Create(
+          &filesystem, file_path,
+          FileBackedProtoLog<DocumentProto>::Options(compress, max_proto_size))
+          .ValueOrDie()
+          .proto_log;
+
+  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+  std::default_random_engine random;
+  const std::string rand_str =
+      RandomString(kAlNumAlphabet, string_length, &random);
+
+  auto document_properties = document.add_properties();
+  document_properties->set_name("string property");
+  document_properties->add_string_values(rand_str);
+
+  // Write the one proto the benchmark loop will read back.
+  ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset,
+                             proto_log->WriteProto(document));
+
+  for (auto _ : state) {
+    // DoNotOptimize keeps the compiler from discarding the returned StatusOr.
+    testing::DoNotOptimize(proto_log->ReadProto(write_offset));
+  }
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
+                          string_length);
+
+  // Cleanup after ourselves
+  filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_Read)
+    ->Arg(1)
+    ->Arg(32)
+    ->Arg(512)
+    ->Arg(1024)
+    ->Arg(4 * 1024)
+    ->Arg(8 * 1024)
+    ->Arg(16 * 1024)
+    ->Arg(32 * 1024)
+    ->Arg(256 * 1024)
+    ->Arg(2 * 1024 * 1024)
+    ->Arg(8 * 1024 * 1024)
+    ->Arg(15 * 1024 * 1024);  // We do 15MiB here since our max proto size is
+                              // 16MiB, and we need some extra space for the
+                              // rest of the document properties
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/file-backed-proto-log_test.cc b/icing/file/file-backed-proto-log_test.cc
new file mode 100644
index 0000000..3a9060d
--- /dev/null
+++ b/icing/file/file-backed-proto-log_test.cc
@@ -0,0 +1,519 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/file-backed-proto-log.h"
+
+#include <cstdint>
+#include <cstdlib>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/document.pb.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::A;
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::Not;
+using ::testing::NotNull;
+using ::testing::Pair;
+using ::testing::Return;
+
+// Fixture giving each test a fresh log file path in the temp dir and removing
+// the file afterwards.
+class FileBackedProtoLogTest : public ::testing::Test {
+ protected:
+  // Adds a user-defined default construct because a const member variable may
+  // make the compiler accidentally delete the default constructor.
+  // https://stackoverflow.com/a/47368753
+  FileBackedProtoLogTest() {}
+
+  void SetUp() override { file_path_ = GetTestTempDir() + "/proto_log"; }
+
+  void TearDown() override { filesystem_.DeleteFile(file_path_.c_str()); }
+
+  const Filesystem filesystem_;
+  std::string file_path_;
+  // Default log options shared by most tests below.
+  bool compress_ = true;
+  int64_t max_proto_size_ = 256 * 1024;  // 256 KiB
+};
+
+// Create() validates options: rejects a non-positive max_proto_size, succeeds
+// with valid options, and refuses to reopen an existing file with different
+// options.
+TEST_F(FileBackedProtoLogTest, Initialize) {
+  // max_proto_size must be greater than 0
+  int invalid_max_proto_size = 0;
+  ASSERT_THAT(FileBackedProtoLog<DocumentProto>::Create(
+                  &filesystem_, file_path_,
+                  FileBackedProtoLog<DocumentProto>::Options(
+                      compress_, invalid_max_proto_size)),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+      FileBackedProtoLog<DocumentProto>::Create(
+          &filesystem_, file_path_,
+          FileBackedProtoLog<DocumentProto>::Options(compress_,
+                                                     max_proto_size_)));
+  EXPECT_THAT(create_result.proto_log, NotNull());
+  EXPECT_FALSE(create_result.data_loss);
+
+  // Can't recreate the same file with different options.
+  ASSERT_THAT(FileBackedProtoLog<DocumentProto>::Create(
+                  &filesystem_, file_path_,
+                  FileBackedProtoLog<DocumentProto>::Options(!compress_,
+                                                             max_proto_size_)),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// A proto whose serialized size exceeds the configured max_proto_size must be
+// rejected by WriteProto.
+TEST_F(FileBackedProtoLogTest, WriteProtoTooLarge) {
+  int max_proto_size = 1;
+  ICING_ASSERT_OK_AND_ASSIGN(
+      FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+      FileBackedProtoLog<DocumentProto>::Create(
+          &filesystem_, file_path_,
+          FileBackedProtoLog<DocumentProto>::Options(compress_,
+                                                     max_proto_size)));
+  auto proto_log = std::move(create_result.proto_log);
+  EXPECT_FALSE(create_result.data_loss);
+
+  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+  // Proto is too large for the max_proto_size_in
+  ASSERT_THAT(proto_log->WriteProto(document),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// Overwriting a proto entry's magic number on disk must make ReadProto fail
+// with INTERNAL (corruption detected).
+TEST_F(FileBackedProtoLogTest, ReadProtoWrongKProtoMagic) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+      FileBackedProtoLog<DocumentProto>::Create(
+          &filesystem_, file_path_,
+          FileBackedProtoLog<DocumentProto>::Options(compress_,
+                                                     max_proto_size_)));
+  auto proto_log = std::move(create_result.proto_log);
+  EXPECT_FALSE(create_result.data_loss);
+
+  // Write a proto
+  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(int64_t file_offset,
+                             proto_log->WriteProto(document));
+
+  // The 4 bytes of metadata that just doesn't have the same kProtoMagic
+  // specified in file-backed-proto-log.h
+  uint32_t wrong_magic = 0x7E000000;
+
+  // Sanity check that we opened the file correctly
+  int fd = filesystem_.OpenForWrite(file_path_.c_str());
+  ASSERT_GT(fd, 0);
+
+  // Write the wrong kProtoMagic in, kProtoMagics are stored at the beginning of
+  // a proto entry.
+  filesystem_.PWrite(fd, file_offset, &wrong_magic, sizeof(wrong_magic));
+
+  ASSERT_THAT(proto_log->ReadProto(file_offset),
+              StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+// End-to-end write/read round trip with compression disabled, including
+// reopening the log and appending across instances.
+TEST_F(FileBackedProtoLogTest, ReadWriteUncompressedProto) {
+  int last_offset;
+  {
+    ICING_ASSERT_OK_AND_ASSIGN(
+        FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+        FileBackedProtoLog<DocumentProto>::Create(
+            &filesystem_, file_path_,
+            FileBackedProtoLog<DocumentProto>::Options(
+                /*compress_in=*/false, max_proto_size_)));
+    auto proto_log = std::move(create_result.proto_log);
+    EXPECT_FALSE(create_result.data_loss);
+
+    // Write the first proto
+    DocumentProto document1 =
+        DocumentBuilder().SetKey("namespace1", "uri1").Build();
+
+    ICING_ASSERT_OK_AND_ASSIGN(int written_position,
+                               proto_log->WriteProto(document1));
+
+    int document1_offset = written_position;
+
+    // Check that what we read is what we wrote
+    ASSERT_THAT(proto_log->ReadProto(written_position),
+                IsOkAndHolds(EqualsProto(document1)));
+
+    // Write a second proto that's close to the max size. Leave some room for
+    // the rest of the proto properties.
+    std::string long_str(max_proto_size_ - 1024, 'a');
+    DocumentProto document2 = DocumentBuilder()
+                                  .SetKey("namespace2", "uri2")
+                                  .AddStringProperty("long_str", long_str)
+                                  .Build();
+
+    ICING_ASSERT_OK_AND_ASSIGN(written_position,
+                               proto_log->WriteProto(document2));
+
+    int document2_offset = written_position;
+    last_offset = written_position;
+    // Writes are appends, so the second proto must land after the first.
+    ASSERT_GT(document2_offset, document1_offset);
+
+    // Check the second proto
+    ASSERT_THAT(proto_log->ReadProto(written_position),
+                IsOkAndHolds(EqualsProto(document2)));
+
+    ICING_ASSERT_OK(proto_log->PersistToDisk());
+  }
+
+  {
+    // Make a new proto_log with the same file_path, and make sure we
+    // can still write to the same underlying file.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+        FileBackedProtoLog<DocumentProto>::Create(
+            &filesystem_, file_path_,
+            FileBackedProtoLog<DocumentProto>::Options(
+                /*compress_in=*/false, max_proto_size_)));
+    auto recreated_proto_log = std::move(create_result.proto_log);
+    EXPECT_FALSE(create_result.data_loss);
+
+    // Write a third proto
+    DocumentProto document3 =
+        DocumentBuilder().SetKey("namespace3", "uri3").Build();
+
+    // New writes must land after everything persisted by the first instance.
+    ASSERT_THAT(recreated_proto_log->WriteProto(document3),
+                IsOkAndHolds(Gt(last_offset)));
+  }
+}
+
+// Same round-trip coverage as ReadWriteUncompressedProto, but with
+// compression enabled.
+TEST_F(FileBackedProtoLogTest, ReadWriteCompressedProto) {
+  int last_offset;
+
+  {
+    ICING_ASSERT_OK_AND_ASSIGN(
+        FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+        FileBackedProtoLog<DocumentProto>::Create(
+            &filesystem_, file_path_,
+            FileBackedProtoLog<DocumentProto>::Options(
+                /*compress_in=*/true, max_proto_size_)));
+    auto proto_log = std::move(create_result.proto_log);
+    EXPECT_FALSE(create_result.data_loss);
+
+    // Write the first proto
+    DocumentProto document1 =
+        DocumentBuilder().SetKey("namespace1", "uri1").Build();
+
+    ICING_ASSERT_OK_AND_ASSIGN(int written_position,
+                               proto_log->WriteProto(document1));
+
+    int document1_offset = written_position;
+
+    // Check that what we read is what we wrote
+    ASSERT_THAT(proto_log->ReadProto(written_position),
+                IsOkAndHolds(EqualsProto(document1)));
+
+    // Write a second proto that's close to the max size. Leave some room for
+    // the rest of the proto properties.
+    std::string long_str(max_proto_size_ - 1024, 'a');
+    DocumentProto document2 = DocumentBuilder()
+                                  .SetKey("namespace2", "uri2")
+                                  .AddStringProperty("long_str", long_str)
+                                  .Build();
+
+    ICING_ASSERT_OK_AND_ASSIGN(written_position,
+                               proto_log->WriteProto(document2));
+
+    int document2_offset = written_position;
+    last_offset = written_position;
+    // Writes are appends, so the second proto must land after the first.
+    ASSERT_GT(document2_offset, document1_offset);
+
+    // Check the second proto
+    ASSERT_THAT(proto_log->ReadProto(written_position),
+                IsOkAndHolds(EqualsProto(document2)));
+
+    ICING_ASSERT_OK(proto_log->PersistToDisk());
+  }
+
+  {
+    // Make a new proto_log with the same file_path, and make sure we
+    // can still write to the same underlying file.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+        FileBackedProtoLog<DocumentProto>::Create(
+            &filesystem_, file_path_,
+            FileBackedProtoLog<DocumentProto>::Options(
+                /*compress_in=*/true, max_proto_size_)));
+    auto recreated_proto_log = std::move(create_result.proto_log);
+    EXPECT_FALSE(create_result.data_loss);
+
+    // Write a third proto
+    DocumentProto document3 =
+        DocumentBuilder().SetKey("namespace3", "uri3").Build();
+
+    // New writes must land after everything persisted by the first instance.
+    ASSERT_THAT(recreated_proto_log->WriteProto(document3),
+                IsOkAndHolds(Gt(last_offset)));
+  }
+}
+
+// A corrupted header field (negative rewind offset) must cause
+// re-initialization to fail with INTERNAL rather than silently recovering.
+TEST_F(FileBackedProtoLogTest, CorruptHeader) {
+  {
+    ICING_ASSERT_OK_AND_ASSIGN(
+        FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+        FileBackedProtoLog<DocumentProto>::Create(
+            &filesystem_, file_path_,
+            FileBackedProtoLog<DocumentProto>::Options(compress_,
+                                                       max_proto_size_)));
+    auto recreated_proto_log = std::move(create_result.proto_log);
+    EXPECT_FALSE(create_result.data_loss);
+
+    // Overwrite just the rewind_offset field within the on-disk Header.
+    int corrupt_offset =
+        offsetof(FileBackedProtoLog<DocumentProto>::Header, rewind_offset);
+    // We should never rewind to a negative offset.
+    int invalid_rewind_offset = -1;
+    filesystem_.PWrite(file_path_.c_str(), corrupt_offset,
+                       &invalid_rewind_offset, sizeof(invalid_rewind_offset));
+  }
+
+  {
+    // Reinitialize the same proto_log
+    ASSERT_THAT(FileBackedProtoLog<DocumentProto>::Create(
+                    &filesystem_, file_path_,
+                    FileBackedProtoLog<DocumentProto>::Options(
+                        compress_, max_proto_size_)),
+                StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+  }
+}
+
+// Corruption within the already-persisted region is detected on reopen; the
+// log recovers by truncating back to just the header and reports data loss.
+TEST_F(FileBackedProtoLogTest, CorruptContent) {
+  {
+    ICING_ASSERT_OK_AND_ASSIGN(
+        FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+        FileBackedProtoLog<DocumentProto>::Create(
+            &filesystem_, file_path_,
+            FileBackedProtoLog<DocumentProto>::Options(compress_,
+                                                       max_proto_size_)));
+    auto proto_log = std::move(create_result.proto_log);
+    EXPECT_FALSE(create_result.data_loss);
+
+    DocumentProto document =
+        DocumentBuilder().SetKey("namespace1", "uri1").Build();
+
+    // Write and persist a document.
+    ICING_ASSERT_OK_AND_ASSIGN(int document_offset,
+                               proto_log->WriteProto(document));
+    ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+    // "Corrupt" the content written in the log.
+    document.set_uri("invalid");
+    std::string serialized_document = document.SerializeAsString();
+    filesystem_.PWrite(file_path_.c_str(), document_offset,
+                       serialized_document.data(), serialized_document.size());
+  }
+
+  {
+    // We can recover, but we have data loss.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+        FileBackedProtoLog<DocumentProto>::Create(
+            &filesystem_, file_path_,
+            FileBackedProtoLog<DocumentProto>::Options(compress_,
+                                                       max_proto_size_)));
+    auto proto_log = std::move(create_result.proto_log);
+    ASSERT_TRUE(create_result.data_loss);
+
+    // Lost everything in the log since the rewind position doesn't help if
+    // there's been data corruption within the persisted region
+    ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()),
+              sizeof(FileBackedProtoLog<DocumentProto>::Header));
+  }
+}
+
+// After a simulated crash (garbage appended past the persisted region), the
+// log rewinds to the last PersistToDisk point, keeping all earlier protos.
+TEST_F(FileBackedProtoLogTest, PersistToDisk) {
+  DocumentProto document1 =
+      DocumentBuilder().SetKey("namespace1", "uri1").Build();
+  DocumentProto document2 =
+      DocumentBuilder().SetKey("namespace2", "uri2").Build();
+  int document1_offset, document2_offset;
+  int log_size;
+
+  {
+    ICING_ASSERT_OK_AND_ASSIGN(
+        FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+        FileBackedProtoLog<DocumentProto>::Create(
+            &filesystem_, file_path_,
+            FileBackedProtoLog<DocumentProto>::Options(compress_,
+                                                       max_proto_size_)));
+    auto proto_log = std::move(create_result.proto_log);
+    EXPECT_FALSE(create_result.data_loss);
+
+    // Write and persist the first proto
+    ICING_ASSERT_OK_AND_ASSIGN(document1_offset,
+                               proto_log->WriteProto(document1));
+    ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+    // Write, but don't explicitly persist the second proto
+    ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
+                               proto_log->WriteProto(document2));
+
+    // Check that what we read is what we wrote
+    ASSERT_THAT(proto_log->ReadProto(document1_offset),
+                IsOkAndHolds(EqualsProto(document1)));
+    ASSERT_THAT(proto_log->ReadProto(document2_offset),
+                IsOkAndHolds(EqualsProto(document2)));
+
+    log_size = filesystem_.GetFileSize(file_path_.c_str());
+    ASSERT_GT(log_size, 0);
+  }
+
+  {
+    // The header rewind position and checksum aren't updated in this "system
+    // crash" scenario.
+
+    std::string bad_proto =
+        "some incomplete proto that we didn't finish writing before the system "
+        "crashed";
+    filesystem_.PWrite(file_path_.c_str(), log_size, bad_proto.data(),
+                       bad_proto.size());
+
+    // Double check that we actually wrote something to the underlying file
+    ASSERT_GT(filesystem_.GetFileSize(file_path_.c_str()), log_size);
+  }
+
+  {
+    // We can recover, but we have data loss
+    ICING_ASSERT_OK_AND_ASSIGN(
+        FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+        FileBackedProtoLog<DocumentProto>::Create(
+            &filesystem_, file_path_,
+            FileBackedProtoLog<DocumentProto>::Options(compress_,
+                                                       max_proto_size_)));
+    auto proto_log = std::move(create_result.proto_log);
+    ASSERT_TRUE(create_result.data_loss);
+
+    // Check that everything was persisted across instances
+    ASSERT_THAT(proto_log->ReadProto(document1_offset),
+                IsOkAndHolds(EqualsProto(document1)));
+    ASSERT_THAT(proto_log->ReadProto(document2_offset),
+                IsOkAndHolds(EqualsProto(document2)));
+
+    // We correctly rewound to the last good state.
+    ASSERT_EQ(log_size, filesystem_.GetFileSize(file_path_.c_str()));
+  }
+}
+
+// Covers the iterator: empty log, iterating over written entries in order,
+// and graceful failure when the underlying filesystem misbehaves.
+TEST_F(FileBackedProtoLogTest, Iterator) {
+  DocumentProto document1 =
+      DocumentBuilder().SetKey("namespace", "uri1").Build();
+  DocumentProto document2 =
+      DocumentBuilder().SetKey("namespace", "uri2").Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+      FileBackedProtoLog<DocumentProto>::Create(
+          &filesystem_, file_path_,
+          FileBackedProtoLog<DocumentProto>::Options(compress_,
+                                                     max_proto_size_)));
+  auto proto_log = std::move(create_result.proto_log);
+  EXPECT_FALSE(create_result.data_loss);
+
+  {
+    // Empty iterator
+    auto iterator = proto_log->GetIterator();
+    ASSERT_THAT(iterator.Advance(),
+                StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+  }
+
+  {
+    // Iterates through some documents
+    ICING_ASSERT_OK(proto_log->WriteProto(document1));
+    ICING_ASSERT_OK(proto_log->WriteProto(document2));
+    auto iterator = proto_log->GetIterator();
+    // 1st proto
+    ICING_ASSERT_OK(iterator.Advance());
+    ASSERT_THAT(proto_log->ReadProto(iterator.GetOffset()),
+                IsOkAndHolds(EqualsProto(document1)));
+    // 2nd proto
+    ICING_ASSERT_OK(iterator.Advance());
+    ASSERT_THAT(proto_log->ReadProto(iterator.GetOffset()),
+                IsOkAndHolds(EqualsProto(document2)));
+    // Tries to advance
+    ASSERT_THAT(iterator.Advance(),
+                StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+  }
+
+  {
+    // Iterator with bad filesystem
+    MockFilesystem mock_filesystem;
+    ON_CALL(mock_filesystem, GetFileSize(A<const char *>()))
+        .WillByDefault(Return(Filesystem::kBadFileSize));
+    FileBackedProtoLog<DocumentProto>::Iterator bad_iterator(
+        mock_filesystem, file_path_, /*initial_offset=*/0);
+    ASSERT_THAT(bad_iterator.Advance(),
+                StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+  }
+}
+
+// ComputeChecksum is deterministic, stable across instances and across
+// PersistToDisk, and changes when the log content changes.
+TEST_F(FileBackedProtoLogTest, ComputeChecksum) {
+  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+  Crc32 checksum;
+
+  {
+    ICING_ASSERT_OK_AND_ASSIGN(
+        FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+        FileBackedProtoLog<DocumentProto>::Create(
+            &filesystem_, file_path_,
+            FileBackedProtoLog<DocumentProto>::Options(compress_,
+                                                       max_proto_size_)));
+    auto proto_log = std::move(create_result.proto_log);
+    EXPECT_FALSE(create_result.data_loss);
+
+    ICING_EXPECT_OK(proto_log->WriteProto(document));
+
+    ICING_ASSERT_OK_AND_ASSIGN(checksum, proto_log->ComputeChecksum());
+
+    // Calling it twice with no changes should get us the same checksum
+    EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
+  }
+
+  {
+    ICING_ASSERT_OK_AND_ASSIGN(
+        FileBackedProtoLog<DocumentProto>::CreateResult create_result,
+        FileBackedProtoLog<DocumentProto>::Create(
+            &filesystem_, file_path_,
+            FileBackedProtoLog<DocumentProto>::Options(compress_,
+                                                       max_proto_size_)));
+    auto proto_log = std::move(create_result.proto_log);
+    EXPECT_FALSE(create_result.data_loss);
+
+    // Checksum should be consistent across instances
+    EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
+
+    // PersistToDisk shouldn't affect the checksum value
+    ICING_EXPECT_OK(proto_log->PersistToDisk());
+    EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
+
+    // Check that modifying the log leads to a different checksum
+    ICING_EXPECT_OK(proto_log->WriteProto(document));
+    EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Not(Eq(checksum))));
+  }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/file-backed-proto.h b/icing/file/file-backed-proto.h
new file mode 100644
index 0000000..1dc19ca
--- /dev/null
+++ b/icing/file/file-backed-proto.h
@@ -0,0 +1,251 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// A simple file-backed proto with an in-memory cache.
+// WARNING: Only use this for small protos. Files storing larger protos can
+// benefit from more sophisticated strategies like chunked reads/writes,
+// using mmap and ideally, not even using protos.
+//
+// TODO(b/133793579) Consider exposing a checksum mismatch to callers.
+
+#ifndef ICING_FILE_FILE_BACKED_PROTO_H_
+#define ICING_FILE_FILE_BACKED_PROTO_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/mutex.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/thread_annotations.h"
+#include "icing/file/filesystem.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+// This class is go/thread-compatible
+template <typename ProtoT>
+class FileBackedProto {
+ public:
+  // Header stored at the beginning of the file before the proto.
+  struct Header {
+    static constexpr int32_t kMagic = 0x726f746f;
+
+    // Holds the magic as a quick sanity check against file corruption.
+    int32_t magic;
+
+    // Checksum of the serialized proto, for a more thorough check against file
+    // corruption.
+    uint32_t proto_checksum;
+  };
+
+  // Uses the specified file to read older versions of the proto and store
+  // newer versions of the proto.
+  //
+  // file_path : Must be a path within a directory that already exists.
+  FileBackedProto(const Filesystem& filesystem, std::string_view file_path);
+
+  // Returns a reference to the proto read from the file. It
+  // internally caches the read proto so that future calls are fast.
+  //
+  // NOTE: The caller does NOT get ownership of the object returned and
+  // the returned object is only valid till a new version of the proto is
+  // written to the file.
+  //
+  // Returns NOT_FOUND if the file was empty or never written to.
+  // Returns INTERNAL_ERROR if an IO error or a corruption was encountered.
+  libtextclassifier3::StatusOr<const ProtoT*> Read() const
+      LOCKS_EXCLUDED(mutex_);
+
+  // Writes the new version of the proto provided through to disk.
+  // Successful Write() invalidates any previously read version of the proto.
+  //
+  // Returns INTERNAL_ERROR if any IO error is encountered and will NOT
+  // invalidate any previously read versions of the proto.
+  //
+  // TODO(cassiewang) The implementation today loses old data if Write() fails.
+  // We should write to a tmp file first and rename the file to fix this.
+  // TODO(samzheng) Change to Write(ProtoT&& proto)
+  libtextclassifier3::Status Write(std::unique_ptr<ProtoT> proto)
+      LOCKS_EXCLUDED(mutex_);
+
+  // Disallow copy and assign.
+  FileBackedProto(const FileBackedProto&) = delete;
+  FileBackedProto& operator=(const FileBackedProto&) = delete;
+
+ private:
+  // Upper bound of file-size that is supported.
+  static constexpr int32_t kMaxFileSize = 1 * 1024 * 1024;  // 1 MiB.
+
+  // Used to provide reader and writer locks
+  mutable absl_ports::shared_mutex mutex_;
+
+  const Filesystem* const filesystem_;
+  const std::string file_path_;
+
+  // Latest version of the proto read from, or written to, the file.
+  // mutable so Read() (a const method) can populate the cache.
+  mutable std::unique_ptr<ProtoT> cached_proto_ GUARDED_BY(mutex_);
+};
+
+// Out-of-line definition for ODR-uses of the static constexpr member
+// (required prior to C++17's implicitly-inline static constexpr data members).
+template <typename ProtoT>
+constexpr int32_t FileBackedProto<ProtoT>::kMaxFileSize;
+
+// Stores the filesystem pointer and file path; performs no file IO here —
+// all reads/writes are deferred to Read()/Write().
+template <typename ProtoT>
+FileBackedProto<ProtoT>::FileBackedProto(const Filesystem& filesystem,
+                                         const std::string_view file_path)
+    : filesystem_(&filesystem), file_path_(file_path) {}
+
+// Reads, validates (magic + checksum) and parses the proto from disk, caching
+// the result. Takes the lock exclusively because a cache miss mutates
+// cached_proto_.
+template <typename ProtoT>
+libtextclassifier3::StatusOr<const ProtoT*> FileBackedProto<ProtoT>::Read()
+    const {
+  ICING_VLOG(1) << "Reading proto from file: " << file_path_;
+
+  absl_ports::unique_lock l(&mutex_);
+
+  // Return cached proto if we've already read from disk.
+  if (cached_proto_ != nullptr) {
+    ICING_VLOG(1) << "Reusing cached proto for file: " << file_path_;
+    return cached_proto_.get();
+  }
+
+  int64_t file_size = filesystem_->GetFileSize(file_path_.c_str());
+  if (file_size == Filesystem::kBadFileSize || file_size == 0) {
+    return absl_ports::NotFoundError(
+        absl_ports::StrCat("Missing file: ", file_path_));
+  }
+
+  // Guard against reading an unexpectedly huge file into memory.
+  if (file_size > kMaxFileSize) {
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "File larger than expected, couldn't read: ", file_path_));
+  }
+
+  ScopedFd fd(filesystem_->OpenForRead(file_path_.c_str()));
+  if (!fd.is_valid()) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Unable to open file for read: ", file_path_));
+  }
+
+  ICING_VLOG(1) << "Loading proto from file: " << file_path_
+                << " of size: " << file_size;
+
+  Header header;
+  if (!filesystem_->PRead(fd.get(), &header, sizeof(Header),
+                          /*offset=*/0)) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Unable to read header of: ", file_path_));
+  }
+
+  // Quick sanity check before the (more expensive) checksum below.
+  if (header.magic != Header::kMagic) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Invalid header kMagic for: ", file_path_));
+  }
+
+  int proto_size = file_size - sizeof(Header);
+  auto buffer = std::make_unique<uint8_t[]>(proto_size);
+  if (!filesystem_->PRead(fd.get(), buffer.get(), proto_size,
+                          /*offset=*/sizeof(Header))) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("File read failed: ", file_path_));
+  }
+
+  // Verify the serialized bytes against the checksum stored in the header.
+  std::string_view buffer_str(reinterpret_cast<const char*>(buffer.get()),
+                              proto_size);
+  Crc32 crc;
+  crc.Append(buffer_str);
+  if (header.proto_checksum != crc.Get()) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Checksum of file does not match: ", file_path_));
+  }
+
+  auto proto = std::make_unique<ProtoT>();
+  if (!proto->ParseFromArray(buffer.get(), proto_size)) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Proto parse failed. File corrupted: ", file_path_));
+  }
+
+  ICING_VLOG(1) << "Successfully read proto from file: " << file_path_;
+  cached_proto_ = std::move(proto);
+  return cached_proto_.get();
+}
+
+// Serializes the new proto, replaces the file contents (header + proto), and
+// syncs to disk. On any failure the cached proto is left untouched, so
+// previously-returned Read() pointers stay valid.
+template <typename ProtoT>
+libtextclassifier3::Status FileBackedProto<ProtoT>::Write(
+    std::unique_ptr<ProtoT> new_proto) {
+  ICING_VLOG(1) << "Writing proto to file: " << file_path_;
+
+  absl_ports::unique_lock l(&mutex_);
+
+  const std::string new_proto_str = new_proto->SerializeAsString();
+  if (new_proto_str.size() >= kMaxFileSize) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "New proto too large. size: %d; limit: %d.",
+        static_cast<int>(new_proto_str.size()), kMaxFileSize));
+  }
+
+  // Skip the disk write entirely when the serialized content is unchanged.
+  if (cached_proto_ != nullptr &&
+      cached_proto_->SerializeAsString() == new_proto_str) {
+    ICING_VLOG(1) << "Skip writing proto to file as contents are identical: "
+                  << file_path_;
+    return libtextclassifier3::Status::OK;
+  }
+
+  ScopedFd fd(filesystem_->OpenForWrite(file_path_.c_str()));
+  if (!fd.is_valid()) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Unable to open file for write: ", file_path_));
+  }
+
+  // Discard the old contents so the file holds exactly header + new proto.
+  if (!filesystem_->Truncate(fd.get(), 0)) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to truncate file: ", file_path_));
+  }
+
+  Header header;
+  header.magic = Header::kMagic;
+
+  Crc32 crc;
+  crc.Append(new_proto_str);
+  header.proto_checksum = crc.Get();
+  if (!filesystem_->Write(fd.get(), &header, sizeof(Header))) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to write header to file: ", file_path_));
+  }
+
+  if (!filesystem_->Write(fd.get(), new_proto_str.data(),
+                          new_proto_str.size())) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to write proto to file: ", file_path_));
+  }
+
+  if (!filesystem_->DataSync(fd.get())) {
+    return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+        "Failed to sync file; filename: %s; content_size: %d ",
+        file_path_.c_str(), static_cast<int>(new_proto_str.size())));
+  }
+
+  ICING_VLOG(1) << "Successfully wrote proto to file: " << file_path_;
+  // Only update the cache once everything is durably on disk.
+  cached_proto_ = std::move(new_proto);
+  return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_FILE_BACKED_PROTO_H_
diff --git a/icing/file/file-backed-proto_test.cc b/icing/file/file-backed-proto_test.cc
new file mode 100644
index 0000000..7f994fb
--- /dev/null
+++ b/icing/file/file-backed-proto_test.cc
@@ -0,0 +1,148 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/file-backed-proto.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/document.pb.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::Not;
+using ::testing::Pointee;
+
+namespace icing {
+namespace lib {
+namespace {
+
+// Test fixture that points each test at a fresh proto file path under the
+// test temp dir and deletes the file on teardown so tests don't leak state
+// into each other.
+class FileBackedProtoTest : public ::testing::Test {
+ protected:
+  void SetUp() override { filename_ = GetTestTempDir() + "/schema.pb"; }
+
+  void TearDown() override { filesystem_.DeleteFile(filename_.c_str()); }
+
+  Filesystem filesystem_;
+  std::string filename_;
+};
+
+// Writes a proto and verifies Read() returns equal contents, including on
+// repeated reads from the same instance.
+TEST_F(FileBackedProtoTest, SimpleReadWriteTest) {
+  DocumentProto document =
+      DocumentBuilder().SetKey("namespace", "google.com").Build();
+
+  FileBackedProto<DocumentProto> file_proto(filesystem_, filename_);
+  ICING_ASSERT_OK(file_proto.Write(absl::make_unique<DocumentProto>(document)));
+  EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(document))));
+  // Multiple reads work.
+  EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(document))));
+  EXPECT_THAT(file_proto.Read(), IsOkAndHolds(Pointee(EqualsProto(document))));
+}
+
+// Verifies that contents written by one FileBackedProto instance are visible
+// to a second instance backed by the same file, i.e. the data actually hits
+// disk rather than living only in the in-memory cache.
+TEST_F(FileBackedProtoTest, DataPersistsAcrossMultipleInstancesTest) {
+  DocumentProto document =
+      DocumentBuilder().SetKey("namespace", "google.com").Build();
+
+  {
+    FileBackedProto<DocumentProto> file_proto(filesystem_, filename_);
+    EXPECT_THAT(file_proto.Read(), Not(IsOk()));  // Nothing to read.
+
+    ICING_ASSERT_OK(
+        file_proto.Write(absl::make_unique<DocumentProto>(document)));
+    EXPECT_THAT(file_proto.Read(),
+                IsOkAndHolds(Pointee(EqualsProto(document))));
+  }
+  {
+    // Different instance of FileBackedProto.
+    FileBackedProto<DocumentProto> file_proto(filesystem_, filename_);
+    EXPECT_THAT(file_proto.Read(),
+                IsOkAndHolds(Pointee(EqualsProto(document))));
+  }
+}
+
+// Each Write() fully replaces the previous contents, and the latest value
+// survives re-opening the file with a new instance.
+TEST_F(FileBackedProtoTest, MultipleUpdatesToProtoTest) {
+  DocumentProto google_proto =
+      DocumentBuilder().SetKey("namespace", "google.com").Build();
+  DocumentProto youtube_proto =
+      DocumentBuilder().SetKey("namespace", "youtube.com").Build();
+  DocumentProto waze_proto =
+      DocumentBuilder().SetKey("namespace", "waze.com").Build();
+
+  {
+    FileBackedProto<DocumentProto> file_proto(filesystem_, filename_);
+    ICING_ASSERT_OK(
+        file_proto.Write(absl::make_unique<DocumentProto>(google_proto)));
+    EXPECT_THAT(file_proto.Read(),
+                IsOkAndHolds(Pointee(EqualsProto(google_proto))));
+
+    ICING_ASSERT_OK(
+        file_proto.Write(absl::make_unique<DocumentProto>(youtube_proto)));
+    EXPECT_THAT(file_proto.Read(),
+                IsOkAndHolds(Pointee(EqualsProto(youtube_proto))));
+  }
+  {
+    // Different instance of FileBackedProto; it should see the last write and
+    // accept further updates.
+    FileBackedProto<DocumentProto> file_proto(filesystem_, filename_);
+    EXPECT_THAT(file_proto.Read(),
+                IsOkAndHolds(Pointee(EqualsProto(youtube_proto))));
+
+    ICING_ASSERT_OK(
+        file_proto.Write(absl::make_unique<DocumentProto>(waze_proto)));
+    EXPECT_THAT(file_proto.Read(),
+                IsOkAndHolds(Pointee(EqualsProto(waze_proto))));
+
+    ICING_ASSERT_OK(
+        file_proto.Write(absl::make_unique<DocumentProto>(google_proto)));
+    EXPECT_THAT(file_proto.Read(),
+                IsOkAndHolds(Pointee(EqualsProto(google_proto))));
+  }
+}
+
+// An empty file path cannot be opened, so both Read() and Write() must
+// surface an error rather than crash.
+TEST_F(FileBackedProtoTest, InvalidFilenameTest) {
+  DocumentProto document =
+      DocumentBuilder().SetKey("namespace", "google.com").Build();
+
+  FileBackedProto<DocumentProto> file_proto(filesystem_, "");
+  EXPECT_THAT(file_proto.Read(), Not(IsOk()));
+  EXPECT_THAT(file_proto.Write(absl::make_unique<DocumentProto>(document)),
+              Not(IsOk()));
+}
+
+// Overwrites the proto payload on disk (just past the header) without
+// updating the stored checksum; a fresh instance's Read() must then fail the
+// integrity check.
+TEST_F(FileBackedProtoTest, FileCorruptionTest) {
+  DocumentProto document =
+      DocumentBuilder().SetKey("namespace", "google.com").Build();
+
+  {
+    FileBackedProto<DocumentProto> file_proto(filesystem_, filename_);
+    ICING_ASSERT_OK(
+        file_proto.Write(absl::make_unique<DocumentProto>(document)));
+    EXPECT_THAT(file_proto.Read(),
+                IsOkAndHolds(Pointee(EqualsProto(document))));
+  }
+
+  // Serialize a different document and splice its bytes over the stored
+  // payload, leaving the header (and its proto_checksum) untouched.
+  document.set_uri("g00gle.com");
+  std::string document_str = document.SerializeAsString();
+  filesystem_.PWrite(filename_.c_str(),
+                     /*offset=*/sizeof(FileBackedProto<DocumentProto>::Header),
+                     document_str.data(), document_str.size());
+
+  FileBackedProto<DocumentProto> file_proto(filesystem_, filename_);
+  EXPECT_THAT(file_proto.Read(), Not(IsOk()));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
new file mode 100644
index 0000000..dc8a675
--- /dev/null
+++ b/icing/file/file-backed-vector.h
@@ -0,0 +1,708 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// A file-backed vector that can store fixed-width elements. It provides
+// built-in support for checksums to verify data integrity and an in-memory
+// cache for fast read/writes.
+//
+// If the file is corrupted/in an invalid state, all contents are lost, i.e.
+// there is no clear recovery path other than recreating/repopulating the
+// contents.
+//
+// Note on Performance:
+// The class keeps the vector in a mmapped area. This allows users to specify
+// which MemoryMappedFile::Strategy they wish to use with this class. The vector
+// will implicitly grow when the user tries to access an element beyond its
+// current size. Growing happens in 16KiB chunks, up to a maximum size of 1MiB.
+//
+// Note on Checksumming:
+// Checksumming happens lazily. We do tail checksums to avoid recalculating the
+// checksum of the entire file on each modfification. A full checksum will be
+// computed/verified at creation time, when persisting to disk, or whenever the
+// user manually calls ComputeChecksum(). A separate header checksum is kept for
+// a quick integrity check.
+//
+//
+// Usage:
+// RETURN_OR_ASSIGN(auto vector, FileBackedVector<char>::Create(...));
+//
+// ICING_RETURN_IF_ERROR(vector->Set(0, 'a'));
+// ICING_RETURN_IF_ERROR(vector->Set(1, 'b'));
+// ICING_RETURN_IF_ERROR(vector->Set(2, 'c'));
+//
+// vector->num_elements(); // Returns 3
+//
+// vector->At(2); // Returns 'c'
+//
+// vector->TruncateTo(1);
+// vector->num_elements(); // Returns 1
+// vector->At(0); // Returns 'a'
+//
+// vector->ComputeChecksum(); // Force a checksum update and gets the checksum
+//
+// vector->PersistToDisk(); // Persist contents to disk.
+
+#ifndef ICING_FILE_FILE_BACKED_VECTOR_H_
+#define ICING_FILE_FILE_BACKED_VECTOR_H_
+
+#include <stdint.h>
+#include <sys/mman.h>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
+#include "icing/util/math-util.h"
+
+namespace icing {
+namespace lib {
+
+template <typename T>
+class FileBackedVector {
+ public:
+  // Header stored at the beginning of the file before the rest of the vector
+  // elements. Stores metadata on the vector.
+  struct Header {
+    // Static assert constants.
+    static constexpr int32_t kHeaderSize = 24;
+    static constexpr int32_t kHeaderChecksumOffset = 16;
+
+    static constexpr int32_t kMagic = 0x8bbbe237;
+
+    // Holds the magic as quick sanity check against file corruption
+    int32_t magic;
+
+    // Byte size of each element in the vector
+    int32_t element_size;
+
+    // Number of elements currently in the vector
+    int32_t num_elements;
+
+    // Checksum of the vector elements, doesn't include the header fields.
+    //
+    // TODO(cassiewang): Add a checksum state that can track if the checksum is
+    // fresh or stale. This lets us short circuit checksum computations if we
+    // know the checksum is fresh.
+    uint32_t vector_checksum;
+
+    // Must be below all actual header content fields and above the padding
+    // field. Contains the crc checksum of the preceding fields.
+    uint32_t header_checksum;
+
+    // This field has no actual meaning here but is just used as padding for
+    // the struct so the size of the struct can be a multiple of 8. Doing this
+    // makes the address right after the header a multiple of 8 and prevents a
+    // ubsan misalign-pointer-use error (go/ubsan).
+    //
+    // NOTE: please remove this when adding new fields and re-assert that the
+    // size is multiple of 8.
+    int32_t padding_for_ptr_alignment;
+
+    // Computes the crc of the fields preceding header_checksum (magic,
+    // element_size, num_elements, vector_checksum); the checksum and padding
+    // fields are deliberately excluded.
+    uint32_t CalculateHeaderChecksum() const {
+      // Sanity check that the memory layout matches the disk layout.
+      static_assert(std::is_standard_layout<FileBackedVector::Header>::value,
+                    "");
+      static_assert(sizeof(FileBackedVector::Header) == kHeaderSize, "");
+      static_assert(
+          sizeof(FileBackedVector::Header) % sizeof(void*) == 0,
+          "Header has insufficient padding for void* pointer alignment");
+      static_assert(offsetof(FileBackedVector::Header, header_checksum) ==
+                        kHeaderChecksumOffset,
+                    "");
+
+      Crc32 crc;
+      std::string_view header_str(
+          reinterpret_cast<const char*>(this),
+          offsetof(FileBackedVector::Header, header_checksum));
+      crc.Append(header_str);
+      return crc.Get();
+    }
+  };
+
+  // Creates a new FileBackedVector to read/write content to.
+  //
+  // filesystem: Object to make system level calls
+  // file_path : Specifies the file to persist the vector to; must be a path
+  //             within a directory that already exists.
+  // mmap_strategy : Strategy/optimizations to access the content in the
+  //                 vector, see MemoryMappedFile::Strategy for more details
+  //
+  // Returns:
+  //   UNIMPLEMENTED_ERROR if mmap_strategy is READ_WRITE_MANUAL_SYNC
+  //   INTERNAL_ERROR on I/O error or if an existing file fails validation
+  static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
+  Create(const Filesystem& filesystem, const std::string& file_path,
+         MemoryMappedFile::Strategy mmap_strategy);
+
+  // Deletes the FileBackedVector
+  //
+  // filesystem: Object to make system level calls
+  // file_path : Specifies the file the vector is persisted to.
+  static libtextclassifier3::Status Delete(const Filesystem& filesystem,
+                                           const std::string& file_path);
+
+  // Not copyable
+  FileBackedVector(const FileBackedVector&) = delete;
+  FileBackedVector& operator=(const FileBackedVector&) = delete;
+
+  // If the vector was created with
+  // MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC, then changes will be
+  // synced by the system and the checksum will be updated.
+  ~FileBackedVector();
+
+  // Accesses the element at idx.
+  //
+  // Returns:
+  //   OUT_OF_RANGE_ERROR if idx < 0 or >= num_elements()
+  libtextclassifier3::StatusOr<const T*> Get(int32_t idx) const;
+
+  // Writes the value at idx.
+  //
+  // Returns:
+  //   OUT_OF_RANGE_ERROR if idx < 0 or file cannot be grown idx size
+  libtextclassifier3::Status Set(int32_t idx, const T& value);
+
+  // Resizes to first len elements. The crc is not updated on truncation.
+  //
+  // Returns:
+  //   OUT_OF_RANGE_ERROR if len < 0 or >= num_elements()
+  libtextclassifier3::Status TruncateTo(int32_t len);
+
+  // Flushes content to underlying file.
+  libtextclassifier3::Status PersistToDisk();
+
+  // Calculates and returns the disk usage in bytes.
+  //
+  // Returns:
+  //   Disk usage on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+
+  // Accessors.
+  const T* array() const {
+    return reinterpret_cast<const T*>(mmapped_file_->region());
+  }
+
+  // NOTE(review): this is a const member function handing out a mutable
+  // pointer, and writes made through it within [0, num_elements()) are not
+  // recorded in the change tracking that Set() maintains for partial crc
+  // updates — presumably callers are expected to go through Set(); verify.
+  T* mutable_array() const {
+    return reinterpret_cast<T*>(mmapped_file_->mutable_region());
+  }
+
+  int32_t num_elements() const { return header_->num_elements; }
+
+  // Updates checksum of the vector contents and returns it.
+  //
+  // Returns:
+  //   INTERNAL_ERROR if the vector's internal state is inconsistent
+  libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
+
+ private:
+  // We track partial updates to the array for crc updating. This
+  // requires extra memory to keep track of original buffers but
+  // allows for much faster crc re-computation. This is the frac limit
+  // of byte len after which we will discard recorded changes and
+  // recompute the entire crc instead.
+  static constexpr int32_t kPartialCrcLimitDiv = 8;  // limit is 1/8th
+
+  // Grow file by at least this many elements if array is growable.
+  static constexpr int64_t kGrowElements = 1u << 14;  // 16K
+
+  // Max number of elements that can be held by the vector.
+  static constexpr int64_t kMaxNumElements = 1u << 20;  // 1M
+
+  // Can only be created through the factory ::Create function
+  FileBackedVector(const Filesystem& filesystem, const std::string& file_path,
+                   std::unique_ptr<Header> header,
+                   std::unique_ptr<MemoryMappedFile> mmapped_file);
+
+  // Initialize a new FileBackedVector, and create the file.
+  static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
+  InitializeNewFile(const Filesystem& filesystem, const std::string& file_path,
+                    ScopedFd fd, MemoryMappedFile::Strategy mmap_strategy);
+
+  // Initialize a FileBackedVector from an existing file.
+  static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
+  InitializeExistingFile(const Filesystem& filesystem,
+                         const std::string& file_path, ScopedFd fd,
+                         MemoryMappedFile::Strategy mmap_strategy);
+
+  // Grows the underlying file to hold at least num_elements
+  //
+  // Returns:
+  //   OUT_OF_RANGE_ERROR if we can't grow to the specified size
+  libtextclassifier3::Status GrowIfNecessary(int32_t num_elements);
+
+  // Cached constructor params.
+  const Filesystem* const filesystem_;
+  const std::string file_path_;
+  std::unique_ptr<Header> header_;
+  std::unique_ptr<MemoryMappedFile> mmapped_file_;
+
+  // Offset before which all the elements have been included in the calculation
+  // of crc at the time it was calculated.
+  int32_t changes_end_ = 0;
+
+  // Offset of changes that have happened since the last crc update between [0,
+  // changes_end_).
+  std::vector<int32_t> changes_;
+
+  // Buffer of the original elements that have been changed since the last crc
+  // update. Will be cleared if the size grows too big.
+  std::string saved_original_buffer_;
+
+  // Keep track of all pages we touched so we can write them back to
+  // disk.
+  //
+  // NOTE(review): nothing in this file reads or writes dirty_pages_ —
+  // presumably reserved for future use; confirm before relying on it.
+  std::vector<bool> dirty_pages_;
+};
+
+// Out-of-line definitions for the static constexpr data members. These are
+// needed when the members are ODR-used under pre-C++17 semantics (static
+// constexpr members are only implicitly inline since C++17); they are
+// harmless redundancy otherwise.
+template <typename T>
+constexpr int32_t FileBackedVector<T>::kPartialCrcLimitDiv;
+
+template <typename T>
+constexpr int64_t FileBackedVector<T>::kGrowElements;
+
+template <typename T>
+constexpr int64_t FileBackedVector<T>::kMaxNumElements;
+
+template <typename T>
+libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
+FileBackedVector<T>::Create(const Filesystem& filesystem,
+                            const std::string& file_path,
+                            MemoryMappedFile::Strategy mmap_strategy) {
+  // Growing the vector means extending the file, unmapping, and re-mmapping
+  // over the larger file. With MAP_PRIVATE semantics, unmapping discards any
+  // contents that were never manually persisted, so READ_WRITE_MANUAL_SYNC is
+  // inherently unsafe here until growth is disallowed for it or users are
+  // made aware of the hazard.
+  if (mmap_strategy == MemoryMappedFile::Strategy::READ_WRITE_MANUAL_SYNC) {
+    return absl_ports::UnimplementedError(
+        "FileBackedVector currently doesn't support READ_WRITE_MANUAL_SYNC "
+        "mmap strategy.");
+  }
+
+  ScopedFd fd(filesystem.OpenForWrite(file_path.c_str()));
+  if (!fd.is_valid()) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to open ", file_path));
+  }
+
+  int64_t file_size = filesystem.GetFileSize(file_path.c_str());
+  if (file_size == Filesystem::kBadFileSize) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Bad file size for file ", file_path));
+  }
+
+  // An empty file means we're creating the vector from scratch; anything else
+  // must be validated and loaded as an existing vector.
+  if (file_size == 0) {
+    return InitializeNewFile(filesystem, file_path, std::move(fd),
+                             mmap_strategy);
+  }
+  return InitializeExistingFile(filesystem, file_path, std::move(fd),
+                                mmap_strategy);
+}
+
+template <typename T>
+libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
+FileBackedVector<T>::InitializeNewFile(
+    const Filesystem& filesystem, const std::string& file_path, ScopedFd fd,
+    MemoryMappedFile::Strategy mmap_strategy) {
+  // Build a fresh header. make_unique value-initializes the struct, so
+  // num_elements and vector_checksum start at zero.
+  auto new_header = std::make_unique<Header>();
+  new_header->magic = FileBackedVector<T>::Header::kMagic;
+  new_header->element_size = sizeof(T);
+  new_header->header_checksum = new_header->CalculateHeaderChecksum();
+
+  // We use Write() here, instead of writing through the mmapped region
+  // created below, so we can gracefully handle errors that occur when the
+  // disk is full. See b/77309668 for details.
+  if (!filesystem.PWrite(fd.get(), /*offset=*/0, new_header.get(),
+                         sizeof(Header))) {
+    return absl_ports::InternalError("Failed to write header");
+  }
+
+  // Constructor of MemoryMappedFile doesn't actually call mmap(), mmap()
+  // happens on MemoryMappedFile::Remap(). So having a potentially unflushed fd
+  // at this point shouldn't run into issues with a mmap of the same file. But
+  // we'll close the fd just in case.
+  fd.reset();
+  auto mmapped_file =
+      std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
+
+  return std::unique_ptr<FileBackedVector<T>>(new FileBackedVector<T>(
+      filesystem, file_path, std::move(new_header), std::move(mmapped_file)));
+}
+
+template <typename T>
+libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
+FileBackedVector<T>::InitializeExistingFile(
+    const Filesystem& filesystem, const std::string& file_path,
+    const ScopedFd fd, MemoryMappedFile::Strategy mmap_strategy) {
+  int64_t file_size = filesystem.GetFileSize(file_path.c_str());
+  // Cast avoids a signed/unsigned comparison; file_size is non-negative here
+  // because Create() already rejected kBadFileSize.
+  if (file_size < static_cast<int64_t>(sizeof(FileBackedVector<T>::Header))) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("File header too short for ", file_path));
+  }
+
+  auto header = std::make_unique<Header>();
+  if (!filesystem.PRead(fd.get(), header.get(), sizeof(Header),
+                        /*offset=*/0)) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to read header of ", file_path));
+  }
+
+  // Make sure the header is still valid before we use any of its values. This
+  // should technically be included in the header_checksum check below, but
+  // this is a quick/fast check that can save us from an extra crc computation.
+  //
+  // NOTE: this must compare the magic value read from the file
+  // (header->magic) against the expected constant. The previous code compared
+  // header->kMagic — i.e. the static constant against itself — which always
+  // passed and never actually validated the file's magic.
+  if (header->magic != FileBackedVector<T>::Header::kMagic) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Invalid header kMagic for ", file_path));
+  }
+
+  // Mmap the content of the vector, excluding the header so its easier to
+  // access elements from the mmapped region
+  auto mmapped_file =
+      std::make_unique<MemoryMappedFile>(filesystem, file_path, mmap_strategy);
+  ICING_RETURN_IF_ERROR(
+      mmapped_file->Remap(sizeof(Header), file_size - sizeof(Header)));
+
+  // Check header
+  if (header->header_checksum != header->CalculateHeaderChecksum()) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Invalid header crc for ", file_path));
+  }
+
+  if (header->element_size != sizeof(T)) {
+    return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+        "Inconsistent element size, expected %zd, actual %d", sizeof(T),
+        header->element_size));
+  }
+
+  // Check vector contents: the crc over the first num_elements elements of
+  // the mmapped region must match the checksum stored in the header.
+  Crc32 vector_checksum;
+  std::string_view vector_contents(
+      reinterpret_cast<const char*>(mmapped_file->region()),
+      header->num_elements * sizeof(T));
+  vector_checksum.Append(vector_contents);
+
+  if (vector_checksum.Get() != header->vector_checksum) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Invalid vector contents for ", file_path));
+  }
+
+  return std::unique_ptr<FileBackedVector<T>>(new FileBackedVector<T>(
+      filesystem, file_path, std::move(header), std::move(mmapped_file)));
+}
+
+template <typename T>
+libtextclassifier3::Status FileBackedVector<T>::Delete(
+    const Filesystem& filesystem, const std::string& file_path) {
+  // Removes the backing file entirely; a subsequent Create() starts fresh.
+  if (filesystem.DeleteFile(file_path.c_str())) {
+    return libtextclassifier3::Status::OK;
+  }
+  return absl_ports::InternalError(
+      absl_ports::StrCat("Failed to delete file: ", file_path));
+}
+
+template <typename T>
+FileBackedVector<T>::FileBackedVector(
+    const Filesystem& filesystem, const std::string& file_path,
+    std::unique_ptr<Header> header,
+    std::unique_ptr<MemoryMappedFile> mmapped_file)
+    : filesystem_(&filesystem),
+      file_path_(file_path),
+      header_(std::move(header)),
+      mmapped_file_(std::move(mmapped_file)),
+      // All current elements are assumed covered by the checksum stored in
+      // the header, so change tracking starts at the current end. (header_ is
+      // declared before changes_end_, so it is initialized first and safe to
+      // dereference here.)
+      changes_end_(header_->num_elements) {}
+
+template <typename T>
+FileBackedVector<T>::~FileBackedVector() {
+  // Best-effort flush for auto-sync mmaps so the stored checksum matches the
+  // contents. Destructors can't propagate errors, so failures are only logged.
+  if (mmapped_file_->strategy() !=
+      MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC) {
+    return;
+  }
+  if (!PersistToDisk().ok()) {
+    ICING_LOG(WARNING) << "Failed to persist vector to disk while destructing "
+                       << file_path_;
+  }
+}
+
+template <typename T>
+libtextclassifier3::StatusOr<const T*> FileBackedVector<T>::Get(
+    int32_t idx) const {
+  // Bounds-check against the logical element count, not the mmapped capacity.
+  if (idx < 0) {
+    return absl_ports::OutOfRangeError(
+        IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx));
+  }
+  if (idx >= header_->num_elements) {
+    return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+        "Index, %d, was greater than vector size, %d", idx,
+        header_->num_elements));
+  }
+
+  // Pointer into the mmapped region; valid until the vector grows or is
+  // destroyed.
+  return array() + idx;
+}
+
+template <typename T>
+libtextclassifier3::Status FileBackedVector<T>::Set(int32_t idx,
+                                                    const T& value) {
+  if (idx < 0) {
+    return absl_ports::OutOfRangeError(
+        IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx));
+  }
+
+  // Byte offset of this element within the mmapped element region (the header
+  // is not part of the region).
+  int32_t start_byte = idx * sizeof(T);
+
+  ICING_RETURN_IF_ERROR(GrowIfNecessary(idx + 1));
+
+  // Extend the logical size if this write lands past the previous end. Note
+  // this happens before the equality early-out below, so writing a
+  // default-valued element to a fresh slot still grows the vector.
+  if (idx + 1 > header_->num_elements) {
+    header_->num_elements = idx + 1;
+  }
+
+  if (mutable_array()[idx] == value) {
+    // No need to update
+    return libtextclassifier3::Status::OK;
+  }
+
+  // Cache original value to update crcs. Only writes below changes_end_ need
+  // tracking; anything at or past it is covered by the tail pass in
+  // ComputeChecksum().
+  if (idx < changes_end_) {
+    // If we exceed kPartialCrcLimitDiv, clear changes_end_ to
+    // revert to full CRC. Zeroing vector_checksum and changes_end_ forces
+    // ComputeChecksum() to recompute over the entire contents.
+    if ((saved_original_buffer_.size() + sizeof(T)) *
+            FileBackedVector<T>::kPartialCrcLimitDiv >
+        changes_end_ * sizeof(T)) {
+      ICING_VLOG(2) << "FileBackedVector change tracking limit exceeded";
+      changes_.clear();
+      saved_original_buffer_.clear();
+      changes_end_ = 0;
+      header_->vector_checksum = 0;
+    } else {
+      // Record the index and a snapshot of the pre-write bytes; the two stay
+      // in lockstep (one sizeof(T) chunk per recorded index).
+      changes_.push_back(idx);
+      saved_original_buffer_.append(
+          reinterpret_cast<char*>(const_cast<T*>(array())) + start_byte,
+          sizeof(T));
+    }
+  }
+
+  mutable_array()[idx] = value;
+  return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+libtextclassifier3::Status FileBackedVector<T>::GrowIfNecessary(
+    int32_t num_elements) {
+  // NOTE: sizeof(T) can never be 0 for a complete type in standard C++; this
+  // guard is purely defensive.
+  if (sizeof(T) == 0) {
+    // Growing is a no-op
+    return libtextclassifier3::Status::OK;
+  }
+
+  // TODO(cassiewang): Benchmark to see if having ABSL_PREDICT_TRUE is impactful
+  if (ABSL_PREDICT_TRUE(num_elements <= header_->num_elements)) {
+    return libtextclassifier3::Status::OK;
+  }
+
+  if (num_elements > FileBackedVector<T>::kMaxNumElements) {
+    return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+        "%d exceeds maximum number of elements allowed, %lld", num_elements,
+        static_cast<long long>(FileBackedVector<T>::kMaxNumElements)));
+  }
+
+  int64_t current_file_size = filesystem_->GetFileSize(file_path_.c_str());
+  int64_t least_file_size_needed = sizeof(Header) + num_elements * sizeof(T);
+
+  if (least_file_size_needed <= current_file_size) {
+    // Our underlying file can hold the target num_elements cause we've grown
+    // before
+    return libtextclassifier3::Status::OK;
+  }
+
+  // Otherwise, we need to grow. Grow to kGrowElements boundary so we don't
+  // pay the extend/remap cost on every single element.
+  least_file_size_needed = math_util::RoundUpTo(
+      least_file_size_needed,
+      int64_t{FileBackedVector<T>::kGrowElements * sizeof(T)});
+  if (!filesystem_->Grow(file_path_.c_str(), least_file_size_needed)) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Couldn't grow file ", file_path_));
+  }
+
+  // Remap so the mmapped region covers the enlarged element area (the header
+  // stays excluded from the mapping).
+  ICING_RETURN_IF_ERROR(mmapped_file_->Remap(
+      sizeof(Header), least_file_size_needed - sizeof(Header)));
+
+  return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+libtextclassifier3::Status FileBackedVector<T>::TruncateTo(
+    int32_t new_num_elements) {
+  // Only lengths in [0, num_elements) are accepted; truncating to the current
+  // size is rejected, matching the documented contract.
+  if (new_num_elements < 0) {
+    return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+        "Truncated length %d must be >= 0", new_num_elements));
+  }
+  if (new_num_elements >= header_->num_elements) {
+    return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+        "Truncated length %d must be less than the current size %d",
+        new_num_elements, header_->num_elements));
+  }
+
+  // Shrinking is a metadata-only change; the stored crc is intentionally left
+  // stale until the next ComputeChecksum()/PersistToDisk().
+  header_->num_elements = new_num_elements;
+  return libtextclassifier3::Status::OK;
+}
+
+// Recomputes the vector checksum incrementally: for each tracked change, the
+// saved pre-write bytes are xor'ed with the current bytes and folded into the
+// crc via Crc32::UpdateWithXor; the region grown since the last computation
+// (changes_end_ .. num_elements) is appended directly. Falls back to nothing
+// special when tracking was cleared (changes_end_ == 0 means the whole
+// contents are treated as "grown" and crc'd from scratch).
+template <typename T>
+libtextclassifier3::StatusOr<Crc32> FileBackedVector<T>::ComputeChecksum() {
+  // First apply the modified area. Keep a bitmap of already updated
+  // regions so we don't double-update.
+  std::vector<bool> updated(changes_end_);
+  uint32_t cur_offset = 0;
+  Crc32 cur_crc(header_->vector_checksum);
+  int num_partial_crcs = 0;
+  int num_truncated = 0;
+  int num_overlapped = 0;
+  int num_duplicate = 0;
+  for (size_t i = 0; i < changes_.size(); i++) {
+    const int32_t change_offset = changes_[i];
+    if (change_offset > changes_end_) {
+      return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+          "Failed to update crc, change offset %d, changes_end_ %d",
+          change_offset, changes_end_));
+    }
+
+    // Skip truncated tracked changes.
+    //
+    // NOTE(review): this `continue` skips the `cur_offset += sizeof(T)`
+    // advance at the bottom of the loop even though Set() appended a
+    // sizeof(T) snapshot for this change, which appears to misalign
+    // saved_original_buffer_ for all later changes. Verify against upstream
+    // before relying on checksum correctness after TruncateTo().
+    if (change_offset >= header_->num_elements) {
+      ++num_truncated;
+      continue;
+    }
+
+    // Turn change buffer into change^original.
+    const char* buffer_end = &saved_original_buffer_[cur_offset + sizeof(T)];
+    const char* cur_array =
+        reinterpret_cast<const char*>(array()) + change_offset * sizeof(T);
+    // Now xor in. SSE acceleration please?
+    for (char* cur = &saved_original_buffer_[cur_offset]; cur < buffer_end;
+         cur++, cur_array++) {
+      *cur ^= *cur_array;
+    }
+
+    // Skip over already updated bytes by setting update to 0.
+    bool new_update = false;
+    bool overlap = false;
+    uint32_t cur_element = change_offset;
+    for (char* cur = &saved_original_buffer_[cur_offset]; cur < buffer_end;
+         cur_element++, cur += sizeof(T)) {
+      if (updated[cur_element]) {
+        // A zero xor chunk leaves the crc unchanged for this element.
+        memset(cur, 0, sizeof(T));
+        overlap = true;
+      } else {
+        updated[cur_element] = true;
+        new_update = true;
+      }
+    }
+
+    // Apply update to crc.
+    if (new_update) {
+      // Explicitly create the string_view with length
+      std::string_view xored_str(buffer_end - sizeof(T), sizeof(T));
+      if (!cur_crc
+               .UpdateWithXor(xored_str, changes_end_ * sizeof(T),
+                              change_offset * sizeof(T))
+               .ok()) {
+        return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+            "Failed to update crc, change offset %d, change "
+            "length %zd changes_end_ %d",
+            change_offset, xored_str.length(), changes_end_));
+      }
+      num_partial_crcs++;
+      if (overlap) {
+        num_overlapped++;
+      }
+    } else {
+      num_duplicate++;
+    }
+    cur_offset += sizeof(T);
+  }
+  if (!changes_.empty()) {
+    ICING_VLOG(2) << IcingStringUtil::StringPrintf(
+        "Array update partial crcs %d truncated %d overlapped %d duplicate %d",
+        num_partial_crcs, num_truncated, num_overlapped, num_duplicate);
+  }
+
+  // Now update with grown area.
+  if (changes_end_ < header_->num_elements) {
+    // Explicitly create the string_view with length
+    std::string_view update_str(
+        reinterpret_cast<const char*>(array()) + changes_end_ * sizeof(T),
+        (header_->num_elements - changes_end_) * sizeof(T));
+    cur_crc.Append(update_str);
+    ICING_VLOG(2) << IcingStringUtil::StringPrintf(
+        "Array update tail crc offset %d -> %d", changes_end_,
+        header_->num_elements);
+  }
+
+  // Clear, now that we've applied changes.
+  changes_.clear();
+  saved_original_buffer_.clear();
+  changes_end_ = header_->num_elements;
+
+  // Commit new crc.
+  header_->vector_checksum = cur_crc.Get();
+  return cur_crc;
+}
+
+template <typename T>
+libtextclassifier3::Status FileBackedVector<T>::PersistToDisk() {
+  // Bring the vector checksum up to date, then rewrite the header so the
+  // on-disk metadata matches the on-disk contents.
+  ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
+  header_->vector_checksum = checksum.Get();
+  header_->header_checksum = header_->CalculateHeaderChecksum();
+
+  if (!filesystem_->PWrite(file_path_.c_str(), /*offset=*/0, header_.get(),
+                           sizeof(Header))) {
+    return absl_ports::InternalError("Failed to sync header");
+  }
+
+  if (mmapped_file_->strategy() ==
+      MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC) {
+    // Changes should have been applied to the underlying file, but call msync()
+    // as an extra safety step to ensure they are written out.
+    ICING_RETURN_IF_ERROR(mmapped_file_->PersistToDisk());
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+libtextclassifier3::StatusOr<int64_t> FileBackedVector<T>::GetDiskUsage()
+    const {
+  // Delegates to the filesystem; kBadFileSize is the only failure signal.
+  const int64_t usage = filesystem_->GetDiskUsage(file_path_.c_str());
+  if (usage == Filesystem::kBadFileSize) {
+    return absl_ports::InternalError(
+        "Failed to get disk usage of file-backed vector");
+  }
+  return usage;
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_FILE_BACKED_VECTOR_H_
diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc
new file mode 100644
index 0000000..7561b57
--- /dev/null
+++ b/icing/file/file-backed-vector_test.cc
@@ -0,0 +1,429 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/file-backed-vector.h"
+
+#include <errno.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <string_view>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
+
+using ::testing::Eq;
+using ::testing::Pointee;
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Fixture that gives every test a fresh, truncated backing file under the
+// test temp dir and deletes it afterwards.
+class FileBackedVectorTest : public testing::Test {
+ protected:
+  void SetUp() override {
+    file_path_ = GetTestTempDir() + "/test.array";
+    fd_ = filesystem_.OpenForWrite(file_path_.c_str());
+    ASSERT_NE(-1, fd_);
+    ASSERT_TRUE(filesystem_.Truncate(fd_, 0));
+  }
+
+  void TearDown() override {
+    close(fd_);
+    filesystem_.DeleteFile(file_path_.c_str());
+  }
+
+  // Helper method to loop over some data and insert into the vector at some idx
+  // NOTE(review): `int i` vs `data.length()` (size_t) is a signed/unsigned
+  // compare; harmless for these small test strings.
+  template <typename T>
+  void Insert(FileBackedVector<T>* vector, int32_t idx, std::string data) {
+    for (int i = 0; i < data.length(); ++i) {
+      ICING_ASSERT_OK(vector->Set(idx + i, data.at(i)));
+    }
+  }
+
+  // Helper method to retrieve data from the beginning of the vector
+  template <typename T>
+  std::string_view Get(FileBackedVector<T>* vector, int32_t expected_len) {
+    return Get(vector, 0, expected_len);
+  }
+
+  // Returns a view directly into the vector's backing array; only valid while
+  // `vector` is alive and its contents unmodified.
+  template <typename T>
+  std::string_view Get(FileBackedVector<T>* vector, int32_t idx,
+                       int32_t expected_len) {
+    return std::string_view(vector->array() + idx, expected_len);
+  }
+
+  Filesystem filesystem_;
+  std::string file_path_;
+  int fd_;
+};
+
+// Creating on a brand-new file succeeds, and the same file can be re-opened.
+TEST_F(FileBackedVectorTest, Create) {
+  {
+    // Create a vector for a new file
+    ICING_ASSERT_OK_AND_ASSIGN(
+        auto vector, FileBackedVector<char>::Create(
+                         filesystem_, file_path_,
+                         MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+  }
+
+  {
+    // We can create it again based on the same file.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        auto vector, FileBackedVector<char>::Create(
+                         filesystem_, file_path_,
+                         MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+  }
+}
+
+// End-to-end persistence: write data, verify the checksum, corrupt the stored
+// checksum to force INTERNAL on reopen, restore it, then truncate.
+TEST_F(FileBackedVectorTest, SimpleShared) {
+  // Create a vector and add some data.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedVector<char>> vector,
+      FileBackedVector<char>::Create(
+          filesystem_, file_path_,
+          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
+
+  std::string expected = "abcde";
+  Insert(vector.get(), 0, expected);
+  EXPECT_EQ(expected.length(), vector->num_elements());
+  EXPECT_EQ(expected, Get(vector.get(), expected.length()));
+
+  // Golden CRC of "abcde"; also reused below to repair the header.
+  uint32_t good_crc_value = 1134899064U;
+  const Crc32 good_crc(good_crc_value);
+  // Explicit call to update the crc does update the value
+  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(good_crc));
+
+  // PersistToDisk does nothing bad.
+  ICING_EXPECT_OK(vector->PersistToDisk());
+
+  // Close out the old vector to ensure everything persists properly before we
+  // reassign it
+  vector.reset();
+
+  // Write a bad crc, this would be a mismatch compared to the computed crc of
+  // the contents on reinitialization.
+  uint32_t bad_crc_value = 123;
+  filesystem_.PWrite(file_path_.data(),
+                     offsetof(FileBackedVector<char>::Header, vector_checksum),
+                     &bad_crc_value, sizeof(bad_crc_value));
+
+  ASSERT_THAT(FileBackedVector<char>::Create(
+                  filesystem_, file_path_,
+                  MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
+              StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+
+  // Get it back into an ok state
+  filesystem_.PWrite(file_path_.data(),
+                     offsetof(FileBackedVector<char>::Header, vector_checksum),
+                     &good_crc_value, sizeof(good_crc_value));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      vector, FileBackedVector<char>::Create(
+                  filesystem_, file_path_,
+                  MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+  EXPECT_EQ(expected, Get(vector.get(), expected.length()));
+
+  // Close out the old vector to ensure everything persists properly before we
+  // reassign it
+  vector.reset();
+
+  // Can reinitialize it safely
+  ICING_ASSERT_OK_AND_ASSIGN(
+      vector, FileBackedVector<char>::Create(
+                  filesystem_, file_path_,
+                  MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+  // Truncate the content
+  ICING_EXPECT_OK(vector->TruncateTo(0));
+
+  // We don't automatically update the crc when we truncate.
+  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(good_crc));
+  EXPECT_EQ(0u, vector->num_elements());
+}
+
+// Element access: valid indices return pointers to the stored values;
+// out-of-range indices (high or negative) return OUT_OF_RANGE.
+TEST_F(FileBackedVectorTest, Get) {
+  // Create a vector and add some data.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedVector<char>> vector,
+      FileBackedVector<char>::Create(
+          filesystem_, file_path_,
+          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
+
+  std::string expected = "abc";
+  Insert(vector.get(), 0, expected);
+  EXPECT_EQ(expected.length(), vector->num_elements());
+
+  EXPECT_THAT(vector->Get(0), IsOkAndHolds(Pointee(Eq('a'))));
+  EXPECT_THAT(vector->Get(1), IsOkAndHolds(Pointee(Eq('b'))));
+  EXPECT_THAT(vector->Get(2), IsOkAndHolds(Pointee(Eq('c'))));
+
+  // Out of bounds error
+  EXPECT_THAT(vector->Get(3),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+  EXPECT_THAT(vector->Get(-1),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+// The incrementally-maintained checksum must always equal a from-scratch CRC
+// of the full contents, even when updates never overlap each other.
+// NOTE(review): `uint32_t i < num_elements` mixes unsigned/int; fine here
+// since num_elements is a small positive constant.
+TEST_F(FileBackedVectorTest, IncrementalCrc_NonOverlappingChanges) {
+  int num_elements = 1000;
+  int incremental_size = 3;
+  // Create an array with some data.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedVector<char>> vector,
+      FileBackedVector<char>::Create(
+          filesystem_, file_path_,
+          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+  Insert(vector.get(), 0, std::string(num_elements, 'a'));
+  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(2620640643U)));
+
+  // Non-overlapping changes to the array, with increasing intervals
+  // between updating the checksum. Validate by mapping another array on top.
+  uint32_t next_update = 2;
+  for (uint32_t i = 0; i < num_elements; i += incremental_size) {
+    Insert(vector.get(), i, std::string(incremental_size, 'b'));
+
+    if (i >= next_update) {
+      ICING_ASSERT_OK_AND_ASSIGN(Crc32 incremental_crc,
+                                 vector->ComputeChecksum());
+      ICING_LOG(INFO) << "Now crc @" << incremental_crc.Get();
+
+      Crc32 full_crc;
+      std::string_view reconstructed_view =
+          std::string_view(vector->array(), vector->num_elements());
+      full_crc.Append(reconstructed_view);
+
+      ASSERT_EQ(incremental_crc, full_crc);
+      next_update *= 2;
+    }
+  }
+
+  for (uint32_t i = 0; i < num_elements; ++i) {
+    EXPECT_THAT(vector->Get(i), IsOkAndHolds(Pointee(Eq('b'))));
+  }
+}
+
+// Same invariant as above, but successive updates overlap previous ones.
+TEST_F(FileBackedVectorTest, IncrementalCrc_OverlappingChanges) {
+  int num_elements = 1000;
+  int incremental_size = 3;
+  // Create an array with some data.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedVector<char>> vector,
+      FileBackedVector<char>::Create(
+          filesystem_, file_path_,
+          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+  Insert(vector.get(), 0, std::string(num_elements, 'a'));
+  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(2620640643U)));
+
+  // Overlapping changes to the array, with increasing intervals
+  // between updating the checksum. Validate by mapping another array on top.
+  uint32_t next_update = 2;
+  for (uint32_t i = 0; i < num_elements; i++) {
+    Insert(vector.get(), i, std::string(incremental_size, 'b'));
+
+    if (i >= next_update) {
+      ICING_ASSERT_OK_AND_ASSIGN(Crc32 incremental_crc,
+                                 vector->ComputeChecksum());
+      ICING_LOG(INFO) << "Now crc @" << incremental_crc.Get();
+
+      Crc32 full_crc;
+      std::string_view reconstructed_view =
+          std::string_view(vector->array(), vector->num_elements());
+      full_crc.Append(reconstructed_view);
+
+      ASSERT_EQ(incremental_crc, full_crc);
+      next_update *= 2;
+    }
+  }
+  for (uint32_t i = 0; i < num_elements; ++i) {
+    EXPECT_THAT(vector->Get(i), IsOkAndHolds(Pointee(Eq('b'))));
+  }
+}
+
+// Writes near the maximum capacity succeed and persist; writes past it (or at
+// negative indices) fail with OUT_OF_RANGE.
+TEST_F(FileBackedVectorTest, Grow) {
+  // This is the same value as FileBackedVector::kMaxNumElts
+  constexpr int32_t kMaxNumElts = 1U << 20;
+
+  ASSERT_TRUE(filesystem_.Truncate(fd_, 0));
+
+  // Create an array and add some data.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedVector<char>> vector,
+      FileBackedVector<char>::Create(
+          filesystem_, file_path_,
+          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
+
+  EXPECT_THAT(vector->Set(kMaxNumElts + 11, 'a'),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+  EXPECT_THAT(vector->Set(-1, 'a'),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+  uint32_t start = kMaxNumElts - 13;
+  Insert(vector.get(), start, "abcde");
+
+  // Crc works?
+  const Crc32 good_crc(1134899064U);
+  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(good_crc));
+
+  // PersistToDisk does nothing bad, and ensures the content is still there
+  // after we recreate the vector
+  ICING_EXPECT_OK(vector->PersistToDisk());
+
+  // Close out the old vector to ensure everything persists properly before we
+  // reassign it
+  vector.reset();
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      vector, FileBackedVector<char>::Create(
+                  filesystem_, file_path_,
+                  MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+  std::string expected = "abcde";
+  EXPECT_EQ(expected, Get(vector.get(), start, expected.length()));
+}
+
+// The backing file grows in kGrowElements-sized chunks rather than per
+// element, and keeps its size across reinitialization.
+TEST_F(FileBackedVectorTest, GrowsInChunks) {
+  // This is the same value as FileBackedVector::kGrowElements
+  constexpr int32_t kGrowElements = 1U << 14;  // 16K
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedVector<int>> vector,
+      FileBackedVector<int>::Create(
+          filesystem_, file_path_,
+          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+  // Our initial file size should just be the size of the header
+  EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+              Eq(sizeof(FileBackedVector<char>::Header)));
+
+  // Once we add something though, we'll grow to kGrowElements big
+  Insert(vector.get(), 0, "a");
+  EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+              Eq(kGrowElements * sizeof(int)));
+
+  // Should still be the same size, don't need to grow underlying file
+  Insert(vector.get(), 1, "b");
+  EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+              Eq(kGrowElements * sizeof(int)));
+
+  // Now we grow by a kGrowElements chunk, so the underlying file is 2
+  // kGrowElements big
+  Insert(vector.get(), 2, std::string(kGrowElements, 'c'));
+  EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+              Eq(kGrowElements * 2 * sizeof(int)));
+
+  // Destroy/persist the contents.
+  vector.reset();
+
+  // Reinitialize
+  ICING_ASSERT_OK_AND_ASSIGN(
+      vector, FileBackedVector<int>::Create(
+                  filesystem_, file_path_,
+                  MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+  // Should be the same file size as before
+  EXPECT_THAT(filesystem_.GetFileSize(file_path_.c_str()),
+              Eq(kGrowElements * 2 * sizeof(int)));
+}
+
+// Delete() removes the backing file (idempotent when nothing exists) and the
+// path can be reused afterwards.
+TEST_F(FileBackedVectorTest, Delete) {
+  // Can delete even if there's nothing there
+  ICING_EXPECT_OK(FileBackedVector<int64_t>::Delete(filesystem_, file_path_));
+
+  // Create a vector and add some data.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedVector<char>> vector,
+      FileBackedVector<char>::Create(
+          filesystem_, file_path_,
+          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+
+  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
+
+  std::string expected = "abcde";
+  Insert(vector.get(), 0, expected);
+  ASSERT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(1134899064U)));
+  ASSERT_EQ(expected.length(), vector->num_elements());
+
+  // Close out the old vector to ensure everything persists properly before we
+  // delete the underlying files
+  vector.reset();
+
+  ICING_EXPECT_OK(FileBackedVector<int64_t>::Delete(filesystem_, file_path_));
+
+  EXPECT_FALSE(filesystem_.FileExists(file_path_.data()));
+
+  // Can successfully create again.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      vector, FileBackedVector<char>::Create(
+                  filesystem_, file_path_,
+                  MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+}
+
+// TruncateTo() shrinks num_elements without recomputing the checksum, and
+// rejects out-of-range targets.
+TEST_F(FileBackedVectorTest, TruncateTo) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedVector<char>> vector,
+      FileBackedVector<char>::Create(
+          filesystem_, file_path_,
+          MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC));
+  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(0)));
+
+  Insert(vector.get(), 0, "A");
+  Insert(vector.get(), 1, "Z");
+
+  EXPECT_EQ(2, vector->num_elements());
+  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(1658635950)));
+
+  // Modify 1 element, out of 2 total elements. 1/2 changes exceeds the partial
+  // crc limit, so our next checksum call will recompute the entire vector's
+  // checksum.
+  Insert(vector.get(), 1, "J");
+  // We'll ignore everything after the 1st element, so the full vector's
+  // checksum will only include "J".
+  ICING_EXPECT_OK(vector->TruncateTo(1));
+  EXPECT_EQ(1, vector->num_elements());
+  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(31158534)));
+
+  // Truncating doesn't cause the checksum to be updated.
+  ICING_EXPECT_OK(vector->TruncateTo(0));
+  EXPECT_EQ(0, vector->num_elements());
+  EXPECT_THAT(vector->ComputeChecksum(), IsOkAndHolds(Crc32(31158534)));
+
+  // Can't truncate past end.
+  EXPECT_THAT(vector->TruncateTo(100),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+
+  // Must be greater than or equal to 0
+  EXPECT_THAT(vector->TruncateTo(-1),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/filesystem.cc b/icing/file/filesystem.cc
new file mode 100644
index 0000000..5367e87
--- /dev/null
+++ b/icing/file/filesystem.cc
@@ -0,0 +1,678 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/filesystem.h"
+
+#include <dirent.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <pthread.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <unordered_set>
+
+#include "icing/absl_ports/str_cat.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/logging.h"
+
+using std::vector;
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// The size of the block for st_blksize returned by stat() and as a
+// consequence also the granularity of GetDiskUsage(). It seems that there is
+// no appropriate constant for this. See http://linux.die.net/man/2/stat
+constexpr int kStatBlockSize = 512;
+
+// Logs information about open file descriptors.
+//
+// This function uses getrlimit() to find the maximum number of file
+// descriptors, then calls readlink("/proc/self/fd/N") for each possible file
+// descriptor number to get a description of the open file from procfs.
+//
+// We don't use readdir() to list the contents of /proc/self/fd (which would be
+// the more obvious approach) because that would require a free file descriptor
+// to open the directory, while we call this function when all file descriptors
+// are in use.
+//
+// Called from LogOpenError() when open() fails with EMFILE, i.e. when file
+// descriptors are exhausted, so everything here avoids allocating new fds.
+void LogOpenFileDescriptors() {
+  // Determine the limit on file descriptor numbers. RLIMIT_NOFILE should return
+  // the maximum file descriptor + 1, which is 1024 on Android by default. We
+  // restrict the limit to 4096 so we don't take too much time if the value
+  // turns out to be much higher for some reason.
+  constexpr int kMaxFileDescriptorsToStat = 4096;
+  struct rlimit rlim = {0, 0};
+  if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "getrlimit() failed (errno=%d)", errno);
+    return;
+  }
+  int fd_lim = rlim.rlim_cur;
+  if (fd_lim > kMaxFileDescriptorsToStat) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Maximum number of file descriptors (%d) too large.", fd_lim);
+    fd_lim = kMaxFileDescriptorsToStat;
+  }
+  ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+      "Listing up to %d file descriptors.", fd_lim);
+
+  // Verify that /proc/self/fd is a directory. If not, procfs is not mounted or
+  // inaccessible for some other reason. In that case, there's no point trying
+  // to read from it.
+  struct stat statbuf;
+  if (stat("/proc/self/fd", &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)) {
+    ICING_LOG(ERROR) << "/proc/self/fd not available. Giving up.";
+    return;
+  }
+
+  // Now read each link individually.
+  char path[1024];
+  char target[1024];
+  for (int fd = 0; fd < fd_lim; ++fd) {
+    snprintf(path, arraysize(path), "/proc/self/fd/%d", fd);
+    ssize_t len = readlink(path, target, arraysize(target));
+    if (len >= 0) {
+      // Zero-terminate the buffer, because readlink() won't.
+      target[len < arraysize(target) ? len : arraysize(target) - 1] = '\0';
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("fd %d -> \"%s\"", fd,
+                                                        target);
+    } else if (errno != ENOENT) {
+      // ENOENT just means the fd number is unused; anything else is abnormal.
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("fd %d -> ? (errno=%d)",
+                                                        fd, errno);
+    }
+  }
+  ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+      "File descriptor list complete.");
+}
+
+// Logs an error formatted as: desc1 + file_name + desc2 + strerror(errnum).
+//
+// If errnum == EMFILE (too many open files), then it also logs a list of open
+// file descriptors (see LogOpenFileDescriptors() above).
+void LogOpenError(const char* desc1, const char* file_name, const char* desc2,
+                  int errnum) {
+  ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+      "%s%s%s%s", desc1, file_name, desc2, strerror(errnum));
+  if (errnum == EMFILE) {
+    // fd exhaustion: dump the open-fd table to help diagnose the leak.
+    LogOpenFileDescriptors();
+  }
+}
+
+// Recursive implementation of ListDirectory. Prefix is used to prepend the
+// directory name during recursion.
+// We cannot use scandir due to a bug in old platform versions. See b/7339844.
+//
+// Returns false if the directory cannot be opened or a recursive listing
+// fails; entries accumulated so far are left in `entries`.
+bool ListDirectoryInternal(const char* dir_name,
+                           const std::unordered_set<std::string>& exclude,
+                           bool recursive, const char* prefix,
+                           std::vector<std::string>* entries) {
+  DIR* dir = opendir(dir_name);
+  if (!dir) {
+    LogOpenError("Unable to open directory ", dir_name, ": ", errno);
+    return false;
+  }
+
+  bool success = true;
+  dirent* p;
+  // readdir's implementation seems to be thread safe.
+  while ((p = readdir(dir)) != nullptr) {
+    std::string file_name(p->d_name);
+    if (file_name == "." || file_name == ".." ||
+        exclude.find(file_name) != exclude.end()) {
+      continue;
+    }
+    std::string relative_path = absl_ports::StrCat(prefix, p->d_name);
+    entries->push_back(relative_path);
+    // Recurse down directories, if requested.
+    if (recursive && (p->d_type == DT_DIR)) {
+      std::string sub_dir_name = absl_ports::StrCat(dir_name, "/", p->d_name);
+      std::string relative_path_with_slash =
+          absl_ports::StrCat(relative_path, "/");
+      if (!ListDirectoryInternal(sub_dir_name.c_str(), exclude, recursive,
+                                 relative_path_with_slash.c_str(), entries)) {
+        // Bug fix: break instead of returning directly so `dir` is still
+        // closed below. Returning here leaked the DIR handle (and its fd),
+        // which is especially harmful on the EMFILE paths this code serves.
+        success = false;
+        break;
+      }
+    }
+  }
+  if (closedir(dir) != 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Error closing %s: %s", dir_name, strerror(errno));
+  }
+  return success;
+}
+
+} // namespace
+
+// Closes the owned descriptor on destruction, if one is held.
+ScopedFd::~ScopedFd() {
+  if (fd_ >= 0) {
+    close(fd_);
+  }
+}
+
+// Replaces the owned descriptor, closing the previously held one (if any).
+void ScopedFd::reset(int fd) {
+  if (fd_ >= 0) {
+    close(fd_);
+  }
+  fd_ = fd;
+}
+
+// Out-of-line definition for the class-scope constant; required when the
+// constant is odr-used (pre-C++17 semantics).
+const int64_t Filesystem::kBadFileSize;
+
+// Removes a single file. A missing file (ENOENT) counts as success.
+bool Filesystem::DeleteFile(const char* file_name) const {
+  ICING_VLOG(1) << IcingStringUtil::StringPrintf("Deleting file %s", file_name);
+  int ret = unlink(file_name);
+  if (ret != 0 && errno != ENOENT) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Deleting file %s failed: %s", file_name, strerror(errno));
+    return false;
+  }
+  return true;
+}
+
+// Removes an empty directory. A missing directory (ENOENT) counts as success.
+bool Filesystem::DeleteDirectory(const char* dir_name) const {
+  int ret = rmdir(dir_name);
+  if (ret != 0 && errno != ENOENT) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Deleting directory %s failed: %s", dir_name, strerror(errno));
+    return false;
+  }
+  return true;
+}
+
+// Recursively removes a directory tree. Keeps going on per-entry failures
+// (deleting as much as possible) and only removes the top directory when
+// everything beneath it was deleted.
+bool Filesystem::DeleteDirectoryRecursively(const char* dir_name) const {
+  // Ensure the dir_name really is a directory and exists.
+  struct stat st;
+  if (stat(dir_name, &st) < 0) {
+    if (errno == ENOENT) {
+      return true;  // If directory didn't exist, this was successful.
+    }
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Stat %s failed: %s", dir_name, strerror(errno));
+    return false;
+  }
+  vector<std::string> entries;
+  if (!ListDirectory(dir_name, &entries)) {
+    return false;
+  }
+
+  bool success = true;
+  for (vector<std::string>::iterator i = entries.begin(); i != entries.end();
+       ++i) {
+    std::string filename = std::string(dir_name) + '/' + *i;
+    if (stat(filename.c_str(), &st) < 0) {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+          "Stat %s failed: %s", filename.c_str(), strerror(errno));
+      success = false;
+    } else if (S_ISDIR(st.st_mode)) {
+      success = DeleteDirectoryRecursively(filename.c_str()) && success;
+    } else {
+      success = DeleteFile(filename.c_str()) && success;
+    }
+  }
+
+  if (success) {
+    success = DeleteDirectory(dir_name);
+  }
+
+  return success;
+}
+
+// True only if file_name exists AND is a regular file (not a directory,
+// socket, etc.). Unexpected stat() errors are logged; ENOENT is silent.
+bool Filesystem::FileExists(const char* file_name) const {
+  bool exists = false;
+  struct stat st;
+  if (stat(file_name, &st) == 0) {
+    exists = S_ISREG(st.st_mode) != 0;
+  } else {
+    if (errno != ENOENT) {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+          "Unable to stat file %s: %s", file_name, strerror(errno));
+    }
+    exists = false;
+  }
+  return exists;
+}
+
+// True only if dir_name exists AND is a directory.
+bool Filesystem::DirectoryExists(const char* dir_name) const {
+  bool exists = false;
+  struct stat st;
+  if (stat(dir_name, &st) == 0) {
+    exists = S_ISDIR(st.st_mode) != 0;
+  } else {
+    if (errno != ENOENT) {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+          "Unable to stat directory %s: %s", dir_name, strerror(errno));
+    }
+    exists = false;
+  }
+  return exists;
+}
+
+// Returns the index of the first character after the last '/', or 0 when
+// there is no slash (the whole string is the basename).
+int Filesystem::GetBasenameIndex(const char* file_name) const {
+  // Find final slash.
+  const char* last_slash = strrchr(file_name, '/');
+  if (!last_slash) {
+    // file_name is just basename.
+    return 0;
+  }
+
+  // Skip slash.
+  return last_slash + 1 - file_name;
+}
+
+// Returns the final path component (everything after the last '/').
+std::string Filesystem::GetBasename(const char* file_name) const {
+  size_t len = strlen(file_name);
+  int idx = GetBasenameIndex(file_name);
+  return std::string(file_name + idx, len - idx);
+}
+
+// Returns the path up to (not including) the last '/'; empty string when
+// file_name has no directory part.
+std::string Filesystem::GetDirname(const char* file_name) const {
+  int idx = GetBasenameIndex(file_name);
+  // Remove the trailing slash
+  if (idx > 0) {
+    idx -= 1;
+  }
+  return std::string(file_name, idx);
+}
+
+// Non-recursive listing with no exclusions; clears `entries` first.
+bool Filesystem::ListDirectory(const char* dir_name,
+                               vector<std::string>* entries) const {
+  entries->clear();
+  return ListDirectory(dir_name, /*exclude=*/{}, /*recursive=*/false, entries);
+}
+
+// Full-featured listing; note this overload does NOT clear `entries`, it
+// appends (the recursive helper relies on that).
+bool Filesystem::ListDirectory(const char* dir_name,
+                               const std::unordered_set<std::string>& exclude,
+                               bool recursive,
+                               std::vector<std::string>* entries) const {
+  return ListDirectoryInternal(dir_name, exclude, recursive, /*prefix=*/"",
+                               entries);
+}
+
+// Expands a shell-style glob whose pattern is confined to the basename part
+// (e.g. "/dir/prefix*"). A missing directory yields success with an empty
+// match list.
+bool Filesystem::GetMatchingFiles(const char* glob,
+                                  vector<std::string>* matches) const {
+  matches->clear();
+
+  // Split dirname/basename.
+  int basename_idx = GetBasenameIndex(glob);
+  if (basename_idx == 0) {
+    // We need a directory.
+    ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+        "Expected directory, no matching files for: %s", glob);
+    return true;
+  }
+  const char* basename_glob = glob + basename_idx;
+  std::string dirname(glob, basename_idx);
+  vector<std::string> entries;
+  if (!ListDirectory(dirname.c_str(), &entries) && errno != ENOENT) {
+    return false;
+  }
+
+  for (vector<std::string>::iterator i = entries.begin(); i != entries.end();
+       ++i) {
+    // The filename needs to match glob following last_slash.
+    if (!fnmatch(basename_glob, i->c_str(), FNM_PATHNAME)) {
+      // Add it to the list.
+      matches->push_back(dirname + *i);
+    }
+  }
+  return true;
+}
+
+// Opens (creating if needed, mode 0600) for read/write. Returns -1 on
+// failure after logging; cursor is at the start of the file.
+int Filesystem::OpenForWrite(const char* file_name) const {
+  int fd = open(file_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+  if (fd < 0) {
+    LogOpenError("Opening file ", file_name, " for write failed: ", errno);
+  }
+  return fd;
+}
+
+// Like OpenForWrite but positions the cursor at the end of the file.
+int Filesystem::OpenForAppend(const char* file_name) const {
+  // Don't use the O_APPEND flag because, although it opens for
+  // append, it doesn't set the file cursor to at the end until
+  // first write occurs. This can be confusing if you expect
+  // the file position at the end. Instead, explicitly
+  // seek to end after opening.
+  int fd = open(file_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+  if (fd < 0) {
+    LogOpenError("Opening file ", file_name, " for write failed: ", errno);
+  } else {
+    lseek(fd, 0, SEEK_END);
+  }
+  return fd;
+}
+
+// Opens read-only; does not create the file. Returns -1 on failure.
+int Filesystem::OpenForRead(const char* file_name) const {
+  int fd = open(file_name, O_RDONLY);
+  if (fd < 0) {
+    LogOpenError("Opening file ", file_name, " for read failed: ", errno);
+  }
+  return fd;
+}
+
+// Logical file size of an open descriptor, or kBadFileSize on stat failure.
+int64_t Filesystem::GetFileSize(int fd) const {
+  struct stat st;
+  if (fstat(fd, &st) < 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat file: %s",
+                                                      strerror(errno));
+    return kBadFileSize;
+  }
+  return st.st_size;
+}
+
+// Logical file size by path; a missing file is expected enough to log at
+// VLOG(1) rather than WARNING.
+int64_t Filesystem::GetFileSize(const char* filename) const {
+  struct stat st;
+  if (stat(filename, &st) < 0) {
+    if (errno == ENOENT) {
+      ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+          "Unable to stat file %s: %s", filename, strerror(errno));
+    } else {
+      ICING_LOG(WARNING) << IcingStringUtil::StringPrintf(
+          "Unable to stat file %s: %s", filename, strerror(errno));
+    }
+    return kBadFileSize;
+  }
+  return st.st_size;
+}
+
+// Resizes via ftruncate() and moves the cursor to the new end.
+bool Filesystem::Truncate(int fd, int64_t new_size) const {
+  if (ftruncate(fd, new_size) != 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Unable to truncate file: %s", strerror(errno));
+    return false;
+  }
+  lseek(fd, new_size, SEEK_SET);
+  return true;
+}
+
+// Path-based convenience wrapper; creates the file if it does not exist.
+bool Filesystem::Truncate(const char* filename, int64_t new_size) const {
+  int fd = OpenForAppend(filename);
+  if (fd == -1) {
+    return false;
+  }
+  bool success = Truncate(fd, new_size);
+  close(fd);
+  return success;
+}
+
+// Resizes via ftruncate() without moving the cursor. Despite the name, this
+// sets the size to new_size exactly (it can also shrink).
+bool Filesystem::Grow(int fd, int64_t new_size) const {
+  if (ftruncate(fd, new_size) != 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to grow file: %s",
+                                                      strerror(errno));
+    return false;
+  }
+
+  return true;
+}
+
+// Path-based convenience wrapper; creates the file if it does not exist.
+bool Filesystem::Grow(const char* filename, int64_t new_size) const {
+  int fd = OpenForAppend(filename);
+  if (fd == -1) {
+    return false;
+  }
+
+  bool grew = Grow(fd, new_size);
+  close(fd);
+  return grew;
+}
+
+// Writes all of `data` at the current cursor, looping in chunks of at most
+// 64 KiB and retrying short writes until everything is written or an error
+// occurs.
+bool Filesystem::Write(int fd, const void* data, size_t data_size) const {
+  size_t write_len = data_size;
+  do {
+    // Don't try to write too much at once.
+    size_t chunk_size = std::min<size_t>(write_len, 64u * 1024);
+    ssize_t wrote = write(fd, data, chunk_size);
+    if (wrote < 0) {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Bad write: %s",
+                                                        strerror(errno));
+      return false;
+    }
+    data = static_cast<const uint8_t*>(data) + wrote;
+    write_len -= wrote;
+  } while (write_len > 0);
+  return true;
+}
+
+// Path-based wrapper; writes from the start of the file (OpenForWrite leaves
+// the cursor at offset 0).
+bool Filesystem::Write(const char* filename, const void* data,
+                       size_t data_size) const {
+  int fd = OpenForWrite(filename);
+  if (fd == -1) {
+    return false;
+  }
+
+  bool success = Write(fd, data, data_size);
+  close(fd);
+  return success;
+}
+
+// Positioned write: like Write() but at an explicit offset, leaving the file
+// cursor untouched. Loops until all bytes are written.
+bool Filesystem::PWrite(int fd, off_t offset, const void* data,
+                        size_t data_size) const {
+  size_t write_len = data_size;
+  do {
+    // Don't try to write too much at once.
+    size_t chunk_size = std::min<size_t>(write_len, 64u * 1024);
+    ssize_t wrote = pwrite(fd, data, chunk_size, offset);
+    if (wrote < 0) {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Bad write: %s",
+                                                        strerror(errno));
+      return false;
+    }
+    data = static_cast<const uint8_t*>(data) + wrote;
+    write_len -= wrote;
+    offset += wrote;
+  } while (write_len > 0);
+  return true;
+}
+
+bool Filesystem::PWrite(const char* filename, off_t offset, const void* data,
+                        size_t data_size) const {
+  int fd = OpenForWrite(filename);
+  if (fd == -1) {
+    return false;
+  }
+
+  bool success = PWrite(fd, offset, data, data_size);
+  close(fd);
+  return success;
+}
+
+// NOTE(review): unlike the write paths, Read()/PRead() issue a single
+// syscall and do not retry short reads — a successful return does not
+// guarantee buf_size bytes were read. Confirm callers tolerate this.
+bool Filesystem::Read(int fd, void* buf, size_t buf_size) const {
+  ssize_t read_status = read(fd, buf, buf_size);
+  if (read_status < 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Bad read: %s",
+                                                      strerror(errno));
+    return false;
+  }
+  return true;
+}
+
+bool Filesystem::Read(const char* filename, void* buf, size_t buf_size) const {
+  int fd = OpenForRead(filename);
+  if (fd == -1) {
+    return false;
+  }
+
+  bool success = Read(fd, buf, buf_size);
+  close(fd);
+  return success;
+}
+
+// Positioned read; same short-read caveat as Read() above.
+bool Filesystem::PRead(int fd, void* buf, size_t buf_size, off_t offset) const {
+  ssize_t read_status = pread(fd, buf, buf_size, offset);
+  if (read_status < 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Bad read: %s",
+                                                      strerror(errno));
+    return false;
+  }
+  return true;
+}
+
+bool Filesystem::PRead(const char* filename, void* buf, size_t buf_size,
+                       off_t offset) const {
+  int fd = OpenForRead(filename);
+  if (fd == -1) {
+    return false;
+  }
+
+  bool success = PRead(fd, buf, buf_size, offset);
+  close(fd);
+  return success;
+}
+
+// Flushes file data to disk; uses fsync() on Apple platforms where
+// fdatasync() is unavailable.
+bool Filesystem::DataSync(int fd) const {
+#ifdef __APPLE__  // iOS has no fdatasync(), only fsync()
+  int result = fsync(fd);
+#else
+  int result = fdatasync(fd);
+#endif
+
+  if (result < 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to sync data: %s",
+                                                      strerror(errno));
+    return false;
+  }
+  return true;
+}
+
+bool Filesystem::RenameFile(const char* old_name, const char* new_name) const {
+  if (rename(old_name, new_name) < 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Unable to rename file %s to %s: %s", old_name, new_name,
+        strerror(errno));
+    return false;
+  }
+  return true;
+}
+
+// Swaps two paths via a "<one>.tmp" intermediate. NOTE(review): the three
+// renames are not atomic as a whole — a crash mid-swap can leave the tmp
+// file behind; callers should tolerate that.
+bool Filesystem::SwapFiles(const char* one, const char* two) const {
+  std::string tmp_name = absl_ports::StrCat(one, ".tmp");
+  const char* tmp_cstr = tmp_name.c_str();
+
+  // Blow away a tmp file if it already exists
+  if (FileExists(tmp_cstr) && !DeleteFile(tmp_cstr)) {
+    return false;
+  }
+  if (DirectoryExists(tmp_cstr) && !DeleteDirectoryRecursively(tmp_cstr)) {
+    return false;
+  }
+
+  // Perform the swap
+  if (!RenameFile(one, tmp_cstr)) {
+    return false;
+  }
+  if (!RenameFile(two, one)) {
+    return false;
+  }
+  if (!RenameFile(tmp_cstr, two)) {
+    return false;
+  }
+
+  return true;
+}
+
+// Creates a single directory (mode 0700); succeeds if it already exists.
+bool Filesystem::CreateDirectory(const char* dir_name) const {
+  bool success = DirectoryExists(dir_name);
+  if (!success) {
+    if (mkdir(dir_name, S_IRUSR | S_IWUSR | S_IXUSR) == 0) {
+      success = true;
+    } else {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+          "Creating directory %s failed: %s", dir_name, strerror(errno));
+    }
+  }
+  return success;
+}
+
+// mkdir -p equivalent: recursively creates parents first.
+bool Filesystem::CreateDirectoryRecursively(const char* dir_name) const {
+  if ((strlen(dir_name) == 0) || DirectoryExists(dir_name)) {
+    return true;
+  }
+  std::string path_before = GetDirname(dir_name);
+  if (!CreateDirectoryRecursively(path_before.c_str())) {
+    return false;
+  }
+  return CreateDirectory(dir_name);
+}
+
+// Block-based disk usage of an open descriptor (st_blocks * 512), which can
+// differ from the logical size for sparse or partially-allocated files.
+int64_t Filesystem::GetDiskUsage(int fd) const {
+  struct stat st;
+  if (fstat(fd, &st) < 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat file: %s",
+                                                      strerror(errno));
+    return kBadFileSize;
+  }
+  return st.st_blocks * kStatBlockSize;
+}
+
+// Block-based disk usage of a single path (no directory recursion).
+int64_t Filesystem::GetFileDiskUsage(const char* path) const {
+  struct stat st;
+  if (stat(path, &st) != 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat %s: %s",
+                                                      path, strerror(errno));
+    return kBadFileSize;
+  }
+  return st.st_blocks * kStatBlockSize;
+}
+
+// Block-based disk usage of `path`; for directories, recursively adds the
+// usage of each entry. Entries that fail to stat are skipped (best effort),
+// but a failure to list a directory returns kBadFileSize.
+int64_t Filesystem::GetDiskUsage(const char* path) const {
+  struct stat st;
+  if (stat(path, &st) != 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat %s: %s",
+                                                      path, strerror(errno));
+    return kBadFileSize;
+  }
+  int64_t result = st.st_blocks * kStatBlockSize;
+  if (S_ISDIR(st.st_mode)) {
+    vector<std::string> list;
+    if (!ListDirectory(path, &list)) {
+      return kBadFileSize;
+    }
+    for (vector<std::string>::iterator i = list.begin(); i != list.end(); ++i) {
+      std::string sub_path = std::string(path) + '/' + *i;
+      // Bug fix: GetDiskUsage() returns int64_t and kBadFileSize is int64_t;
+      // storing the result in uint64_t made the comparison below mixed-sign.
+      int64_t sub_usage = GetDiskUsage(sub_path.c_str());
+      if (sub_usage != kBadFileSize) {
+        result += sub_usage;
+      }  // Else just ignore the failing entry.
+    }
+  }
+  return result;
+}
+
+// Returns the current cursor offset of fd, or a negative value on lseek
+// failure.
+int64_t Filesystem::GetCurrentPosition(int fd) const {
+  return lseek(fd, 0, SEEK_CUR);
+}
+
+// Moves the cursor to an absolute offset; returns the resulting offset or a
+// negative value on failure.
+int64_t Filesystem::SetPosition(int fd, int offset) const {
+  return lseek(fd, offset, SEEK_SET);
+}
+
+// Adds `size` to *to_increment, with kBadFileSize acting as a sticky
+// "invalid" marker: once either operand is invalid, the sum stays invalid.
+void Filesystem::IncrementByOrSetInvalid(int64_t size, int64_t* to_increment) {
+  if (*to_increment == kBadFileSize) {
+    return;
+  }
+  if (size == kBadFileSize) {
+    *to_increment = kBadFileSize;
+    return;
+  }
+  *to_increment += size;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/filesystem.h b/icing/file/filesystem.h
new file mode 100644
index 0000000..b85f3a0
--- /dev/null
+++ b/icing/file/filesystem.h
@@ -0,0 +1,237 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Methods for interacting with the filesystem.
+
+#ifndef ICING_FILE_FILESYSTEM_H_
+#define ICING_FILE_FILESYSTEM_H_
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+namespace icing {
+namespace lib {
+
+// RAII wrapper for a POSIX file descriptor: closes fd when it goes out of
+// scope, if fd >= 0. Move-only.
+class ScopedFd {
+ public:
+  explicit ScopedFd(int fd = -1) : fd_(fd) {}
+  ScopedFd(const ScopedFd&) = delete;
+  // Moves are noexcept so containers holding ScopedFd can relocate elements
+  // by move instead of falling back to (deleted) copies.
+  ScopedFd(ScopedFd&& other) noexcept : ScopedFd() { *this = std::move(other); }
+
+  ScopedFd& operator=(const ScopedFd&) = delete;
+  ScopedFd& operator=(ScopedFd&& other) noexcept {
+    // Swap so any fd we previously owned is closed when `other` is destroyed.
+    std::swap(fd_, other.fd_);
+    return *this;
+  }
+  ~ScopedFd();
+
+  bool is_valid() const { return fd_ >= 0; }
+  int operator*() const { return fd_; }
+  int get() const { return fd_; }
+  // Closes the currently owned fd (if valid) and takes ownership of `fd`.
+  void reset(int fd = -1);
+
+ private:
+  int fd_;
+};
+
+// Deleter for std::unique_ptr that fclose()s a C stdio stream; nullptr-safe.
+struct FILEDeleter {
+  void operator()(FILE* fp) const {
+    if (fp) {
+      fclose(fp);
+    }
+  }
+};
+// Owning handle for a FILE*, closed automatically on destruction.
+typedef std::unique_ptr<FILE, FILEDeleter> ScopedFILE;
+
+// Class containing file operation methods.
+// LINT.IfChange
+class Filesystem {
+ public:
+  // Sentinel value returned by the size- and disk-usage-related methods on
+  // failure.
+  static const int64_t kBadFileSize = std::numeric_limits<int64_t>::max();
+
+  constexpr Filesystem() = default;
+  virtual ~Filesystem() = default;
+
+  // Deletes a file, returns true on success or if the file did
+  // not yet exist.
+  virtual bool DeleteFile(const char* file_name) const;
+
+  // Deletes a directory, returns true on success or if the directory did
+  // not yet exist.
+  virtual bool DeleteDirectory(const char* dir_name) const;
+
+  // Deletes a directory, including any contents, and returns true on
+  // success or if the directory did not yet exist.
+  virtual bool DeleteDirectoryRecursively(const char* dir_name) const;
+
+  // Returns true if a file exists. False if the file doesn't exist.
+  // If there is an error getting stat on the file, it logs the error and
+  // asserts.
+  virtual bool FileExists(const char* file_name) const;
+
+  // Returns true if a directory exists. False if the directory doesn't exist.
+  // If there is an error getting stat on the file, it logs the error and
+  // asserts.
+  virtual bool DirectoryExists(const char* dir_name) const;
+
+  // Return index to start of basename in file_name. Anything before
+  // basename is the dirname (including the final slash).
+  virtual int GetBasenameIndex(const char* file_name) const;
+
+  // Return a string containing the basename.
+  virtual std::string GetBasename(const char* file_name) const;
+
+  // Return a string containing the dirname.
+  virtual std::string GetDirname(const char* file_name) const;
+
+  // Gets the names of the entries of a given directory. Does not include "."
+  // and "..". Returns false on error.
+  virtual bool ListDirectory(const char* dir_name,
+                             std::vector<std::string>* entries) const;
+
+  // Adds the names of the entries of a given directory -- recursively if
+  // specified, and excluding files/directories named in exclude -- to entries.
+  // Regardless of exclude, does not include "." and "..". Excluded files are
+  // excluded at every level. Returns false on error.
+  //
+  // Example use case: list all files & directories in fooDir/, recursively,
+  // excluding anything named "tmp" or "cache" (presumed directories) and the
+  // files within them.
+  virtual bool ListDirectory(const char* dir_name,
+                             const std::unordered_set<std::string>& exclude,
+                             bool recursive,
+                             std::vector<std::string>* entries) const;
+
+  // Use glob to return matched files into "matches". Returns false if
+  // glob had an error.
+  //
+  // Cannot match multiple directories so everything up the last slash
+  // must be literal.
+  virtual bool GetMatchingFiles(const char* glob,
+                                std::vector<std::string>* matches) const;
+
+  // Opens the file for read/write. Creates if not existing. Returns
+  // -1 on fail or an open file descriptor on success.
+  virtual int OpenForWrite(const char* file_name) const;
+
+  // Opens the file for read/write, and positions the file at the
+  // end for appending. Creates if not existing. Returns -1 on fail
+  // or an open file descriptor on success.
+  virtual int OpenForAppend(const char* file_name) const;
+
+  // Opens a file for read only. Fails if file doesn't exist. Returns
+  // file descriptor or -1 on fail.
+  virtual int OpenForRead(const char* file_name) const;
+
+  // Gets the size of a file, given an open file descriptor.
+  // Returns kBadFileSize on error.
+  virtual int64_t GetFileSize(int fd) const;
+
+  // Gets the size of a file, given a filename.
+  virtual int64_t GetFileSize(const char* filename) const;
+
+  // Truncates the file to the requested size. Seeks to the
+  // end position of the file after truncate. Returns false
+  // if fails.
+  virtual bool Truncate(int fd, int64_t new_size) const;
+
+  // Truncates the file to the requested size.
+  // Returns false if fails.
+  virtual bool Truncate(const char* filename, int64_t new_size) const;
+
+  // Grows the file to the requested size. Does not change the
+  // position pointer.
+  virtual bool Grow(int fd, int64_t new_size) const;
+  virtual bool Grow(const char* filename, int64_t new_size) const;
+
+  // Writes to a file. Returns true if all the data was successfully
+  // written. Handles interrupted writes.
+  virtual bool Write(int fd, const void* data, size_t data_size) const;
+  virtual bool Write(const char* filename, const void* data,
+                     size_t data_size) const;
+
+  // Positional variant of Write(): writes data_size bytes at the given file
+  // offset. Returns true if all the data was successfully written.
+  virtual bool PWrite(int fd, off_t offset, const void* data,
+                      size_t data_size) const;
+  virtual bool PWrite(const char* filename, off_t offset, const void* data,
+                      size_t data_size) const;
+
+  // Reads from a file. Returns true if data was successfully read out. If the
+  // file is seekable, read starts at the file offset, and the file offset is
+  // incremented by number of bytes read.
+  // PRead reads at the explicit offset and does not depend on the current
+  // file position.
+  virtual bool Read(int fd, void* buf, size_t buf_size) const;
+  virtual bool Read(const char* filename, void* buf, size_t buf_size) const;
+  virtual bool PRead(int fd, void* buf, size_t buf_size, off_t offset) const;
+  virtual bool PRead(const char* filename, void* buf, size_t buf_size,
+                     off_t offset) const;
+
+  // Syncs the file to disk (fdatasync). Returns true on success.
+  virtual bool DataSync(int fd) const;
+
+  // Renames a file. A file with new_name must not already exist.
+  virtual bool RenameFile(const char* old_name, const char* new_name) const;
+
+  // Renames two files or directories so their names are swapped.
+  // Both names must already exist.
+  virtual bool SwapFiles(const char* one, const char* two) const;
+
+  // Creates a directory if it does not yet exist.
+  virtual bool CreateDirectory(const char* dir_name) const;
+
+  // Creates a directory if it does not yet exist, building the entire path
+  // if it does not yet exist.
+  virtual bool CreateDirectoryRecursively(const char* dir_name) const;
+
+  // Compute the disk usage of the given file. Similarly to the
+  // 'du' command, it attempts to estimate the actual disk usage, so for
+  // sparse files it may return less than their length.
+  // Returns kBadFileSize on error.
+  virtual int64_t GetDiskUsage(int fd) const;
+
+  // Compute the disk usage of the given file or directory. Similarly to the
+  // 'du' command, it attempts to estimate the actual disk usage, so for
+  // sparse files it may return less than their length. Returns kBadFileSize
+  // on error. Does not recurse on directories.
+  virtual int64_t GetFileDiskUsage(const char* path) const;
+
+  // Compute the disk usage of the given file or directory. Similarly to the
+  // 'du' command, it attempts to estimate the actual disk usage, so for
+  // sparse files it may return less than their length. Returns kBadFileSize
+  // on error. Recurses on directories.
+  virtual int64_t GetDiskUsage(const char* path) const;
+
+  // Returns the current position in the given file. Returns -1 and sets errno
+  // on failure.
+  virtual int64_t GetCurrentPosition(int fd) const;
+
+  // Sets the file position to `offset` bytes from the start of the file.
+  // Returns the resulting offset, or -1 and sets errno on failure.
+  virtual int64_t SetPosition(int fd, int offset) const;
+
+  // Increments to_increment by size if size is valid, or sets to_increment
+  // to kBadFileSize if either size or to_increment is kBadFileSize.
+  static void IncrementByOrSetInvalid(int64_t size, int64_t* to_increment);
+};
+// LINT.ThenChange(//depot/google3/icing/file/mock-filesystem.h)
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_FILESYSTEM_H_
diff --git a/icing/file/filesystem_test.cc b/icing/file/filesystem_test.cc
new file mode 100644
index 0000000..b5b8b6c
--- /dev/null
+++ b/icing/file/filesystem_test.cc
@@ -0,0 +1,452 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Test for Filesystem class and utils.
+
+#include "icing/file/filesystem.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/tmp-directory.h"
+
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#endif
+
+using std::sort;
+using std::vector;
+using ::testing::Eq;
+using ::testing::Ge;
+using ::testing::Gt;
+using ::testing::Le;
+using ::testing::Ne;
+using ::testing::UnorderedElementsAre;
+
+namespace icing {
+namespace lib {
+
+namespace {
+// Create some test files in the specified directory. "test data" plus the
+// relative path name + "\n" is written to each.
+void CreateTestFiles(const vector<std::string>& file_names,
+                     const std::string& append_dir) {
+  Filesystem filesystem;
+  for (const std::string& one_file_name : file_names) {
+    // Write the filename to the file
+    std::string one_file_path = append_dir + "/" + one_file_name;
+    int fd = filesystem.OpenForWrite(one_file_path.c_str());
+    ASSERT_THAT(fd, Gt(0));
+    std::string test_data = "test data " + one_file_name + "\n";
+    // Use the string's own length instead of strlen(c_str()), which would
+    // silently truncate at an embedded NUL byte.
+    EXPECT_TRUE(filesystem.Write(fd, test_data.c_str(), test_data.length()));
+    EXPECT_THAT(close(fd), Eq(0));
+  }
+}
+}  // namespace
+
+// Indicates if the file system supports Sparse Files.
+// 'Sparse files' are essentially pre-allocated files of big sizes which do not
+// yet use any blocks. A few tests validate that disk-usage is accounted
+// correctly in those cases as zero.
+// However, on HFS+ file system sparse files are not supported.
+// The new AFS supports sparse files, but as of 2017-09 all simulators in prod
+// are running on MacOS using HFS+.
+bool FileSystemSupportsSparseFiles() {
+// TargetConditionals.h defines TARGET_IPHONE_SIMULATOR as 0 or 1 on every
+// Apple platform, so it must be value-tested with #if; a bare #ifdef would
+// also be true on real devices where the macro is defined as 0.
+#if defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR
+  return false;
+#else
+  return true;
+#endif
+}
+
+// Test fixture: creates a fresh scratch directory for every test case and
+// removes it (recursively) afterwards.
+class FilesystemTest : public testing::Test {
+ protected:
+  void SetUp() override {
+    temp_dir_ = GetTestTempDir() + "/icing_filesystem";
+    Filesystem filesystem;
+    ASSERT_TRUE(filesystem.CreateDirectoryRecursively(temp_dir_.c_str()));
+  }
+
+  void TearDown() override {
+    Filesystem filesystem;
+    EXPECT_TRUE(filesystem.DeleteDirectoryRecursively(temp_dir_.c_str()));
+  }
+
+  // Write junk data of given size to the given file descriptor
+  void WriteJunk(int fd, size_t size) {
+    const int kBufLen = 1024;
+    int buf[kBufLen];
+    for (int i = 0; i < kBufLen; ++i) {
+      buf[i] = i;
+    }
+    const int kBufSize = kBufLen * sizeof(int);
+
+    // Write whole kBufSize chunks first, then any remainder.
+    Filesystem filesystem;
+    for (size_t i = 0; i < size / kBufSize; ++i) {
+      EXPECT_TRUE(filesystem.Write(fd, buf, kBufSize));
+    }
+    if (size % kBufSize) {
+      EXPECT_TRUE(filesystem.Write(fd, buf, size % kBufSize));
+    }
+  }
+
+  // Scratch directory path, unique to this test binary's temp dir.
+  std::string temp_dir_;
+};
+
+// Walks GetBasename/GetDirname up the path "/foo/bar/README.txt", component
+// by component, until the dirname is exhausted (empty).
+TEST_F(FilesystemTest, Names) {
+  const std::string filename("/foo/bar/README.txt");
+  Filesystem filesystem;
+
+  std::string basename = filesystem.GetBasename(filename.c_str());
+  EXPECT_THAT(basename, Eq("README.txt"));
+
+  std::string dirname = filesystem.GetDirname(filename.c_str());
+  EXPECT_THAT(dirname, Eq("/foo/bar"));
+
+  basename = filesystem.GetBasename(dirname.c_str());
+  EXPECT_THAT(basename, Eq("bar"));
+
+  dirname = filesystem.GetDirname(dirname.c_str());
+  EXPECT_THAT(dirname, Eq("/foo"));
+
+  basename = filesystem.GetBasename(dirname.c_str());
+  EXPECT_THAT(basename, Eq("foo"));
+
+  dirname = filesystem.GetDirname(dirname.c_str());
+  EXPECT_THAT(dirname, Eq(""));
+}
+
+// Checks basename/dirname behavior for a bare one-letter filename.
+TEST_F(FilesystemTest, OneLetter) {
+  Filesystem filesystem;
+
+  // The basename of a bare filename (no slash) is the filename itself.
+  // (The original test mistakenly called GetDirname here too.)
+  const std::string basename = filesystem.GetBasename("a");
+  EXPECT_THAT(basename, Eq("a"));
+
+  // The dirname of a bare filename is empty.
+  const std::string dirname = filesystem.GetDirname("a");
+  EXPECT_THAT(dirname, Eq(""));
+}
+
+// Exercises create/exists/delete for plain and recursive directory
+// operations, including failure cases (creating under a missing parent,
+// non-recursively deleting a non-empty directory).
+TEST_F(FilesystemTest, Directory) {
+  Filesystem filesystem;
+
+  const std::string foo_str = temp_dir_ + "/foo";
+  const std::string bar_str = foo_str + "/bar";
+  const char* foo_dir = foo_str.c_str();
+  const char* bar_dir = bar_str.c_str();
+
+  EXPECT_TRUE(filesystem.CreateDirectory(foo_dir));
+  EXPECT_TRUE(filesystem.DirectoryExists(foo_dir));
+  EXPECT_TRUE(filesystem.DeleteDirectory(foo_dir));
+  EXPECT_FALSE(filesystem.DirectoryExists(foo_dir));
+
+  // Non-recursive create must fail when the parent is missing.
+  EXPECT_FALSE(filesystem.CreateDirectory(bar_dir));
+  EXPECT_FALSE(filesystem.DirectoryExists(foo_dir));
+  EXPECT_FALSE(filesystem.DirectoryExists(bar_dir));
+  EXPECT_TRUE(filesystem.CreateDirectoryRecursively(bar_dir));
+  EXPECT_TRUE(filesystem.DirectoryExists(foo_dir));
+  EXPECT_TRUE(filesystem.DirectoryExists(bar_dir));
+
+  // Non-recursive delete must fail on a non-empty directory.
+  EXPECT_FALSE(filesystem.DeleteDirectory(foo_dir));
+  EXPECT_TRUE(filesystem.DeleteDirectoryRecursively(foo_dir));
+  EXPECT_FALSE(filesystem.DirectoryExists(foo_dir));
+  EXPECT_FALSE(filesystem.DirectoryExists(bar_dir));
+
+  // Deleting a non-existing directory returns true.
+  EXPECT_TRUE(filesystem.DeleteDirectory(foo_dir));
+  EXPECT_TRUE(filesystem.DeleteDirectoryRecursively(foo_dir));
+}
+
+// Verifies that DataSync() succeeds on a freshly created, open file.
+TEST_F(FilesystemTest, FSync) {
+  Filesystem filesystem;
+  const std::string foo_file = temp_dir_ + "/foo_file";
+  // ScopedFd closes the descriptor when it goes out of scope.
+  ScopedFd fd(filesystem.OpenForWrite(foo_file.c_str()));
+  ASSERT_TRUE(fd.is_valid());
+  EXPECT_TRUE(filesystem.DataSync(fd.get()));
+}
+
+// Writes 10000 bytes, then truncates to half and to zero, checking
+// GetFileSize after each step.
+TEST_F(FilesystemTest, Truncate) {
+  Filesystem filesystem;
+  const std::string foo_file = temp_dir_ + "/foo_file";
+  const char* filename = foo_file.c_str();
+  int fd = filesystem.OpenForWrite(filename);
+  ASSERT_THAT(fd, Ne(-1));
+  char data[10000] = {0};  // Zero-init to satisfy msan.
+  EXPECT_TRUE(filesystem.Write(fd, data, sizeof(data)));
+  close(fd);
+  EXPECT_THAT(filesystem.GetFileSize(filename), Eq(sizeof(data)));
+  EXPECT_TRUE(filesystem.Truncate(filename, sizeof(data) / 2));
+  EXPECT_THAT(filesystem.GetFileSize(filename), Eq(sizeof(data) / 2));
+  EXPECT_TRUE(filesystem.Truncate(filename, 0));
+  EXPECT_THAT(filesystem.GetFileSize(filename), Eq(0u));
+}
+
+// Checks GetMatchingFiles() against a missing directory, an empty directory,
+// and a directory where only some files match the glob.
+TEST_F(FilesystemTest, GetMatchingFiles) {
+  Filesystem filesystem;
+  const std::string foo_dir = temp_dir_ + "/foo";
+  const std::string glob = foo_dir + "/p_*_q";
+  vector<std::string> matches;
+
+  // Non existing directory
+  EXPECT_TRUE(filesystem.GetMatchingFiles(glob.c_str(), &matches));
+  EXPECT_THAT(matches.size(), Eq(0u));
+
+  // Existing directory
+  matches.clear();
+  ASSERT_TRUE(filesystem.CreateDirectoryRecursively(foo_dir.c_str()));
+  EXPECT_TRUE(filesystem.GetMatchingFiles(glob.c_str(), &matches));
+  EXPECT_THAT(matches.size(), Eq(0u));
+
+  // With some files. Range-for replaces ABSL_ARRAYSIZE, which was used
+  // without including any absl header in this file.
+  matches.clear();
+  const char* files[] = {"p_1_q", "p_2_q", "p_3", "4_q"};
+  for (const char* file : files) {
+    // ScopedFd closes each file as soon as it is created.
+    ScopedFd fd(filesystem.OpenForWrite((foo_dir + "/" + file).c_str()));
+  }
+  const vector<std::string> expected = {foo_dir + "/p_1_q",
+                                        foo_dir + "/p_2_q"};
+  EXPECT_TRUE(filesystem.GetMatchingFiles(glob.c_str(), &matches));
+  sort(matches.begin(), matches.end());
+  EXPECT_THAT(matches, Eq(expected));
+}
+
+// Checks the three cases of IncrementByOrSetInvalid: normal addition,
+// invalid increment poisons the accumulator, and an already-invalid
+// accumulator stays invalid.
+TEST_F(FilesystemTest, IncrementByOrSetInvalid) {
+  int64_t to_increment = 1;
+  Filesystem::IncrementByOrSetInvalid(2, &to_increment);
+  EXPECT_THAT(to_increment, Eq(3));
+
+  Filesystem::IncrementByOrSetInvalid(Filesystem::kBadFileSize, &to_increment);
+  EXPECT_THAT(to_increment, Eq(Filesystem::kBadFileSize));
+
+  to_increment = Filesystem::kBadFileSize;
+  Filesystem::IncrementByOrSetInvalid(2, &to_increment);
+  EXPECT_THAT(to_increment, Eq(Filesystem::kBadFileSize));
+}
+
+// Creates a large sparse file, writes a small amount of real data into it,
+// and checks that fd-based GetDiskUsage() reports at least the real data but
+// (on sparse-capable filesystems) no more than the sparse extent.
+TEST_F(FilesystemTest, GetDiskUsage) {
+  Filesystem filesystem;
+  const std::string foo_dir = temp_dir_ + "/foo";
+
+  const int64_t kCluster = 4096;  // at least the anticipated fs cluster
+
+  ASSERT_TRUE(filesystem.CreateDirectoryRecursively(foo_dir.c_str()));
+
+  // Grow a sparse file, and then append to it.
+  const std::string filename = foo_dir + "/myfile";
+  // Size to expand the sparse file to.
+  const int64_t kExpandedSize = 100 * kCluster - 5;
+  // Actual data to write to the file.
+  const int64_t kJunkSize = 5 * kCluster - 10;
+
+  EXPECT_TRUE(filesystem.Truncate(filename.c_str(), kExpandedSize));
+  ScopedFd fd(filesystem.OpenForWrite(filename.c_str()));
+  WriteJunk(*fd, kJunkSize);
+
+  int64_t size = filesystem.GetDiskUsage(*fd);
+  EXPECT_THAT(size, Ge(kJunkSize));
+  if (FileSystemSupportsSparseFiles()) {
+    EXPECT_THAT(size, Le(kExpandedSize));
+  }
+}
+
+// Checks the recursive path-based GetDiskUsage() against a growing directory
+// tree: missing path, empty dirs, regular files, sparse files, and sparse
+// files with real data. Bounds are expressed in clusters since exact block
+// accounting is filesystem-dependent.
+TEST_F(FilesystemTest, GetDiskUsagePath) {
+  Filesystem filesystem;
+  const std::string foo_dir = temp_dir_ + "/foo";
+
+  const int64_t kCluster = 4096;  // at least the anticipated fs cluster
+
+  // Non-existing
+  {
+    EXPECT_THAT(filesystem.GetDiskUsage(foo_dir.c_str()),
+                Eq(Filesystem::kBadFileSize));
+  }
+
+  // A single directory
+  {
+    ASSERT_TRUE(filesystem.CreateDirectoryRecursively(foo_dir.c_str()));
+    int64_t size = filesystem.GetDiskUsage(foo_dir.c_str());
+    EXPECT_THAT(size, Ge(0 * kCluster));
+    EXPECT_THAT(size, Le(1 * kCluster));
+  }
+
+  // Nested directories
+  const std::string bar_dir = foo_dir + "/bar";
+  {
+    ASSERT_TRUE(filesystem.CreateDirectoryRecursively(bar_dir.c_str()));
+    int64_t size = filesystem.GetDiskUsage(bar_dir.c_str());
+    EXPECT_THAT(size, Ge(0 * kCluster));
+    EXPECT_THAT(size, Le(2 * kCluster));
+  }
+
+  // Two regular files
+  const std::string reg1 = bar_dir + "/f1";
+  const std::string reg2 = bar_dir + "/f2";
+  {
+    // Inner scope closes the fds before usage is measured.
+    {
+      ScopedFd f1(filesystem.OpenForWrite(reg1.c_str()));
+      ScopedFd f2(filesystem.OpenForWrite(reg2.c_str()));
+      WriteJunk(*f1, 5 * kCluster - 10);
+      WriteJunk(*f2, 8 * kCluster - 10);
+    }
+    int64_t size = filesystem.GetDiskUsage(foo_dir.c_str());
+    EXPECT_THAT(size, Ge(13 * kCluster));
+    EXPECT_THAT(size, Le(15 * kCluster));
+  }
+
+  // Two sparse files
+  const std::string sparse1 = foo_dir + "/s1";
+  const std::string sparse2 = foo_dir + "/s2";
+  {
+    EXPECT_TRUE(filesystem.Truncate(sparse1.c_str(), 100 * kCluster - 5));
+    EXPECT_TRUE(filesystem.Truncate(sparse2.c_str(), 200 * kCluster - 123));
+    int64_t size = filesystem.GetDiskUsage(foo_dir.c_str());
+    EXPECT_THAT(size, Ge(13 * kCluster));
+    if (FileSystemSupportsSparseFiles()) {
+      EXPECT_THAT(size, Le(17 * kCluster));
+    } else {
+      EXPECT_THAT(size, Le(313 * kCluster));
+    }
+  }
+
+  // Some junk in the sparse files
+  {
+    {
+      ScopedFd f1(filesystem.OpenForWrite(sparse1.c_str()));
+      ScopedFd f2(filesystem.OpenForWrite(sparse2.c_str()));
+      WriteJunk(*f1, 5 * kCluster - 10);
+      WriteJunk(*f2, 8 * kCluster - 10);
+    }
+    int64_t size = filesystem.GetDiskUsage(foo_dir.c_str());
+    EXPECT_THAT(size, Ge(26 * kCluster));
+    if (FileSystemSupportsSparseFiles()) {
+      EXPECT_THAT(size, Le(30 * kCluster));
+    } else {
+      EXPECT_THAT(size, Le(313 * kCluster));
+    }
+  }
+}
+
+// TODO(b/112435354): Add test case for original (non-recursive) ListDirectory()
+
+// Tests ListDirectory() with recursive dir search, with no exclusions
+// (simple test).
+TEST_F(FilesystemTest, ListDirectoryRecursiveSimple) {
+  Filesystem filesystem;
+  const std::string append_dir = temp_dir_ + "/append_test";
+  const std::string dir1_name = "dir1";
+  const std::string dir1_path = append_dir + "/" + dir1_name;
+  vector<std::string> some_files = {"file1", "file2", dir1_name + "/file3"};
+
+  // Make sure there is no pre-existing test-dir structure
+  ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(append_dir.c_str()));
+
+  // Setup a test-dir structure
+  ASSERT_TRUE(filesystem.CreateDirectoryRecursively(
+      dir1_path.c_str()));  // deepest path for test
+  CreateTestFiles(some_files, append_dir);
+
+  // Call the ListDirectory API with recursive dir-search, no exclusions.
+  vector<std::string> result;
+  EXPECT_TRUE(filesystem.ListDirectory(append_dir.c_str(), /*exclude=*/{},
+                                       /*recursive=*/true, &result));
+
+  // Verify that all files are returned, and no extras. Note that the
+  // subdirectory entry ("dir1") itself is included alongside its contents.
+  EXPECT_THAT(result, UnorderedElementsAre(some_files[0], some_files[1],
+                                           dir1_name, some_files[2]));
+
+  // Clean up
+  ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(append_dir.c_str()));
+}
+
+// Tests ListDirectory() with recursive dir search, with exclusions.
+// This test is similar in structure to ListDirectory_recursive_simple, but with
+// exclusions.
+// Recursive ListDirectory() with an exclusion: excluding "dir1" must drop
+// both the directory entry and everything beneath it.
+TEST_F(FilesystemTest, ListDirectoryRecursiveExclude) {
+  Filesystem filesystem;
+  const std::string append_dir = temp_dir_ + "/append_test";
+  const std::string dir1_name = "dir1";
+  const std::string dir1_path = append_dir + "/" + dir1_name;
+  vector<std::string> some_files = {"file1", "file2", dir1_name + "/file3"};
+
+  // Make sure there is no pre-existing test-dir structure
+  ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(append_dir.c_str()));
+
+  // Setup a test-dir structure
+  ASSERT_TRUE(filesystem.CreateDirectoryRecursively(
+      dir1_path.c_str()));  // deepest path for test
+  CreateTestFiles(some_files, append_dir);
+
+  // Call the ListDirectory API with recursive dir-search, but exclude dir1.
+  // (The exclusion set is passed inline; the previous unused local
+  // `exclude` variable has been removed.)
+  vector<std::string> result;
+  bool success = filesystem.ListDirectory(append_dir.c_str(),
+                                          /*exclude=*/{dir1_name.c_str()},
+                                          /*recursive=*/true, &result);
+
+  // Verify that all files are returned, and no extras.
+  EXPECT_TRUE(success);
+  EXPECT_THAT(result, UnorderedElementsAre(some_files[0], some_files[1]));
+
+  // Clean up
+  ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(append_dir.c_str()));
+}
+
+// Covers Read/PRead by fd and by filename: fd-based Read follows and
+// advances the file position, while PRead is position-independent.
+TEST_F(FilesystemTest, ReadWrite) {
+  Filesystem filesystem;
+  const std::string foo_file = temp_dir_ + "/foo_file";
+  int fd = filesystem.OpenForWrite(foo_file.c_str());
+  // Assert the open succeeded instead of silently writing to fd -1.
+  ASSERT_THAT(fd, Ne(-1));
+  const std::string data = "hello world";
+  EXPECT_TRUE(filesystem.Write(fd, data.c_str(), data.length()));
+
+  std::string hello;
+  hello.resize(strlen("hello"));
+  EXPECT_TRUE(filesystem.Read(foo_file.c_str(), &hello[0], strlen("hello")));
+  EXPECT_THAT(hello, Eq("hello"));
+
+  // Read starts from wherever file offset is at the moment.
+  filesystem.SetPosition(fd, 0);
+  hello.clear();
+  hello.resize(strlen("hello"));
+  EXPECT_TRUE(filesystem.Read(fd, &hello[0], strlen("hello")));
+  EXPECT_THAT(hello, Eq("hello"));
+
+  // Shouldn't need to move file offset anymore since file offset gets updated
+  // after the read.
+  std::string world;
+  world.resize(strlen(" world"));
+  EXPECT_TRUE(filesystem.Read(fd, &world[0], strlen(" world")));
+  EXPECT_THAT(world, Eq(" world"));
+
+  // PRead should not be dependent on the file offset
+  world.clear();
+  world.resize(strlen(" world"));
+  EXPECT_TRUE(
+      filesystem.PRead(fd, &world[0], strlen(" world"), strlen("hello")));
+  EXPECT_THAT(world, Eq(" world"));
+
+  hello.clear();
+  hello.resize(strlen("hello"));
+  EXPECT_TRUE(
+      filesystem.PRead(foo_file.c_str(), &hello[0], strlen("hello"), 0));
+  EXPECT_THAT(hello, Eq("hello"));
+
+  // Close the descriptor opened above; it was previously leaked.
+  EXPECT_THAT(close(fd), Eq(0));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/memory-mapped-file.cc b/icing/file/memory-mapped-file.cc
new file mode 100644
index 0000000..ebd419b
--- /dev/null
+++ b/icing/file/memory-mapped-file.cc
@@ -0,0 +1,171 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// TODO(cassiewang) Add unit-tests to this class.
+
+#include "icing/file/memory-mapped-file.h"
+
+#include <sys/mman.h>
+
+#include <cerrno>
+
+#include "utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/math-util.h"
+
+namespace icing {
+namespace lib {
+
+// Stores the dependencies and strategy; no region is mapped until Remap()
+// is called. `filesystem` must outlive this object (held by pointer).
+MemoryMappedFile::MemoryMappedFile(const Filesystem& filesystem,
+                                   const std::string_view file_path,
+                                   Strategy mmap_strategy)
+    : filesystem_(&filesystem),
+      file_path_(file_path),
+      strategy_(mmap_strategy) {}
+
+// Releases any active mapping when the object is destroyed.
+MemoryMappedFile::~MemoryMappedFile() { Unmap(); }
+
+// Releases the active mapping (if any) and resets all bookkeeping to the
+// unmapped state. Safe to call when nothing is mapped.
+// (Also fixes the doubly-qualified `MemoryMappedFile::MemoryMappedFile::`
+// in the original definition.)
+void MemoryMappedFile::Unmap() {
+  if (mmap_result_ != nullptr) {
+    // Unmap the full mapped extent. Remap() maps adjusted_mmap_size_ bytes
+    // (alignment adjustment + requested size), so unmapping only
+    // region_size_ could leak the final page whenever the alignment
+    // adjustment pushes the mapping across a page boundary.
+    munmap(mmap_result_, adjusted_mmap_size_);
+    mmap_result_ = nullptr;
+  }
+
+  file_offset_ = 0;
+  region_ = nullptr;
+  region_size_ = 0;
+  adjusted_mmap_size_ = 0;
+}
+
+// Maps the byte range [file_offset, file_offset + mmap_size) of the file into
+// memory according to strategy_, replacing any previous mapping. Returns OK
+// on success (including the trivial mmap_size == 0 case), INTERNAL if the
+// file cannot be opened or mmap() fails.
+libtextclassifier3::Status MemoryMappedFile::Remap(size_t file_offset,
+                                                   size_t mmap_size) {
+  // First unmap any previously mmapped region.
+  Unmap();
+
+  if (mmap_size == 0) {
+    // Nothing more to do.
+    return libtextclassifier3::Status::OK;
+  }
+
+  // mmap() requires a page-aligned file offset, so round down to the nearest
+  // page boundary and map the extra `alignment_adjustment` bytes in front of
+  // the caller's requested offset.
+  size_t aligned_offset =
+      math_util::RoundDownTo(file_offset, system_page_size());
+  size_t alignment_adjustment = file_offset - aligned_offset;
+  size_t adjusted_mmap_size = alignment_adjustment + mmap_size;
+
+  int mmap_flags = 0;
+  // Determines if the mapped region should just be readable or also writable.
+  int protection_flags = 0;
+  ScopedFd fd;
+  switch (strategy_) {
+    case Strategy::READ_ONLY: {
+      mmap_flags = MAP_PRIVATE;
+      protection_flags = PROT_READ;
+      fd.reset(filesystem_->OpenForRead(file_path_.c_str()));
+      break;
+    }
+    case Strategy::READ_WRITE_AUTO_SYNC: {
+      // MAP_SHARED lets the kernel write dirty pages back to the file.
+      mmap_flags = MAP_SHARED;
+      protection_flags = PROT_READ | PROT_WRITE;
+      fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
+      break;
+    }
+    case Strategy::READ_WRITE_MANUAL_SYNC: {
+      // MAP_PRIVATE gives copy-on-write pages that never reach the file;
+      // PersistToDisk() writes them back explicitly.
+      mmap_flags = MAP_PRIVATE;
+      protection_flags = PROT_READ | PROT_WRITE;
+      // TODO(cassiewang) MAP_PRIVATE effectively makes it a read-only file.
+      // figure out if we can open this file in read-only mode.
+      fd.reset(filesystem_->OpenForWrite(file_path_.c_str()));
+      break;
+    }
+    default:
+      return absl_ports::UnknownError(IcingStringUtil::StringPrintf(
+          "Invalid value in switch statement: %d", strategy_));
+  }
+
+  if (!fd.is_valid()) {
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Unable to open file meant to be mmapped: ", file_path_));
+  }
+
+  // The fd may be closed immediately after mmap() returns; the mapping keeps
+  // its own reference to the underlying file.
+  mmap_result_ = mmap(nullptr, adjusted_mmap_size, protection_flags, mmap_flags,
+                      fd.get(), aligned_offset);
+
+  if (mmap_result_ == MAP_FAILED) {
+    mmap_result_ = nullptr;
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Failed to mmap region due to error: ", strerror(errno)));
+  }
+
+  // region_ points at the caller's requested offset, inside the (possibly
+  // larger) page-aligned mapping.
+  file_offset_ = file_offset;
+  region_ = reinterpret_cast<char*>(mmap_result_) + alignment_adjustment;
+  region_size_ = mmap_size;
+  adjusted_mmap_size_ = adjusted_mmap_size;
+  return libtextclassifier3::Status::OK;
+}
+
+// Flushes in-memory changes in the mapped region to the underlying file.
+// Returns FAILED_PRECONDITION for read-only mappings, OK if nothing is
+// mapped, INTERNAL if msync()/PWrite() fails.
+libtextclassifier3::Status MemoryMappedFile::PersistToDisk() {
+  if (strategy_ == Strategy::READ_ONLY) {
+    return absl_ports::FailedPreconditionError(absl_ports::StrCat(
+        "Attempting to PersistToDisk on a read-only file: ", file_path_));
+  }
+
+  if (region_ == nullptr) {
+    // Nothing mapped to sync.
+    return libtextclassifier3::Status::OK;
+  }
+
+  // MAP_SHARED mappings can be flushed synchronously with msync().
+  if (strategy_ == Strategy::READ_WRITE_AUTO_SYNC &&
+      msync(mmap_result_, adjusted_mmap_size_, MS_SYNC) != 0) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Unable to sync file using msync(): ", file_path_));
+  }
+
+  // In order to prevent automatic syncing of changes, files that use the
+  // READ_WRITE_MANUAL_SYNC strategy are mmapped using MAP_PRIVATE. Such files
+  // can't be synced using msync(). So, we have to directly write to the
+  // underlying file to update it.
+  if (strategy_ == Strategy::READ_WRITE_MANUAL_SYNC &&
+      !filesystem_->PWrite(file_path_.c_str(), 0, region(), region_size())) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Unable to sync file using PWrite(): ", file_path_));
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Advises the kernel (madvise) about the expected access pattern for the
+// currently mapped region. Returns OK when nothing is mapped or the advice
+// succeeds, INTERNAL if madvise() fails.
+libtextclassifier3::Status MemoryMappedFile::OptimizeFor(
+    AccessPattern access_pattern) {
+  if (mmap_result_ == nullptr) {
+    // Nothing is mapped; there is no region to advise the kernel about.
+    // Previously madvise() was invoked on a null region, relying on the
+    // kernel tolerating a zero-length range.
+    return libtextclassifier3::Status::OK;
+  }
+
+  // Translate the access pattern into the corresponding madvise() flag.
+  int madvise_flag = 0;
+  if (access_pattern == AccessPattern::ACCESS_ALL) {
+    madvise_flag = MADV_WILLNEED;
+  } else if (access_pattern == AccessPattern::ACCESS_NONE) {
+    madvise_flag = MADV_DONTNEED;
+  } else if (access_pattern == AccessPattern::ACCESS_RANDOM) {
+    madvise_flag = MADV_RANDOM;
+  } else if (access_pattern == AccessPattern::ACCESS_SEQUENTIAL) {
+    madvise_flag = MADV_SEQUENTIAL;
+  }
+
+  if (madvise(mmap_result_, adjusted_mmap_size_, madvise_flag) != 0) {
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Unable to madvise file ", file_path_, "; Error: ", strerror(errno)));
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/memory-mapped-file.h b/icing/file/memory-mapped-file.h
new file mode 100644
index 0000000..1be3dd8
--- /dev/null
+++ b/icing/file/memory-mapped-file.h
@@ -0,0 +1,155 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Allows memory-mapping a full file or a specific region within the file.
+// It also supports efficiently switching the region being mapped.
+//
+// Note on Performance:
+// It supports different optimized strategies for common patterns on both
+// read-only and read-write files. This includes using read-ahead buffers for
+// faster reads as well as background-sync vs manual-sync of changes to disk.
+// For more details, see comments at MemoryMappedFile::Strategy.
+//
+// Usage:
+//
+// MemoryMappedFile mmapped_file(filesystem, "/file.pb", READ_WRITE_AUTO_SYNC);
+// mmapped_file.Remap(0, 16 * 1024);  // Load the first 16KiB of the file.
+//
+// char read_byte = mmapped_file.region()[100];
+// mmapped_file.mutable_region()[10] = write_byte;
+//
+// mmapped_file.PersistToDisk();  // Optional; immediately writes changes to
+//                                // disk.
+//
+// mmapped_file.Remap(16 * 1024, 16 * 1024);  // Load the next 16KiB.
+// mmapped_file.mutable_region()[10] = write_byte;
+// The mapped region is unmapped when mmapped_file goes out of scope.
+
+#ifndef ICING_FILE_MEMORY_MAPPED_FILE_H_
+#define ICING_FILE_MEMORY_MAPPED_FILE_H_
+
+#include <unistd.h>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "utils/base/status.h"
+#include "icing/file/filesystem.h"
+
+namespace icing {
+namespace lib {
+
+class MemoryMappedFile {
+ public:
+  // Returns the system's memory page size, cached after the first call. The
+  // gcc `const` attribute tells the compiler the result never changes, so
+  // repeated calls may be folded.
+  static size_t __attribute__((const)) system_page_size() {
+    static const size_t page_size = sysconf(_SC_PAGE_SIZE);
+    return page_size;
+  }
+
+  enum Strategy {
+    // Memory map a read-only file into a read-only memory region.
+    READ_ONLY,
+
+    // Memory map a read-write file into a writable memory region. Any changes
+    // made to the region are automatically flushed to the underlying file in
+    // the background.
+    READ_WRITE_AUTO_SYNC,
+
+    // Memory map a read-write file into a writable memory region. Changes made
+    // to this region will never be auto-synced to the underlying file. Unless
+    // the caller explicitly calls PersistToDisk(), all changes will be lost
+    // when the MemoryMappedFile is destroyed.
+    READ_WRITE_MANUAL_SYNC,
+  };
+
+  // filesystem    : Filesystem implementation used for manual-sync writes;
+  //                 must outlive this instance (only a pointer is stored).
+  // file_path     : Full path of the file that needs to be memory-mapped.
+  // mmap_strategy : Read/write/sync behavior; see Strategy above.
+  MemoryMappedFile(const Filesystem& filesystem, std::string_view file_path,
+                   Strategy mmap_strategy);
+
+  // Frees any region that is still memory-mapped.
+  ~MemoryMappedFile();
+
+  // Memory-map the newly specified region within the file specified by
+  // file_offset and mmap_size. Unmaps any previously mmapped region.
+  //
+  // Returns any encountered IO error.
+  libtextclassifier3::Status Remap(size_t file_offset, size_t mmap_size);
+
+  // Unmaps and frees up the region that has currently been memory-mapped.
+  void Unmap();
+
+  // Explicitly persist any changes made to the currently mapped region to
+  // disk.
+  //
+  // NOTE: This is only valid for the READ_WRITE_AUTO_SYNC and
+  // READ_WRITE_MANUAL_SYNC strategies; calling it on a READ_ONLY file
+  // returns FAILED_PRECONDITION.
+  libtextclassifier3::Status PersistToDisk();
+
+  // Advise the system to help it optimize the memory-mapped region for
+  // upcoming read/write operations.
+  //
+  // NOTE: See linux documentation of madvise() for additional details.
+  enum AccessPattern {
+    // Future memory accesses are expected to be in random order. So,
+    // readahead will have limited impact on latency.
+    ACCESS_RANDOM,
+
+    // Future memory accesses are expected to be sequential. So, some
+    // readahead can greatly improve latency.
+    ACCESS_SEQUENTIAL,
+
+    // Future memory access is expected to be high-volume and all over the file.
+    // So, preloading the whole region into memory would greatly improve
+    // latency.
+    ACCESS_ALL,
+
+    // Future memory access is expected to be rare. So, it is best to free up
+    // as much of preloaded memory as possible.
+    ACCESS_NONE,
+  };
+  libtextclassifier3::Status OptimizeFor(AccessPattern access_pattern);
+
+  // Accessors to the memory-mapped region. Returns null if nothing is mapped.
+  const char* region() const { return region_; }
+  char* mutable_region() { return region_; }
+
+  size_t region_size() const { return region_size_; }
+  Strategy strategy() const { return strategy_; }
+
+ private:
+  // Cached constructor params.
+  const Filesystem* const filesystem_;
+  const std::string file_path_;
+  const Strategy strategy_;
+
+  // Offset within the file at which the current memory-mapped region starts.
+  size_t file_offset_ = 0;
+
+  // Region that is currently memory-mapped. Points into the page-aligned
+  // mmap_result_ buffer at the caller-requested offset.
+  char* region_ = nullptr;
+  size_t region_size_ = 0;
+
+  // The actual size of the region we mmapped. As the requested region might not
+  // align with system pages, we often mmap more bytes than requested.
+  size_t adjusted_mmap_size_ = 0;
+
+  // Raw pointer (or error) returned by calls to mmap().
+  void* mmap_result_ = nullptr;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_MEMORY_MAPPED_FILE_H_
diff --git a/icing/file/mock-filesystem.h b/icing/file/mock-filesystem.h
new file mode 100644
index 0000000..a82f253
--- /dev/null
+++ b/icing/file/mock-filesystem.h
@@ -0,0 +1,325 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_MOCK_FILESYSTEM_H_
+#define ICING_FILE_MOCK_FILESYSTEM_H_
+
+#include <cstdint>
+
+#include "gmock/gmock.h"
+#include "icing/file/filesystem.h"
+
+namespace icing {
+namespace lib {
+using ::testing::_;
+using ::testing::A;
+
+// gMock mock of Filesystem for tests. Every method is mocked, but the
+// constructor installs ON_CALL defaults that delegate to a real Filesystem
+// instance, so a test only needs EXPECT_CALL overrides for the calls it
+// wants to fail or observe; everything else behaves like the real thing.
+class MockFilesystem : public Filesystem {
+ public:
+  MockFilesystem() {
+    // For all methods, we always delegate calls to a real Filesystem instance
+    // by default.
+    //
+    // NOTE(review): some lambdas below use unqualified `string` /
+    // `unordered_set` — presumably filesystem.h brings these into scope;
+    // confirm against that header.
+
+    ON_CALL(*this, DeleteFile).WillByDefault([this](const char* file_name) {
+      return real_filesystem_.DeleteFile(file_name);
+    });
+
+    ON_CALL(*this, DeleteDirectory).WillByDefault([this](const char* dir_name) {
+      return real_filesystem_.DeleteDirectory(dir_name);
+    });
+
+    ON_CALL(*this, DeleteDirectoryRecursively)
+        .WillByDefault([this](const char* dir_name) {
+          return real_filesystem_.DeleteDirectoryRecursively(dir_name);
+        });
+
+    ON_CALL(*this, FileExists).WillByDefault([this](const char* file_name) {
+      return real_filesystem_.FileExists(file_name);
+    });
+
+    ON_CALL(*this, DirectoryExists).WillByDefault([this](const char* dir_name) {
+      return real_filesystem_.DirectoryExists(dir_name);
+    });
+
+    ON_CALL(*this, GetBasenameIndex)
+        .WillByDefault([this](const char* file_name) {
+          return real_filesystem_.GetBasenameIndex(file_name);
+        });
+
+    ON_CALL(*this, GetBasename).WillByDefault([this](const char* file_name) {
+      return real_filesystem_.GetBasename(file_name);
+    });
+
+    ON_CALL(*this, GetDirname).WillByDefault([this](const char* file_name) {
+      return real_filesystem_.GetDirname(file_name);
+    });
+
+    // Overloaded methods need explicit matchers (A<T>() / _) so each ON_CALL
+    // binds to the right overload.
+    ON_CALL(*this, ListDirectory(_, _))
+        .WillByDefault(
+            [this](const char* dir_name, std::vector<string>* entries) {
+              return real_filesystem_.ListDirectory(dir_name, entries);
+            });
+
+    ON_CALL(*this, ListDirectory(_, _, _, _))
+        .WillByDefault([this](const char* dir_name,
+                              const std::unordered_set<string>& exclude,
+                              bool recursive, std::vector<string>* entries) {
+          return real_filesystem_.ListDirectory(dir_name, exclude, recursive,
+                                                entries);
+        });
+
+    ON_CALL(*this, GetMatchingFiles)
+        .WillByDefault([this](const char* glob, std::vector<string>* matches) {
+          return real_filesystem_.GetMatchingFiles(glob, matches);
+        });
+
+    ON_CALL(*this, OpenForWrite).WillByDefault([this](const char* file_name) {
+      return real_filesystem_.OpenForWrite(file_name);
+    });
+
+    ON_CALL(*this, OpenForAppend).WillByDefault([this](const char* file_name) {
+      return real_filesystem_.OpenForAppend(file_name);
+    });
+
+    ON_CALL(*this, OpenForRead).WillByDefault([this](const char* file_name) {
+      return real_filesystem_.OpenForRead(file_name);
+    });
+
+    ON_CALL(*this, GetFileSize(A<int>())).WillByDefault([this](int fd) {
+      return real_filesystem_.GetFileSize(fd);
+    });
+
+    ON_CALL(*this, GetFileSize(A<const char*>()))
+        .WillByDefault([this](const char* file_name) {
+          return real_filesystem_.GetFileSize(file_name);
+        });
+
+    ON_CALL(*this, Truncate(A<int>(), _))
+        .WillByDefault([this](int fd, int64_t new_size) {
+          return real_filesystem_.Truncate(fd, new_size);
+        });
+
+    ON_CALL(*this, Truncate(A<const char*>(), _))
+        .WillByDefault([this](const char* filename, int64_t new_size) {
+          return real_filesystem_.Truncate(filename, new_size);
+        });
+
+    ON_CALL(*this, Grow(A<int>(), _))
+        .WillByDefault([this](int fd, int64_t new_size) {
+          return real_filesystem_.Grow(fd, new_size);
+        });
+
+    ON_CALL(*this, Grow(A<const char*>(), _))
+        .WillByDefault([this](const char* filename, int64_t new_size) {
+          return real_filesystem_.Grow(filename, new_size);
+        });
+
+    ON_CALL(*this, Write(A<int>(), _, _))
+        .WillByDefault([this](int fd, const void* data, size_t data_size) {
+          return real_filesystem_.Write(fd, data, data_size);
+        });
+
+    ON_CALL(*this, Write(A<const char*>(), _, _))
+        .WillByDefault(
+            [this](const char* filename, const void* data, size_t data_size) {
+              return real_filesystem_.Write(filename, data, data_size);
+            });
+
+    ON_CALL(*this, PWrite(A<int>(), _, _, _))
+        .WillByDefault(
+            [this](int fd, off_t offset, const void* data, size_t data_size) {
+              return real_filesystem_.PWrite(fd, offset, data, data_size);
+            });
+
+    ON_CALL(*this, PWrite(A<const char*>(), _, _, _))
+        .WillByDefault([this](const char* filename, off_t offset,
+                              const void* data, size_t data_size) {
+          return real_filesystem_.PWrite(filename, offset, data, data_size);
+        });
+
+    ON_CALL(*this, Read(A<int>(), _, _))
+        .WillByDefault([this](int fd, void* buf, size_t buf_size) {
+          return real_filesystem_.Read(fd, buf, buf_size);
+        });
+
+    ON_CALL(*this, Read(A<const char*>(), _, _))
+        .WillByDefault(
+            [this](const char* filename, void* buf, size_t buf_size) {
+              return real_filesystem_.Read(filename, buf, buf_size);
+            });
+
+    ON_CALL(*this, PRead(A<int>(), _, _, _))
+        .WillByDefault(
+            [this](int fd, void* buf, size_t buf_size, off_t offset) {
+              return real_filesystem_.PRead(fd, buf, buf_size, offset);
+            });
+
+    ON_CALL(*this, PRead(A<const char*>(), _, _, _))
+        .WillByDefault([this](const char* filename, void* buf, size_t buf_size,
+                              off_t offset) {
+          return real_filesystem_.PRead(filename, buf, buf_size, offset);
+        });
+
+    ON_CALL(*this, DataSync).WillByDefault([this](int fd) {
+      return real_filesystem_.DataSync(fd);
+    });
+
+    ON_CALL(*this, RenameFile)
+        .WillByDefault([this](const char* old_name, const char* new_name) {
+          return real_filesystem_.RenameFile(old_name, new_name);
+        });
+
+    ON_CALL(*this, SwapFiles)
+        .WillByDefault([this](const char* one, const char* two) {
+          return real_filesystem_.SwapFiles(one, two);
+        });
+
+    ON_CALL(*this, CreateDirectory).WillByDefault([this](const char* dir_name) {
+      return real_filesystem_.CreateDirectory(dir_name);
+    });
+
+    ON_CALL(*this, CreateDirectoryRecursively)
+        .WillByDefault([this](const char* dir_name) {
+          return real_filesystem_.CreateDirectoryRecursively(dir_name);
+        });
+
+    ON_CALL(*this, GetDiskUsage(A<int>())).WillByDefault([this](int fd) {
+      return real_filesystem_.GetDiskUsage(fd);
+    });
+
+    ON_CALL(*this, GetFileDiskUsage).WillByDefault([this](const char* path) {
+      return real_filesystem_.GetFileDiskUsage(path);
+    });
+
+    ON_CALL(*this, GetDiskUsage(A<const char*>()))
+        .WillByDefault([this](const char* path) {
+          return real_filesystem_.GetDiskUsage(path);
+        });
+
+    ON_CALL(*this, GetCurrentPosition).WillByDefault([this](int fd) {
+      return real_filesystem_.GetCurrentPosition(fd);
+    });
+
+    ON_CALL(*this, SetPosition).WillByDefault([this](int fd, int offset) {
+      return real_filesystem_.SetPosition(fd, offset);
+    });
+  }
+
+  // Mock declarations mirroring the Filesystem interface one-for-one.
+  MOCK_METHOD(bool, DeleteFile, (const char* file_name), (const));
+
+  MOCK_METHOD(bool, DeleteDirectory, (const char* dir_name), (const));
+
+  MOCK_METHOD(bool, DeleteDirectoryRecursively, (const char* dir_name),
+              (const));
+
+  MOCK_METHOD(bool, FileExists, (const char* file_name), (const));
+
+  MOCK_METHOD(bool, DirectoryExists, (const char* dir_name), (const));
+
+  MOCK_METHOD(int, GetBasenameIndex, (const char* file_name), (const));
+
+  MOCK_METHOD(std::string, GetBasename, (const char* file_name), (const));
+
+  MOCK_METHOD(std::string, GetDirname, (const char* file_name), (const));
+
+  MOCK_METHOD(bool, ListDirectory,
+              (const char* dir_name, std::vector<std::string>* entries),
+              (const));
+
+  MOCK_METHOD(bool, ListDirectory,
+              (const char* dir_name,
+               const std::unordered_set<std::string>& exclude, bool recursive,
+               std::vector<std::string>* entries),
+              (const));
+
+  MOCK_METHOD(bool, GetMatchingFiles,
+              (const char* glob, std::vector<std::string>* matches), (const));
+
+  MOCK_METHOD(int, OpenForWrite, (const char* file_name), (const));
+
+  MOCK_METHOD(int, OpenForAppend, (const char* file_name), (const));
+
+  MOCK_METHOD(int, OpenForRead, (const char* file_name), (const));
+
+  MOCK_METHOD(int64_t, GetFileSize, (int fd), (const));
+
+  MOCK_METHOD(int64_t, GetFileSize, (const char* filename), (const));
+
+  MOCK_METHOD(bool, Truncate, (int fd, int64_t new_size), (const));
+
+  MOCK_METHOD(bool, Truncate, (const char* filename, int64_t new_size),
+              (const));
+
+  MOCK_METHOD(bool, Grow, (int fd, int64_t new_size), (const));
+
+  MOCK_METHOD(bool, Grow, (const char* filename, int64_t new_size), (const));
+
+  MOCK_METHOD(bool, Write, (int fd, const void* data, size_t data_size),
+              (const));
+
+  MOCK_METHOD(bool, Write,
+              (const char* filename, const void* data, size_t data_size),
+              (const));
+
+  MOCK_METHOD(bool, PWrite,
+              (int fd, off_t offset, const void* data, size_t data_size),
+              (const));
+
+  MOCK_METHOD(bool, PWrite,
+              (const char* filename, off_t offset, const void* data,
+               size_t data_size),
+              (const));
+
+  MOCK_METHOD(bool, Read, (int fd, void* buf, size_t buf_size), (const));
+
+  MOCK_METHOD(bool, Read, (const char* filename, void* buf, size_t buf_size),
+              (const));
+
+  MOCK_METHOD(bool, PRead, (int fd, void* buf, size_t buf_size, off_t offset),
+              (const));
+
+  MOCK_METHOD(bool, PRead,
+              (const char* filename, void* buf, size_t buf_size, off_t offset),
+              (const));
+
+  MOCK_METHOD(bool, DataSync, (int fd), (const));
+
+  MOCK_METHOD(bool, RenameFile, (const char* old_name, const char* new_name),
+              (const));
+
+  MOCK_METHOD(bool, SwapFiles, (const char* one, const char* two), (const));
+
+  MOCK_METHOD(bool, CreateDirectory, (const char* dir_name), (const));
+
+  MOCK_METHOD(bool, CreateDirectoryRecursively, (const char* dir_name),
+              (const));
+
+  MOCK_METHOD(int64_t, GetDiskUsage, (int fd), (const));
+
+  MOCK_METHOD(int64_t, GetFileDiskUsage, (const char* path), (const));
+
+  MOCK_METHOD(int64_t, GetDiskUsage, (const char* path), (const));
+
+  MOCK_METHOD(int64_t, GetCurrentPosition, (int fd), (const));
+
+  MOCK_METHOD(int64_t, SetPosition, (int fd, int offset), (const));
+
+ private:
+  // Real implementation that backs all of the ON_CALL defaults above.
+  Filesystem real_filesystem_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_MOCK_FILESYSTEM_H_
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
new file mode 100644
index 0000000..8e1d469
--- /dev/null
+++ b/icing/icing-search-engine.cc
@@ -0,0 +1,649 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/icing-search-engine.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/annotate.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/mutex.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/index-processor.h"
+#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/icing-search-engine-options.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/query/query-processor.h"
+#include "icing/result-retriever.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/scoring/scoring-processor.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+constexpr std::string_view kDocumentAndIndexSubfolderName =
+ "document_index_dir";
+constexpr std::string_view kSchemaSubfolderName = "schema_dir";
+constexpr std::string_view kIcingSearchEngineHeaderFilename =
+ "icing_search_engine_header";
+
+// Validates user-supplied engine options.
+//
+// Returns INVALID_ARGUMENT if any option is out of range; OK otherwise.
+libtextclassifier3::Status ValidateOptions(
+    const IcingSearchEngineOptions& options) {
+  // These options are only used in IndexProcessor, which won't be created
+  // until the first Put call. So they must be checked here, so that any
+  // errors can be surfaced in Initialize.
+  if (options.max_tokens_per_doc() <= 0) {
+    return absl_ports::InvalidArgumentError(
+        "Options::max_tokens_per_doc must be greater than zero.");
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+// Validates a per-query ResultSpec.
+//
+// Returns INVALID_ARGUMENT if num_to_retrieve is negative; OK otherwise.
+libtextclassifier3::Status ValidateResultSpec(
+    const ResultSpecProto& result_spec) {
+  if (result_spec.num_to_retrieve() < 0) {
+    return absl_ports::InvalidArgumentError(
+        "ResultSpec::num_to_retrieve cannot be negative.");
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+// Builds IndexProcessor options from the engine options. Documents exceeding
+// the token limit are truncated but indexing still succeeds
+// (kSuppressError), rather than failing the Put.
+IndexProcessor::Options CreateIndexProcessorOptions(
+    const IcingSearchEngineOptions& options) {
+  IndexProcessor::Options index_processor_options;
+  index_processor_options.max_tokens_per_document =
+      options.max_tokens_per_doc();
+  index_processor_options.token_limit_behavior =
+      IndexProcessor::Options::TokenLimitBehavior::kSuppressError;
+  return index_processor_options;
+}
+
+// Returns the full path of the engine header file under `base_dir`.
+std::string MakeHeaderFilename(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", kIcingSearchEngineHeaderFilename);
+}
+
+// Document store and index files are in a standalone subfolder because they
+// can be re-generated at the same time during full optimization. Others like
+// schema store can be optimized separately.
+std::string MakeDocumentAndIndexDirectoryPath(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", kDocumentAndIndexSubfolderName);
+}
+
+// Makes a temporary folder path for document and index which will be used
+// during full optimization (see IcingSearchEngine::Optimize); the optimized
+// copy is built here and then swapped with the live directory.
+std::string MakeDocumentAndIndexTemporaryDirectoryPath(
+    const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", kDocumentAndIndexSubfolderName,
+                            "_optimize_tmp");
+}
+
+// SchemaStore files are in a standalone subfolder for easier file management.
+// We can delete and recreate the subfolder and not touch/affect anything
+// else.
+std::string MakeSchemaDirectoryPath(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", kSchemaSubfolderName);
+}
+
+// Helper function to wrap results in ScoredDocumentHit without changing the
+// order. Consumes the iterator, emitting at most num_to_return hits; each
+// hit's score is a placeholder 0 since no ranking was requested.
+std::vector<ScoredDocumentHit> WrapResults(
+    std::unique_ptr<DocHitInfoIterator> result_iterator, int num_to_return) {
+  std::vector<ScoredDocumentHit> document_hits;
+  // NOTE: Advance() is evaluated before the count check, so the iterator may
+  // be advanced one extra time past the last returned hit.
+  while (result_iterator->Advance().ok() && num_to_return-- > 0) {
+    const DocHitInfo& doc_hit_info = result_iterator->doc_hit_info();
+    // Score is just a placeholder here and has no meaning.
+    document_hits.emplace_back(doc_hit_info.document_id(),
+                               doc_hit_info.hit_section_ids_mask(),
+                               /*score=*/0);
+  }
+  return document_hits;
+}
+
+// Scores and ranks up to num_to_return hits from result_iterator according to
+// scoring_spec. With RankingStrategy::NONE the hits are returned in iterator
+// order with placeholder scores; otherwise a ScoringProcessor is created and
+// used. Propagates any error from ScoringProcessor::Create.
+libtextclassifier3::StatusOr<std::vector<ScoredDocumentHit>> RunScoring(
+    std::unique_ptr<DocHitInfoIterator> result_iterator,
+    const ScoringSpecProto& scoring_spec, int num_to_return,
+    const DocumentStore* document_store) {
+  if (scoring_spec.rank_by() == ScoringSpecProto::RankingStrategy::NONE) {
+    // No scoring needed, return in original order
+    return WrapResults(std::move(result_iterator), num_to_return);
+  }
+
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<ScoringProcessor> scoring_processor,
+      ScoringProcessor::Create(scoring_spec, document_store));
+  return scoring_processor->ScoreAndRank(std::move(result_iterator),
+                                         num_to_return);
+}
+
+} // namespace
+
+// Public constructor: delegates to the testing constructor with production
+// Filesystem and Clock implementations.
+IcingSearchEngine::IcingSearchEngine(const IcingSearchEngineOptions& options)
+    : IcingSearchEngine(options, std::make_unique<Filesystem>(),
+                        std::make_unique<Clock>()) {}
+
+// Constructor with injectable Filesystem/Clock (used by tests). Only caches
+// dependencies; all real setup happens in Initialize().
+IcingSearchEngine::IcingSearchEngine(
+    IcingSearchEngineOptions options,
+    std::unique_ptr<const Filesystem> filesystem, std::unique_ptr<Clock> clock)
+    : options_(std::move(options)),
+      filesystem_(std::move(filesystem)),
+      icing_filesystem_(std::make_unique<IcingFilesystem>()),
+      clock_(std::move(clock)) {
+  ICING_VLOG(1) << "Creating IcingSearchEngine in dir: " << options_.base_dir();
+}
+
+// Destructor: best-effort flush of in-memory state. A PersistToDisk failure
+// here is only logged — destructors cannot propagate errors.
+IcingSearchEngine::~IcingSearchEngine() {
+  if (initialized_) {
+    if (!PersistToDisk().ok()) {
+      ICING_LOG(ERROR)
+          << "Error persisting to disk in IcingSearchEngine destructor";
+    }
+  }
+}
+
+// Creates all on-disk directories and subcomponents (schema store, document
+// store, segmenter, normalizer, index), then cross-checks them for
+// consistency, regenerating derived data if the check fails. Must succeed
+// before any other public method is usable (sets initialized_).
+//
+// Returns:
+//   OK on success
+//   INVALID_ARGUMENT if options are bad
+//   INTERNAL if a directory cannot be created
+//   Any error propagated from subcomponent creation or regeneration
+libtextclassifier3::Status IcingSearchEngine::Initialize() {
+  ICING_VLOG(1) << "Initializing IcingSearchEngine in dir: "
+                << options_.base_dir();
+
+  ICING_RETURN_IF_ERROR(ValidateOptions(options_));
+
+  // This method does both read and write so we need a writer lock. Using two
+  // locks (reader and writer) has the chance to be interrupted during
+  // switching.
+  absl_ports::unique_lock l(&mutex_);
+
+  // Make sure the base directory exists
+  if (!filesystem_->CreateDirectoryRecursively(options_.base_dir().c_str())) {
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Could not create directory: ", options_.base_dir()));
+  }
+
+  const std::string schema_store_dir =
+      MakeSchemaDirectoryPath(options_.base_dir());
+  // Make sure the sub-directory exists
+  if (!filesystem_->CreateDirectoryRecursively(schema_store_dir.c_str())) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Could not create directory: ", schema_store_dir));
+  }
+  ICING_ASSIGN_OR_RETURN(
+      schema_store_, SchemaStore::Create(filesystem_.get(), schema_store_dir));
+
+  const std::string document_store_and_index_dir =
+      MakeDocumentAndIndexDirectoryPath(options_.base_dir());
+  // Make sure the sub-directory exists
+  if (!filesystem_->CreateDirectoryRecursively(
+          document_store_and_index_dir.c_str())) {
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Could not create directory: ", document_store_and_index_dir));
+  }
+  ICING_ASSIGN_OR_RETURN(
+      document_store_,
+      DocumentStore::Create(filesystem_.get(), document_store_and_index_dir,
+                            clock_.get(), schema_store_.get()));
+
+  ICING_ASSIGN_OR_RETURN(language_segmenter_,
+                         LanguageSegmenter::Create(options_.lang_model_path()));
+
+  ICING_ASSIGN_OR_RETURN(normalizer_,
+                         Normalizer::Create(options_.max_token_length()));
+
+  Index::Options index_options(document_store_and_index_dir,
+                               options_.index_merge_size());
+  ICING_ASSIGN_OR_RETURN(index_,
+                         Index::Create(index_options, icing_filesystem_.get()));
+
+  // Even if each subcomponent initialized fine independently, we need to
+  // check if they're consistent with each other.
+  if (!CheckConsistency().ok()) {
+    ICING_VLOG(1)
+        << "IcingSearchEngine in inconsistent state, regenerating all "
+           "derived data";
+    ICING_RETURN_IF_ERROR(RegenerateDerivedFiles());
+  }
+
+  initialized_ = true;
+  return libtextclassifier3::Status::OK;
+}
+
+// Verifies that the persisted header exists, carries the expected magic, and
+// that its stored checksum matches the freshly computed one across all
+// subcomponents. A non-OK result triggers RegenerateDerivedFiles in
+// Initialize.
+//
+// Returns:
+//   OK if everything is consistent
+//   NOT_FOUND if there is no header to check against
+//   INTERNAL on unreadable/corrupt header or checksum mismatch
+libtextclassifier3::Status IcingSearchEngine::CheckConsistency() {
+  if (!HeaderExists()) {
+    // Without a header file, we have no checksum and can't even detect
+    // inconsistencies
+    return absl_ports::NotFoundError("No header file found.");
+  }
+
+  // Header does exist, verify that the header looks fine.
+  IcingSearchEngine::Header header;
+  if (!filesystem_->Read(MakeHeaderFilename(options_.base_dir()).c_str(),
+                         &header, sizeof(header))) {
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Couldn't read: ", MakeHeaderFilename(options_.base_dir())));
+  }
+
+  if (header.magic != IcingSearchEngine::Header::kMagic) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Invalid header kMagic for file: ",
+                           MakeHeaderFilename(options_.base_dir())));
+  }
+
+  ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
+  if (checksum.Get() != header.checksum) {
+    return absl_ports::InternalError(
+        "IcingSearchEngine checksum doesn't match");
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Rebuilds all derived data from ground truth: re-syncs the document store
+// with the schema store, wipes and re-populates the index, then rewrites the
+// header with a fresh checksum. Called when CheckConsistency fails.
+libtextclassifier3::Status IcingSearchEngine::RegenerateDerivedFiles() {
+  ICING_RETURN_IF_ERROR(
+      document_store_->UpdateSchemaStore(schema_store_.get()));
+  ICING_RETURN_IF_ERROR(index_->Reset());
+  ICING_RETURN_IF_ERROR(RestoreIndex());
+
+  // NOTE(review): the .c_str() here is unnecessary — MakeHeaderFilename takes
+  // a const std::string&, so this just constructs a temporary std::string.
+  const std::string header_file =
+      MakeHeaderFilename(options_.base_dir().c_str());
+  if (HeaderExists()) {
+    if (!filesystem_->DeleteFile(header_file.c_str())) {
+      return absl_ports::InternalError(
+          absl_ports::StrCat("Unable to delete file: ", header_file));
+    }
+  }
+  ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
+  ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Copying overload: forwards a copy of new_schema to the move overload below.
+libtextclassifier3::Status IcingSearchEngine::SetSchema(
+    const SchemaProto& new_schema, bool ignore_errors_and_delete_documents) {
+  return SetSchema(SchemaProto(new_schema), ignore_errors_and_delete_documents);
+}
+
+// Validates and installs a new schema. On an accepted schema change, updates
+// the document store for any type-id changes/deletions and rebuilds the index
+// if the change made it incompatible.
+//
+// Returns:
+//   OK if the schema was applied
+//   FAILED_PRECONDITION if the new schema is incompatible and
+//     ignore_errors_and_delete_documents did not force it through
+//   Any validation or propagation error
+libtextclassifier3::Status IcingSearchEngine::SetSchema(
+    SchemaProto&& new_schema, bool ignore_errors_and_delete_documents) {
+  ICING_VLOG(1) << "Setting new Schema";
+
+  ICING_RETURN_IF_ERROR(SchemaUtil::Validate(new_schema));
+
+  absl_ports::unique_lock l(&mutex_);
+
+  ICING_ASSIGN_OR_RETURN(
+      const SchemaStore::SetSchemaResult set_schema_result,
+      schema_store_->SetSchema(std::move(new_schema),
+                               ignore_errors_and_delete_documents));
+
+  if (set_schema_result.success) {
+    // Only touch the document store if the schema change actually affected
+    // existing types.
+    if (!set_schema_result.old_schema_type_ids_changed.empty() ||
+        !set_schema_result.schema_types_incompatible_by_id.empty() ||
+        !set_schema_result.schema_types_deleted_by_id.empty()) {
+      ICING_RETURN_IF_ERROR(document_store_->OptimizedUpdateSchemaStore(
+          schema_store_.get(), set_schema_result));
+    }
+
+    if (set_schema_result.index_incompatible) {
+      // Clears all index files
+      ICING_RETURN_IF_ERROR(index_->Reset());
+      ICING_RETURN_IF_ERROR(RestoreIndex());
+    }
+
+    return libtextclassifier3::Status::OK;
+  }
+
+  // TODO(cassiewang): Instead of returning a Status, consider returning some
+  // of the information we have in SetSchemaResult such as which types were
+  // deleted and which types were incompatible.
+  return absl_ports::FailedPreconditionError("Schema is incompatible.");
+}
+
+// Returns a copy of the currently installed schema, or the schema store's
+// error if none is set. Read-only, so a shared (reader) lock suffices.
+libtextclassifier3::StatusOr<SchemaProto> IcingSearchEngine::GetSchema() {
+  absl_ports::shared_lock l(&mutex_);
+  ICING_ASSIGN_OR_RETURN(const SchemaProto* schema, schema_store_->GetSchema());
+  return *schema;
+}
+
+// Returns a copy of the config for one schema type, or the schema store's
+// error (e.g. unknown type). Read-only, so a shared (reader) lock suffices.
+libtextclassifier3::StatusOr<SchemaTypeConfigProto>
+IcingSearchEngine::GetSchemaType(std::string schema_type) {
+  absl_ports::shared_lock l(&mutex_);
+  ICING_ASSIGN_OR_RETURN(const SchemaTypeConfigProto* type_config,
+                         schema_store_->GetSchemaTypeConfig(schema_type));
+  return *type_config;
+}
+
+// Copying overload: forwards a copy of document to the move overload below.
+libtextclassifier3::Status IcingSearchEngine::Put(
+    const DocumentProto& document) {
+  return Put(DocumentProto(document));
+}
+
+// Stores the document and indexes its content. Any error from the document
+// store or the index processor is propagated to the caller.
+libtextclassifier3::Status IcingSearchEngine::Put(DocumentProto&& document) {
+  ICING_VLOG(1) << "Writing document to document store";
+
+  // Lock must be acquired before validation because the DocumentStore uses
+  // the schema file to validate, and the schema could be changed in
+  // SetSchema() which is protected by the same mutex.
+  absl_ports::unique_lock l(&mutex_);
+
+  ICING_ASSIGN_OR_RETURN(DocumentId document_id,
+                         document_store_->Put(document));
+
+  // The processor is cheap to construct, so one is built per Put rather than
+  // cached across calls.
+  IndexProcessor index_processor(schema_store_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), index_.get(),
+                                 CreateIndexProcessorOptions(options_));
+  ICING_RETURN_IF_ERROR(index_processor.IndexDocument(document, document_id));
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Retrieves a document by (namespace, uri); the result/error semantics are
+// those of DocumentStore::Get. Read-only, so a shared lock suffices.
+libtextclassifier3::StatusOr<DocumentProto> IcingSearchEngine::Get(
+    const std::string_view name_space, const std::string_view uri) {
+  absl_ports::shared_lock l(&mutex_);
+
+  return document_store_->Get(name_space, uri);
+}
+
+// Deletes a document by (namespace, uri) from the document store. Failures
+// are logged with context before being propagated to the caller.
+libtextclassifier3::Status IcingSearchEngine::Delete(
+    const std::string_view name_space, const std::string_view uri) {
+  ICING_VLOG(1) << "Deleting document from doc store";
+
+  absl_ports::unique_lock l(&mutex_);
+
+  // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+  // that can support error logging.
+  libtextclassifier3::Status status = document_store_->Delete(name_space, uri);
+  if (!status.ok()) {
+    ICING_LOG(ERROR) << status.error_message()
+                     << "Failed to delete Document. namespace: " << name_space
+                     << ", uri: " << uri;
+    return status;
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+// Public flush entry point: takes the writer lock and delegates to
+// InternalPersistToDisk (which assumes the lock is already held).
+libtextclassifier3::Status IcingSearchEngine::PersistToDisk() {
+  ICING_VLOG(1) << "Persisting data to disk";
+  absl_ports::unique_lock l(&mutex_);
+  return InternalPersistToDisk();
+}
+
+// Optimizes storage for document store and index.
+//
+// Steps:
+// 1. Flush data to disk.
+// 2. Copy data needed to a tmp directory.
+// 3. Swap current directory and tmp directory.
+//
+// TODO(b/143724846) Optimize schema store here as well.
+// TODO(b/143724541) Signal the caller if the failure is unrecoverable.
+libtextclassifier3::Status IcingSearchEngine::Optimize() {
+  ICING_VLOG(1) << "Optimizing icing storage";
+
+  absl_ports::unique_lock l(&mutex_);
+
+  // Flushes data to disk before doing optimization
+  ICING_RETURN_IF_ERROR(InternalPersistToDisk());
+
+  // Gets the current directory path and an empty tmp directory path for
+  // document store and index optimization.
+  std::string current_dir =
+      MakeDocumentAndIndexDirectoryPath(options_.base_dir());
+  std::string temporary_dir =
+      MakeDocumentAndIndexTemporaryDirectoryPath(options_.base_dir());
+  if (!filesystem_->DeleteDirectoryRecursively(temporary_dir.c_str()) ||
+      !filesystem_->CreateDirectoryRecursively(temporary_dir.c_str())) {
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Failed to create a tmp directory: ", temporary_dir));
+  }
+
+  // Copies valid document data to tmp directory
+  auto optimize_status = document_store_->OptimizeInto(temporary_dir);
+
+  // Handles error if any
+  if (!optimize_status.ok()) {
+    // Discard the partial copy; the original directory has not been touched
+    // yet, so the engine keeps operating on the old data.
+    filesystem_->DeleteDirectoryRecursively(temporary_dir.c_str());
+    return absl_ports::Annotate(optimize_status,
+                                "Failed to optimize document store.");
+  }
+
+  // Resets before swapping
+  // NOTE(review): presumably required so the instances release their open
+  // file handles before the directories are renamed underneath them --
+  // confirm against Filesystem::SwapFiles semantics.
+  document_store_.reset();
+  index_.reset();
+
+  // When swapping files, always put the current working directory at the
+  // second place because it is renamed at the latter position so we're less
+  // vulnerable to errors.
+  if (!filesystem_->SwapFiles(temporary_dir.c_str(), current_dir.c_str())) {
+    // Try to rebuild document store and index if swapping fails, to avoid
+    // leaving the system in the broken state for future operations.
+    // TODO(b/144458732): Implement a more robust version of
+    // TC_ASSIGN_OR_RETURN that can support error logging.
+    auto document_store_or = DocumentStore::Create(
+        filesystem_.get(), current_dir, clock_.get(), schema_store_.get());
+    if (!document_store_or.ok()) {
+      ICING_LOG(ERROR)
+          << document_store_or.status().error_message()
+          << "Failed to swap files, no document store instance available";
+      return document_store_or.status();
+    }
+    document_store_ = std::move(document_store_or).ValueOrDie();
+
+    Index::Options index_options(current_dir, options_.index_merge_size());
+    // TODO(b/144458732): Implement a more robust version of
+    // TC_ASSIGN_OR_RETURN that can support error logging.
+    auto index_or = Index::Create(index_options, icing_filesystem_.get());
+    if (!index_or.ok()) {
+      ICING_LOG(ERROR) << index_or.status().error_message()
+                       << "Failed to swap files, no index instance available";
+      return index_or.status();
+    }
+    index_ = std::move(index_or).ValueOrDie();
+    // Instances were rebuilt from the (unswapped) original data, but the
+    // optimization itself failed, so surface that to the caller.
+    return absl_ports::InternalError("Failed to rename files");
+  }
+
+  // Recreates the doc store instance
+  ICING_ASSIGN_OR_RETURN(
+      document_store_,
+      DocumentStore::Create(filesystem_.get(), current_dir, clock_.get(),
+                            schema_store_.get()));
+
+  // Deletes tmp directory
+  if (!filesystem_->DeleteDirectoryRecursively(temporary_dir.c_str())) {
+    return absl_ports::InternalError("Failed to delete temporary directory");
+  }
+
+  // Recreates the index instance and re-indexes all the documents.
+  // TODO(b/143646633): figure out if we need to optimize index and doc store
+  // at the same time.
+  Index::Options index_options(current_dir, options_.index_merge_size());
+  ICING_ASSIGN_OR_RETURN(index_,
+                         Index::Create(index_options, icing_filesystem_.get()));
+  ICING_RETURN_IF_ERROR(RestoreIndex());
+
+  return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk() {
+  // Flush each subcomponent's data first so the checksum written below
+  // reflects what is actually on disk.
+  ICING_RETURN_IF_ERROR(schema_store_->PersistToDisk());
+  ICING_RETURN_IF_ERROR(document_store_->PersistToDisk());
+  ICING_RETURN_IF_ERROR(index_->PersistToDisk());
+
+  // Update the combined checksum and write to header file.
+  ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
+  ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
+
+  return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<Crc32> IcingSearchEngine::ComputeChecksum() {
+  Crc32 total_checksum;
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  auto checksum_or = schema_store_->ComputeChecksum();
+  if (!checksum_or.ok()) {
+    ICING_LOG(ERROR) << checksum_or.status().error_message()
+                     << "Failed to compute checksum of SchemaStore";
+    return checksum_or.status();
+  }
+
+  Crc32 schema_store_checksum = std::move(checksum_or).ValueOrDie();
+
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  checksum_or = document_store_->ComputeChecksum();
+  if (!checksum_or.ok()) {
+    ICING_LOG(ERROR) << checksum_or.status().error_message()
+                     << "Failed to compute checksum of DocumentStore";
+    return checksum_or.status();
+  }
+  Crc32 document_store_checksum = std::move(checksum_or).ValueOrDie();
+
+  // Unlike the stores above, the index checksum API is infallible here.
+  Crc32 index_checksum = index_->ComputeChecksum();
+
+  // NOTE: the append order (document store, schema store, index) must stay
+  // stable -- changing it changes the combined CRC and would invalidate
+  // headers written by previous versions.
+  total_checksum.Append(std::to_string(document_store_checksum.Get()));
+  total_checksum.Append(std::to_string(schema_store_checksum.Get()));
+  total_checksum.Append(std::to_string(index_checksum.Get()));
+
+  return total_checksum;
+}
+
+bool IcingSearchEngine::HeaderExists() {
+  // Compute the filename once; it was previously rebuilt (string
+  // concatenation) for both the existence check and the size check.
+  const std::string header_filename = MakeHeaderFilename(options_.base_dir());
+  if (!filesystem_->FileExists(header_filename.c_str())) {
+    return false;
+  }
+
+  int64_t file_size = filesystem_->GetFileSize(header_filename.c_str());
+
+  // If it's been truncated to size 0 before, we consider it to be a new file
+  return file_size != 0 && file_size != Filesystem::kBadFileSize;
+}
+
+libtextclassifier3::Status IcingSearchEngine::UpdateHeader(
+    const Crc32& checksum) {
+  // Write the header
+  IcingSearchEngine::Header header;
+  header.magic = IcingSearchEngine::Header::kMagic;
+  header.checksum = checksum.Get();
+
+  // This should overwrite the header. The struct is written as raw bytes, so
+  // readers must share the same Header layout (magic + checksum).
+  if (!filesystem_->Write(MakeHeaderFilename(options_.base_dir()).c_str(),
+                          &header, sizeof(header))) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to write IcingSearchEngine header: ",
+                           MakeHeaderFilename(options_.base_dir())));
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<SearchResultProto> IcingSearchEngine::Search(
+    const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+    const ResultSpecProto& result_spec) {
+  // Validate before taking the lock; no engine state is needed for this.
+  ICING_RETURN_IF_ERROR(ValidateResultSpec(result_spec));
+
+  // TODO(b/146008613) Explore ideas to make this function read-only.
+  absl_ports::unique_lock l(&mutex_);
+
+  // Gets unordered results from query processor
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), clock_.get());
+  ICING_ASSIGN_OR_RETURN(QueryProcessor::QueryResults query_results,
+                         query_processor.ParseSearch(search_spec));
+
+  // Generates the final list of document hits, scored and capped at
+  // result_spec.num_to_retrieve().
+  ICING_ASSIGN_OR_RETURN(
+      std::vector<ScoredDocumentHit> result_document_hits,
+      RunScoring(std::move(query_results.root_iterator), scoring_spec,
+                 result_spec.num_to_retrieve(), document_store_.get()));
+
+  // Retrieves the document protos and snippets if requested
+  ResultRetriever result_retriever(document_store_.get(), schema_store_.get(),
+                                   language_segmenter_.get());
+  ICING_ASSIGN_OR_RETURN(
+      std::vector<SearchResultProto::ResultProto> results,
+      result_retriever.RetrieveResults(result_spec, query_results.query_terms,
+                                       search_spec.term_match_type(),
+                                       result_document_hits));
+  // Assembles the final search result proto, moving each retrieved result
+  // into the repeated field to avoid copies.
+  SearchResultProto search_results;
+  search_results.mutable_results()->Reserve(results.size());
+  for (SearchResultProto::ResultProto& result : results) {
+    search_results.mutable_results()->Add(std::move(result));
+  }
+  return search_results;
+}
+
+libtextclassifier3::Status IcingSearchEngine::RestoreIndex() {
+  // Re-indexes every stored document in document-id order. Note (per the
+  // header comment) this does NOT clear the index first; callers wanting a
+  // clean rebuild should reset the index before calling.
+  DocumentId last_stored_document_id =
+      document_store_->last_added_document_id();
+
+  if (last_stored_document_id == kInvalidDocumentId) {
+    // Nothing to index
+    return libtextclassifier3::Status::OK;
+  }
+
+  IndexProcessor index_processor(schema_store_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), index_.get(),
+                                 CreateIndexProcessorOptions(options_));
+
+  for (DocumentId document_id = kMinDocumentId;
+       document_id <= last_stored_document_id; document_id++) {
+    libtextclassifier3::StatusOr<DocumentProto> document_or =
+        document_store_->Get(document_id);
+
+    if (!document_or.ok()) {
+      if (absl_ports::IsInvalidArgument(document_or.status()) ||
+          absl_ports::IsNotFound(document_or.status())) {
+        // Skips invalid and non-existing documents.
+        continue;
+      } else {
+        // Returns other errors
+        return document_or.status();
+      }
+    }
+
+    ICING_RETURN_IF_ERROR(
+        index_processor.IndexDocument(document_or.ValueOrDie(), document_id));
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
new file mode 100644
index 0000000..4069020
--- /dev/null
+++ b/icing/icing-search-engine.h
@@ -0,0 +1,319 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_ICING_SEARCH_ENGINE_H_
+#define ICING_ICING_SEARCH_ENGINE_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/mutex.h"
+#include "icing/absl_ports/thread_annotations.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/index.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/icing-search-engine-options.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// TODO(cassiewang) Top-level comments and links to design-doc.
+class IcingSearchEngine {
+ public:
+  // On-disk header used to detect corruption / inconsistency on startup.
+  struct Header {
+    static constexpr int32_t kMagic = 0x6e650d0a;
+
+    // Holds the magic as a quick sanity check against file corruption.
+    int32_t magic;
+
+    // Checksum of the IcingSearchEngine's sub-component's checksums.
+    uint32_t checksum;
+  };
+
+  explicit IcingSearchEngine(const IcingSearchEngineOptions& options);
+
+  // Calculates integrity checks and persists files to disk.
+  ~IcingSearchEngine();
+
+  // Loads & verifies the contents previously indexed from disk and gets ready
+  // to handle read/write requests.
+  //
+  // WARNING: This is expected to be fast if Icing had a clean shutdown.
+  // Otherwise, it can take longer as it runs integrity checks and attempts
+  // to bring the index to a consistent state. If the data on disk is not
+  // consistent, it restores the state when PersistToDisk() was last called.
+  //
+  // Returns OK on success, ie, Icing was initialized and all data verified.
+  // Returns DATA_LOSS on partial success, when Icing encountered
+  // data-inconsistency and had to restore its state back to the last call
+  // to PersistToDisk().
+  // Returns any other error encountered due to which the call couldn't be
+  // completed. The instance of IcingSearchEngine is not usable if this
+  // happens.
+  libtextclassifier3::Status Initialize() LOCKS_EXCLUDED(mutex_);
+
+  // Specifies the schema to be applied on all Documents that are already
+  // stored as well as future documents. A schema can be 'invalid' and/or
+  // 'incompatible'. These are two independent concepts.
+  //
+  // An 'invalid' schema is one that is not constructed properly. For example,
+  // a PropertyConfigProto is missing the property name field. A schema can be
+  // 'invalid' even if there is no previously existing schema.
+  //
+  // An 'incompatible' schema is one that is incompatible with a previously
+  // existing schema. If there is no previously existing schema, then a new
+  // schema cannot be incompatible. An incompatible schema is one that
+  // invalidates pre-existing data. For example, a previously OPTIONAL field is
+  // now REQUIRED in the new schema, and pre-existing data is considered invalid
+  // against the new schema now.
+  //
+  // Default behavior will not allow a new schema to be set if it is invalid or
+  // incompatible.
+  //
+  // The argument 'ignore_errors_and_delete_documents' can be set to true to
+  // force set an incompatible schema. In that case, documents that are
+  // invalidated by the new schema would be deleted from Icing. This cannot be
+  // used to force set an invalid schema.
+  //
+  // This schema is persisted to disk and used across multiple instances.
+  // So, callers should only have to call this if the schema changed.
+  // However, calling it multiple times with the same schema is a no-op.
+  //
+  // On any error, Icing will keep using the older schema.
+  //
+  // Returns:
+  //   OK on success
+  //   INVALID_ARGUMENT if 'new_schema' is invalid
+  //   FAILED_PRECONDITION if 'new_schema' is incompatible
+  //   INTERNAL_ERROR if Icing failed to store the new schema or upgrade
+  //     existing data based on the new schema.
+  //
+  // TODO(cassiewang) Figure out, document (and maybe even enforce) the best
+  // way ordering of calls between Initialize() and SetSchema(), both when
+  // the caller is creating an instance of IcingSearchEngine for the first
+  // time and when the caller is reinitializing an existing index on disk.
+  libtextclassifier3::Status SetSchema(
+      SchemaProto&& new_schema, bool ignore_errors_and_delete_documents = false)
+      LOCKS_EXCLUDED(mutex_);
+
+  // This function makes a copy of the schema and calls SetSchema(SchemaProto&&
+  // new_schema, bool ignore_errors_and_delete_documents)
+  //
+  // NOTE: It's recommended to call SetSchema(SchemaProto&& new_schema, bool
+  // ignore_errors_and_delete_documents) directly to avoid a copy if the caller
+  // can make an rvalue SchemaProto.
+  libtextclassifier3::Status SetSchema(
+      const SchemaProto& new_schema,
+      bool ignore_errors_and_delete_documents = false) LOCKS_EXCLUDED(mutex_);
+
+  // Get Icing's current copy of the schema.
+  //
+  // Returns:
+  //   SchemaProto on success
+  //   NOT_FOUND if a schema has not been set yet
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<SchemaProto> GetSchema() LOCKS_EXCLUDED(mutex_);
+
+  // Get Icing's copy of the SchemaTypeConfigProto of name schema_type
+  //
+  // NOTE(review): schema_type is taken by value; a const reference or
+  // std::string_view would avoid a copy -- confirm against the .cc definition
+  // before changing the signature.
+  //
+  // Returns:
+  //   SchemaTypeConfigProto on success
+  //   NOT_FOUND if a schema has not been set yet or if there is no
+  //     SchemaTypeConfig of schema_type in the SchemaProto
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<SchemaTypeConfigProto> GetSchemaType(
+      std::string schema_type) LOCKS_EXCLUDED(mutex_);
+
+  // Puts the document into icing search engine so that it's stored and
+  // indexed. Documents are automatically written to disk, callers can also
+  // call PersistToDisk() to flush changes immediately.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status Put(DocumentProto&& document)
+      LOCKS_EXCLUDED(mutex_);
+
+  // This function makes a copy of document and calls Put(DocumentProto&&
+  // document).
+  //
+  // NOTE: It's recommended to call Put(DocumentProto&& document) directly to
+  // avoid a copy if the caller can make an rvalue DocumentProto.
+  libtextclassifier3::Status Put(const DocumentProto& document)
+      LOCKS_EXCLUDED(mutex_);
+
+  // Finds and returns the document identified by the given key (namespace +
+  // uri)
+  //
+  // Returns:
+  //   The document found on success
+  //   NOT_FOUND if the key doesn't exist or doc has been deleted
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<DocumentProto> Get(std::string_view name_space,
+                                                  std::string_view uri);
+
+  // Deletes the Document specified by the given namespace / uri pair from the
+  // search engine. Delete changes are automatically applied to disk, callers
+  // can also call PersistToDisk() to flush changes immediately.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status Delete(std::string_view name_space,
+                                    std::string_view uri)
+      LOCKS_EXCLUDED(mutex_);
+
+  // Retrieves, scores, ranks, and returns the results according to the specs.
+  // Please refer to each proto file for spec definitions.
+  //
+  // Returns:
+  //   A SearchResultProto on success
+  //   INVALID_ARGUMENT if any of specs is invalid
+  //   INTERNAL_ERROR on any other errors
+  libtextclassifier3::StatusOr<SearchResultProto> Search(
+      const SearchSpecProto& search_spec, const ScoringSpecProto& scoring_spec,
+      const ResultSpecProto& result_spec) LOCKS_EXCLUDED(mutex_);
+
+  // Makes sure that every update/delete received till this point is flushed
+  // to disk. If the app crashes after a call to PersistToDisk(), Icing
+  // would be able to fully recover all data written up to this point.
+  //
+  // NOTE: It is not necessary to call PersistToDisk() to read back data
+  // that was recently written. All read APIs will include the most recent
+  // updates/deletes regardless of the data being flushed to disk.
+  libtextclassifier3::Status PersistToDisk() LOCKS_EXCLUDED(mutex_);
+
+  // Allows Icing to run tasks that are too expensive and/or unnecessary to be
+  // executed in real-time, but are useful to keep it fast and be
+  // resource-efficient. This method purely optimizes the internal files and
+  // has no functional impact on what gets accepted/returned.
+  //
+  // NOTE: This method should be called about once every 24 hours when the
+  // device is idle and charging. It can also be called when the system needs
+  // to free up extra disk-space.
+  //
+  // WARNING: This method is CPU and IO intensive and depending on the
+  // contents stored, it can take from a few seconds to a few minutes.
+  // This call also blocks all read/write operations on Icing.
+  libtextclassifier3::Status Optimize() LOCKS_EXCLUDED(mutex_);
+
+  // Disallow copy and move.
+  IcingSearchEngine(const IcingSearchEngine&) = delete;
+  IcingSearchEngine& operator=(const IcingSearchEngine&) = delete;
+
+ protected:
+  // Dependency-injection constructor: lets tests substitute a mock Filesystem
+  // and a fake Clock (see TestIcingSearchEngine in the unit tests).
+  IcingSearchEngine(IcingSearchEngineOptions options,
+                    std::unique_ptr<const Filesystem> filesystem,
+                    std::unique_ptr<Clock> clock);
+
+ private:
+  const IcingSearchEngineOptions options_;
+  const std::unique_ptr<const Filesystem> filesystem_;
+  const std::unique_ptr<const IcingFilesystem> icing_filesystem_;
+  // NOTE(review): appears intended to track whether Initialize() succeeded;
+  // its reads/writes are not visible in this chunk -- confirm in the .cc.
+  bool initialized_ = false;
+
+  // Abstraction for accessing time values.
+  std::unique_ptr<Clock> clock_;
+
+  // Used to provide reader and writer locks
+  absl_ports::shared_mutex mutex_;
+
+  // Stores and processes the schema
+  std::unique_ptr<SchemaStore> schema_store_ GUARDED_BY(mutex_);
+
+  // Used to store all valid documents
+  std::unique_ptr<DocumentStore> document_store_ GUARDED_BY(mutex_);
+
+  std::unique_ptr<const LanguageSegmenter> language_segmenter_
+      GUARDED_BY(mutex_);
+
+  std::unique_ptr<const Normalizer> normalizer_ GUARDED_BY(mutex_);
+
+  // Storage for all hits of content from the document store.
+  std::unique_ptr<Index> index_ GUARDED_BY(mutex_);
+
+  // Helper method to do the actual work to persist data to disk. We need this
+  // separate method so that other public methods don't need to call
+  // PersistToDisk(). Public methods calling each other may cause deadlock
+  // issues.
+  libtextclassifier3::Status InternalPersistToDisk()
+      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Many of the internal components rely on other components' derived data.
+  // Check that everything is consistent with each other so that we're not using
+  // outdated derived data in some parts of our system.
+  //
+  // Returns:
+  //   OK on success
+  //   NOT_FOUND if missing header file
+  //   INTERNAL_ERROR on any IO errors or if header is inconsistent
+  libtextclassifier3::Status CheckConsistency()
+      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Repopulates derived data off our ground truths.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on any IO errors
+  libtextclassifier3::Status RegenerateDerivedFiles()
+      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Helper method to restore missing document data in index_. All documents
+  // will be reindexed. This does not clear the index, so it is recommended to
+  // call Index::Reset first.
+  //
+  // Returns:
+  //   OK on success
+  //   RESOURCE_EXHAUSTED if the index fills up before finishing indexing
+  //   NOT_FOUND if some Document's schema type is not in the SchemaStore
+  //   INTERNAL_ERROR on any IO errors
+  libtextclassifier3::Status RestoreIndex() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Computes the combined checksum of the IcingSearchEngine - includes all its
+  // subcomponents
+  //
+  // Returns:
+  //   Combined checksum on success
+  //   INTERNAL_ERROR on compute error
+  libtextclassifier3::StatusOr<Crc32> ComputeChecksum()
+      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Checks if the header exists already. This does not create the header file
+  // if it doesn't exist.
+  bool HeaderExists() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+  // Update and replace the header file. Creates the header file if it doesn't
+  // exist.
+  libtextclassifier3::Status UpdateHeader(const Crc32& checksum)
+      EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_ICING_SEARCH_ENGINE_H_
diff --git a/icing/icing-search-engine_fuzz_test.cc b/icing/icing-search-engine_fuzz_test.cc
new file mode 100644
index 0000000..a7f6adc
--- /dev/null
+++ b/icing/icing-search-engine_fuzz_test.cc
@@ -0,0 +1,102 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstddef>
+#include <cstdint>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/document-builder.h"
+#include "icing/icing-search-engine.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/icing-search-engine-options.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+namespace {
+
+// Builds engine options rooted in the test temp dir, loading the ICU data
+// file as a side effect.
+IcingSearchEngineOptions Setup() {
+  IcingSearchEngineOptions icing_options;
+  // NOTE(review): the status is assigned but never checked; presumably an
+  // ICU load failure is acceptable in a fuzzer and surfaces later via
+  // Initialize() -- confirm.
+  libtextclassifier3::Status status =
+      SetUpICUDataFile("icing/icu.dat");
+  icing_options.set_base_dir(GetTestTempDir() + "/icing");
+  icing_options.set_lang_model_path(GetLangIdModelPath());
+  return icing_options;
+}
+
+// Builds a schema with a single "Message" type holding one required,
+// prefix-indexed, plain-tokenized string property named "body".
+SchemaProto SetTypes() {
+  SchemaProto schema;
+  SchemaTypeConfigProto* message_type = schema.add_types();
+  message_type->set_schema_type("Message");
+  PropertyConfigProto* body_property = message_type->add_properties();
+  body_property->set_property_name("body");
+  body_property->set_data_type(PropertyConfigProto::DataType::STRING);
+  body_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+  body_property->mutable_indexing_config()->set_term_match_type(
+      TermMatchType::PREFIX);
+  body_property->mutable_indexing_config()->set_tokenizer_type(
+      IndexingConfig::TokenizerType::PLAIN);
+  return schema;
+}
+
+// Builds a "Message" document whose body is the raw fuzz input.
+DocumentProto MakeDocument(const uint8_t* data, size_t size) {
+  // TODO (sidchhabra): Add more optimized fuzzing techniques.
+  // Fixed: removed an unused local `DocumentProto document;` and qualified
+  // std::string explicitly.
+  std::string string_prop(reinterpret_cast<const char*>(data), size);
+  return DocumentBuilder()
+      .SetKey("namespace", "uri1")
+      .SetSchema("Message")
+      .AddStringProperty("body", string_prop)
+      .Build();
+}
+
+// Builds a prefix-match search spec whose query is the raw fuzz input.
+SearchSpecProto SetSearchSpec(const uint8_t* data, size_t size) {
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+  // TODO (sidchhabra): Add more optimized fuzzing techniques.
+  // Fixed: qualified std::string explicitly (unqualified `string` relied on
+  // a non-standard alias).
+  std::string query_string(reinterpret_cast<const char*>(data), size);
+  search_spec.set_query(query_string);
+  return search_spec;
+}
+
+// Fuzzer entry point: index the input as a document body, then search for it.
+// Statuses are deliberately unchecked -- the fuzzer only cares that no call
+// crashes or corrupts memory.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  // Initialize
+  IcingSearchEngineOptions icing_options = Setup();
+  IcingSearchEngine icing(icing_options);
+  // Fixed: local was named `filesystem_` (member-style trailing underscore).
+  const Filesystem filesystem;
+  // TODO (b/145758378): Deleting directory should not be required.
+  filesystem.DeleteDirectoryRecursively(icing_options.base_dir().c_str());
+  libtextclassifier3::Status status = icing.Initialize();
+  SchemaProto schema_proto = SetTypes();
+  status = icing.SetSchema(schema_proto);
+
+  // Index
+  DocumentProto document = MakeDocument(data, size);
+  status = icing.Put(document);
+
+  // Query
+  SearchSpecProto search_spec = SetSearchSpec(data, size);
+  ScoringSpecProto scoring_spec;
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+  ResultSpecProto result_spec;
+  libtextclassifier3::StatusOr<SearchResultProto> result =
+      icing.Search(search_spec, scoring_spec, result_spec);
+  return 0;
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
new file mode 100644
index 0000000..e389b57
--- /dev/null
+++ b/icing/icing-search-engine_test.cc
@@ -0,0 +1,1848 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/icing-search-engine.h"
+
+#include <cstdint>
+#include <ctime>
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/icing-search-engine-options.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/snippet-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::_;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::IsEmpty;
+using ::testing::Lt;
+using ::testing::Return;
+using ::testing::SizeIs;
+
+// For mocking purpose, we allow tests to provide a custom Filesystem.
+class TestIcingSearchEngine : public IcingSearchEngine {
+ public:
+ TestIcingSearchEngine(const IcingSearchEngineOptions& options,
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<FakeClock> clock)
+ : IcingSearchEngine(options, std::move(filesystem), std::move(clock)) {}
+};
+
+// Root directory under which all test data for this suite lives.
+std::string GetTestBaseDir() {
+  return GetTestTempDir() + "/icing";
+}
+
+// Test fixture: creates a fresh base directory (and ICU data) before each
+// test and removes it afterwards so tests don't leak state into each other.
+class IcingSearchEngineTest : public testing::Test {
+ protected:
+  void SetUp() override {
+    ICING_ASSERT_OK(
+        // File generated via icu_data_file rule in //icing/BUILD.
+        SetUpICUDataFile("icing/icu.dat"));
+    filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
+  }
+
+  void TearDown() override {
+    filesystem_.DeleteDirectoryRecursively(GetTestBaseDir().c_str());
+  }
+
+  const Filesystem* filesystem() const { return &filesystem_; }
+
+ private:
+  Filesystem filesystem_;
+};
+
+// (1u << 24) - 1 bytes, i.e. 16 MiB - 1.
+constexpr int kMaxSupportedDocumentSize = (1u << 24) - 1;
+
+// Non-zero value so we don't override it to be the current time
+constexpr std::time_t kDefaultCreationTimestampSecs = 1575492852;
+
+// Paths of the engine's on-disk artifacts, all under the test base dir.
+std::string GetDocumentIndexDir() {
+  return GetTestBaseDir() + "/document_index_dir";
+}
+
+std::string GetSchemaDir() { return GetTestBaseDir() + "/schema_dir"; }
+
+std::string GetHeaderFilename() {
+  return GetTestBaseDir() + "/icing_search_engine_header";
+}
+
+// Default engine options: base dir under the test temp dir, plus the
+// checked-in LangId model path.
+IcingSearchEngineOptions GetDefaultIcingOptions() {
+  IcingSearchEngineOptions icing_options;
+  icing_options.set_base_dir(GetTestBaseDir());
+  icing_options.set_lang_model_path(GetLangIdModelPath());
+  return icing_options;
+}
+
+// A minimal valid "Message" document matching GetDefaultSchema(), with a
+// fixed creation timestamp so results are deterministic.
+DocumentProto GetDefaultDocument() {
+  return DocumentBuilder()
+      .SetKey("namespace", "uri")
+      .SetSchema("Message")
+      .AddStringProperty("body", "message body")
+      .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+      .Build();
+}
+
+// Schema with one "Message" type: a single required, prefix-indexed,
+// plain-tokenized string property named "body".
+SchemaProto GetDefaultSchema() {
+  SchemaProto schema;
+  SchemaTypeConfigProto* message_type = schema.add_types();
+  message_type->set_schema_type("Message");
+
+  PropertyConfigProto* body_property = message_type->add_properties();
+  body_property->set_property_name("body");
+  body_property->set_data_type(PropertyConfigProto::DataType::STRING);
+  body_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+  body_property->mutable_indexing_config()->set_term_match_type(
+      TermMatchType::PREFIX);
+  body_property->mutable_indexing_config()->set_tokenizer_type(
+      IndexingConfig::TokenizerType::PLAIN);
+
+  return schema;
+}
+
+// Scoring spec that ranks results by their document score.
+ScoringSpecProto GetDefaultScoringSpec() {
+  ScoringSpecProto scoring_spec;
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+  return scoring_spec;
+}
+
+TEST_F(IcingSearchEngineTest, SimpleInitialization) {
+  IcingSearchEngine icing(GetDefaultIcingOptions());
+  ICING_ASSERT_OK(icing.Initialize());
+  ICING_ASSERT_OK(icing.SetSchema(GetDefaultSchema()));
+
+  DocumentProto document = GetDefaultDocument();
+  ICING_ASSERT_OK(icing.Put(document));
+  // Put should also accept an rvalue copy of the same document.
+  ICING_ASSERT_OK(icing.Put(DocumentProto(document)));
+}
+
+// INT32_MAX merge size is rejected at initialization.
+TEST_F(IcingSearchEngineTest, MaxIndexMergeSizeReturnsInvalidArgument) {
+  IcingSearchEngineOptions options = GetDefaultIcingOptions();
+  options.set_index_merge_size(std::numeric_limits<int32_t>::max());
+  IcingSearchEngine icing(options);
+  EXPECT_THAT(icing.Initialize(),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// Negative merge size is rejected at initialization.
+TEST_F(IcingSearchEngineTest, NegativeMergeSizeReturnsInvalidArgument) {
+  IcingSearchEngineOptions options = GetDefaultIcingOptions();
+  options.set_index_merge_size(-1);
+  IcingSearchEngine icing(options);
+  EXPECT_THAT(icing.Initialize(),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// Zero merge size is rejected at initialization.
+TEST_F(IcingSearchEngineTest, ZeroMergeSizeReturnsInvalidArgument) {
+  IcingSearchEngineOptions options = GetDefaultIcingOptions();
+  options.set_index_merge_size(0);
+  IcingSearchEngine icing(options);
+  EXPECT_THAT(icing.Initialize(),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineTest, GoodIndexMergeSizeReturnsOk) {
+  IcingSearchEngineOptions options = GetDefaultIcingOptions();
+  // One is fine, if a bit weird. It just means that the lite index will be
+  // smaller and will request a merge any time content is added to it.
+  options.set_index_merge_size(1);
+  IcingSearchEngine icing(options);
+  ICING_EXPECT_OK(icing.Initialize());
+}
+
+// Negative max-tokens-per-doc is rejected at initialization.
+TEST_F(IcingSearchEngineTest,
+       NegativeMaxTokensPerDocSizeReturnsInvalidArgument) {
+  IcingSearchEngineOptions options = GetDefaultIcingOptions();
+  options.set_max_tokens_per_doc(-1);
+  IcingSearchEngine icing(options);
+  EXPECT_THAT(icing.Initialize(),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// Zero max-tokens-per-doc is rejected at initialization.
+TEST_F(IcingSearchEngineTest, ZeroMaxTokensPerDocSizeReturnsInvalidArgument) {
+  IcingSearchEngineOptions options = GetDefaultIcingOptions();
+  options.set_max_tokens_per_doc(0);
+  IcingSearchEngine icing(options);
+  EXPECT_THAT(icing.Initialize(),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineTest, GoodMaxTokensPerDocSizeReturnsOk) {
+  IcingSearchEngineOptions options = GetDefaultIcingOptions();
+  // INT_MAX is valid - it just means that we shouldn't limit the number of
+  // tokens per document. It would be pretty inconceivable that anyone would
+  // produce such a document - the text being indexed alone would take up at
+  // least ~4.3 GiB! - and the document would be rejected before indexing
+  // for exceeding max_document_size, but there's no reason to explicitly
+  // bar it.
+  options.set_max_tokens_per_doc(std::numeric_limits<int32_t>::max());
+  IcingSearchEngine icing(options);
+  ICING_EXPECT_OK(icing.Initialize());
+}
+
+TEST_F(IcingSearchEngineTest, NegativeMaxTokenLenReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_max_token_length(-1);
+ IcingSearchEngine icing(options);
+ EXPECT_THAT(icing.Initialize(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineTest, ZeroMaxTokenLenReturnsInvalidArgument) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_max_token_length(0);
+ IcingSearchEngine icing(options);
+ EXPECT_THAT(icing.Initialize(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// A max_token_length of 1 is accepted, and truncation applies symmetrically:
+// both the tokens indexed from documents and the tokens in the query are
+// clipped to the configured length before matching.
+TEST_F(IcingSearchEngineTest, MaxTokenLenReturnsOkAndTruncatesTokens) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ // A length of 1 is allowed - even though it would be strange to want
+ // this.
+ options.set_max_token_length(1);
+ IcingSearchEngine icing(options);
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ ICING_EXPECT_OK(icing.Put(document));
+
+ // "message" should have been truncated to "m"
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ // The indexed tokens were truncated to length of 1, so "m" will match
+ search_spec.set_query("m");
+
+ SearchResultProto exp_result;
+ (*exp_result.mutable_results()->Add()->mutable_document()) = document;
+
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(exp_result)));
+
+ // The query token is also truncated to length of 1, so "me"->"m" matches "m"
+ search_spec.set_query("me");
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(exp_result)));
+
+ // The query token is still truncated to length of 1, so "massage"->"m"
+ // matches "m"
+ search_spec.set_query("massage");
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(exp_result)));
+}
+
+// With truncation effectively disabled (max_token_length == INT32_MAX), a
+// single enormous token can exhaust the lexicon: Put() fails with
+// RESOURCE_EXHAUSTED and, consequently, nothing is searchable afterwards.
+TEST_F(IcingSearchEngineTest,
+ MaxIntMaxTokenLenReturnsOkTooLargeTokenReturnsResourceExhausted) {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ // Set token length to max. This is allowed (it just means never to
+ // truncate tokens). However, this does mean that tokens that exceed the
+ // size of the lexicon will cause indexing to fail.
+ options.set_max_token_length(std::numeric_limits<int32_t>::max());
+ IcingSearchEngine icing(options);
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+
+ // Add a document that just barely fits under the max document limit.
+ // This will still fail to index because we won't actually have enough
+ // room in the lexicon to fit this content.
+ std::string enormous_string(kMaxSupportedDocumentSize - 256, 'p');
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", std::move(enormous_string))
+ .Build();
+ EXPECT_THAT(icing.Put(document),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+ // The failed Put() must not have left any partial content in the index.
+ SearchSpecProto search_spec;
+ search_spec.set_query("p");
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(SearchResultProto::default_instance())));
+}
+
+// Filesystem failures during Initialize() are surfaced as INTERNAL errors
+// with a descriptive message rather than being swallowed.
+TEST_F(IcingSearchEngineTest, FailToCreateDocStore) {
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ // This fails DocumentStore::Create()
+ ON_CALL(*mock_filesystem, CreateDirectoryRecursively(_))
+ .WillByDefault(Return(false));
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::move(mock_filesystem),
+ std::make_unique<FakeClock>());
+
+ ASSERT_THAT(icing.Initialize(),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL,
+ HasSubstr("Could not create directory")));
+}
+
+// A bad language-model path makes language-segmenter creation (and therefore
+// Initialize()) fail with INVALID_ARGUMENT.
+TEST_F(IcingSearchEngineTest,
+ InvalidFileCreateLangSegmenterReturnsInvalidArgument) {
+ IcingSearchEngineOptions options(GetDefaultIcingOptions());
+ options.set_lang_model_path("notarealfile");
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::make_unique<FakeClock>());
+ EXPECT_THAT(icing.Initialize(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// Two types that each contain a REQUIRED document property of the other form
+// a cycle that the section manager cannot resolve, so SetSchema() must
+// reject the schema with INVALID_ARGUMENT.
+TEST_F(IcingSearchEngineTest,
+ CircularReferenceCreateSectionManagerReturnsInvalidArgument) {
+ // Create a type config with a circular reference.
+ SchemaProto schema;
+ auto* type = schema.add_types();
+ type->set_schema_type("Message");
+
+ // "Message" holds a required "Person"...
+ auto* body = type->add_properties();
+ body->set_property_name("recipient");
+ body->set_schema_type("Person");
+ body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ body->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ type = schema.add_types();
+ type->set_schema_type("Person");
+
+ // ...and "Person" holds a required "Message", closing the cycle.
+ body = type->add_properties();
+ body->set_property_name("recipient");
+ body->set_schema_type("Message");
+ body->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ body->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ EXPECT_THAT(icing.SetSchema(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// Put() before any schema has been set fails with NOT_FOUND for the
+// document's type. The second call passes a temporary copy to also exercise
+// the rvalue path.
+TEST_F(IcingSearchEngineTest, NoSchemaSet) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+
+ DocumentProto document = GetDefaultDocument();
+ EXPECT_THAT(icing.Put(document),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+ HasSubstr("'Message' not found")));
+ EXPECT_THAT(icing.Put(DocumentProto(document)),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+ HasSubstr("'Message' not found")));
+}
+
+// An unreadable previously-persisted schema file makes re-initialization fail
+// with INTERNAL.
+TEST_F(IcingSearchEngineTest, FailToReadSchema) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Successfully initialize and set a schema
+ IcingSearchEngine icing(icing_options);
+ ICING_ASSERT_OK(icing.Initialize());
+ ICING_ASSERT_OK(icing.SetSchema(GetDefaultSchema()));
+ }
+
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+
+ // This fails FileBackedProto::Read() when we try to check the schema we
+ // had previously set
+ ON_CALL(*mock_filesystem,
+ OpenForRead(Eq(icing_options.base_dir() + "/schema_dir/schema.pb")))
+ .WillByDefault(Return(-1));
+
+ TestIcingSearchEngine test_icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<FakeClock>());
+
+ ASSERT_THAT(test_icing.Initialize(),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL,
+ HasSubstr("Unable to open file for read")));
+}
+
+// An unwritable schema file makes SetSchema() (not Initialize()) fail with
+// INTERNAL.
+TEST_F(IcingSearchEngineTest, FailToWriteSchema) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ auto mock_filesystem = std::make_unique<MockFilesystem>();
+ // This fails FileBackedProto::Write()
+ ON_CALL(*mock_filesystem,
+ OpenForWrite(Eq(icing_options.base_dir() + "/schema_dir/schema.pb")))
+ .WillByDefault(Return(-1));
+
+ TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
+ std::make_unique<FakeClock>());
+
+ ICING_ASSERT_OK(icing.Initialize());
+ ASSERT_THAT(icing.SetSchema(GetDefaultSchema()),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL,
+ HasSubstr("Unable to open file for write")));
+}
+
+// Covers the SetSchema() contract end to end: invalid schemas are rejected,
+// incompatible (type-removing) schema changes fail with FAILED_PRECONDITION,
+// compatible extensions succeed, and documents can only be Put() for types
+// present in the current schema.
+TEST_F(IcingSearchEngineTest, SetSchema) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+
+ auto message_document = GetDefaultDocument();
+
+ auto schema_with_message = GetDefaultSchema();
+
+ SchemaProto schema_with_email;
+ SchemaTypeConfigProto* type = schema_with_email.add_types();
+ type->set_schema_type("Email");
+ PropertyConfigProto* property = type->add_properties();
+ property->set_property_name("title");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ SchemaProto schema_with_email_and_message = schema_with_email;
+ type = schema_with_email_and_message.add_types();
+ type->set_schema_type("Message");
+ property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // Create an arbitrary invalid schema
+ SchemaProto invalid_schema;
+ SchemaTypeConfigProto* empty_type = invalid_schema.add_types();
+ empty_type->set_schema_type("");
+
+ // Make sure we can't set invalid schemas
+ EXPECT_THAT(icing.SetSchema(invalid_schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Can add a document of a set schema
+ ICING_EXPECT_OK(icing.SetSchema(schema_with_message));
+ ICING_EXPECT_OK(icing.Put(message_document));
+
+ // Schema with Email doesn't have Message, so would result in incompatible
+ // data
+ EXPECT_THAT(icing.SetSchema(schema_with_email),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ // Can expand the set of schema types and add a document of a new
+ // schema type
+ ICING_EXPECT_OK(icing.SetSchema(SchemaProto(schema_with_email_and_message)));
+ ICING_EXPECT_OK(icing.Put(message_document));
+
+ // Can't add a document whose schema isn't set
+ auto photo_document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Photo")
+ .AddStringProperty("creator", "icing")
+ .Build();
+ EXPECT_THAT(icing.Put(photo_document),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+ HasSubstr("'Photo' not found")));
+}
+
+// Switching from a schema with no indexed properties to one that indexes the
+// "body" property must re-index (restore) the already-stored documents so
+// they become searchable without being Put() again.
+TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+
+ SchemaProto schema_with_no_indexed_property = GetDefaultSchema();
+ schema_with_no_indexed_property.mutable_types(0)
+ ->mutable_properties(0)
+ ->clear_indexing_config();
+
+ ICING_EXPECT_OK(icing.SetSchema(schema_with_no_indexed_property));
+ // Nothing will be indexed and Search() won't return anything.
+ ICING_EXPECT_OK(icing.Put(GetDefaultDocument()));
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto empty_result;
+
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(empty_result)));
+
+ SchemaProto schema_with_indexed_property = GetDefaultSchema();
+ // Index restoration should be triggered here because new schema requires more
+ // properties to be indexed.
+ ICING_EXPECT_OK(icing.SetSchema(schema_with_indexed_property));
+ SearchResultProto expected_result;
+ (*expected_result.mutable_results()->Add()->mutable_document()) =
+ GetDefaultDocument();
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+}
+
+// Tightening a property from OPTIONAL to REQUIRED is incompatible; forcing
+// the change with ignore_errors_and_delete_documents=true revalidates stored
+// documents and deletes the ones that fail against the new schema.
+TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+
+ SchemaProto schema_with_optional_subject;
+ auto type = schema_with_optional_subject.add_types();
+ type->set_schema_type("email");
+
+ // Add an OPTIONAL property
+ auto property = type->add_properties();
+ property->set_property_name("subject");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ ICING_EXPECT_OK(icing.SetSchema(schema_with_optional_subject));
+
+ DocumentProto email_document_without_subject =
+ DocumentBuilder()
+ .SetKey("namespace", "without_subject")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ DocumentProto email_document_with_subject =
+ DocumentBuilder()
+ .SetKey("namespace", "with_subject")
+ .SetSchema("email")
+ .AddStringProperty("subject", "foo")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+
+ ICING_EXPECT_OK(icing.Put(email_document_without_subject));
+ ICING_EXPECT_OK(icing.Put(email_document_with_subject));
+
+ SchemaProto schema_with_required_subject;
+ type = schema_with_required_subject.add_types();
+ type->set_schema_type("email");
+
+ // Add a REQUIRED property
+ property = type->add_properties();
+ property->set_property_name("subject");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // Can't set the schema since it's incompatible
+ EXPECT_THAT(icing.SetSchema(schema_with_required_subject),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ // Force set it
+ ICING_EXPECT_OK(icing.SetSchema(schema_with_required_subject,
+ /*ignore_errors_and_delete_documents=*/true));
+
+ EXPECT_THAT(icing.Get("namespace", "with_subject"),
+ IsOkAndHolds(EqualsProto(email_document_with_subject)));
+
+ // The document without a subject got deleted because it failed validation
+ // against the new schema
+ EXPECT_THAT(icing.Get("namespace", "without_subject"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+// Removing a type from the schema is incompatible; forcing the change with
+// ignore_errors_and_delete_documents=true deletes all documents of the
+// removed type while documents of surviving types remain retrievable.
+TEST_F(IcingSearchEngineTest, SetSchemaDeletesDocumentsAndReturnsOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("email");
+ type = schema.add_types();
+ type->set_schema_type("message");
+
+ ICING_EXPECT_OK(icing.SetSchema(schema));
+
+ DocumentProto email_document =
+ DocumentBuilder()
+ .SetKey("namespace", "email_uri")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ DocumentProto message_document =
+ DocumentBuilder()
+ .SetKey("namespace", "message_uri")
+ .SetSchema("message")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+
+ ICING_EXPECT_OK(icing.Put(email_document));
+ ICING_EXPECT_OK(icing.Put(message_document));
+
+ // Clear the schema and only add the "email" type, essentially deleting the
+ // "message" type
+ SchemaProto new_schema;
+ type = new_schema.add_types();
+ type->set_schema_type("email");
+
+ // Can't set the schema since it's incompatible
+ EXPECT_THAT(icing.SetSchema(new_schema),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ // Force set it
+ ICING_EXPECT_OK(icing.SetSchema(new_schema,
+ /*ignore_errors_and_delete_documents=*/true));
+
+ // "email" document is still there
+ EXPECT_THAT(icing.Get("namespace", "email_uri"),
+ IsOkAndHolds(EqualsProto(email_document)));
+
+ // "message" document got deleted
+ EXPECT_THAT(icing.Get("namespace", "message_uri"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+// GetSchema()/GetSchemaType() accessors: NOT_FOUND before anything is set,
+// and round-trip equality with what SetSchema() stored afterwards.
+TEST_F(IcingSearchEngineTest, GetSchemaNotFound) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+
+ EXPECT_THAT(icing.GetSchema(),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(IcingSearchEngineTest, GetSchemaOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+ EXPECT_THAT(icing.GetSchema(), IsOkAndHolds(EqualsProto(GetDefaultSchema())));
+}
+
+TEST_F(IcingSearchEngineTest, GetSchemaTypeNotFound) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+
+ EXPECT_THAT(icing.GetSchemaType("nonexistent_schema"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(IcingSearchEngineTest, GetSchemaTypeOk) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+ EXPECT_THAT(icing.GetSchemaType(GetDefaultSchema().types(0).schema_type()),
+ IsOkAndHolds(EqualsProto(GetDefaultSchema().types(0))));
+}
+
+// Basic Put()/Get() round trip, plus the two failure modes: an invalid
+// (empty-namespace) document is rejected with INVALID_ARGUMENT, and looking
+// up a key that was never stored yields NOT_FOUND.
+TEST_F(IcingSearchEngineTest, GetDocument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+ ICING_ASSERT_OK(icing.SetSchema(GetDefaultSchema()));
+
+ // Simple put and get
+ ICING_ASSERT_OK(icing.Put(GetDefaultDocument()));
+ ASSERT_THAT(icing.Get("namespace", "uri"),
+ IsOkAndHolds(EqualsProto(GetDefaultDocument())));
+
+ // Put an invalid document
+ ASSERT_THAT(icing.Put(DocumentProto()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("'namespace' is empty")));
+
+ // Get a non-existing key
+ ASSERT_THAT(icing.Get("wrong", "uri"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+// Search() returns both matching documents with snippets attached only to the
+// first num_to_snippet results, and an empty SearchResultProto for a
+// non-matching query.
+TEST_F(IcingSearchEngineTest, SearchReturnsValidResults) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+ ICING_ASSERT_OK(icing.SetSchema(GetDefaultSchema()));
+
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ ICING_ASSERT_OK(icing.Put(document_one));
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ ICING_ASSERT_OK(icing.Put(document_two));
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+
+ // Only snippet the single highest-ranked result.
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_max_window_bytes(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(1);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SearchResultProto results,
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec));
+ EXPECT_THAT(results.results(), SizeIs(2));
+ // First result carries a snippet; the second (beyond num_to_snippet) does
+ // not.
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(GetMatch(results.results(0).document(),
+ results.results(0).snippet(), "body",
+ /*snippet_index=*/0),
+ Eq("message"));
+ EXPECT_THAT(
+ GetWindow(results.results(0).document(), results.results(0).snippet(),
+ "body", /*snippet_index=*/0),
+ Eq("message body"));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+ EXPECT_THAT(
+ GetMatch(results.results(1).document(), results.results(1).snippet(),
+ "body", /*snippet_index=*/0),
+ IsEmpty());
+ EXPECT_THAT(
+ GetWindow(results.results(1).document(), results.results(1).snippet(),
+ "body", /*snippet_index=*/0),
+ IsEmpty());
+
+ search_spec.set_query("foo");
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(SearchResultProto::default_instance())));
+}
+
+// ResultSpec.num_to_retrieve caps the result count: 1 returns only the
+// top-ranked document, 0 returns an empty result set, and a negative value is
+// rejected with INVALID_ARGUMENT.
+TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+ ICING_ASSERT_OK(icing.SetSchema(GetDefaultSchema()));
+
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ ICING_ASSERT_OK(icing.Put(document_one));
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ ICING_ASSERT_OK(icing.Put(document_two));
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_to_retrieve(1);
+
+ SearchResultProto expected_result;
+ (*expected_result.mutable_results()->Add()->mutable_document()) =
+ document_two;
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), result_spec),
+ IsOkAndHolds(EqualsProto(expected_result)));
+}
+
+TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_to_retrieve(0);
+
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), result_spec),
+ IsOkAndHolds(EqualsProto(SearchResultProto::default_instance())));
+}
+
+TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("");
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_to_retrieve(-5);
+
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), result_spec),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// Persistence across engine instances: schema, documents, and index are each
+// written to disk when an IcingSearchEngine is destroyed, and a fresh
+// instance over the same base_dir can serve searches from that state.
+TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ {
+ // Set the schema up beforehand.
+ IcingSearchEngine icing(icing_options);
+ ICING_ASSERT_OK(icing.Initialize());
+ ICING_ASSERT_OK(icing.SetSchema(GetDefaultSchema()));
+ // Schema will be persisted to disk when icing goes out of scope.
+ }
+
+ {
+ // Ensure that icing initializes the schema and section_manager
+ // properly from the pre-existing file.
+ IcingSearchEngine icing(icing_options);
+ ICING_ASSERT_OK(icing.Initialize());
+
+ ICING_EXPECT_OK(icing.Put(GetDefaultDocument()));
+ // The index and document store will be persisted to disk when icing goes
+ // out of scope.
+ }
+
+ {
+ // Ensure that the index is brought back up without problems and we
+ // can query for the content that we expect.
+ IcingSearchEngine icing(icing_options);
+ ICING_ASSERT_OK(icing.Initialize());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+
+ SearchResultProto expected_result;
+ (*expected_result.mutable_results()->Add()->mutable_document()) =
+ GetDefaultDocument();
+
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+
+ search_spec.set_query("foo");
+ EXPECT_THAT(
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(SearchResultProto::default_instance())));
+ }
+}
+
+// Optimize() physically reclaims space for deleted documents: the on-disk
+// document log shrinks, and the deletion survives a restart.
+TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .Build();
+ {
+ IcingSearchEngine icing(icing_options);
+ ICING_ASSERT_OK(icing.Initialize());
+ ICING_ASSERT_OK(icing.SetSchema(GetDefaultSchema()));
+ ICING_ASSERT_OK(icing.Put(document1));
+
+ // Deletes document1
+ ICING_ASSERT_OK(icing.Delete("namespace", "uri1"));
+ const std::string document_log_path =
+ icing_options.base_dir() + "/document_index_dir/document_log";
+ int64_t document_log_size_before =
+ filesystem()->GetFileSize(document_log_path.c_str());
+ ICING_ASSERT_OK(icing.Optimize());
+ int64_t document_log_size_after =
+ filesystem()->GetFileSize(document_log_path.c_str());
+
+ // Validates that document can't be found right after Optimize()
+ EXPECT_THAT(icing.Get("namespace", "uri1"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ // Validates that document is actually removed from document log
+ EXPECT_THAT(document_log_size_after, Lt(document_log_size_before));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(icing_options);
+ ICING_EXPECT_OK(icing.Initialize());
+ EXPECT_THAT(icing.Get("namespace", "uri1"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+// Optimize() must clean up any leftover temporary swap directory (e.g. from a
+// previously interrupted run) instead of tripping over it.
+TEST_F(IcingSearchEngineTest, OptimizationShouldDeleteTemporaryDirectory) {
+ IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
+ IcingSearchEngine icing(icing_options);
+ ICING_ASSERT_OK(icing.Initialize());
+ ICING_ASSERT_OK(icing.SetSchema(GetDefaultSchema()));
+
+ // Create a tmp dir that will be used in Optimize() to swap files,
+ // this validates that any tmp dirs will be deleted before using.
+ const std::string tmp_dir =
+ icing_options.base_dir() + "/document_index_dir" + "_optimize_tmp";
+ const std::string tmp_file = tmp_dir + "/file";
+ ASSERT_TRUE(filesystem()->CreateDirectory(tmp_dir.c_str()));
+ ScopedFd fd(filesystem()->OpenForWrite(tmp_file.c_str()));
+ ASSERT_TRUE(fd.is_valid());
+ ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
+ fd.reset();
+ ICING_ASSERT_OK(icing.Optimize());
+
+ EXPECT_FALSE(filesystem()->DirectoryExists(tmp_dir.c_str()));
+ EXPECT_FALSE(filesystem()->FileExists(tmp_file.c_str()));
+}
+
+// After Optimize(), the engine must remain fully functional for Get()/Put(),
+// both within the same instance and after a restart from disk.
+TEST_F(IcingSearchEngineTest, GetAndPutShouldWorkAfterOptimization) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body3")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+ ICING_ASSERT_OK(icing.SetSchema(GetDefaultSchema()));
+
+ ICING_ASSERT_OK(icing.Put(document1));
+ ICING_ASSERT_OK(icing.Optimize());
+
+ // Validates that Get() and Put() are good right after Optimize()
+ EXPECT_THAT(icing.Get("namespace", "uri1"),
+ IsOkAndHolds(EqualsProto(document1)));
+ ICING_EXPECT_OK(icing.Put(document2));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ EXPECT_THAT(icing.Get("namespace", "uri1"),
+ IsOkAndHolds(EqualsProto(document1)));
+ EXPECT_THAT(icing.Get("namespace", "uri2"),
+ IsOkAndHolds(EqualsProto(document2)));
+ ICING_EXPECT_OK(icing.Put(document3));
+}
+
+// Same shape as above, but for Delete(): deletions work right after
+// Optimize() and after a restart.
+TEST_F(IcingSearchEngineTest, DeleteShouldWorkAfterOptimization) {
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body1")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body2")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+ ICING_ASSERT_OK(icing.SetSchema(GetDefaultSchema()));
+ ICING_ASSERT_OK(icing.Put(document1));
+ ICING_ASSERT_OK(icing.Put(document2));
+ ICING_ASSERT_OK(icing.Optimize());
+
+ // Validates that Delete() works right after Optimize()
+ ICING_EXPECT_OK(icing.Delete("namespace", "uri1"));
+ EXPECT_THAT(icing.Get("namespace", "uri1"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(icing.Get("namespace", "uri2"),
+ IsOkAndHolds(EqualsProto(document2)));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.Delete("namespace", "uri2"));
+ EXPECT_THAT(icing.Get("namespace", "uri1"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(icing.Get("namespace", "uri2"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+// After Optimize(), compatible SetSchema() updates still succeed — both in
+// the same instance and after restarting from disk.
+TEST_F(IcingSearchEngineTest, SetSchemaShouldWorkAfterOptimization) {
+ // Creates 3 test schemas, each a compatible extension of the previous one
+ // (one additional OPTIONAL indexed property).
+ SchemaProto schema1 = SchemaProto(GetDefaultSchema());
+
+ SchemaProto schema2 = SchemaProto(schema1);
+ auto new_property2 = schema2.mutable_types(0)->add_properties();
+ new_property2->set_property_name("property2");
+ new_property2->set_data_type(PropertyConfigProto::DataType::STRING);
+ new_property2->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ new_property2->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ new_property2->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ SchemaProto schema3 = SchemaProto(schema2);
+ auto new_property3 = schema3.mutable_types(0)->add_properties();
+ new_property3->set_property_name("property3");
+ new_property3->set_data_type(PropertyConfigProto::DataType::STRING);
+ new_property3->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ new_property3->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ new_property3->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+ ICING_ASSERT_OK(icing.SetSchema(schema1));
+ ICING_ASSERT_OK(icing.Optimize());
+
+ // Validates that SetSchema() works right after Optimize()
+ ICING_EXPECT_OK(icing.SetSchema(schema2));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(schema3));
+}
+
+// After Optimize(), Search() still returns the optimized content — both in
+// the same instance and after restarting from disk.
+TEST_F(IcingSearchEngineTest, SearchShouldWorkAfterOptimization) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+ SearchResultProto expected_result;
+ (*expected_result.mutable_results()->Add()->mutable_document()) = document;
+
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+ ICING_ASSERT_OK(icing.SetSchema(GetDefaultSchema()));
+ ICING_ASSERT_OK(icing.Put(document));
+ ICING_ASSERT_OK(icing.Optimize());
+
+ // Validates that Search() works right after Optimize()
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+ } // Destroys IcingSearchEngine to make sure nothing is cached.
+
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+}
+
+// A document whose TTL has not yet expired (FakeClock time < creation
+// timestamp + ttl) is still returned by Search().
+TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) {
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
+ body->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampSecs(100)
+ .SetTtlSecs(500)
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_result;
+ (*expected_result.mutable_results()->Add()->mutable_document()) = document;
+
+ // Time just has to be less than the document's creation timestamp (100) + the
+ // schema's ttl (500)
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSeconds(400);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::move(fake_clock));
+
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(schema));
+ ICING_EXPECT_OK(icing.Put(document));
+
+ // Check that the document is returned as part of search results
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+}
+
+TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) {
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ body->mutable_indexing_config()->set_term_match_type(TermMatchType::PREFIX);
+ body->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampSecs(100)
+ .SetTtlSecs(500)
+ .Build();
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_result;
+ (*expected_result.mutable_results()->Add()->mutable_document()) = document;
+
+ // Time just has to be greater than the document's creation timestamp (100) +
+ // the schema's ttl (500)
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSeconds(700);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::move(fake_clock));
+
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(schema));
+ ICING_EXPECT_OK(icing.Put(document));
+
+  // Check that the document is NOT returned in search results since it has
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(SearchResultProto::default_instance())));
+}
+
+TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) {
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("message");
+
+ auto property = type_config->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ DocumentProto message_document =
+ DocumentBuilder()
+ .SetKey("namespace", "message_uri")
+ .SetSchema("message")
+ .AddStringProperty("body", "foo")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_ASSERT_OK(icing.Initialize());
+ ICING_ASSERT_OK(icing.SetSchema(schema));
+ ICING_ASSERT_OK(icing.Put(message_document));
+
+ // Make sure we can search for message document
+ SearchSpecProto search_spec;
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ // The message isn't indexed, so we get nothing
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(SearchResultProto::default_instance())));
+
+ // With just the schema type filter, we can search for the message
+ search_spec.Clear();
+ search_spec.add_schema_type_filters("message");
+
+ SearchResultProto expected_result;
+ (*expected_result.mutable_results()->Add()->mutable_document()) =
+ message_document;
+
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+
+ // Since SchemaTypeIds are assigned based on order in the SchemaProto, this
+ // will force a change in the DocumentStore's cached SchemaTypeIds
+ schema.clear_types();
+ type_config = schema.add_types();
+ type_config->set_schema_type("email");
+
+ // Adding a new indexed property will require reindexing
+ type_config = schema.add_types();
+ type_config->set_schema_type("message");
+
+ property = type_config->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ property->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ ICING_EXPECT_OK(icing.SetSchema(schema));
+
+ search_spec.Clear();
+ search_spec.set_query("foo");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.add_schema_type_filters("message");
+
+ // We can still search for the message document
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+}
+
+TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) {
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_result;
+ (*expected_result.mutable_results()->Add()->mutable_document()) =
+ GetDefaultDocument();
+
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+ ICING_EXPECT_OK(icing.Put(GetDefaultDocument()));
+ EXPECT_THAT(icing.Get("namespace", "uri"),
+ IsOkAndHolds(EqualsProto(GetDefaultDocument())));
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str()));
+
+ // We should be able to recover from this and access all our previous data
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+
+ // Checks that DocumentLog is still ok
+ EXPECT_THAT(icing.Get("namespace", "uri"),
+ IsOkAndHolds(EqualsProto(GetDefaultDocument())));
+
+ // Checks that the index is still ok so we can search over it
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+
+  // Checks that the schema is still there since it'll be needed to validate
+ ICING_EXPECT_OK(icing.Put(GetDefaultDocument()));
+}
+
+TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderMagic) {
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_result;
+ (*expected_result.mutable_results()->Add()->mutable_document()) =
+ GetDefaultDocument();
+
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+ ICING_EXPECT_OK(icing.Put(GetDefaultDocument()));
+ EXPECT_THAT(icing.Get("namespace", "uri"),
+ IsOkAndHolds(EqualsProto(GetDefaultDocument())));
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Change the header's magic value
+ int32_t invalid_magic = 1; // Anything that's not the actual kMagic value.
+ filesystem()->PWrite(GetHeaderFilename().c_str(),
+ offsetof(IcingSearchEngine::Header, magic),
+ &invalid_magic, sizeof(invalid_magic));
+
+ // We should be able to recover from this and access all our previous data
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+
+ // Checks that DocumentLog is still ok
+ EXPECT_THAT(icing.Get("namespace", "uri"),
+ IsOkAndHolds(EqualsProto(GetDefaultDocument())));
+
+ // Checks that the index is still ok so we can search over it
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+
+  // Checks that the schema is still there since it'll be needed to validate
+ ICING_EXPECT_OK(icing.Put(GetDefaultDocument()));
+}
+
+TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderChecksum) {
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_result;
+ (*expected_result.mutable_results()->Add()->mutable_document()) =
+ GetDefaultDocument();
+
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+ ICING_EXPECT_OK(icing.Put(GetDefaultDocument()));
+ EXPECT_THAT(icing.Get("namespace", "uri"),
+ IsOkAndHolds(EqualsProto(GetDefaultDocument())));
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Change the header's checksum value
+ uint32_t invalid_checksum =
+ 1; // Anything that's not the actual checksum value
+ filesystem()->PWrite(GetHeaderFilename().c_str(),
+ offsetof(IcingSearchEngine::Header, checksum),
+ &invalid_checksum, sizeof(invalid_checksum));
+
+ // We should be able to recover from this and access all our previous data
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+
+ // Checks that DocumentLog is still ok
+ EXPECT_THAT(icing.Get("namespace", "uri"),
+ IsOkAndHolds(EqualsProto(GetDefaultDocument())));
+
+ // Checks that the index is still ok so we can search over it
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+
+  // Checks that the schema is still there since it'll be needed to validate
+ ICING_EXPECT_OK(icing.Put(GetDefaultDocument()));
+}
+
+TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptSchema) {
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+ ICING_EXPECT_OK(icing.Put(GetDefaultDocument()));
+ EXPECT_THAT(icing.Get("namespace", "uri"),
+ IsOkAndHolds(EqualsProto(GetDefaultDocument())));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ const std::string schema_file =
+ absl_ports::StrCat(GetSchemaDir(), "/schema.pb");
+ const std::string corrupt_data = "1234";
+ EXPECT_TRUE(filesystem()->Write(schema_file.c_str(), corrupt_data.data(),
+ corrupt_data.size()));
+
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ EXPECT_THAT(icing.Initialize(),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptDocumentLog) {
+ {
+ // Basic initialization/setup
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+ ICING_EXPECT_OK(icing.Put(GetDefaultDocument()));
+ EXPECT_THAT(icing.Get("namespace", "uri"),
+ IsOkAndHolds(EqualsProto(GetDefaultDocument())));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ const std::string document_log_file =
+ absl_ports::StrCat(GetDocumentIndexDir(), "/document_log");
+ const std::string corrupt_data = "1234";
+ EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(),
+ corrupt_data.data(), corrupt_data.size()));
+
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ EXPECT_THAT(icing.Initialize(),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) {
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("additional", "content")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ {
+ // Initializes folder and schema
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("Message");
+
+ auto property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ property->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ property = type->add_properties();
+ property->set_property_name("additional");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ ICING_EXPECT_OK(icing.SetSchema(schema));
+ ICING_EXPECT_OK(icing.Put(GetDefaultDocument()));
+ ICING_EXPECT_OK(icing.Put(document2));
+
+ // Won't get us anything because "additional" isn't marked as an indexed
+ // property in the schema
+ SearchSpecProto search_spec;
+ search_spec.set_query("additional:content");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_result;
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ {
+    // This schema will change the SchemaTypeIds from the previous schema
+ // (since SchemaTypeIds are assigned based on order of the types, and this
+ // new schema changes the ordering of previous types)
+ SchemaProto new_schema;
+ auto type = new_schema.add_types();
+ type->set_schema_type("Email");
+
+ type = new_schema.add_types();
+ type->set_schema_type("Message");
+
+ // Adding a new property changes the SectionIds (since SectionIds are
+ // assigned based on alphabetical order of indexed sections, marking
+ // "additional" as an indexed property will push the "body" property to a
+ // different SectionId)
+ auto property = type->add_properties();
+ property->set_property_name("body");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ property->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ property = type->add_properties();
+ property->set_property_name("additional");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ property->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir()));
+ ICING_EXPECT_OK(schema_store->SetSchema(new_schema));
+ } // Will persist new schema
+
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+
+ // We can insert a Email document since we kept the new schema
+ DocumentProto email_document =
+ DocumentBuilder()
+ .SetKey("namespace", "email_uri")
+ .SetSchema("Email")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ ICING_EXPECT_OK(icing.Put(email_document));
+ EXPECT_THAT(icing.Get("namespace", "email_uri"),
+ IsOkAndHolds(EqualsProto(email_document)));
+
+ SearchSpecProto search_spec;
+
+ // The section restrict will ensure we are using the correct, updated
+ // SectionId in the Index
+ search_spec.set_query("additional:content");
+
+ // Schema type filter will ensure we're using the correct, updated
+ // SchemaTypeId in the DocumentStore
+ search_spec.add_schema_type_filters("Message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_result;
+ (*expected_result.mutable_results()->Add()->mutable_document()) = document2;
+
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+}
+
+TEST_F(IcingSearchEngineTest, RecoverFromInconsistentDocumentStore) {
+ {
+ // Initializes folder and schema, index one document
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+ ICING_EXPECT_OK(icing.Put(GetDefaultDocument()));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir()));
+ ICING_EXPECT_OK(schema_store->SetSchema(GetDefaultSchema()));
+
+ // Puts a second document into DocumentStore but doesn't index it.
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(filesystem(), GetDocumentIndexDir(), &fake_clock,
+ schema_store.get()));
+ ICING_EXPECT_OK(document_store->Put(document2));
+ }
+
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ // Index Restoration should be triggered here and document2 should be
+ // indexed.
+ ICING_EXPECT_OK(icing.Initialize());
+
+ // DocumentStore kept the additional document
+ EXPECT_THAT(icing.Get("namespace", "uri"),
+ IsOkAndHolds(EqualsProto(GetDefaultDocument())));
+ EXPECT_THAT(icing.Get("namespace", "uri2"),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ // We indexed the additional document
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_result;
+ (*expected_result.mutable_results()->Add()->mutable_document()) = document2;
+ (*expected_result.mutable_results()->Add()->mutable_document()) =
+ GetDefaultDocument();
+
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+}
+
+TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) {
+ SearchSpecProto search_spec;
+ search_spec.set_query("message");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ SearchResultProto expected_result;
+ (*expected_result.mutable_results()->Add()->mutable_document()) =
+ GetDefaultDocument();
+
+ {
+ // Initializes folder and schema, index one document
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+ ICING_EXPECT_OK(icing.Put(GetDefaultDocument()));
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+ } // This should shut down IcingSearchEngine and persist anything it needs to
+
+ // Pretend we lost the entire index
+ EXPECT_TRUE(filesystem()->DeleteDirectoryRecursively(
+ absl_ports::StrCat(GetDocumentIndexDir(), "/idx/lite.").c_str()));
+
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+
+ // Check that our index is ok by searching over the restored index
+ EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(expected_result)));
+}
+
+TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+
+ // Creates 3 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+
+ // Intentionally inserts the documents in the order that is different than
+ // their score order
+ ICING_ASSERT_OK(icing.Put(document2));
+ ICING_ASSERT_OK(icing.Put(document3));
+ ICING_ASSERT_OK(icing.Put(document1));
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+
+ // Result should be in descending score order
+ SearchResultProto exp_result;
+ (*exp_result.mutable_results()->Add()->mutable_document()) = document3;
+ (*exp_result.mutable_results()->Add()->mutable_document()) = document2;
+ (*exp_result.mutable_results()->Add()->mutable_document()) = document1;
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(exp_result)));
+}
+
+TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+
+ // Creates 3 documents and ensures the relationship of them is:
+ // document1 < document2 < document3
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampSecs(1571111111)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampSecs(1572222222)
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampSecs(1573333333)
+ .Build();
+
+ // Intentionally inserts the documents in the order that is different than
+ // their score order
+ ICING_ASSERT_OK(icing.Put(document3));
+ ICING_ASSERT_OK(icing.Put(document1));
+ ICING_ASSERT_OK(icing.Put(document2));
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+
+ SearchResultProto exp_result;
+ (*exp_result.mutable_results()->Add()->mutable_document()) = document2;
+ (*exp_result.mutable_results()->Add()->mutable_document()) = document1;
+ (*exp_result.mutable_results()->Add()->mutable_document()) = document3;
+
+ // Results should not be ranked by score but returned in reverse insertion
+ // order.
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(exp_result)));
+}
+
+TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+
+ // Creates 3 documents and ensures the relationship in terms of creation
+ // timestamp score is: document1 < document2 < document3
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampSecs(1571111111)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampSecs(1572222222)
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampSecs(1573333333)
+ .Build();
+
+ // Intentionally inserts the documents in the order that is different than
+ // their score order
+ ICING_ASSERT_OK(icing.Put(document3));
+ ICING_ASSERT_OK(icing.Put(document1));
+ ICING_ASSERT_OK(icing.Put(document2));
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+
+ // Result should be in descending timestamp order
+ SearchResultProto exp_result;
+ (*exp_result.mutable_results()->Add()->mutable_document()) = document3;
+ (*exp_result.mutable_results()->Add()->mutable_document()) = document2;
+ (*exp_result.mutable_results()->Add()->mutable_document()) = document1;
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(exp_result)));
+}
+
+TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) {
+ IcingSearchEngine icing(GetDefaultIcingOptions());
+ ICING_EXPECT_OK(icing.Initialize());
+ ICING_EXPECT_OK(icing.SetSchema(GetDefaultSchema()));
+
+ // Creates 3 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampSecs(kDefaultCreationTimestampSecs)
+ .Build();
+
+ // Intentionally inserts the documents in the order that is different than
+ // their score order
+ ICING_ASSERT_OK(icing.Put(document2));
+ ICING_ASSERT_OK(icing.Put(document3));
+ ICING_ASSERT_OK(icing.Put(document1));
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+
+ // Result should be in ascending score order
+ SearchResultProto exp_result;
+ (*exp_result.mutable_results()->Add()->mutable_document()) = document1;
+ (*exp_result.mutable_results()->Add()->mutable_document()) = document2;
+ (*exp_result.mutable_results()->Add()->mutable_document()) = document3;
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ scoring_spec.set_order_by(ScoringSpecProto::Order::ASC);
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance()),
+ IsOkAndHolds(EqualsProto(exp_result)));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/hit/doc-hit-info.cc b/icing/index/hit/doc-hit-info.cc
new file mode 100644
index 0000000..80dbbde
--- /dev/null
+++ b/icing/index/hit/doc-hit-info.cc
@@ -0,0 +1,62 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/hit/doc-hit-info.h"
+
+#include "icing/legacy/core/icing-string-util.h"
+
+namespace icing {
+namespace lib {
+
+bool DocHitInfo::operator<(const DocHitInfo& other) const {
+ if (document_id() != other.document_id()) {
+ // Sort by document_id descending. This mirrors how the individual hits that
+ // are collapsed into this DocHitInfo would sort with other hits -
+ // document_ids are inverted when encoded in hits. Hits are encoded this way
+ // because they are appended to posting lists and the most recent value
+ // appended to a posting list must have the smallest encoded value of any
+ // hit on the posting list.
+ return document_id() > other.document_id();
+ }
+ if (hit_section_ids_mask() != other.hit_section_ids_mask()) {
+ return hit_section_ids_mask() < other.hit_section_ids_mask();
+ }
+ // Doesn't matter which way we compare this array, as long as
+ // DocHitInfo is unequal when it is unequal.
+ return memcmp(max_hit_score_, other.max_hit_score_, sizeof(max_hit_score_)) <
+ 0;
+}
+
+void DocHitInfo::UpdateSection(SectionId section_id, Hit::Score hit_score) {
+ SectionIdMask section_id_mask = (1u << section_id);
+ if (hit_section_ids_mask() & section_id_mask) {
+ max_hit_score_[section_id] =
+ std::max(max_hit_score_[section_id], hit_score);
+ } else {
+ max_hit_score_[section_id] = hit_score;
+ hit_section_ids_mask_ |= section_id_mask;
+ }
+}
+
+void DocHitInfo::MergeSectionsFrom(const DocHitInfo& other) {
+ SectionIdMask other_mask = other.hit_section_ids_mask();
+ while (other_mask) {
+ SectionId section_id = __builtin_ctz(other_mask);
+ UpdateSection(section_id, other.max_hit_score(section_id));
+ other_mask &= ~(1u << section_id);
+ }
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/hit/doc-hit-info.h b/icing/index/hit/doc-hit-info.h
new file mode 100644
index 0000000..386822d
--- /dev/null
+++ b/icing/index/hit/doc-hit-info.h
@@ -0,0 +1,88 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_HIT_DOC_HIT_INFO_H_
+#define ICING_INDEX_HIT_DOC_HIT_INFO_H_
+
+#include <limits>
+
+#include "icing/index/hit/hit.h"
+#include "icing/legacy/core/icing-packed-pod.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// DocHitInfo provides a collapsed view of all hits for a specific term and doc.
+// Hits contain a document_id, section_id and a hit score. The information in
+// multiple hits is collapsed into a DocHitInfo by providing a SectionIdMask of
+// all sections that contained a hit for this term as well as the highest hit
+// score of any hit for each section.
+class DocHitInfo {
+ public:
+ explicit DocHitInfo(DocumentId document_id_in = kInvalidDocumentId,
+ SectionIdMask hit_section_ids_mask = kSectionIdMaskNone)
+ : document_id_(document_id_in),
+ hit_section_ids_mask_(hit_section_ids_mask) {
+ memset(max_hit_score_, Hit::kMaxHitScore, sizeof(max_hit_score_)); // byte-fill; relies on sizeof(Score) == 1 (static_assert below)
+ }
+
+ DocumentId document_id() const { return document_id_; }
+
+ void set_document_id(DocumentId document_id) { document_id_ = document_id; }
+
+ SectionIdMask hit_section_ids_mask() const { return hit_section_ids_mask_; }
+
+ void set_hit_section_ids_mask(SectionIdMask section_id_mask) {
+ hit_section_ids_mask_ = section_id_mask;
+ }
+
+ Hit::Score max_hit_score(SectionId section_id) const {
+ return max_hit_score_[section_id];
+ }
+
+ bool operator<(const DocHitInfo& other) const;
+ bool operator==(const DocHitInfo& other) const {
+ return (*this < other) == (other < *this); // equal iff neither orders before the other
+ }
+
+ // Updates the hit_section_ids_mask and max_hit_score for the section, if
+ // necessary.
+ void UpdateSection(SectionId section_id, Hit::Score hit_score);
+
+ // Merges the sections of other into this. The hit_section_ids_masks are or'd
+ // and the max hit score for each section between the two is set.
+ //
+ // This does not affect the DocumentId of this or other. If callers care about
+ // only merging sections for DocHitInfos with the same DocumentId, callers
+ // should check this themselves.
+ void MergeSectionsFrom(const DocHitInfo& other);
+
+ private:
+ DocumentId document_id_;
+ SectionIdMask hit_section_ids_mask_;
+ Hit::Score max_hit_score_[kMaxSectionId + 1]; // reads as kMaxHitScore for sections not in the mask
+} __attribute__((packed));
+static_assert(sizeof(DocHitInfo) == 22, "");
+// TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions.
+static_assert(icing_is_packed_pod<DocHitInfo>::value, "go/icing-ubsan");
+static_assert(sizeof(Hit::Score) == 1,
+ "Change how max_hit_score_ is initialized if changing the type "
+ "of Hit::Score");
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_HIT_DOC_HIT_INFO_H_
diff --git a/icing/index/hit/doc-hit-info_test.cc b/icing/index/hit/doc-hit-info_test.cc
new file mode 100644
index 0000000..d8adbc1
--- /dev/null
+++ b/icing/index/hit/doc-hit-info_test.cc
@@ -0,0 +1,170 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/hit/doc-hit-info.h"
+
+#include "icing/index/hit/hit.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace icing {
+namespace lib {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsTrue;
+using ::testing::Ne;
+
+constexpr DocumentId kSomeDocumentId = 12;
+constexpr DocumentId kSomeOtherDocumentId = 54;
+
+TEST(DocHitInfoTest, InitialMaxHitScores) {
+ DocHitInfo info(kSomeDocumentId);
+ for (SectionId i = 0; i <= kMaxSectionId; ++i) {
+ EXPECT_THAT(info.max_hit_score(i), Eq(Hit::kMaxHitScore)); // every section defaults to kMaxHitScore
+ }
+}
+
+TEST(DocHitInfoTest, UpdateHitScores) {
+ DocHitInfo info(kSomeDocumentId);
+ ASSERT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore));
+
+ // Updating a section for the first time, should change its max hit score,
+ // even though the hit score (16) may be lower than the current value returned
+ // by info.max_hit_score(3) (kMaxHitScore)
+ info.UpdateSection(3, 16);
+ EXPECT_THAT(info.max_hit_score(3), Eq(16));
+
+ // Updating a section with a hit score lower than the previously set one
+ // should not update max hit score.
+ info.UpdateSection(3, 15);
+ EXPECT_THAT(info.max_hit_score(3), Eq(16));
+
+ // Updating a section with a hit score higher than the previously set one
+ // should update the max hit score.
+ info.UpdateSection(3, 17);
+ EXPECT_THAT(info.max_hit_score(3), Eq(17));
+
+ // Updating a section with kMaxHitScore should *always* set the max hit
+ // score to kMaxHitScore (regardless of what value kMaxHitScore is
+ // defined with).
+ info.UpdateSection(3, Hit::kMaxHitScore);
+ EXPECT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore));
+
+ // Updating a section that has had kMaxHitScore explicitly set, should
+ // *never* change the max hit score (regardless of what value kMaxHitScore
+ // is defined with).
+ info.UpdateSection(3, 16);
+ EXPECT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore));
+}
+
+TEST(DocHitInfoTest, UpdateSectionIdMask) {
+ DocHitInfo info(kSomeDocumentId);
+ EXPECT_THAT(info.hit_section_ids_mask(), Eq(kSectionIdMaskNone));
+
+ info.UpdateSection(3, 16);
+ EXPECT_THAT(info.hit_section_ids_mask() & 1U << 3, IsTrue());
+
+ // Calling update again shouldn't do anything
+ info.UpdateSection(3, 15);
+ EXPECT_THAT(info.hit_section_ids_mask() & 1U << 3, IsTrue());
+
+ // Updating another section shouldn't do anything
+ info.UpdateSection(2, 77);
+ EXPECT_THAT(info.hit_section_ids_mask() & 1U << 3, IsTrue());
+}
+
+TEST(DocHitInfoTest, MergeSectionsFromDifferentDocumentId) {
+ // Merging infos with different document_ids works.
+ DocHitInfo info1(kSomeDocumentId);
+ DocHitInfo info2(kSomeOtherDocumentId);
+ info2.UpdateSection(7, 12);
+ info1.MergeSectionsFrom(info2);
+ EXPECT_THAT(info1.max_hit_score(7), Eq(12));
+ EXPECT_THAT(info1.document_id(), Eq(kSomeDocumentId)); // merge never changes the DocumentId
+}
+
+TEST(DocHitInfoTest, MergeSectionsFromKeepsOldSection) {
+ // Merging shouldn't override sections that are present in info1, but not
+ // present in info2.
+ DocHitInfo info1(kSomeDocumentId);
+ info1.UpdateSection(3, 16);
+ DocHitInfo info2(kSomeDocumentId);
+ info1.MergeSectionsFrom(info2);
+ EXPECT_THAT(info1.max_hit_score(3), Eq(16));
+}
+
+TEST(DocHitInfoTest, MergeSectionsFromAddsNewSection) {
+ // Merging should add sections that were not present in info1, but are present
+ // in info2.
+ DocHitInfo info1(kSomeDocumentId);
+ DocHitInfo info2(kSomeDocumentId);
+ info2.UpdateSection(7, 12);
+ info1.MergeSectionsFrom(info2);
+ EXPECT_THAT(info1.max_hit_score(7), Eq(12));
+}
+
+TEST(DocHitInfoTest, MergeSectionsFromSetsHigherHitScore) {
+ // Merging should override the value of a section in info1 if the same section
+ // is present in info2 with a higher hit score.
+ DocHitInfo info1(kSomeDocumentId);
+ info1.UpdateSection(2, 77);
+ DocHitInfo info2(kSomeDocumentId);
+ info2.UpdateSection(2, 89);
+ info1.MergeSectionsFrom(info2);
+ EXPECT_THAT(info1.max_hit_score(2), Eq(89));
+}
+
+TEST(DocHitInfoTest, MergeSectionsFromDoesNotSetLowerHitScore) {
+ // Merging should not override the hit score of a section in info1 if the same
+ // section is present in info2 but with a lower hit score.
+ DocHitInfo info1(kSomeDocumentId);
+ info1.UpdateSection(5, 108);
+ DocHitInfo info2(kSomeDocumentId);
+ info2.UpdateSection(5, 13);
+ info1.MergeSectionsFrom(info2);
+ EXPECT_THAT(info1.max_hit_score(5), Eq(108));
+}
+
+TEST(DocHitInfoTest, Comparison) {
+ constexpr DocumentId kDocumentId = 1;
+ DocHitInfo info(kDocumentId);
+ info.UpdateSection(1, 12);
+
+ constexpr DocumentId kHighDocumentId = 15;
+ DocHitInfo high_document_id_info(kHighDocumentId);
+ high_document_id_info.UpdateSection(1, 12);
+
+ DocHitInfo high_section_id_info(kDocumentId);
+ high_section_id_info.UpdateSection(1, 12);
+ high_section_id_info.UpdateSection(6, Hit::kMaxHitScore);
+
+ std::vector<DocHitInfo> infos{info, high_document_id_info,
+ high_section_id_info};
+ std::sort(infos.begin(), infos.end());
+ EXPECT_THAT(infos, // higher document_id sorts first (document_id descending)
+ ElementsAre(high_document_id_info, info, high_section_id_info));
+
+ // There are no requirements for how DocHitInfos with the same DocumentIds and
+ // hit masks will compare, but they must not be equal.
+ DocHitInfo different_hit_score_info(kDocumentId);
+ different_hit_score_info.UpdateSection(1, 76);
+ EXPECT_THAT(info < different_hit_score_info,
+ Ne(different_hit_score_info < info));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/hit/hit.cc b/icing/index/hit/hit.cc
new file mode 100644
index 0000000..be2df5c
--- /dev/null
+++ b/icing/index/hit/hit.cc
@@ -0,0 +1,100 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/hit/hit.h"
+
+#include "icing/store/document-id.h"
+#include "icing/util/bit-util.h"
+
+namespace icing {
+namespace lib {
+
+namespace { // file-local helpers for encoding/decoding Hit::Value
+
+enum FlagOffset {
+ // This hit, whether exact or not, came from a prefixed section and will
+ // need to be backfilled into branching posting lists if/when those are
+ // created.
+ kInPrefixSection = 0,
+ // This hit represents a prefix of a longer term. If exact matches are
+ // required, then this hit should be ignored.
+ kPrefixHit = 1,
+ // Whether or not the hit has a hit score other than kMaxHitScore.
+ kHasScore = 2,
+ kNumFlags = 3,
+};
+static_assert(kDocumentIdBits + kSectionIdBits + kNumFlags <=
+ sizeof(Hit::Value) * 8,
+ "HitOverflow");
+
+inline DocumentId InvertDocumentId(DocumentId document_id) {
+ static_assert(kMaxDocumentId <= (std::numeric_limits<DocumentId>::max() - 1),
+ "(kMaxDocumentId + 1) must not overflow.");
+ static_assert(
+ (kMaxDocumentId + 1) < (1U << kDocumentIdBits),
+ "(kMaxDocumentId + 1) must also fit in kDocumentIdBits wide bitfield");
+ // Invert the document_id value. +1 is added so the resulting range is [1,
+ // kMaxDocumentId + 1].
+ return (kMaxDocumentId + 1) - document_id; // maps 0 -> kMaxDocumentId + 1 and kMaxDocumentId -> 1; self-inverse
+}
+
+} // namespace
+
+Hit::Hit(SectionId section_id, DocumentId document_id, Hit::Score score,
+ bool in_prefix_section, bool is_prefix_hit)
+ : score_(score) {
+ // Values are stored so that when sorted, they appear in document_id
+ // descending, section_id ascending, order. Also, all else being
+ // equal, non-prefix hits sort before prefix hits. So inverted
+ // document_id appears in the most significant bits, followed by
+ // (uninverted) section_id.
+ value_ = 0; // start from a clean slate before OR-ing in the bitfields
+ bit_util::BitfieldSet(InvertDocumentId(document_id),
+ kSectionIdBits + kNumFlags, kDocumentIdBits, &value_);
+ bit_util::BitfieldSet(section_id, kNumFlags, kSectionIdBits, &value_);
+ bit_util::BitfieldSet(score != kMaxHitScore, kHasScore, 1, &value_); // flag set only for non-default scores
+ bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, 1, &value_);
+ bit_util::BitfieldSet(in_prefix_section, kInPrefixSection, 1, &value_);
+}
+
+DocumentId Hit::document_id() const {
+ DocumentId inverted_document_id = bit_util::BitfieldGet(
+ value(), kSectionIdBits + kNumFlags, kDocumentIdBits); // stored in the most significant used bits
+ // Undo the document_id inversion.
+ return InvertDocumentId(inverted_document_id);
+}
+
+SectionId Hit::section_id() const {
+ return bit_util::BitfieldGet(value(), kNumFlags, kSectionIdBits); // section id sits just above the flag bits
+}
+
+bool Hit::has_score() const {
+ return bit_util::BitfieldGet(value(), kHasScore, 1);
+}
+
+bool Hit::is_prefix_hit() const {
+ return bit_util::BitfieldGet(value(), kPrefixHit, 1);
+}
+
+bool Hit::is_in_prefix_section() const {
+ return bit_util::BitfieldGet(value(), kInPrefixSection, 1);
+}
+
+bool Hit::EqualsDocumentIdAndSectionId::operator()(const Hit& hit1,
+ const Hit& hit2) const {
+ return (hit1.value() >> kNumFlags) == (hit2.value() >> kNumFlags); // discard flag bits; compare doc id + section id only
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/hit/hit.h b/icing/index/hit/hit.h
new file mode 100644
index 0000000..f84dc24
--- /dev/null
+++ b/icing/index/hit/hit.h
@@ -0,0 +1,98 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_HIT_HIT_H_
+#define ICING_INDEX_HIT_HIT_H_
+
+#include <cstdint>
+#include <limits>
+
+#include "icing/legacy/core/icing-packed-pod.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// Hit is a specific encoding that refers to content within a document. A hit
+// consists of:
+// - a DocumentId
+// - a SectionId
+// referring to the document and section that the hit corresponds to, as well as
+// metadata about the hit:
+// - whether the Hit has a Score other than the default value
+// - whether the Hit does not appear exactly in the document, but instead
+// represents a term that is a prefix of a term in the document
+// - whether the Hit came from a section that has prefix expansion enabled
+// and a score for the hit. Ranging from [0,255] a higher score indicates a
+// higher quality hit.
+// The hit is the most basic unit of the index and, when grouped together by
+// term, can be used to encode what terms appear in what documents.
+class Hit {
+ public:
+ // The datatype used to encode Hit information: the document_id, section_id
+ // and the has_score, prefix hit and in prefix section flags.
+ using Value = uint32_t;
+
+ // WARNING: Changing this value will invalidate any pre-existing posting lists
+ // on user devices.
+ static constexpr Value kInvalidValue = std::numeric_limits<Value>::max();
+ // Docs are sorted in reverse, and 0 is never used as the inverted
+ // DocumentId (because it is the inverse of kInvalidValue), so it is always
+ // the max in a descending sort.
+ static constexpr Value kMaxDocumentIdSortValue = 0;
+
+ // A score reflecting the "quality" of this hit. The higher the score, the
+ // higher quality the hit.
+ using Score = uint8_t;
+ // By default, hits are given the highest possible score.
+ static constexpr Score kMaxHitScore = std::numeric_limits<Score>::max();
+
+ explicit Hit(Value value = kInvalidValue, Score score = kMaxHitScore)
+ : value_(value), score_(score) {}
+ Hit(SectionId section_id, DocumentId document_id, Score score,
+ bool in_prefix_section = false, bool is_prefix_hit = false);
+
+ bool is_valid() const { return value() != kInvalidValue; }
+ Value value() const { return value_; }
+ DocumentId document_id() const;
+ SectionId section_id() const;
+ // Whether or not the hit contains a non-default score. Hits with non-default
+ // score are considered to be of lower quality.
+ bool has_score() const;
+ Score score() const { return score_; }
+ bool is_prefix_hit() const;
+ bool is_in_prefix_section() const;
+
+ bool operator<(const Hit& h2) const { return value() < h2.value(); } // compares encoded value only; score_ is ignored
+ bool operator==(const Hit& h2) const { return value() == h2.value(); }
+
+ struct EqualsDocumentIdAndSectionId {
+ bool operator()(const Hit& hit1, const Hit& hit2) const;
+ };
+
+ private:
+ // Value and score must be in this order.
+ // Value bits layout: 5 unused + 20 document_id + 4 section id + 3 flags.
+ Value value_;
+ Score score_; // stored outside value_; does not participate in ordering/equality
+} __attribute__((packed));
+static_assert(sizeof(Hit) == 5, "");
+// TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions.
+static_assert(icing_is_packed_pod<Hit>::value, "go/icing-ubsan");
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_HIT_HIT_H_
diff --git a/icing/index/hit/hit_test.cc b/icing/index/hit/hit_test.cc
new file mode 100644
index 0000000..982bfcf
--- /dev/null
+++ b/icing/index/hit/hit_test.cc
@@ -0,0 +1,137 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/hit/hit.h"
+
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Lt;
+using ::testing::Not;
+
+static constexpr DocumentId kSomeDocumentId = 24;
+static constexpr SectionId kSomeSectionid = 5;
+static constexpr Hit::Score kSomeHitScore = 57;
+
+TEST(HitTest, HasScoreFlag) {
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ EXPECT_THAT(h1.has_score(), IsFalse()); // default score => flag unset
+ EXPECT_THAT(h1.score(), Eq(Hit::kMaxHitScore));
+
+ Hit h2(kSomeSectionid, kSomeDocumentId, kSomeHitScore);
+ EXPECT_THAT(h2.has_score(), IsTrue());
+ EXPECT_THAT(h2.score(), Eq(kSomeHitScore));
+}
+
+TEST(HitTest, IsPrefixHitFlag) {
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ EXPECT_THAT(h1.is_prefix_hit(), IsFalse());
+
+ Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ /*in_prefix_section=*/false, /*is_prefix_hit=*/false);
+ EXPECT_THAT(h2.is_prefix_hit(), IsFalse());
+
+ Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ /*in_prefix_section=*/false, /*is_prefix_hit=*/true);
+ EXPECT_THAT(h3.is_prefix_hit(), IsTrue());
+}
+
+TEST(HitTest, IsInPrefixSectionFlag) {
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ EXPECT_THAT(h1.is_in_prefix_section(), IsFalse());
+
+ Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ /*in_prefix_section=*/false);
+ EXPECT_THAT(h2.is_in_prefix_section(), IsFalse());
+
+ Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ /*in_prefix_section=*/true);
+ EXPECT_THAT(h3.is_in_prefix_section(), IsTrue());
+}
+
+TEST(HitTest, Accessors) {
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ EXPECT_THAT(h1.document_id(), Eq(kSomeDocumentId)); // round-trips through the inverted encoding
+ EXPECT_THAT(h1.section_id(), Eq(kSomeSectionid));
+}
+
+TEST(HitTest, Valid) {
+ Hit def;
+ EXPECT_THAT(def.is_valid(), IsFalse());
+
+ Hit explicit_invalid(Hit::kInvalidValue);
+ EXPECT_THAT(explicit_invalid.is_valid(), IsFalse());
+
+ static constexpr Hit::Value kSomeValue = 65372;
+ Hit explicit_valid(kSomeValue);
+ EXPECT_THAT(explicit_valid.is_valid(), IsTrue());
+
+ Hit maximum_document_id_hit(kSomeSectionid, kMaxDocumentId, kSomeHitScore);
+ EXPECT_THAT(maximum_document_id_hit.is_valid(), IsTrue());
+
+ Hit maximum_section_id_hit(kMaxSectionId, kSomeDocumentId, kSomeHitScore);
+ EXPECT_THAT(maximum_section_id_hit.is_valid(), IsTrue());
+
+ Hit minimum_document_id_hit(kSomeSectionid, 0, kSomeHitScore);
+ EXPECT_THAT(minimum_document_id_hit.is_valid(), IsTrue());
+
+ Hit minimum_section_id_hit(0, kSomeDocumentId, kSomeHitScore);
+ EXPECT_THAT(minimum_section_id_hit.is_valid(), IsTrue());
+}
+
+TEST(HitTest, Comparison) {
+ Hit hit(1, 243, Hit::kMaxHitScore);
+ // DocumentIds are sorted in descending order. So a hit with a lower
+ // document_id should be considered greater than one with a higher
+ // document_id.
+ Hit higher_document_id_hit(1, 2409, Hit::kMaxHitScore);
+ Hit higher_section_id_hit(15, 243, Hit::kMaxHitScore);
+ // Whether or not a hit score was set is considered, but the score itself is
+ // not.
+ Hit hitscore_hit(1, 243, 12);
+ Hit prefix_hit(1, 243, Hit::kMaxHitScore, /*in_prefix_section=*/false,
+ /*is_prefix_hit=*/true);
+ Hit hit_in_prefix_section(1, 243, Hit::kMaxHitScore,
+ /*in_prefix_section=*/true,
+ /*is_prefix_hit=*/false);
+
+ std::vector<Hit> hits{
+ hit, higher_document_id_hit, higher_section_id_hit, hitscore_hit,
+ prefix_hit, hit_in_prefix_section};
+ std::sort(hits.begin(), hits.end());
+ EXPECT_THAT(hits,
+ ElementsAre(higher_document_id_hit, hit, hit_in_prefix_section,
+ prefix_hit, hitscore_hit, higher_section_id_hit));
+
+ Hit higher_hitscore_hit(1, 243, 108);
+ // Hit score value is not considered when comparing hits.
+ EXPECT_THAT(hitscore_hit, Not(Lt(higher_hitscore_hit)));
+ EXPECT_THAT(higher_hitscore_hit, Not(Lt(hitscore_hit)));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
new file mode 100644
index 0000000..c9e07be
--- /dev/null
+++ b/icing/index/index-processor.cc
@@ -0,0 +1,96 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/index-processor.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/index.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section-manager.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/token.h"
+#include "icing/tokenization/tokenizer-factory.h"
+#include "icing/tokenization/tokenizer.h"
+#include "icing/transform/normalizer.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::Status IndexProcessor::IndexDocument(
+ const DocumentProto& document, DocumentId document_id) {
+ if (index_->last_added_document_id() != kInvalidDocumentId &&
+ document_id <= index_->last_added_document_id()) { // document_ids must be indexed in strictly increasing order
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "DocumentId %d must be greater than last added document_id %d",
+ document_id, index_->last_added_document_id()));
+ }
+ ICING_ASSIGN_OR_RETURN(std::vector<Section> sections,
+ schema_store_.ExtractSections(document));
+ uint32_t num_tokens = 0; // counted across all sections of this document
+ libtextclassifier3::Status overall_status;
+ for (const Section& section : sections) {
+ Index::Editor editor = index_->Edit(document_id, section.metadata.id,
+ section.metadata.term_match_type); // one editor per section
+ for (std::string_view subcontent : section.content) {
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer> tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ section.metadata.tokenizer, &lang_segmenter_)); // NOTE(review): recreated per subcontent; could be hoisted per-section
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> itr,
+ tokenizer->Tokenize(subcontent));
+ while (itr->Advance()) {
+ if (++num_tokens > options_.max_tokens_per_document) {
+ switch (options_.token_limit_behavior) {
+ case Options::TokenLimitBehavior::kReturnError:
+ return absl_ports::ResourceExhaustedError(
+ "Max number of tokens reached!");
+ case Options::TokenLimitBehavior::kSuppressError:
+ return libtextclassifier3::Status::OK; // NOTE(review): drops overall_status; a prior AddHit failure is silently lost — confirm intended
+ }
+ }
+ std::string term = normalizer_.NormalizeTerm(itr->GetToken().text);
+ // Add this term to the index. Even if adding this hit fails, we keep
+ // trying to add more hits because it's possible that future hits could
+ // still be added successfully. For instance if the lexicon is full, we
+ // might fail to add a hit for a new term, but should still be able to
+ // add hits for terms that are already in the index.
+ auto status = editor.AddHit(term.c_str());
+ if (overall_status.ok() && !status.ok()) {
+ // If we've succeeded to add everything so far, set overall_status to
+ // represent this new failure. If we've already failed, no need to
+ // update the status - we're already going to return a resource
+ // exhausted error.
+ overall_status = status;
+ }
+ }
+ }
+ }
+ return overall_status; // OK only if every hit was added successfully
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h
new file mode 100644
index 0000000..612fdfe
--- /dev/null
+++ b/icing/index/index-processor.h
@@ -0,0 +1,94 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_INDEX_PROCESSOR_H_
+#define ICING_INDEX_INDEX_PROCESSOR_H_
+
+#include <cstdint>
+#include <string>
+
+#include "utils/base/status.h"
+#include "icing/index/index.h"
+#include "icing/proto/document.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section-manager.h"
+#include "icing/store/document-id.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/token.h"
+#include "icing/transform/normalizer.h"
+
+namespace icing {
+namespace lib {
+
+class IndexProcessor {
+ public:
+ struct Options {
+ int32_t max_tokens_per_document; // cap enforced across all sections of one document
+
+ // Indicates how a document exceeding max_tokens_per_document should be
+ // handled.
+ enum class TokenLimitBehavior {
+ // When set, the first max_tokens_per_document will be indexed. If the
+ // token count exceeds max_tokens_per_document, a ResourceExhausted error
+ // will be returned.
+ kReturnError,
+ // When set, the first max_tokens_per_document will be indexed. If the
+ // token count exceeds max_tokens_per_document, OK will be returned.
+ kSuppressError,
+ };
+ TokenLimitBehavior token_limit_behavior;
+ };
+
+ // Does not take any ownership, and all pointers must refer to valid objects
+ // that outlive the one constructed.
+ // TODO(b/141180665): Add nullptr checks for the raw pointers
+ IndexProcessor(const SchemaStore* schema_store,
+ const LanguageSegmenter* lang_segmenter,
+ const Normalizer* normalizer, Index* index,
+ const Options& options)
+ : schema_store_(*schema_store),
+ lang_segmenter_(*lang_segmenter),
+ normalizer_(*normalizer),
+ index_(index),
+ options_(options) {}
+
+ // Add document to the index, associated with document_id. If the number of
+ // tokens in the document exceeds max_tokens_per_document, then only the first
+ // max_tokens_per_document will be added to the index. All tokens of length
+ // exceeding max_token_length will be shortened to max_token_length.
+ //
+ // Returns:
+ // INVALID_ARGUMENT if document_id is less than the document_id of a
+ // previously indexed
+ // document or tokenization fails.
+ // RESOURCE_EXHAUSTED if the index is full and can't add anymore content.
+ // NOT_FOUND if there is no definition for the document's schema type.
+ // INTERNAL_ERROR if any other errors occur
+ libtextclassifier3::Status IndexDocument(const DocumentProto& document,
+ DocumentId document_id);
+
+ private:
+ std::string NormalizeToken(const Token& token); // NOTE(review): no definition in the accompanying .cc — confirm still needed
+
+ const SchemaStore& schema_store_; // not owned; must outlive this
+ const LanguageSegmenter& lang_segmenter_; // not owned
+ const Normalizer& normalizer_; // not owned
+ Index* const index_; // not owned
+ const Options options_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_INDEX_PROCESSOR_H_
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
new file mode 100644
index 0000000..f22d2f2
--- /dev/null
+++ b/icing/index/index-processor_benchmark.cc
@@ -0,0 +1,379 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/index-processor.h"
+#include "icing/index/index.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section-manager.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/logging.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/index:index-processor_benchmark
+//
+// $ blaze-bin/icing/index/index-processor_benchmark
+// --benchmarks=all
+//
+// Run on an Android device:
+// Make target //icing/tokenization:language-segmenter depend on
+// //third_party/icu
+//
+// Make target //icing/transform:normalizer depend on
+// //third_party/icu
+//
+// Download LangId model file from
+// //nlp/saft/components/lang_id/mobile/fb_model:models/latest_model.smfb and
+// put it into your device:
+// $ adb push [your model path] /data/local/tmp/
+//
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/index:index-processor_benchmark
+//
+// $ adb push blaze-bin/icing/index/index-processor_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/index-processor_benchmark --benchmarks=all
+// --adb
+
+// Flag to tell the benchmark that it'll be run on an Android device via adb,
+// the benchmark will set up data files accordingly.
+ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device");
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Creates a fake type config with 10 properties (p0 - p9)
+void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) {
+ type_config->set_schema_type("Fake_Type");
+
+ for (int i = 0; i < 10; i++) {
+ auto property = type_config->add_properties();
+ property->set_property_name(
+ IcingStringUtil::StringPrintf("p%d", i)); // p0 - p9
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ property->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ }
+}
+
+DocumentProto CreateDocumentWithOneProperty(int content_length) {
+ return DocumentBuilder()
+ .SetKey("icing", "fake/1")
+ .SetSchema("Fake_Type")
+ .AddStringProperty("p0", std::string(content_length, 'A'))
+ .Build();
+}
+
+DocumentProto CreateDocumentWithTenProperties(int content_length) {
+ int property_length = content_length / 10;
+ return DocumentBuilder()
+ .SetKey("icing", "fake/1")
+ .SetSchema("Fake_Type")
+ .AddStringProperty("p0", std::string(property_length, 'A'))
+ .AddStringProperty("p1", std::string(property_length, 'B'))
+ .AddStringProperty("p2", std::string(property_length, 'C'))
+ .AddStringProperty("p3", std::string(property_length, 'D'))
+ .AddStringProperty("p4", std::string(property_length, 'E'))
+ .AddStringProperty("p5", std::string(property_length, 'F'))
+ .AddStringProperty("p6", std::string(property_length, 'G'))
+ .AddStringProperty("p7", std::string(property_length, 'H'))
+ .AddStringProperty("p8", std::string(property_length, 'I'))
+ .AddStringProperty("p9", std::string(property_length, 'J'))
+ .Build();
+}
+
+DocumentProto CreateDocumentWithDiacriticLetters(int content_length) {
+ std::string content;
+ while (content.length() < content_length) {
+ content.append("àáâãā");
+ }
+ return DocumentBuilder()
+ .SetKey("icing", "fake/1")
+ .SetSchema("Fake_Type")
+ .AddStringProperty("p0", content)
+ .Build();
+}
+
+DocumentProto CreateDocumentWithHiragana(int content_length) {
+ std::string content;
+ while (content.length() < content_length) {
+ content.append("あいうえお");
+ }
+ return DocumentBuilder()
+ .SetKey("icing", "fake/1")
+ .SetSchema("Fake_Type")
+ .AddStringProperty("p0", content)
+ .Build();
+}
+
+std::unique_ptr<Index> CreateIndex(const IcingFilesystem& filesystem,
+ const std::string& index_dir) {
+ Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10);
+ return Index::Create(options, &filesystem).ValueOrDie();
+}
+
+std::unique_ptr<LanguageSegmenter> CreateLanguageSegmenter() {
+ if (absl::GetFlag(FLAGS_adb)) {
+ return LanguageSegmenter::Create("/data/local/tmp/latest_model.smfb")
+ .ValueOrDie();
+ } else {
+ return LanguageSegmenter::Create(GetLangIdModelPath()).ValueOrDie();
+ }
+}
+
+std::unique_ptr<Normalizer> CreateNormalizer() {
+ return Normalizer::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max())
+ .ValueOrDie();
+}
+
+std::unique_ptr<SchemaStore> CreateSchemaStore() {
+ Filesystem filesystem;
+ std::unique_ptr<SchemaStore> schema_store =
+ SchemaStore::Create(&filesystem, GetTestTempDir()).ValueOrDie();
+
+ SchemaProto schema;
+ CreateFakeTypeConfig(schema.add_types());
+ auto set_schema_status = schema_store->SetSchema(schema);
+
+ if (!set_schema_status.ok()) {
+ ICING_LOG(ERROR) << set_schema_status.status().error_message();
+ }
+
+ return schema_store;
+}
+
+void CleanUp(const IcingFilesystem& filesystem, const std::string& index_dir) {
+ filesystem.DeleteDirectoryRecursively(index_dir.c_str());
+}
+
+std::unique_ptr<IndexProcessor> CreateIndexProcessor(
+ const SchemaStore* schema_store,
+ const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
+ Index* index) {
+ IndexProcessor::Options processor_options{};
+ processor_options.max_tokens_per_document = 1024 * 1024 * 10;
+ processor_options.token_limit_behavior =
+ IndexProcessor::Options::TokenLimitBehavior::kReturnError;
+
+ return std::make_unique<IndexProcessor>(schema_store, language_segmenter,
+ normalizer, index, processor_options);
+}
+
+void BM_IndexDocumentWithOneProperty(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+ }
+
+ IcingFilesystem filesystem;
+ std::string index_dir = GetTestTempDir() + "/index_test/";
+
+ CleanUp(filesystem, index_dir);
+
+ std::unique_ptr<Index> index = CreateIndex(filesystem, index_dir);
+ std::unique_ptr<LanguageSegmenter> language_segmenter =
+ CreateLanguageSegmenter();
+ std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
+ std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore();
+ std::unique_ptr<IndexProcessor> index_processor =
+ CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
+ normalizer.get(), index.get());
+
+ DocumentProto input_document = CreateDocumentWithOneProperty(state.range(0));
+
+ DocumentId document_id = 0;
+ for (auto _ : state) {
+ ICING_ASSERT_OK(
+ index_processor->IndexDocument(input_document, document_id++));
+ }
+
+ CleanUp(filesystem, index_dir);
+}
+BENCHMARK(BM_IndexDocumentWithOneProperty)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_IndexDocumentWithTenProperties(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+ }
+
+ IcingFilesystem filesystem;
+ std::string index_dir = GetTestTempDir() + "/index_test/";
+
+ CleanUp(filesystem, index_dir);
+
+ std::unique_ptr<Index> index = CreateIndex(filesystem, index_dir);
+ std::unique_ptr<LanguageSegmenter> language_segmenter =
+ CreateLanguageSegmenter();
+ std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
+ std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore();
+ std::unique_ptr<IndexProcessor> index_processor =
+ CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
+ normalizer.get(), index.get());
+
+ DocumentProto input_document =
+ CreateDocumentWithTenProperties(state.range(0));
+
+ DocumentId document_id = 0;
+ for (auto _ : state) {
+ ICING_ASSERT_OK(
+ index_processor->IndexDocument(input_document, document_id++));
+ }
+
+ CleanUp(filesystem, index_dir);
+}
+BENCHMARK(BM_IndexDocumentWithTenProperties)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_IndexDocumentWithDiacriticLetters(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+ }
+
+ IcingFilesystem filesystem;
+ std::string index_dir = GetTestTempDir() + "/index_test/";
+
+ CleanUp(filesystem, index_dir);
+
+ std::unique_ptr<Index> index = CreateIndex(filesystem, index_dir);
+ std::unique_ptr<LanguageSegmenter> language_segmenter =
+ CreateLanguageSegmenter();
+ std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
+ std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore();
+ std::unique_ptr<IndexProcessor> index_processor =
+ CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
+ normalizer.get(), index.get());
+
+ DocumentProto input_document =
+ CreateDocumentWithDiacriticLetters(state.range(0));
+
+ DocumentId document_id = 0;
+ for (auto _ : state) {
+ ICING_ASSERT_OK(
+ index_processor->IndexDocument(input_document, document_id++));
+ }
+
+ CleanUp(filesystem, index_dir);
+}
+BENCHMARK(BM_IndexDocumentWithDiacriticLetters)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_IndexDocumentWithHiragana(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+ }
+
+ IcingFilesystem filesystem;
+ std::string index_dir = GetTestTempDir() + "/index_test/";
+
+ CleanUp(filesystem, index_dir);
+
+ std::unique_ptr<Index> index = CreateIndex(filesystem, index_dir);
+ std::unique_ptr<LanguageSegmenter> language_segmenter =
+ CreateLanguageSegmenter();
+ std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
+ std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore();
+ std::unique_ptr<IndexProcessor> index_processor =
+ CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
+ normalizer.get(), index.get());
+
+ DocumentProto input_document = CreateDocumentWithHiragana(state.range(0));
+
+ DocumentId document_id = 0;
+ for (auto _ : state) {
+ ICING_ASSERT_OK(
+ index_processor->IndexDocument(input_document, document_id++));
+ }
+
+ CleanUp(filesystem, index_dir);
+}
+BENCHMARK(BM_IndexDocumentWithHiragana)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
new file mode 100644
index 0000000..c58898b
--- /dev/null
+++ b/icing/index/index-processor_test.cc
@@ -0,0 +1,569 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/index-processor.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section-manager.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// type and property names of FakeType
+constexpr std::string_view kFakeType = "FakeType";
+constexpr std::string_view kExactProperty = "exact";
+constexpr std::string_view kPrefixedProperty = "prefixed";
+constexpr std::string_view kUnindexedProperty1 = "unindexed1";
+constexpr std::string_view kUnindexedProperty2 = "unindexed2";
+constexpr std::string_view kSubProperty = "submessage";
+constexpr std::string_view kNestedProperty = "nested";
+constexpr std::string_view kRepeatedProperty = "repeated";
+
+constexpr DocumentId kDocumentId0 = 0;
+constexpr DocumentId kDocumentId1 = 1;
+
+constexpr SectionId kExactSectionId = 0;
+constexpr SectionId kPrefixedSectionId = 1;
+constexpr SectionId kRepeatedSectionId = 2;
+constexpr SectionId kNestedSectionId = 3;
+
+using Cardinality = PropertyConfigProto::Cardinality;
+using DataType = PropertyConfigProto::DataType;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Test;
+
+class IndexProcessorTest : public Test {
+ protected:
+ void SetUp() override {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ SetUpICUDataFile("icing/icu.dat"));
+
+ index_dir_ = GetTestTempDir() + "/index_test/";
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ ICING_ASSERT_OK_AND_ASSIGN(index_,
+ Index::Create(options, &icing_filesystem_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(lang_segmenter_,
+ LanguageSegmenter::Create(GetLangIdModelPath()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ normalizer_,
+ Normalizer::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_, SchemaStore::Create(&filesystem_, GetTestTempDir()));
+ SchemaProto schema;
+ CreateFakeTypeConfig(schema.add_types());
+ ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+
+ IndexProcessor::Options processor_options;
+ processor_options.max_tokens_per_document = 1000;
+ processor_options.token_limit_behavior =
+ IndexProcessor::Options::TokenLimitBehavior::kReturnError;
+ index_processor_ = std::make_unique<IndexProcessor>(
+ schema_store_.get(), lang_segmenter_.get(), normalizer_.get(),
+ index_.get(), processor_options);
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
+ }
+
+ std::unique_ptr<IndexProcessor> index_processor_;
+ std::unique_ptr<LanguageSegmenter> lang_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<Index> index_;
+ std::unique_ptr<SchemaStore> schema_store_;
+
+ private:
+ static void AddProperty(std::string_view name, DataType::Code type,
+ Cardinality::Code cardinality,
+ TermMatchType::Code term_match_type,
+ SchemaTypeConfigProto* type_config) {
+ auto* prop = type_config->add_properties();
+ prop->set_property_name(std::string(name));
+ prop->set_data_type(type);
+ prop->set_cardinality(cardinality);
+ prop->mutable_indexing_config()->set_term_match_type(term_match_type);
+ prop->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ }
+
+ static void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) {
+ type_config->set_schema_type(std::string(kFakeType));
+
+ AddProperty(std::string(kExactProperty), DataType::STRING,
+ Cardinality::REQUIRED, TermMatchType::EXACT_ONLY, type_config);
+
+ AddProperty(std::string(kPrefixedProperty), DataType::STRING,
+ Cardinality::OPTIONAL, TermMatchType::PREFIX, type_config);
+
+ // Don't set IndexingConfig
+ auto* prop = type_config->add_properties();
+ prop->set_property_name(std::string(kUnindexedProperty1));
+ prop->set_data_type(DataType::STRING);
+ prop->set_cardinality(Cardinality::OPTIONAL);
+
+ AddProperty(std::string(kUnindexedProperty2), DataType::BYTES,
+ Cardinality::OPTIONAL, TermMatchType::UNKNOWN, type_config);
+
+ AddProperty(std::string(kRepeatedProperty), DataType::STRING,
+ Cardinality::REPEATED, TermMatchType::PREFIX, type_config);
+
+ AddProperty(kSubProperty, DataType::DOCUMENT, Cardinality::OPTIONAL,
+ TermMatchType::UNKNOWN, type_config);
+
+ std::string recipients_name =
+ absl_ports::StrCat(kSubProperty, kPropertySeparator, kNestedProperty);
+ AddProperty(recipients_name, DataType::STRING, Cardinality::OPTIONAL,
+ TermMatchType::PREFIX, type_config);
+ }
+
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ std::string index_dir_;
+};
+
+std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
+ std::vector<DocHitInfo> infos;
+ while (iterator->Advance().ok()) {
+ infos.push_back(iterator->doc_hit_info());
+ }
+ return infos;
+}
+
+TEST_F(IndexProcessorTest, NoTermMatchTypeContent) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kUnindexedProperty1), "foo bar baz")
+ .AddBytesProperty(std::string(kUnindexedProperty2),
+ "attachment bytes")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
+}
+
+TEST_F(IndexProcessorTest, OneDoc) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "hello world")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("hello", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kExactSectionId})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("hello", 1U << kPrefixedSectionId,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(IndexProcessorTest, MultipleDocs) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "hello world")
+ .AddStringProperty(std::string(kPrefixedProperty), "good night moon!")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "pitbull")
+ .AddStringProperty(std::string(kPrefixedProperty), "mr. world wide")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("world", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(kDocumentId1,
+ std::vector<SectionId>{kPrefixedSectionId}),
+ EqualsDocHitInfo(kDocumentId0,
+ std::vector<SectionId>{kExactSectionId})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("world", 1U << kPrefixedSectionId,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kPrefixedSectionId})));
+}
+
+TEST_F(IndexProcessorTest, DocWithNestedProperty) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "hello world")
+ .AddDocumentProperty(
+ std::string(kSubProperty),
+ DocumentBuilder()
+ .AddStringProperty(std::string(kNestedProperty),
+ "rocky raccoon")
+ .Build())
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("rocky", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kNestedSectionId})));
+}
+
+TEST_F(IndexProcessorTest, DocWithRepeatedProperty) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "hello world")
+ .AddStringProperty(std::string(kRepeatedProperty), "rocky",
+ "italian stallion")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("italian", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kRepeatedSectionId})));
+}
+
+TEST_F(IndexProcessorTest, TooManyTokensReturnError) {
+ // Only allow the first four tokens ("hello", "world", "good", "night") to be
+ // indexed.
+ IndexProcessor::Options options;
+ options.max_tokens_per_document = 4;
+ options.token_limit_behavior =
+ IndexProcessor::Options::TokenLimitBehavior::kReturnError;
+ index_processor_ = std::make_unique<IndexProcessor>(
+ schema_store_.get(), lang_segmenter_.get(), normalizer_.get(),
+ index_.get(), options);
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "hello world")
+ .AddStringProperty(std::string(kPrefixedProperty), "good night moon!")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ // "night" should have been indexed.
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("night", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
+
+ // "moon" should not have been.
+ ICING_ASSERT_OK_AND_ASSIGN(itr,
+ index_->GetIterator("moon", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(IndexProcessorTest, TooManyTokensSuppressError) {
+ // Only allow the first four tokens ("hello", "world", "good", "night") to be
+ // indexed.
+ IndexProcessor::Options options;
+ options.max_tokens_per_document = 4;
+ options.token_limit_behavior =
+ IndexProcessor::Options::TokenLimitBehavior::kSuppressError;
+ index_processor_ = std::make_unique<IndexProcessor>(
+ schema_store_.get(), lang_segmenter_.get(), normalizer_.get(),
+ index_.get(), options);
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "hello world")
+ .AddStringProperty(std::string(kPrefixedProperty), "good night moon!")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ // "night" should have been indexed.
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("night", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
+
+ // "moon" should not have been.
+ ICING_ASSERT_OK_AND_ASSIGN(itr,
+ index_->GetIterator("moon", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
+
+TEST_F(IndexProcessorTest, TooLongTokens) {
+  // Only allow tokens of length four or shorter, truncating "hello", "world"
+  // and "night".
+ IndexProcessor::Options options;
+ options.max_tokens_per_document = 1000;
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<Normalizer> normalizer,
+                             Normalizer::Create(/*max_term_byte_size=*/4));
+
+ index_processor_ = std::make_unique<IndexProcessor>(
+ schema_store_.get(), lang_segmenter_.get(), normalizer.get(),
+ index_.get(), options);
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "hello world")
+ .AddStringProperty(std::string(kPrefixedProperty), "good night moon!")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ // "good" should have been indexed normally.
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("good", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
+
+ // "night" should not have been.
+ ICING_ASSERT_OK_AND_ASSIGN(itr,
+ index_->GetIterator("night", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+
+ // "night" should have been truncated to "nigh".
+ ICING_ASSERT_OK_AND_ASSIGN(itr,
+ index_->GetIterator("nigh", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
+}
+
+TEST_F(IndexProcessorTest, NonPrefixedContentPrefixQuery) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "best rocky movies")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPrefixedProperty), "rocky raccoon")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+
+ // Only document_id 1 should surface in a prefix query for "Rock"
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("rock", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kPrefixedSectionId})));
+}
+
+TEST_F(IndexProcessorTest, TokenNormalization) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "ALL UPPER CASE")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "all lower case")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("case", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(kDocumentId1,
+ std::vector<SectionId>{kExactSectionId}),
+ EqualsDocHitInfo(kDocumentId0,
+ std::vector<SectionId>{kExactSectionId})));
+}
+
+TEST_F(IndexProcessorTest, OutOfOrderDocumentIds) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "ALL UPPER CASE")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+
+ // Indexing a document with document_id < last_added_document_id should cause
+ // a failure.
+ document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/2")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "all lower case")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+  // As should indexing a document with document_id == last_added_document_id.
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+}
+
+TEST_F(IndexProcessorTest, NonAsciiIndexing) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty),
+ "你好,世界!你好:世界。“你好”世界?")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("你好", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kExactSectionId})));
+}
+
+// TODO(b/142508211) Re-enable this test once a proper limit on max content
+// has been determined.
+/*
+TEST_F(IndexProcessorTest,
+ LexiconFullIndexesSmallerTokensReturnsResourceExhausted) {
+ IndexProcessor::Options processor_options;
+ processor_options.max_tokens_per_document = 1000;
+ processor_options.token_limit_behavior =
+ IndexProcessor::Options::TokenLimitBehavior::kReturnError;
+ index_processor_ = std::make_unique<IndexProcessor>(
+ section_manager_.get(), lang_segmenter_.get(), normalizer_.get(),
+ index_.get(), processor_options);
+
+ // This is the maximum token length that an empty lexicon constructed for a
+ // lite index with merge size of 1MiB can support.
+ constexpr int kMaxTokenLength = 16777217;
+ // Create a string "ppppppp..." with a length that is too large to fit into
+ // the lexicon.
+ std::string enormous_string(kMaxTokenLength + 1, 'p');
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(kFakeType)
+ .AddStringProperty(std::string(kExactProperty),
+ absl_ports::StrCat(enormous_string, " foo"))
+ .AddStringProperty(std::string(kPrefixedProperty), "bar baz")
+ .Build();
+ EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kExactSectionId})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("baz", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
+}
+*/
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/index.cc b/icing/index/index.cc
new file mode 100644
index 0000000..7fdb70d
--- /dev/null
+++ b/icing/index/index.cc
@@ -0,0 +1,128 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/index.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator-term.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/lite-index.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Validates the caller-requested hit buffer size and translates the
+// index-level options into the options consumed by the LiteIndex.
+//
+// Returns:
+//   LiteIndex::Options on success
+//   INVALID_ARGUMENT if options.index_merge_size is non-positive or exceeds
+//     the maximum hit buffer size supported by the LiteIndex
+libtextclassifier3::StatusOr<LiteIndex::Options> CreateLiteIndexOptions(
+    const Index::Options& options) {
+  const int32_t merge_size = options.index_merge_size;
+  if (merge_size <= 0) {
+    return absl_ports::InvalidArgumentError(
+        "Requested hit buffer size must be greater than 0.");
+  }
+  if (merge_size > LiteIndex::max_hit_buffer_size()) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Requested hit buffer size %d is too large.", merge_size));
+  }
+  // The lite index's files all live under the "<base_dir>/idx/lite." prefix.
+  return LiteIndex::Options(options.base_dir + "/idx/lite.", merge_size);
+}
+
+// TODO(tjbarron) implement for real when the main index is added.
+// Until the main index exists, its lexicon is configured with default trie
+// options; the returned value currently only feeds TermIdCodec sizing in
+// Index::Create.
+IcingDynamicTrie::Options GetMainLexiconOptions() {
+  return IcingDynamicTrie::Options();
+}
+
+} // namespace
+
+libtextclassifier3::StatusOr<std::unique_ptr<Index>> Index::Create(
+    const Options& options, const IcingFilesystem* filesystem) {
+  // Translate (and validate) the caller-provided options for the lite index.
+  ICING_ASSIGN_OR_RETURN(LiteIndex::Options lite_options,
+                         CreateLiteIndexOptions(options));
+  // The codec needs the value-index space of both lexicons so that it can map
+  // a term-value-index (tvi) from either lexicon to a globally unique term id.
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<TermIdCodec> codec,
+      TermIdCodec::Create(
+          IcingDynamicTrie::max_value_index(GetMainLexiconOptions()),
+          IcingDynamicTrie::max_value_index(lite_options.lexicon_options)));
+  ICING_ASSIGN_OR_RETURN(std::unique_ptr<LiteIndex> lite,
+                         LiteIndex::Create(lite_options, filesystem));
+  // The Index constructor is private, so wrap the raw new explicitly instead
+  // of using std::make_unique.
+  return std::unique_ptr<Index>(
+      new Index(options, std::move(codec), std::move(lite)));
+}
+
+libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
+Index::GetIterator(const std::string& term, SectionIdMask section_id_mask,
+                   TermMatchType::Code term_match_type) {
+  // Exact and prefix queries are served by distinct iterator types; any other
+  // match type is a caller error.
+  if (term_match_type == TermMatchType::EXACT_ONLY) {
+    return std::make_unique<DocHitInfoIteratorTermExact>(
+        term_id_codec_.get(), lite_index_.get(), term, section_id_mask);
+  }
+  if (term_match_type == TermMatchType::PREFIX) {
+    return std::make_unique<DocHitInfoIteratorTermPrefix>(
+        term_id_codec_.get(), lite_index_.get(), term, section_id_mask);
+  }
+  return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+      "Invalid TermMatchType: ", TermMatchType::Code_Name(term_match_type)));
+}
+
+// Adds a hit for `term` at this editor's document/section.
+//
+// The term is first resolved against (or inserted into) the lite lexicon to
+// obtain its term-value-index (tvi), which is then encoded into a global
+// term id. Terms this editor has already indexed are deduped via
+// seen_tokens_ (the editor's document and section are fixed, so an equal
+// term id implies an identical hit), fulfilling the dedupe contract
+// documented on the Editor class and avoiding a redundant hit-buffer write.
+//
+// Returns:
+//   OK on success (including a deduped no-op)
+//   Any error propagated from the lexicon, codec or hit buffer (e.g.
+//   RESOURCE_EXHAUSTED when the index is full).
+libtextclassifier3::Status Index::Editor::AddHit(const char* term,
+                                                 Hit::Score score) {
+  // Step 1: See if this term is already in the lexicon
+  uint32_t tvi;
+  auto tvi_or = lite_index_->FindTerm(term);
+
+  // Step 2: Update the lexicon, either add the term or update its properties
+  if (tvi_or.ok()) {
+    ICING_VLOG(1) << "Term " << term
+                  << " is already present in lexicon. Updating.";
+    tvi = tvi_or.ValueOrDie();
+    // Already in the lexicon. Just update the properties.
+    ICING_RETURN_IF_ERROR(lite_index_->UpdateTerm(tvi, term_match_type_));
+  } else {
+    ICING_VLOG(1) << "Term " << term << " is not in lexicon. Inserting.";
+    // Haven't seen this term before. Add it to the lexicon.
+    ICING_ASSIGN_OR_RETURN(tvi,
+                           lite_index_->InsertTerm(term, term_match_type_));
+  }
+
+  // Step 3: Add the hit itself, deduping terms this editor already indexed.
+  ICING_ASSIGN_OR_RETURN(uint32_t term_id,
+                         term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+  if (!seen_tokens_.insert(term_id).second) {
+    // insert() returned {it, false}: this editor already added a hit for this
+    // term; adding another would duplicate it within the same doc/section.
+    return libtextclassifier3::Status::OK;
+  }
+  Hit hit(section_id_, document_id_, score,
+          term_match_type_ == TermMatchType::PREFIX);
+  return lite_index_->AddHit(term_id, hit);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/index.h b/icing/index/index.h
new file mode 100644
index 0000000..498ce89
--- /dev/null
+++ b/icing/index/index.h
@@ -0,0 +1,171 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_INDEX_H_
+#define ICING_INDEX_INDEX_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_set>
+#include <utility>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/lite-index.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// The class representing the Icing search index. This index maps terms to hits
+// (document_ids, section_ids).
+// Content is added to the index through the Editor class - which also dedupes
+// hits (calling Editor::AddHit with the same arguments will only result in the
+// creation of a single hit).
+// Ex.
+//   ICING_ASSIGN_OR_RETURN(std::unique_ptr<Index> index,
+//                          Index::Create(MakeIndexOptions()));
+//   Index::Editor editor = index->Edit(document_id, section_id,
+//                                      TermMatchType::EXACT_ONLY);
+//   ICING_RETURN_IF_ERROR(editor.AddHit("foo"));
+//   ICING_RETURN_IF_ERROR(editor.AddHit("baz"));
+//
+// Content is retrieved from the index through the DocHitInfoIterator class.
+// Ex.
+//   ICING_ASSIGN_OR_RETURN(std::unique_ptr<Index> index,
+//                          Index::Create(MakeIndexOptions()));
+//   ICING_ASSIGN_OR_RETURN(
+//       std::unique_ptr<DocHitInfoIterator> iterator,
+//       index->GetIterator("foo", kSectionIdMaskAll,
+//                          TermMatchType::EXACT_ONLY));
+//   while (iterator->Advance().ok()) {
+//     ProcessResult(iterator->doc_hit_info());
+//   }
+class Index {
+ public:
+  struct Options {
+    // base_dir: directory under which all index files are created.
+    // index_merge_size: byte size at which the lite index should be merged
+    //   into the main index. Index::Create rejects non-positive values and
+    //   values larger than the LiteIndex's maximum hit buffer size.
+    //
+    // The parameter is int32_t to match the member below; a wider unsigned
+    // parameter would silently wrap values > INT32_MAX on assignment.
+    explicit Options(const std::string& base_dir, int32_t index_merge_size)
+        : base_dir(base_dir), index_merge_size(index_merge_size) {}
+
+    std::string base_dir;
+    int32_t index_merge_size;
+  };
+
+  // Creates an instance of Index in the directory pointed by file_dir.
+  //
+  // Returns:
+  //   A valid Index on success
+  //   INVALID_ARGUMENT if options.index_merge_size is invalid
+  //   Any error encountered while creating the underlying lite index
+  static libtextclassifier3::StatusOr<std::unique_ptr<Index>> Create(
+      const Options& options, const IcingFilesystem* filesystem);
+
+  // Clears all files created by the index. Returns OK if all files were
+  // cleared.
+  libtextclassifier3::Status Reset() { return lite_index_->Reset(); }
+
+  // Brings components of the index into memory in anticipation of a query in
+  // order to reduce latency.
+  void Warm() { lite_index_->Warm(); }
+
+  // Syncs all the data and metadata changes to disk.
+  // Returns any encountered IO errors.
+  libtextclassifier3::Status PersistToDisk() {
+    return lite_index_->PersistToDisk();
+  }
+
+  // Compute the checksum over the entire Index's subcomponents.
+  Crc32 ComputeChecksum() { return lite_index_->ComputeChecksum(); }
+
+  // DocumentIds are always inserted in increasing order. Returns the largest
+  // document_id added to the index.
+  DocumentId last_added_document_id() const {
+    return lite_index_->last_added_document_id();
+  }
+
+  // Returns debug information for the index in out.
+  // verbosity <= 0, simplest debug information - just the lexicons and lite
+  // index.
+  // verbosity > 0, more detailed debug information including raw postings
+  // lists.
+  void GetDebugInfo(int verbosity, std::string* out) const {
+    lite_index_->GetDebugInfo(verbosity, out);
+  }
+
+  // Create an iterator to iterate through all doc hit infos in the index that
+  // match the term. section_id_mask can be set to ignore hits from sections not
+  // listed in the mask. Eg. section_id_mask = 1U << 3; would only return hits
+  // that occur in section 3.
+  //
+  // Returns:
+  //   unique ptr to a valid DocHitInfoIterator that matches the term
+  //   INVALID_ARGUMENT if given an invalid term_match_type
+  libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>> GetIterator(
+      const std::string& term, SectionIdMask section_id_mask,
+      TermMatchType::Code term_match_type);
+
+  // A class that can be used to add hits to the index.
+  //
+  // An editor groups hits from a particular section within a document together
+  // and dedupes hits for the same term within a section. This removes the
+  // burden of deduping from the caller and direct access to the index
+  // implementation allows for more efficient deduping.
+  class Editor {
+   public:
+    // Does not take any ownership, and all pointers must refer to valid objects
+    // that outlive the one constructed.
+    // TODO(b/141180665): Add nullptr checks for the raw pointers
+    Editor(const TermIdCodec* term_id_codec, LiteIndex* lite_index,
+           DocumentId document_id, SectionId section_id,
+           TermMatchType::Code term_match_type)
+        : term_id_codec_(term_id_codec),
+          lite_index_(lite_index),
+          document_id_(document_id),
+          term_match_type_(term_match_type),
+          section_id_(section_id) {}
+
+    // Adds a hit for term at this editor's document/section; duplicate terms
+    // within the same editor result in a single hit.
+    libtextclassifier3::Status AddHit(const char* term,
+                                      Hit::Score score = Hit::kMaxHitScore);
+
+   private:
+    // The Editor is able to store previously seen terms as TermIds. This is
+    // more efficient than a client doing this externally because TermIds are
+    // not exposed to clients.
+    std::unordered_set<uint32_t> seen_tokens_;
+    const TermIdCodec* term_id_codec_;
+    LiteIndex* lite_index_;
+    DocumentId document_id_;
+    TermMatchType::Code term_match_type_;
+    SectionId section_id_;
+  };
+
+  // Returns an Editor that adds hits for the given document/section. See the
+  // Editor class comment for the deduping guarantees.
+  Editor Edit(DocumentId document_id, SectionId section_id,
+              TermMatchType::Code term_match_type) {
+    return Editor(term_id_codec_.get(), lite_index_.get(), document_id,
+                  section_id, term_match_type);
+  }
+
+ private:
+  Index(const Options& options, std::unique_ptr<TermIdCodec> term_id_codec,
+        std::unique_ptr<LiteIndex>&& lite_index)
+      : lite_index_(std::move(lite_index)),
+        options_(options),
+        term_id_codec_(std::move(term_id_codec)) {}
+
+  std::unique_ptr<LiteIndex> lite_index_;
+  const Options options_;
+  std::unique_ptr<TermIdCodec> term_id_codec_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_INDEX_H_
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
new file mode 100644
index 0000000..536f9fb
--- /dev/null
+++ b/icing/index/index_test.cc
@@ -0,0 +1,551 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/index.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <random>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsTrue;
+using ::testing::NiceMock;
+using ::testing::Test;
+
+// Test fixture that creates a fresh Index (1 MiB merge size) in a temporary
+// directory for each test and deletes the directory afterwards.
+class IndexTest : public Test {
+ protected:
+  void SetUp() override {
+    index_dir_ = GetTestTempDir() + "/index_test/";
+    Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+    ICING_ASSERT_OK_AND_ASSIGN(index_, Index::Create(options, &filesystem_));
+  }
+
+  void TearDown() override {
+    filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
+  }
+
+  // Index under test; some tests replace it with a differently-sized one.
+  std::unique_ptr<Index> index_;
+  std::string index_dir_;
+  IcingFilesystem filesystem_;
+};
+
+// Document and section ids shared by the tests below.
+constexpr DocumentId kDocumentId0 = 0;
+constexpr DocumentId kDocumentId1 = 1;
+constexpr DocumentId kDocumentId2 = 2;
+constexpr SectionId kSectionId2 = 2;
+constexpr SectionId kSectionId3 = 3;
+
+// Drains `iterator`, returning every DocHitInfo it produces in iteration
+// order (descending document id for index-backed iterators).
+std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
+  std::vector<DocHitInfo> hits;
+  while (iterator->Advance().ok()) {
+    hits.push_back(iterator->doc_hit_info());
+  }
+  return hits;
+}
+
+// gMock matcher: passes when the DocHitInfo argument carries the given
+// document_id and a hit-section mask containing exactly `sections`.
+MATCHER_P2(EqualsDocHitInfo, document_id, sections, "") {
+  const DocHitInfo& actual = arg;
+  SectionIdMask section_mask = kSectionIdMaskNone;
+  // Build the expected mask from the list of section ids.
+  for (SectionId section : sections) {
+    section_mask |= 1U << section;
+  }
+  *result_listener << "actual is {document_id=" << actual.document_id()
+                   << ", section_mask=" << actual.hit_section_ids_mask()
+                   << "}, but expected was {document_id=" << document_id
+                   << ", section_mask=" << section_mask << "}.";
+  return actual.document_id() == document_id &&
+         actual.hit_section_ids_mask() == section_mask;
+}
+
+// Verifies that querying a freshly created, empty index yields NOT_FOUND on
+// the first Advance() and that no document has been added yet.
+TEST_F(IndexTest, EmptyIndex) {
+  // Assert
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(itr->Advance(),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      itr,
+      index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(itr->Advance(),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+  EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
+}
+
+// Verifies iterator behavior past the last hit: a missing term reports
+// NOT_FOUND, an exhausted term reports RESOURCE_EXHAUSTED, and
+// doc_hit_info() is reset to the invalid document id in both cases.
+TEST_F(IndexTest, AdvancePastEnd) {
+  // Act
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("bar", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(itr->Advance(),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  EXPECT_THAT(itr->doc_hit_info(),
+              EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      itr,
+      index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(itr->Advance(), IsOk());
+  EXPECT_THAT(itr->Advance(),
+              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+  EXPECT_THAT(itr->doc_hit_info(),
+              EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
+}
+
+// One term, one document, one section: the hit is retrievable.
+TEST_F(IndexTest, SingleHitSingleTermIndex) {
+  // Act
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+  // Assert
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(GetHits(std::move(itr)),
+              ElementsAre(EqualsDocHitInfo(
+                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+// Multiple distinct terms in the same section: querying one term returns only
+// its own hit.
+TEST_F(IndexTest, SingleHitMultiTermIndex) {
+  // Act
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(edit.AddHit("foo"), IsOk());
+  EXPECT_THAT(edit.AddHit("bar"), IsOk());
+
+  // Assert
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(GetHits(std::move(itr)),
+              ElementsAre(EqualsDocHitInfo(
+                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+// Querying a term that was never added reports NOT_FOUND even when other
+// terms exist in the index.
+TEST_F(IndexTest, NoHitMultiTermIndex) {
+  // Act
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(edit.AddHit("foo"), IsOk());
+  EXPECT_THAT(edit.AddHit("bar"), IsOk());
+
+  // Assert
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(itr->Advance(),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+// The same term in multiple documents: hits are returned for each matching
+// document, in descending document-id order.
+TEST_F(IndexTest, MultiHitMultiTermIndex) {
+  // Act
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+  edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(edit.AddHit("bar"), IsOk());
+
+  edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+  // Assert
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(
+      GetHits(std::move(itr)),
+      ElementsAre(
+          EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
+          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));
+}
+
+// A section mask restricts query results to hits from the masked sections.
+TEST_F(IndexTest, MultiHitSectionRestrict) {
+  // Act
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+  edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+  // Assert
+  SectionIdMask desired_section = 1U << kSectionId2;
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("foo", desired_section, TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(GetHits(std::move(itr)),
+              ElementsAre(EqualsDocHitInfo(
+                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+}
+
+// Adding the same term twice through one editor results in a single hit.
+TEST_F(IndexTest, SingleHitDedupeIndex) {
+  // Act
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(edit.AddHit("foo"), IsOk());
+  EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+  // Assert
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(GetHits(std::move(itr)),
+              ElementsAre(EqualsDocHitInfo(
+                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+// A prefix query matches a term indexed with PREFIX match type.
+TEST_F(IndexTest, PrefixHit) {
+  // Act
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX);
+  ASSERT_THAT(edit.AddHit("fool"), IsOk());
+
+  // Assert
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+  EXPECT_THAT(GetHits(std::move(itr)),
+              ElementsAre(EqualsDocHitInfo(
+                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+// A prefix query matches both an exact term equal to the query and a longer
+// PREFIX-indexed term, in descending document-id order.
+TEST_F(IndexTest, MultiPrefixHit) {
+  // Act
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX);
+  ASSERT_THAT(edit.AddHit("fool"), IsOk());
+
+  edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY);
+  ASSERT_THAT(edit.AddHit("foo"), IsOk());
+
+  // Assert
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+  EXPECT_THAT(
+      GetHits(std::move(itr)),
+      ElementsAre(
+          EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
+          EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+}
+
+// A longer term indexed EXACT_ONLY must not be returned by a prefix query.
+TEST_F(IndexTest, NoExactHitInPrefixQuery) {
+  // Act
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY);
+  ASSERT_THAT(edit.AddHit("fool"), IsOk());
+
+  edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX);
+  ASSERT_THAT(edit.AddHit("foo"), IsOk());
+
+  // Assert
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+  EXPECT_THAT(GetHits(std::move(itr)),
+              ElementsAre(EqualsDocHitInfo(
+                  kDocumentId1, std::vector<SectionId>{kSectionId3})));
+  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+}
+
+// Two terms sharing a prefix in the same doc/section produce one doc hit for
+// a prefix query, not two.
+TEST_F(IndexTest, PrefixHitDedupe) {
+  // Act
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX);
+  ASSERT_THAT(edit.AddHit("foo"), IsOk());
+  ASSERT_THAT(edit.AddHit("fool"), IsOk());
+
+  // Assert
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+  EXPECT_THAT(GetHits(std::move(itr)),
+              ElementsAre(EqualsDocHitInfo(
+                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
+  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+// ToString() of a prefix iterator renders the 16-bit section mask followed by
+// the term with a trailing '*'.
+TEST_F(IndexTest, PrefixToString) {
+  SectionIdMask id_mask = (1U << kSectionId2) | (1U << kSectionId3);
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("foo", id_mask, TermMatchType::PREFIX));
+  EXPECT_THAT(itr->ToString(), Eq("0000000000001100:foo*"));
+
+  ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("foo", kSectionIdMaskAll,
+                                                      TermMatchType::PREFIX));
+  EXPECT_THAT(itr->ToString(), Eq("1111111111111111:foo*"));
+
+  ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("foo", kSectionIdMaskNone,
+                                                      TermMatchType::PREFIX));
+  EXPECT_THAT(itr->ToString(), Eq("0000000000000000:foo*"));
+}
+
+// ToString() of an exact iterator renders the mask and the bare term (no '*').
+TEST_F(IndexTest, ExactToString) {
+  SectionIdMask id_mask = (1U << kSectionId2) | (1U << kSectionId3);
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("foo", id_mask, TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(itr->ToString(), Eq("0000000000001100:foo"));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      itr,
+      index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(itr->ToString(), Eq("1111111111111111:foo"));
+
+  ICING_ASSERT_OK_AND_ASSIGN(itr,
+                             index_->GetIterator("foo", kSectionIdMaskNone,
+                                                 TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(itr->ToString(), Eq("0000000000000000:foo"));
+}
+
+// Exact and prefix retrieval work for multi-byte (UTF-8) terms.
+TEST_F(IndexTest, NonAsciiTerms) {
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX);
+  ASSERT_THAT(edit.AddHit("こんにちは"), IsOk());
+  ASSERT_THAT(edit.AddHit("あなた"), IsOk());
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("こんに", kSectionIdMaskAll, TermMatchType::PREFIX));
+  EXPECT_THAT(GetHits(std::move(itr)),
+              ElementsAre(EqualsDocHitInfo(
+                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+  ICING_ASSERT_OK_AND_ASSIGN(itr,
+                             index_->GetIterator("あなた", kSectionIdMaskAll,
+                                                 TermMatchType::EXACT_ONLY));
+  EXPECT_THAT(GetHits(std::move(itr)),
+              ElementsAre(EqualsDocHitInfo(
+                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
+// Fills a small (1 KiB merge size) index until it reports RESOURCE_EXHAUSTED,
+// then checks that further writes keep failing while previously-added terms
+// remain queryable.
+TEST_F(IndexTest, FullIndex) {
+  // Make a smaller index so that it's easier to fill up.
+  Index::Options options(index_dir_, /*index_merge_size=*/1024);
+  ICING_ASSERT_OK_AND_ASSIGN(index_, Index::Create(options, &filesystem_));
+  std::default_random_engine random;
+  libtextclassifier3::Status status = libtextclassifier3::Status::OK;
+  constexpr int kTokenSize = 5;
+  DocumentId document_id = 0;
+  std::vector<std::string> query_terms;
+  while (status.ok()) {
+    for (int i = 0; i < 100; ++i) {
+      Index::Editor edit =
+          index_->Edit(document_id, kSectionId2, TermMatchType::EXACT_ONLY);
+      std::string term = RandomString(kAlNumAlphabet, kTokenSize, &random);
+      status = edit.AddHit(term.c_str());
+      if (i % 50 == 0) {
+        // Remember one out of every fifty terms to query for later.
+        query_terms.push_back(std::move(term));
+      }
+      if (!status.ok()) {
+        break;
+      }
+    }
+    ++document_id;
+  }
+
+  // Assert
+  // Adding more hits should fail.
+  Index::Editor edit =
+      index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(edit.AddHit("foo"),
+              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+  EXPECT_THAT(edit.AddHit("bar"),
+              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+  EXPECT_THAT(edit.AddHit("baz"),
+              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+  for (const std::string& term : query_terms) {
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<DocHitInfoIterator> itr,
+        index_->GetIterator(term.c_str(), kSectionIdMaskAll,
+                            TermMatchType::EXACT_ONLY));
+    // Each query term should contain at least one hit - there may have been
+    // other hits for this term that were added.
+    EXPECT_THAT(itr->Advance(), IsOk());
+  }
+  EXPECT_THAT(index_->last_added_document_id(), Eq(document_id - 1));
+}
+
+// I/O failures during creation surface as INTERNAL.
+TEST_F(IndexTest, IndexCreateIOFailure) {
+  // Create the index with mock filesystem. By default, Mock will return false,
+  // so the first attempted file operation will fail.
+  NiceMock<IcingMockFilesystem> mock_filesystem;
+  Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+  EXPECT_THAT(Index::Create(options, &mock_filesystem),
+              StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+// A corrupted hit buffer is detected at creation time and reported as
+// DATA_LOSS.
+TEST_F(IndexTest, IndexCreateCorruptionFailure) {
+  // Add some content to the index
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX);
+  ASSERT_THAT(edit.AddHit("foo"), IsOk());
+  ASSERT_THAT(edit.AddHit("bar"), IsOk());
+
+  // Close the index.
+  index_.reset();
+
+  // Corrupt the index file.
+  std::string hit_buffer_filename = index_dir_ + "/idx/lite.hb";
+  ScopedFd sfd(filesystem_.OpenForWrite(hit_buffer_filename.c_str()));
+  ASSERT_THAT(sfd.is_valid(), IsTrue());
+
+  constexpr std::string_view kCorruptBytes = "ffffffffffffffffffffff";
+  // The first page of the hit_buffer is taken up by the header. Overwrite the
+  // first page of content.
+  constexpr int kHitBufferStartOffset = 4096;
+  ASSERT_THAT(filesystem_.PWrite(sfd.get(), kHitBufferStartOffset,
+                                 kCorruptBytes.data(), kCorruptBytes.length()),
+              IsTrue());
+
+  // Recreate the index.
+  Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+  EXPECT_THAT(Index::Create(options, &filesystem_),
+              StatusIs(libtextclassifier3::StatusCode::DATA_LOSS));
+}
+
+// Hits written before PersistToDisk() survive closing and reopening the
+// index.
+TEST_F(IndexTest, IndexPersistence) {
+  // Add some content to the index
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX);
+  ASSERT_THAT(edit.AddHit("foo"), IsOk());
+  ASSERT_THAT(edit.AddHit("bar"), IsOk());
+  EXPECT_THAT(index_->PersistToDisk(), IsOk());
+
+  // Close the index.
+  index_.reset();
+
+  // Recreate the index.
+  Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+  ICING_ASSERT_OK_AND_ASSIGN(index_, Index::Create(options, &filesystem_));
+
+  // Check that the hits are present.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocHitInfoIterator> itr,
+      index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+  EXPECT_THAT(GetHits(std::move(itr)),
+              ElementsAre(EqualsDocHitInfo(
+                  kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+  EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+// A hit buffer size that wraps to a non-positive int32 is rejected with
+// INVALID_ARGUMENT.
+TEST_F(IndexTest, InvalidHitBufferSize) {
+  Index::Options options(
+      index_dir_, /*index_merge_size=*/std::numeric_limits<uint32_t>::max());
+  EXPECT_THAT(Index::Create(options, &filesystem_),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// ComputeChecksum() is a pure observation: repeated calls agree.
+TEST_F(IndexTest, ComputeChecksumSameBetweenCalls) {
+  // Add some content to the index.
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX);
+  ASSERT_THAT(edit.AddHit("foo"), IsOk());
+
+  Crc32 foo_checksum(757666244U);
+  EXPECT_THAT(index_->ComputeChecksum(), Eq(foo_checksum));
+
+  // Calling it again shouldn't change the checksum
+  EXPECT_THAT(index_->ComputeChecksum(), Eq(foo_checksum));
+}
+
+// The checksum depends only on persisted content, not on the Index instance.
+TEST_F(IndexTest, ComputeChecksumSameAcrossInstances) {
+  // Add some content to the index.
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX);
+  ASSERT_THAT(edit.AddHit("foo"), IsOk());
+
+  Crc32 foo_checksum(757666244U);
+  EXPECT_THAT(index_->ComputeChecksum(), Eq(foo_checksum));
+
+  // Recreate the index, checksum should still be the same across instances
+  index_.reset();
+  Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+  ICING_ASSERT_OK_AND_ASSIGN(index_, Index::Create(options, &filesystem_));
+
+  EXPECT_THAT(index_->ComputeChecksum(), Eq(foo_checksum));
+}
+
+// Adding new content changes the checksum.
+TEST_F(IndexTest, ComputeChecksumChangesOnModification) {
+  // Add some content to the index.
+  Index::Editor edit =
+      index_->Edit(kDocumentId0, kSectionId2, TermMatchType::PREFIX);
+  ASSERT_THAT(edit.AddHit("foo"), IsOk());
+
+  Crc32 foo_checksum(757666244U);
+  EXPECT_THAT(index_->ComputeChecksum(), Eq(foo_checksum));
+
+  // Modifying the index changes the checksum.
+  EXPECT_THAT(edit.AddHit("bar"), IsOk());
+
+  Crc32 foo_bar_checksum(1228959551U);
+  EXPECT_THAT(index_->ComputeChecksum(), Eq(foo_bar_checksum));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc b/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
new file mode 100644
index 0000000..0d5bfea
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id.cc
@@ -0,0 +1,44 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
+
+#include "utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// Iteration starts at document_id_limit (the most recently added document)
+// and walks down towards 0.
+DocHitInfoIteratorAllDocumentId::DocHitInfoIteratorAllDocumentId(
+    const DocumentId document_id_limit)
+    : document_id_limit_(document_id_limit),
+      current_document_id_(document_id_limit) {}
+
+// Emits the next (lower) document id, or RESOURCE_EXHAUSTED once every id in
+// [0, document_id_limit_] has been returned.
+libtextclassifier3::Status DocHitInfoIteratorAllDocumentId::Advance() {
+  if (IsDocumentIdValid(current_document_id_)) {
+    // Hits are synthesized, so only the document id field is meaningful.
+    doc_hit_info_.set_document_id(current_document_id_--);
+    return libtextclassifier3::Status::OK;
+  }
+  // Reached the end, set these to invalid values and return
+  doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+  hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+  return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id.h b/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
new file mode 100644
index 0000000..97ba5f2
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id.h
@@ -0,0 +1,59 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_ALL_DOCUMENT_ID_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_ALL_DOCUMENT_ID_H_
+
+#include <cstdint>
+#include <string>
+
+#include "utils/base/status.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// Iterator over all DocumentIds in the range [0, document_id_limit_], both
+// endpoints inclusive. Returns DocumentIds in descending order.
+class DocHitInfoIteratorAllDocumentId : public DocHitInfoIterator {
+ public:
+ explicit DocHitInfoIteratorAllDocumentId(DocumentId document_id_limit);
+
+ libtextclassifier3::Status Advance() override;
+
+ int32_t GetNumBlocksInspected() const override { return 0; }
+
+ int32_t GetNumLeafAdvanceCalls() const override {
+ return document_id_limit_ - current_document_id_;
+ }
+
+ std::string ToString() const override {
+ return IcingStringUtil::StringPrintf("(ALL document_id_limit:%d)",
+ document_id_limit_);
+ }
+
+ private:
+ const DocumentId document_id_limit_;
+
+ // An internal value for the iterator to track the current doc id.
+ DocumentId current_document_id_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_ALL_DOCUMENT_ID_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc b/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
new file mode 100644
index 0000000..7366b97
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-all-document-id_test.cc
@@ -0,0 +1,113 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
+
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAreArray;
+using ::testing::Eq;
+using ::testing::Not;
+
+TEST(DocHitInfoIteratorAllDocumentIdTest, Initialize) {
+ {
+ DocHitInfoIteratorAllDocumentId all_it(100);
+
+ // We'll always start with an invalid document_id, need to Advance before we
+ // get anything out of this.
+ EXPECT_THAT(all_it.doc_hit_info().document_id(), Eq(kInvalidDocumentId));
+ EXPECT_THAT(all_it.hit_intersect_section_ids_mask(),
+ Eq(kSectionIdMaskNone));
+ }
+
+ {
+ // Can initialize with negative values, but won't ever be able to Advance to
+ // a proper document_id
+ DocHitInfoIteratorAllDocumentId all_it(-5);
+ EXPECT_THAT(all_it.Advance(), Not(IsOk()));
+ }
+}
+
+TEST(DocHitInfoIteratorAllDocumentIdTest, GetNumBlocksInspected) {
+ DocHitInfoIteratorAllDocumentId all_it(100);
+ EXPECT_THAT(all_it.GetNumBlocksInspected(), Eq(0));
+
+ // Number of iterations is chosen arbitrarily. Just meant to demonstrate that
+ // no matter how many Advance calls are made, GetNumBlocksInspected should
+ // always return 0.
+ for (int i = 0; i < 5; ++i) {
+ EXPECT_THAT(all_it.Advance(), IsOk());
+ EXPECT_THAT(all_it.GetNumBlocksInspected(), Eq(0));
+ }
+}
+
+TEST(DocHitInfoIteratorAllDocumentIdTest, GetNumLeafAdvanceCalls) {
+ DocHitInfoIteratorAllDocumentId all_it(100);
+ EXPECT_THAT(all_it.GetNumLeafAdvanceCalls(), Eq(0));
+
+ for (int i = 1; i <= 5; ++i) {
+ EXPECT_THAT(all_it.Advance(), IsOk());
+ EXPECT_THAT(all_it.GetNumLeafAdvanceCalls(), Eq(i));
+ }
+}
+
+TEST(DocHitInfoIteratorAllDocumentIdTest, Advance) {
+ {
+ // Can't advance beyond an invalid DocumentId
+ EXPECT_THAT(DocHitInfoIteratorAllDocumentId(-1).Advance(), Not(IsOk()));
+ }
+
+ {
+ // Test one advance
+ DocHitInfoIteratorAllDocumentId all_it(5);
+ EXPECT_THAT(all_it.Advance(), IsOk());
+ EXPECT_THAT(all_it.doc_hit_info().document_id(), Eq(5));
+
+ // Advancing shouldn't affect the intersect section ids mask, since there's
+ // no intersecting going on
+ EXPECT_THAT(all_it.hit_intersect_section_ids_mask(),
+ Eq(kSectionIdMaskNone));
+ }
+
+ {
+ std::vector<DocumentId> expected_document_ids;
+ expected_document_ids.reserve(125);
+ for (int i = 124; i >= 0; --i) {
+ expected_document_ids.push_back(i);
+ }
+
+ // Many advances
+ DocHitInfoIteratorAllDocumentId all_it(124);
+ EXPECT_THAT(GetDocumentIds(&all_it),
+ ElementsAreArray(expected_document_ids));
+ }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc
new file mode 100644
index 0000000..276b78a
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-and.cc
@@ -0,0 +1,230 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-and.h"
+
+#include <stddef.h>
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// When combining ANDed iterators, n-ary operator has better performance when
+// number of operands > 3 according to benchmark cl/243720660
+// TODO(samzheng): Tune this number when necessary, e.g. if the implementation
+// changes.
+inline constexpr int kBinaryAndIteratorPerformanceThreshold = 3;
+
+// The minimum number of iterators needed to construct an And iterator. The And
+// constructor currently takes 2 iterators.
+inline constexpr int kMinBinaryIterators = 2;
+
+} // namespace
+
+std::unique_ptr<DocHitInfoIterator> CreateAndIterator(
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators) {
+ if (iterators.size() == 1) {
+ return std::move(iterators.at(0));
+ }
+
+ std::unique_ptr<DocHitInfoIterator> iterator;
+ if (iterators.size() <= kBinaryAndIteratorPerformanceThreshold &&
+ iterators.size() >= kMinBinaryIterators) {
+ // Accumulate the iterators that need to be ANDed together.
+ iterator = std::move(iterators.at(0));
+ for (size_t i = 1; i < iterators.size(); ++i) {
+ std::unique_ptr<DocHitInfoIterator> temp_iterator = std::move(iterator);
+ iterator = std::make_unique<DocHitInfoIteratorAnd>(
+ std::move(temp_iterator), std::move(iterators[i]));
+ }
+ } else {
+ // If the vector is too small, the AndNary iterator can handle it and return
+ // an error on the Advance call
+ iterator =
+ std::make_unique<DocHitInfoIteratorAndNary>(std::move(iterators));
+ }
+
+ return iterator;
+}
+
+DocHitInfoIteratorAnd::DocHitInfoIteratorAnd(
+ std::unique_ptr<DocHitInfoIterator> short_it,
+ std::unique_ptr<DocHitInfoIterator> long_it)
+ : short_(std::move(short_it)), long_(std::move(long_it)) {}
+
+libtextclassifier3::Status DocHitInfoIteratorAnd::Advance() {
+ // Advance on short first
+ if (!short_->Advance().ok()) {
+ // Didn't find anything for the first iterator, reset to invalid values and
+ // return.
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+ return absl_ports::ResourceExhaustedError(
+ "No more DocHitInfos in iterator");
+ }
+ DocumentId short_doc_id = short_->doc_hit_info().document_id();
+
+ // Then AdvanceTo on long
+ ICING_ASSIGN_OR_RETURN(DocumentId long_doc_id,
+ AdvanceTo(long_.get(), short_doc_id));
+
+ // Now try to align DocHitInfos by moving one or the other.
+ while (short_doc_id != long_doc_id) {
+ if (short_doc_id > long_doc_id) {
+ ICING_ASSIGN_OR_RETURN(short_doc_id,
+ AdvanceTo(short_.get(), long_doc_id));
+ } else {
+ ICING_ASSIGN_OR_RETURN(long_doc_id, AdvanceTo(long_.get(), short_doc_id));
+ }
+ }
+
+ // Guaranteed that short_doc_id and long_doc_id match now
+ doc_hit_info_ = short_->doc_hit_info();
+ doc_hit_info_.MergeSectionsFrom(long_->doc_hit_info());
+ hit_intersect_section_ids_mask_ = short_->hit_intersect_section_ids_mask() &
+ long_->hit_intersect_section_ids_mask();
+ return libtextclassifier3::Status::OK;
+}
+
+int32_t DocHitInfoIteratorAnd::GetNumBlocksInspected() const {
+ return short_->GetNumBlocksInspected() + long_->GetNumBlocksInspected();
+}
+
+int32_t DocHitInfoIteratorAnd::GetNumLeafAdvanceCalls() const {
+ return short_->GetNumLeafAdvanceCalls() + long_->GetNumLeafAdvanceCalls();
+}
+
+std::string DocHitInfoIteratorAnd::ToString() const {
+ return absl_ports::StrCat("(", short_->ToString(), " AND ", long_->ToString(),
+ ")");
+}
+
+DocHitInfoIteratorAndNary::DocHitInfoIteratorAndNary(
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)
+ : iterators_(std::move(iterators)) {}
+
+libtextclassifier3::Status DocHitInfoIteratorAndNary::Advance() {
+ if (iterators_.size() < 2) {
+ return absl_ports::InvalidArgumentError(
+ "Not enough iterators to AND together");
+ }
+
+ // Advance on the first iterator to get a potential hit
+ if (!iterators_.at(0)->Advance().ok()) {
+ // Didn't find anything for the first iterator, reset to invalid values and
+ // return
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+ return absl_ports::ResourceExhaustedError(
+ "No more DocHitInfos in iterator");
+ }
+ DocumentId potential_document_id =
+ iterators_.at(0)->doc_hit_info().document_id();
+
+ // Our goal is to find the next document_id that exists on all the iterators
+ // by advancing the iterators one by one. We start with some
+ // "potential_document_id", check if it actually matches the above goal. If
+ // yes, return. If not, find the next best "potential" and repeat till we hit
+ // the end.
+
+ // Has the current potential_document_id been found in all the iterators?
+ bool found_document_id = false;
+ while (!found_document_id) {
+ for (auto& iterator : iterators_) {
+ if (iterator->doc_hit_info().document_id() > potential_document_id) {
+ // Advance the current iterator until it's equal to or smaller than the
+ // potential hit doc id
+ DocumentId unused;
+ ICING_ASSIGN_OR_RETURN(
+ unused, AdvanceTo(iterator.get(), potential_document_id));
+ }
+
+ if (iterator->doc_hit_info().document_id() == potential_document_id) {
+ // The potential hit got matched on the iterators so far
+ found_document_id = true;
+ continue;
+ } else if (iterator->doc_hit_info().document_id() <
+ potential_document_id) {
+ // This iterator doesn't have potential_document_id as we've gone past
+ // it already. Use the current document_id as the new
+ // "potential_document_id" and start checking all iterators again.
+ found_document_id = false;
+ potential_document_id = iterator->doc_hit_info().document_id();
+ break;
+ }
+ }
+ }
+
+ // Found a DocumentId which exists in all the iterators
+ doc_hit_info_ = iterators_.at(0)->doc_hit_info();
+ hit_intersect_section_ids_mask_ =
+ iterators_.at(0)->hit_intersect_section_ids_mask();
+
+ for (size_t i = 1; i < iterators_.size(); i++) {
+ doc_hit_info_.MergeSectionsFrom(iterators_.at(i)->doc_hit_info());
+ hit_intersect_section_ids_mask_ &=
+ iterators_.at(i)->hit_intersect_section_ids_mask();
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+int32_t DocHitInfoIteratorAndNary::GetNumBlocksInspected() const {
+ int32_t blockCount = 0;
+ for (const std::unique_ptr<DocHitInfoIterator>& iter : iterators_) {
+ blockCount += iter->GetNumBlocksInspected();
+ }
+ return blockCount;
+}
+
+int32_t DocHitInfoIteratorAndNary::GetNumLeafAdvanceCalls() const {
+ int32_t leafCount = 0;
+ for (const std::unique_ptr<DocHitInfoIterator>& iter : iterators_) {
+ leafCount += iter->GetNumLeafAdvanceCalls();
+ }
+ return leafCount;
+}
+
+std::string DocHitInfoIteratorAndNary::ToString() const {
+ std::string ret = "(";
+
+ for (int i = 0; i < iterators_.size(); ++i) {
+ if (i == iterators_.size() - 1) {
+ // Last element in vector
+ absl_ports::StrAppend(&ret, iterators_.at(i)->ToString(), ")");
+ } else {
+ absl_ports::StrAppend(&ret, iterators_.at(i)->ToString(), " AND ");
+ }
+ }
+
+ return ret;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.h b/icing/index/iterator/doc-hit-info-iterator-and.h
new file mode 100644
index 0000000..5c4c07e
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-and.h
@@ -0,0 +1,77 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_AND_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_AND_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+
+namespace icing {
+namespace lib {
+
+// Given n iterators, will decide what the fastest And-iterator implementation
+// will be.
+std::unique_ptr<DocHitInfoIterator> CreateAndIterator(
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators);
+
+// Iterate over a logical AND of two child iterators.
+class DocHitInfoIteratorAnd : public DocHitInfoIterator {
+ public:
+ // Set the shorter iterator to short_it to get performance benefits
+ // when an underlying iterator has a more efficient AdvanceTo.
+ explicit DocHitInfoIteratorAnd(std::unique_ptr<DocHitInfoIterator> short_it,
+ std::unique_ptr<DocHitInfoIterator> long_it);
+ libtextclassifier3::Status Advance() override;
+
+ int32_t GetNumBlocksInspected() const override;
+
+ int32_t GetNumLeafAdvanceCalls() const override;
+
+ std::string ToString() const override;
+
+ private:
+ std::unique_ptr<DocHitInfoIterator> short_;
+ std::unique_ptr<DocHitInfoIterator> long_;
+};
+
+// Iterate over a logical AND of multiple child iterators.
+// NOTE: DocHitInfoIteratorAnd is a faster alternative for ANDing exactly 2
+// iterators.
+class DocHitInfoIteratorAndNary : public DocHitInfoIterator {
+ public:
+ explicit DocHitInfoIteratorAndNary(
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators);
+
+ libtextclassifier3::Status Advance() override;
+
+ int32_t GetNumBlocksInspected() const override;
+
+ int32_t GetNumLeafAdvanceCalls() const override;
+
+ std::string ToString() const override;
+
+ private:
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_AND_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-and_test.cc b/icing/index/iterator/doc-hit-info-iterator-and_test.cc
new file mode 100644
index 0000000..35574b7
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-and_test.cc
@@ -0,0 +1,351 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-and.h"
+
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+TEST(CreateAndIteratorTest, And) {
+ // Basic test that we can create a working And iterator. Further testing of
+ // the And iterator should be done separately below.
+ std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(10)};
+ std::unique_ptr<DocHitInfoIterator> first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ std::unique_ptr<DocHitInfoIterator> second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::move(first_iter));
+ iterators.push_back(std::move(second_iter));
+ std::unique_ptr<DocHitInfoIterator> and_iter =
+ CreateAndIterator(std::move(iterators));
+
+ EXPECT_THAT(GetDocumentIds(and_iter.get()), ElementsAre(10));
+}
+
+TEST(CreateAndIteratorTest, AndNary) {
+ // Basic test that we can create a working AndNary iterator. Further testing
+ // of the AndNary iterator should be done separately below.
+ std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(10)};
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos));
+
+ std::unique_ptr<DocHitInfoIterator> and_iter =
+ CreateAndIterator(std::move(iterators));
+
+ EXPECT_THAT(GetDocumentIds(and_iter.get()), ElementsAre(10));
+}
+
+TEST(DocHitInfoIteratorAndTest, Initialize) {
+ DocHitInfoIteratorAnd and_iter(std::make_unique<DocHitInfoIteratorDummy>(),
+ std::make_unique<DocHitInfoIteratorDummy>());
+
+ // We start out with invalid values
+ EXPECT_THAT(and_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId)));
+ EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(),
+ Eq(kSectionIdMaskNone));
+}
+
+TEST(DocHitInfoIteratorAndTest, GetNumBlocksInspected) {
+ int first_iter_blocks = 4; // arbitrary value
+ auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
+ first_iter->SetNumBlocksInspected(first_iter_blocks);
+
+ int second_iter_blocks = 7; // arbitrary value
+ auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
+ second_iter->SetNumBlocksInspected(second_iter_blocks);
+
+ DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter));
+
+ EXPECT_THAT(and_iter.GetNumBlocksInspected(),
+ Eq(first_iter_blocks + second_iter_blocks));
+}
+
+TEST(DocHitInfoIteratorAndTest, GetNumLeafAdvanceCalls) {
+ int first_iter_leaves = 4; // arbitrary value
+ auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
+ first_iter->SetNumLeafAdvanceCalls(first_iter_leaves);
+
+ int second_iter_leaves = 7; // arbitrary value
+ auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
+ second_iter->SetNumLeafAdvanceCalls(second_iter_leaves);
+
+ DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter));
+
+ EXPECT_THAT(and_iter.GetNumLeafAdvanceCalls(),
+ Eq(first_iter_leaves + second_iter_leaves));
+}
+
+TEST(DocHitInfoIteratorAndTest, AdvanceNoOverlap) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(10), DocHitInfo(9)};
+
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(8), DocHitInfo(7)};
+
+ std::unique_ptr<DocHitInfoIterator> first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+ std::unique_ptr<DocHitInfoIterator> second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector);
+ DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter));
+
+ EXPECT_THAT(GetDocumentIds(&and_iter), IsEmpty());
+}
+
+TEST(DocHitInfoIteratorAndTest, Advance) {
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(10), DocHitInfo(8),
+ DocHitInfo(6), DocHitInfo(4),
+ DocHitInfo(2), DocHitInfo(0)};
+
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(8), DocHitInfo(4),
+ DocHitInfo(0)};
+
+ std::unique_ptr<DocHitInfoIterator> first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+ std::unique_ptr<DocHitInfoIterator> second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector);
+ DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter));
+
+ EXPECT_THAT(GetDocumentIds(&and_iter), ElementsAre(8, 4, 0));
+}
+
+TEST(DocHitInfoIteratorAndTest, AdvanceNestedIterators) {
+ std::vector<DocHitInfo> first_vector = {
+ DocHitInfo(10), DocHitInfo(9), DocHitInfo(8), DocHitInfo(7),
+ DocHitInfo(6), DocHitInfo(5), DocHitInfo(4), DocHitInfo(3),
+ DocHitInfo(2), DocHitInfo(1), DocHitInfo(0)};
+
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(10), DocHitInfo(8),
+ DocHitInfo(6), DocHitInfo(4),
+ DocHitInfo(2), DocHitInfo(0)};
+
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(10), DocHitInfo(7),
+ DocHitInfo(6), DocHitInfo(2),
+ DocHitInfo(1)};
+
+ std::unique_ptr<DocHitInfoIterator> first_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+ std::unique_ptr<DocHitInfoIterator> second_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(second_vector);
+ std::unique_ptr<DocHitInfoIterator> third_iter =
+ std::make_unique<DocHitInfoIteratorDummy>(third_vector);
+
+ std::unique_ptr<DocHitInfoIterator> inner_iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(first_iter),
+ std::move(second_iter));
+ std::unique_ptr<DocHitInfoIterator> outer_iter =
+ std::make_unique<DocHitInfoIteratorAnd>(std::move(inner_iter),
+ std::move(third_iter));
+
+ EXPECT_THAT(GetDocumentIds(outer_iter.get()), ElementsAre(10, 6, 2));
+}
+
+TEST(DocHitInfoIteratorAndTest, SectionIdMask) {
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ SectionIdMask section_id_mask1 = 0b01010101; // hits in sections 0, 2, 4, 6
+ SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2
+ SectionIdMask mask_anded_result = 0b00000100;
+ SectionIdMask mask_ored_result = 0b01010111;
+
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(4, section_id_mask1)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(4, section_id_mask2)};
+
+ auto first_iter = std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+ first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+
+ auto second_iter = std::make_unique<DocHitInfoIteratorDummy>(second_vector);
+ second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+
+ DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter));
+
+ ICING_EXPECT_OK(and_iter.Advance());
+ EXPECT_THAT(and_iter.doc_hit_info().hit_section_ids_mask(),
+ Eq(mask_ored_result));
+ EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
+}
+
+TEST(DocHitInfoIteratorAndNaryTest, Initialize) {
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>());
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>());
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>());
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>());
+ DocHitInfoIteratorAndNary and_iter(std::move(iterators));
+
+ // We start out with invalid values
+ EXPECT_THAT(and_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId)));
+ EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(),
+ Eq(kSectionIdMaskNone));
+}
+
+TEST(DocHitInfoIteratorAndNaryTest, InitializeEmpty) {
+ // We can initialize it fine even with an empty vector
+ std::vector<std::unique_ptr<DocHitInfoIterator>> empty_vector;
+ DocHitInfoIteratorAndNary empty_iter(std::move(empty_vector));
+
+ // But it won't be able to advance anywhere
+ EXPECT_THAT(empty_iter.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(DocHitInfoIteratorAndNaryTest, GetNumBlocksInspected) {
+ int first_iter_blocks = 4; // arbitrary value
+ auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
+ first_iter->SetNumBlocksInspected(first_iter_blocks);
+
+ int second_iter_blocks = 7; // arbitrary value
+ auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
+ second_iter->SetNumBlocksInspected(second_iter_blocks);
+
+ int third_iter_blocks = 13; // arbitrary value
+ auto third_iter = std::make_unique<DocHitInfoIteratorDummy>();
+ third_iter->SetNumBlocksInspected(third_iter_blocks);
+
+ int fourth_iter_blocks = 1; // arbitrary value
+ auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>();
+ fourth_iter->SetNumBlocksInspected(fourth_iter_blocks);
+
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::move(first_iter));
+ iterators.push_back(std::move(second_iter));
+ iterators.push_back(std::move(third_iter));
+ iterators.push_back(std::move(fourth_iter));
+ DocHitInfoIteratorAndNary and_iter(std::move(iterators));
+
+ EXPECT_THAT(and_iter.GetNumBlocksInspected(),
+ Eq(first_iter_blocks + second_iter_blocks + third_iter_blocks +
+ fourth_iter_blocks));
+}
+
+TEST(DocHitInfoIteratorAndNaryTest, GetNumLeafAdvanceCalls) {
+ int first_iter_leaves = 4; // arbitrary value
+ auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
+ first_iter->SetNumLeafAdvanceCalls(first_iter_leaves);
+
+ int second_iter_leaves = 7; // arbitrary value
+ auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
+ second_iter->SetNumLeafAdvanceCalls(second_iter_leaves);
+
+ int third_iter_leaves = 13; // arbitrary value
+ auto third_iter = std::make_unique<DocHitInfoIteratorDummy>();
+ third_iter->SetNumLeafAdvanceCalls(third_iter_leaves);
+
+ int fourth_iter_leaves = 13; // arbitrary value
+ auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>();
+ fourth_iter->SetNumLeafAdvanceCalls(fourth_iter_leaves);
+
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::move(first_iter));
+ iterators.push_back(std::move(second_iter));
+ iterators.push_back(std::move(third_iter));
+ iterators.push_back(std::move(fourth_iter));
+ DocHitInfoIteratorAndNary and_iter(std::move(iterators));
+
+ EXPECT_THAT(and_iter.GetNumLeafAdvanceCalls(),
+ Eq(first_iter_leaves + second_iter_leaves + third_iter_leaves +
+ fourth_iter_leaves));
+}
+
+TEST(DocHitInfoIteratorAndNaryTest, Advance) {
+ std::vector<DocHitInfo> first_vector = {
+ DocHitInfo(10), DocHitInfo(9), DocHitInfo(8), DocHitInfo(7),
+ DocHitInfo(6), DocHitInfo(5), DocHitInfo(4), DocHitInfo(3),
+ DocHitInfo(2), DocHitInfo(1), DocHitInfo(0)};
+
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(10), DocHitInfo(8),
+ DocHitInfo(6), DocHitInfo(4),
+ DocHitInfo(2), DocHitInfo(0)};
+
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(9), DocHitInfo(6),
+ DocHitInfo(3), DocHitInfo(0)};
+
+ std::vector<DocHitInfo> fourth_vector = {DocHitInfo(6), DocHitInfo(5),
+ DocHitInfo(0)};
+
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(first_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(second_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(third_vector));
+ iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(fourth_vector));
+ DocHitInfoIteratorAndNary and_iter(std::move(iterators));
+
+ EXPECT_THAT(GetDocumentIds(&and_iter), ElementsAre(6, 0));
+}
+
+TEST(DocHitInfoIteratorAndNaryTest, SectionIdMask) {
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ SectionIdMask section_id_mask1 = 0b01000101; // hits in sections 0, 2, 6
+ SectionIdMask section_id_mask2 = 0b00000110; // hits in sections 1, 2
+ SectionIdMask section_id_mask3 = 0b00001100; // hits in sections 2, 3
+ SectionIdMask section_id_mask4 = 0b00100100; // hits in sections 2, 5
+ SectionIdMask mask_anded_result = 0b00000100;
+ SectionIdMask mask_ored_result = 0b01101111;
+
+ std::vector<DocHitInfo> first_vector = {DocHitInfo(4, section_id_mask1)};
+ std::vector<DocHitInfo> second_vector = {DocHitInfo(4, section_id_mask2)};
+ std::vector<DocHitInfo> third_vector = {DocHitInfo(4, section_id_mask3)};
+ std::vector<DocHitInfo> fourth_vector = {DocHitInfo(4, section_id_mask4)};
+
+ auto first_iter = std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+ first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+
+ auto second_iter = std::make_unique<DocHitInfoIteratorDummy>(second_vector);
+ second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+
+ auto third_iter = std::make_unique<DocHitInfoIteratorDummy>(third_vector);
+ third_iter->set_hit_intersect_section_ids_mask(section_id_mask3);
+
+ auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>(fourth_vector);
+ fourth_iter->set_hit_intersect_section_ids_mask(section_id_mask4);
+
+ std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+ iterators.push_back(std::move(first_iter));
+ iterators.push_back(std::move(second_iter));
+ iterators.push_back(std::move(third_iter));
+ iterators.push_back(std::move(fourth_iter));
+
+ DocHitInfoIteratorAndNary and_iter(std::move(iterators));
+
+ ICING_EXPECT_OK(and_iter.Advance());
+ EXPECT_THAT(and_iter.doc_hit_info().hit_section_ids_mask(),
+ Eq(mask_ored_result));
+ EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc
new file mode 100644
index 0000000..a19c1b1
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc
@@ -0,0 +1,142 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/util/clock.h"
+
+namespace icing {
+namespace lib {
+
+// Wraps `delegate` with the filters described by `options`. Does not take
+// ownership of document_store, schema_store or clock; they must outlive this
+// iterator. The current time is sampled once here, not per Advance() call.
+DocHitInfoIteratorFilter::DocHitInfoIteratorFilter(
+    std::unique_ptr<DocHitInfoIterator> delegate,
+    const DocumentStore* document_store, const SchemaStore* schema_store,
+    const Clock* clock, const Options& options)
+    : delegate_(std::move(delegate)),
+      document_store_(*document_store),
+      schema_store_(*schema_store),
+      options_(options),
+      current_seconds_(clock->GetCurrentSeconds()) {
+  // Precompute all the NamespaceIds so Advance() can filter with integer id
+  // set lookups instead of string comparisons.
+  for (std::string_view name_space : options_.namespaces) {
+    auto namespace_id_or = document_store_.GetNamespaceId(name_space);
+
+    // If we can't find the NamespaceId, just throw it away
+    if (namespace_id_or.ok()) {
+      target_namespace_ids_.emplace(namespace_id_or.ValueOrDie());
+    }
+  }
+
+  // Precompute all the SchemaTypeIds, same rationale as above.
+  for (std::string_view schema_type : options_.schema_types) {
+    auto schema_type_id_or = schema_store_.GetSchemaTypeId(schema_type);
+
+    // If we can't find the SchemaTypeId, just throw it away
+    if (schema_type_id_or.ok()) {
+      target_schema_type_ids_.emplace(schema_type_id_or.ValueOrDie());
+    }
+  }
+}
+
+// Advances the delegate until a DocHitInfo passes every configured filter
+// (existence, namespace, schema type, expiration), mirroring it into
+// doc_hit_info_/hit_intersect_section_ids_mask_. Returns RESOURCE_EXHAUSTED
+// when the delegate is exhausted, INTERNAL if the current time is unknown.
+//
+// NOTE(review): each rejected hit recurses into Advance(); a long run of
+// filtered-out documents grows the call stack proportionally — consider an
+// iterative loop if that ever becomes a concern.
+libtextclassifier3::Status DocHitInfoIteratorFilter::Advance() {
+  if (!delegate_->Advance().ok()) {
+    // Didn't find anything on the delegate iterator.
+    doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+    hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+    return absl_ports::ResourceExhaustedError(
+        "No more DocHitInfos in iterator");
+  }
+
+  // current_seconds_ was sampled once in the constructor; -1 signals that the
+  // clock failed.
+  if (current_seconds_ == -1) {
+    // We couldn't get the current time, meaning we can't tell which documents
+    // are expired or not. So just don't return anything.
+    return absl_ports::InternalError(
+        "Couldn't get current time. Try again in a bit");
+  }
+
+  if (options_.filter_deleted) {
+    if (!document_store_.DoesDocumentExist(
+            delegate_->doc_hit_info().document_id())) {
+      // Document doesn't exist, keep searching
+      return Advance();
+    }
+  }
+
+  // Try to get the DocumentFilterData
+  auto document_filter_data_or = document_store_.GetDocumentFilterData(
+      delegate_->doc_hit_info().document_id());
+  if (!document_filter_data_or.ok()) {
+    // Didn't find the DocumentFilterData in the filter cache. This could be
+    // because the DocumentId isn't valid or the filter cache is in some invalid
+    // state. This is bad, but not the query's responsibility to fix, so just
+    // skip this result for now.
+    return Advance();
+  }
+  // We should be guaranteed that this exists now.
+  DocumentFilterData data = std::move(document_filter_data_or).ValueOrDie();
+
+  // Empty namespaces/schema_types options mean "accept everything"; only
+  // consult the precomputed id sets when a restriction was requested.
+  if (!options_.namespaces.empty() &&
+      target_namespace_ids_.count(data.namespace_id()) == 0) {
+    // Doesn't match one of the specified namespaces. Keep searching
+    return Advance();
+  }
+
+  if (!options_.schema_types.empty() &&
+      target_schema_type_ids_.count(data.schema_type_id()) == 0) {
+    // Doesn't match one of the specified schema types. Keep searching
+    return Advance();
+  }
+
+  if (current_seconds_ >= data.expiration_timestamp_secs()) {
+    // Current time has exceeded the document's expiration time
+    return Advance();
+  }
+
+  // Satisfied all our specified filters
+  doc_hit_info_ = delegate_->doc_hit_info();
+  hit_intersect_section_ids_mask_ = delegate_->hit_intersect_section_ids_mask();
+  return libtextclassifier3::Status::OK;
+}
+
+// Pure pass-through to the wrapped iterator; filtering adds no block reads.
+int32_t DocHitInfoIteratorFilter::GetNumBlocksInspected() const {
+  return delegate_->GetNumBlocksInspected();
+}
+
+// Pure pass-through to the wrapped iterator's leaf-advance counter.
+int32_t DocHitInfoIteratorFilter::GetNumLeafAdvanceCalls() const {
+  return delegate_->GetNumLeafAdvanceCalls();
+}
+
+// Delegates string representation; the filter itself adds no decoration.
+std::string DocHitInfoIteratorFilter::ToString() const {
+  return delegate_->ToString();
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.h b/icing/index/iterator/doc-hit-info-iterator-filter.h
new file mode 100644
index 0000000..954a973
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.h
@@ -0,0 +1,88 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_FILTER_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_FILTER_H_
+
+#include <cstdint>
+#include <ctime>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-store.h"
+#include "icing/util/clock.h"
+
+namespace icing {
+namespace lib {
+
+// An iterator decorator that filters out DocHitInfos whose documents are
+// deleted/nonexistent, expired, or outside the requested namespaces and
+// schema types (see Options).
+class DocHitInfoIteratorFilter : public DocHitInfoIterator {
+ public:
+  struct Options {
+    // Filter out/don't return DocHitInfos that are associated with nonexistent
+    // Documents.
+    bool filter_deleted = true;
+
+    // List of namespaces that documents must have. An empty vector means that
+    // all namespaces are valid, and no documents will be filtered out.
+    //
+    // Note that if we want to reference the strings in namespaces later, ensure
+    // that the caller who passed the Options class outlives the
+    // DocHitInfoIteratorFilter.
+    std::vector<std::string_view> namespaces;
+
+    // List of schema types that documents must have. An empty vector means that
+    // all schema types are valid, and no documents will be filtered out.
+    //
+    // Note that if we want to reference the strings in schema types later,
+    // ensure that the caller who passed the Options class outlives the
+    // DocHitInfoIteratorFilter.
+    std::vector<std::string_view> schema_types;
+  };
+
+  // Does not take ownership of document_store, schema_store or clock; all
+  // three must outlive this iterator.
+  explicit DocHitInfoIteratorFilter(
+      std::unique_ptr<DocHitInfoIterator> delegate,
+      const DocumentStore* document_store, const SchemaStore* schema_store,
+      const Clock* clock, const Options& options);
+
+  libtextclassifier3::Status Advance() override;
+
+  int32_t GetNumBlocksInspected() const override;
+
+  int32_t GetNumLeafAdvanceCalls() const override;
+
+  std::string ToString() const override;
+
+ private:
+  std::unique_ptr<DocHitInfoIterator> delegate_;
+  const DocumentStore& document_store_;
+  const SchemaStore& schema_store_;
+  const Options options_;
+  // Ids precomputed from Options::namespaces/schema_types at construction;
+  // names that don't resolve are silently dropped.
+  std::unordered_set<NamespaceId> target_namespace_ids_;
+  std::unordered_set<SchemaTypeId> target_schema_type_ids_;
+  // Sampled once at construction; -1 indicates the clock couldn't provide a
+  // current time (Advance() then returns INTERNAL).
+  const std::time_t current_seconds_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_FILTER_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
new file mode 100644
index 0000000..9c71d54
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
@@ -0,0 +1,887 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
+
+#include <limits>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/proto/document.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+// Fixture for exercising the deleted/nonexistent-document filter. Builds a
+// DocumentStore/SchemaStore with a single "email" schema type and three test
+// documents in the "icing" namespace.
+class DocHitInfoIteratorDeletedFilterTest : public ::testing::Test {
+ protected:
+  DocHitInfoIteratorDeletedFilterTest()
+      : test_dir_(GetTestTempDir() + "/icing") {}
+
+  void SetUp() override {
+    filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+    test_document1_ =
+        DocumentBuilder().SetKey("icing", "email/1").SetSchema("email").Build();
+    test_document2_ =
+        DocumentBuilder().SetKey("icing", "email/2").SetSchema("email").Build();
+    test_document3_ =
+        DocumentBuilder().SetKey("icing", "email/3").SetSchema("email").Build();
+
+    SchemaProto schema;
+    auto type_config = schema.add_types();
+    type_config->set_schema_type("email");
+
+    ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
+                               SchemaStore::Create(&filesystem_, test_dir_));
+    ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        document_store_,
+        DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+                              schema_store_.get()));
+  }
+
+  void TearDown() override {
+    // Destroy objects before the whole directory is removed because they
+    // persist data in the destructor.
+    document_store_.reset();
+    schema_store_.reset();
+    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+  }
+
+  std::unique_ptr<SchemaStore> schema_store_;
+  std::unique_ptr<DocumentStore> document_store_;
+  FakeClock fake_clock_;
+  const Filesystem filesystem_;
+  const std::string test_dir_;
+  DocumentProto test_document1_;
+  DocumentProto test_document2_;
+  DocumentProto test_document3_;
+  // Default-constructed options: filter_deleted=true, no namespace/schema
+  // restrictions. Individual tests tweak fields before building the filter.
+  DocHitInfoIteratorFilter::Options options_;
+};
+
+// An empty delegate yields no documents regardless of what's in the store.
+TEST_F(DocHitInfoIteratorDeletedFilterTest, EmptyOriginalIterator) {
+  ICING_ASSERT_OK(document_store_->Put(test_document1_));
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator_empty =
+      std::make_unique<DocHitInfoIteratorDummy>();
+
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator_empty), document_store_.get(),
+      schema_store_.get(), &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
+}
+
+// With filter_deleted=false, a deleted document is still returned.
+TEST_F(DocHitInfoIteratorDeletedFilterTest, TurnOffDeletedFilterOk) {
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(test_document2_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             document_store_->Put(test_document3_));
+
+  // Deletes test document 2
+  ICING_ASSERT_OK(document_store_->Delete(test_document2_.namespace_(),
+                                          test_document2_.uri()));
+
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1),
+                                           DocHitInfo(document_id2),
+                                           DocHitInfo(document_id3)};
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  options_.filter_deleted = false;
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator),
+              ElementsAre(document_id1, document_id2, document_id3));
+}
+
+// Default options drop hits whose documents were deleted from the store.
+TEST_F(DocHitInfoIteratorDeletedFilterTest, DeletedDocumentsAreFiltered) {
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(test_document2_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             document_store_->Put(test_document3_));
+  // Deletes test document 2
+  ICING_ASSERT_OK(document_store_->Delete(test_document2_.namespace_(),
+                                          test_document2_.uri()));
+
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1),
+                                           DocHitInfo(document_id2),
+                                           DocHitInfo(document_id3)};
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator),
+              ElementsAre(document_id1, document_id3));
+}
+
+// Hits referencing document ids never put in the store are silently skipped.
+TEST_F(DocHitInfoIteratorDeletedFilterTest, NonExistingDocumentsAreFiltered) {
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(test_document2_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             document_store_->Put(test_document3_));
+
+  // Document ids 7, 8, 9 are not existing
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1),
+                                           DocHitInfo(document_id2),
+                                           DocHitInfo(document_id3),
+                                           DocHitInfo(7),
+                                           DocHitInfo(8),
+                                           DocHitInfo(9)};
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator),
+              ElementsAre(document_id1, document_id2, document_id3));
+}
+
+// A negative (invalid) document id is skipped, exhausting the iterator.
+TEST_F(DocHitInfoIteratorDeletedFilterTest, NegativeDocumentIdIsIgnored) {
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(-1)};
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(filtered_iterator.Advance(),
+              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+// kInvalidDocumentId in the delegate is skipped, exhausting the iterator.
+TEST_F(DocHitInfoIteratorDeletedFilterTest, InvalidDocumentIdIsIgnored) {
+  // kInvalidDocumentId should be skipped.
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(kInvalidDocumentId)};
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(filtered_iterator.Advance(),
+              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST_F(DocHitInfoIteratorDeletedFilterTest, GreaterThanMaxDocumentIdIsIgnored) {
+  // Document ids that are greater than the max value are invalid and should be
+  // skipped.
+  DocumentId invalid_greater_than_max = kMaxDocumentId + 2;
+  std::vector<DocHitInfo> doc_hit_infos = {
+      DocHitInfo(invalid_greater_than_max)};
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(filtered_iterator.Advance(),
+              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+// Fixture for exercising the namespace filter: four documents spread across
+// three namespaces, all of the single "email" schema type.
+class DocHitInfoIteratorNamespaceFilterTest : public ::testing::Test {
+ protected:
+  DocHitInfoIteratorNamespaceFilterTest()
+      : test_dir_(GetTestTempDir() + "/icing") {}
+
+  void SetUp() override {
+    filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+    document1_namespace1_ = DocumentBuilder()
+                                .SetKey(namespace1_, "email/1")
+                                .SetSchema("email")
+                                .Build();
+    document2_namespace1_ = DocumentBuilder()
+                                .SetKey(namespace1_, "email/2")
+                                .SetSchema("email")
+                                .Build();
+    document1_namespace2_ = DocumentBuilder()
+                                .SetKey(namespace2_, "email/1")
+                                .SetSchema("email")
+                                .Build();
+    document1_namespace3_ = DocumentBuilder()
+                                .SetKey(namespace3_, "email/1")
+                                .SetSchema("email")
+                                .Build();
+
+    SchemaProto schema;
+    auto type_config = schema.add_types();
+    type_config->set_schema_type("email");
+
+    ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
+                               SchemaStore::Create(&filesystem_, test_dir_));
+    ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        document_store_,
+        DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+                              schema_store_.get()));
+  }
+
+  void TearDown() override {
+    // Destroy objects before the whole directory is removed because they
+    // persist data in the destructor.
+    document_store_.reset();
+    schema_store_.reset();
+    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+  }
+
+  std::unique_ptr<SchemaStore> schema_store_;
+  std::unique_ptr<DocumentStore> document_store_;
+  FakeClock fake_clock_;
+  const Filesystem filesystem_;
+  const std::string test_dir_;
+  const std::string namespace1_ = "namespace1";
+  const std::string namespace2_ = "namespace2";
+  const std::string namespace3_ = "namespace3";
+  DocumentProto document1_namespace1_;
+  DocumentProto document2_namespace1_;
+  DocumentProto document1_namespace2_;
+  DocumentProto document1_namespace3_;
+  // Per-test options; tests assign options_.namespaces before filtering.
+  DocHitInfoIteratorFilter::Options options_;
+};
+
+// An empty delegate stays empty even with no namespace restriction.
+TEST_F(DocHitInfoIteratorNamespaceFilterTest, EmptyOriginalIterator) {
+  std::unique_ptr<DocHitInfoIterator> original_iterator_empty =
+      std::make_unique<DocHitInfoIteratorDummy>();
+
+  options_.namespaces = std::vector<std::string_view>{};
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator_empty), document_store_.get(),
+      schema_store_.get(), &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
+}
+
+// A namespace name that was never registered filters out everything.
+TEST_F(DocHitInfoIteratorNamespaceFilterTest,
+       NonexistentNamespacesReturnsEmpty) {
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(document1_namespace1_));
+
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  options_.namespaces = std::vector<std::string_view>{"nonexistent_namespace"};
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
+}
+
+// An empty namespaces list means "no namespace restriction".
+TEST_F(DocHitInfoIteratorNamespaceFilterTest, NoNamespacesReturnsAll) {
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(document1_namespace1_));
+
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  options_.namespaces = std::vector<std::string_view>{};
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
+}
+
+// Documents in namespaces other than the requested one are dropped even
+// though they exist in the store.
+TEST_F(DocHitInfoIteratorNamespaceFilterTest,
+       FilterOutExistingDocumentFromDifferentNamespace) {
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(document1_namespace1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(document2_namespace1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             document_store_->Put(document1_namespace2_));
+
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1),
+                                           DocHitInfo(document_id2),
+                                           DocHitInfo(document_id3)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  options_.namespaces = std::vector<std::string_view>{namespace1_};
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator),
+              ElementsAre(document_id1, document_id2));
+}
+
+// Multiple requested namespaces are OR'd: a document matching any passes.
+TEST_F(DocHitInfoIteratorNamespaceFilterTest, FilterForMultipleNamespacesOk) {
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(document1_namespace1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(document2_namespace1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             document_store_->Put(document1_namespace2_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+                             document_store_->Put(document1_namespace3_));
+
+  std::vector<DocHitInfo> doc_hit_infos = {
+      DocHitInfo(document_id1), DocHitInfo(document_id2),
+      DocHitInfo(document_id3), DocHitInfo(document_id4)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  options_.namespaces = std::vector<std::string_view>{namespace1_, namespace3_};
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator),
+              ElementsAre(document_id1, document_id2, document_id4));
+}
+
+// Fixture for exercising the schema-type filter: four documents in one
+// namespace spanning three schema types ("email", "message", "person").
+class DocHitInfoIteratorSchemaTypeFilterTest : public ::testing::Test {
+ protected:
+  DocHitInfoIteratorSchemaTypeFilterTest()
+      : test_dir_(GetTestTempDir() + "/icing") {}
+
+  void SetUp() override {
+    filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+    document1_schema1_ =
+        DocumentBuilder().SetKey("namespace", "1").SetSchema(schema1_).Build();
+    document2_schema2_ =
+        DocumentBuilder().SetKey("namespace", "2").SetSchema(schema2_).Build();
+    document3_schema3_ =
+        DocumentBuilder().SetKey("namespace", "3").SetSchema(schema3_).Build();
+    document4_schema1_ =
+        DocumentBuilder().SetKey("namespace", "4").SetSchema(schema1_).Build();
+
+    SchemaProto schema;
+    auto type_config = schema.add_types();
+    type_config->set_schema_type(schema1_);
+    type_config = schema.add_types();
+    type_config->set_schema_type(schema2_);
+    type_config = schema.add_types();
+    type_config->set_schema_type(schema3_);
+
+    ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
+                               SchemaStore::Create(&filesystem_, test_dir_));
+    ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        document_store_,
+        DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+                              schema_store_.get()));
+  }
+
+  void TearDown() override {
+    // Destroy objects before the whole directory is removed because they
+    // persist data in the destructor.
+    document_store_.reset();
+    schema_store_.reset();
+    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+  }
+
+  std::unique_ptr<SchemaStore> schema_store_;
+  std::unique_ptr<DocumentStore> document_store_;
+  FakeClock fake_clock_;
+  const Filesystem filesystem_;
+  const std::string test_dir_;
+  const std::string schema1_ = "email";
+  const std::string schema2_ = "message";
+  const std::string schema3_ = "person";
+  DocumentProto document1_schema1_;
+  DocumentProto document2_schema2_;
+  DocumentProto document3_schema3_;
+  DocumentProto document4_schema1_;
+  // Per-test options; tests assign options_.schema_types before filtering.
+  DocHitInfoIteratorFilter::Options options_;
+};
+
+// An empty delegate stays empty even with no schema-type restriction.
+TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, EmptyOriginalIterator) {
+  std::unique_ptr<DocHitInfoIterator> original_iterator_empty =
+      std::make_unique<DocHitInfoIteratorDummy>();
+
+  options_.schema_types = std::vector<std::string_view>{};
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator_empty), document_store_.get(),
+      schema_store_.get(), &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
+}
+
+// A schema type not present in the schema filters out everything.
+TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
+       NonexistentSchemaTypeReturnsEmpty) {
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(document1_schema1_));
+
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  options_.schema_types =
+      std::vector<std::string_view>{"nonexistent_schema_type"};
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
+}
+
+// An empty schema_types list means "no schema-type restriction".
+TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, NoSchemaTypesReturnsAll) {
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(document1_schema1_));
+
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  options_.schema_types = std::vector<std::string_view>{};
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
+}
+
+// Documents of schema types other than the requested one are dropped even
+// though they exist in the store.
+TEST_F(DocHitInfoIteratorSchemaTypeFilterTest,
+       FilterOutExistingDocumentFromDifferentSchemaTypes) {
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(document1_schema1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(document2_schema2_));
+
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1),
+                                           DocHitInfo(document_id2)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  options_.schema_types = std::vector<std::string_view>{schema1_};
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
+}
+
+// Multiple requested schema types are OR'd: a document matching any passes.
+TEST_F(DocHitInfoIteratorSchemaTypeFilterTest, FilterForMultipleSchemaTypesOk) {
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(document1_schema1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(document2_schema2_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             document_store_->Put(document3_schema3_));
+
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1),
+                                           DocHitInfo(document_id2),
+                                           DocHitInfo(document_id3)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  options_.schema_types = std::vector<std::string_view>{schema2_, schema3_};
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator),
+              ElementsAre(document_id2, document_id3));
+}
+
+// Fixture for exercising TTL/expiration filtering; tests create documents
+// with explicit creation timestamps and TTLs and control "now" via
+// fake_clock_.SetSeconds().
+class DocHitInfoIteratorExpirationFilterTest : public ::testing::Test {
+ protected:
+  DocHitInfoIteratorExpirationFilterTest()
+      : test_dir_(GetTestTempDir() + "/icing") {}
+
+  void SetUp() override {
+    filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+
+    SchemaProto schema;
+    auto type_config = schema.add_types();
+    type_config->set_schema_type(email_schema_);
+
+    ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
+                               SchemaStore::Create(&filesystem_, test_dir_));
+    ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        document_store_,
+        DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+                              schema_store_.get()));
+  }
+
+  void TearDown() override {
+    // Destroy objects before the whole directory is removed because they
+    // persist data in the destructor.
+    document_store_.reset();
+    schema_store_.reset();
+    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+  }
+
+  std::unique_ptr<SchemaStore> schema_store_;
+  std::unique_ptr<DocumentStore> document_store_;
+  FakeClock fake_clock_;
+  const Filesystem filesystem_;
+  const std::string test_dir_;
+  const std::string email_schema_ = "email";
+  DocHitInfoIteratorFilter::Options options_;
+};
+
+// A TTL of 0 means "never expires"; the document survives the filter.
+TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) {
+  // Insert a document
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("namespace", "1")
+                               .SetSchema(email_schema_)
+                               .SetCreationTimestampSecs(0)
+                               .SetTtlSecs(0)
+                               .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(document));
+
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  // Arbitrary value
+  fake_clock_.SetSeconds(100);
+
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
+}
+
+// A document whose expiration time lies in the future is not filtered out.
+TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) {
+  // Insert a document
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("namespace", "1")
+                               .SetSchema(email_schema_)
+                               .SetCreationTimestampSecs(1)
+                               .SetTtlSecs(100)
+                               .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(document));
+
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  // Arbitrary value, but must be less than document's creation_timestamp + ttl
+  fake_clock_.SetSeconds(50);
+
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
+}
+
+// The expiration boundary is inclusive: now == creation + ttl is expired
+// (the filter uses >=).
+TEST_F(DocHitInfoIteratorExpirationFilterTest, EqualTtlFilteredOut) {
+  // Insert a document
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("namespace", "1")
+                               .SetSchema(email_schema_)
+                               .SetCreationTimestampSecs(0)
+                               .SetTtlSecs(100)
+                               .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(document));
+
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  // Current time is exactly the document's creation_timestamp + ttl
+  fake_clock_.SetSeconds(100);
+
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
+}
+
+// A document whose expiration time has already passed is filtered out.
+TEST_F(DocHitInfoIteratorExpirationFilterTest, PastTtlFilteredOut) {
+  // Insert a document
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("namespace", "1")
+                               .SetSchema(email_schema_)
+                               .SetCreationTimestampSecs(0)
+                               .SetTtlSecs(100)
+                               .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(document));
+
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  // Arbitrary value, but must be greater than the document's
+  // creation_timestamp + ttl
+  fake_clock_.SetSeconds(101);
+
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
+}
+
+TEST_F(DocHitInfoIteratorExpirationFilterTest,
+       InvalidTimeFiltersReturnsInternalError) {
+  // Put something in the original iterator so we don't get a ResourceExhausted
+  // error
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(/*document_id_in=*/0)};
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  // -1 is the value returned on std::time() error
+  fake_clock_.SetSeconds(-1);
+
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options_);
+
+  // A negative current time cannot be compared against expirations, so the
+  // filter surfaces INTERNAL rather than silently filtering.
+  EXPECT_THAT(filtered_iterator.Advance(),
+              StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+// Fixture exercising DocHitInfoIteratorFilter with combined namespace,
+// schema-type, deletion, and expiration filtering. Five documents spanning
+// two namespaces and two schema types are built (but not Put) in SetUp().
+class DocHitInfoIteratorFilterTest : public ::testing::Test {
+ protected:
+  DocHitInfoIteratorFilterTest() : test_dir_(GetTestTempDir() + "/icing") {}
+
+  void SetUp() override {
+    filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+    document1_namespace1_schema1_ = DocumentBuilder()
+                                        .SetKey(namespace1_, "1")
+                                        .SetSchema(schema1_)
+                                        .SetCreationTimestampSecs(100)
+                                        .SetTtlSecs(100)
+                                        .Build();
+    document2_namespace1_schema1_ = DocumentBuilder()
+                                        .SetKey(namespace1_, "2")
+                                        .SetSchema(schema1_)
+                                        .SetCreationTimestampSecs(100)
+                                        .SetTtlSecs(100)
+                                        .Build();
+    document3_namespace2_schema1_ = DocumentBuilder()
+                                        .SetKey(namespace2_, "3")
+                                        .SetSchema(schema1_)
+                                        .SetCreationTimestampSecs(100)
+                                        .SetTtlSecs(100)
+                                        .Build();
+    document4_namespace1_schema2_ = DocumentBuilder()
+                                        .SetKey(namespace1_, "4")
+                                        .SetSchema(schema2_)
+                                        .SetCreationTimestampSecs(100)
+                                        .SetTtlSecs(100)
+                                        .Build();
+    // Created at t=0, so it expires earlier (t=100) than the others (t=200).
+    document5_namespace1_schema1_ = DocumentBuilder()
+                                        .SetKey(namespace1_, "5")
+                                        .SetSchema(schema1_)
+                                        .SetCreationTimestampSecs(0)
+                                        .SetTtlSecs(100)
+                                        .Build();
+
+    // Register both schema types before creating the DocumentStore.
+    SchemaProto schema;
+    auto type_config = schema.add_types();
+    type_config->set_schema_type(schema1_);
+    type_config = schema.add_types();
+    type_config->set_schema_type(schema2_);
+
+    ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
+                               SchemaStore::Create(&filesystem_, test_dir_));
+    ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        document_store_,
+        DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+                              schema_store_.get()));
+  }
+
+  void TearDown() override {
+    // Destroy objects before the whole directory is removed because they
+    // persist data in the destructor.
+    document_store_.reset();
+    schema_store_.reset();
+    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+  }
+
+  std::unique_ptr<SchemaStore> schema_store_;
+  std::unique_ptr<DocumentStore> document_store_;
+  FakeClock fake_clock_;
+  const Filesystem filesystem_;
+  const std::string test_dir_;
+  const std::string namespace1_ = "namespace1";
+  const std::string namespace2_ = "namespace2";
+  const std::string schema1_ = "email";
+  const std::string schema2_ = "message";
+  DocumentProto document1_namespace1_schema1_;
+  DocumentProto document2_namespace1_schema1_;
+  DocumentProto document3_namespace2_schema1_;
+  DocumentProto document4_namespace1_schema2_;
+  DocumentProto document5_namespace1_schema1_;  // earliest expiration (t=100)
+};
+
+TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id1,
+      document_store_->Put(document1_namespace1_schema1_));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id2,
+      document_store_->Put(document2_namespace1_schema1_));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id3,
+      document_store_->Put(document3_namespace2_schema1_));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id4,
+      document_store_->Put(document4_namespace1_schema2_));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id5,
+      document_store_->Put(document5_namespace1_schema1_));
+
+  // Deletes document2, causing it to be filtered out
+  ICING_ASSERT_OK(
+      document_store_->Delete(document2_namespace1_schema1_.namespace_(),
+                              document2_namespace1_schema1_.uri()));
+
+  std::vector<DocHitInfo> doc_hit_infos = {
+      DocHitInfo(document_id1), DocHitInfo(document_id2),
+      DocHitInfo(document_id3), DocHitInfo(document_id4),
+      DocHitInfo(document_id5)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  DocHitInfoIteratorFilter::Options options;
+
+  // Filters out document3 by namespace
+  options.namespaces = std::vector<std::string_view>{namespace1_};
+
+  // Filters out document4 by schema type
+  options.schema_types = std::vector<std::string_view>{schema1_};
+
+  // Filters out document5 since it's expired (created t=0, ttl 100s < 199s)
+  FakeClock fake_clock;
+  fake_clock.SetSeconds(199);
+
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock, options);
+
+  // Only document1 passes every filter simultaneously.
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
+}
+
+TEST_F(DocHitInfoIteratorFilterTest, SectionIdMasksArePopulatedCorrectly) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id1,
+      document_store_->Put(document1_namespace1_schema1_));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id2,
+      document_store_->Put(document2_namespace1_schema1_));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id3,
+      document_store_->Put(document3_namespace2_schema1_));
+
+  SectionIdMask section_id_mask1 = 0b01001001;  // hits in sections 0, 3, 6
+  SectionIdMask section_id_mask2 = 0b10010010;  // hits in sections 1, 4, 7
+  SectionIdMask section_id_mask3 = 0b00100100;  // hits in sections 2, 5
+  std::vector<SectionId> section_ids1 = {0, 3, 6};
+  std::vector<SectionId> section_ids2 = {1, 4, 7};
+  std::vector<SectionId> section_ids3 = {2, 5};
+  std::vector<DocHitInfo> doc_hit_infos = {
+      DocHitInfo(document_id1, section_id_mask1),
+      DocHitInfo(document_id2, section_id_mask2),
+      DocHitInfo(document_id3, section_id_mask3)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  // Default options: no filtering criteria, so all hits pass through and the
+  // filter must preserve each hit's section id mask untouched.
+  DocHitInfoIteratorFilter::Options options;
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options);
+
+  EXPECT_THAT(GetDocHitInfos(&filtered_iterator),
+              ElementsAre(EqualsDocHitInfo(document_id1, section_ids1),
+                          EqualsDocHitInfo(document_id2, section_ids2),
+                          EqualsDocHitInfo(document_id3, section_ids3)));
+}
+
+TEST_F(DocHitInfoIteratorFilterTest, GetNumBlocksInspected) {
+  // Stats are delegated straight through to the wrapped iterator.
+  auto original_iterator = std::make_unique<DocHitInfoIteratorDummy>();
+  original_iterator->SetNumBlocksInspected(5);
+
+  DocHitInfoIteratorFilter::Options options;
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options);
+
+  EXPECT_THAT(filtered_iterator.GetNumBlocksInspected(), Eq(5));
+}
+
+TEST_F(DocHitInfoIteratorFilterTest, GetNumLeafAdvanceCalls) {
+  // Stats are delegated straight through to the wrapped iterator.
+  auto original_iterator = std::make_unique<DocHitInfoIteratorDummy>();
+  original_iterator->SetNumLeafAdvanceCalls(6);
+
+  DocHitInfoIteratorFilter::Options options;
+  DocHitInfoIteratorFilter filtered_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      &fake_clock_, options);
+
+  EXPECT_THAT(filtered_iterator.GetNumLeafAdvanceCalls(), Eq(6));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-not.cc b/icing/index/iterator/doc-hit-info-iterator-not.cc
new file mode 100644
index 0000000..ff39acc
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-not.cc
@@ -0,0 +1,79 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-not.h"
+
+#include <cstdint>
+
+#include "utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// Takes ownership of `to_be_excluded`. Candidate DocumentIds are enumerated
+// from `document_id_limit` down to 0 by the internal AllDocumentId iterator.
+DocHitInfoIteratorNot::DocHitInfoIteratorNot(
+    std::unique_ptr<DocHitInfoIterator> to_be_excluded,
+    DocumentId document_id_limit)
+    : to_be_excluded_(std::move(to_be_excluded)),
+      all_document_id_iterator_(
+          DocHitInfoIteratorAllDocumentId(document_id_limit)) {}
+
+// Advances to the next candidate DocumentId (decreasing order) that is NOT
+// produced by `to_be_excluded_`. Returns RESOURCE_EXHAUSTED once the
+// [document_id_limit, 0] range is exhausted.
+libtextclassifier3::Status DocHitInfoIteratorNot::Advance() {
+  if (!all_document_id_iterator_.Advance().ok()) {
+    // Candidate range exhausted; reset to an invalid state.
+    doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+    return absl_ports::ResourceExhaustedError(
+        "No more DocHitInfos in iterator");
+  }
+
+  if (all_document_id_iterator_.doc_hit_info().document_id() <
+      to_be_excluded_->doc_hit_info().document_id()) {
+    // Since DocumentIds are returned from DocHitInfoIterators in decreasing
+    // order, we have passed the last NOT result if we're smaller than its
+    // DocumentId. Advance the NOT result if so.
+    // Errors are ignored: an exhausted exclusion iterator simply stops
+    // matching any further candidates.
+    to_be_excluded_->Advance().IgnoreError();
+  }
+
+  if (all_document_id_iterator_.doc_hit_info().document_id() ==
+      to_be_excluded_->doc_hit_info().document_id()) {
+    // This is a NOT result, skip and Advance to the next result.
+    // NOTE(review): one recursion frame per consecutive excluded DocumentId;
+    // a long run of exclusions deepens the stack accordingly.
+    return Advance();
+  }
+
+  // No errors, we've found a valid result
+  doc_hit_info_ = all_document_id_iterator_.doc_hit_info();
+
+  return libtextclassifier3::Status::OK;
+}
+
+int32_t DocHitInfoIteratorNot::GetNumBlocksInspected() const {
+  // Aggregate over both the exclusion child and the candidate enumerator.
+  return to_be_excluded_->GetNumBlocksInspected() +
+         all_document_id_iterator_.GetNumBlocksInspected();
+}
+
+int32_t DocHitInfoIteratorNot::GetNumLeafAdvanceCalls() const {
+  return to_be_excluded_->GetNumLeafAdvanceCalls() +
+         all_document_id_iterator_.GetNumLeafAdvanceCalls();
+}
+
+std::string DocHitInfoIteratorNot::ToString() const {
+  // Debug representation, e.g. "(NOT child)".
+  return absl_ports::StrCat("(NOT ", to_be_excluded_->ToString(), ")");
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-not.h b/icing/index/iterator/doc-hit-info-iterator-not.h
new file mode 100644
index 0000000..52da3db
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-not.h
@@ -0,0 +1,67 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_NOT_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_NOT_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "utils/base/status.h"
+#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// Iterator that will return all documents that are *not* specified by the
+// to_be_excluded_iterator.
+//
+// NOTE: The hit_intersect_section_ids_mask is meaningless for this iterator.
+// When this iterator produces a result, it's because the Document was not
+// present in the to_be_excluded_iterator. There is no concept of the Document
+// having been chosen because it's term was in a specific section. Since we
+// don't know anything about the sections for the Document, the
+// hit_intersect_section_ids_mask is always kSectionIdMaskNone. Correspondingly,
+// this means that the doc_hit_info.hit_section_ids_mask will also always be
+// kSectionIdMaskNone.
+class DocHitInfoIteratorNot : public DocHitInfoIterator {
+ public:
+  // to_be_excluded_iterator: The results of this iterator will be excluded
+  // from this iterator's results.
+  // document_id_limit: The DocumentId that represents the most recently added
+  // Document to the DocumentStore
+  explicit DocHitInfoIteratorNot(
+      std::unique_ptr<DocHitInfoIterator> to_be_excluded_iterator,
+      const DocumentId document_id_limit);
+
+  libtextclassifier3::Status Advance() override;
+
+  int32_t GetNumBlocksInspected() const override;
+
+  int32_t GetNumLeafAdvanceCalls() const override;
+
+  std::string ToString() const override;
+
+ private:
+  // DocumentIds produced by this child are skipped.
+  std::unique_ptr<DocHitInfoIterator> to_be_excluded_;
+  // Enumerates every DocumentId from document_id_limit down to 0; results
+  // are the enumerated ids minus those matched by to_be_excluded_.
+  DocHitInfoIteratorAllDocumentId all_document_id_iterator_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_NOT_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-not_test.cc b/icing/index/iterator/doc-hit-info-iterator-not_test.cc
new file mode 100644
index 0000000..5d0e4ac
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-not_test.cc
@@ -0,0 +1,161 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-not.h"
+
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+TEST(DocHitInfoIteratorNotTest, InvalidDocumentIdLimit) {
+  std::vector<DocHitInfo> exclude_doc_hit_infos = {DocHitInfo(5),
+                                                   DocHitInfo(4)};
+  std::unique_ptr<DocHitInfoIterator> to_be_excluded_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(exclude_doc_hit_infos);
+
+  // A negative limit leaves no valid candidate DocumentIds at all.
+  DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
+                                     /*document_id_limit=*/-1);
+  EXPECT_THAT(not_iterator.Advance(),
+              StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+}
+
+TEST(DocHitInfoIteratorNotTest, NotFirstFewDocumentIdsOk) {
+  std::vector<DocHitInfo> exclude_doc_hit_infos = {DocHitInfo(5),
+                                                   DocHitInfo(4)};
+  std::unique_ptr<DocHitInfoIterator> to_be_excluded_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(exclude_doc_hit_infos);
+
+  DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
+                                     /*document_id_limit=*/5);
+  // 5 and 4 are excluded; remaining ids come back in decreasing order.
+  EXPECT_THAT(GetDocumentIds(&not_iterator), ElementsAre(3, 2, 1, 0));
+}
+
+TEST(DocHitInfoIteratorNotTest, NotLastFewDocumentIdsOk) {
+  std::vector<DocHitInfo> exclude_doc_hit_infos = {DocHitInfo(1),
+                                                   DocHitInfo(0)};
+  std::unique_ptr<DocHitInfoIterator> to_be_excluded_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(exclude_doc_hit_infos);
+
+  DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
+                                     /*document_id_limit=*/5);
+  EXPECT_THAT(GetDocumentIds(&not_iterator), ElementsAre(5, 4, 3, 2));
+}
+
+TEST(DocHitInfoIteratorNotTest, IntermittentDocumentIdOverlapOk) {
+  std::vector<DocHitInfo> exclude_doc_hit_infos = {
+      DocHitInfo(8), DocHitInfo(6), DocHitInfo(4), DocHitInfo(2)};
+  std::unique_ptr<DocHitInfoIterator> to_be_excluded_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(exclude_doc_hit_infos);
+
+  DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
+                                     /*document_id_limit=*/10);
+  EXPECT_THAT(GetDocumentIds(&not_iterator), ElementsAre(10, 9, 7, 5, 3, 1, 0));
+}
+
+TEST(DocHitInfoIteratorNotTest, NoDocumentIdOverlapOk) {
+  // Nothing excluded: every candidate id should be returned.
+  std::vector<DocHitInfo> exclude_doc_hit_infos = {};
+  std::unique_ptr<DocHitInfoIterator> to_be_excluded_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(exclude_doc_hit_infos);
+
+  DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
+                                     /*document_id_limit=*/5);
+  EXPECT_THAT(GetDocumentIds(&not_iterator), ElementsAre(5, 4, 3, 2, 1, 0));
+}
+
+TEST(DocHitInfoIteratorNotTest, AllDocumentIdOverlapOk) {
+  // Everything excluded: the iterator should produce no results.
+  std::vector<DocHitInfo> exclude_doc_hit_infos = {
+      DocHitInfo(5), DocHitInfo(4), DocHitInfo(3),
+      DocHitInfo(2), DocHitInfo(1), DocHitInfo(0)};
+  std::unique_ptr<DocHitInfoIterator> to_be_excluded_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(exclude_doc_hit_infos);
+
+  DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
+                                     /*document_id_limit=*/5);
+  EXPECT_THAT(GetDocumentIds(&not_iterator), IsEmpty());
+}
+
+TEST(DocHitInfoIteratorNotTest, GetNumBlocksInspected) {
+  int to_be_excluded_iterator_blocks = 4;  // arbitrary value
+  auto to_be_excluded_iterator = std::make_unique<DocHitInfoIteratorDummy>();
+  to_be_excluded_iterator->SetNumBlocksInspected(
+      to_be_excluded_iterator_blocks);
+
+  DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
+                                     /*document_id_limit=*/5);
+
+  // The AllDocumentId iterator doesn't count any blocks as being inspected
+  // since it's just decrementing 1 from the document_id_limit.
+  EXPECT_THAT(not_iterator.GetNumBlocksInspected(),
+              Eq(to_be_excluded_iterator_blocks));
+}
+
+TEST(DocHitInfoIteratorNotTest, GetNumLeafAdvanceCalls) {
+  int to_be_excluded_iterator_leaves = 4;  // arbitrary value
+  auto to_be_excluded_iterator = std::make_unique<DocHitInfoIteratorDummy>();
+  to_be_excluded_iterator->SetNumLeafAdvanceCalls(
+      to_be_excluded_iterator_leaves);
+
+  int all_document_id_limit = 5;
+  // Since we iterate from [limit, 0] inclusive, add 1 for the 0th advance call
+  int all_leaf_advance_calls = all_document_id_limit + 1;
+  DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
+                                     all_document_id_limit);
+
+  while (not_iterator.Advance().ok()) {
+    // Advance through the whole not iterator
+  }
+
+  // The AllDocumentId iterator counts each DocumentId as a leaf advance call
+  EXPECT_THAT(not_iterator.GetNumLeafAdvanceCalls(),
+              Eq(to_be_excluded_iterator_leaves + all_leaf_advance_calls));
+}
+
+TEST(DocHitInfoIteratorNotTest, SectionIdsAlwaysNone) {
+  SectionIdMask section_id_mask5 = 1U << 5;  // arbitrary non-zero value
+  SectionIdMask section_id_mask4 = 1U << 4;  // arbitrary non-zero value
+  std::vector<DocHitInfo> exclude_doc_hit_infos = {
+      DocHitInfo(5, section_id_mask5), DocHitInfo(4, section_id_mask4)};
+  std::unique_ptr<DocHitInfoIterator> to_be_excluded_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(exclude_doc_hit_infos);
+
+  DocHitInfoIteratorNot not_iterator(std::move(to_be_excluded_iterator),
+                                     /*document_id_limit=*/5);
+  // NOT results carry no section information (see class comment in the
+  // header), so every produced hit has kSectionIdMaskNone.
+  EXPECT_THAT(GetDocHitInfos(&not_iterator),
+              ElementsAre(DocHitInfo(3, kSectionIdMaskNone),
+                          DocHitInfo(2, kSectionIdMaskNone),
+                          DocHitInfo(1, kSectionIdMaskNone),
+                          DocHitInfo(0, kSectionIdMaskNone)));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.cc b/icing/index/iterator/doc-hit-info-iterator-or.cc
new file mode 100644
index 0000000..b4dc86a
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-or.cc
@@ -0,0 +1,239 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-or.h"
+
+#include <cstdint>
+
+#include "utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// When combining Or iterators, n-ary operator has better performance when
+// number of operands > 2 according to benchmark cl/243321264
+// TODO (samzheng): Tune this number when it's necessary, e.g. implementation
+// changes.
+// Operand counts equal to this threshold use the binary DocHitInfoIteratorOr;
+// anything else (other than a single operand) uses DocHitInfoIteratorOrNary.
+constexpr int kBinaryOrIteratorPerformanceThreshold = 2;
+
+}  // namespace
+
+// Chooses the fastest OR implementation for the given operand count:
+// one operand passes through unchanged, two use the binary iterator,
+// anything else goes to the n-ary iterator.
+std::unique_ptr<DocHitInfoIterator> CreateOrIterator(
+    std::vector<std::unique_ptr<DocHitInfoIterator>> iterators) {
+  if (iterators.size() == 1) {
+    // Nothing to OR together; return the single iterator as-is.
+    return std::move(iterators.at(0));
+  }
+
+  std::unique_ptr<DocHitInfoIterator> iterator;
+  if (iterators.size() == kBinaryOrIteratorPerformanceThreshold) {
+    iterator = std::make_unique<DocHitInfoIteratorOr>(std::move(iterators[0]),
+                                                      std::move(iterators[1]));
+  } else {
+    // If the vector is too small, the OrNary iterator can handle it and return
+    // an error on the Advance call
+    iterator = std::make_unique<DocHitInfoIteratorOrNary>(std::move(iterators));
+  }
+
+  return iterator;
+}
+
+// Takes ownership of both child iterators.
+DocHitInfoIteratorOr::DocHitInfoIteratorOr(
+    std::unique_ptr<DocHitInfoIterator> left_it,
+    std::unique_ptr<DocHitInfoIterator> right_it)
+    : left_(std::move(left_it)), right_(std::move(right_it)) {}
+
+// Produces the union of the two children in decreasing DocumentId order.
+// When both children hold the same DocumentId, their section data is merged
+// into a single result. Returns RESOURCE_EXHAUSTED when both children are
+// exhausted.
+libtextclassifier3::Status DocHitInfoIteratorOr::Advance() {
+  // Cache the document_id of the left iterator for comparison to the right.
+  DocumentId orig_left_document_id = left_document_id_;
+
+  // Advance the left iterator if necessary.
+  if (left_document_id_ != kInvalidDocumentId) {
+    if (right_document_id_ == kInvalidDocumentId ||
+        left_document_id_ >= right_document_id_) {
+      if (left_->Advance().ok()) {
+        left_document_id_ = left_->doc_hit_info().document_id();
+      } else {
+        // Left child is exhausted.
+        left_document_id_ = kInvalidDocumentId;
+      }
+    }
+  }
+
+  // Advance the right iterator if necessary, by comparing to the original
+  // left document_id (not the one which may have been updated).
+  if (right_document_id_ != kInvalidDocumentId) {
+    if (orig_left_document_id == kInvalidDocumentId ||
+        right_document_id_ >= orig_left_document_id) {
+      if (right_->Advance().ok()) {
+        right_document_id_ = right_->doc_hit_info().document_id();
+      } else {
+        // Right child is exhausted.
+        right_document_id_ = kInvalidDocumentId;
+      }
+    }
+  }
+
+  // Done, we either found a match or we reached the end of potential
+  // DocHitInfos
+  if (left_document_id_ == kInvalidDocumentId &&
+      right_document_id_ == kInvalidDocumentId) {
+    // Reached the end, set these to invalid values and return
+    doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+    hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+    return absl_ports::ResourceExhaustedError(
+        "No more DocHitInfos in iterator");
+  }
+
+  // Now chose the best one that is not invalid.
+  DocHitInfoIterator* chosen;
+  if (left_document_id_ == kInvalidDocumentId) {
+    chosen = right_.get();
+  } else if (right_document_id_ == kInvalidDocumentId) {
+    chosen = left_.get();
+  } else if (left_document_id_ < right_document_id_) {
+    chosen = right_.get();
+  } else {
+    chosen = left_.get();
+  }
+
+  doc_hit_info_ = chosen->doc_hit_info();
+  hit_intersect_section_ids_mask_ = chosen->hit_intersect_section_ids_mask();
+
+  // If equal, combine. Section masks are unioned into doc_hit_info_ while the
+  // intersect mask keeps only sections common to both children.
+  if (left_document_id_ == right_document_id_) {
+    doc_hit_info_.MergeSectionsFrom(right_->doc_hit_info());
+    hit_intersect_section_ids_mask_ &= right_->hit_intersect_section_ids_mask();
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Stats and debug helpers aggregate over both children.
+int32_t DocHitInfoIteratorOr::GetNumBlocksInspected() const {
+  return left_->GetNumBlocksInspected() + right_->GetNumBlocksInspected();
+}
+
+int32_t DocHitInfoIteratorOr::GetNumLeafAdvanceCalls() const {
+  return left_->GetNumLeafAdvanceCalls() + right_->GetNumLeafAdvanceCalls();
+}
+
+std::string DocHitInfoIteratorOr::ToString() const {
+  return absl_ports::StrCat("(", left_->ToString(), " OR ", right_->ToString(),
+                            ")");
+}
+
+// Takes ownership of all child iterators; Advance() rejects fewer than two.
+DocHitInfoIteratorOrNary::DocHitInfoIteratorOrNary(
+    std::vector<std::unique_ptr<DocHitInfoIterator>> iterators)
+    : iterators_(std::move(iterators)) {}
+
+// Produces the union of all children in decreasing DocumentId order by
+// picking, each call, the largest child DocumentId strictly below the
+// previously returned one, then merging section data from every child
+// positioned on that DocumentId.
+libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() {
+  if (iterators_.size() < 2) {
+    return absl_ports::InvalidArgumentError(
+        "Not enough iterators to OR together");
+  }
+
+  if (doc_hit_info_.document_id() == 0) {
+    // 0 is the smallest (last) DocumentId, can't advance further. Reset to
+    // invalid values and return directly
+    doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+    hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+    return absl_ports::ResourceExhaustedError(
+        "No more DocHitInfos in iterator");
+  }
+  // The maximum possible doc id for the current Advance() call.
+  const DocumentId next_document_id_max = doc_hit_info_.document_id() - 1;
+  doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+  DocumentId next_document_id = kInvalidDocumentId;
+  // Go through the iterators and try to find the maximum document_id that is
+  // equal to or smaller than next_document_id_max
+  for (const auto& iterator : iterators_) {
+    if (iterator->doc_hit_info().document_id() > next_document_id_max) {
+      // Advance the iterator until its value is equal to or smaller than
+      // next_document_id_max
+      if (ABSL_PREDICT_FALSE(
+              !AdvanceTo(iterator.get(), next_document_id_max).ok())) {
+        // This child is exhausted; skip it for this round.
+        continue;
+      }
+    }
+    // Now iterator->get_document_id() <= next_document_id_max
+    if (next_document_id == kInvalidDocumentId) {
+      next_document_id = iterator->doc_hit_info().document_id();
+    } else {
+      next_document_id =
+          std::max(next_document_id, iterator->doc_hit_info().document_id());
+    }
+  }
+  if (next_document_id == kInvalidDocumentId) {
+    // None of the iterators had a next document_id, reset to invalid values and
+    // return
+    doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+    hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+    return absl_ports::ResourceExhaustedError(
+        "No more DocHitInfos in iterator");
+  }
+
+  // Found the next hit DocumentId, now calculate the section info. Sections
+  // are unioned into doc_hit_info_; the intersect mask keeps only sections
+  // common to every matching child.
+  hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+  for (const auto& iterator : iterators_) {
+    if (iterator->doc_hit_info().document_id() == next_document_id) {
+      if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+        doc_hit_info_ = iterator->doc_hit_info();
+        hit_intersect_section_ids_mask_ =
+            iterator->hit_intersect_section_ids_mask();
+      } else {
+        doc_hit_info_.MergeSectionsFrom(iterator->doc_hit_info());
+        hit_intersect_section_ids_mask_ &=
+            iterator->hit_intersect_section_ids_mask();
+      }
+    }
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+// Stats and debug helpers aggregate over all children.
+int32_t DocHitInfoIteratorOrNary::GetNumBlocksInspected() const {
+  int32_t block_count = 0;
+  for (const auto& iter : iterators_) {
+    block_count += iter->GetNumBlocksInspected();
+  }
+  return block_count;
+}
+
+int32_t DocHitInfoIteratorOrNary::GetNumLeafAdvanceCalls() const {
+  int32_t leaf_count = 0;
+  for (const auto& iter : iterators_) {
+    leaf_count += iter->GetNumLeafAdvanceCalls();
+  }
+  return leaf_count;
+}
+
+// Renders as "(A OR B OR ... OR Z)".
+std::string DocHitInfoIteratorOrNary::ToString() const {
+  std::string ret = "(";
+
+  for (size_t i = 0; i < iterators_.size(); ++i) {
+    absl_ports::StrAppend(&ret, iterators_.at(i)->ToString());
+    if (i != iterators_.size() - 1) {
+      // Not the last element in vector
+      absl_ports::StrAppend(&ret, " OR ");
+    }
+  }
+
+  absl_ports::StrAppend(&ret, ")");
+  return ret;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.h b/icing/index/iterator/doc-hit-info-iterator-or.h
new file mode 100644
index 0000000..4128e0f
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-or.h
@@ -0,0 +1,75 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_OR_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_OR_H_
+
+#include <cstdint>
+#include <string>
+
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+
+namespace icing {
+namespace lib {
+
+// Given n iterators, decides the fastest Or-iterator implementation: a single
+// iterator is returned unchanged, two use the binary DocHitInfoIteratorOr,
+// and any other count uses DocHitInfoIteratorOrNary.
+std::unique_ptr<DocHitInfoIterator> CreateOrIterator(
+    std::vector<std::unique_ptr<DocHitInfoIterator>> iterators);
+
+// Iterate over a logical OR of two child iterators.
+class DocHitInfoIteratorOr : public DocHitInfoIterator {
+ public:
+  explicit DocHitInfoIteratorOr(std::unique_ptr<DocHitInfoIterator> left_it,
+                                std::unique_ptr<DocHitInfoIterator> right_it);
+
+  libtextclassifier3::Status Advance() override;
+
+  int32_t GetNumBlocksInspected() const override;
+
+  int32_t GetNumLeafAdvanceCalls() const override;
+
+  std::string ToString() const override;
+
+ private:
+  std::unique_ptr<DocHitInfoIterator> left_;
+  std::unique_ptr<DocHitInfoIterator> right_;
+  // Last DocumentId seen from each child, updated by Advance();
+  // kInvalidDocumentId once that child is exhausted.
+  DocumentId left_document_id_ = kMaxDocumentId;
+  DocumentId right_document_id_ = kMaxDocumentId;
+};
+
+// Iterate over a logical OR of multiple child iterators.
+//
+// NOTE: DocHitInfoIteratorOr is a faster alternative to OR exactly 2 iterators.
+class DocHitInfoIteratorOrNary : public DocHitInfoIterator {
+ public:
+  explicit DocHitInfoIteratorOrNary(
+      std::vector<std::unique_ptr<DocHitInfoIterator>> iterators);
+
+  // Returns INVALID_ARGUMENT if constructed with fewer than two iterators.
+  libtextclassifier3::Status Advance() override;
+
+  int32_t GetNumBlocksInspected() const override;
+
+  int32_t GetNumLeafAdvanceCalls() const override;
+
+  std::string ToString() const override;
+
+ private:
+  // Owned child iterators whose results are unioned.
+  std::vector<std::unique_ptr<DocHitInfoIterator>> iterators_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_OR_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-or_test.cc b/icing/index/iterator/doc-hit-info-iterator-or_test.cc
new file mode 100644
index 0000000..3faa5ab
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-or_test.cc
@@ -0,0 +1,322 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-or.h"
+
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/index/iterator/doc-hit-info-iterator-and.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+
+// Renamed from CreateAndIteratorTest (a copy-paste from the And test file):
+// this file exercises CreateOrIterator, and the sibling test below already
+// uses the CreateOrIteratorTest suite name.
+TEST(CreateOrIteratorTest, Or) {
+  // Basic test that we can create a working Or iterator. Further testing of
+  // the Or iterator should be done separately below.
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(10)};
+  std::unique_ptr<DocHitInfoIterator> first_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+  std::unique_ptr<DocHitInfoIterator> second_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+  iterators.push_back(std::move(first_iter));
+  iterators.push_back(std::move(second_iter));
+  std::unique_ptr<DocHitInfoIterator> or_iter =
+      CreateOrIterator(std::move(iterators));
+
+  // Both children contain document 10, so the OR returns it exactly once.
+  EXPECT_THAT(GetDocumentIds(or_iter.get()), ElementsAre(10));
+}
+
+TEST(CreateOrIteratorTest, OrNary) {
+  // Basic test that we can create a working OrNary iterator. Further testing
+  // of the OrNary iterator should be done separately below.
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(10)};
+  std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+  iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos));
+  iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos));
+  iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos));
+  iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos));
+
+  std::unique_ptr<DocHitInfoIterator> or_iter =
+      CreateOrIterator(std::move(iterators));
+
+  // Document 10 appears in all four children but must be returned only once.
+  EXPECT_THAT(GetDocumentIds(or_iter.get()), ElementsAre(10));
+}
+
+TEST(DocHitInfoIteratorOrTest, Initialize) {
+  DocHitInfoIteratorOr or_iter(std::make_unique<DocHitInfoIteratorDummy>(),
+                               std::make_unique<DocHitInfoIteratorDummy>());
+
+  // We start out with invalid values
+  EXPECT_THAT(or_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId)));
+  EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(kSectionIdMaskNone));
+}
+
+TEST(DocHitInfoIteratorOrTest, GetNumBlocksInspected) {
+  // The OR iterator should report the sum of its children's block counts.
+  int first_iter_blocks = 4;  // arbitrary value
+  auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
+  first_iter->SetNumBlocksInspected(first_iter_blocks);
+
+  int second_iter_blocks = 7;  // arbitrary value
+  auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
+  second_iter->SetNumBlocksInspected(second_iter_blocks);
+
+  DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
+
+  EXPECT_THAT(or_iter.GetNumBlocksInspected(),
+              Eq(first_iter_blocks + second_iter_blocks));
+}
+
+TEST(DocHitInfoIteratorOrTest, GetNumLeafAdvanceCalls) {
+  // The OR iterator should report the sum of its children's leaf advances.
+  int first_iter_leaves = 4;  // arbitrary value
+  auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
+  first_iter->SetNumLeafAdvanceCalls(first_iter_leaves);
+
+  int second_iter_leaves = 7;  // arbitrary value
+  auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
+  second_iter->SetNumLeafAdvanceCalls(second_iter_leaves);
+
+  DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
+
+  EXPECT_THAT(or_iter.GetNumLeafAdvanceCalls(),
+              Eq(first_iter_leaves + second_iter_leaves));
+}
+
+TEST(DocHitInfoIteratorOrTest, Advance) {
+  // Children hold disjoint, descending document id lists; the OR must merge
+  // them into a single descending sequence.
+  std::vector<DocHitInfo> first_vector = {DocHitInfo(10), DocHitInfo(8),
+                                          DocHitInfo(6), DocHitInfo(4),
+                                          DocHitInfo(2), DocHitInfo(0)};
+
+  std::vector<DocHitInfo> second_vector = {DocHitInfo(9), DocHitInfo(7),
+                                           DocHitInfo(5), DocHitInfo(3),
+                                           DocHitInfo(1)};
+
+  std::unique_ptr<DocHitInfoIterator> first_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+  std::unique_ptr<DocHitInfoIterator> second_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(second_vector);
+  DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
+
+  EXPECT_THAT(GetDocumentIds(&or_iter),
+              ElementsAre(10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
+}
+
+TEST(DocHitInfoIteratorOrTest, AdvanceNestedIterators) {
+  // An OR iterator can itself be a child of another OR iterator.
+  std::vector<DocHitInfo> first_vector = {DocHitInfo(10), DocHitInfo(8)};
+
+  std::vector<DocHitInfo> second_vector = {DocHitInfo(9), DocHitInfo(5)};
+
+  std::vector<DocHitInfo> third_vector = {DocHitInfo(7), DocHitInfo(6)};
+
+  std::unique_ptr<DocHitInfoIterator> first_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+  std::unique_ptr<DocHitInfoIterator> second_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(second_vector);
+  std::unique_ptr<DocHitInfoIterator> third_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(third_vector);
+
+  std::unique_ptr<DocHitInfoIterator> inner_iter =
+      std::make_unique<DocHitInfoIteratorOr>(std::move(first_iter),
+                                             std::move(second_iter));
+  std::unique_ptr<DocHitInfoIterator> outer_iter =
+      std::make_unique<DocHitInfoIteratorOr>(std::move(inner_iter),
+                                             std::move(third_iter));
+
+  EXPECT_THAT(GetDocumentIds(outer_iter.get()), ElementsAre(10, 9, 8, 7, 6, 5));
+}
+
+TEST(DocHitInfoIteratorOrTest, SectionIdMask) {
+  // Arbitrary section ids for the documents in the DocHitInfoIterators.
+  // Created to test correct section_id_mask behavior.
+  SectionIdMask section_id_mask1 = 0b01010101;  // hits in sections 0, 2, 4, 6
+  SectionIdMask section_id_mask2 = 0b00000110;  // hits in sections 1, 2
+  SectionIdMask mask_anded_result = 0b00000100;
+  SectionIdMask mask_ored_result = 0b01010111;
+
+  // Both children see the same document (4), so the hit masks are combined.
+  std::vector<DocHitInfo> first_vector = {DocHitInfo(4, section_id_mask1)};
+  std::vector<DocHitInfo> second_vector = {DocHitInfo(4, section_id_mask2)};
+
+  auto first_iter = std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+  first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+
+  auto second_iter = std::make_unique<DocHitInfoIteratorDummy>(second_vector);
+  second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+
+  DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
+
+  ICING_EXPECT_OK(or_iter.Advance());
+  // The per-document hit mask is the union; the intersect mask is the
+  // intersection across children.
+  EXPECT_THAT(or_iter.doc_hit_info().hit_section_ids_mask(),
+              Eq(mask_ored_result));
+  EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
+}
+
+TEST(DocHitInfoIteratorOrNaryTest, Initialize) {
+  std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+  iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>());
+  iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>());
+  iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>());
+  iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>());
+  DocHitInfoIteratorOrNary or_iter(std::move(iterators));
+
+  // We start out with invalid values
+  EXPECT_THAT(or_iter.doc_hit_info(), Eq(DocHitInfo(kInvalidDocumentId)));
+  EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(kSectionIdMaskNone));
+}
+
+TEST(DocHitInfoIteratorOrNaryTest, InitializeEmpty) {
+  // We can initialize it fine even with an empty vector
+  std::vector<std::unique_ptr<DocHitInfoIterator>> empty_vector;
+  DocHitInfoIteratorOrNary empty_iter(std::move(empty_vector));
+
+  // But it won't be able to advance anywhere
+  EXPECT_THAT(empty_iter.Advance(),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(DocHitInfoIteratorOrNaryTest, GetNumBlocksInspected) {
+  // The n-ary OR should report the sum over all four children.
+  int first_iter_blocks = 4;  // arbitrary value
+  auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
+  first_iter->SetNumBlocksInspected(first_iter_blocks);
+
+  int second_iter_blocks = 7;  // arbitrary value
+  auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
+  second_iter->SetNumBlocksInspected(second_iter_blocks);
+
+  int third_iter_blocks = 13;  // arbitrary value
+  auto third_iter = std::make_unique<DocHitInfoIteratorDummy>();
+  third_iter->SetNumBlocksInspected(third_iter_blocks);
+
+  int fourth_iter_blocks = 1;  // arbitrary value
+  auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>();
+  fourth_iter->SetNumBlocksInspected(fourth_iter_blocks);
+
+  std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+  iterators.push_back(std::move(first_iter));
+  iterators.push_back(std::move(second_iter));
+  iterators.push_back(std::move(third_iter));
+  iterators.push_back(std::move(fourth_iter));
+  DocHitInfoIteratorOrNary or_iter(std::move(iterators));
+
+  EXPECT_THAT(or_iter.GetNumBlocksInspected(),
+              Eq(first_iter_blocks + second_iter_blocks + third_iter_blocks +
+                 fourth_iter_blocks));
+}
+
+TEST(DocHitInfoIteratorOrNaryTest, GetNumLeafAdvanceCalls) {
+  // The n-ary OR should report the sum over all four children.
+  int first_iter_leaves = 4;  // arbitrary value
+  auto first_iter = std::make_unique<DocHitInfoIteratorDummy>();
+  first_iter->SetNumLeafAdvanceCalls(first_iter_leaves);
+
+  int second_iter_leaves = 7;  // arbitrary value
+  auto second_iter = std::make_unique<DocHitInfoIteratorDummy>();
+  second_iter->SetNumLeafAdvanceCalls(second_iter_leaves);
+
+  int third_iter_leaves = 13;  // arbitrary value
+  auto third_iter = std::make_unique<DocHitInfoIteratorDummy>();
+  third_iter->SetNumLeafAdvanceCalls(third_iter_leaves);
+
+  int fourth_iter_leaves = 13;  // arbitrary value
+  auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>();
+  fourth_iter->SetNumLeafAdvanceCalls(fourth_iter_leaves);
+
+  std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+  iterators.push_back(std::move(first_iter));
+  iterators.push_back(std::move(second_iter));
+  iterators.push_back(std::move(third_iter));
+  iterators.push_back(std::move(fourth_iter));
+  DocHitInfoIteratorOrNary or_iter(std::move(iterators));
+
+  EXPECT_THAT(or_iter.GetNumLeafAdvanceCalls(),
+              Eq(first_iter_leaves + second_iter_leaves + third_iter_leaves +
+                 fourth_iter_leaves));
+}
+
+TEST(DocHitInfoIteratorOrNaryTest, Advance) {
+  // Four disjoint, descending id lists must merge into one descending run.
+  std::vector<DocHitInfo> first_vector = {DocHitInfo(7), DocHitInfo(0)};
+
+  std::vector<DocHitInfo> second_vector = {DocHitInfo(6), DocHitInfo(1)};
+
+  std::vector<DocHitInfo> third_vector = {DocHitInfo(5), DocHitInfo(2)};
+
+  std::vector<DocHitInfo> fourth_vector = {DocHitInfo(4), DocHitInfo(3)};
+
+  std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+  iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(first_vector));
+  iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(second_vector));
+  iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(third_vector));
+  iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>(fourth_vector));
+  DocHitInfoIteratorOrNary or_iter(std::move(iterators));
+
+  EXPECT_THAT(GetDocumentIds(&or_iter), ElementsAre(7, 6, 5, 4, 3, 2, 1, 0));
+}
+
+TEST(DocHitInfoIteratorOrNaryTest, SectionIdMask) {
+  // Arbitrary section ids for the documents in the DocHitInfoIterators.
+  // Created to test correct section_id_mask behavior.
+  SectionIdMask section_id_mask1 = 0b01000101;  // hits in sections 0, 2, 6
+  SectionIdMask section_id_mask2 = 0b00000110;  // hits in sections 1, 2
+  SectionIdMask section_id_mask3 = 0b00001100;  // hits in sections 2, 3
+  SectionIdMask section_id_mask4 = 0b00100100;  // hits in sections 2, 5
+  SectionIdMask mask_anded_result = 0b00000100;
+  SectionIdMask mask_ored_result = 0b01101111;
+
+  // All four children see the same document (4), so their masks are combined.
+  std::vector<DocHitInfo> first_vector = {DocHitInfo(4, section_id_mask1)};
+  std::vector<DocHitInfo> second_vector = {DocHitInfo(4, section_id_mask2)};
+  std::vector<DocHitInfo> third_vector = {DocHitInfo(4, section_id_mask3)};
+  std::vector<DocHitInfo> fourth_vector = {DocHitInfo(4, section_id_mask4)};
+
+  auto first_iter = std::make_unique<DocHitInfoIteratorDummy>(first_vector);
+  first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+
+  auto second_iter = std::make_unique<DocHitInfoIteratorDummy>(second_vector);
+  second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+
+  auto third_iter = std::make_unique<DocHitInfoIteratorDummy>(third_vector);
+  third_iter->set_hit_intersect_section_ids_mask(section_id_mask3);
+
+  auto fourth_iter = std::make_unique<DocHitInfoIteratorDummy>(fourth_vector);
+  fourth_iter->set_hit_intersect_section_ids_mask(section_id_mask4);
+
+  std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+  iterators.push_back(std::move(first_iter));
+  iterators.push_back(std::move(second_iter));
+  iterators.push_back(std::move(third_iter));
+  iterators.push_back(std::move(fourth_iter));
+
+  DocHitInfoIteratorOrNary or_iter(std::move(iterators));
+
+  ICING_EXPECT_OK(or_iter.Advance());
+  // Union for the document's hit mask, intersection for the intersect mask.
+  EXPECT_THAT(or_iter.doc_hit_info().hit_section_ids_mask(),
+              Eq(mask_ored_result));
+  EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
new file mode 100644
index 0000000..58e7f2a
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -0,0 +1,114 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+DocHitInfoIteratorSectionRestrict::DocHitInfoIteratorSectionRestrict(
+    std::unique_ptr<DocHitInfoIterator> delegate,
+    const DocumentStore* document_store, const SchemaStore* schema_store,
+    std::string_view target_section)
+    : delegate_(std::move(delegate)),
+      // Dereferenced into reference members: per the header contract the
+      // pointers must be non-null and outlive this iterator.
+      document_store_(*document_store),
+      schema_store_(*schema_store),
+      target_section_(target_section) {}
+
+// Advances to the next delegate hit that has at least one section whose
+// metadata path equals target_section_. Hits with missing filter data or no
+// matching section are skipped.
+//
+// Returns:
+//   OK when a matching hit is found (doc_hit_info_ / the intersect mask are
+//   updated from the delegate).
+//   RESOURCE_EXHAUSTED when the delegate is exhausted (members are reset to
+//   invalid values).
+//
+// NOTE: the original implementation skipped non-matching hits via tail
+// recursion (`return Advance();`), which grows the stack by one frame per
+// skipped hit and can overflow on a long run of non-matching hits. This
+// version iterates instead; the observable behavior is unchanged.
+libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
+  while (delegate_->Advance().ok()) {
+    DocumentId document_id = delegate_->doc_hit_info().document_id();
+
+    SectionIdMask section_id_mask =
+        delegate_->doc_hit_info().hit_section_ids_mask();
+
+    auto data_or = document_store_.GetDocumentFilterData(document_id);
+    if (!data_or.ok()) {
+      // Ran into some error retrieving information on this hit, skip
+      continue;
+    }
+
+    // Guaranteed that the DocumentFilterData exists at this point
+    DocumentFilterData data = std::move(data_or).ValueOrDie();
+    SchemaTypeId schema_type_id = data.schema_type_id();
+
+    // A hit can be in multiple sections at once, need to check that at least
+    // one of the confirmed section ids match the name of the target section
+    while (section_id_mask != 0) {
+      // There was a hit in this section id; __builtin_ctz finds the lowest
+      // set bit (mask is non-zero here, so the call is well-defined).
+      SectionId section_id = __builtin_ctz(section_id_mask);
+
+      auto section_metadata_or =
+          schema_store_.GetSectionMetadata(schema_type_id, section_id);
+
+      if (section_metadata_or.ok()) {
+        const SectionMetadata* section_metadata =
+            section_metadata_or.ValueOrDie();
+
+        if (section_metadata->path == target_section_) {
+          // The hit was in the target section name, return OK/found
+          doc_hit_info_ = delegate_->doc_hit_info();
+          hit_intersect_section_ids_mask_ =
+              delegate_->hit_intersect_section_ids_mask();
+          return libtextclassifier3::Status::OK;
+        }
+      }
+
+      // Mark this section as checked
+      section_id_mask &= ~(1U << section_id);
+    }
+
+    // Didn't find a matching section name for this hit, go to the next hit
+  }
+
+  // Didn't find anything on the delegate iterator.
+  doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+  hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+  return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
+}
+
+int32_t DocHitInfoIteratorSectionRestrict::GetNumBlocksInspected() const {
+  // Pure pass-through: this filter inspects no index blocks of its own.
+  return delegate_->GetNumBlocksInspected();
+}
+
+int32_t DocHitInfoIteratorSectionRestrict::GetNumLeafAdvanceCalls() const {
+  // Pure pass-through: only the delegate performs leaf advances.
+  return delegate_->GetNumLeafAdvanceCalls();
+}
+
+std::string DocHitInfoIteratorSectionRestrict::ToString() const {
+  // e.g. "subject: <delegate description>".
+  return absl_ports::StrCat(target_section_, ": ", delegate_->ToString());
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
new file mode 100644
index 0000000..f9b9b04
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
@@ -0,0 +1,67 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_SECTION_RESTRICT_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_SECTION_RESTRICT_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "utils/base/status.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+// An iterator that helps filter for DocHitInfos whose term was in a section
+// named target_section.
+//
+// NOTE: This is a little different from the DocHitInfoIteratorFilter class.
+// That class is meant to be applied to the root of a query tree and filter over
+// all results at the end. This class is more used in the limited scope of a
+// term or a small group of terms.
+class DocHitInfoIteratorSectionRestrict : public DocHitInfoIterator {
+ public:
+  // Does not take any ownership, and all pointers must refer to valid objects
+  // that outlive the one constructed.
+  explicit DocHitInfoIteratorSectionRestrict(
+      std::unique_ptr<DocHitInfoIterator> delegate,
+      const DocumentStore* document_store, const SchemaStore* schema_store,
+      std::string_view target_section);
+
+  libtextclassifier3::Status Advance() override;
+
+  // Stats are delegated unchanged to the wrapped iterator.
+  int32_t GetNumBlocksInspected() const override;
+
+  int32_t GetNumLeafAdvanceCalls() const override;
+
+  std::string ToString() const override;
+
+ private:
+  std::unique_ptr<DocHitInfoIterator> delegate_;
+  // Non-owning references; see the constructor's lifetime contract.
+  const DocumentStore& document_store_;
+  const SchemaStore& schema_store_;
+
+  // Ensure that this does not outlive the underlying string value.
+  std::string_view target_section_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_SECTION_RESTRICT_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
new file mode 100644
index 0000000..df79c6d
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -0,0 +1,244 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
+
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+// Fixture that builds a real SchemaStore/DocumentStore on disk with a single
+// "email" schema type whose only indexed property is "subject" (section id 0).
+class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
+ protected:
+  DocHitInfoIteratorSectionRestrictTest()
+      : test_dir_(GetTestTempDir() + "/icing") {}
+
+  void SetUp() override {
+    filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+    document_ =
+        DocumentBuilder().SetKey("namespace", "uri").SetSchema("email").Build();
+
+    auto type_config = schema_.add_types();
+    type_config->set_schema_type("email");
+
+    // Add an indexed property so we generate section metadata on it
+    auto property = type_config->add_properties();
+    property->set_property_name(indexed_property_);
+    property->set_data_type(PropertyConfigProto::DataType::STRING);
+    property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+    property->mutable_indexing_config()->set_term_match_type(
+        TermMatchType::EXACT_ONLY);
+    property->mutable_indexing_config()->set_tokenizer_type(
+        IndexingConfig::TokenizerType::PLAIN);
+
+    // First and only indexed property, so it gets the first id of 0
+    indexed_section_id_ = 0;
+
+    ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
+                               SchemaStore::Create(&filesystem_, test_dir_));
+    ICING_ASSERT_OK(schema_store_->SetSchema(schema_));
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        document_store_,
+        DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+                              schema_store_.get()));
+  }
+
+  void TearDown() override {
+    // Destroy the stores before deleting their backing directory.
+    document_store_.reset();
+    schema_store_.reset();
+    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+  }
+
+  std::unique_ptr<SchemaStore> schema_store_;
+  std::unique_ptr<DocumentStore> document_store_;
+  const Filesystem filesystem_;
+  const std::string test_dir_;
+  SchemaProto schema_;
+  DocumentProto document_;
+  // Name of the single indexed property; its section id is stored below.
+  const std::string indexed_property_ = "subject";
+  int indexed_section_id_;
+  FakeClock fake_clock_;
+};
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) {
+  std::unique_ptr<DocHitInfoIterator> original_iterator_empty =
+      std::make_unique<DocHitInfoIteratorDummy>();
+
+  DocHitInfoIteratorSectionRestrict filtered_iterator(
+      std::move(original_iterator_empty), document_store_.get(),
+      schema_store_.get(), /*target_section=*/"");
+
+  // An empty delegate yields no results regardless of the target section.
+  EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest, IncludesHitWithMatchingSection) {
+  // Populate the DocumentStore's FilterCache with this document's data
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+                             document_store_->Put(document_));
+
+  SectionIdMask section_id_mask = 1U << indexed_section_id_;
+
+  // Create a hit that was found in the indexed section
+  std::vector<DocHitInfo> doc_hit_infos = {
+      DocHitInfo(document_id, section_id_mask)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  // Filtering for the indexed section name should get a result
+  DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      indexed_property_);
+
+  EXPECT_THAT(GetDocumentIds(&section_restrict_iterator),
+              ElementsAre(document_id));
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest, NoMatchingDocumentFilterData) {
+  // Create a hit with a document id that doesn't exist in the DocumentStore yet
+  std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(/*document_id_in=*/0)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  // With no DocumentFilterData available for the hit, the iterator must skip
+  // it and yield nothing.
+  DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      /*target_section=*/"");
+
+  EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+       DoesntIncludeHitWithWrongSectionName) {
+  // Populate the DocumentStore's FilterCache with this document's data
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+                             document_store_->Put(document_));
+
+  SectionIdMask section_id_mask = 1U << indexed_section_id_;
+
+  // Create a hit that was found in the indexed section
+  std::vector<DocHitInfo> doc_hit_infos = {
+      DocHitInfo(document_id, section_id_mask)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  // Filtering for a section name that isn't in the schema should yield no
+  // results.
+  DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      "some_section_name");
+
+  EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+       DoesntIncludeHitWithNoSectionIds) {
+  // Populate the DocumentStore's FilterCache with this document's data
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+                             document_store_->Put(document_));
+
+  // Create a hit that doesn't exist in any sections, so it shouldn't match any
+  // section filters
+  std::vector<DocHitInfo> doc_hit_infos = {
+      DocHitInfo(document_id, kSectionIdMaskNone)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      indexed_property_);
+
+  EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+       DoesntIncludeHitWithDifferentSectionId) {
+  // Populate the DocumentStore's FilterCache with this document's data
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+                             document_store_->Put(document_));
+
+  // Anything that's not 0, which is the indexed property
+  SectionId not_matching_section_id = 2;
+
+  // Create a hit that exists in a different section, so it shouldn't match any
+  // section filters.
+  // NOTE: shift 1U, not kSectionIdMaskNone — kSectionIdMaskNone is 0, so
+  // shifting it produced an empty mask and made this test a duplicate of
+  // DoesntIncludeHitWithNoSectionIds instead of testing a hit that really is
+  // in a different (non-indexed) section.
+  std::vector<DocHitInfo> doc_hit_infos = {
+      DocHitInfo(document_id, 1U << not_matching_section_id)};
+
+  std::unique_ptr<DocHitInfoIterator> original_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      indexed_property_);
+
+  EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumBlocksInspected) {
+  // The restrict iterator should pass the delegate's count through unchanged.
+  auto original_iterator = std::make_unique<DocHitInfoIteratorDummy>();
+  original_iterator->SetNumBlocksInspected(5);
+
+  DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      /*target_section=*/"");
+
+  EXPECT_THAT(section_restrict_iterator.GetNumBlocksInspected(), Eq(5));
+}
+
+TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumLeafAdvanceCalls) {
+  // The restrict iterator should pass the delegate's count through unchanged.
+  auto original_iterator = std::make_unique<DocHitInfoIteratorDummy>();
+  original_iterator->SetNumLeafAdvanceCalls(6);
+
+  DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+      std::move(original_iterator), document_store_.get(), schema_store_.get(),
+      /*target_section=*/"");
+
+  EXPECT_THAT(section_restrict_iterator.GetNumLeafAdvanceCalls(), Eq(6));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-term.cc b/icing/index/iterator/doc-hit-info-iterator-term.cc
new file mode 100644
index 0000000..9cbb438
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-term.cc
@@ -0,0 +1,125 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/iterator/doc-hit-info-iterator-term.h"
+
+#include <cstdint>
+
+#include "utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/schema/section.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Renders the mask as a fixed-width bit string, most-significant section
+// first (e.g. a mask with only section 0 set ends in "...01").
+std::string SectionIdMaskToString(SectionIdMask section_id_mask) {
+  std::string mask(kMaxSectionId + 1, '0');
+  for (SectionId id = kMaxSectionId; id >= 0; --id) {
+    if ((section_id_mask >> id) & 1U) {
+      mask[kMaxSectionId - id] = '1';
+    }
+  }
+  return mask;
+}
+
+} // namespace
+
+// Moves to the next cached hit, lazily filling the cache on the first call.
+// Returns RESOURCE_EXHAUSTED when there are no hits left, after resetting
+// doc_hit_info_ / hit_intersect_section_ids_mask_ to invalid values.
+libtextclassifier3::Status DocHitInfoIteratorTerm::Advance() {
+  if (cached_hits_idx_ == -1) {
+    // First Advance(): populate cached_hits_ from the index.
+    ICING_RETURN_IF_ERROR(RetrieveMoreHits());
+  } else {
+    ++cached_hits_idx_;
+  }
+  // Count every leaf advance so GetNumLeafAdvanceCalls() reflects actual
+  // usage; previously num_advance_calls_ was never incremented and the
+  // getter always returned 0.
+  ++num_advance_calls_;
+  if (cached_hits_idx_ == -1 ||
+      cached_hits_idx_ >= static_cast<int>(cached_hits_.size())) {
+    // Nothing more for the iterator to return. Set these members to invalid
+    // values.
+    doc_hit_info_ = DocHitInfo();
+    hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+    return absl_ports::ResourceExhaustedError(
+        "No more DocHitInfos in iterator");
+  }
+  doc_hit_info_ = cached_hits_.at(cached_hits_idx_);
+  hit_intersect_section_ids_mask_ = doc_hit_info_.hit_section_ids_mask();
+  return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status DocHitInfoIteratorTermExact::RetrieveMoreHits() {
+  // Exact match only. All hits in lite lexicon are exact.
+  // Resolve the term to its lexicon value index, encode that into a term id,
+  // then pull all of the term's hits (restricted to section_restrict_mask_)
+  // into cached_hits_.
+  ICING_ASSIGN_OR_RETURN(uint32_t tvi, lite_index_->FindTerm(term_));
+  ICING_ASSIGN_OR_RETURN(uint32_t term_id,
+                         term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+  lite_index_->AppendHits(term_id, section_restrict_mask_,
+                          /*only_from_prefix_sections=*/false, &cached_hits_);
+  // Position the cursor on the first cached hit.
+  cached_hits_idx_ = 0;
+  return libtextclassifier3::Status::OK;
+}
+
+std::string DocHitInfoIteratorTermExact::ToString() const {
+  // Format: "<section-bitmask>:<term>".
+  std::string mask_str = SectionIdMaskToString(section_restrict_mask_);
+  return absl_ports::StrCat(mask_str, ":", term_);
+}
+
+libtextclassifier3::Status DocHitInfoIteratorTermPrefix::RetrieveMoreHits() {
+  // Take the union of hits from every lite term that term_ is a prefix of.
+  const size_t prefix_len = term_.length();
+  int num_matching_terms = 0;
+  for (LiteIndex::PrefixIterator lexicon_it =
+           lite_index_->FindTermPrefixes(term_);
+       lexicon_it.IsValid(); lexicon_it.Advance()) {
+    // A key exactly as long as the query term is an exact match; its hits
+    // are included even when they are not from prefix sections.
+    const bool is_exact_match = strlen(lexicon_it.GetKey()) == prefix_len;
+    ICING_ASSIGN_OR_RETURN(
+        uint32_t term_id,
+        term_id_codec_->EncodeTvi(lexicon_it.GetValueIndex(), TviType::LITE));
+    lite_index_->AppendHits(term_id, section_restrict_mask_,
+                            /*only_from_prefix_sections=*/!is_exact_match,
+                            &cached_hits_);
+    ++num_matching_terms;
+  }
+  // Hits appended from different terms may contain duplicate document ids;
+  // collapse them when more than one term matched.
+  if (num_matching_terms > 1) {
+    SortAndDedupeDocumentIds();
+  }
+  cached_hits_idx_ = 0;
+  return libtextclassifier3::Status::OK;
+}
+
+// Re-sorts cached_hits_ and merges entries that share a document_id by
+// folding their section hits into a single DocHitInfo.
+void DocHitInfoIteratorTermPrefix::SortAndDedupeDocumentIds() {
+  // Re-sort cached document_ids and merge sections.
+  std::sort(cached_hits_.begin(), cached_hits_.end());
+
+  // Guard the empty case: the unconditional resize(idx + 1) below would
+  // otherwise grow an empty vector to one default (invalid-docid) DocHitInfo.
+  if (cached_hits_.empty()) {
+    return;
+  }
+
+  int idx = 0;
+  for (size_t i = 1; i < cached_hits_.size(); ++i) {
+    const DocHitInfo& hit_info = cached_hits_.at(i);
+    DocHitInfo& collapsed_hit_info = cached_hits_.at(idx);
+    if (collapsed_hit_info.document_id() == hit_info.document_id()) {
+      // Same document seen again: fold its sections into the collapsed entry.
+      collapsed_hit_info.MergeSectionsFrom(hit_info);
+    } else {
+      // New document_id.
+      cached_hits_.at(++idx) = hit_info;
+    }
+  }
+  // idx points to last doc hit info.
+  cached_hits_.resize(idx + 1);
+}
+
+std::string DocHitInfoIteratorTermPrefix::ToString() const {
+  // Format: "<section-bitmask>:<term>*" — the '*' marks prefix matching.
+  std::string mask_str = SectionIdMaskToString(section_restrict_mask_);
+  return absl_ports::StrCat(mask_str, ":", term_, "*");
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/iterator/doc-hit-info-iterator-term.h b/icing/index/iterator/doc-hit-info-iterator-term.h
new file mode 100644
index 0000000..f209f0d
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-term.h
@@ -0,0 +1,108 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_H_
+
+#include <cstdint>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/lite-index.h"
+#include "icing/index/term-id-codec.h"
+#include "icing/schema/section.h"
+
+namespace icing {
+namespace lib {
+
+// Base iterator over the hits of a single term. Subclasses choose how the
+// term is matched against the lexicon (exact vs. prefix) by implementing
+// RetrieveMoreHits().
+class DocHitInfoIteratorTerm : public DocHitInfoIterator {
+ public:
+  explicit DocHitInfoIteratorTerm(const TermIdCodec* term_id_codec,
+                                  LiteIndex* lite_index, std::string term,
+                                  SectionIdMask section_restrict_mask)
+      // Take `term` by value and move it into place; the previous
+      // const-by-value parameter forced an extra string copy.
+      : term_(std::move(term)),
+        lite_index_(lite_index),
+        cached_hits_idx_(-1),
+        term_id_codec_(term_id_codec),
+        num_advance_calls_(0),
+        section_restrict_mask_(section_restrict_mask) {}
+
+  libtextclassifier3::Status Advance() override;
+
+  int32_t GetNumBlocksInspected() const override {
+    // TODO(b/137862424): Implement this once the main index is added.
+    return 0;
+  }
+  int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
+
+ protected:
+  // Add DocHitInfos corresponding to term_ to cached_hits_.
+  virtual libtextclassifier3::Status RetrieveMoreHits() = 0;
+
+  // The term whose hits this iterator produces.
+  const std::string term_;
+  LiteIndex* const lite_index_;
+  // Stores hits retrieved from the index. This may only be a subset of the hits
+  // that are present in the index. Current value pointed to by the Iterator is
+  // tracked by cached_hits_idx_.
+  std::vector<DocHitInfo> cached_hits_;
+  // Index into cached_hits_ of the current hit; -1 until the first Advance().
+  int cached_hits_idx_;
+  const TermIdCodec* term_id_codec_;
+  // Number of Advance() calls made on this leaf iterator.
+  int num_advance_calls_;
+  // Mask indicating which sections hits should be considered for.
+  // Ex. 0000 0000 0000 0010 means that only hits from section 1 are desired.
+  const SectionIdMask section_restrict_mask_;
+};
+
+// Iterator over the hits of `term` using exact matching only. All hits in
+// the lite lexicon are exact.
+class DocHitInfoIteratorTermExact : public DocHitInfoIteratorTerm {
+ public:
+  explicit DocHitInfoIteratorTermExact(const TermIdCodec* term_id_codec,
+                                       LiteIndex* lite_index,
+                                       const std::string& term,
+                                       SectionIdMask section_id_mask)
+      : DocHitInfoIteratorTerm(term_id_codec, lite_index, term,
+                               section_id_mask) {}
+
+  std::string ToString() const override;
+
+ protected:
+  libtextclassifier3::Status RetrieveMoreHits() override;
+};
+
+// Iterator over the hits of every indexed term that `term` is a prefix of.
+class DocHitInfoIteratorTermPrefix : public DocHitInfoIteratorTerm {
+ public:
+  explicit DocHitInfoIteratorTermPrefix(const TermIdCodec* term_id_codec,
+                                        LiteIndex* lite_index,
+                                        const std::string& term,
+                                        SectionIdMask section_id_mask)
+      : DocHitInfoIteratorTerm(term_id_codec, lite_index, term,
+                               section_id_mask) {}
+
+  std::string ToString() const override;
+
+ protected:
+  libtextclassifier3::Status RetrieveMoreHits() override;
+
+ private:
+  // After retrieving DocHitInfos from the index, the same document can appear
+  // once per matching term — e.g. a DocHitInfo for docid 1 and "foo" and a
+  // DocHitInfo for docid 1 and "fool". Such duplicates should be merged into
+  // a single DocHitInfo per document, which is what this helper does.
+  void SortAndDedupeDocumentIds();
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TERM_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator-test-util.h b/icing/index/iterator/doc-hit-info-iterator-test-util.h
new file mode 100644
index 0000000..ac9a3a9
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator-test-util.h
@@ -0,0 +1,115 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TEST_UTIL_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TEST_UTIL_H_
+
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// Test double for DocHitInfoIterator. Like real iterators, it reports an
+// invalid document id until Advance() is called; it then yields the supplied
+// doc_hit_infos in order, and finally returns RESOURCE_EXHAUSTED from
+// Advance() once they are used up.
+class DocHitInfoIteratorDummy : public DocHitInfoIterator {
+ public:
+  DocHitInfoIteratorDummy() = default;
+  explicit DocHitInfoIteratorDummy(std::vector<DocHitInfo> doc_hit_infos)
+      : doc_hit_infos_(std::move(doc_hit_infos)) {}
+
+  libtextclassifier3::Status Advance() override {
+    // Guard clause: out of canned hits.
+    if (next_idx_ >= doc_hit_infos_.size()) {
+      return absl_ports::ResourceExhaustedError(
+          "No more DocHitInfos in iterator");
+    }
+    doc_hit_info_ = doc_hit_infos_.at(next_idx_);
+    ++next_idx_;
+    return libtextclassifier3::Status::OK;
+  }
+
+  void set_hit_intersect_section_ids_mask(
+      SectionIdMask hit_intersect_section_ids_mask) {
+    hit_intersect_section_ids_mask_ = hit_intersect_section_ids_mask;
+  }
+
+  int32_t GetNumBlocksInspected() const override {
+    return num_blocks_inspected_;
+  }
+
+  void SetNumBlocksInspected(int32_t num_blocks_inspected) {
+    num_blocks_inspected_ = num_blocks_inspected;
+  }
+
+  int32_t GetNumLeafAdvanceCalls() const override {
+    return num_leaf_advance_calls_;
+  }
+
+  void SetNumLeafAdvanceCalls(int32_t num_leaf_advance_calls) {
+    num_leaf_advance_calls_ = num_leaf_advance_calls;
+  }
+
+  std::string ToString() const override {
+    // Renders as "<[docid,section_mask][docid,section_mask]...>".
+    std::string ret = "<";
+    for (const DocHitInfo& info : doc_hit_infos_) {
+      absl_ports::StrAppend(
+          &ret, IcingStringUtil::StringPrintf("[%d,%d]", info.document_id(),
+                                              info.hit_section_ids_mask()));
+    }
+    absl_ports::StrAppend(&ret, ">");
+    return ret;
+  }
+
+ private:
+  // Index of the next element of doc_hit_infos_ to yield.
+  int32_t next_idx_ = 0;
+  int32_t num_blocks_inspected_ = 0;
+  int32_t num_leaf_advance_calls_ = 0;
+  std::vector<DocHitInfo> doc_hit_infos_;
+};
+
+// Drains `iterator` and returns the document ids it produced, in order.
+inline std::vector<DocumentId> GetDocumentIds(DocHitInfoIterator* iterator) {
+  std::vector<DocumentId> document_ids;
+  while (iterator->Advance().ok()) {
+    document_ids.push_back(iterator->doc_hit_info().document_id());
+  }
+  return document_ids;
+}
+
+// Drains `iterator` and returns every DocHitInfo it produced, in order.
+inline std::vector<DocHitInfo> GetDocHitInfos(DocHitInfoIterator* iterator) {
+  std::vector<DocHitInfo> infos;
+  while (iterator->Advance().ok()) {
+    infos.push_back(iterator->doc_hit_info());
+  }
+  return infos;
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TEST_UTIL_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h
new file mode 100644
index 0000000..eace911
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator.h
@@ -0,0 +1,99 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_H_
+#define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_H_
+
+#include <cstdint>
+#include <string>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// Iterator over DocHitInfos (collapsed Hits) in REVERSE document_id order.
+//
+// NOTE: You must call Advance() before calling hit_info() or
+// hit_intersect_section_ids_mask().
+//
+// Example:
+// DocHitInfoIterator itr = GetIterator(...);
+// while (itr.Advance().ok()) {
+//   HandleDocHitInfo(itr.hit_info());
+// }
+class DocHitInfoIterator {
+ public:
+  virtual ~DocHitInfoIterator() = default;
+
+  // Returns:
+  //   OK if was able to advance to a new document_id.
+  //   RESOURCE_EXHAUSTED if we've run out of document_ids to iterate over
+  virtual libtextclassifier3::Status Advance() = 0;
+
+  // Returns the DocHitInfo that the iterator is currently at. The DocHitInfo
+  // will have a kInvalidDocumentId if Advance() was not called after
+  // construction or if Advance returned an error.
+  const DocHitInfo& doc_hit_info() const { return doc_hit_info_; }
+
+  // SectionIdMask representing which sections (if any) have matched *ALL* query
+  // terms for the current document_id.
+  SectionIdMask hit_intersect_section_ids_mask() const {
+    return hit_intersect_section_ids_mask_;
+  }
+
+  // Gets the number of flash index blocks that have been read as a
+  // result of operations on this object.
+  virtual int32_t GetNumBlocksInspected() const = 0;
+
+  // HitIterators may be constructed into trees. Internal nodes will return the
+  // sum of the number of Advance() calls to all leaf nodes. Leaf nodes will
+  // return the number of times Advance() was called on it.
+  virtual int32_t GetNumLeafAdvanceCalls() const = 0;
+
+  // A string representing the iterator.
+  virtual std::string ToString() const = 0;
+
+ protected:
+  DocHitInfo doc_hit_info_;
+  SectionIdMask hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+
+  // Helper function to advance the given iterator to at most the given
+  // document_id. Returns the document id it stopped at, or
+  // RESOURCE_EXHAUSTED (after resetting this iterator's current state to
+  // invalid values) if `it` ran out of hits first.
+  libtextclassifier3::StatusOr<DocumentId> AdvanceTo(DocHitInfoIterator* it,
+                                                     DocumentId document_id) {
+    while (it->Advance().ok()) {
+      // Document ids are iterated in reverse order, so the first hit at or
+      // below `document_id` is the stopping point.
+      if (it->doc_hit_info().document_id() <= document_id) {
+        return it->doc_hit_info().document_id();
+      }
+    }
+
+    // Didn't find anything for the other iterator, reset to invalid values and
+    // return.
+    doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+    hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+    return absl_ports::ResourceExhaustedError(
+        "No more DocHitInfos in iterator");
+  }
+};  // class DocHitInfoIterator
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_H_
diff --git a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
new file mode 100644
index 0000000..90e4888
--- /dev/null
+++ b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
@@ -0,0 +1,185 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <algorithm>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator-and.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/index/iterator:doc-hit-info-iterator_benchmark
+//
+// $
+// blaze-bin/icing/index/iterator/doc-hit-info-iterator_benchmark
+// --benchmarks=all
+//
+// Run on an Android device:
+// $ blaze build --config=android_arm64 -c opt --dynamic_mode=off
+// --copt=-gmlt
+// //icing/index/iterator:doc-hit-info-iterator_benchmark
+//
+// $ adb push
+// blaze-bin/icing/index/iterator/doc-hit-info-iterator_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/doc-hit-info-iterator_benchmark
+// --benchmarks=all
+
+// Functor to be used with std::generate to create a container of DocHitInfos.
+// Each call emits a DocHitInfo whose docid starts at starting_docid and
+// decreases by n on every subsequent call.
+// Ex. std::vector<DocHitInfo> infos(6);
+//     std::generate(infos.begin(), infos.end(), GeneratorEveryOtherN(25, 5));
+//     infos will now hold: {DocHitInfo(25), DocHitInfo(20), DocHitInfo(15),
+//     DocHitInfo(10), DocHitInfo(5), DocHitInfo(0)}
+struct GeneratorEveryOtherN {
+  explicit GeneratorEveryOtherN(DocumentId starting_docid, int n)
+      : current_docid(starting_docid), interval(n) {}
+
+  DocHitInfo operator()() {
+    // Emit the current docid (with every section bit set), then step down.
+    DocHitInfo next(current_docid, kSectionIdMaskAll);
+    current_docid -= interval;
+    return next;
+  }
+
+  DocumentId current_docid;
+  int interval;
+};
+
+// Benchmarks a binary AND iterator over two dummy leaf iterators whose hit
+// densities differ by 4x. state.range(0) is the starting docid, state.range(1)
+// the docid interval of the denser iterator.
+void BM_DocHitInfoIteratorAndBenchmark(benchmark::State& state) {
+  // First iterator: If starting_docid is 1024 and interval is 2, docids
+  // [1024, 1022, 1020, 1018, ..., 2, 0]
+  DocumentId starting_docid = state.range(0);
+  int interval = state.range(1);
+  std::vector<DocHitInfo> first_infos((starting_docid / interval) + 1);
+  std::generate(first_infos.begin(), first_infos.end(),
+                GeneratorEveryOtherN(starting_docid, interval));
+  std::unique_ptr<DocHitInfoIterator> first_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(first_infos);
+
+  // Second iterator: An iterator with 1/4 of the hits as first_iter. If
+  // starting_docid is 1024 and interval is 2, docids
+  // [1024, 1016, 1008, 1000, ..., 8, 0]
+  interval *= 4;
+  std::vector<DocHitInfo> second_infos((starting_docid / interval) + 1);
+  std::generate(second_infos.begin(), second_infos.end(),
+                GeneratorEveryOtherN(starting_docid, interval));
+  std::unique_ptr<DocHitInfoIterator> second_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(second_infos);
+
+  std::vector<std::unique_ptr<DocHitInfoIterator>> iters;
+  iters.push_back(std::move(first_iter));
+  iters.push_back(std::move(second_iter));
+  std::unique_ptr<DocHitInfoIterator> and_iter =
+      CreateAndIterator(std::move(iters));
+  // NOTE(review): and_iter is built once outside the timing loop, so only the
+  // first benchmark iteration walks a non-empty iterator; later iterations
+  // see an already-exhausted iterator and measure almost nothing. Confirm
+  // whether the iterators should be rebuilt inside the loop (with
+  // state.PauseTiming()/ResumeTiming()).
+  for (auto _ : state) {
+    while (and_iter->Advance().ok()) {
+      // Intentionally left blank.
+    }
+  }
+}
+BENCHMARK(BM_DocHitInfoIteratorAndBenchmark)
+    ->ArgPair(1024, 1)
+    ->ArgPair(1024, 2)
+    ->ArgPair(1024, 4)
+    ->ArgPair(1024, 16)
+    ->ArgPair(1024, 128)
+    ->ArgPair(8192, 1)
+    ->ArgPair(8192, 2)
+    ->ArgPair(8192, 4)
+    ->ArgPair(8192, 16)
+    ->ArgPair(8192, 128)
+    ->ArgPair(65536, 1)
+    ->ArgPair(65536, 2)
+    ->ArgPair(65536, 4)
+    ->ArgPair(65536, 16)
+    ->ArgPair(65536, 128);
+
+// Benchmarks an n-ary AND iterator over three dummy leaf iterators with hit
+// densities 1x, 1/2x, and 1/8x. state.range(0) is the starting docid,
+// state.range(1) the docid interval of the densest iterator.
+void BM_DocHitInfoIteratorAndNaryBenchmark(benchmark::State& state) {
+  // First iterator: If starting_docid is 1024 and interval is 2, docids
+  // [1024, 1022, 1020, 1018, ..., 2, 0]
+  DocumentId starting_docid = state.range(0);
+  int interval = state.range(1);
+  std::vector<DocHitInfo> first_infos((starting_docid / interval) + 1);
+  std::generate(first_infos.begin(), first_infos.end(),
+                GeneratorEveryOtherN(starting_docid, interval));
+  std::unique_ptr<DocHitInfoIterator> first_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(first_infos);
+
+  // Second iterator: An iterator with 1/2 of the hits as first_iter. If
+  // starting_docid is 1024 and interval is 2, docids
+  // [1024, 1020, 1016, 1012, ..., 4, 0]
+  interval *= 2;
+  std::vector<DocHitInfo> second_infos((starting_docid / interval) + 1);
+  std::generate(second_infos.begin(), second_infos.end(),
+                GeneratorEveryOtherN(starting_docid, interval));
+  std::unique_ptr<DocHitInfoIterator> second_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(second_infos);
+
+  // Third iterator: An iterator with 1/4 of the hits as first_iter. If
+  // starting_docid is 1024 and interval is 2, docids
+  // [1024, 1016, 1008, 1000, ..., 8, 0]
+  interval *= 4;
+  std::vector<DocHitInfo> third_infos((starting_docid / interval) + 1);
+  std::generate(third_infos.begin(), third_infos.end(),
+                GeneratorEveryOtherN(starting_docid, interval));
+  std::unique_ptr<DocHitInfoIterator> third_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(third_infos);
+
+  std::vector<std::unique_ptr<DocHitInfoIterator>> iters;
+  iters.push_back(std::move(first_iter));
+  iters.push_back(std::move(second_iter));
+  iters.push_back(std::move(third_iter));
+  std::unique_ptr<DocHitInfoIterator> and_iter =
+      CreateAndIterator(std::move(iters));
+  // NOTE(review): as in the binary benchmark above, and_iter is exhausted
+  // after the first state iteration — confirm whether it should be rebuilt
+  // inside the timing loop.
+  for (auto _ : state) {
+    while (and_iter->Advance().ok()) {
+      // Intentionally left blank.
+    }
+  }
+}
+BENCHMARK(BM_DocHitInfoIteratorAndNaryBenchmark)
+    ->ArgPair(1024, 1)
+    ->ArgPair(1024, 2)
+    ->ArgPair(1024, 4)
+    ->ArgPair(1024, 16)
+    ->ArgPair(1024, 128)
+    ->ArgPair(8192, 1)
+    ->ArgPair(8192, 2)
+    ->ArgPair(8192, 4)
+    ->ArgPair(8192, 16)
+    ->ArgPair(8192, 128)
+    ->ArgPair(65536, 1)
+    ->ArgPair(65536, 2)
+    ->ArgPair(65536, 4)
+    ->ArgPair(65536, 16)
+    ->ArgPair(65536, 128);
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/lite-index.cc b/icing/index/lite-index.cc
new file mode 100644
index 0000000..56b8def
--- /dev/null
+++ b/icing/index/lite-index.cc
@@ -0,0 +1,420 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/lite-index.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/mman.h>
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/core/icing-timer.h"
+#include "icing/legacy/index/icing-array-storage.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-lite-index-header.h"
+#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Point at which we declare the trie full.
+constexpr double kTrieFullFraction = 0.95;
+
+// Path of the hit buffer backing file: the index's base name suffixed "hb".
+std::string MakeHitBufferFilename(const std::string& filename_base) {
+  std::string filename = filename_base;
+  filename += "hb";
+  return filename;
+}
+
+// Size in bytes of the lite index header struct stored at the start of the
+// hit buffer file.
+size_t header_size() { return sizeof(IcingLiteIndex_HeaderImpl::HeaderData); }
+
+} // namespace
+
+// Sentinel "invalid" element value: term id 0 paired with a
+// default-constructed (invalid) Hit.
+const LiteIndex::Element::Value LiteIndex::Element::kInvalidValue =
+    LiteIndex::Element(0, Hit()).value();
+
+// Two-phase construction: build the instance, then run Initialize() and
+// surface any setup failure through the returned StatusOr.
+libtextclassifier3::StatusOr<std::unique_ptr<LiteIndex>> LiteIndex::Create(
+    const LiteIndex::Options& options, const IcingFilesystem* filesystem) {
+  auto lite_index =
+      std::unique_ptr<LiteIndex>(new LiteIndex(options, filesystem));
+  ICING_RETURN_IF_ERROR(lite_index->Initialize());
+  return std::move(lite_index);
+}
+
+// size is max size in elements. An appropriate lexicon and display
+// mapping size will be chosen based on hit buffer size.
+// The constructor only wires up members; all file I/O happens in
+// Initialize().
+LiteIndex::LiteIndex(const LiteIndex::Options& options,
+                     const IcingFilesystem* filesystem)
+    : hit_buffer_(*filesystem),
+      hit_buffer_crc_(0),
+      // The lexicon trie lives next to the hit buffer, suffixed "lexicon".
+      lexicon_(options.filename_base + "lexicon", MakeTrieRuntimeOptions(),
+               filesystem),
+      header_mmap_(false, MAP_SHARED),
+      options_(options),
+      filesystem_(filesystem) {}
+
+LiteIndex::~LiteIndex() {
+  // Best-effort flush on teardown; a destructor cannot propagate the status.
+  if (initialized()) {
+    libtextclassifier3::Status unused = PersistToDisk();
+  }
+}
+
+// Runtime options for the lexicon trie: shared mapping with crc checking.
+IcingDynamicTrie::RuntimeOptions LiteIndex::MakeTrieRuntimeOptions() {
+  return IcingDynamicTrie::RuntimeOptions().set_storage_policy(
+      IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc);
+}
+
+// Opens (or creates) the lexicon and the hit buffer file, maps the header,
+// and validates the on-disk state (magic + crc). Any failure jumps to the
+// `error` label, which tears all partially-initialized state back down before
+// returning a non-OK status.
+libtextclassifier3::Status LiteIndex::Initialize() {
+  // Size of hit buffer's header struct, rounded up to the nearest number of
+  // system memory pages.
+  const size_t header_padded_size =
+      IcingMMapper::page_aligned_size(header_size());
+
+  // Variable declarations cannot cross goto jumps, so declare these up top.
+  libtextclassifier3::Status status;
+  uint64_t file_size;
+  IcingTimer timer;
+
+  if (!lexicon_.CreateIfNotExist(options_.lexicon_options) ||
+      !lexicon_.Init()) {
+    return absl_ports::InternalError("Failed to initialize lexicon trie");
+  }
+
+  hit_buffer_fd_.reset(filesystem_->OpenForWrite(
+      MakeHitBufferFilename(options_.filename_base).c_str()));
+  if (!hit_buffer_fd_.is_valid()) {
+    status = absl_ports::InternalError("Failed to open hit buffer file");
+    goto error;
+  }
+
+  file_size = filesystem_->GetFileSize(hit_buffer_fd_.get());
+  if (file_size == IcingFilesystem::kBadFileSize) {
+    status = absl_ports::InternalError("Failed to query hit buffer file size");
+    goto error;
+  }
+
+  // A file smaller than the padded header was never fully created: either
+  // brand new (size 0) or unexpectedly truncated.
+  if (file_size < header_padded_size) {
+    if (file_size != 0) {
+      status = absl_ports::InternalError(IcingStringUtil::StringPrintf(
+          "Hit buffer had unexpected size %" PRIu64, file_size));
+      goto error;
+    }
+
+    ICING_VLOG(2) << "Creating new hit buffer";
+    // Make sure files are fresh.
+    if (!lexicon_.Remove() ||
+        !lexicon_.CreateIfNotExist(options_.lexicon_options) ||
+        !lexicon_.Init()) {
+      status =
+          absl_ports::InternalError("Failed to refresh lexicon during clear");
+      goto error;
+    }
+
+    // Create fresh hit buffer by first emptying the hit buffer file and then
+    // allocating header_padded_size of the cleared space.
+    if (!filesystem_->Truncate(hit_buffer_fd_.get(), 0) ||
+        !filesystem_->Truncate(hit_buffer_fd_.get(), header_padded_size)) {
+      status = absl_ports::InternalError("Failed to truncate hit buffer file");
+      goto error;
+    }
+
+    // Set up header.
+    header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size());
+    header_ = std::make_unique<IcingLiteIndex_HeaderImpl>(
+        reinterpret_cast<IcingLiteIndex_HeaderImpl::HeaderData*>(
+            header_mmap_.address()));
+    header_->Reset();
+
+    if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
+                          sizeof(Element::Value), header_->cur_size(),
+                          options_.hit_buffer_size, &hit_buffer_crc_, true)) {
+      status = absl_ports::InternalError("Failed to initialize new hit buffer");
+      goto error;
+    }
+
+    // Persist a checksum for the freshly created (empty) index.
+    UpdateChecksum();
+  } else {
+    // Existing index: map the header and attach to the existing hit buffer.
+    header_mmap_.Remap(hit_buffer_fd_.get(), 0, header_size());
+    header_ = std::make_unique<IcingLiteIndex_HeaderImpl>(
+        reinterpret_cast<IcingLiteIndex_HeaderImpl::HeaderData*>(
+            header_mmap_.address()));
+
+    if (!hit_buffer_.Init(hit_buffer_fd_.get(), header_padded_size, true,
+                          sizeof(Element::Value), header_->cur_size(),
+                          options_.hit_buffer_size, &hit_buffer_crc_, true)) {
+      status = absl_ports::InternalError(
+          "Failed to re-initialize existing hit buffer");
+      goto error;
+    }
+
+    // Check integrity.
+    if (!header_->check_magic()) {
+      status = absl_ports::InternalError("Lite index header magic mismatch");
+      goto error;
+    }
+    Crc32 crc = ComputeChecksum();
+    if (crc.Get() != header_->lite_index_crc()) {
+      status = absl_ports::DataLossError(
+          IcingStringUtil::StringPrintf("Lite index crc check failed: %u vs %u",
+                                        crc.Get(), header_->lite_index_crc()));
+      goto error;
+    }
+  }
+
+  ICING_VLOG(2) << IcingStringUtil::StringPrintf("Lite index init ok in %.3fms",
+                                                 timer.Elapsed() * 1000);
+  return status;
+
+error:
+  // Unwind partially-initialized state so a later retry starts clean.
+  header_ = nullptr;
+  header_mmap_.Unmap();
+  lexicon_.Close();
+  hit_buffer_crc_ = 0;
+  hit_buffer_.Reset();
+  hit_buffer_fd_.reset();
+  if (status.ok()) {
+    return absl_ports::InternalError(
+        "Error handling code ran but status was ok");
+  }
+  return status;
+}
+
+// Computes the overall index checksum: the header crc (excluding the stored
+// crc field itself) extended with the hit buffer and lexicon crcs.
+Crc32 LiteIndex::ComputeChecksum() {
+  IcingTimer timer;
+
+  // Update crcs.
+  uint32_t dependent_crcs[2];
+  hit_buffer_.UpdateCrc();
+  dependent_crcs[0] = hit_buffer_crc_;
+  dependent_crcs[1] = lexicon_.UpdateCrc();
+
+  // Compute the master crc.
+
+  // Header crc, excluding the actual crc field.
+  Crc32 all_crc(header_->CalculateHeaderCrc());
+  // Fold the raw bytes of the dependent crcs into the master crc.
+  all_crc.Append(std::string_view(reinterpret_cast<const char*>(dependent_crcs),
+                                  sizeof(dependent_crcs)));
+  ICING_VLOG(2) << IcingStringUtil::StringPrintf(
+      "Lite index crc computed in %.3fms", timer.Elapsed() * 1000);
+
+  return all_crc;
+}
+
+// Wipes all indexed content (lexicon + hit buffer), resets the header, and
+// writes a fresh checksum, leaving an empty but valid index.
+libtextclassifier3::Status LiteIndex::Reset() {
+  IcingTimer timer;
+
+  // TODO(b/140436942): When these components have been changed to return errors
+  // they should be propagated from here.
+  lexicon_.Clear();
+  hit_buffer_.Clear();
+  header_->Reset();
+  UpdateChecksum();
+
+  ICING_VLOG(2) << IcingStringUtil::StringPrintf("Lite index clear in %.3fms",
+                                                 timer.Elapsed() * 1000);
+  return libtextclassifier3::Status::OK;
+}
+
+// Warms up the hit buffer and lexicon (e.g. ahead of query traffic).
+void LiteIndex::Warm() {
+  hit_buffer_.Warm();
+  lexicon_.Warm();
+}
+
+// Flushes the lexicon, hit buffer, checksum, and header to disk. All syncs
+// are attempted even if an earlier one fails.
+libtextclassifier3::Status LiteIndex::PersistToDisk() {
+  bool success = true;
+  if (!lexicon_.Sync()) {
+    ICING_VLOG(1) << "Failed to sync the lexicon.";
+    success = false;
+  }
+  // NOTE(review): hit_buffer_.Sync()'s result is discarded while the
+  // lexicon's is checked — confirm whether a hit buffer sync failure should
+  // also flip `success`.
+  hit_buffer_.Sync();
+  UpdateChecksum();
+  header_mmap_.Sync();
+
+  return (success) ? libtextclassifier3::Status::OK
+                   : absl_ports::InternalError(
+                         "Unable to sync lite index components.");
+}
+
+// Recomputes the full index checksum and stores it in the header.
+void LiteIndex::UpdateChecksum() {
+  header_->set_lite_index_crc(ComputeChecksum().Get());
+}
+
+// Inserts `term` into the lexicon and applies the match-type-specific
+// property via UpdateTerm(). Returns the term's value index (tvi) in the
+// lexicon on success.
+libtextclassifier3::StatusOr<uint32_t> LiteIndex::InsertTerm(
+    const std::string& term, TermMatchType::Code term_match_type) {
+  uint32_t tvi;
+  if (!lexicon_.Insert(term.c_str(), "", &tvi, false)) {
+    return absl_ports::ResourceExhaustedError(
+        absl_ports::StrCat("Unable to add term ", term, " to lexicon!"));
+  }
+  ICING_RETURN_IF_ERROR(UpdateTerm(tvi, term_match_type));
+  return tvi;
+}
+
+// Marks the term at `tvi` as having hits in prefix sections when the match
+// type is PREFIX; other match types need no lexicon property.
+libtextclassifier3::Status LiteIndex::UpdateTerm(
+    uint32_t tvi, TermMatchType::Code term_match_type) {
+  if (term_match_type == TermMatchType::PREFIX &&
+      !lexicon_.SetProperty(tvi, kHasHitsInPrefixSection)) {
+    return absl_ports::ResourceExhaustedError(
+        "Insufficient disk space to create property!");
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status LiteIndex::AddHit(uint32_t term_id, const Hit& hit) {
+ if (is_full()) {
+ return absl_ports::ResourceExhaustedError("Hit buffer is full!");
+ }
+
+ header_->set_last_added_docid(hit.document_id());
+
+ Element elt(term_id, hit);
+ uint32_t cur_size = header_->cur_size();
+ Element::Value* valp = hit_buffer_.GetMutableMem<Element::Value>(cur_size, 1);
+ if (valp == nullptr) {
+ return absl_ports::ResourceExhaustedError(
+ "Allocating more space in hit buffer failed!");
+ }
+ *valp = elt.value();
+ header_->set_cur_size(cur_size + 1);
+
+ return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::StatusOr<uint32_t> LiteIndex::FindTerm(
+ const std::string& term) const {
+ char dummy;
+ uint32_t tvi;
+ if (!lexicon_.Find(term.c_str(), &dummy, &tvi)) {
+ return absl_ports::NotFoundError(
+ absl_ports::StrCat("Could not find ", term, " in the lexicon."));
+ }
+ return tvi;
+}
+
+uint32_t LiteIndex::AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ std::vector<DocHitInfo>* hits_out) {
+ uint32_t count = 0;
+ DocumentId last_document_id = kInvalidDocumentId;
+ for (uint32_t idx = Seek(term_id); idx < header_->cur_size(); idx++) {
+ Element elt(hit_buffer_.array_cast<Element>()[idx]);
+ if (elt.term_id() != term_id) break;
+
+ const Hit& hit = elt.hit();
+ // Check sections.
+ if (((1u << hit.section_id()) & section_id_mask) == 0) {
+ continue;
+ }
+ // Check prefix section only.
+ if (only_from_prefix_sections && !hit.is_in_prefix_section()) {
+ continue;
+ }
+ DocumentId document_id = hit.document_id();
+ if (document_id != last_document_id) {
+ count++;
+ if (hits_out != nullptr) {
+ hits_out->push_back(DocHitInfo(document_id));
+ }
+ last_document_id = document_id;
+ }
+ if (hits_out != nullptr) {
+ hits_out->back().UpdateSection(hit.section_id(), hit.score());
+ }
+ }
+ return count;
+}
+
+bool LiteIndex::is_full() const {
+ return (header_->cur_size() == options_.hit_buffer_size ||
+ lexicon_.min_free_fraction() < (1.0 - kTrieFullFraction));
+}
+
+void LiteIndex::GetDebugInfo(int verbosity, std::string* out) const {
+ absl_ports::StrAppend(
+ out, IcingStringUtil::StringPrintf("Lite Index\nHit buffer %u/%u\n",
+ header_->cur_size(),
+ options_.hit_buffer_size));
+
+ // Lexicon.
+ out->append("Lexicon stats:\n");
+ lexicon_.GetDebugInfo(verbosity, out);
+}
+
+uint32_t LiteIndex::Seek(uint32_t term_id) {
+ // Make searchable by sorting by hit buffer.
+ uint32_t sort_len = header_->cur_size() - header_->searchable_end();
+ if (sort_len > 0) {
+ IcingTimer timer;
+
+ auto* array_start =
+ hit_buffer_.GetMutableMem<Element::Value>(0, header_->cur_size());
+ Element::Value* sort_start = array_start + header_->searchable_end();
+ std::sort(sort_start, array_start + header_->cur_size());
+
+ // Now merge with previous region. Since the previous region is already
+ // sorted and deduplicated, optimize the merge by skipping everything less
+ // than the new region's smallest value.
+ if (header_->searchable_end() > 0) {
+ std::inplace_merge(array_start, array_start + header_->searchable_end(),
+ array_start + header_->cur_size());
+ }
+ ICING_VLOG(2) << IcingStringUtil::StringPrintf(
+ "Lite index sort and merge %u into %u in %.3fms", sort_len,
+ header_->searchable_end(), timer.Elapsed() * 1000);
+
+ // Now the entire array is sorted.
+ header_->set_searchable_end(header_->cur_size());
+
+ // Update crc in-line.
+ UpdateChecksum();
+ }
+
+ // Binary search for our term_id. Make sure we get the first
+ // element. Using kMaxDocumentIdSortValue ensures this for the hit value.
+ Element elt(term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kMaxHitScore));
+
+ const Element::Value* array = hit_buffer_.array_cast<Element::Value>();
+ const Element::Value* ptr =
+ std::lower_bound(array, array + header_->cur_size(), elt.value());
+ return ptr - array;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/lite-index.h b/icing/index/lite-index.h
new file mode 100644
index 0000000..ff573a0
--- /dev/null
+++ b/icing/index/lite-index.h
@@ -0,0 +1,223 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// A small index with continuous updates (doesn't need explicit Flush
+// to persist) but has more possibility for corruption. It can always
+// detect corruption reliably.
+
+#ifndef ICING_INDEX_LITE_INDEX_H_
+#define ICING_INDEX_LITE_INDEX_H_
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/hit/hit.h"
+#include "icing/legacy/index/icing-array-storage.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-lite-index-header.h"
+#include "icing/legacy/index/icing-lite-index-options.h"
+#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/util/bit-util.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+enum TermPropertyId {
+ kHasHitsInPrefixSection = 0,
+};
+
+class LiteIndex {
+ public:
+ // An entry in the hit buffer.
+ class Element {
+ public:
+ // Layout bits: 24 termid + 32 hit value + 8 hit score.
+ using Value = uint64_t;
+
+ static constexpr int kTermIdBits = 24;
+ static constexpr int kHitValueBits = sizeof(Hit::Value) * 8;
+ static constexpr int kHitScoreBits = sizeof(Hit::Score) * 8;
+
+ static const Value kInvalidValue;
+
+ explicit Element(Value v = kInvalidValue) : value_(v) {}
+
+ Element(uint32_t term_id, const Hit& hit) {
+ static_assert(
+ kTermIdBits + kHitValueBits + kHitScoreBits <= sizeof(Value) * 8,
+ "LiteIndexElementTooBig");
+
+ value_ = 0;
+ // Term id goes into the most significant bits because it takes
+ // precedence in sorts.
+ bit_util::BitfieldSet(term_id, kHitValueBits + kHitScoreBits, kTermIdBits,
+ &value_);
+ bit_util::BitfieldSet(hit.value(), kHitScoreBits, kHitValueBits, &value_);
+ bit_util::BitfieldSet(hit.score(), 0, kHitScoreBits, &value_);
+ }
+
+ uint32_t term_id() const {
+ return bit_util::BitfieldGet(value_, kHitValueBits + kHitScoreBits,
+ kTermIdBits);
+ }
+
+ Hit hit() const {
+ return Hit(bit_util::BitfieldGet(value_, kHitScoreBits, kHitValueBits),
+ bit_util::BitfieldGet(value_, 0, kHitScoreBits));
+ }
+
+ Value value() const { return value_; }
+
+ private:
+ Value value_;
+ };
+
+ using Options = IcingLiteIndexOptions;
+
+ // Updates checksum of subcomponents.
+ ~LiteIndex();
+
+ // Creates lite index from storage. The files will be created if they do not
+ // already exist.
+ // If Create() fails, a non-ok Status will be returned.
+ static libtextclassifier3::StatusOr<std::unique_ptr<LiteIndex>> Create(
+ const Options& options, const IcingFilesystem* filesystem);
+
+ // Resets all internal members of the index. Returns OK if all operations were
+ // successful.
+ libtextclassifier3::Status Reset();
+
+ // Advises the OS to cache pages in the index, which will be accessed for a
+ // query soon.
+ void Warm();
+
+ // Syncs all modified files in the index to disk. Returns non-OK status if any
+ // file fails to sync properly.
+ libtextclassifier3::Status PersistToDisk();
+
+ // Calculate the checksum of all sub-components of the LiteIndex
+ Crc32 ComputeChecksum();
+
+ // Returns term_id if term found, NOT_FOUND otherwise.
+ libtextclassifier3::StatusOr<uint32_t> FindTerm(
+ const std::string& term) const;
+
+ // Returns an iterator for all terms for which 'prefix' is a prefix.
+ class PrefixIterator {
+ public:
+ explicit PrefixIterator(const IcingDynamicTrie::Iterator& delegate)
+ : delegate_(delegate) {}
+ bool IsValid() const { return delegate_.IsValid(); }
+
+ void Advance() { delegate_.Advance(); }
+
+ const char* GetKey() const { return delegate_.GetKey(); }
+
+ uint32_t GetValueIndex() const { return delegate_.GetValueIndex(); }
+
+ private:
+ IcingDynamicTrie::Iterator delegate_;
+ };
+
+ PrefixIterator FindTermPrefixes(const std::string& prefix) const {
+ return PrefixIterator(IcingDynamicTrie::Iterator(lexicon_, prefix.c_str()));
+ }
+
+ // Insert a term. Returns non-OK if lexicon is full.
+ libtextclassifier3::StatusOr<uint32_t> InsertTerm(
+ const std::string& term, TermMatchType::Code term_match_type);
+
+ // Updates term properties by setting the bit for has_hits_in_prefix_section
+ // only if term_match_type == PREFIX. Otherwise, this does nothing.
+ libtextclassifier3::Status UpdateTerm(uint32_t tvi,
+ TermMatchType::Code term_match_type);
+
+ // Append hit to buffer. term_id must be encoded using the same term_id_codec
+ // supplied to the index constructor. Returns non-OK if hit cannot be added
+ // (either due to hit buffer or file system capacity reached).
+ libtextclassifier3::Status AddHit(uint32_t term_id, const Hit& hit);
+
+ // Add all hits with term_id from the sections specified in section_id_mask,
+ // skipping hits in non-prefix sections if only_from_prefix_sections is true,
+ // to hits_out.
+ uint32_t AppendHits(uint32_t term_id, SectionIdMask section_id_mask,
+ bool only_from_prefix_sections,
+ std::vector<DocHitInfo>* hits_out);
+
+ // Check if buffer has reached its capacity.
+ bool is_full() const;
+
+ constexpr static uint32_t max_hit_buffer_size() {
+ return std::numeric_limits<uint32_t>::max() / sizeof(LiteIndex::Element);
+ }
+
+ // We keep track of the last added document_id. This is always the largest
+ // document_id that has been added because hits can only be added in order of
+ // increasing document_id.
+ DocumentId last_added_document_id() const {
+ return header_->last_added_docid();
+ }
+
+ // Returns debug information for the index in out.
+ // verbosity <= 0, simplest debug information - size of lexicon, hit buffer
+ // verbosity > 0, more detailed debug information from the lexicon.
+ void GetDebugInfo(int verbosity, std::string* out) const;
+
+ private:
+ static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions();
+
+ LiteIndex(const Options& options, const IcingFilesystem* filesystem);
+
+ // Initializes lite index from storage. Must be called exactly once after
+ // object construction. If init fails, returns a non-ok Status.
+ libtextclassifier3::Status Initialize();
+
+ bool initialized() const { return header_ != nullptr; }
+
+ // Sets the computed checksum in the header
+ void UpdateChecksum();
+
+ // Returns the position of the first element with term_id, or the size of the
+ // hit buffer if term_id is not present.
+ uint32_t Seek(uint32_t term_id);
+
+ ScopedFd hit_buffer_fd_;
+
+ IcingArrayStorage hit_buffer_;
+ uint32_t hit_buffer_crc_;
+ IcingDynamicTrie lexicon_;
+ // TODO(b/140437260): Port over to MemoryMappedFile
+ IcingMMapper header_mmap_;
+ std::unique_ptr<IcingLiteIndex_Header> header_;
+ const Options options_;
+ // TODO(b/139087650) Move to icing::Filesystem
+ const IcingFilesystem* const filesystem_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_LITE_INDEX_H_
diff --git a/icing/index/term-id-codec.cc b/icing/index/term-id-codec.cc
new file mode 100644
index 0000000..49e75f6
--- /dev/null
+++ b/icing/index/term-id-codec.cc
@@ -0,0 +1,98 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/term-id-codec.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/legacy/core/icing-string-util.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::StatusOr<std::unique_ptr<TermIdCodec>> TermIdCodec::Create(
+ uint32_t max_main_tvi, uint32_t max_lite_tvi) {
+ uint64_t sum =
+ static_cast<uint64_t>(max_main_tvi) + static_cast<uint64_t>(max_lite_tvi);
+ if (sum > std::numeric_limits<uint32_t>::max()) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Sum of max_main_tvi, %d, and max_lite_tvi, %d must be less than the "
+ "uint32_t max, %d.",
+ max_main_tvi, max_lite_tvi, std::numeric_limits<uint32_t>::max()));
+ }
+
+ // TODO(cassiewang): When we convert these values to signed ints, we should
+ // check to make sure they're >= 0.
+
+ return std::unique_ptr<TermIdCodec>(
+ new TermIdCodec(max_main_tvi, max_lite_tvi));
+}
+
+libtextclassifier3::StatusOr<uint32_t> TermIdCodec::EncodeTvi(
+ uint32_t tvi, TviType tvi_type) const {
+ switch (tvi_type) {
+ case TviType::MAIN:
+ if (tvi >= max_main_tvi_) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Main tvi %d is greater "
+ "than or equal to the max_main_tvi %d",
+ tvi, max_main_tvi_));
+ }
+ return tvi;
+ case TviType::LITE: {
+ if (tvi >= max_lite_tvi_) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Lite tvi %d is greater "
+ "than or equal to the max_lite_tvi %d",
+ tvi, max_lite_tvi_));
+ }
+ return max_main_tvi_ + tvi;
+ }
+ }
+}
+
+libtextclassifier3::StatusOr<TviType> TermIdCodec::DecodeTviType(
+ uint32_t term_id) const {
+ if (term_id < max_main_tvi_) {
+ return TviType::MAIN;
+ } else if (term_id < max_term_id()) {
+ return TviType::LITE;
+ }
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Given TermId %d is greater than or equal to the max TermId %d", term_id,
+ max_term_id()));
+}
+
+libtextclassifier3::StatusOr<TermIdCodec::DecodedTermInfo>
+TermIdCodec::DecodeTermInfo(uint32_t term_id) const {
+ DecodedTermInfo result;
+ ICING_ASSIGN_OR_RETURN(result.tvi_type, DecodeTviType(term_id));
+ switch (result.tvi_type) {
+ case TviType::MAIN:
+ result.tvi = term_id;
+ break;
+ case TviType::LITE:
+ result.tvi = term_id - max_main_tvi_;
+ break;
+ }
+ return result;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/index/term-id-codec.h b/icing/index/term-id-codec.h
new file mode 100644
index 0000000..cead108
--- /dev/null
+++ b/icing/index/term-id-codec.h
@@ -0,0 +1,107 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_INDEX_TERM_ID_CODEC_H_
+#define ICING_INDEX_TERM_ID_CODEC_H_
+
+#include <cstdint>
+#include <memory>
+
+#include "utils/base/statusor.h"
+
+// Encodes/decodes TermIds into different TviTypes. A "tvi" is a
+// term_value_index into some space, essentially a unique id within that space.
+// Across TviTypes, tvis are not necessarily unique (i.e. we can have a tvi of 0
+// in the LITE space and a tvi of 0 in the MAIN space). The codec maps tvis into
+// one overall TermId space so that tvis can be represented by a unique TermId
+// across all TviTypes (i.e. a MAIN tvi of 0 can be represented by 0, and a LITE
+// tvi of 0 can be represented by 10). The max TermId will be the sum of the max
+// MAIN tvi and the max LITE tvi.
+//
+// Example use:
+// ICING_ASSIGN_OR_RETURN(auto term_id_codec,
+//     TermIdCodec::Create(/*max_main_tvi=*/5, /*max_lite_tvi=*/5));
+//
+// term_id_codec->DecodeTviType(0);  // TviType::MAIN
+// term_id_codec->DecodeTviType(4);  // TviType::MAIN
+// term_id_codec->DecodeTviType(5);  // TviType::LITE
+// term_id_codec->DecodeTviType(9);  // TviType::LITE
+//
+// term_id_codec->DecodeTviType(100);  // INVALID_ARGUMENT, exceeds max TermId
+//
+// TODO(cassiewang): Use signed integers for the tvi values. Currently, the max
+// values that are passed in are ~5 million for max_main_tvi, and ~1 million for
+// max_lite_tvi. Since the sum of both of these is still well under the int32_t
+// max, we should use signed integers (go/totw/159). But since
+// we're getting these values from icing::DynamicTrie, we need to convert all
+// the uints at once to avoid even worse undefined conversion behavior.
+namespace icing {
+namespace lib {
+
+enum TviType { MAIN, LITE };
+
+class TermIdCodec {
+ public:
+ struct DecodedTermInfo {
+ TviType tvi_type;
+ uint32_t tvi;
+ };
+
+ // Encodes/decodes TermIds based on a max main tvi and a max lite tvi. The max
+ // tvis are an exclusive upper bound on the values. For example, Create(5, 5)
+ // creates a MAIN encoding that holds [0, 1, 2, 3, 4] TermIds and a LITE
+ // encoding that holds [5, 6, 7, 8, 9] TermIds.
+ //
+ // Returns:
+ // unique_ptr to a TermIdCodec on success
+ // INVALID_ARGUMENT if the sum of max_main_tvi and max_lite_tvi is greater
+ // than the max uint32_t value
+ static libtextclassifier3::StatusOr<std::unique_ptr<TermIdCodec>> Create(
+ uint32_t max_main_tvi, uint32_t max_lite_tvi);
+
+ // Returns:
+ // TermId that would represent the given tvi of tvi_type
+ // INVALID_ARGUMENT if the tvi of tvi_type would exceed the max TermId
+ libtextclassifier3::StatusOr<uint32_t> EncodeTvi(uint32_t tvi,
+ TviType tvi_type) const;
+
+ // Returns:
+ // TviType of the encoded TermId
+ // INVALID_ARGUMENT if the term_id exceeds the max TermId
+ libtextclassifier3::StatusOr<TviType> DecodeTviType(uint32_t term_id) const;
+
+ // Returns:
+ // Decoded info of the given term_id
+ // INVALID_ARGUMENT if the term_id exceeds the max TermId
+ libtextclassifier3::StatusOr<DecodedTermInfo> DecodeTermInfo(
+ uint32_t term_id) const;
+
+ uint32_t max_main_tvi() const { return max_main_tvi_; }
+
+ uint32_t max_lite_tvi() const { return max_lite_tvi_; }
+
+ uint32_t max_term_id() const { return max_main_tvi_ + max_lite_tvi_; }
+
+ private:
+ explicit TermIdCodec(uint32_t max_main_tvi, uint32_t max_lite_tvi)
+ : max_main_tvi_(max_main_tvi), max_lite_tvi_(max_lite_tvi) {}
+
+ uint32_t max_main_tvi_;
+ uint32_t max_lite_tvi_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_INDEX_TERM_ID_CODEC_H_
diff --git a/icing/index/term-id-codec_test.cc b/icing/index/term-id-codec_test.cc
new file mode 100644
index 0000000..fa7c9e3
--- /dev/null
+++ b/icing/index/term-id-codec_test.cc
@@ -0,0 +1,114 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/index/term-id-codec.h"
+
+#include <cstdint>
+#include <limits>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+
+TEST(TermIdCodecTest, CreateExceedsInt32MaxInvalid) {
+ EXPECT_THAT(TermIdCodec::Create(
+ /*max_main_tvi=*/std::numeric_limits<uint32_t>::max() - 10,
+ /*max_lite_tvi=*/std::numeric_limits<uint32_t>::max() - 10),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(TermIdCodecTest, CreateWithPositiveTvisOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIdCodec> codec,
+ TermIdCodec::Create(/*max_main_tvi=*/10, /*max_lite_tvi=*/10));
+}
+
+TEST(TermIdCodecTest, CreateWithZeroTvisOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIdCodec> codec,
+ TermIdCodec::Create(/*max_main_tvi=*/0, /*max_lite_tvi=*/0));
+}
+
+TEST(TermIdCodecTest, Encode) {
+ // Create a codec where main TVIs are: [0,3), lite: [3,10)
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIdCodec> codec,
+ TermIdCodec::Create(/*max_main_tvi=*/3, /*max_lite_tvi=*/7));
+
+ EXPECT_THAT(codec->EncodeTvi(0, TviType::MAIN), IsOkAndHolds(Eq(0)));
+ EXPECT_THAT(codec->EncodeTvi(2, TviType::MAIN), IsOkAndHolds(Eq(2)));
+ EXPECT_THAT(codec->EncodeTvi(3, TviType::MAIN),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(codec->EncodeTvi(0, TviType::LITE), IsOkAndHolds(Eq(3)));
+ EXPECT_THAT(codec->EncodeTvi(6, TviType::LITE), IsOkAndHolds(Eq(9)));
+ EXPECT_THAT(codec->EncodeTvi(7, TviType::LITE),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(TermIdCodecTest, DecodeTermInfo) {
+ // Create a codec where main TVIs are: [0,3), lite: [3,10)
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIdCodec> codec,
+ TermIdCodec::Create(/*max_main_tvi=*/3, /*max_lite_tvi=*/7));
+
+ ICING_ASSERT_OK_AND_ASSIGN(TermIdCodec::DecodedTermInfo decoded_term,
+ codec->DecodeTermInfo(0));
+ EXPECT_THAT(decoded_term.tvi_type, Eq(TviType::MAIN));
+ EXPECT_THAT(decoded_term.tvi, Eq(0));
+
+ ICING_ASSERT_OK_AND_ASSIGN(decoded_term, codec->DecodeTermInfo(3));
+ EXPECT_THAT(decoded_term.tvi_type, Eq(TviType::LITE));
+ EXPECT_THAT(decoded_term.tvi, Eq(0));
+
+ EXPECT_THAT(codec->DecodeTermInfo(10),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(TermIdCodecTest, TviType) {
+ // Create a codec where main TVIs are: [0,3), lite: [3,10)
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIdCodec> codec,
+ TermIdCodec::Create(/*max_main_tvi=*/3, /*max_lite_tvi=*/7));
+
+ EXPECT_THAT(codec->DecodeTviType(0), IsOkAndHolds(Eq(TviType::MAIN)));
+ EXPECT_THAT(codec->DecodeTviType(2), IsOkAndHolds(Eq(TviType::MAIN)));
+ EXPECT_THAT(codec->DecodeTviType(3), IsOkAndHolds(Eq(TviType::LITE)));
+ EXPECT_THAT(codec->DecodeTviType(9), IsOkAndHolds(Eq(TviType::LITE)));
+ EXPECT_THAT(codec->DecodeTviType(10),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST(TermIdCodecTest, Max) {
+ // Create a codec where main TVIs are: [0,3), lite: [3,10)
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<TermIdCodec> codec,
+ TermIdCodec::Create(/*max_main_tvi=*/3, /*max_lite_tvi=*/7));
+
+ EXPECT_THAT(codec->max_main_tvi(), Eq(3));
+ EXPECT_THAT(codec->max_lite_tvi(), Eq(7));
+ EXPECT_THAT(codec->max_term_id(), Eq(10));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/core/icing-compat.h b/icing/legacy/core/icing-compat.h
new file mode 100644
index 0000000..4340707
--- /dev/null
+++ b/icing/legacy/core/icing-compat.h
@@ -0,0 +1,33 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2014 Google Inc. All Rights Reserved.
+// Author: csyoung@google.com (C. Sean Young)
+
+#ifndef ICING_LEGACY_CORE_ICING_COMPAT_H_
+#define ICING_LEGACY_CORE_ICING_COMPAT_H_
+
+#ifdef __APPLE__
+// iOS does not allow dlopen/dlclose/dlsym.
+#define ICING_ALLOW_DYNAMIC_EXTENSIONS 0
+// Xcode 6.3/LLVM 3.6 removed std::hash specialization for std::basic_string,
+// so we need the versions from util/hash/hash.h. The implementations in
+// util/hash/* have added an AVOID_TRADE_SECRET_CODE macro that's defined on
+// Apple (and some other client targets) to prevent leaking proprietary hash
+// code.
+#else
+#define ICING_ALLOW_DYNAMIC_EXTENSIONS 1
+#endif // __APPLE__
+
+#endif // ICING_LEGACY_CORE_ICING_COMPAT_H_
diff --git a/icing/legacy/core/icing-core-types.h b/icing/legacy/core/icing-core-types.h
new file mode 100644
index 0000000..cc12663
--- /dev/null
+++ b/icing/legacy/core/icing-core-types.h
@@ -0,0 +1,62 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2012 Google Inc. All Rights Reserved.
+// Author: sbanacho@google.com (Scott Banachowski)
+//
+// This header is used to declare typedefs and constants
+// that are shared across several modules.
+
+#ifndef ICING_LEGACY_CORE_ICING_CORE_TYPES_H_
+#define ICING_LEGACY_CORE_ICING_CORE_TYPES_H_
+
+#include <stdint.h>
+
+#include <cstddef> // size_t not defined implicitly for all platforms.
+#include <vector>
+
+#include "icing/legacy/core/icing-compat.h"
+
+namespace icing {
+namespace lib {
+
+enum IcingTokenizerType {
+ // Group tokens by clustering characters that match IsRuneLetter
+ // together.
+ TOKENIZER_PLAIN,
+ // Marks tokens that look like html tags (enclosed in <>) or entities
+ // (enclosed in &;).
+ TOKENIZER_HTML,
+ // Skip over html tags.
+ TOKENIZER_HTML_IGNORE_TAGS,
+ // Email address parsing. Marks the address, local address and rfc token
+ // portion of from, to, cc headers. This assumes the rfc822 tokens
+ // were pre-processed to canonical form by the Android Java Rfc822Tokenizer.
+ TOKENIZER_RFC822,
+ // For tokenizing queries, recognizes query syntax.
+ TOKENIZER_QUERY,
+ // For tokenizing simple queries, which only breaks on whitespace tokens.
+ TOKENIZER_QUERY_SIMPLE,
+ // For tokenizing ST-like queries.
+ TOKENIZER_ST_QUERY,
+ // For tokenizing URLs.
+ TOKENIZER_URL,
+ // For not tokenizing and returning one token same as the input.
+ TOKENIZER_VERBATIM,
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_CORE_ICING_CORE_TYPES_H_
diff --git a/icing/legacy/core/icing-packed-pod.h b/icing/legacy/core/icing-packed-pod.h
new file mode 100644
index 0000000..b2db680
--- /dev/null
+++ b/icing/legacy/core/icing-packed-pod.h
@@ -0,0 +1,40 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_LEGACY_CORE_ICING_PACKED_POD_H_
+#define ICING_LEGACY_CORE_ICING_PACKED_POD_H_
+
+#include <type_traits>
+
+namespace icing {
+namespace lib {
+
+// Any struct whose size is not a multiple of its alignment will lead to a
+// padding of sizeof(T) % alignof(T) bytes per element when creating an array of
+// that type.
+template <typename T>
+struct no_array_padding
+ : std::integral_constant<bool, sizeof(T) % alignof(T) == 0> {};
+
+// See go/icing-ubsan for details on requirements of packed objects.
+template <typename T>
+struct icing_is_packed_pod
+ : std::conjunction<std::integral_constant<bool, alignof(T) == 1>,
+ std::is_standard_layout<T>,
+ std::is_trivially_copyable<T>> {};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_CORE_ICING_PACKED_POD_H_
diff --git a/icing/legacy/core/icing-string-util.cc b/icing/legacy/core/icing-string-util.cc
new file mode 100644
index 0000000..1954cd3
--- /dev/null
+++ b/icing/legacy/core/icing-string-util.cc
@@ -0,0 +1,105 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: ulas@google.com (Ulas Kirazci)
+// sbanacho@google.com (Scott Banachowski)
+//
+// This is a list of IsGoogleLetter letters. It is copied from
+// google3/util/utf8/proptables/letters.txt CL 19164202.
+#include "icing/legacy/core/icing-string-util.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include <algorithm>
+#include <string>
+
+#include "icing/legacy/portable/icing-zlib.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+namespace {} // namespace
+uint32_t IcingStringUtil::UpdateCrc32(uint32_t crc, const char *str, int len) {
+ if (len > 0) {
+ crc = ~crc32(~crc, reinterpret_cast<const Bytef *>(str), len);
+ }
+ return crc;
+}
+
+uint32_t IcingStringUtil::UpdateAtPositionCrc32(uint32_t crc, int orig_len,
+ int offset,
+ const char *xored_str,
+ int len) {
+ // For appending, use UpdateCrc32.
+ if (offset + len > orig_len) {
+ ICING_LOG(FATAL) << "offset: " << offset << " length: " << len
+ << "original_length: " << orig_len;
+ }
+
+ // We have CRC(A|U|B) and we want CRC(A|V|B) where U is the slice
+ // that updated to V.
+ //
+ // Then if xored_str = X = U ^ V:
+ //
+ // CRC(A|V|B) = CRC(0_lenA|X|0_lenB ^ A|U|B)
+ // = CRC(0_lenA|X|0_lenB) ^ CRC(A|U|B)
+ // = CRC_COMBINE(CRC(0_lenA), CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
+ //
+ // But CRC(0s) = 0, and CRC_COMBINE(0, X) = X, so this becomes
+ // = CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
+ uint32_t update_crc = UpdateCrc32(0, xored_str, len);
+ update_crc = crc32_combine(update_crc, 0, orig_len - (offset + len));
+ return crc ^ update_crc;
+}
+
+void IcingStringUtil::SStringAppendV(std::string *strp, int bufsize,
+ const char *fmt, va_list arglist) {
+ int capacity = bufsize;
+ if (capacity <= 0) {
+ va_list backup;
+ va_copy(backup, arglist);
+ capacity = vsnprintf(nullptr, 0, fmt, backup);
+ va_end(arglist);
+ }
+
+ size_t start = strp->size();
+ strp->resize(strp->size() + capacity + 1);
+
+ int written = vsnprintf(&(*strp)[start], capacity + 1, fmt, arglist);
+ va_end(arglist);
+ strp->resize(start + std::min(capacity, written));
+}
+
+void IcingStringUtil::SStringAppendF(std::string *strp, int bufsize,
+ const char *fmt, ...) {
+ va_list arglist;
+ va_start(arglist, fmt);
+ SStringAppendV(strp, bufsize, fmt, arglist);
+}
+
+std::string IcingStringUtil::StringPrintf(const char *fmt, ...) {
+ std::string s;
+ va_list arglist;
+ va_start(arglist, fmt);
+ SStringAppendV(&s, 0, fmt, arglist);
+ return s;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/core/icing-string-util.h b/icing/legacy/core/icing-string-util.h
new file mode 100644
index 0000000..01c17f1
--- /dev/null
+++ b/icing/legacy/core/icing-string-util.h
@@ -0,0 +1,69 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: ulas@google.com (Ulas Kirazci)
+// sbanacho@google.com (Scott Banachowski)
+
+#ifndef ICING_LEGACY_CORE_ICING_STRING_UTIL_H_
+#define ICING_LEGACY_CORE_ICING_STRING_UTIL_H_
+
+#include <stdarg.h>
+#include <stdint.h>
+
+#include <string>
+
+#include "icing/legacy/core/icing-compat.h"
+
+namespace icing {
+namespace lib {
+
// Static-only collection of string helpers: UTF-8 byte classification,
// rolling crc32 maintenance, and printf-style formatting into std::string.
class IcingStringUtil {
 public:
  // Returns true if the character is not the first byte of
  // a multi-byte UTF8 character
  static bool IsContinuationByte(char byte) {
    return (static_cast<uint8_t>(byte) & 0xC0) == 0x80;
  }

  // Returns true for 7-bit ASCII bytes (high bit clear).
  static bool IsAsciiChar(char c) { return static_cast<signed char>(c) >= 0; }

  // Update a rolling crc32. This undoes the one's complement
  // pre-conditioning and post-conditioning of zlib's
  // crc32. Therefore, UpdateCrc32(0, str, len) != HashCrc32(str,
  // len).
  static uint32_t UpdateCrc32(uint32_t crc, const char *str, int len);

  // Update a string's rolling crc for when its value at offset is
  // xor'ed with the buffer [xored_str, xored_str + len).
  //
  // REQUIRES: orig_len >= offset + len.
  static uint32_t UpdateAtPositionCrc32(uint32_t crc, int orig_len, int offset,
                                        const char *xored_str, int len);

  // Append vsnprintf to strp. If bufsize hint is > 0 it is
  // used. Otherwise we compute the required bufsize (which is somewhat
  // expensive).
  // NOTE: SStringAppendV consumes arglist (it calls va_end on it).
  static void SStringAppendV(std::string *strp, int bufsize, const char *fmt,
                             va_list arglist);
  static void SStringAppendF(std::string *strp, int bufsize, const char *fmt,
                             ...) __attribute__((format(printf, 3, 4)));
  static std::string StringPrintf(const char *fmt, ...)
      __attribute__((format(printf, 1, 2)));
};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_CORE_ICING_STRING_UTIL_H_
diff --git a/icing/legacy/core/icing-timer.cc b/icing/legacy/core/icing-timer.cc
new file mode 100644
index 0000000..f29ac40
--- /dev/null
+++ b/icing/legacy/core/icing-timer.cc
@@ -0,0 +1,43 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/legacy/core/icing-timer.h"
+
+namespace icing {
+namespace lib {
+
// Seconds since the Unix epoch, with microsecond resolution.
double IcingTimer::WallTimeNow() {
  struct timeval tv;
  gettimeofday(&tv, nullptr);
  return tv.tv_sec + tv.tv_usec / 1e6;
}

// Clock used for Elapsed(): monotonic where available (immune to wall-clock
// adjustments), wall time on Apple targets.
double IcingTimer::ClockTime() {
#ifdef __APPLE__
  // iOS targets can't rely on clock_gettime(). So, fallback to WallTimeNow().
  return WallTimeNow();
#else
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return ts.tv_sec + ts.tv_nsec / 1e9;
#endif  // __APPLE__
}

// A freshly constructed timer is already running.
IcingTimer::IcingTimer() { Reset(); }

// Restarts the stopwatch.
void IcingTimer::Reset() { start_ = ClockTime(); }

// Seconds elapsed since construction or the last Reset().
double IcingTimer::Elapsed() const { return ClockTime() - start_; }
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/core/icing-timer.h b/icing/legacy/core/icing-timer.h
new file mode 100644
index 0000000..49ba9ad
--- /dev/null
+++ b/icing/legacy/core/icing-timer.h
@@ -0,0 +1,44 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_LEGACY_CORE_ICING_TIMER_H_
+#define ICING_LEGACY_CORE_ICING_TIMER_H_
+
+#include <sys/time.h>
+#include <time.h>
+
+namespace icing {
+namespace lib {
+
+// A simple stop-watch timer for performance measurement.
class IcingTimer {
 public:
  // Seconds since the Unix epoch (gettimeofday), microsecond resolution.
  static double WallTimeNow();

  // Construction starts the timer (calls Reset()).
  IcingTimer();

  // Restart the stopwatch.
  void Reset();

  // Elapsed wall time since last Reset().
  double Elapsed() const;

 private:
  // Monotonic clock where the platform supports it; wall time on Apple.
  static double ClockTime();

  // ClockTime() snapshot taken at the last Reset().
  double start_;
};
+
+} // namespace lib
+} // namespace icing
+#endif // ICING_LEGACY_CORE_ICING_TIMER_H_
diff --git a/icing/legacy/index/icing-array-storage.cc b/icing/legacy/index/icing-array-storage.cc
new file mode 100644
index 0000000..aeb3fa3
--- /dev/null
+++ b/icing/legacy/index/icing-array-storage.cc
@@ -0,0 +1,402 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/legacy/index/icing-array-storage.h"
+
+#include <inttypes.h>
+#include <sys/mman.h>
+
+#include <algorithm>
+
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/core/icing-timer.h"
+#include "icing/legacy/index/icing-bit-util.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/util/logging.h"
+
+using std::max;
+using std::min;
+using std::vector;
+
+namespace icing {
+namespace lib {
+
namespace {

// Do the cast and const dance.
// Strips constness so memory handed out via const accessors can be passed
// to APIs (madvise/msync/pwrite) that take a mutable void*.
void *MakeVoidPtr(const void *ptr) { return const_cast<void *>(ptr); }

}  // namespace

// Discard partial-crc change tracking once the saved original buffers exceed
// 1/kPartialCrcLimitDiv of the tracked region (see GetMutableMemInternal).
const uint32_t IcingArrayStorage::kPartialCrcLimitDiv = 8;  // limit is 1/8th
// Backing file growth granularity, in elements.
const size_t IcingArrayStorage::kGrowElts = 1u << 14;  // 16KB
+
// Storage starts uninitialized; Init() must be called before use.
IcingArrayStorage::IcingArrayStorage(const IcingFilesystem &filesystem)
    : mmapper_(nullptr), filesystem_(filesystem) {
  Reset();
}

// Unmaps the file. The fd is owned by the caller and is not closed.
IcingArrayStorage::~IcingArrayStorage() { delete mmapper_; }
+
+bool IcingArrayStorage::Init(int fd, size_t fd_offset, bool map_shared,
+ uint32_t elt_size, uint32_t num_elts,
+ uint32_t max_num_elts, uint32_t *crc_ptr,
+ bool init_crc) {
+ if (is_initialized()) {
+ return true;
+ }
+
+ // Compute capacity_num_.
+ uint64_t file_size = filesystem_.GetFileSize(fd);
+ if (file_size == IcingFilesystem::kBadFileSize) {
+ ICING_LOG(ERROR) << "Array storage could not get file size";
+ return false;
+ }
+ if (file_size < fd_offset) {
+ ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+ "Array storage file size %" PRIu64 " less than offset %zu", file_size,
+ fd_offset);
+ return false;
+ }
+
+ uint32_t capacity_num_elts = (file_size - fd_offset) / elt_size;
+ if (capacity_num_elts < num_elts) {
+ ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+ "Array storage num elts %u > capacity num elts %u", num_elts,
+ capacity_num_elts);
+ return false;
+ }
+
+ // Map beyond the capacity. We will grow underlying file to avoid
+ // SIGBUS.
+ mmapper_ = new IcingMMapper(fd, false, fd_offset, max_num_elts * elt_size,
+ map_shared ? MAP_SHARED : MAP_PRIVATE);
+ if (!mmapper_->is_valid()) {
+ ICING_LOG(ERROR) << "Array storage map failed";
+ delete mmapper_;
+ mmapper_ = nullptr;
+ return false;
+ }
+
+ fd_ = fd;
+ fd_offset_ = fd_offset;
+ map_shared_ = map_shared;
+ elt_size_ = elt_size;
+ // changes_end_ refers to the last element that was included in the
+ // current crc. If we change it, we must also update *crc_ptr_ to
+ // 0. Otherwise UpdateCrc will fail.
+ cur_num_ = changes_end_ = num_elts;
+ max_num_ = max_num_elts;
+ capacity_num_ = capacity_num_elts;
+ crc_ptr_ = crc_ptr;
+
+ if (crc_ptr_) {
+ uint32_t crc = IcingStringUtil::UpdateCrc32(0, array_cast<char>(),
+ cur_num_ * elt_size_);
+ if (init_crc) {
+ *crc_ptr_ = crc;
+ } else if (crc != *crc_ptr_) {
+ ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+ "Array storage bad crc %u vs %u", crc, *crc_ptr_);
+ goto failed;
+ }
+ }
+ return true;
+
+failed:
+ Reset();
+ return false;
+}
+
// Undoes Init(): unmaps the file and returns to the uninitialized state.
// The fd itself is owned by the caller and is not closed here.
void IcingArrayStorage::Reset() {
  fd_ = -1;
  fd_offset_ = 0;
  map_shared_ = false;
  delete mmapper_;
  mmapper_ = nullptr;
  elt_size_ = 0;
  cur_num_ = 0;
  changes_end_ = 0;
  max_num_ = 0;
  capacity_num_ = 0;
  crc_ptr_ = nullptr;
  changes_.clear();
  saved_orig_buf_.clear();
  dirty_pages_.clear();
}
+
// Shrinks the valid region to the first len elements. The backing file and
// crc bookkeeping are untouched; tracked changes beyond cur_num_ are
// skipped later by UpdateCrc().
void IcingArrayStorage::Truncate(uint32_t len) {
  if (len > cur_num_) {
    ICING_LOG(FATAL) << "Length exceeds current size";
  }

  cur_num_ = len;
}
+
// Returns a writable pointer to elements [elt_idx, elt_idx + elt_len),
// growing the backing file if needed, or nullptr if that would exceed
// max_num_. Records the bookkeeping consumed later by UpdateCrc() (saved
// original bytes) and Sync() (dirty pages).
void *IcingArrayStorage::GetMutableMemInternal(uint32_t elt_idx,
                                               uint32_t elt_len) {
  uint32_t start_byte = elt_idx * elt_size_;
  uint32_t len_bytes = elt_len * elt_size_;

  if (ABSL_PREDICT_FALSE(!GrowIfNecessary(elt_idx + elt_len))) {
    return nullptr;
  }

  cur_num_ = max(cur_num_, elt_idx + elt_len);

  if (crc_ptr_) {
    // Cache original value to update crcs.
    if (elt_idx < changes_end_) {
      uint32_t change_len = min(changes_end_, elt_idx + elt_len) - elt_idx;

      // If we exceed kPartialCrcLimitDiv, clear changes_end_ to
      // revert to full CRC.
      if ((saved_orig_buf_.size() + change_len * elt_size_) *
              kPartialCrcLimitDiv >
          changes_end_ * elt_size_) {
        ICING_VLOG(2) << "Array storage change tracking limit exceeded";
        changes_.clear();
        saved_orig_buf_.clear();
        changes_end_ = 0;
        *crc_ptr_ = 0;
      } else {
        changes_.push_back(Change(elt_idx, change_len));
        saved_orig_buf_.append(array_cast<char>() + start_byte,
                               change_len * elt_size_);
      }
    }
  }

  if (!map_shared_) {
    // Mark dirty pages. With a MAP_PRIVATE mapping, writes are not visible
    // to the file, so Sync() must pwrite these pages back explicitly.
    int start_page = start_byte / IcingMMapper::system_page_size();
    int end_page =
        (start_byte + len_bytes - 1) / IcingMMapper::system_page_size();

    for (int i = start_page; i <= end_page; i++) {
      if (static_cast<size_t>(i) >= dirty_pages_.size()) {
        dirty_pages_.resize(i + 1);
      }
      dirty_pages_[i] = true;
    }
  }

  return MakeVoidPtr(&(array())[start_byte]);
}
+
// Ensures the backing file can hold num_elts elements; returns false if
// that would exceed max_num_. Growth is rounded up to a kGrowElts boundary
// so the file is not extended on every append.
bool IcingArrayStorage::GrowIfNecessary(uint32_t num_elts) {
  if (ABSL_PREDICT_TRUE(num_elts <= capacity_num_)) return true;
  if (num_elts > max_num_) return false;

  // Need to grow.
  uint64_t new_file_size = fd_offset_ + uint64_t{num_elts} * elt_size_;
  // Grow to kGrowElts boundary.
  new_file_size = AlignUp(new_file_size, kGrowElts * elt_size_);
  if (!filesystem_.Grow(fd_, new_file_size)) {
    return false;
  }
  capacity_num_ = (new_file_size - fd_offset_) / elt_size_;
  return true;
}
+
// Folds all tracked changes (and any growth past changes_end_) into
// *crc_ptr_, using the saved original bytes for partial crc updates
// instead of re-crc'ing the whole array. No-op when crc_ptr_ is null.
void IcingArrayStorage::UpdateCrc() {
  if (!crc_ptr_) return;

  // First apply the modified area. Keep a bitmap of already updated
  // regions so we don't double-update.
  vector<bool> updated(changes_end_);
  uint32_t cur_offset = 0;
  uint32_t cur_crc = *crc_ptr_;
  int num_partial_crcs = 0;
  int num_truncated = 0;
  int num_overlapped = 0;
  int num_duplicate = 0;
  for (size_t i = 0; i < changes_.size(); i++) {
    const Change &change = changes_[i];
    if (change.elt_offset + change.elt_len > changes_end_) {
      ICING_LOG(FATAL) << "Off " << change.elt_offset << " len "
                       << change.elt_len << " end " << changes_end_;
    }

    // Skip truncated tracked changes.
    if (change.elt_offset >= cur_num_) {
      ++num_truncated;
      continue;
    }

    // Turn change buf into change^orig. The xor form is what
    // UpdateAtPositionCrc32 expects.
    const char *buf_end =
        &saved_orig_buf_[cur_offset + change.elt_len * elt_size_];
    const char *cur_array = array_cast<char>() + change.elt_offset * elt_size_;
    // Now xor in. SSE acceleration please?
    for (char *cur = &saved_orig_buf_[cur_offset]; cur < buf_end;
         cur++, cur_array++) {
      *cur ^= *cur_array;
    }

    // Skip over already updated bytes by setting update to 0.
    // (xor with 0 is the identity, so zeroed elements don't affect the crc.)
    bool new_update = false;
    bool overlap = false;
    uint32_t cur_elt = change.elt_offset;
    for (char *cur = &saved_orig_buf_[cur_offset]; cur < buf_end;
         cur_elt++, cur += elt_size_) {
      if (updated[cur_elt]) {
        memset(cur, 0, elt_size_);
        overlap = true;
      } else {
        updated[cur_elt] = true;
        new_update = true;
      }
    }

    // Apply update to crc.
    if (new_update) {
      cur_crc = IcingStringUtil::UpdateAtPositionCrc32(
          cur_crc, changes_end_ * elt_size_, change.elt_offset * elt_size_,
          buf_end - change.elt_len * elt_size_, change.elt_len * elt_size_);
      num_partial_crcs++;
      if (overlap) {
        num_overlapped++;
      }
    } else {
      num_duplicate++;
    }
    cur_offset += change.elt_len * elt_size_;
  }
  if (!changes_.empty()) {
    ICING_VLOG(2) << IcingStringUtil::StringPrintf(
        "Array update partial crcs %d truncated %d overlapped %d duplicate %d",
        num_partial_crcs, num_truncated, num_overlapped, num_duplicate);
  }

  // Now update with grown area (elements appended since the last call,
  // which were never covered by the old crc).
  if (changes_end_ < cur_num_) {
    cur_crc = IcingStringUtil::UpdateCrc32(
        cur_crc, array_cast<char>() + changes_end_ * elt_size_,
        (cur_num_ - changes_end_) * elt_size_);
    ICING_VLOG(2) << IcingStringUtil::StringPrintf(
        "Array update tail crc offset %u -> %u", changes_end_, cur_num_);
  }

  // Clear, now that we've applied changes.
  changes_.clear();
  saved_orig_buf_.clear();
  changes_end_ = cur_num_;

  // Commit new crc.
  *crc_ptr_ = cur_crc;
}
+
// Hints the kernel to page the mapped region into RAM (MADV_WILLNEED).
void IcingArrayStorage::Warm() const {
  if (madvise(MakeVoidPtr(array()),
              IcingMMapper::page_aligned_size(cur_num_ * elt_size_),
              MADV_WILLNEED) != 0) {
    ICING_LOG(FATAL) << "Failed to madvise()";
  }
}

// Empties the array and drops all crc/dirty-page bookkeeping. The mapping
// and backing file keep their current size.
void IcingArrayStorage::Clear() {
  cur_num_ = 0;
  changes_end_ = 0;
  changes_.clear();
  saved_orig_buf_.clear();
  dirty_pages_.clear();
  if (crc_ptr_) *crc_ptr_ = 0;
}
+
+// TODO(b/69383247): investigate strange behavior here
+// If map_shared_ is false (i.e. we are using MAP_PRIVATE), dirty pages are
+// flushed to the underlying file, but strangely a sync isn't done.
+// If map_shared_ is true, then we call sync.
+uint32_t IcingArrayStorage::Sync() {
+ if (!map_shared_) {
+ IcingTimer timer;
+ uint32_t num_flushed = 0; // pages flushed
+ uint32_t num_contiguous = 0; // contiguous series of pages flushed
+ uint32_t dirty_pages_size = dirty_pages_.size();
+
+ bool in_dirty = false;
+ uint32_t dirty_start = 0;
+ for (size_t i = 0; i < dirty_pages_size; i++) {
+ bool is_dirty = dirty_pages_[i];
+ if (in_dirty && !is_dirty) {
+ // Flush pages between dirty_start and this.
+ uint32_t dirty_end = i * IcingMMapper::system_page_size();
+ num_contiguous++;
+ num_flushed +=
+ (dirty_end - dirty_start) / IcingMMapper::system_page_size();
+
+ if (pwrite(fd_, array() + dirty_start, dirty_end - dirty_start,
+ fd_offset_ + dirty_start) !=
+ static_cast<ssize_t>(dirty_end - dirty_start)) {
+ ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+ "Flushing pages failed (%u, %u)", dirty_start, dirty_end);
+ }
+ in_dirty = false;
+ } else if (!in_dirty && is_dirty) {
+ dirty_start = i * IcingMMapper::system_page_size();
+ in_dirty = true;
+ }
+ }
+
+ // Flush remaining.
+ if (in_dirty) {
+ uint32_t dirty_end = dirty_pages_size * IcingMMapper::system_page_size();
+ num_contiguous++;
+ num_flushed +=
+ (dirty_end - dirty_start) / IcingMMapper::system_page_size();
+
+ if (pwrite(fd_, array() + dirty_start, dirty_end - dirty_start,
+ fd_offset_ + dirty_start) !=
+ static_cast<ssize_t>(dirty_end - dirty_start)) {
+ ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+ "Flushing pages failed (%u, %u)", dirty_start, dirty_end);
+ }
+ }
+
+ // Clear in one shot.
+ dirty_pages_.clear();
+
+ // Invalidate region so that we are rid of dirty private pages.
+ if (madvise(MakeVoidPtr(array()),
+ IcingMMapper::page_aligned_size(cur_num_ * elt_size_),
+ MADV_DONTNEED) != 0) {
+ ICING_LOG(FATAL) << "Failed to madvise()";
+ }
+
+ if (num_flushed > 0) {
+ ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+ "Flushing %u/%u %u contiguous pages in %.3fms", num_flushed,
+ dirty_pages_size, num_contiguous, timer.Elapsed() * 1000.);
+ }
+
+ return num_flushed;
+ } else {
+ // Changes have been applied. msync() to ensure they are written out.
+ // Don't sync 0-length, which is an error in iOS and a no-op on Android
+ const size_t sync_length =
+ IcingMMapper::page_aligned_size(cur_num_ * elt_size_);
+ if (sync_length > 0) {
+ if (msync(MakeVoidPtr(array()), sync_length, MS_SYNC) != 0) {
+ ICING_LOG(FATAL) << "Failed to msync()";
+ }
+ }
+
+ return 0;
+ }
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/index/icing-array-storage.h b/icing/legacy/index/icing-array-storage.h
new file mode 100644
index 0000000..fad0565
--- /dev/null
+++ b/icing/legacy/index/icing-array-storage.h
@@ -0,0 +1,168 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2012 Google Inc. All Rights Reserved.
+// Author: ulas@google.com (Ulas Kirazci)
+//
+// A disk-backed array.
+
+#ifndef ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_
+#define ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_
+
+#include <stdint.h>
+
+#include <string>
+#include <vector>
+
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-mmapper.h"
+
+namespace icing {
+namespace lib {
+
// Fixed-element-size array mmapped from a region of a file. Supports
// incremental crc maintenance and explicit write-back of dirty pages.
class IcingArrayStorage {
 public:
  explicit IcingArrayStorage(const IcingFilesystem &filesystem);
  ~IcingArrayStorage();

  // Mmap a disk-backed array at fd_offset in fd. fd is owned by the
  // caller and must be kept valid.
  //
  // If map_shared is true, changes to GetMutableMem immediately apply
  // to the backing store. Otherwise changes are kept private until an
  // explicit call to Flush.
  //
  // Each element in the array is elt_size bytes and the array is
  // valid up to num_elts. max_num_elts is the max that the array is
  // allowed to grow to.
  //
  // If crc_ptr is not NULL, explicit calls to UpdateCrc keep the crc
  // of the array in *crc_ptr.
  //
  // If init_crc is true, the crc of the array is recomputed and
  // written into crc_ptr. Else, the crc of the array is checked
  // against the current value in crc_ptr and Init fails if the crc
  // does not match.
  //
  // REQUIRES: !is_initialized()
  bool Init(int fd, size_t fd_offset, bool map_shared, uint32_t elt_size,
            uint32_t num_elts, uint32_t max_num_elts, uint32_t *crc_ptr,
            bool init_crc);

  // Undo Init. Make is_initialized() == false.
  void Reset();

  bool is_initialized() const { return mmapper_ != nullptr; }

  // Attempt to swap into RAM.
  void Warm() const;

  // Make array empty again.
  void Clear();

  // Intent to write memory at (elt_idx, elt_idx + elt_len). Returns
  // NULL if file cannot be grown to accommodate that offset.
  template <class T>
  T *GetMutableMem(uint32_t elt_idx, uint32_t elt_len) {
    return static_cast<T *>(GetMutableMemInternal(elt_idx, elt_len));
  }

  // Resizes to first len elements.
  // REQUIRES: len <= num_elts()
  void Truncate(uint32_t len);

  // Push changes to crc into crc_ptr. No effect if crc_ptr is NULL.
  void UpdateCrc();

  // Write and sync dirty pages to fd starting at offset. Returns
  // number of pages synced.
  uint32_t Sync();

  // Accessors.
  // NOTE: valid only after a successful Init() (dereferences mmapper_).
  const uint8_t *array() const { return mmapper_->address(); }
  template <class T>
  const T *array_cast() const {
    return reinterpret_cast<const T *>(array());
  }
  uint32_t num_elts() const { return cur_num_; }
  uint32_t max_num_elts() const { return max_num_; }
  uint32_t max_size() const { return max_num_elts() * elt_size_; }

  // For stats.
  uint32_t num_dirty_pages() const {
    uint32_t num = 0;
    for (size_t i = 0; i < dirty_pages_.size(); i++) {
      if (dirty_pages_[i]) num++;
    }
    return num;
  }

 private:
  // We track partial updates to the array for CRC updating. This
  // requires extra memory to keep track of original buffers but
  // allows for much faster CRC re-computation. This is the frac limit
  // of byte len after which we will discard recorded changes and
  // recompute the entire CRC instead.
  static const uint32_t kPartialCrcLimitDiv;  // 10 means limit is 1/10

  // Grow file by at least this many elts if array is growable.
  static const size_t kGrowElts;

  // A change record (somebody called GetMutableMem on this
  // region). We only keep changes <= changes_end_.
  struct Change {
    Change(uint32_t o, uint32_t l) : elt_offset(o), elt_len(l) {}

    uint32_t elt_offset;
    uint32_t elt_len;
  };
  static_assert(8 == sizeof(Change), "sizeof(Change) != 8");
  static_assert(4 == alignof(Change), "alignof(Change) != 4");

  void *GetMutableMemInternal(uint32_t elt_idx, uint32_t elt_len);

  bool GrowIfNecessary(uint32_t num_elts);

  int fd_;
  size_t fd_offset_;
  bool map_shared_;
  IcingMMapper *mmapper_;

  // Size of an element in the array.
  uint32_t elt_size_;

  // In bytes.
  uint32_t cur_num_;       // cur boundary of written elts
  uint32_t changes_end_;   // cur_num_ at last call to UpdateCrc
  uint32_t max_num_;       // size of array in elts
  uint32_t capacity_num_;  // num elts that can be accommodated by file size

  uint32_t *crc_ptr_;

  // Changes that have happened since the last update
  // (between [0, changes_end_)).
  std::vector<Change> changes_;
  std::string saved_orig_buf_;

  // Keep track of all pages we touched so we can write them back to
  // disk.
  std::vector<bool> dirty_pages_;

  const IcingFilesystem &filesystem_;
};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_
diff --git a/icing/legacy/index/icing-bit-util.h b/icing/legacy/index/icing-bit-util.h
new file mode 100644
index 0000000..3273a68
--- /dev/null
+++ b/icing/legacy/index/icing-bit-util.h
@@ -0,0 +1,136 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2012 Google Inc. All Rights Reserved.
+// Author: ulas@google.com (Ulas Kirazci)
+//
+// Utilities for fiddling bits.
+
+#ifndef ICING_LEGACY_INDEX_ICING_BIT_UTIL_H_
+#define ICING_LEGACY_INDEX_ICING_BIT_UTIL_H_
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include <limits>
+#include <vector>
+
+namespace icing {
+namespace lib {
+
+// Manipulating bit fields.
+//
+// x value containing the bit field(s)
+// offset offset of bit field in x
+// len len of bit field in x
+//
+// REQUIREMENTS
+//
+// - x an unsigned integer <= 64 bits
+// - offset + len <= sizeof(x) * 8
+//
+// There is no error checking so you will get garbage if you don't
+// ensure the above.
+//
+// To set a value, use BITFIELD_CLEAR then BITFIELD_OR.
+
// Shifting by more than the word length is undefined (on ARM it has the
// intended effect, but on Intel it shifts by % word length), so check the
// length.
// e.g. BITFIELD_MASK(3) == 0b111; BITFIELD_MASK(0) == 0.
#define BITFIELD_MASK(len) ((len == 0) ? 0U : ((~uint64_t{0}) >> (64 - (len))))
#define BITFIELD_GET(x, offset, len) (((x) >> (offset)) & BITFIELD_MASK(len))
// The following modify x.
#define BITFIELD_CLEAR(x, offset, len) (x) &= ~(BITFIELD_MASK(len) << (offset))
// We conservatively mask val at len so x won't be corrupted if val >=
// 1 << len.
#define BITFIELD_OR(x, offset, len, val) \
  (x) |= (uint64_t{val} & BITFIELD_MASK(len)) << (offset)
+
// Number of bits needed to store the range [0, n), i.e. the bit width of
// the largest value n - 1. Returns 0 for empty or single-value ranges.
inline uint8_t BitsToStore(uint32_t n) {
  if (n <= 1) {
    return 0;
  }
  uint32_t highest = n - 1;
  uint8_t bits = 0;
  while (highest != 0) {
    ++bits;
    highest >>= 1;
  }
  return bits;
}
+
// Rounds n up to the nearest multiple of alignment. Kept as a macro so it
// remains usable inside other preprocessor constant expressions.
#define ALIGN_UP(n, alignment) \
  ((((n) + (alignment)-1) / (alignment)) * (alignment))

// Align up to a multiple.
inline uint64_t AlignUp(uint64_t n, uint64_t alignment) {
  return ALIGN_UP(n, alignment);
}

// Returns true if the values sum to more than UINT32_MAX.
// Fixed: the previous accumulate-then-compare could wrap the uint64_t sum
// itself (e.g. two values of 2^63 summed to 0 and reported no overflow);
// the check below is overflow-safe. Also takes the vector by const
// reference instead of by value (call-site compatible, avoids a copy).
inline bool SumOverflowsUint32(const std::vector<uint64_t> &values) {
  constexpr uint64_t kLimit = std::numeric_limits<uint32_t>::max();
  uint64_t sum = 0;
  for (uint64_t value : values) {
    // Equivalent to sum + value > kLimit, without overflowing.
    if (value > kLimit - sum) {
      return true;
    }
    sum += value;
  }
  return false;
}
+
// VarInt (See
// https://developers.google.com/protocol-buffers/docs/encoding)
#define VAR_INT_MAX_ENCODED_LEN(n_size) ((8 * (n_size) + 6) / 7)

class VarInt {
 public:
  // 7 bits per byte.
  static size_t MaxEncodedLen(size_t n_size) {
    return VAR_INT_MAX_ENCODED_LEN(n_size);
  }
  static const int kMaxEncodedLen64 = VAR_INT_MAX_ENCODED_LEN(8);

  // Encode n into buf. Return encoded len. buf must be at least
  // kMaxEncodedLen64 long.
  static size_t Encode(uint64_t n, uint8_t *buf) {
    size_t written = 0;
    // Emit low-order 7-bit groups, setting the continuation (MSB) bit on
    // every byte except the last.
    while (n >= 0x80) {
      buf[written++] = static_cast<uint8_t>(n) | 0x80;
      n >>= 7;
    }
    buf[written++] = static_cast<uint8_t>(n);
    return written;
  }

  // Decode buf into unsigned integral type pn. Return length
  // decoded. buf must terminate with a byte with MSB cleared. No
  // error checking is done but if buf is null-terminated, Decode
  // won't crash. If decoded doesn't fit into *pn higher order bits
  // will be dropped.
  template <class T>
  static size_t Decode(const uint8_t *buf, T *pn) {
    T value = 0;
    size_t num_read = 0;
    int shift = 0;
    // Continuation bytes carry the MSB; accumulate their low 7 bits.
    while ((buf[num_read] & 0x80) != 0) {
      value |= static_cast<T>(buf[num_read] & 0x7f) << shift;
      shift += 7;
      ++num_read;
    }
    // Final byte has the MSB clear.
    value |= static_cast<T>(buf[num_read]) << shift;
    *pn = value;
    return num_read + 1;
  }
};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_INDEX_ICING_BIT_UTIL_H_
diff --git a/icing/legacy/index/icing-common-types.h b/icing/legacy/index/icing-common-types.h
new file mode 100644
index 0000000..592b549
--- /dev/null
+++ b/icing/legacy/index/icing-common-types.h
@@ -0,0 +1,129 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2014 Google Inc. All Rights Reserved.
+// Author: sbanacho@google.com (Scott Banachowski)
+// Author: csyoung@google.com (C. Sean Young)
+
+#ifndef ICING_LEGACY_INDEX_ICING_COMMON_TYPES_H_
+#define ICING_LEGACY_INDEX_ICING_COMMON_TYPES_H_
+
+#include "icing/legacy/core/icing-core-types.h"
+
+// Protocol buffers are shared across several components.
namespace com {
namespace google {
namespace android {
namespace gms {
namespace icing {
namespace lib {

// Forward declarations only; the proto definitions live elsewhere.
class ClientFileGroup;
class Document;
class Document_Section;
class DocumentStoreStatusProto;
class IMEUpdate;
class IMEUpdateResponse;
class IndexCorpusScoringConfig;
class IndexCorpusScoringConfig_Section;
class IndexScoringConfig;
class InitStatus;
class InitStatus_CorpusInitInfo;
class PendingDeleteUsageReport;
class PhraseAffinityRequest;
class QueryResponse;
class QueryResponse_Corpus;
class QueryResponse_Corpus_Section;
class QueryResponse_Corpus_Tag;
class QueryRequestSpec;
class QueryRequestSpec_CorpusSpec;
class QueryRequestSpec_SectionSpec;
class ResponseDebugInfo;
class ResultDebugInfo;
class SectionConfig;
class SuggestionResponse;
class SuggestionResponse_Suggestion;
class UsageReportsResponse;
class UsageStats;
class UsageStats_Corpus;

}  // namespace lib
}  // namespace icing
}  // namespace gms
}  // namespace android
}  // namespace google
}  // namespace com

namespace icing {
namespace lib {

// Typedefs.
using IcingDocId = uint32_t;

using IcingSectionId = uint32_t;

using IcingCorpusId = uint16_t;
using IcingSectionIdMask = uint16_t;

using IcingTagsCount = uint16_t;

using IcingSequenceNumber = int64_t;

using IcingScore = uint64_t;

constexpr size_t kIcingMaxTokenLen = 30;  // default shared between query
                                          // processor and indexer
constexpr int kIcingQueryTermLimit = 50;  // Maximum number of terms in a query
constexpr int kIcingMaxVariantsPerToken = 10;  // Maximum number of variants

// LINT.IfChange
constexpr int kIcingDocIdBits = 20;  // 1M docs
constexpr IcingDocId kIcingInvalidDocId = (1u << kIcingDocIdBits) - 1;
constexpr IcingDocId kIcingMaxDocId = kIcingInvalidDocId - 1;
// LINT.ThenChange(//depot/google3/wireless/android/icing/plx/google_sql_common_macros.sql)

constexpr int kIcingDocScoreBits = 32;

constexpr int kIcingSectionIdBits = 4;  // 4 bits for 16 values
constexpr IcingSectionId kIcingMaxSectionId = (1u << kIcingSectionIdBits) - 1;
constexpr IcingSectionId kIcingInvalidSectionId = kIcingMaxSectionId + 1;
constexpr IcingSectionIdMask kIcingSectionIdMaskAll = ~IcingSectionIdMask{0};
constexpr IcingSectionIdMask kIcingSectionIdMaskNone = IcingSectionIdMask{0};

constexpr int kIcingCorpusIdBits = 15;  // 32K
constexpr IcingCorpusId kIcingInvalidCorpusId = (1u << kIcingCorpusIdBits) - 1;
constexpr IcingCorpusId kIcingMaxCorpusId = kIcingInvalidCorpusId - 1;

constexpr size_t kIcingMaxSearchableDocumentSize = (1u << 16) - 1;  // 64K
// Max num tokens per document. 64KB is our original maximum (searchable)
// document size. We clip if document exceeds this.
constexpr uint32_t kIcingMaxNumTokensPerDoc =
    kIcingMaxSearchableDocumentSize / 5;
constexpr uint32_t kIcingMaxNumHitsPerDocument =
    kIcingMaxNumTokensPerDoc * kIcingMaxVariantsPerToken;

constexpr IcingTagsCount kIcingInvalidTagCount = ~IcingTagsCount{0};
constexpr IcingTagsCount kIcingMaxTagCount = kIcingInvalidTagCount - 1;

// Location refers to document storage.
constexpr uint64_t kIcingInvalidLocation = ~uint64_t{0};
constexpr uint64_t kIcingMaxDocStoreWriteLocation = uint64_t{1}
                                                    << 32;  // 4bytes.

// Dump symbols in the proto namespace.
// NOTE(review): a using-directive in a header leaks the proto namespace into
// every includer; kept as-is because existing callers rely on the unqualified
// names.
using namespace ::com::google::android::gms::icing;  // NOLINT(build/namespaces)
}  // namespace lib
}  // namespace icing
+
+#endif // ICING_LEGACY_INDEX_ICING_COMMON_TYPES_H_
diff --git a/icing/legacy/index/icing-dynamic-trie.cc b/icing/legacy/index/icing-dynamic-trie.cc
new file mode 100644
index 0000000..a3d6316
--- /dev/null
+++ b/icing/legacy/index/icing-dynamic-trie.cc
@@ -0,0 +1,2349 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: ulas@google.com (Ulas Kirazci)
+//
+// We store the trie in three areas: nodes, nexts and suffixes.
+//
+// Nodes contain an index to a children array (kept in nexts) or to
+// suffixes (for leaf nodes). Nexts contain children arrays of
+// different sizes. Each child entry has the matched char and an index
+// back into the nodes. Leaf nodes index into suffixes instead of the
+// nexts array. Each suffix is a NULL-terminated suffix off the trie,
+// followed by a 4-byte value associated with that key.
+//
+// Allocation
+//
+// Nodes are allocated and never removed. Nexts contain arrays of
+// sizes in power-of-2 increments, i.e. 1, 2, 4, ..., 256. When the
+// number of children of a node increases, it is relocated to an array
+// with the proper size. The (smaller) unused array is added to a free
+// list. A free list is kept for each array size. Allocations happen
+// from the free list first, and then from the end of the nexts
+// array. Suffixes are never freed or compacted. If a node wants to
+// refer to a smaller suffix, it moves the pointer forward and the
+// characters before the new pointer are wasted.
+//
+// Keys can contain any character except '\0'. The '\0' char is
+// special in that it specifies an end-of-key in the child array.
+//
+// Ideas to try:
+//
+// - Put suffix index in a Next instead of creating a leaf node.
+// - Change allocation buckets to 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, ..., 256
+// - Compact next array
+// - GroupVarByte and delta-encode the next array
+// - Collapse nodes with single children
+//
+// Persistence
+//
+// We persist the trie in a binary format such that resurrecting the
+// trie is simply a few file reads. The file is laid out as such:
+//
+// - Header
+// - Nodes
+// - Nexts
+// - Suffixes
+//
+// Each section is aligned to IcingMMapper::system_page_size(). The max
+// requested value for each array is pre-allocated in the file. When
+// we make modifications to the arrays, we set bits in a dirty bitmap
+// of pages. No changes get written to disk until an explicit call to
+// Flush. Then we only write the pages that have their dirty bit set.
+
+#include "icing/legacy/index/icing-dynamic-trie.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <memory>
+#include <utility>
+
+#include "icing/legacy/core/icing-packed-pod.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/core/icing-timer.h"
+#include "icing/legacy/index/icing-array-storage.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-flash-bitmap.h"
+#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/util/logging.h"
+#include "icing/util/math-util.h"
+
+using std::inplace_merge;
+using std::lower_bound;
+using std::max;
+using std::mismatch;
+using std::pair;
+using std::sort;
+using std::vector;
+
+namespace icing {
+namespace lib {
+
+// Based on the bit field widths.
+const uint32_t IcingDynamicTrie::Options::kMaxNodes = (1U << 24) - 1;
+const uint32_t IcingDynamicTrie::Options::kMaxNexts = (1U << 27) - 1;
+const uint32_t IcingDynamicTrie::Options::kMaxSuffixesSize = 1U << 27;
+const uint32_t IcingDynamicTrie::Options::kMaxValueSize = 1U << 16;
+
+const uint32_t IcingDynamicTrie::kInvalidNodeIndex = (1U << 24) - 1;
+const uint32_t IcingDynamicTrie::kInvalidNextIndex = ~0U;
+const uint32_t IcingDynamicTrie::kInvalidSuffixIndex = ~0U;
+
+// Out-of-line definitions for static members declared (with initializers) in
+// the header; required pre-C++17 when the members are ODR-used.
+const int IcingDynamicTrie::kMaxNextArraySize;
+const int IcingDynamicTrie::kNumNextAllocationBuckets;
+
+const uint32_t IcingDynamicTrie::kMaxPropertyId;
+
+const uint32_t IcingDynamicTrie::kInvalidValueIndex;
+
+const uint32_t IcingDynamicTrie::kNoCrc;
+
+// Manages logical node candidates while searching for possible
+// variant matches. Currently implemented as depth first search. The
+// max stack depth is key length * variant fanout. Since max variant
+// fanout is 3, we don't need to worry about blowup of the depth first
+// search stack.
+//
+// Keeps track of original matched string (the string actually present
+// in the trie) for every candidate.
+class IcingDynamicTrie::CandidateSet {
+ public:
+  // One pending DFS entry: position in the trie (logical_node), remaining
+  // unconsumed query key (key), and the matched text assembled so far
+  // (matched_prefix_len chars of cur_prefix_ plus matched_span).
+  struct Candidate {
+    LogicalNode logical_node;
+    const char *key;
+    int matched_prefix_len;
+    std::string matched_span;
+
+    Candidate() {}
+
+    Candidate(const LogicalNode &logical_node_in, const char *key_in,
+              int matched_prefix_len_in, const char *matched_span_in,
+              int matched_span_len_in)
+        : logical_node(logical_node_in),
+          key(key_in),
+          matched_prefix_len(matched_prefix_len_in),
+          matched_span(matched_span_in, matched_span_len_in) {}
+
+    int matched_len() const { return matched_prefix_len + matched_span.size(); }
+  };
+
+  // prefix: if true, partially-consumed suffixes also count as matches.
+  explicit CandidateSet(bool prefix) : prefix_(prefix) {}
+
+  bool IsTerminal(const char *key, uint32_t value_index) const {
+    // Terminal match condition:
+    //
+    // 1. Key was entirely consumed.
+    // 2. The entire suffix was consumed (hence value index is
+    // valid). OR, we are ok with prefix matches.
+    return *key == 0 && (value_index != kInvalidValueIndex || prefix_);
+  }
+
+  // Push a terminal or non-terminal.
+  void Push(const LogicalNode &logical_node, const char *key,
+            uint32_t value_index, int matched_prefix_len,
+            const char *matched_span, int matched_span_len) {
+    if (!AddMatchIfTerminal(key, value_index, matched_span, matched_span_len)) {
+      PushNonTerminal(logical_node, key, matched_prefix_len, matched_span,
+                      matched_span_len);
+    }
+  }
+
+  // Records a match if (key, value_index) is terminal. Returns whether a
+  // match was recorded.
+  bool AddMatchIfTerminal(const char *key, uint32_t value_index,
+                          const char *matched_span, int matched_span_len) {
+    if (!IsTerminal(key, value_index)) {
+      return false;
+    }
+
+    // Terminal match. The original key is the current prefix plus the span
+    // matched at this node.
+    matches_.push_back(OriginalMatch());
+    OriginalMatch *match = &matches_.back();
+    match->value_index = value_index;
+    match->orig.reserve(cur_prefix_.size() + matched_span_len);
+    match->orig.append(cur_prefix_).append(matched_span, matched_span_len);
+    return true;
+  }
+
+  // Push a definite non-terminal.
+  void PushNonTerminal(const LogicalNode &logical_node, const char *key,
+                       int matched_prefix_len, const char *matched_span,
+                       int matched_span_len) {
+    candidates_.push_back(Candidate(logical_node, key, matched_prefix_len,
+                                    matched_span, matched_span_len));
+  }
+
+  // Pops the most recent candidate into *candidate and rewinds cur_prefix_
+  // to that candidate's matched prefix before appending its span.
+  // REQUIRES: !empty().
+  void Pop(Candidate *candidate) {
+    *candidate = candidates_.back();
+    // Cast avoids a signed/unsigned comparison warning; matched_prefix_len
+    // is never negative (it is derived from a matched-string length).
+    if (cur_prefix_.size() <
+        static_cast<size_t>(candidate->matched_prefix_len)) {
+      ICING_LOG(FATAL)
+          << "Length of current prefix is smaller than length of matched "
+             "prefix, there're inconsistencies in dynamic trie.";
+    }
+
+    cur_prefix_.resize(candidate->matched_prefix_len);
+    cur_prefix_.append(candidate->matched_span);
+    candidates_.pop_back();
+  }
+
+  bool empty() const { return candidates_.empty(); }
+
+  // Moves the accumulated matches into *ret and resets all internal state.
+  // REQUIRES: empty() — every candidate must have been popped.
+  void Release(vector<OriginalMatch> *ret) {
+    if (!empty()) {
+      ICING_LOG(FATAL) << "Candidate set not empty before releasing matches";
+    }
+
+    ret->swap(matches_);
+
+    cur_prefix_.clear();
+    candidates_.clear();
+    matches_.clear();
+  }
+
+ private:
+  const bool prefix_;
+
+  // Concatenation of the matched spans of all candidates currently on the
+  // DFS stack; rewound in Pop().
+  std::string cur_prefix_;
+  vector<Candidate> candidates_;
+
+  vector<IcingDynamicTrie::OriginalMatch> matches_;
+};
+
+// Options.
+// Validates user-supplied sizing options against the bit-field limits.
+bool IcingDynamicTrie::Options::is_valid() const {
+  // Every array bound must be non-zero and within its limit; value_size only
+  // has an upper bound (zero-length values are permitted).
+  return max_nodes > 0 && max_nodes <= kMaxNodes && max_nexts > 0 &&
+         max_nexts <= kMaxNexts && max_suffixes_size > 0 &&
+         max_suffixes_size <= kMaxSuffixesSize && value_size <= kMaxValueSize;
+}
+
+// IcingDynamicTrieStorage
+// Owns the three mmapped arrays (nodes, nexts, suffixes) plus the header
+// page, and mediates all reads/writes and crc maintenance for them.
+class IcingDynamicTrie::IcingDynamicTrieStorage {
+ public:
+  IcingDynamicTrieStorage(const std::string &file_basename,
+                          const RuntimeOptions &runtime_options,
+                          const IcingFilesystem *filesystem);
+  ~IcingDynamicTrieStorage();
+
+  bool is_initialized() const { return hdr_mmapper_.is_valid(); }
+
+  bool CreateIfNotExist(const Options &options);
+  bool Init();
+  static bool Remove(const std::string &file_basename,
+                     const IcingFilesystem &filesystem);
+  bool Sync();
+  uint64_t GetDiskUsage() const;
+  void Warm();
+
+  void Clear();
+
+  bool empty() const { return hdr().num_nodes() == 0; }
+
+  // Never cast off these consts when writing to the arrays. Always
+  // use the GetMutable* helpers above.
+  const Node *GetNode(uint32_t idx) const {
+    return &array_storage_[NODE].array_cast<Node>()[idx];
+  }
+  const Node *GetRootNode() const { return GetNode(0); }
+  const Next *GetNext(uint32_t idx, int child) const {
+    return &array_storage_[NEXT].array_cast<Next>()[idx + child];
+  }
+  const char *GetSuffix(uint32_t idx) const {
+    return &array_storage_[SUFFIX].array_cast<char>()[idx];
+  }
+
+  // Inverse of the accessors above: recover an array index from a pointer
+  // into the mmapped region (pointer arithmetic against element 0).
+  uint32_t GetNodeIndex(const Node *node) const { return node - GetNode(0); }
+  uint32_t GetNextArrayIndex(const Next *next) const {
+    return next - GetNext(0, 0);
+  }
+  uint32_t GetSuffixIndex(const char *suffix) const {
+    return suffix - GetSuffix(0);
+  }
+
+  // By default, nodes_, nexts_ and suffixes_ are read-only. This
+  // returns a writable element or array within and sets
+  // dirty_pages_[array_type] as a side effect, assuming the mutable
+  // area will get written to.
+  Node *GetMutableNode(uint32_t idx);
+  Next *GetMutableNextArray(uint32_t idx, uint32_t len);
+  char *GetMutableSuffix(uint32_t idx, uint32_t len);
+
+  // Update crcs based on current contents. Returns all_crc or kNoCrc.
+  uint32_t UpdateCrc();
+
+  // Allocators.
+  uint32_t nodes_left() const;
+  uint32_t nexts_left() const;
+  uint32_t suffixes_left() const;
+
+  // REQUIRES: nodes_left() > 0.
+  Node *AllocNode();
+  // REQUIRES: nexts_left() >= kMaxNextArraySize.
+  Next *AllocNextArray(int size);
+  void FreeNextArray(Next *next, int log2_size);
+  // REQUIRES: suffixes_left() >= strlen(suffix) + 1 + value_size()
+  uint32_t MakeSuffix(const char *suffix, const void *value,
+                      uint32_t *value_index);
+
+  const IcingDynamicTrieHeader &hdr() const { return hdr_.hdr; }
+
+  uint32_t value_size() const { return hdr().value_size(); }
+
+  void FillDirtyPageStats(Stats *stats) const;
+
+  void inc_num_keys() { hdr_.hdr.set_num_keys(hdr_.hdr.num_keys() + 1); }
+
+ private:
+  friend void IcingDynamicTrie::SetHeader(
+      const IcingDynamicTrieHeader &new_hdr);
+
+  enum ArrayType { NODE, NEXT, SUFFIX, NUM_ARRAY_TYPES };
+
+  // Returns all filenames that are part of the storage. First
+  // filename is the header and the rest correspond to ArrayType enum
+  // values.
+  static void GetFilenames(const std::string &file_basename,
+                           vector<std::string> *filenames);
+  static std::string GetHeaderFilename(const std::string &file_basename);
+
+  uint32_t GetHeaderCrc() const;
+
+  uint32_t GetAllCrc() const;
+
+  uint32_t UpdateCrcInternal(bool write_hdr);
+
+  // Initializes hdr_ with options and writes the resulting header to disk.
+  bool CreateNewHeader(IcingScopedFd sfd, const Options &options);
+  bool WriteHeader();
+
+  // Header block. On-disk header block format is as follows:
+  //
+  // |serialized-header|pad|crcs|
+  // <--- system_page_size() --->
+
+  // Wrapper for header protobuf.
+  class Header {
+    // Serialized format:
+    //
+    // magic(4)|size(4)|serialized hdr(size)
+    static const uint32_t kMagic;
+    // TODO(b/77482303) : Remove version from the IcingFlashBitmap header -
+    // magic makes it unnecessary.
+    static const uint32_t kCurVersion;
+
+   public:
+    void Init(const Options &options);
+    bool Init(const uint8_t *buf, uint32_t buf_size);
+    void Invalidate() { hdr.Clear(); }
+    bool SerializeToArray(uint8_t *buf, uint32_t buf_size) const;
+    bool Verify();
+
+    IcingDynamicTrieHeader hdr;
+  };
+
+  std::string file_basename_;
+
+  Header hdr_;
+
+  IcingMMapper hdr_mmapper_;
+
+  // Lives at the tail of the mmapped header page (see layout above);
+  // crcs_ points into that mapping, so writes here hit the file on sync.
+  struct Crcs {
+    uint32_t all_crc;
+    uint32_t header_crc;
+    uint32_t array_crcs[NUM_ARRAY_TYPES];
+  };
+  Crcs *crcs_;
+
+  static uint32_t serialized_header_max() {
+    return IcingMMapper::system_page_size() - sizeof(Crcs);
+  }
+
+  RuntimeOptions runtime_options_;
+
+  // Info kept about each array (NODE, NEXT, SUFFIX) to manage
+  // storage.
+  IcingScopedFd array_fds_[NUM_ARRAY_TYPES];
+  std::vector<IcingArrayStorage> array_storage_;
+  const IcingFilesystem *filesystem_;
+};
+
+// Constructor only records configuration; no files are touched until
+// CreateIfNotExist()/Init().
+IcingDynamicTrie::IcingDynamicTrieStorage::IcingDynamicTrieStorage(
+    const std::string &file_basename, const RuntimeOptions &runtime_options,
+    const IcingFilesystem *filesystem)
+    : file_basename_(file_basename),
+      hdr_mmapper_(false, MAP_SHARED),
+      crcs_(nullptr),
+      runtime_options_(runtime_options),
+      array_storage_(NUM_ARRAY_TYPES, IcingArrayStorage(*filesystem)),
+      filesystem_(filesystem) {}
+
+// Tears down the array mappings, but only when Init() previously succeeded
+// (is_initialized() keys off the header mapping being valid).
+IcingDynamicTrie::IcingDynamicTrieStorage::~IcingDynamicTrieStorage() {
+  if (is_initialized()) {
+    for (IcingArrayStorage &storage : array_storage_) {
+      storage.Reset();
+    }
+  }
+}
+
+// Fills *filenames with every file backing this storage: the header file
+// first, then one file per ArrayType (NODE, NEXT, SUFFIX) in enum order.
+void IcingDynamicTrie::IcingDynamicTrieStorage::GetFilenames(
+    const std::string &file_basename, vector<std::string> *filenames) {
+  // Suffixes are parallel to the ArrayType enum.
+  static const char *const kArrayFilenameSuffixes[NUM_ARRAY_TYPES] = {
+      ".n", ".x", ".s"};
+
+  filenames->clear();
+  filenames->reserve(1 + NUM_ARRAY_TYPES);
+  filenames->push_back(GetHeaderFilename(file_basename));
+  for (const char *suffix : kArrayFilenameSuffixes) {
+    filenames->push_back(file_basename + suffix);
+  }
+}
+
+// The header file sits beside the array files, distinguished by suffix.
+std::string IcingDynamicTrie::IcingDynamicTrieStorage::GetHeaderFilename(
+    const std::string &file_basename) {
+  return file_basename + ".h";
+}
+
+// Opens and mmaps the header and array files, then either initializes crcs
+// (first run after creation: header_crc is still kNoCrc) or verifies them.
+// On any failure all partially-acquired resources are released via the
+// `failed` label and false is returned.
+bool IcingDynamicTrie::IcingDynamicTrieStorage::Init() {
+  bool init_crcs = false;
+  const bool map_shared =
+      runtime_options_.storage_policy == RuntimeOptions::kMapSharedWithCrc;
+
+  // Open files.
+  vector<std::string> filenames;
+  GetFilenames(file_basename_, &filenames);
+  for (size_t i = 0; i < filenames.size(); i++) {
+    uint64_t file_size = filesystem_->GetFileSize(filenames[i].c_str());
+    if (file_size == IcingFilesystem::kBadFileSize) {
+      goto failed;
+    }
+    IcingScopedFd sfd(filesystem_->OpenForWrite(filenames[i].c_str()));
+    if (!sfd.is_valid()) {
+      goto failed;
+    }
+    // The first filename is the header and the rest correspond to ArrayType
+    // enum values. The header's fd can be closed immediately after mmapping
+    // (see b/114830334). Other files' fds are tracked in array_fds_ for later
+    // closing.
+    if (i == 0) {
+      // Header.
+      if (file_size != IcingMMapper::system_page_size()) {
+        ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+            "Trie hdr wrong size: %" PRIu64, file_size);
+        goto failed;
+      }
+
+      // Open hdr.
+      hdr_mmapper_.Remap(sfd.get(), 0, IcingMMapper::system_page_size());
+      if (!hdr_mmapper_.is_valid()) {
+        ICING_LOG(ERROR) << "Trie map header failed";
+        goto failed;
+      }
+    } else {
+      array_fds_[i - 1] = std::move(sfd);
+    }
+  }
+
+  // Point crcs_ to correct region (the tail of the header page, after the
+  // serialized header and padding).
+  crcs_ = reinterpret_cast<Crcs *>(hdr_mmapper_.address() +
+                                   serialized_header_max());
+  if (crcs_->header_crc == kNoCrc) {
+    // Create crcs.
+    crcs_->header_crc = GetHeaderCrc();
+
+    // Do the same for the arrays.
+    init_crcs = true;
+  } else {
+    // Verify crc.
+    if (crcs_->header_crc != GetHeaderCrc()) {
+      ICING_LOG(ERROR) << "Trie header crc failed";
+      goto failed;
+    }
+  }
+
+  // Deserialize and verify header.
+  if (!hdr_.Init(hdr_mmapper_.address(),
+                 IcingMMapper::system_page_size() - sizeof(Crcs)) ||
+      !hdr_.Verify()) {
+    ICING_LOG(ERROR) << "Trie reading header failed";
+    goto failed;
+  }
+
+  // We have the header set up. Now read in the arrays.
+  if (!array_storage_[NODE].Init(array_fds_[NODE].get(), 0, map_shared,
+                                 sizeof(Node), hdr_.hdr.num_nodes(),
+                                 hdr_.hdr.max_nodes(), &crcs_->array_crcs[NODE],
+                                 init_crcs)) {
+    ICING_LOG(ERROR) << "Trie mmap node failed";
+    goto failed;
+  }
+
+  if (!array_storage_[NEXT].Init(array_fds_[NEXT].get(), 0, map_shared,
+                                 sizeof(Next), hdr_.hdr.num_nexts(),
+                                 hdr_.hdr.max_nexts(), &crcs_->array_crcs[NEXT],
+                                 init_crcs)) {
+    ICING_LOG(ERROR) << "Trie mmap next failed";
+    goto failed;
+  }
+
+  if (!array_storage_[SUFFIX].Init(array_fds_[SUFFIX].get(), 0, map_shared,
+                                   sizeof(char), hdr_.hdr.suffixes_size(),
+                                   hdr_.hdr.max_suffixes_size(),
+                                   &crcs_->array_crcs[SUFFIX], init_crcs)) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Trie mmap suffix failed");
+    goto failed;
+  }
+
+  // Overall crc.
+  if (init_crcs) {
+    crcs_->all_crc = GetAllCrc();
+  } else {
+    // Verify crc.
+    if (crcs_->all_crc != GetAllCrc()) {
+      ICING_LOG(ERROR) << "Trie all crc failed";
+      goto failed;
+    }
+  }
+
+  return true;
+
+failed:
+  // Unwind everything acquired so far so a failed Init leaves the object in
+  // the uninitialized state (is_initialized() == false).
+  crcs_ = nullptr;
+  hdr_mmapper_.Unmap();
+  hdr_.Invalidate();
+  for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
+    array_storage_[i].Reset();
+    array_fds_[i].reset();
+  }
+
+  return false;
+}
+
+// Creates the backing files (header + three array files) if they do not
+// already exist. Existence of the header file alone is treated as "already
+// created". On any failure, all files created so far are removed.
+bool IcingDynamicTrie::IcingDynamicTrieStorage::CreateIfNotExist(
+    const Options &options) {
+  vector<std::string> filenames;
+  GetFilenames(file_basename_, &filenames);
+
+  // Check already exists. Just header file check is enough.
+  if (filesystem_->FileExists(filenames[0].c_str())) {
+    return true;
+  }
+
+  // Ensure the storage directory exists
+  std::string storage_dir = filesystem_->GetDirname(filenames[0].c_str());
+  if (!filesystem_->CreateDirectoryRecursively(storage_dir.c_str())) {
+    return false;
+  }
+
+  // Create files.
+  for (size_t i = 0; i < filenames.size(); i++) {
+    IcingScopedFd sfd(filesystem_->OpenForWrite(filenames[i].c_str()));
+    if (!sfd.is_valid()) {
+      Remove(file_basename_, *filesystem_);
+      return false;
+    }
+
+    if (i == 0) {
+      if (!CreateNewHeader(std::move(sfd), options)) {
+        ICING_LOG(ERROR) << "Serialize trie header failed";
+        Remove(file_basename_, *filesystem_);
+        return false;
+      }
+    } else {
+      // Crcs are automatically kNoCrc so they will be initialized
+      // upon first call to Init.
+      if (!filesystem_->Truncate(*sfd, 0)) {
+        Remove(file_basename_, *filesystem_);
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+// Serializes a fresh header (built from options) into a page-sized buffer
+// and writes + syncs it through the passed-in fd.
+bool IcingDynamicTrie::IcingDynamicTrieStorage::CreateNewHeader(
+    IcingScopedFd sfd, const Options &options) {
+  ICING_VLOG(1) << "Creating header with write+sync";
+  hdr_.Init(options);
+  auto buf = std::make_unique<uint8_t[]>(IcingMMapper::system_page_size());
+  // serialized_header_max must be less than system_page_size so we don't
+  // overflow buf when serializing the header.
+  if (serialized_header_max() > IcingMMapper::system_page_size()) {
+    ICING_LOG(FATAL) << "serialized_header_max exceeds system page size";
+  }
+
+  return hdr_.SerializeToArray(buf.get(), serialized_header_max()) &&
+         filesystem_->Write(sfd.get(), buf.get(),
+                            IcingMMapper::system_page_size()) &&
+         filesystem_->DataSync(sfd.get());
+}
+
+// Deletes every file backing the storage. Best-effort: keeps deleting even
+// after a failure so as much as possible is cleaned up, and returns false
+// if any deletion failed.
+bool IcingDynamicTrie::IcingDynamicTrieStorage::Remove(
+    const std::string &file_basename, const IcingFilesystem &filesystem) {
+  vector<std::string> files;
+  GetFilenames(file_basename, &files);
+  bool success = true;
+  for (const std::string &file : files) {
+    if (!filesystem.DeleteFile(file.c_str())) {
+      success = false;
+    }
+  }
+  return success;
+}
+
+// Pre-faults the mmapped arrays by delegating to each storage's Warm().
+void IcingDynamicTrie::IcingDynamicTrieStorage::Warm() {
+  for (IcingArrayStorage &storage : array_storage_) {
+    storage.Warm();
+  }
+}
+
+// Empties the trie in place: zeroes all element counts and free lists in the
+// header and clears the array contents. The max_* capacities are preserved.
+void IcingDynamicTrie::IcingDynamicTrieStorage::Clear() {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  // Clear header.
+  hdr_.hdr.set_num_nodes(0);
+  hdr_.hdr.set_num_nexts(0);
+  hdr_.hdr.set_suffixes_size(0);
+  for (int i = 0; i < hdr_.hdr.free_lists_size(); i++) {
+    hdr_.hdr.set_free_lists(i, kInvalidNextIndex);
+  }
+  hdr_.hdr.set_num_keys(0);
+
+  // Clear array storage.
+  for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
+    array_storage_[i].Clear();
+  }
+
+  // Copy to persistence.
+  WriteHeader();
+}
+
+// Flushes dirty array pages and the header to disk. Returns false if any
+// step failed, but attempts every step regardless so a single failure does
+// not block the rest of the flush.
+bool IcingDynamicTrie::IcingDynamicTrieStorage::Sync() {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  uint32_t total_flushed = 0;
+  bool success = true;
+
+  // Sync all array types.
+  for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
+    total_flushed += array_storage_[i].Sync();
+    if (!filesystem_->DataSync(array_fds_[i].get())) {
+      ICING_LOG(ERROR) << "Unable to sync data for flushing";
+      success = false;
+    }
+  }
+
+  if (!WriteHeader()) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Flushing trie header failed: %s", strerror(errno));
+    success = false;
+  }
+
+  // Need to update CRCs before we sync the header mmap.
+  UpdateCrcInternal(false);
+
+  // Sync header.
+  if (!hdr_mmapper_.Sync()) {
+    ICING_LOG(ERROR) << "Unable to sync trie header for flushing";
+    success = false;
+  }
+
+  if (total_flushed > 0) {
+    ICING_VLOG(1) << IcingStringUtil::StringPrintf("Flushing %u pages of trie",
+                                                   total_flushed);
+  }
+
+  return success;
+}
+
+// Sums on-disk usage of the three array files plus the header file. Uses
+// IncrementByOrSetInvalid so one invalid size marks the whole total invalid.
+uint64_t IcingDynamicTrie::IcingDynamicTrieStorage::GetDiskUsage() const {
+  // Trie files themselves.
+  uint64_t total = 0;
+  for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
+    IcingFilesystem::IncrementByOrSetInvalid(
+        filesystem_->GetDiskUsage(array_fds_[i].get()), &total);
+  }
+
+  // Header.
+  std::string header_filename = GetHeaderFilename(file_basename_);
+  IcingFilesystem::IncrementByOrSetInvalid(
+      filesystem_->GetFileDiskUsage(header_filename.c_str()), &total);
+
+  return total;
+}
+
+// Allocates the next unused node slot. Nodes are append-only and never
+// freed (see the file-top design comment).
+IcingDynamicTrie::Node *IcingDynamicTrie::IcingDynamicTrieStorage::AllocNode() {
+  if (nodes_left() == 0) {
+    ICING_LOG(FATAL) << "No allocated nodes left";
+  }
+
+  hdr_.hdr.set_num_nodes(hdr_.hdr.num_nodes() + 1);
+  return GetMutableNode(hdr_.hdr.num_nodes() - 1);
+}
+
+// Allocates a child ("next") array of at least `size` entries, rounded up to
+// the next power of two. Reuses an entry from the per-size free list when
+// available, otherwise appends to the nexts region.
+IcingDynamicTrie::Next *
+IcingDynamicTrie::IcingDynamicTrieStorage::AllocNextArray(int size) {
+  if (size > kMaxNextArraySize) {
+    ICING_LOG(FATAL) << "Array size exceeds the max 'next' array size";
+  }
+
+  if (nexts_left() < static_cast<uint32_t>(kMaxNextArraySize)) {
+    ICING_LOG(FATAL) << "'next' buffer not enough";
+  }
+
+  // Compute ceil(log2(size)).
+  int log2_size = 0;
+  while ((1 << log2_size) < size) log2_size++;
+  // Note: size <= aligned_size <= kMaxNextArraySize
+  int aligned_size = 1 << log2_size;
+
+  // Look in free list.
+  Next *ret;
+  if (hdr_.hdr.free_lists(log2_size) != kInvalidNextIndex) {
+    // The first entry of a free array doubles as the link to the next free
+    // array of the same size (see FreeNextArray).
+    ret = GetMutableNextArray(hdr_.hdr.free_lists(log2_size), aligned_size);
+    uint32_t next_link = ret->next_index();
+    if (next_link != kInvalidNextIndex && next_link >= hdr_.hdr.max_nexts()) {
+      ICING_LOG(FATAL) << "'next' index is out of range";
+    }
+    hdr_.hdr.set_free_lists(log2_size, next_link);
+  } else {
+    // Allocate a new one.
+    ret = GetMutableNextArray(hdr_.hdr.num_nexts(), aligned_size);
+    hdr_.hdr.set_num_nexts(hdr_.hdr.num_nexts() + aligned_size);
+  }
+
+  // Fill with char 0xff so we are sorted properly.
+  for (int i = 0; i < aligned_size; i++) {
+    ret[i].set_val(0xff);
+    ret[i].set_node_index(kInvalidNodeIndex);
+  }
+  return ret;
+}
+
+// Returns a next array to the free list for its size bucket. The array's
+// first entry is overwritten to link to the previous free-list head.
+void IcingDynamicTrie::IcingDynamicTrieStorage::FreeNextArray(Next *next,
+                                                              int log2_size) {
+  if (GetNextArrayIndex(next) + (1 << log2_size) > hdr_.hdr.max_nexts()) {
+    ICING_LOG(FATAL) << "'next' array is out of range";
+  }
+
+  // Put it in free list.
+  next->set_next_index(hdr_.hdr.free_lists(log2_size));
+  hdr_.hdr.set_free_lists(log2_size, GetNextArrayIndex(next));
+}
+
+// Appends a NUL-terminated suffix followed by its value bytes to the suffix
+// region. Returns the index of the suffix; if value_index is non-null it
+// receives the index of the value (just past the terminating NUL).
+uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::MakeSuffix(
+    const char *suffix, const void *value, uint32_t *value_index) {
+  int suffix_len = strlen(suffix);
+  if (suffixes_left() < suffix_len + 1 + value_size()) {
+    ICING_LOG(FATAL) << "'suffix' buffer not enough";
+  }
+
+  char *start =
+      GetMutableSuffix(hdr_.hdr.suffixes_size(), suffix_len + 1 + value_size());
+  memcpy(start, suffix, suffix_len + 1);
+  memcpy(start + suffix_len + 1, value, value_size());
+  if (value_index) *value_index = GetSuffixIndex(start + suffix_len + 1);
+  hdr_.hdr.set_suffixes_size(hdr_.hdr.suffixes_size() + suffix_len + 1 +
+                             value_size());
+
+  return GetSuffixIndex(start);
+}
+
+// Crc over the serialized-header region only (excludes the trailing Crcs
+// struct at the end of the page).
+uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::GetHeaderCrc() const {
+  return IcingStringUtil::UpdateCrc32(
+      0, reinterpret_cast<const char *>(hdr_mmapper_.address()),
+      serialized_header_max());
+}
+
+uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::GetAllCrc() const {
+  // Append array crcs to header crc.
+  return IcingStringUtil::UpdateCrc32(
+      crcs_->header_crc, reinterpret_cast<const char *>(crcs_->array_crcs),
+      sizeof(crcs_->array_crcs));
+}
+
+uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::UpdateCrc() {
+  return UpdateCrcInternal(true);
+}
+
+// Recomputes header, per-array, and combined crcs. write_hdr controls
+// whether the header is re-serialized to the mmap first (Sync() passes false
+// because it has already written the header).
+uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::UpdateCrcInternal(
+    bool write_hdr) {
+  if (write_hdr && !WriteHeader()) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Flushing trie header failed: %s", strerror(errno));
+  }
+
+  crcs_->header_crc = GetHeaderCrc();
+
+  for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
+    array_storage_[i].UpdateCrc();
+  }
+
+  crcs_->all_crc = GetAllCrc();
+
+  return crcs_->all_crc;
+}
+
+// Serializes hdr_ into the mmapped header page (not yet synced to disk).
+bool IcingDynamicTrie::IcingDynamicTrieStorage::WriteHeader() {
+  return hdr_.SerializeToArray(hdr_mmapper_.address(), serialized_header_max());
+}
+
+// Mutable accessors. Each delegates to IcingArrayStorage::GetMutableMem,
+// which (per the class comment) marks the touched pages dirty so they are
+// written out on the next Sync().
+IcingDynamicTrie::Node *
+IcingDynamicTrie::IcingDynamicTrieStorage::GetMutableNode(uint32_t idx) {
+  return array_storage_[NODE].GetMutableMem<Node>(idx, 1);
+}
+
+IcingDynamicTrie::Next *
+IcingDynamicTrie::IcingDynamicTrieStorage::GetMutableNextArray(uint32_t idx,
+                                                               uint32_t len) {
+  return array_storage_[NEXT].GetMutableMem<Next>(idx, len);
+}
+
+char *IcingDynamicTrie::IcingDynamicTrieStorage::GetMutableSuffix(
+    uint32_t idx, uint32_t len) {
+  return array_storage_[SUFFIX].GetMutableMem<char>(idx, len);
+}
+
+// Header functions.
+const uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::Header::kMagic =
+    0x6dfba6ae;
+// For future revisions, this should be synced with global index version.
+// See comments on Upgrade() in native-index-impl.h for versioning.
+const uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::Header::kCurVersion =
+    4;
+
+// Builds a brand-new in-memory header from options: zero counts, current
+// version, and all free lists empty (kInvalidNextIndex).
+void IcingDynamicTrie::IcingDynamicTrieStorage::Header::Init(
+    const IcingDynamicTrie::Options &options) {
+  hdr.Clear();
+
+  hdr.set_version(kCurVersion);
+  hdr.set_max_nodes(options.max_nodes);
+  hdr.set_max_nexts(options.max_nexts);
+  hdr.set_max_suffixes_size(options.max_suffixes_size);
+  hdr.set_value_size(options.value_size);
+
+  for (int i = 0; i < kNumNextAllocationBuckets; i++) {
+    hdr.add_free_lists(kInvalidNextIndex);
+  }
+}
+
+// Deserializes the header from the on-disk layout
+// magic(4)|size(4)|serialized hdr(size), validating magic and bounds.
+bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::Init(
+    const uint8_t *buf, uint32_t buf_size) {
+  // Check magic and length.
+  if (buf_size <= sizeof(kMagic) + sizeof(uint32_t)) {
+    ICING_LOG(ERROR) << "Trie header too short";
+    return false;
+  }
+
+  uint32_t magic;
+  memcpy(&magic, buf, sizeof(magic));
+  if (magic != kMagic) {
+    ICING_LOG(ERROR) << "Trie header magic mismatch";
+    return false;
+  }
+  uint32_t len;
+  memcpy(&len, buf + sizeof(magic), sizeof(len));
+  // Safe subtraction: buf_size > sizeof(magic) + sizeof(len) was checked
+  // above, so this cannot underflow.
+  if (len > buf_size - sizeof(magic) - sizeof(len)) {
+    ICING_LOG(ERROR) << "Trie header too short";
+    return false;
+  }
+
+  return hdr.ParseFromArray(buf + sizeof(magic) + sizeof(len), len);
+}
+
+// Serializes in the same magic|size|payload layout. Fails (returns false)
+// if the buffer cannot hold the full serialization.
+bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::SerializeToArray(
+    uint8_t *buf, uint32_t buf_size) const {
+  uint32_t size = hdr.ByteSize();
+  if (size + sizeof(kMagic) + sizeof(uint32_t) > buf_size) return false;
+  memcpy(buf, &kMagic, sizeof(kMagic));
+  memcpy(buf + sizeof(kMagic), &size, sizeof(uint32_t));
+  hdr.SerializeWithCachedSizesToArray(buf + sizeof(kMagic) + sizeof(uint32_t));
+  return true;
+}
+
+// Sanity-checks a deserialized header: version match, counts within their
+// declared maxima, and well-formed free lists.
+bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::Verify() {
+  // Check version.
+  if (hdr.version() != kCurVersion) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Trie version %u mismatch", hdr.version());
+    return false;
+  }
+
+  // Check that indices in hdr are within bounds. Note that this is
+  // not a comprehensive integrity check for the entire trie.
+  if (hdr.num_nodes() > hdr.max_nodes() || hdr.num_nexts() > hdr.max_nexts() ||
+      hdr.suffixes_size() > hdr.max_suffixes_size() ||
+      hdr.value_size() >= hdr.max_suffixes_size()) {
+    ICING_LOG(ERROR) << "Trie header array size out of bounds";
+    return false;
+  }
+
+  if (hdr.free_lists_size() != kNumNextAllocationBuckets) {
+    ICING_LOG(ERROR) << "Bad number of free lists";
+    return false;
+  }
+
+  for (int i = 0; i < kNumNextAllocationBuckets; i++) {
+    if (hdr.free_lists(i) != kInvalidNextIndex &&
+        hdr.free_lists(i) >= hdr.max_nexts()) {
+      ICING_LOG(ERROR) << "Free list index out of bounds";
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Remaining capacity in each region (max declared in header minus current
+// usage). Header Verify() guarantees num <= max, so no underflow here.
+uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::nodes_left() const {
+  return hdr_.hdr.max_nodes() - hdr_.hdr.num_nodes();
+}
+
+uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::nexts_left() const {
+  return hdr_.hdr.max_nexts() - hdr_.hdr.num_nexts();
+}
+
+uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::suffixes_left() const {
+  return hdr_.hdr.max_suffixes_size() - hdr_.hdr.suffixes_size();
+}
+
+// Copies the dirty-page counters of each region into *stats.
+void IcingDynamicTrie::IcingDynamicTrieStorage::FillDirtyPageStats(
+    Stats *stats) const {
+  stats->dirty_pages_nodes = array_storage_[NODE].num_dirty_pages();
+  stats->dirty_pages_nexts = array_storage_[NEXT].num_dirty_pages();
+  stats->dirty_pages_suffixes = array_storage_[SUFFIX].num_dirty_pages();
+}
+
+// Dumper.
+// Debug helper: walks the whole trie, pretty-printing its structure and
+// collecting every stored key.
+class IcingDynamicTrie::Dumper {
+ public:
+  explicit Dumper(const IcingDynamicTrie &trie)
+      : all_props_(trie), del_prop_(trie), storage_(trie.storage_.get()) {}
+
+  void Dump(std::ostream *pretty_print, vector<std::string> *keys) const {
+    if (storage_->empty()) {
+      *pretty_print << "(empty)\n";
+    } else {
+      DumpNodeRecursive("", *storage_->GetRootNode(), 0, pretty_print, keys);
+    }
+  }
+
+ private:
+  // Renders the value bytes that follow a suffix's NUL terminator as hex,
+  // plus deletion status and the set of property ids holding the value.
+  std::string SuffixToValueAsString(const char *suffix) const {
+    int suffix_len = strlen(suffix);
+    std::string ret;
+    ret.reserve(storage_->value_size() * 2);
+    for (uint32_t i = 0; i < storage_->value_size(); i++) {
+      IcingStringUtil::SStringAppendF(&ret, 10, "%02x",
+                                      suffix[suffix_len + 1 + i]);
+    }
+
+    // Now dump set properties.
+    uint32_t value_index = storage_->GetSuffixIndex(suffix + suffix_len + 1);
+    if (del_prop_.HasProperty(value_index)) {
+      ret += " (deleted)";
+    }
+    ret += " [";
+    for (size_t i = 0; i < all_props_.size(); i++) {
+      if (all_props_.HasProperty(i, value_index)) {
+        IcingStringUtil::SStringAppendF(&ret, 10, "%zu", i);
+      }
+    }
+    ret += ']';
+
+    return ret;
+  }
+
+  // Inputs:
+  //   prefix - the key prefix of the current node (so we can rebuild the key)
+  //   node - the node we're at
+  //   level - how many levels deep we are in the trie
+  //   ret - the stream to pretty print to
+  //   keys - the keys encountered are appended to this
+  void DumpNodeRecursive(const std::string &prefix, const Node &node, int level,
+                         std::ostream *ret, vector<std::string> *keys) const {
+    if (node.is_leaf()) {
+      // Dump suffix and value.
+      for (int i = 0; i < level; i++) {
+        *ret << ' ';
+      }
+      const char *suffix = storage_->GetSuffix(node.next_index());
+      *ret << suffix;
+      *ret << ' ';
+      *ret << SuffixToValueAsString(suffix);
+      *ret << '\n';
+      keys->push_back(prefix + suffix);
+    } else {
+      // Go through each child (next) node. Print char and recursively
+      // print trie underneath.
+      for (uint32_t i = 0; i < (1U << node.log2_num_children()); i++) {
+        const Next &next = *storage_->GetNext(node.next_index(), i);
+        // Children are sorted with invalid entries (0xff-filled) at the end,
+        // so the first invalid node index terminates the scan.
+        if (next.node_index() == kInvalidNodeIndex) break;
+        for (int j = 0; j < level; j++) {
+          *ret << ' ';
+        }
+        std::string new_prefix = prefix;
+        if (next.val()) {
+          *ret << static_cast<char>(next.val());
+          new_prefix += next.val();
+        } else {
+          // val == 0 marks end-of-key; print a placeholder instead.
+          *ret << "null";
+        }
+        *ret << '\n';
+        DumpNodeRecursive(new_prefix, *storage_->GetNode(next.node_index()),
+                          level + 1, ret, keys);
+      }
+    }
+  }
+
+  PropertyReadersAll all_props_;
+  PropertyDeletedReader del_prop_;
+  const IcingDynamicTrie::IcingDynamicTrieStorage *storage_;
+};
+
+// IcingDynamicTrie.
+//
+// Constructor only records configuration and derives the filenames for the
+// property bitmaps and the deleted bitmap; no files are touched until
+// Init()/CreateIfNotExist().
+IcingDynamicTrie::IcingDynamicTrie(const std::string &filename_base,
+                                   const RuntimeOptions &runtime_options,
+                                   const IcingFilesystem *filesystem)
+    : IIcingStorage(),
+      filename_base_(filename_base),
+      is_initialized_(false),
+      runtime_options_(runtime_options),
+      storage_(nullptr),
+      property_bitmaps_prefix_(filename_base_ + ".prop."),
+      deleted_bitmap_filename_(filename_base_ + ".deleted"),
+      deleted_bitmap_(nullptr),
+      filesystem_(filesystem) {}
+
+// Close() flushes crcs and releases storage; safe if never initialized.
+IcingDynamicTrie::~IcingDynamicTrie() { Close(); }
+
+// Maps the trie storage and loads the property/deleted bitmaps. Idempotent:
+// returns true immediately if already initialized. On any failure the
+// storage is released and the trie stays uninitialized.
+bool IcingDynamicTrie::Init() {
+  if (is_initialized_) return true;
+
+  if (storage_ != nullptr) {
+    ICING_LOG(FATAL) << "Storage is not null before initialization";
+  }
+
+  storage_ = std::make_unique<IcingDynamicTrieStorage>(
+      filename_base_, runtime_options_, filesystem_);
+  if (!storage_->Init() || !InitPropertyBitmaps()) {
+    storage_.reset();
+    return false;
+  }
+  is_initialized_ = true;
+  return true;
+}
+
+// Creates the backing storage files with the given options if they do not
+// already exist. Does NOT initialize this instance; callers still need
+// Init(). Returns false if options are invalid or creation fails.
+bool IcingDynamicTrie::CreateIfNotExist(const Options &options) {
+  // Initialized means exists.
+  if (is_initialized_) return true;
+
+  if (!options.is_valid()) {
+    ICING_LOG(ERROR) << "Trie options invalid";
+    return false;
+  }
+
+  // Use a temporary storage object: we only want the side effect of file
+  // creation here, not a mapped instance.
+  auto storage = std::make_unique<IcingDynamicTrieStorage>(
+      filename_base_, runtime_options_, filesystem_);
+  return storage->CreateIfNotExist(options);
+}
+
+// Updates crcs, then releases the storage and all bitmaps. No-op when not
+// initialized, so it is safe to call repeatedly (e.g. from the destructor).
+void IcingDynamicTrie::Close() {
+  if (!is_initialized_) return;
+
+  // Persist crcs so a later Init() can verify the files.
+  UpdateCrc();
+
+  storage_.reset();
+  property_bitmaps_.clear();
+  deleted_bitmap_.reset();
+  is_initialized_ = false;
+}
+
+// Deletes every file backing this trie: the storage files, all property
+// bitmap files matching the property prefix, and the deleted bitmap.
+// Closes the trie first if needed. Returns false if any deletion failed.
+bool IcingDynamicTrie::Remove() {
+  if (is_initialized()) {
+    Close();
+  }
+
+  bool success = true;
+
+  // Remove storage files.
+  if (!IcingDynamicTrieStorage::Remove(filename_base_, *filesystem_)) {
+    success = false;
+  }
+
+  // Also remove property bitmaps.
+  vector<std::string> files;
+  if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
+                                     &files)) {
+    // Can't even enumerate the bitmap files; give up early.
+    return false;
+  }
+  for (size_t i = 0; i < files.size(); i++) {
+    if (!filesystem_->DeleteFile(files[i].c_str())) success = false;
+  }
+  // And deleted bitmap.
+  if (!filesystem_->DeleteFile(deleted_bitmap_filename_.c_str()))
+    success = false;
+
+  return success;
+}
+
+// Flushes all mmapped state (property bitmaps, deleted bitmap, storage) to
+// disk, then re-warms the mapping. Returns false if any component failed
+// to sync, but always attempts all of them.
+bool IcingDynamicTrie::Sync() {
+  if (!is_initialized_) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  bool success = true;
+  IcingTimer timer;
+
+  // Sync property bitmaps. Slots can be null for property ids that were
+  // never set.
+  for (size_t i = 0; i < property_bitmaps_.size(); i++) {
+    if (property_bitmaps_[i]) {
+      if (!property_bitmaps_[i]->Sync()) success = false;
+    }
+  }
+  if (!deleted_bitmap_->Sync()) success = false;
+
+  // Sync storage.
+  if (!storage_->Sync()) success = false;
+
+  Warm();
+
+  ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+      "Syncing dynamic trie %s took %.3fms", filename_base_.c_str(),
+      timer.Elapsed() * 1000.);
+
+  return success;
+}
+
+// Sums the on-disk footprint of the deleted bitmap, all property bitmaps
+// and the trie storage. IncrementByOrSetInvalid propagates an invalid
+// (error) size from any component into the total.
+uint64_t IcingDynamicTrie::GetDiskUsage() const {
+  uint64_t total = 0;
+  // Property bitmaps.
+  IcingFilesystem::IncrementByOrSetInvalid(deleted_bitmap_->GetDiskUsage(),
+                                           &total);
+
+  for (auto &bitmap : property_bitmaps_) {
+    // Skip property ids that never had a bitmap created.
+    if (bitmap == nullptr) continue;
+    IcingFilesystem::IncrementByOrSetInvalid(bitmap->GetDiskUsage(), &total);
+  }
+
+  // Storage.
+  IcingFilesystem::IncrementByOrSetInvalid(storage_->GetDiskUsage(), &total);
+  return total;
+}
+
+// Opens and initializes a flash bitmap file, optionally verifying its crc.
+// Returns nullptr (after logging) if init or verification fails.
+std::unique_ptr<IcingFlashBitmap> IcingDynamicTrie::OpenAndInitBitmap(
+    const std::string &filename, bool verify,
+    const IcingFilesystem *filesystem) {
+  auto bitmap = std::make_unique<IcingFlashBitmap>(filename, filesystem);
+  if (!bitmap->Init() || (verify && !bitmap->Verify())) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Init of %s failed",
+                                                      filename.c_str());
+    return nullptr;
+  }
+  return bitmap;
+}
+
+// Discovers and opens all property bitmap files plus the deleted bitmap,
+// truncating each past the last valid value index so that stale bits from
+// a previous, longer trie state are discarded. Only called from Init().
+// On failure all partially-opened bitmaps are released.
+bool IcingDynamicTrie::InitPropertyBitmaps() {
+  // Only called on init.
+  if (!property_bitmaps_.empty()) {
+    ICING_LOG(FATAL) << "Property bitmaps not empty before initialization";
+  }
+
+  if (deleted_bitmap_ != nullptr) {
+    ICING_LOG(FATAL) << "Deleted bitmap not null before initialization";
+  }
+
+  // Truncate property bitmap files at current value index. Last value
+  // is at suffixes_size - value_size(). We want to clear everything
+  // after that.
+  uint64_t truncate_idx =
+      storage_->hdr().suffixes_size() > 0
+          ? ValueIndexToPropertyBitmapIndex(storage_->hdr().suffixes_size() -
+                                            value_size()) +
+                1
+          : 0;
+
+  // Discover property bitmaps by scanning the dir.
+  vector<std::string> files;
+  if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
+                                     &files)) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Could not get files at prefix %s", property_bitmaps_prefix_.c_str());
+    goto failed;
+  }
+  for (size_t i = 0; i < files.size(); i++) {
+    // Decode property id from filename ("<prefix>.<id>"); skip files that
+    // don't parse cleanly.
+    size_t property_id_start_idx = files[i].rfind('.');
+    if (property_id_start_idx == std::string::npos) {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s",
+                                                        files[i].c_str());
+      continue;
+    }
+    property_id_start_idx++;  // skip dot
+    char *end;
+    uint32_t property_id =
+        strtol(files[i].c_str() + property_id_start_idx, &end, 10);  // NOLINT
+    // The id must consume the remainder of the filename.
+    if (!end || end != (files[i].c_str() + files[i].size())) {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Malformed filename %s",
+                                                        files[i].c_str());
+      continue;
+    }
+    // Only verify crcs when running under the crc-checked storage policy.
+    std::unique_ptr<IcingFlashBitmap> bitmap = OpenAndInitBitmap(
+        files[i],
+        runtime_options_.storage_policy == RuntimeOptions::kMapSharedWithCrc,
+        filesystem_);
+    if (!bitmap) {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+          "Open prop bitmap failed: %s", files[i].c_str());
+      goto failed;
+    }
+    bitmap->Truncate(truncate_idx);
+    // Grow the slot vector on demand; unseen ids stay nullptr.
+    if (property_id >= property_bitmaps_.size()) {
+      property_bitmaps_.resize(property_id + 1);
+    }
+    property_bitmaps_[property_id] = std::move(bitmap);
+  }
+
+  deleted_bitmap_ = OpenAndInitBitmap(
+      deleted_bitmap_filename_,
+      runtime_options_.storage_policy == RuntimeOptions::kMapSharedWithCrc,
+      filesystem_);
+  if (!deleted_bitmap_) {
+    goto failed;
+  }
+  deleted_bitmap_->Truncate(truncate_idx);
+
+  return true;
+
+failed:
+  property_bitmaps_.clear();
+  deleted_bitmap_.reset();
+  return false;
+}
+
+// Pre-touches the storage mapping so subsequent lookups avoid page faults.
+void IcingDynamicTrie::Warm() const {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  return storage_->Warm();
+}
+
+// Called when the device is about to sleep; flushes crcs so state can be
+// verified on wake.
+void IcingDynamicTrie::OnSleep() {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  // Update crcs so we can verify when we come back.
+  UpdateCrc();
+}
+
+IcingDynamicTrie::NewValueMap::~NewValueMap() {}
+
+// Copies every key whose value index old_tvi_to_new_value maps to a
+// non-null new value into *out (which must already be initialized), along
+// with its set properties, and records the old->new value-index mapping in
+// *old_to_new_tvi. Returns false if any insert or property copy fails.
+bool IcingDynamicTrie::Compact(
+    const NewValueMap &old_tvi_to_new_value, IcingDynamicTrie *out,
+    std::unordered_map<uint32_t, uint32_t> *old_to_new_tvi) const {
+  // old_to_new_tvi is dereferenced unconditionally below, so a null
+  // argument is a caller bug; fail fast like the other invariant checks
+  // in this file instead of logging and then crashing on the deref.
+  if (old_to_new_tvi == nullptr) {
+    ICING_LOG(FATAL) << "TVI is null";
+  }
+
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  PropertyReadersAll prop_readers(*this);
+
+  old_to_new_tvi->clear();
+  old_to_new_tvi->rehash(size() * 2);
+
+  // Iterate every key in this trie (empty prefix matches all).
+  for (Iterator it_all(*this, ""); it_all.IsValid(); it_all.Advance()) {
+    uint32_t value_index = it_all.GetValueIndex();
+    const void *new_value = old_tvi_to_new_value.GetNewValue(value_index);
+    // A null new value means this key is dropped by the compaction.
+    if (!new_value) continue;
+
+    uint32_t new_value_index;
+    if (!out->Insert(it_all.GetKey(), new_value, &new_value_index, false)) {
+      return false;
+    }
+
+    old_to_new_tvi->insert({value_index, new_value_index});
+
+    // Copy properties.
+    for (size_t i = 0; i < prop_readers.size(); i++) {
+      if (prop_readers.HasProperty(i, value_index)) {
+        if (!out->SetProperty(new_value_index, i)) {
+          // Ouch. We need to bail.
+          return false;
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+// Returns the number of keys stored in the trie (from the storage header).
+uint32_t IcingDynamicTrie::size() const {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+  return storage_->hdr().num_keys();
+}
+
+// Walks the sub-trie rooted at "node" accumulating leaf/intermediate
+// counts, suffix usage, per-fanout child counts and wasted next-array
+// slots into *stats.
+void IcingDynamicTrie::CollectStatsRecursive(const Node &node,
+                                             Stats *stats) const {
+  if (node.is_leaf()) {
+    stats->num_leaves++;
+    const char *suffix = storage_->GetSuffix(node.next_index());
+    // +1 for the suffix terminator, plus the inline value bytes.
+    stats->suffixes_used += strlen(suffix) + 1 + value_size();
+    if (!suffix[0]) {
+      stats->null_suffixes++;
+    }
+  } else {
+    stats->num_intermediates++;
+    uint32_t i = 0;
+    for (; i < (1U << node.log2_num_children()); i++) {
+      const Next &next = *storage_->GetNext(node.next_index(), i);
+      // Packed array: first invalid entry ends the child list.
+      if (next.node_index() == kInvalidNodeIndex) break;
+      CollectStatsRecursive(*storage_->GetNode(next.node_index()), stats);
+    }
+
+    // At least one valid node in each next array
+    if (i == 0) {
+      ICING_LOG(FATAL) << "No valid node in 'next' array";
+    }
+
+    stats->child_counts[i - 1]++;
+    // Allocated-but-unused next slots in this array.
+    stats->wasted[node.log2_num_children()] +=
+        (1 << node.log2_num_children()) - i;
+    stats->total_wasted += (1 << node.log2_num_children()) - i;
+  }
+}
+
+// Fills *stats with header counters, a full trie traversal, free-list
+// occupancy and dirty-page counts. Overwrites all previous contents.
+void IcingDynamicTrie::CollectStats(Stats *stats) const {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  // Stats is a POD aggregate; start from all-zeros.
+  memset(stats, 0, sizeof(*stats));
+
+  stats->num_keys = storage_->hdr().num_keys();
+  stats->num_nodes = storage_->hdr().num_nodes();
+  stats->max_nodes = storage_->hdr().max_nodes();
+  stats->num_nexts = storage_->hdr().num_nexts();
+  stats->max_nexts = storage_->hdr().max_nexts();
+  stats->suffixes_size = storage_->hdr().suffixes_size();
+  stats->max_suffixes_size = storage_->hdr().max_suffixes_size();
+
+  // Stats collected from traversing the trie.
+  if (!storage_->empty()) {
+    CollectStatsRecursive(*storage_->GetRootNode(), stats);
+  }
+
+  // Free-list stats. Each bucket i holds freed next arrays of size 2^i,
+  // chained through the first entry's node_index.
+  for (int i = 0; i < kNumNextAllocationBuckets; i++) {
+    for (uint32_t cur = storage_->hdr().free_lists(i); cur != kInvalidNextIndex;
+         cur = storage_->GetNext(cur, 0)->next_index()) {
+      stats->num_free[i]++;
+    }
+    stats->total_free += stats->num_free[i] * (1 << i);
+  }
+
+  // Dirty page counts.
+  storage_->FillDirtyPageStats(stats);
+}
+
+// Renders the collected stats as human-readable text. verbosity > 0 adds
+// free-list, child-count, and fragmentation detail.
+std::string IcingDynamicTrie::Stats::DumpStats(int verbosity) const {
+  std::string ret;
+  IcingStringUtil::SStringAppendF(
+      &ret, 0,
+      "Keys %u "
+      "Nodes (%u/%u) %.3f%% "
+      "Nexts (%u/%u) %.3f%% "
+      "Suffixes (%u/%u) %.3f%%\n",
+      num_keys, num_nodes, max_nodes,
+      100. * math_util::SafeDivide(num_nodes, max_nodes), num_nexts, max_nexts,
+      100. * math_util::SafeDivide(num_nexts, max_nexts), suffixes_size,
+      max_suffixes_size,
+      100. * math_util::SafeDivide(suffixes_size, max_suffixes_size));
+
+  if (verbosity > 0) {
+    // Free-list occupancy per power-of-two bucket.
+    for (int i = 0; i < kNumNextAllocationBuckets; i++) {
+      if (num_free[i] > 0) {
+        IcingStringUtil::SStringAppendF(&ret, 0, "Freelist@%d: %u\n", 1 << i,
+                                        num_free[i]);
+      }
+    }
+    IcingStringUtil::SStringAppendF(
+        &ret, 0, "Freelist total: %u/%u %.3f%%\n", total_free, num_nexts,
+        100. * math_util::SafeDivide(total_free, num_nexts));
+
+    // Histogram of intermediate-node fanout.
+    for (int i = 0; i < 256; i++) {
+      if (child_counts[i] > 0) {
+        IcingStringUtil::SStringAppendF(&ret, 0, "Child count@%d: %u\n", i + 1,
+                                        child_counts[i]);
+      }
+    }
+    for (int i = 0; i < kNumNextAllocationBuckets; i++) {
+      IcingStringUtil::SStringAppendF(&ret, 0, "Wasted@%d: %u\n", 1 << i,
+                                      wasted[i]);
+    }
+    IcingStringUtil::SStringAppendF(
+        &ret, 0,
+        "Wasted total: %u\n"
+        "Num intermediates %u num leaves %u "
+        "suffixes used %u null %u\n"
+        "Total next frag: %.3f%%\n",
+        total_wasted, num_intermediates, num_leaves, suffixes_used,
+        null_suffixes,
+        100. * math_util::SafeDivide((total_free + total_wasted), num_nexts));
+  }
+  IcingStringUtil::SStringAppendF(
+      &ret, 0, "Memory usage: %zu/%zu bytes\n",
+      num_nodes * sizeof(Node) + num_nexts * sizeof(Next) + suffixes_size,
+      max_nodes * sizeof(Node) + max_nexts * sizeof(Next) + max_suffixes_size);
+
+  IcingStringUtil::SStringAppendF(
+      &ret, 0, "Dirty pages: nodes %u/%.0f nexts %u/%.0f suffixes %u/%.0f\n",
+      dirty_pages_nodes,
+      math_util::SafeDivide(num_nodes * sizeof(Node) + getpagesize() - 1,
+                            getpagesize()),
+      dirty_pages_nexts,
+      math_util::SafeDivide(num_nexts * sizeof(Next) + getpagesize() - 1,
+                            getpagesize()),
+      dirty_pages_suffixes,
+      math_util::SafeDivide(suffixes_size + getpagesize() - 1, getpagesize()));
+
+  return ret;
+}
+
+// Pretty-prints the whole trie to *pretty_print and appends every key to
+// *keys, via the Dumper helper.
+void IcingDynamicTrie::DumpTrie(std::ostream *pretty_print,
+                                vector<std::string> *keys) const {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  Dumper dumper(*this);
+  dumper.Dump(pretty_print, keys);
+}
+
+// Removes all keys: clears the storage, deletes every property bitmap
+// file, and empties the deleted bitmap. The trie stays initialized.
+void IcingDynamicTrie::Clear() {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  storage_->Clear();
+  for (auto &bitmap : property_bitmaps_) {
+    if (bitmap) {
+      // Delete() removes the backing file; reset() drops the mapping.
+      bitmap->Delete();
+      bitmap.reset();
+    }
+  }
+  deleted_bitmap_->Truncate(0);
+}
+
+// Inserts key with the given value.
+//
+// Returns false when the storage arrays cannot conservatively fit the
+// insertion (callers are expected to grow/rotate). On success:
+//   - *value_index (optional) receives the index of the stored value;
+//   - if the key already existed, the value is overwritten only when
+//     replace == true;
+//   - *pnew_key (optional) is set to whether a brand-new key was added.
+//
+// Three structural cases: (1) empty trie — allocate the root leaf;
+// (2) best match is a leaf — split the shared prefix into a chain of
+// single-child nodes ending in a 2-way branch; (3) best match is an
+// intermediate node — append a new leaf to its (possibly re-allocated)
+// next array.
+bool IcingDynamicTrie::Insert(const char *key, const void *value,
+                              uint32_t *value_index, bool replace,
+                              bool *pnew_key) {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  if (pnew_key) *pnew_key = false;
+
+  // Find out ahead of time whether things will fit. A conservative
+  // check based on allocations made below.
+  //
+  // IMPORTANT: This needs to be updated if the alloc patterns below
+  // change.
+  size_t key_len = strlen(key);
+  if (!(storage_->nodes_left() >= 2 + key_len + 1 &&
+        storage_->nexts_left() >= 2 + key_len + 1 + kMaxNextArraySize &&
+        storage_->suffixes_left() >= key_len + 1 + value_size())) {
+    // No more space left.
+    return false;
+  }
+
+  uint32_t best_node_index;
+  int key_offset;
+  FindBestNode(key, &best_node_index, &key_offset, false);
+
+  // A negative key_offset indicates that storage_ is empty
+  if (key_offset < 0) {
+    // First key.
+    if (!storage_->empty()) {
+      ICING_LOG(FATAL) << "Key offset is negative but storage is not empty, "
+                          "there're inconsistencies in dynamic trie.";
+    }
+    Node *node = storage_->AllocNode();
+    node->set_next_index(storage_->MakeSuffix(key, value, value_index));
+    node->set_is_leaf(true);
+    node->set_log2_num_children(0);
+  } else if (storage_->GetNode(best_node_index)->is_leaf()) {
+    // Prefix in the trie. Split at leaf.
+    Node *split_node = storage_->GetMutableNode(best_node_index);
+    const char *prev_suffix = storage_->GetSuffix(split_node->next_index());
+
+    // Find the common prefix length.
+    const char *prev_suffix_cur = prev_suffix;
+    const char *key_cur = key + key_offset;
+    while (*prev_suffix_cur && *prev_suffix_cur == *key_cur) {
+      prev_suffix_cur++;
+      key_cur++;
+    }
+
+    // Equal strings?
+    if (*prev_suffix_cur == 0 && *key_cur == 0) {
+      // Update value if replace == true and return.
+      if (value_index) {
+        // Value bytes live immediately after the suffix terminator.
+        *value_index = storage_->GetSuffixIndex(prev_suffix_cur + 1);
+      }
+      if (replace) {
+        char *mutable_prev_suffix_cur = storage_->GetMutableSuffix(
+            storage_->GetSuffixIndex(prev_suffix_cur + 1), value_size());
+        memcpy(mutable_prev_suffix_cur, value, value_size());
+      }
+      return true;
+    }
+
+    if (*prev_suffix_cur == *key_cur) {
+      ICING_LOG(FATAL) << "The suffix cursor and key cursor should diverge "
+                          "after finding the common prefix.";
+    }
+
+    // Create single-branch children for the common prefix
+    // length. After the loop, split_node points to the node that
+    // will have more than 1 char.
+    int common_len = prev_suffix_cur - prev_suffix;
+    for (int i = 0; i < common_len; i++) {
+      // Create a single-branch child node.
+      Next *split_next = storage_->AllocNextArray(1);
+      split_node->set_next_index(storage_->GetNextArrayIndex(split_next));
+      split_node->set_is_leaf(false);
+      split_node->set_log2_num_children(0);
+      Node *child_node = storage_->AllocNode();
+      split_next[0].set_val(*(prev_suffix + i));
+      split_next[0].set_node_index(storage_->GetNodeIndex(child_node));
+
+      split_node = child_node;
+    }
+
+    // Fill a split.
+    Next *split_next = storage_->AllocNextArray(2);
+    split_node->set_next_index(storage_->GetNextArrayIndex(split_next));
+    split_node->set_is_leaf(false);
+    split_node->set_log2_num_children(1);
+    Node *prev_suffix_node = storage_->AllocNode();
+    Node *key_node = storage_->AllocNode();
+    split_next[0].set_val(*(prev_suffix + common_len));
+    split_next[0].set_node_index(storage_->GetNodeIndex(prev_suffix_node));
+    // The old leaf reuses its existing suffix storage: skip past the
+    // branch char unless the old suffix ends exactly at the split.
+    if (*(prev_suffix + common_len)) {
+      uint32_t next_index =
+          storage_->GetSuffixIndex(prev_suffix + common_len) + 1;
+      prev_suffix_node->set_next_index(next_index);
+    } else {
+      uint32_t next_index = storage_->GetSuffixIndex(prev_suffix + common_len);
+      prev_suffix_node->set_next_index(next_index);
+    }
+    prev_suffix_node->set_is_leaf(true);
+    prev_suffix_node->set_log2_num_children(0);
+    split_next[1].set_val(*(key + key_offset + common_len));
+    split_next[1].set_node_index(storage_->GetNodeIndex(key_node));
+    // The new leaf gets a freshly-made suffix, again skipping the branch
+    // char unless the key ends exactly at the split.
+    if (*(key + key_offset + common_len)) {
+      uint32_t next_index = storage_->MakeSuffix(
+          key + key_offset + common_len + 1, value, value_index);
+      key_node->set_next_index(next_index);
+    } else {
+      uint32_t next_index = storage_->MakeSuffix(key + key_offset + common_len,
+                                                 value, value_index);
+      key_node->set_next_index(next_index);
+    }
+    key_node->set_is_leaf(true);
+    key_node->set_log2_num_children(0);
+
+    // Next arrays must stay sorted by char value for LowerBound().
+    std::sort(split_next, split_next + 2);
+  } else {
+    // Insert into intermediate node.
+    const Node *best_node = storage_->GetNode(best_node_index);
+
+    // Add our value as a node + suffix.
+    Node *new_leaf_node = storage_->AllocNode();
+    if (*(key + key_offset)) {
+      uint32_t next_index =
+          storage_->MakeSuffix(key + key_offset + 1, value, value_index);
+      new_leaf_node->set_next_index(next_index);
+    } else {
+      uint32_t next_index =
+          storage_->MakeSuffix(key + key_offset, value, value_index);
+      new_leaf_node->set_next_index(next_index);
+    }
+    new_leaf_node->set_is_leaf(true);
+    new_leaf_node->set_log2_num_children(0);
+
+    // Figure out the real length of the existing next array.
+    Next *cur_next = storage_->GetMutableNextArray(
+        best_node->next_index(), 1 << best_node->log2_num_children());
+    int next_len = 0;
+    for (; next_len < (1 << best_node->log2_num_children()) &&
+           cur_next[next_len].node_index() != kInvalidNodeIndex;
+         next_len++) {
+    }
+    Next *new_next = cur_next;
+    if (next_len == (1 << best_node->log2_num_children())) {
+      // Allocate a new, larger, array.
+      new_next = storage_->AllocNextArray(next_len + 1);
+      memcpy(new_next, cur_next, sizeof(Next) * next_len);
+    }
+
+    // Write a link to our new leaf node and sort.
+    new_next[next_len].set_val(*(key + key_offset));
+    new_next[next_len].set_node_index(storage_->GetNodeIndex(new_leaf_node));
+    // The first next_len entries are already sorted; merge in the new one.
+    inplace_merge(new_next, new_next + next_len, new_next + next_len + 1);
+    next_len++;
+
+    // If this was new, update the parent node and free the old next
+    // array.
+    if (new_next != cur_next) {
+      Node *mutable_best_node =
+          storage_->GetMutableNode(storage_->GetNodeIndex(best_node));
+      mutable_best_node->set_next_index(storage_->GetNextArrayIndex(new_next));
+      mutable_best_node->set_is_leaf(false);
+      uint8_t log2_num_children = mutable_best_node->log2_num_children();
+
+      // 8 == log2(256)
+      if (log2_num_children >= 8) {
+        ICING_LOG(FATAL) << "Number of children exceeds the max allowed size";
+      }
+
+      mutable_best_node->set_log2_num_children(log2_num_children + 1);
+
+      storage_->FreeNextArray(cur_next,
+                              mutable_best_node->log2_num_children() - 1);
+    }
+  }
+
+  // We added a new key.
+  storage_->inc_num_keys();
+
+  if (pnew_key) *pnew_key = true;
+  return true;
+}
+
+// Returns a pointer to the value bytes stored at value_index (which must
+// have come from Insert/Find/an iterator).
+const void *IcingDynamicTrie::GetValueAtIndex(uint32_t value_index) const {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  return static_cast<const void *>(storage_->GetSuffix(value_index));
+}
+
+// Overwrites the value_size() bytes stored at value_index with *value.
+void IcingDynamicTrie::SetValueAtIndex(uint32_t value_index,
+                                       const void *value) {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  // A full value must fit within the suffix region.
+  if (value_index > storage_->hdr().max_suffixes_size() - value_size()) {
+    ICING_LOG(FATAL) << "Value index is out of range";
+  }
+
+  memcpy(storage_->GetMutableSuffix(value_index, value_size()), value,
+         value_size());
+}
+
+// Looks up an exact key. On a hit, optionally copies the value into *value
+// and/or reports its index via *value_index, then returns true.
+bool IcingDynamicTrie::Find(const char *key, void *value,
+                            uint32_t *value_index) const {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  uint32_t best_node_index;
+  int key_offset;
+  FindBestNode(key, &best_node_index, &key_offset, false);
+
+  // It is a hit only if we landed on a leaf whose remaining suffix matches
+  // the unmatched tail of the key exactly.
+  const Node *best_node = storage_->GetNode(best_node_index);
+  if (key_offset >= 0 && best_node->is_leaf() &&
+      !strcmp(key + key_offset, storage_->GetSuffix(best_node->next_index()))) {
+    // Value bytes follow the suffix's NUL terminator.
+    uint32_t vidx = best_node->next_index() +
+                    strlen(storage_->GetSuffix(best_node->next_index())) + 1;
+    if (value_index) *value_index = vidx;
+    if (value) memcpy(value, storage_->GetSuffix(vidx), value_size());
+    return true;
+  } else {
+    return false;
+  }
+}
+
+// Iterates all keys under the given prefix. Positions on the first match
+// immediately (via Reset()); check IsValid() before use.
+IcingDynamicTrie::Iterator::Iterator(const IcingDynamicTrie &trie,
+                                     const char *prefix)
+    : cur_key_(prefix),
+      cur_suffix_(nullptr),
+      cur_suffix_len_(0),
+      single_leaf_match_(false),
+      trie_(trie) {
+  if (!trie.is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  Reset();
+}
+
+// Descends from node_index to the left-most (lexicographically smallest)
+// leaf beneath it, pushing one Branch per intermediate node and appending
+// the traversed chars (then the leaf suffix) to cur_key_.
+void IcingDynamicTrie::Iterator::LeftBranchToLeaf(uint32_t node_index) {
+  // Go down the trie, following the left-most child until we hit a
+  // leaf. Push to stack and cur_key nodes and chars as we go.
+  for (; !trie_.storage_->GetNode(node_index)->is_leaf();
+       node_index =
+           trie_.storage_
+               ->GetNext(trie_.storage_->GetNode(node_index)->next_index(), 0)
+               ->node_index()) {
+    branch_stack_.push_back(Branch(node_index));
+    cur_key_.push_back(
+        trie_.storage_
+            ->GetNext(trie_.storage_->GetNode(node_index)->next_index(), 0)
+            ->val());
+  }
+
+  // We're at a leaf.
+  cur_suffix_ = trie_.storage_->GetSuffix(
+      trie_.storage_->GetNode(node_index)->next_index());
+  cur_suffix_len_ = strlen(cur_suffix_);
+  cur_key_.append(cur_suffix_, cur_suffix_len_);
+}
+
+// Rewinds the iterator to the first key matching the original prefix.
+// Trims any traversal state appended to cur_key_ by previous iteration,
+// then re-seeks from the trie root.
+void IcingDynamicTrie::Iterator::Reset() {
+  size_t strip_len = branch_stack_.size() + cur_suffix_len_;
+
+  if (cur_key_.size() < strip_len) {
+    ICING_LOG(FATAL) << "Key size < visited trie depth + remaining suffix "
+                        "size, there're inconsistencies in dynamic trie";
+  }
+
+  // Trim back cur_key_ to original prefix.
+  cur_key_.resize(cur_key_.size() - strip_len);
+  cur_suffix_ = nullptr;
+  cur_suffix_len_ = 0;
+  single_leaf_match_ = false;
+  branch_stack_.clear();
+
+  // Nothing to do with an empty trie.
+  if (trie_.storage_->empty()) return;
+
+  // Find node matching prefix.
+  uint32_t node_index;
+  int key_offset;
+  trie_.FindBestNode(cur_key_.c_str(), &node_index, &key_offset, true);
+
+  // Two cases/states:
+  //
+  // - Found an intermediate node. If we matched all of prefix
+  // (cur_key_), LeftBranchToLeaf.
+  //
+  // - Found a leaf node, which is the ONLY matching key for this
+  // prefix. Check that suffix matches the prefix. Then we set
+  // single_leaf_match_ = true and apply different logic for
+  // Advance.
+  if (key_offset < 0) {
+    // A negative key_offset indicates that trie_.storage_ is empty
+    ICING_LOG(FATAL) << "Trie storage is empty";
+  }
+
+  const Node *best_node = trie_.storage_->GetNode(node_index);
+  if (best_node->is_leaf() &&
+      !strncmp(cur_key_.c_str() + key_offset,
+               trie_.storage_->GetSuffix(best_node->next_index()),
+               cur_key_.size() - key_offset)) {
+    // Copy the entire suffix into the current key.
+    cur_key_.resize(key_offset);
+    cur_key_.append(trie_.storage_->GetSuffix(best_node->next_index()));
+    cur_suffix_ = trie_.storage_->GetSuffix(best_node->next_index());
+    cur_suffix_len_ = strlen(cur_suffix_);
+    single_leaf_match_ = true;
+  } else if (static_cast<size_t>(key_offset) == cur_key_.size()) {
+    LeftBranchToLeaf(node_index);
+  }
+  // Otherwise the prefix matches nothing: iterator stays invalid.
+}
+
+// Moves to the next key in lexicographic order. Returns false (and
+// invalidates the iterator) when iteration is exhausted.
+bool IcingDynamicTrie::Iterator::Advance() {
+  if (!IsValid()) return false;
+  if (single_leaf_match_) {
+    // If we only have an exact match, the Advance logic does not
+    // apply. Invalidate the iterator and return.
+    cur_suffix_ = nullptr;
+    cur_suffix_len_ = 0;
+    return false;
+  }
+
+  if (cur_key_.size() < (branch_stack_.size() + cur_suffix_len_)) {
+    ICING_LOG(FATAL) << "Key size < visited trie depth + remaining suffix "
+                        "size, there're inconsistencies in dynamic trie";
+  }
+
+  // Move up from the current leaf.
+  cur_key_.resize(cur_key_.size() - cur_suffix_len_);
+  cur_suffix_ = nullptr;
+  cur_suffix_len_ = 0;
+
+  // Backtrack: pop branches until one has an unvisited sibling child.
+  while (!branch_stack_.empty()) {
+    Branch *branch = &branch_stack_.back();
+    const Node *node = trie_.storage_->GetNode(branch->node_idx);
+    branch->child_idx++;
+    if (branch->child_idx < (1 << node->log2_num_children()) &&
+        trie_.storage_->GetNext(node->next_index(), branch->child_idx)
+                ->node_index() != kInvalidNodeIndex) {
+      // Successfully incremented to the next child. Update the char
+      // value at this depth.
+      cur_key_[cur_key_.size() - 1] =
+          trie_.storage_->GetNext(node->next_index(), branch->child_idx)->val();
+      // We successfully found a sub-trie to explore.
+      LeftBranchToLeaf(
+          trie_.storage_->GetNext(node->next_index(), branch->child_idx)
+              ->node_index());
+      return true;
+    }
+    branch_stack_.pop_back();
+    cur_key_.resize(cur_key_.size() - 1);
+  }
+
+  // Un-wound the entire stack. We are done.
+  return false;
+}
+
+// Valid whenever we are positioned on a leaf suffix.
+bool IcingDynamicTrie::Iterator::IsValid() const {
+  return cur_suffix_ != nullptr;
+}
+
+const char *IcingDynamicTrie::Iterator::GetKey() const {
+  // cur_key_ can have a NULL in it so cur_key_ can be wrong but
+  // cur_key_.c_str() is always right.
+  return IsValid() ? cur_key_.c_str() : nullptr;
+}
+
+// Value bytes live immediately after the suffix's NUL terminator.
+const void *IcingDynamicTrie::Iterator::GetValue() const {
+  if (!IsValid()) return nullptr;
+
+  return static_cast<const void *>(cur_suffix_ + cur_suffix_len_ + 1);
+}
+
+uint32_t IcingDynamicTrie::Iterator::GetValueIndex() const {
+  if (!IsValid()) return kInvalidSuffixIndex;
+
+  return trie_.storage_->GetSuffixIndex(cur_suffix_ + cur_suffix_len_ + 1);
+}
+
+// From the branch at the top of the stack, extends cur_ with left-most
+// continuation bytes until the current UTF-8 character is complete (or we
+// hit UTFmax / a leaf). Leaves cur_ NUL-terminated and records where the
+// logical character ends (node + suffix offset) in cur_logical_node_.
+void IcingDynamicTrie::Utf8Iterator::LeftBranchToUtf8End() {
+  if (cur_len_ <= 0) {
+    ICING_LOG(FATAL) << "Invalid UTF-8 character length";
+  }
+
+  if (branch_end_ - branch_stack_ != cur_len_) {
+    ICING_LOG(FATAL) << "Depth from first visited node to last visited node "
+                        "doesn't match the current UTF-8 character length";
+  }
+
+  // Use branch at top of stack to determine where to follow.
+  const Branch &branch = *(branch_end_ - 1);
+  const Node *node = trie_.storage_->GetNode(branch.child->node_index());
+
+  // If we start with non-ascii, take all left branches while there is
+  // a continuation byte.
+  if (!IcingStringUtil::IsAsciiChar(cur_[cur_len_ - 1])) {
+    while (!node->is_leaf()) {
+      if (ABSL_PREDICT_FALSE(cur_len_ >= UTFmax)) break;
+
+      InitBranch(branch_end_, node, 0);
+      // When we are looking to complete a utf8 char, skip 0s.
+      if (branch_end_->child->val() == 0) {
+        // Check if we already have a valid cur_.
+        cur_[cur_len_] = 0;
+        Rune rune;
+        chartorune(&rune, cur_);
+        if (rune == Runeerror && node->log2_num_children() > 0) {
+          // Partial char so far; skip the terminator child and keep going.
+          branch_end_->child++;
+        } else {
+          // Good termination. Just break.
+          break;
+        }
+      }
+
+      if (!IcingStringUtil::IsContinuationByte(branch_end_->child->val()))
+        break;
+
+      cur_[cur_len_++] = branch_end_->child->val();
+      node = trie_.storage_->GetNode(branch_end_->child->node_index());
+      branch_end_++;
+    }
+
+    cur_logical_node_.node = node;
+
+    // Maybe go into suffixes and set suffix_offset.
+    if (node->is_leaf()) {
+      GoIntoSuffix(node);
+    } else {
+      cur_logical_node_.suffix_offset = 0;
+    }
+  } else {  // ascii
+    cur_logical_node_.node = node;
+    cur_logical_node_.suffix_offset = 0;
+  }
+
+  // NULL-terminate.
+  cur_[cur_len_] = 0;
+}
+
+// Consumes continuation bytes from the leaf's suffix into cur_ and records
+// how far into the suffix the current character extends.
+void IcingDynamicTrie::Utf8Iterator::GoIntoSuffix(const Node *node) {
+  const char *suffix = trie_.storage_->GetSuffix(node->next_index());
+  const char *cur_suffix;
+  for (cur_suffix = suffix; ABSL_PREDICT_TRUE(cur_len_ < UTFmax) &&
+                            IcingStringUtil::IsContinuationByte(*cur_suffix);
+       cur_suffix++) {
+    cur_[cur_len_++] = *cur_suffix;
+  }
+  cur_logical_node_.suffix_offset = cur_suffix - suffix;
+}
+
+// Rewinds to the first UTF-8 character under start_node_, or invalidates
+// the iterator when there is no start node.
+void IcingDynamicTrie::Utf8Iterator::Reset() {
+  cur_[0] = 0;
+  cur_len_ = 0;
+  branch_end_ = branch_stack_;
+
+  if (start_node_) {
+    // Take the first char node's children.
+    const Next *next = trie_.storage_->GetNext(start_node_->next_index(), 0);
+    branch_end_->node = start_node_;
+    branch_end_->child_end = next + (1 << start_node_->log2_num_children());
+    if (next->val() == 0) {
+      // Skip any nulls at this position. We don't return empty string
+      // as an iteration.
+      next++;
+    }
+    branch_end_->child = next;
+    cur_[cur_len_++] = next->val();
+    branch_end_++;
+
+    // Will NULL-terminate cur_.
+    LeftBranchToUtf8End();
+  } else {
+    // Nothing to return.
+    cur_logical_node_.node = nullptr;
+    cur_logical_node_.suffix_offset = 0;
+  }
+}
+
+// Moves to the next UTF-8 character. Returns false when the branch stack
+// is exhausted (iteration done).
+bool IcingDynamicTrie::Utf8Iterator::Advance() {
+  if (!IsValid()) return false;
+
+  // Clip to branch.
+  cur_len_ = branch_end_ - branch_stack_;
+
+  // Backtrack until a branch still has an unfinished sibling child.
+  while (branch_end_ > branch_stack_) {
+    Branch *branch = branch_end_ - 1;
+    branch->child++;
+    if (!branch->IsFinished()) {
+      // Successfully incremented to the next child. Update the char
+      // value at this depth.
+      cur_[cur_len_ - 1] = branch->child->val();
+
+      // We successfully found a sub-trie to explore.
+      LeftBranchToUtf8End();
+      return true;
+    }
+    cur_len_--;
+    branch_end_--;
+  }
+
+  // Un-wound the entire stack. We are done.
+  return false;
+}
+
+// Points *branch at start's next array; when key_char is non-zero, skips
+// ahead to the first child whose char is >= key_char.
+void IcingDynamicTrie::Utf8Iterator::InitBranch(Branch *branch,
+                                                const Node *start,
+                                                char key_char) {
+  branch->node = start;
+  branch->child = trie_.storage_->GetNext(start->next_index(), 0);
+  branch->child_end = branch->child + (1 << start->log2_num_children());
+  if (key_char) {
+    branch->child =
+        trie_.LowerBound(branch->child, branch->child_end, key_char);
+  }
+}
+
+// A branch is exhausted past its last slot or at the first invalid entry.
+bool IcingDynamicTrie::Utf8Iterator::Branch::IsFinished() {
+  return child >= child_end || child->node_index() == kInvalidNodeIndex;
+}
+
+// Valid while cur_ holds at least one byte of the current character.
+bool IcingDynamicTrie::Utf8Iterator::IsValid() const { return cur_len_ > 0; }
+
+// Returns the child entry of "node" whose char equals key_char, or nullptr
+// when no such child exists.
+const IcingDynamicTrie::Next *IcingDynamicTrie::GetNextByChar(
+    const Node *node, uint8_t key_char) const {
+  const Next *next_start = storage_->GetNext(node->next_index(), 0);
+  const Next *next_end = next_start + (1 << node->log2_num_children());
+
+  const Next *found = LowerBound(next_start, next_end, key_char);
+  // LowerBound returns the first entry >= key_char; it is only a hit if it
+  // matches exactly and points at a live node.
+  if (found >= next_end || found->val() != key_char ||
+      found->node_index() == kInvalidNodeIndex) {
+    return nullptr;
+  }
+
+  return found;
+}
+
+// Returns the first entry in [start, end) whose char is >= key_char (the
+// arrays are kept sorted by Insert).
+const IcingDynamicTrie::Next *IcingDynamicTrie::LowerBound(
+    const Next *start, const Next *end, uint8_t key_char) const {
+  // Above this value will use binary search instead of linear
+  // search. 16 was chosen from running some benchmarks with
+  // different values.
+  static const uint32_t kBinarySearchCutoff = 16;
+
+  if (end - start >= kBinarySearchCutoff) {
+    // Binary search.
+    Next key_next(key_char, 0);
+    return lower_bound(start, end, key_next);
+  } else {
+    // Linear search.
+    const Next *found;
+    for (found = start; found < end; found++) {
+      if (found->val() >= key_char) {
+        // Should have gotten match.
+        break;
+      }
+    }
+    return found;
+  }
+}
+
+// Descends the trie as far as "key" matches, reporting the deepest node
+// reached and how many key chars were consumed.
+void IcingDynamicTrie::FindBestNode(const char *key, uint32_t *best_node_index,
+                                    int *key_offset, bool prefix) const {
+  // Find the best node such that:
+  //
+  // - If key is NOT in the trie, key[0..key_offset) is a prefix to
+  // everything under best_node_index.
+  //
+  // - If key is in the trie, best_node_index is the leaf that points
+  // to the key suffix and key_offset == strlen(key).
+  //
+  // If prefix is true, when key is both in the trie AND a prefix
+  // (e.g. "ab" and "abc" are in the trie), we return the intermediate
+  // node with key as the prefix as opposed to the exactly matching
+  // leaf node.
+  if (storage_->empty()) {
+    // Empty trie is signalled by a negative key_offset.
+    *best_node_index = 0;
+    *key_offset = -1;
+    return;
+  }
+
+  const Node *cur_node = storage_->GetRootNode();
+  const char *cur_key = key;
+  while (!cur_node->is_leaf()) {
+    const Next *found = GetNextByChar(cur_node, *cur_key);
+    if (!found) break;
+
+    // In prefix mode, stop at the branch point instead of following the
+    // zero-char (exact-match) child.
+    if (prefix && found->val() == 0) {
+      break;
+    }
+
+    cur_node = storage_->GetNode(found->node_index());
+
+    // End of key.
+    if (*cur_key == 0) {
+      break;
+    }
+    cur_key++;
+  }
+
+  *best_node_index = storage_->GetNodeIndex(cur_node);
+  *key_offset = reinterpret_cast<const char *>(cur_key) - key;
+}
+
+// Appends human-readable trie stats plus the size of every backing bitmap
+// file to *out.
+void IcingDynamicTrie::GetDebugInfo(int verbosity, std::string *out) const {
+  Stats stats;
+  CollectStats(&stats);
+  out->append(stats.DumpStats(verbosity));
+
+  // Property files.
+  vector<std::string> files;
+  if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
+                                     &files)) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Could not get files at prefix %s", property_bitmaps_prefix_.c_str());
+    return;
+  }
+  for (size_t i = 0; i < files.size(); i++) {
+    IcingStringUtil::SStringAppendF(
+        out, 1000, "Prop file %s size %" PRIu64 "\n",
+        filesystem_->GetBasename(files[i].c_str()).c_str(),
+        filesystem_->GetFileSize(files[i].c_str()));
+  }
+  IcingStringUtil::SStringAppendF(
+      out, 1000, "Deleted file %s size %" PRIu64 "\n",
+      filesystem_->GetBasename(deleted_bitmap_filename_.c_str()).c_str(),
+      filesystem_->GetFileSize(deleted_bitmap_filename_.c_str()));
+}
+
+// Fraction of free space remaining, determined by whichever of the three
+// fixed-size arrays (nodes, nexts, suffixes) is fullest.
+double IcingDynamicTrie::min_free_fraction() const {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  double node_fill = static_cast<double>(storage_->hdr().num_nodes()) /
+                     storage_->hdr().max_nodes();
+  double next_fill = static_cast<double>(storage_->hdr().num_nexts()) /
+                     storage_->hdr().max_nexts();
+  double suffix_fill = static_cast<double>(storage_->hdr().suffixes_size()) /
+                       storage_->hdr().max_suffixes_size();
+  return 1.0 - max(node_fill, max(next_fill, suffix_fill));
+}
+
+// Size in bytes of the value stored with each key, as recorded in the
+// persisted header (fixed at creation time via Options::value_size).
+uint32_t IcingDynamicTrie::value_size() const {
+  return storage_->hdr().value_size();
+}
+
+// Upper bound on value indices; mirrors the static
+// max_value_index(const Options &) helper declared in the header.
+uint32_t IcingDynamicTrie::max_value_index() const {
+  return storage_->hdr().max_suffixes_size();
+}
+
+// Updates and returns the combined crc over the trie storage and all
+// bitmaps, or kNoCrc unless running under kMapSharedWithCrc. The order of
+// crc folding below (storage, then property bitmaps by index, then the
+// deleted bitmap) is part of the persisted format — do not reorder.
+uint32_t IcingDynamicTrie::UpdateCrc() {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  if (runtime_options_.storage_policy != RuntimeOptions::kMapSharedWithCrc) {
+    return kNoCrc;
+  }
+
+  // Combine storage crc with property bitmap crcs.
+  uint32_t crc = storage_->UpdateCrc();
+
+  // Update crcs on bitmaps.
+  for (size_t i = 0; i < property_bitmaps_.size(); ++i) {
+    if (property_bitmaps_[i]) {
+      // Combine property id with the bitmap crc so identical bitmaps under
+      // different ids yield different combined crcs.
+      uint64_t this_crc = property_bitmaps_[i]->UpdateCrc();
+      this_crc = (this_crc << 32) | i;
+      crc = IcingStringUtil::UpdateCrc32(
+          crc, reinterpret_cast<const char *>(&this_crc), sizeof(this_crc));
+    }
+  }
+  // The deleted bitmap is folded in last, with no id attached.
+  uint32_t this_crc = deleted_bitmap_->UpdateCrc();
+  crc = IcingStringUtil::UpdateCrc32(
+      crc, reinterpret_cast<const char *>(&this_crc), sizeof(this_crc));
+
+  return crc;
+}
+
+// Returns the bitmap for property_id, opening/creating and caching it on
+// first use. Returns nullptr if property_id exceeds kMaxPropertyId or the
+// bitmap file could not be opened. Returned pointer is owned by
+// property_bitmaps_ and stays valid while the trie is open.
+IcingFlashBitmap *IcingDynamicTrie::OpenOrCreatePropertyBitmap(
+    uint32_t property_id) {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  if (property_id > kMaxPropertyId) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Property id %u out of range", property_id);
+    return nullptr;
+  }
+
+  // Grow the cache so property_id is a valid slot.
+  if (property_id >= property_bitmaps_.size()) {
+    property_bitmaps_.resize(property_id + 1);
+  }
+  if (!property_bitmaps_[property_id]) {
+    // Bitmap filename is the shared prefix followed by the decimal id.
+    std::string filename;
+    IcingStringUtil::SStringAppendF(
+        &filename, property_bitmaps_prefix_.size() + 10, "%s%u",
+        property_bitmaps_prefix_.c_str(), property_id);
+    property_bitmaps_[property_id] =
+        OpenAndInitBitmap(filename, false, filesystem_);
+  }
+  return property_bitmaps_[property_id].get();
+}
+
+// Sets property_id on the value at value_index and clears its deleted bit.
+// Returns false if the property bitmap cannot be opened/created or either
+// bitmap write fails. Note the short-circuit below: if setting the
+// property bit fails, the deleted bit is left untouched.
+bool IcingDynamicTrie::SetProperty(uint32_t value_index, uint32_t property_id) {
+  IcingFlashBitmap *bitmap = OpenOrCreatePropertyBitmap(property_id);
+  if (!bitmap) {
+    return false;
+  }
+  uint64_t idx = ValueIndexToPropertyBitmapIndex(value_index);
+
+  // Also clear deleted bit.
+  return bitmap->SetBit(idx, true) && deleted_bitmap_->SetBit(idx, false);
+}
+
+// Clears property_id from the value at value_index. A property whose
+// bitmap was never materialized is trivially clear, so that succeeds.
+bool IcingDynamicTrie::ClearProperty(uint32_t value_index,
+                                     uint32_t property_id) {
+  bool have_bitmap = property_id < property_bitmaps_.size() &&
+                     property_bitmaps_[property_id] != nullptr;
+  if (!have_bitmap) {
+    // No bitmap is ok for clearing.
+    return true;
+  }
+
+  return property_bitmaps_[property_id]->SetBit(
+      ValueIndexToPropertyBitmapIndex(value_index), false);
+}
+
+// Marks the value at value_index as deleted in the deleted bitmap.
+bool IcingDynamicTrie::SetDeleted(uint32_t value_index) {
+  return deleted_bitmap_->SetBit(ValueIndexToPropertyBitmapIndex(value_index),
+                                 true);
+}
+
+// Clears the deleted mark for the value at value_index.
+bool IcingDynamicTrie::ClearDeleted(uint32_t value_index) {
+  return deleted_bitmap_->SetBit(ValueIndexToPropertyBitmapIndex(value_index),
+                                 false);
+}
+
+// Clears property_id from every value: values for which this was the only
+// set property are marked deleted, then the property's bitmap file itself
+// is deleted. Returns true on success (including when the property never
+// existed).
+bool IcingDynamicTrie::ClearPropertyForAllValues(uint32_t property_id) {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  PropertyReadersAll readers(*this);
+  if (!readers.Exists(property_id)) {
+    ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+        "Properties for id %u don't exist", property_id);
+    return true;
+  }
+
+  // Mark values that have no other properties set as deleted.
+  uint64_t max_idx =
+      ValueIndexToPropertyBitmapIndex(storage_->hdr().suffixes_size());
+  // TODO(vishwajith) Inefficient to do this bit by bit, should be word by
+  // word. Removing a corpus is likely rare enough that this is low priority.
+  for (uint64_t i = 0; i < max_idx; ++i) {
+    // See if the bit is set in our property map.
+    if (readers.IsPropertyUnique(property_id, i)) {
+      deleted_bitmap_->SetBit(i, true);
+    }
+  }
+
+  // Now delete the bitmap file for this property. Moving out of the vector
+  // leaves a null slot, so later readers see the property as nonexistent.
+  std::unique_ptr<IcingFlashBitmap> bitmap(
+      std::move(property_bitmaps_[property_id]));
+  // bitmap cannot be null here, because then readers.Exists(property_id) would
+  // have returned false earlier, and we wouldn't get here.
+  if (bitmap == nullptr) {
+    ICING_LOG(ERROR) << "Property bitmap is null";
+    return false;
+  }
+
+  return bitmap->Delete();
+}
+
+// Whether the underlying bitmap was present when this reader was built;
+// when false, HasProperty() is false for every value.
+bool IcingDynamicTrie::PropertyReaderBase::Exists() const {
+  return bitmap_ != nullptr;
+}
+
+// Whether the value at value_index has this reader's property set.
+// Returns false for all values if the underlying bitmap is missing.
+bool IcingDynamicTrie::PropertyReaderBase::HasProperty(
+    uint32_t value_index) const {
+  if (bitmap_ == nullptr) {
+    return false;
+  }
+  return bitmap_->GetBit(trie_.ValueIndexToPropertyBitmapIndex(value_index));
+}
+
+// Binds this reader either to the trie's deleted bitmap (deleted == true)
+// or to the bitmap for property_id; an out-of-range property_id leaves the
+// reader with no bitmap (HasProperty() then always returns false).
+IcingDynamicTrie::PropertyReaderBase::PropertyReaderBase(
+    const IcingDynamicTrie &trie, bool deleted, uint32_t property_id)
+    : trie_(trie) {
+  if (!trie.is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  if (deleted) {
+    bitmap_ = trie.deleted_bitmap_.get();
+    return;
+  }
+  bitmap_ = property_id < trie.property_bitmaps_.size()
+                ? trie.property_bitmaps_[property_id].get()
+                : nullptr;
+}
+
+// Reader over all of the trie's property bitmaps; requires an initialized
+// trie and remains valid while the trie is open.
+IcingDynamicTrie::PropertyReadersAll::PropertyReadersAll(
+    const IcingDynamicTrie &trie)
+    : trie_(trie) {
+  if (!trie.is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+}
+
+// Whether a bitmap has been opened/created for property_id.
+bool IcingDynamicTrie::PropertyReadersAll::Exists(uint32_t property_id) const {
+  if (property_id >= trie_.property_bitmaps_.size()) {
+    return false;
+  }
+  return trie_.property_bitmaps_[property_id] != nullptr;
+}
+
+// Whether the value at value_index has property_id set. Returns false if
+// the property's bitmap does not exist.
+bool IcingDynamicTrie::PropertyReadersAll::HasProperty(
+    uint32_t property_id, uint32_t value_index) const {
+  if (!Exists(property_id)) {
+    return false;
+  }
+  return trie_.property_bitmaps_[property_id]->GetBit(
+      trie_.ValueIndexToPropertyBitmapIndex(value_index));
+}
+
+// Returns true iff the value at value_index has property_id set and no
+// other property set.
+bool IcingDynamicTrie::PropertyReadersAll::IsPropertyUnique(
+    uint32_t property_id, uint32_t value_index) const {
+  // ValueIndexToPropertyBitmapIndex returns uint64_t; keep the full width
+  // instead of silently narrowing to uint32_t like every other caller
+  // avoids doing.
+  uint64_t idx = trie_.ValueIndexToPropertyBitmapIndex(value_index);
+
+  // First check that value is set for the requested id.
+  if (property_id >= trie_.property_bitmaps_.size() ||
+      !trie_.property_bitmaps_[property_id] ||
+      !trie_.property_bitmaps_[property_id]->GetBit(idx)) {
+    return false;
+  }
+
+  // Now check that the value is not set for the rest.
+  for (size_t i = 0; i < trie_.property_bitmaps_.size(); ++i) {
+    if (i == property_id) {
+      continue;
+    }
+    if (trie_.property_bitmaps_[i] && trie_.property_bitmaps_[i]->GetBit(idx)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Number of property bitmap slots (some slots may be null); callers use
+// this as the iteration bound over property ids.
+size_t IcingDynamicTrie::PropertyReadersAll::size() const {
+  return trie_.property_bitmaps_.size();
+}
+
+// Maps a value index (a byte offset into the suffix region) to a dense bit
+// index usable with the property/deleted bitmaps.
+uint64_t IcingDynamicTrie::ValueIndexToPropertyBitmapIndex(
+    uint32_t value_index) const {
+  // We know that value indices are separated by at least 1 +
+  // value_size() bytes (for the null terminator and the value).
+  return value_index / (value_size() + 1);
+}
+
+// Testing hooks.
+// Copies the current header proto into *hdr (test hook).
+void IcingDynamicTrie::GetHeader(IcingDynamicTrieHeader *hdr) const {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  *hdr = storage_->hdr();
+}
+
+// Overwrites the in-memory header and writes it out immediately (test
+// hook; bypasses normal update paths).
+void IcingDynamicTrie::SetHeader(const IcingDynamicTrieHeader &new_hdr) {
+  if (!is_initialized()) {
+    ICING_LOG(FATAL) << "DynamicTrie not initialized";
+  }
+
+  storage_->hdr_.hdr = new_hdr;
+  storage_->WriteHeader();
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/index/icing-dynamic-trie.h b/icing/legacy/index/icing-dynamic-trie.h
new file mode 100644
index 0000000..2e93ef1
--- /dev/null
+++ b/icing/legacy/index/icing-dynamic-trie.h
@@ -0,0 +1,616 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: ulas@google.com (Ulas Kirazci)
+//
+// Trie for word prefix lookups. Features:
+//
+// - Dynamic additions (but not deletions)
+// - Low memory usage
+// - Reasonable latency but not QPS
+// - Revive from persistence is a disk read
+// - Stores a 4-byte value associated with every key
+//
+// Associated with each value in the trie is a set of property ids. For
+// efficiency, property ids should start at 0 and be densely packed. A value
+// may have more than one id set. There is an additional deleted property
+// for each value, which is set only when all the property ids associated with a
+// value have been cleared. In the flash_index, property ids are used to track
+// corpus ids.
+//
+// Not thread-safe.
+
+#ifndef ICING_LEGACY_INDEX_ICING_DYNAMIC_TRIE_H_
+#define ICING_LEGACY_INDEX_ICING_DYNAMIC_TRIE_H_
+
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/legacy/core/icing-compat.h"
+#include "icing/legacy/core/icing-packed-pod.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/legacy/index/icing-storage.h"
+#include "icing/legacy/index/proto/icing-dynamic-trie-header.pb.h"
+#include "utf.h"
+
+namespace icing {
+namespace lib {
+
+class IcingFlashBitmap;
+
+class IcingDynamicTrie : public IIcingStorage {
+  class Dumper;
+  class IcingDynamicTrieStorage;
+
+ public:
+  // Adjacent bit fields are usually packed automatically. However, that is
+  // implementation specific:
+  // http://en.cppreference.com/w/cpp/language/bit_field
+  // So we'll set packed to be explicit.
+  class Node {
+   public:
+    // This object is only ever used by an ArrayStorage, which allocates
+    // sizeof(Node) bytes, zeroes them out and then casts to a Node.
+    Node() = delete;
+
+    uint32_t next_index() const { return next_index_; }
+    void set_next_index(uint32_t next_index) { next_index_ = next_index; }
+
+    bool is_leaf() const { return is_leaf_; }
+    void set_is_leaf(bool is_leaf) { is_leaf_ = is_leaf; }
+
+    uint8_t log2_num_children() const { return log2_num_children_; }
+    void set_log2_num_children(uint8_t log2_num_children) {
+      log2_num_children_ = log2_num_children;
+    }
+
+   private:
+    // 27 bits => indices up to 2^27 - 1.
+    uint32_t next_index_ : 27;
+    uint32_t is_leaf_ : 1;
+    uint32_t log2_num_children_ : 4;
+  } __attribute__((packed));
+  static_assert(sizeof(Node) == 4, "");
+  static_assert(icing_is_packed_pod<Node>::value, "go/icing-ubsan");
+
+  // Adjacent bit fields are usually packed automatically. However, that is
+  // implementation specific:
+  // http://en.cppreference.com/w/cpp/language/bit_field.
+  // So we'll set packed to be explicit.
+  union Next {
+    Next(uint8_t val, uint32_t node_index) {
+      used.val = val;
+      used.node_index = node_index;
+    }
+
+    uint8_t val() const { return used.val; }
+    void set_val(uint8_t val) { used.val = val; }
+
+    uint32_t node_index() const { return used.node_index; }
+    void set_node_index(uint32_t node_index) { used.node_index = node_index; }
+
+    uint32_t next_index() const { return freelink.next_index; }
+    void set_next_index(uint32_t next_index) {
+      freelink.next_index = next_index;
+    }
+
+    bool operator<(const Next &next2) const {
+      if (val() == next2.val()) {
+        return node_index() < next2.node_index();
+      }
+      return val() < next2.val();
+    }
+
+   private:
+    // This object is only ever used by an ArrayStorage, which allocates
+    // sizeof(Next) bytes, zeroes them out and then casts to a Next.
+    Next() = default;
+
+    // Normal overlay: child character plus index of the child node.
+    struct {
+      uint32_t val : 8;
+      uint32_t node_index : 24;
+    } used;
+    // Alternate overlay for free-list chaining (see next_index()).
+    struct {
+      uint32_t next_index : 32;
+    } freelink;
+  } __attribute__((packed));
+  static_assert(sizeof(Next) == 4, "");
+  static_assert(sizeof(Next) % alignof(Next) == 0, "");
+  static_assert(icing_is_packed_pod<Next>::value, "go/icing-ubsan");
+
+  static const int kMaxNextArraySize = 256;
+  static const int kNumNextAllocationBuckets = 9;  // [log2(1), log2(256)]
+
+  static const uint32_t kMaxPropertyId = (1 << 16) - 1;
+
+  // Sentinel: never a valid value index (see OriginalMatch).
+  static const uint32_t kInvalidValueIndex = 0;
+
+  static const uint32_t kNoCrc = 0;
+
+  struct Stats {
+    uint32_t num_keys;
+
+    // Node stats
+
+    uint32_t num_nodes;
+    uint32_t max_nodes;
+    // Count of intermediate nodes.
+    uint32_t num_intermediates;
+    // Count of leaf nodes.
+    uint32_t num_leaves;
+
+    // Next stats
+
+    uint32_t num_nexts;
+    uint32_t max_nexts;
+    // Count of next arrays by size.
+    uint32_t child_counts[kMaxNextArraySize];
+    // Wasted next array space per allocation bucket (in Nexts, not
+    // bytes).
+    uint32_t wasted[kNumNextAllocationBuckets];
+    // Sum of wasted array.
+    uint32_t total_wasted;
+
+    // Suffix stats
+
+    uint32_t suffixes_size;
+    uint32_t max_suffixes_size;
+    // Bytes actually used by suffixes.
+    uint32_t suffixes_used;
+    // Number of suffixes that are just empty strings.
+    uint32_t null_suffixes;
+
+    // Next free-list stats
+    uint32_t num_free[kNumNextAllocationBuckets];
+    // Total Next nodes free (weighted sum of the above).
+    uint32_t total_free;
+
+    // Dirty pages.
+    uint32_t dirty_pages_nodes;
+    uint32_t dirty_pages_nexts;
+    uint32_t dirty_pages_suffixes;
+
+    std::string DumpStats(int verbosity) const;
+  };
+
+  // Options when creating the trie. Maximums for the node/next/suffix
+  // arrays must be specified in advance.
+  struct Options {
+    // Absolute maximums.
+    static const uint32_t kMaxNodes, kMaxNexts, kMaxSuffixesSize, kMaxValueSize;
+
+    // The default takes 13MB of memory and can take about 1M English
+    // words.
+    Options()
+        : max_nodes(1U << 20),
+          max_nexts(1U << 20),
+          max_suffixes_size(5U << 20),
+          value_size(sizeof(uint32_t)) {}
+    Options(uint32_t max_nodes_in, uint32_t max_nexts_in,
+            uint32_t max_suffixes_size_in, uint32_t value_size_in)
+        : max_nodes(max_nodes_in),
+          max_nexts(max_nexts_in),
+          max_suffixes_size(max_suffixes_size_in),
+          value_size(value_size_in) {}
+
+    uint32_t max_nodes;
+    uint32_t max_nexts;
+    uint32_t max_suffixes_size;
+    uint32_t value_size;
+
+    // True if options do not exceed absolute maximums.
+    bool is_valid() const;
+  };
+
+  // These can be supplied during runtime, as opposed to the persisted
+  // Options above.
+  struct RuntimeOptions {
+    enum StoragePolicy {
+      // Changes are reflected in the underlying file immediately but
+      // more vulnerable to corruption.
+      kMapSharedWithCrc,
+
+      // Changes only applied during Flush. Smaller window of
+      // vulnerability to corruption.
+      kExplicitFlush,
+    };
+
+    RuntimeOptions &set_storage_policy(StoragePolicy sp) {
+      storage_policy = sp;
+      return *this;
+    }
+
+    StoragePolicy storage_policy = kExplicitFlush;
+  };
+
+  static uint32_t max_value_index(const Options &options) {
+    return options.max_suffixes_size;
+  }
+
+  // Light-weight constructor. Real work happens in Create or Init.
+  IcingDynamicTrie(const std::string &filename_base,
+                   const RuntimeOptions &runtime_options,
+                   const IcingFilesystem *filesystem);
+  ~IcingDynamicTrie() override;
+
+  bool is_initialized() const { return is_initialized_; }
+
+  // Create, but do not Init, a new trie with options if the file does
+  // not already exist.
+  //
+  // Returns true if successfully created all files or files already
+  // exist. Does not do a complete sanity check for when files seem to
+  // exist. Cleans up files if creation fails midstream.
+  bool CreateIfNotExist(const Options &options);
+
+  bool UpgradeTo(int new_version) override { return true; }
+  bool Init() override;
+  void Close() override;
+  bool Remove() override;
+  uint64_t GetDiskUsage() const override;
+
+  // REQUIRED: For all functions below is_initialized() == true.
+
+  // Number of keys in trie.
+  uint32_t size() const;
+
+  // Collecting stats.
+  void CollectStats(Stats *stats) const;
+
+  // Gets all of the contents of the trie for debugging purposes. Note: this
+  // stores the entire set of terms in memory.
+  //   pretty_print - The tree structure of the trie will be written to this.
+  //   keys - All keys in the trie are appended to this vector.
+  void DumpTrie(std::ostream *pretty_print,
+                std::vector<std::string> *keys) const;
+
+  // Empty out the trie without closing or removing.
+  void Clear();
+
+  // Sync to disk.
+  bool Sync() override;
+
+  // Tell kernel we will access the memory shortly.
+  void Warm() const;
+
+  // Potentially about to get nuked.
+  void OnSleep() override;
+
+  // Compact trie into out for value indices present in old_tvi_to_new_value.
+  class NewValueMap {
+   public:
+    virtual ~NewValueMap();
+
+    // Returns the new value we want to assign to the entry at old
+    // value index. We don't take ownership of the pointer.
+    virtual const void *GetNewValue(uint32_t old_value_index) const = 0;
+  };
+  // Compacts this trie. This drops all deleted keys, drops all keys for which
+  // old_tvi_to_new_value returns nullptr, updates values to be the values
+  // returned by old_tvi_to_new_value, rewrites tvis, and saves the results into
+  // the trie given in 'out'. 'old_to_new_tvi' will be populated with a mapping
+  // of old value_index to new value_index.
+  bool Compact(const NewValueMap &old_tvi_to_new_value, IcingDynamicTrie *out,
+               std::unordered_map<uint32_t, uint32_t> *old_to_new_tvi) const;
+
+  // Insert value at key. If key already exists and replace == true,
+  // replaces old value with value. We take a copy of value.
+  //
+  // If value_index is not NULL, returns a pointer to value in
+  // value_index. This can then be used with SetValueAtIndex
+  // below. value_index is not valid past a Clear/Read/Write.
+  //
+  // Returns false if there is no space left in the trie.
+  //
+  // REQUIRES: value a buffer of size value_size()
+  bool Insert(const char *key, const void *value) {
+    return Insert(key, value, nullptr, true, nullptr);
+  }
+  bool Insert(const char *key, const void *value, uint32_t *value_index,
+              bool replace) {
+    return Insert(key, value, value_index, replace, nullptr);
+  }
+  bool Insert(const char *key, const void *value, uint32_t *value_index,
+              bool replace, bool *pnew_key);
+
+  // Get a value returned by Insert value_index. This points to the
+  // value in the trie. The pointer is immutable and always valid
+  // while the trie is alive.
+  const void *GetValueAtIndex(uint32_t value_index) const;
+
+  // Set a value returned by Insert value_index. We take a copy of
+  // value.
+  //
+  // REQUIRES: value a buffer of size value_size()
+  void SetValueAtIndex(uint32_t value_index, const void *value);
+
+  // Returns true if key is found and sets value. If value_index is
+  // not NULL, returns value_index (see Insert discussion above).
+  // If the key is not found, returns false and neither value nor
+  // value_index is modified.
+  //
+  // REQUIRES: value a buffer of size value_size()
+  bool Find(const char *key, void *value) const {
+    return Find(key, value, nullptr);
+  }
+  bool Find(const char *key, void *value, uint32_t *value_index) const;
+
+  // Find the input key and all keys that are a variant of the input
+  // key according to a variant map. Currently supports
+  // transliteration. For example "a" is a variant for "à" or "á" so
+  // an "a" in the input key can match those characters in the trie in
+  // addition to itself.
+  //
+  // If prefix is set, also returns any prefix matches (so value_index
+  // will be invalid).
+  //
+  // REQUIRES: all terms in the lexicon to be valid utf8.
+  struct OriginalMatch {
+    uint32_t value_index;
+    std::string orig;
+
+    OriginalMatch() : value_index(kInvalidValueIndex) {}
+
+    bool is_full_match() const { return value_index != kInvalidValueIndex; }
+  };
+
+  void GetDebugInfo(int verbosity, std::string *out) const override;
+
+  double min_free_fraction() const;
+
+  uint32_t value_size() const;
+
+  uint32_t max_value_index() const;
+
+  // If in kMapSharedWithCrc mode, update crcs and return the master
+  // crc, else return kNoCrc. This crc includes both the trie files
+  // and property bitmaps.
+  uint32_t UpdateCrc();
+
+  // Store dynamic properties for each value. When a property is added to
+  // a value, the deleted flag is cleared for it (if it was previously set).
+  bool SetProperty(uint32_t value_index, uint32_t property_id);
+  bool ClearProperty(uint32_t value_index, uint32_t property_id);
+
+  // Store deleted property for each value.
+  // This method is not the only way the deleted property can be set; the trie
+  // may set this property itself during other operations if it can determine a
+  // value becomes superfluous.
+  bool SetDeleted(uint32_t value_index);
+
+  // Clears the deleted property for each value.
+  bool ClearDeleted(uint32_t value_index);
+
+  // Clear a specific property id from all values. For each value that has this
+  // property cleared, also check to see if it was the only property set; if
+  // so, set the deleted property for the value to indicate it no longer has any
+  // properties associated with it.
+  bool ClearPropertyForAllValues(uint32_t property_id);
+
+  // Access properties. Usage:
+  //
+  //   IcingDynamicTrie::PropertyReader reader(trie, 10);
+  //   char value[SIZE];
+  //   uint32_t value_index;
+  //   if (trie.Find("abc", value, &value_index) &&
+  //       reader.HasProperty(value_index)) {
+  //     ...
+  //   }
+  //
+  // Readers are valid as long as the underlying trie is open.
+  class PropertyReaderBase {
+   public:
+    // Whether underlying file exists.
+    bool Exists() const;
+
+    // Returns false for all values if underlying file is missing.
+    bool HasProperty(uint32_t value_index) const;
+
+   protected:
+    PropertyReaderBase(const IcingDynamicTrie &trie, bool deleted,
+                       uint32_t property_id);
+
+    // Does not own.
+    const IcingFlashBitmap *bitmap_;
+    const IcingDynamicTrie &trie_;
+  };
+
+  // Reader for a given property. It is invalidated when the underlying property
+  // is deleted, or the trie is closed.
+  class PropertyReader : public PropertyReaderBase {
+   public:
+    PropertyReader(const IcingDynamicTrie &trie, uint32_t property_id)
+        : PropertyReaderBase(trie, false, property_id) {}
+  };
+
+  // Reader for the deleted property. It is invalidated when the trie is closed.
+  class PropertyDeletedReader : public PropertyReaderBase {
+   public:
+    explicit PropertyDeletedReader(const IcingDynamicTrie &trie)
+        : PropertyReaderBase(trie, true, 0) {}
+  };
+
+  // Reader for all properties (but not the deleted one). It is invalidated when
+  // the trie is closed.
+  class PropertyReadersAll {
+   public:
+    explicit PropertyReadersAll(const IcingDynamicTrie &trie);
+
+    // Whether underlying file for property_id exists.
+    bool Exists(uint32_t property_id) const;
+
+    // Returns false if underlying file or property doesn't exist.
+    bool HasProperty(uint32_t property_id, uint32_t value_index) const;
+
+    // Returns true if the value at value_index is set for only the supplied
+    // property_id, and none of the other properties.
+    bool IsPropertyUnique(uint32_t property_id, uint32_t value_index) const;
+
+    // For iterating.
+    size_t size() const;
+
+   private:
+    const IcingDynamicTrie &trie_;
+  };
+
+  // Iterate through trie in lexicographic order.
+  //
+  // Not thread-safe.
+  //
+  // Change in underlying trie invalidates iterator.
+  class Iterator {
+   public:
+    Iterator(const IcingDynamicTrie &trie, const char *prefix);
+    void Reset();
+    bool Advance();
+
+    // If !IsValid(), GetKey() will return NULL and GetValue() will
+    // return 0.
+    bool IsValid() const;
+    const char *GetKey() const;
+    // This points directly to the underlying data and is valid while
+    // the trie is alive. We keep ownership of the pointer.
+    const void *GetValue() const;
+    uint32_t GetValueIndex() const;
+
+   private:
+    Iterator();
+    // Copy is ok.
+
+    // Helper function that takes the left-most branch down
+    // intermediate nodes to a leaf.
+    void LeftBranchToLeaf(uint32_t node_index);
+
+    std::string cur_key_;
+    const char *cur_suffix_;
+    int cur_suffix_len_;
+    struct Branch {
+      uint32_t node_idx;
+      int child_idx;
+
+      explicit Branch(uint32_t ni) : node_idx(ni), child_idx(0) {}
+    };
+    std::vector<Branch> branch_stack_;
+    bool single_leaf_match_;
+
+    const IcingDynamicTrie &trie_;
+  };
+
+  // Represents a non-leaf node or a "virtual" trie node in the suffix
+  // region.
+  struct LogicalNode {
+    const Node *node;
+    int suffix_offset;
+
+    LogicalNode() : node(nullptr), suffix_offset(0) {}
+    LogicalNode(const Node *node_in, int suffix_offset_in)
+        : node(node_in), suffix_offset(suffix_offset_in) {}
+  };
+
+  // Iterate over all utf8 chars in the trie anchored at prefix (or
+  // node). If trie has invalid utf8 chars, behavior is undefined (but
+  // won't crash).
+  class Utf8Iterator {
+   public:
+    void Reset();
+    bool Advance();
+
+    bool IsValid() const;
+
+   private:
+    struct Branch {
+      const Node *node;
+      const Next *child;
+      const Next *child_end;
+
+      bool IsFinished();
+    };
+
+    Utf8Iterator();
+    // Copy is ok.
+
+    void LeftBranchToUtf8End();
+    void InitBranch(Branch *branch, const Node *start, char key_char);
+    void GoIntoSuffix(const Node *node);
+
+    char cur_[UTFmax + 1];  // NULL-terminated
+    int cur_len_;
+    LogicalNode cur_logical_node_;
+
+    Branch branch_stack_[UTFmax];
+    Branch *branch_end_;
+
+    const IcingDynamicTrie &trie_;
+    const Node *start_node_;
+  };
+
+ private:
+  class CandidateSet;
+
+  // For testing only.
+  friend class IcingDynamicTrieTest_SyncErrorRecovery_Test;
+  friend class IcingDynamicTrieTest_BitmapsClosedWhenInitFails_Test;
+  void GetHeader(IcingDynamicTrieHeader *hdr) const;
+  void SetHeader(const IcingDynamicTrieHeader &new_hdr);
+
+  // Sentinel indices into the node/next/suffix arrays.
+  static const uint32_t kInvalidNodeIndex;
+  static const uint32_t kInvalidNextIndex;
+  static const uint32_t kInvalidSuffixIndex;
+
+  // Stats helpers.
+  void CollectStatsRecursive(const Node &node, Stats *stats) const;
+
+  // Helpers for Find and Insert.
+  const Next *GetNextByChar(const Node *node, uint8_t key_char) const;
+  const Next *LowerBound(const Next *start, const Next *end,
+                         uint8_t key_char) const;
+  void FindBestNode(const char *key, uint32_t *best_node_index, int *key_offset,
+                    bool prefix) const;
+
+  // For value properties. This truncates the data by clearing it, but leaving
+  // the storage intact.
+  bool InitPropertyBitmaps();
+
+  // Returns a pointer to a bitmap that is successfully opened.
+  static std::unique_ptr<IcingFlashBitmap> OpenAndInitBitmap(
+      const std::string &filename, bool verify,
+      const IcingFilesystem *filesystem);
+
+  // Returns a pointer to a writable bitmap, creating it if necessary. Returned
+  // pointer should not be freed, it will be maintained by property_bitmaps_.
+  // Returns null if bitmap failed to load.
+  IcingFlashBitmap *OpenOrCreatePropertyBitmap(uint32_t property_id);
+
+  uint64_t ValueIndexToPropertyBitmapIndex(uint32_t value_index) const;
+
+  const std::string filename_base_;
+  bool is_initialized_;
+  const RuntimeOptions runtime_options_;
+  std::unique_ptr<IcingDynamicTrieStorage> storage_;
+  const std::string property_bitmaps_prefix_;
+  std::vector<std::unique_ptr<IcingFlashBitmap>> property_bitmaps_;
+  const std::string deleted_bitmap_filename_;
+  std::unique_ptr<IcingFlashBitmap> deleted_bitmap_;
+  const IcingFilesystem *const filesystem_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_INDEX_ICING_DYNAMIC_TRIE_H_
diff --git a/icing/legacy/index/icing-filesystem.cc b/icing/legacy/index/icing-filesystem.cc
new file mode 100644
index 0000000..b1e1193
--- /dev/null
+++ b/icing/legacy/index/icing-filesystem.cc
@@ -0,0 +1,638 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/legacy/index/icing-filesystem.h"
+
+#include <dirent.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <unordered_set>
+
+#include "icing/absl_ports/str_cat.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/legacy/portable/icing-zlib.h"
+#include "icing/util/logging.h"
+
+using std::vector;
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// The size of the block for st_blksize returned by stat() and as a
+// consequence also the granularity of GetDiskUsage(). It seems that there is
+// no appropriate constant for this. See http://linux.die.net/man/2/stat
+constexpr int kStatBlockSize = 512;
+
+// Logs information about open file descriptors.
+//
+// This function uses getrlimit() to find the maximum number of file
+// descriptors, then calls readlink("/proc/self/fd/N") for each possible file
+// descriptor number to get a description of the open file from procfs.
+//
+// We don't use readdir() to list the contents of /proc/self/fd (which would be
+// the more obvious approach) because that would require a free file descriptor
+// to open the directory, while we call this function when all file descriptors
+// are in use.
+void LogOpenFileDescriptors() {
+  // Determine the limit on file descriptor numbers. RLIMIT_NOFILE should return
+  // the maximum file descriptor + 1, which is 1024 on Android by default. We
+  // restrict the limit to 4096 so we don't take too much time if the value
+  // turns out to be much higher for some reason.
+  constexpr int kMaxFileDescriptorsToStat = 4096;
+  struct rlimit rlim = {0, 0};
+  if (getrlimit(RLIMIT_NOFILE, &rlim) != 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "getrlimit() failed (errno=%d)", errno);
+    return;
+  }
+  // rlim_cur is the soft limit: no fd at or above this value can be open.
+  int fd_lim = rlim.rlim_cur;
+  if (fd_lim > kMaxFileDescriptorsToStat) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Maximum number of file descriptors (%d) too large.", fd_lim);
+    fd_lim = kMaxFileDescriptorsToStat;
+  }
+  ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+      "Listing up to %d file descriptors.", fd_lim);
+
+  // Verify that /proc/self/fd is a directory. If not, procfs is not mounted or
+  // inaccessible for some other reason. In that case, there's no point trying
+  // to read from it.
+  struct stat statbuf;
+  if (stat("/proc/self/fd", &statbuf) != 0 || !S_ISDIR(statbuf.st_mode)) {
+    ICING_LOG(ERROR) << "/proc/self/fd not available. Giving up.";
+    return;
+  }
+
+  // Now read each link individually.
+  char path[1024];
+  char target[1024];
+  for (int fd = 0; fd < fd_lim; ++fd) {
+    snprintf(path, arraysize(path), "/proc/self/fd/%d", fd);
+    ssize_t len = readlink(path, target, arraysize(target));
+    if (len >= 0) {
+      // Zero-terminate the buffer, because readlink() won't.
+      target[len < arraysize(target) ? len : arraysize(target) - 1] = '\0';
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("fd %d -> \"%s\"", fd,
+                                                        target);
+    } else if (errno != ENOENT) {
+      // ENOENT simply means the fd is not open; anything else is unexpected.
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("fd %d -> ? (errno=%d)",
+                                                        fd, errno);
+    }
+  }
+  ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+      "File descriptor list complete.");
+}
+
+// Logs an error formatted as: desc1 + file_name + desc2 + strerror(errnum).
+//
+// If errnum == EMFILE (too many open files), then it also logs a list of open
+// file descriptors (see LogOpenFileDescriptors() above).
+void LogOpenError(const char *desc1, const char *file_name, const char *desc2,
+                  int errnum) {
+  ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+      "%s%s%s%s", desc1, file_name, desc2, strerror(errnum));
+  // EMFILE means the process ran out of file descriptors; dump the open set
+  // to help diagnose which component is leaking them.
+  if (errnum == EMFILE) {
+    LogOpenFileDescriptors();
+  }
+}
+
+// Recursive implementation of ListDirectory. Prefix is used to prepend the
+// directory name during recursion. Returns false if any directory in the tree
+// could not be opened or listed; *entries may be partially filled in that
+// case.
+// We cannot use scandir due to a bug in old platform versions. See b/7339844.
+bool ListDirectoryInternal(const char *dir_name,
+                           const std::unordered_set<std::string> &exclude,
+                           bool recursive, const char *prefix,
+                           std::vector<std::string> *entries) {
+  DIR *dir = opendir(dir_name);
+  if (!dir) {
+    LogOpenError("Unable to open directory ", dir_name, ": ", errno);
+    return false;
+  }
+
+  dirent *p;
+  // readdir's implementation seems to be thread safe.
+  while ((p = readdir(dir)) != nullptr) {
+    std::string file_name(p->d_name);
+    if (file_name == "." || file_name == ".." ||
+        exclude.find(file_name) != exclude.end()) {
+      continue;
+    }
+    std::string relative_path = absl_ports::StrCat(prefix, p->d_name);
+    entries->push_back(relative_path);
+    // Recurse down directories, if requested.
+    if (recursive && (p->d_type == DT_DIR)) {
+      std::string sub_dir_name = absl_ports::StrCat(dir_name, "/", p->d_name);
+      std::string relative_path_with_slash =
+          absl_ports::StrCat(relative_path, "/");
+      if (!ListDirectoryInternal(sub_dir_name.c_str(), exclude, recursive,
+                                 relative_path_with_slash.c_str(), entries)) {
+        // Close the handle before the early return too; the original code
+        // only reached closedir() on the success path and leaked `dir` here.
+        closedir(dir);
+        return false;
+      }
+    }
+  }
+  if (closedir(dir) != 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Error closing %s: %s", dir_name, strerror(errno));
+  }
+  return true;
+}
+
+} // namespace
+
+IcingScopedFd::~IcingScopedFd() {
+  // Only close valid descriptors; fd_ is -1 when the wrapper is empty.
+  if (fd_ >= 0) {
+    close(fd_);
+  }
+}
+
+void IcingScopedFd::reset(int fd) {
+  // Close the currently owned descriptor (if any) before taking ownership
+  // of the new one. reset(-1) simply releases the current descriptor.
+  if (fd_ >= 0) {
+    close(fd_);
+  }
+  fd_ = fd;
+}
+
+// Out-of-line definition for the class-scope constant (required when the
+// constant is odr-used, pre-C++17).
+const uint64_t IcingFilesystem::kBadFileSize;
+
+bool IcingFilesystem::DeleteFile(const char *file_name) const {
+  ICING_VLOG(1) << IcingStringUtil::StringPrintf("Deleting file %s", file_name);
+  int ret = unlink(file_name);
+  // Deleting a file that doesn't exist (ENOENT) counts as success.
+  bool success = (ret == 0) || (errno == ENOENT);
+  if (!success) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Deleting file %s failed: %s", file_name, strerror(errno));
+  }
+  return success;
+}
+
+bool IcingFilesystem::DeleteDirectory(const char *dir_name) const {
+  int ret = rmdir(dir_name);
+  // As with DeleteFile, a missing directory counts as success.
+  bool success = (ret == 0) || (errno == ENOENT);
+  if (!success) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Deleting directory %s failed: %s", dir_name, strerror(errno));
+  }
+  return success;
+}
+
+bool IcingFilesystem::DeleteDirectoryRecursively(const char *dir_name) const {
+  // Ensure the dir_name really is a directory and exists.
+  struct stat st;
+  if (stat(dir_name, &st) < 0) {
+    if (errno == ENOENT) {
+      return true;  // If directory didn't exist, this was successful.
+    }
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Stat %s failed: %s", dir_name, strerror(errno));
+    return false;
+  }
+  vector<std::string> entries;
+  if (!ListDirectory(dir_name, &entries)) {
+    return false;
+  }
+
+  // Delete all children first, continuing past individual failures so that
+  // as much as possible is cleaned up; `success` accumulates the result.
+  bool success = true;
+  for (vector<std::string>::iterator i = entries.begin(); i != entries.end();
+       ++i) {
+    std::string filename = std::string(dir_name) + '/' + *i;
+    if (stat(filename.c_str(), &st) < 0) {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+          "Stat %s failed: %s", filename.c_str(), strerror(errno));
+      success = false;
+    } else if (S_ISDIR(st.st_mode)) {
+      success = DeleteDirectoryRecursively(filename.c_str()) && success;
+    } else {
+      success = DeleteFile(filename.c_str()) && success;
+    }
+  }
+
+  // Only remove the (now hopefully empty) directory itself if every child
+  // was deleted; rmdir would fail on a non-empty directory anyway.
+  if (success) {
+    success = DeleteDirectory(dir_name);
+  }
+
+  return success;
+}
+
+bool IcingFilesystem::FileExists(const char *file_name) const {
+  bool exists = false;
+  struct stat st;
+  if (stat(file_name, &st) == 0) {
+    // Only regular files count; directories/sockets/etc. return false.
+    exists = S_ISREG(st.st_mode) != 0;
+  } else {
+    // ENOENT is the expected "doesn't exist" case; anything else is logged.
+    if (errno != ENOENT) {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+          "Unable to stat file %s: %s", file_name, strerror(errno));
+    }
+    exists = false;
+  }
+  return exists;
+}
+
+bool IcingFilesystem::DirectoryExists(const char *dir_name) const {
+  bool exists = false;
+  struct stat st;
+  if (stat(dir_name, &st) == 0) {
+    // Only directories count here.
+    exists = S_ISDIR(st.st_mode) != 0;
+  } else {
+    if (errno != ENOENT) {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+          "Unable to stat directory %s: %s", dir_name, strerror(errno));
+    }
+    exists = false;
+  }
+  return exists;
+}
+
+// Returns the index of the first character of the basename, i.e. the char
+// just after the final '/' (0 when there is no slash at all).
+int IcingFilesystem::GetBasenameIndex(const char *file_name) const {
+  // Find final slash.
+  const char *last_slash = strrchr(file_name, '/');
+  if (!last_slash) {
+    // file_name is just basename.
+    return 0;
+  }
+
+  // Skip slash.
+  return last_slash + 1 - file_name;
+}
+
+// Returns the portion of file_name after the final '/'.
+std::string IcingFilesystem::GetBasename(const char *file_name) const {
+  size_t len = strlen(file_name);
+  int idx = GetBasenameIndex(file_name);
+  return std::string(file_name + idx, len - idx);
+}
+
+// Returns the portion of file_name before the final '/', without that slash.
+// Returns "" when file_name has no directory component.
+std::string IcingFilesystem::GetDirname(const char *file_name) const {
+  int idx = GetBasenameIndex(file_name);
+  // Remove the trailing slash
+  if (idx > 0) {
+    idx -= 1;
+  }
+  return std::string(file_name, idx);
+}
+
+// Simple overload: non-recursive listing with no exclusions. Clears *entries
+// first; the recursive overload below only appends.
+bool IcingFilesystem::ListDirectory(const char *dir_name,
+                                    vector<std::string> *entries) const {
+  entries->clear();
+  return ListDirectory(dir_name, /*exclude=*/{}, /*recursive=*/false, entries);
+}
+
+bool IcingFilesystem::ListDirectory(
+    const char *dir_name, const std::unordered_set<std::string> &exclude,
+    bool recursive, std::vector<std::string> *entries) const {
+  return ListDirectoryInternal(dir_name, exclude, recursive, /*prefix=*/"",
+                               entries);
+}
+
+bool IcingFilesystem::GetMatchingFiles(const char *glob,
+                                       vector<std::string> *matches) const {
+  matches->clear();
+
+  // Split dirname/basename: everything up to the last slash must be literal,
+  // only the basename part is treated as a glob pattern.
+  int basename_idx = GetBasenameIndex(glob);
+  if (basename_idx == 0) {
+    // We need a directory.
+    ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+        "Expected directory, no matching files for: %s", glob);
+    return true;
+  }
+  const char *basename_glob = glob + basename_idx;
+  std::string dirname(glob, basename_idx);
+  vector<std::string> entries;
+  // A missing directory (ENOENT) yields an empty match list, not an error.
+  if (!ListDirectory(dirname.c_str(), &entries) && errno != ENOENT) {
+    return false;
+  }
+
+  for (vector<std::string>::iterator i = entries.begin(); i != entries.end();
+       ++i) {
+    // The filename needs to match glob following last_slash.
+    // fnmatch returns 0 on a match.
+    if (!fnmatch(basename_glob, i->c_str(), FNM_PATHNAME)) {
+      // Add it to the list.
+      matches->push_back(dirname + *i);
+    }
+  }
+  return true;
+}
+
+// Opens (creating if necessary, mode 0600) for read/write. Returns the fd,
+// or -1 on failure (with the error logged).
+int IcingFilesystem::OpenForWrite(const char *file_name) const {
+  int fd = open(file_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+  if (fd < 0) {
+    LogOpenError("Opening file ", file_name, " for write failed: ", errno);
+  }
+  return fd;
+}
+
+// Opens (creating if necessary, mode 0600) for read/write and positions the
+// file offset at the end. Returns the fd, or -1 on failure (logged).
+int IcingFilesystem::OpenForAppend(const char *file_name) const {
+  // Don't use the O_APPEND flag because, although it opens for
+  // append, it doesn't set the file cursor to at the end until
+  // first write occurs. This can be confusing if you expect
+  // the file position at the end. Instead, explicitly
+  // seek to end after opening.
+  int fd = open(file_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+  if (fd < 0) {
+    // Say "append" (the original message said "write"), so failures here are
+    // distinguishable from OpenForWrite failures in the logs.
+    LogOpenError("Opening file ", file_name, " for append failed: ", errno);
+  } else {
+    lseek(fd, 0, SEEK_END);
+  }
+  return fd;
+}
+
+// Opens an existing file read-only. Returns the fd, or -1 on failure (with
+// the error logged).
+int IcingFilesystem::OpenForRead(const char *file_name) const {
+  int fd = open(file_name, O_RDONLY);
+  if (fd < 0) {
+    LogOpenError("Opening file ", file_name, " for read failed: ", errno);
+  }
+  return fd;
+}
+
+// Returns the size in bytes of the file behind fd, or kBadFileSize if fstat
+// fails (error is logged).
+uint64_t IcingFilesystem::GetFileSize(int fd) const {
+  struct stat st;
+  uint64_t size = kBadFileSize;
+  if (fstat(fd, &st) < 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat file: %s",
+                                                      strerror(errno));
+  } else {
+    size = st.st_size;
+  }
+  return size;
+}
+
+// Path-based variant; same semantics as the fd overload above.
+uint64_t IcingFilesystem::GetFileSize(const char *filename) const {
+  struct stat st;
+  uint64_t size = kBadFileSize;
+  if (stat(filename, &st) < 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Unable to stat file %s: %s", filename, strerror(errno));
+  } else {
+    size = st.st_size;
+  }
+  return size;
+}
+
+bool IcingFilesystem::Truncate(int fd, uint64_t new_size) const {
+  int ret = ftruncate(fd, new_size);
+  if (ret == 0) {
+    // Keep the file offset consistent with the new end of file.
+    lseek(fd, new_size, SEEK_SET);
+  } else {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Unable to truncate file: %s", strerror(errno));
+  }
+  return (ret == 0);
+}
+
+bool IcingFilesystem::Truncate(const char *filename, uint64_t new_size) const {
+  // OpenForAppend (rather than OpenForWrite) so existing content/position
+  // semantics match; the fd is closed again before returning.
+  int fd = OpenForAppend(filename);
+  if (fd == -1) {
+    return false;
+  }
+  bool success = Truncate(fd, new_size);
+  close(fd);
+  return success;
+}
+
+// Grows via ftruncate; unlike Truncate(int, ...) this intentionally leaves
+// the file offset unchanged.
+bool IcingFilesystem::Grow(int fd, uint64_t new_size) const {
+  int ret = ftruncate(fd, new_size);
+  if (ret != 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to grow file: %s",
+                                                      strerror(errno));
+  }
+  return (ret == 0);
+}
+
+// Writes data_size bytes at the current offset, looping over short writes
+// (64KB chunks). Returns false on the first write error.
+bool IcingFilesystem::Write(int fd, const void *data, size_t data_size) const {
+  size_t write_len = data_size;
+  do {
+    // Don't try to write too much at once.
+    size_t chunk_size = std::min<size_t>(write_len, 64u * 1024);
+    ssize_t wrote = write(fd, data, chunk_size);
+    if (wrote < 0) {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Bad write: %s",
+                                                        strerror(errno));
+      return false;
+    }
+    // Advance past whatever the kernel accepted (may be < chunk_size).
+    data = static_cast<const uint8_t *>(data) + wrote;
+    write_len -= wrote;
+  } while (write_len > 0);
+  return true;
+}
+
+// Positional variant of Write: same chunked loop, but via pwrite at an
+// explicit offset, leaving the fd's own offset untouched.
+bool IcingFilesystem::PWrite(int fd, off_t offset, const void *data,
+                             size_t data_size) const {
+  size_t write_len = data_size;
+  do {
+    // Don't try to write too much at once.
+    size_t chunk_size = std::min<size_t>(write_len, 64u * 1024);
+    ssize_t wrote = pwrite(fd, data, chunk_size, offset);
+    if (wrote < 0) {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Bad write: %s",
+                                                        strerror(errno));
+      return false;
+    }
+    data = static_cast<const uint8_t *>(data) + wrote;
+    write_len -= wrote;
+    offset += wrote;
+  } while (write_len > 0);
+  return true;
+}
+
+bool IcingFilesystem::DataSync(int fd) const {
+#ifdef __APPLE__  // iOS has no fdatasync(), only fsync()
+  int result = fsync(fd);
+#else
+  int result = fdatasync(fd);
+#endif
+
+  if (result < 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to sync data: %s",
+                                                      strerror(errno));
+    return false;
+  }
+  return true;
+}
+
+bool IcingFilesystem::RenameFile(const char *old_name,
+                                 const char *new_name) const {
+  if (rename(old_name, new_name) < 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Unable to rename file %s to %s: %s", old_name, new_name,
+        strerror(errno));
+    return false;
+  }
+  return true;
+}
+
+// Swaps via a third "<one>.tmp" name: one -> tmp, two -> one, tmp -> two.
+// NOTE(review): not atomic — a failure mid-way can leave one of the paths
+// under the .tmp name.
+bool IcingFilesystem::SwapFiles(const char *one, const char *two) const {
+  std::string tmp_name = absl_ports::StrCat(one, ".tmp");
+  const char *tmp_cstr = tmp_name.c_str();
+
+  // Blow away a tmp file if it already exists
+  if (FileExists(tmp_cstr) && !DeleteFile(tmp_cstr)) {
+    return false;
+  }
+  if (DirectoryExists(tmp_cstr) && !DeleteDirectoryRecursively(tmp_cstr)) {
+    return false;
+  }
+
+  // Perform the swap
+  if (!RenameFile(one, tmp_cstr)) {
+    return false;
+  }
+  if (!RenameFile(two, one)) {
+    return false;
+  }
+  if (!RenameFile(tmp_cstr, two)) {
+    return false;
+  }
+
+  return true;
+}
+
+// Creates dir_name (mode 0700) if it doesn't already exist. Returns true if
+// the directory exists on return.
+bool IcingFilesystem::CreateDirectory(const char *dir_name) const {
+  bool success = DirectoryExists(dir_name);
+  if (!success) {
+    if (mkdir(dir_name, S_IRUSR | S_IWUSR | S_IXUSR) == 0) {
+      success = true;
+    } else {
+      ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+          "Creating directory %s failed: %s", dir_name, strerror(errno));
+    }
+  }
+  return success;
+}
+
+// Recursively creates each missing parent (via GetDirname) and then the
+// directory itself. The empty-string base case terminates the recursion at
+// the filesystem root / a relative path's first component.
+bool IcingFilesystem::CreateDirectoryRecursively(const char *dir_name) const {
+  if ((strlen(dir_name) == 0) || DirectoryExists(dir_name)) {
+    return true;
+  }
+  std::string path_before = GetDirname(dir_name);
+  if (!CreateDirectoryRecursively(path_before.c_str())) {
+    return false;
+  }
+  return CreateDirectory(dir_name);
+}
+
+// Copies src to dst by mmapping the source (MAP_PRIVATE) and writing the
+// mapped bytes to dst. Returns false (and logs) on any failure.
+bool IcingFilesystem::CopyFile(const char *src, const char *dst) const {
+  bool success = false;
+
+  int src_fd = -1;
+  int dst_fd = -1;
+  uint64_t size = 0;
+  IcingMMapper mapper(true, MAP_PRIVATE);
+
+  if ((src_fd = OpenForRead(src)) < 0) {
+    goto end;
+  }
+  if ((dst_fd = OpenForWrite(dst)) < 0) {
+    goto end;
+  }
+  size = GetFileSize(src_fd);
+  // Don't attempt to mmap kBadFileSize (= UINT64_MAX) bytes when the stat
+  // failed; the original code passed the sentinel straight into Remap.
+  if (size == kBadFileSize) {
+    goto end;
+  }
+  mapper.Remap(src_fd, 0, size);
+  if (!mapper.is_valid()) {
+    goto end;
+  }
+  success = Write(dst_fd, mapper.address(), mapper.len());
+
+end:
+  // fd 0 is a valid descriptor; test >= 0 (the original `> 0` would leak a
+  // descriptor whenever fd 0 happened to be free).
+  if (src_fd >= 0) close(src_fd);
+  if (dst_fd >= 0) close(dst_fd);
+  if (!success) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "Couldn't copy file %s to %s", src, dst);
+  }
+  return success;
+}
+
+// Folds the [offset, offset+length) span of fd into *checksum with zlib's
+// adler32. *checksum is both input (seed) and output, so calls can be
+// chained; a zero length is a no-op that leaves the seed untouched.
+bool IcingFilesystem::ComputeChecksum(int fd, uint32_t *checksum,
+                                      uint64_t offset, uint64_t length) const {
+  if (length == 0) {
+    return true;
+  }
+  IcingMMapper mapper(fd, true, offset, length, MAP_PRIVATE);
+  if (!mapper.is_valid()) {
+    return false;
+  }
+  *checksum = adler32(*checksum, mapper.address(), mapper.len());
+  return true;
+}
+
+// Disk usage of an open file: allocated blocks x 512, so sparse files can
+// report less than their logical size. kBadFileSize on error.
+uint64_t IcingFilesystem::GetDiskUsage(int fd) const {
+  struct stat st;
+  if (fstat(fd, &st) < 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat file: %s",
+                                                      strerror(errno));
+    return kBadFileSize;
+  }
+  return st.st_blocks * kStatBlockSize;
+}
+
+// Non-recursive, path-based variant of the above.
+uint64_t IcingFilesystem::GetFileDiskUsage(const char *path) const {
+  struct stat st;
+  if (stat(path, &st) != 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat %s: %s",
+                                                      path, strerror(errno));
+    return kBadFileSize;
+  }
+  return st.st_blocks * kStatBlockSize;
+}
+
+// Recursive variant: for directories, sums the usage of all entries on top
+// of the directory's own blocks. Unreadable children are skipped, but a
+// directory that can't be listed makes the whole result kBadFileSize.
+uint64_t IcingFilesystem::GetDiskUsage(const char *path) const {
+  struct stat st;
+  if (stat(path, &st) != 0) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unable to stat %s: %s",
+                                                      path, strerror(errno));
+    return kBadFileSize;
+  }
+  uint64_t result = st.st_blocks * kStatBlockSize;
+  if (S_ISDIR(st.st_mode)) {
+    vector<std::string> list;
+    if (!ListDirectory(path, &list)) {
+      return kBadFileSize;
+    }
+    for (vector<std::string>::iterator i = list.begin(); i != list.end(); ++i) {
+      std::string sub_path = std::string(path) + '/' + *i;
+      uint64_t sub_usage = GetDiskUsage(sub_path.c_str());
+      if (sub_usage != kBadFileSize) {
+        result += sub_usage;
+      }  // Else just ignore the failing entry.
+    }
+  }
+  return result;
+}
+
+// Saturating accumulate: kBadFileSize is treated as "unknown" and is sticky —
+// once either operand is invalid, *to_increment stays kBadFileSize.
+void IcingFilesystem::IncrementByOrSetInvalid(uint64_t size,
+                                              uint64_t *to_increment) {
+  if (*to_increment == kBadFileSize) {
+    return;
+  }
+  if (size == kBadFileSize) {
+    *to_increment = kBadFileSize;
+    return;
+  }
+  *to_increment += size;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/index/icing-filesystem.h b/icing/legacy/index/icing-filesystem.h
new file mode 100644
index 0000000..2b10c1c
--- /dev/null
+++ b/icing/legacy/index/icing-filesystem.h
@@ -0,0 +1,230 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Methods for interacting with the filesystem.
+
+#ifndef ICING_LEGACY_INDEX_ICING_FILESYSTEM_H_
+#define ICING_LEGACY_INDEX_ICING_FILESYSTEM_H_
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <memory>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+namespace icing {
+namespace lib {
+
+// Closes fd when it goes out of scope, if fd >= 0.
+// Move-only RAII owner of a POSIX file descriptor (-1 means "empty").
+class IcingScopedFd {
+ public:
+  explicit IcingScopedFd(int fd = -1) : fd_(fd) {}
+  IcingScopedFd(const IcingScopedFd &) = delete;
+  IcingScopedFd(IcingScopedFd &&other) : IcingScopedFd() {
+    *this = std::move(other);
+  }
+
+  IcingScopedFd &operator=(const IcingScopedFd &) = delete;
+  // Move-assignment swaps, so the moved-from object ends up owning (and
+  // eventually closing) this object's previous descriptor.
+  IcingScopedFd &operator=(IcingScopedFd &&other) {
+    std::swap(fd_, other.fd_);
+    return *this;
+  }
+  ~IcingScopedFd();
+
+  bool is_valid() const { return fd_ >= 0; }
+  int operator*() const { return fd_; }
+  int get() const { return fd_; }
+  // Closes the current descriptor (if any) and takes ownership of fd.
+  void reset(int fd = -1);
+
+ private:
+  int fd_;
+};
+
+// Deleter that fcloses a C stdio stream, for use with std::unique_ptr.
+struct IcingFILEDeleter {
+  void operator()(FILE *fp) const {
+    if (fp) {
+      fclose(fp);
+    }
+  }
+};
+// RAII owner of a FILE*; closes the stream on destruction.
+typedef std::unique_ptr<FILE, IcingFILEDeleter> IcingScopedFILE;
+
+// Class containing file operation methods.
+// All methods are virtual so the class can be mocked/faked in tests.
+// If you change methods in this class, don't forget to update the mock:
+// java/com/google/android/gmscore/integ/modules/icing/jni/index/mock-filesystem.h
+class IcingFilesystem {
+ public:
+  // Sentinel returned by the size/usage queries below on error.
+  static const uint64_t kBadFileSize = static_cast<uint64_t>(-1);
+
+  constexpr IcingFilesystem() {}
+  virtual ~IcingFilesystem() {}
+
+  // Deletes a file, returns true on success or if the file did
+  // not yet exist.
+  virtual bool DeleteFile(const char *file_name) const;
+
+  // Deletes a directory, returns true on success or if the directory did
+  // not yet exist.
+  virtual bool DeleteDirectory(const char *dir_name) const;
+
+  // Deletes a directory, including any contents, and returns true on
+  // success or if the directory did not yet exist.
+  virtual bool DeleteDirectoryRecursively(const char *dir_name) const;
+
+  // Returns true if a file exists. False if the file doesn't exist.
+  // If there is an error getting stat on the file, it logs the error and
+  // asserts.
+  virtual bool FileExists(const char *file_name) const;
+
+  // Returns true if a directory exists. False if the directory doesn't exist.
+  // If there is an error getting stat on the directory, it logs the error and
+  // asserts.
+  virtual bool DirectoryExists(const char *dir_name) const;
+
+  // Return index to start of basename in file_name. Anything before
+  // basename is the dirname (including the final slash).
+  virtual int GetBasenameIndex(const char *file_name) const;
+
+  // Return a string containing the basename.
+  virtual std::string GetBasename(const char *file_name) const;
+
+  // Return a string containing the dirname.
+  virtual std::string GetDirname(const char *file_name) const;
+
+  // Gets the names of the entries of a given directory. Does not include "."
+  // and "..". Returns false on error.
+  virtual bool ListDirectory(const char *dir_name,
+                             std::vector<std::string> *entries) const;
+
+  // Adds the names of the entries of a given directory -- recursively if
+  // specified, and excluding files/directories named in exclude -- to entries.
+  // Regardless of exclude, does not include "." and "..". Excluded files are
+  // excluded at every level. Returns false on error.
+  //
+  // Example use case: list all files & directories in fooDir/, recursively,
+  // excluding anything named "tmp" or "cache" (presumed directories) and the
+  // files within them.
+  virtual bool ListDirectory(const char *dir_name,
+                             const std::unordered_set<std::string> &exclude,
+                             bool recursive,
+                             std::vector<std::string> *entries) const;
+
+  // Use glob to return matched files into "matches". Returns false if
+  // glob had an error.
+  //
+  // Cannot match multiple directories so everything up the last slash
+  // must be literal.
+  virtual bool GetMatchingFiles(const char *glob,
+                                std::vector<std::string> *matches) const;
+
+  // Opens the file for read/write. Creates if not existing. Returns
+  // -1 on fail or an open file descriptor on success.
+  virtual int OpenForWrite(const char *file_name) const;
+
+  // Opens the file for read/write, and positions the file at the
+  // end for appending. Creates if not existing. Returns -1 on fail
+  // or an open file descriptor on success.
+  virtual int OpenForAppend(const char *file_name) const;
+
+  // Opens a file for read only. Fails if file does not exist. Returns
+  // file descriptor or -1 on fail. Set quiet to true to suppress
+  // log warnings.
+  virtual int OpenForRead(const char *file_name) const;
+
+  // Gets the size of a file, given an open file descriptor.
+  // Returns kBadFileSize on error.
+  virtual uint64_t GetFileSize(int fd) const;
+
+  // Gets the size of a file, given a filename.
+  virtual uint64_t GetFileSize(const char *filename) const;
+
+  // Truncates the file to the requested size. Seeks to the
+  // end position of the file after truncate. Returns false
+  // if fails.
+  virtual bool Truncate(int fd, uint64_t new_size) const;
+
+  // Truncates the file to the requested size.
+  // Returns false if fails.
+  virtual bool Truncate(const char *filename, uint64_t new_size) const;
+
+  // Grows the file to the requested size. Does not change the
+  // position pointer.
+  virtual bool Grow(int fd, uint64_t new_size) const;
+
+  // Writes to a file. Returns true if all the data was successfully
+  // written. Handles interrupted writes.
+  virtual bool Write(int fd, const void *data, size_t data_size) const;
+  virtual bool PWrite(int fd, off_t offset, const void *data,
+                      size_t data_size) const;
+
+  // Syncs the file to disk (fdatasync). Returns true on success.
+  virtual bool DataSync(int fd) const;
+
+  // Renames a file. A file with new_name must not already exist.
+  virtual bool RenameFile(const char *old_name, const char *new_name) const;
+
+  // Renames two files or directories so their names are swapped.
+  // Both names must already exist.
+  virtual bool SwapFiles(const char *one, const char *two) const;
+
+  // Creates a directory if it does not yet exist.
+  virtual bool CreateDirectory(const char *dir_name) const;
+
+  // Creates a directory if it does not yet exist, building the entire path
+  // if it does not yet exist.
+  virtual bool CreateDirectoryRecursively(const char *dir_name) const;
+
+  // Copy a file.
+  virtual bool CopyFile(const char *src, const char *dst) const;
+
+  // Compute an adler32 checksum over the [offset, offset+length) span
+  // of an open file. Returns false if the file could not be read.
+  // The checksum is an input/output variable (whatever value is
+  // stored there will prime the checksum computation). If length is
+  // 0, can be used to prime a checksum for future appends.
+  virtual bool ComputeChecksum(int fd, uint32_t *checksum, uint64_t offset,
+                               uint64_t length) const;
+
+  // Compute the disk usage of the given file. Similarly to the
+  // 'du' command, it attempts to estimate the actual disk usage, so for
+  // sparse files it may return less than their length.
+  // Returns kBadFileSize on error.
+  virtual uint64_t GetDiskUsage(int fd) const;
+
+  // Compute the disk usage of the given file or directory. Similarly to the
+  // 'du' command, it attempts to estimate the actual disk usage, so for
+  // sparse files it may return less than their length. Returns kBadFileSize on
+  // error.
+  // Does not recurse on directories.
+  virtual uint64_t GetFileDiskUsage(const char *path) const;
+
+  // Compute the disk usage of the given file or directory. Similarly to the
+  // 'du' command, it attempts to estimate the actual disk usage, so for
+  // sparse files it may return less than their length. Returns kBadFileSize on
+  // error.
+  // Recurses on directories.
+  virtual uint64_t GetDiskUsage(const char *path) const;
+
+  // Increments to_increment by size if size is valid, or sets to_increment
+  // to kBadFileSize if either size or to_increment is kBadFileSize.
+  static void IncrementByOrSetInvalid(uint64_t size, uint64_t *to_increment);
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_INDEX_ICING_FILESYSTEM_H_
diff --git a/icing/legacy/index/icing-flash-bitmap.cc b/icing/legacy/index/icing-flash-bitmap.cc
new file mode 100644
index 0000000..56dec00
--- /dev/null
+++ b/icing/legacy/index/icing-flash-bitmap.cc
@@ -0,0 +1,421 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/legacy/index/icing-flash-bitmap.h"
+
+#include <sys/mman.h>
+
+#include <memory>
+
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/core/icing-timer.h"
+#include "icing/legacy/index/icing-bit-util.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
// TODO(b/77482303) : Remove version from the IcingFlashBitmap header - magic
// makes it unnecessary.
// On-disk layout of the file header; it is accessed directly through the
// mmapped region (see Accessor below).
struct IcingFlashBitmap::Header {
  uint32_t magic;    // Must equal kMagic; identifies the file format.
  uint32_t version;  // Must equal kCurVersion.
  uint32_t crc;      // Crc32 over the data region only (not this header).
  uint32_t dirty;    // Nonzero when data changed since crc was computed.
};
+
// Helper class used to access the data and the header regions
// of the shared memory. The header appears first, followed by the
// bitmap memory.
//
// Holds only a pointer to the IcingMMapper, so an Accessor stays usable
// across IcingMMapper::Remap() calls: every accessor re-reads address()
// and len() on each use.
class IcingFlashBitmap::Accessor {
 public:
  explicit Accessor(IcingMMapper *mmapper) : mmapper_(mmapper) {}
  // Header occupies the first sizeof(Header) bytes of the mapping.
  IcingFlashBitmap::Header *header() {
    return reinterpret_cast<IcingFlashBitmap::Header *>(mmapper_->address());
  }
  const IcingFlashBitmap::Header *header() const {
    return reinterpret_cast<const IcingFlashBitmap::Header *>(
        mmapper_->address());
  }
  // Bitmap data starts immediately after the header.
  const char *data() const {
    return reinterpret_cast<const char *>(mmapper_->address() +
                                          sizeof(IcingFlashBitmap::Header));
  }
  // Size of the data region in bytes (mapping length minus the header).
  size_t data_size() const {
    return mmapper_->len() - sizeof(IcingFlashBitmap::Header);
  }
  size_t num_words() const { return data_size() / sizeof(Word); }
  // Data region viewed as 32-bit words.
  Word *data32() {
    return reinterpret_cast<Word *>(mmapper_->address() +
                                    sizeof(IcingFlashBitmap::Header));
  }
  const Word *data32() const { return reinterpret_cast<const Word *>(data()); }
  // One past the last word of the mapping.
  const Word *end32() const {
    return reinterpret_cast<const Word *>(mmapper_->address() +
                                          mmapper_->len());
  }

 private:
  IcingMMapper *const mmapper_;
};
+
// Validates the mmapped header fields and the data crc.
// Logs and returns false on the first mismatch found.
bool IcingFlashBitmap::Verify() const {
  if (!is_initialized()) {
    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
        "Can't verify unopened flash bitmap %s", filename_.c_str());
    return false;
  }
  if (mmapper_ == nullptr) {
    // Opened for read and file doesn't exist.
    return true;
  }
  Accessor accessor(mmapper_.get());
  // Identification fields must match the current format exactly.
  if (accessor.header()->magic != kMagic) {
    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
        "Flash bitmap %s has incorrect magic header", filename_.c_str());
    return false;
  }
  if (accessor.header()->version != kCurVersion) {
    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
        "Flash bitmap %s has incorrect version", filename_.c_str());
    return false;
  }
  // A dirty header means the stored crc may not cover the latest data.
  if (accessor.header()->dirty) {
    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
        "Flash bitmap %s is dirty", filename_.c_str());
    return false;
  }
  // Recompute the crc over the data region and compare to the stored one.
  uint32_t crc =
      IcingStringUtil::UpdateCrc32(0, accessor.data(), accessor.data_size());
  if (accessor.header()->crc != crc) {
    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
        "Flash bitmap %s has incorrect CRC32 %u %u", filename_.c_str(),
        accessor.header()->crc, crc);
    return false;
  }
  return true;
}
+
// Opens (creating if necessary) the bitmap file for read/write and maps it
// MAP_SHARED. Uses goto-based cleanup so every failure path resets the
// object to the UNOPENED state. Returns true on success.
bool IcingFlashBitmap::Init() {
  Close();

  // Ensure the storage directory exists
  std::string storage_dir = filesystem_->GetDirname(filename_.c_str());
  if (!filesystem_->CreateDirectoryRecursively(storage_dir.c_str())) {
    return false;
  }

  IcingScopedFd fd(filesystem_->OpenForWrite(filename_.c_str()));
  if (!fd.is_valid()) {
    return false;
  }

  // Figure out our file size for mmap.
  uint64_t orig_file_size = filesystem_->GetFileSize(fd.get());
  uint64_t file_size = orig_file_size;
  if (orig_file_size == IcingFilesystem::kBadFileSize) {
    goto error;
  }

  // Make sure we have something to mmap.
  // Files smaller than one grow unit are extended to kGrowSize.
  if (orig_file_size < kGrowSize) {
    if (!filesystem_->Grow(fd.get(), kGrowSize)) {
      goto error;
    }
    file_size = kGrowSize;
  }

  // Mmap for write.
  mmapper_ =
      std::make_unique<IcingMMapper>(fd.get(), false, 0, file_size, MAP_SHARED);
  if (!mmapper_->is_valid()) {
    goto error;
  }

  // Set open_type_ before the possible flush on create.
  open_type_ = READ_WRITE;

  if (orig_file_size == 0) {
    Accessor accessor(mmapper_.get());
    // Original file didn't yet exist, create header.
    accessor.header()->magic = kMagic;
    accessor.header()->version = kCurVersion;
    accessor.header()->dirty = true;  // Forces crc update at sync.
    // Sync file so we know it's supposed to exist.
    if (!Sync()) {
      goto error;
    }
  }
  return true;

error:
  open_type_ = UNOPENED;
  mmapper_.reset();
  return false;
}
+
// Opens the bitmap file read-only. A missing or empty file is not an error:
// the bitmap is simply treated as empty (no mapping is created).
bool IcingFlashBitmap::InitForRead() {
  IcingTimer open_timer;
  Close();

  // Cannot mmap non-existing or zero-size files.
  // It's not an error in this case, it just means the
  // bitmap is empty, so proceed without mapping it.
  if (!filesystem_->FileExists(filename_.c_str()) ||
      filesystem_->GetFileSize(filename_.c_str()) == 0) {
    open_type_ = READ_ONLY;
    return true;
  }

  IcingScopedFd fd(filesystem_->OpenForRead(filename_.c_str()));
  if (!fd.is_valid()) {
    return false;
  }

#ifdef __APPLE__
  // No MAP_POPULATE in iOS (so no pre-page-faulting. See man mmap)
  // On Apple we need MAP_SHARED even for sharing the state within the same
  // process (which gets optimized in the linux-implementation).
  // Usages of flash-bitmap are expected to flush the content (delayed for
  // performance reasons). That implies that the copy-on-write behavior of
  // MAP_PRIVATE is a performance optimization, and MAP_SHARED as alternative
  // behavior is acceptable.
  int flags = MAP_SHARED;
#else
  int flags = MAP_PRIVATE | MAP_POPULATE;
#endif

  // Figure out our file size for mmap.
  uint64_t file_size = filesystem_->GetFileSize(fd.get());
  if (file_size == IcingFilesystem::kBadFileSize) {
    goto error;
  }

  // Slurp the bitmap in one go with MAP_POPULATE
  mmapper_ =
      std::make_unique<IcingMMapper>(fd.get(), true, 0, file_size, flags);
  if (!mmapper_->is_valid()) {
    goto error;
  }

  open_type_ = READ_ONLY;
  return true;

error:
  open_type_ = UNOPENED;
  mmapper_.reset();
  return false;
}
+
+void IcingFlashBitmap::Close() {
+ if (is_initialized()) {
+ UpdateCrc();
+ mmapper_.reset();
+ open_type_ = UNOPENED;
+ }
+}
+
+bool IcingFlashBitmap::Delete() {
+ Close();
+ return filesystem_->DeleteFile(filename_.c_str());
+}
+
+bool IcingFlashBitmap::Sync() const {
+ if (!is_initialized()) {
+ ICING_LOG(FATAL) << "Bitmap not initialized";
+ }
+
+ UpdateCrc();
+ return (mmapper_ == nullptr) ? true : mmapper_->Sync();
+}
+
+uint64_t IcingFlashBitmap::GetDiskUsage() const {
+ // For non-existing files, size is 0.
+ if (mmapper_ == nullptr) {
+ return 0;
+ }
+ return filesystem_->GetFileDiskUsage(filename_.c_str());
+}
+
// If the bitmap is dirty, recompute the data crc, store it in the header,
// and clear the dirty flag. Returns the current crc (kEmptyCrc when the
// file does not exist).
uint32_t IcingFlashBitmap::UpdateCrc() const {
  // NOTE: the accessor may wrap a null mmapper here; header() is only
  // dereferenced when open_type_ is READ_WRITE, which implies a valid
  // mapping (see Init()).
  Accessor accessor(mmapper_.get());
  if (open_type_ == READ_WRITE && accessor.header()->dirty) {
    accessor.header()->crc = IcingStringUtil::UpdateCrc32(
        kEmptyCrc, accessor.data(), accessor.data_size());
    accessor.header()->dirty = false;
  }

  // Non-existent mmapper means file does not exist. An empty file has
  // a crc of kEmptyCrc, so just return that.
  return mmapper_.get() ? accessor.header()->crc : kEmptyCrc;
}
+
// Extends the backing file to new_file_size and remaps it. Marks the
// header dirty since the newly visible region changes the data crc.
bool IcingFlashBitmap::Grow(size_t new_file_size) {
  IcingScopedFd fd(filesystem_->OpenForWrite(filename_.c_str()));
  if (!filesystem_->Grow(fd.get(), new_file_size)) {
    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
        "Grow %s to new size %zu failed", filename_.c_str(), new_file_size);
    return false;
  }
  // Remap so the enlarged file is visible through mmapper_.
  if (!mmapper_->Remap(fd.get(), 0, new_file_size)) {
    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
        "Remap of %s after grow failed", filename_.c_str());
    return false;
  }
  ICING_VLOG(1) << IcingStringUtil::StringPrintf(
      "Grew %s new size %zu", filename_.c_str(), new_file_size);
  // Accessor reads through mmapper_, so it sees the remapped region.
  Accessor accessor(mmapper_.get());
  accessor.header()->dirty = true;
  return true;
}
+
// Sets or clears the bit at idx, growing the file (in kGrowSize steps) as
// needed. Only marks the header dirty when the stored word actually changes.
// Requires READ_WRITE open type.
bool IcingFlashBitmap::SetBit(uint64_t idx, bool value) {
  if (open_type_ != READ_WRITE) {
    ICING_LOG(FATAL) << "Bitmap not opened with type READ_WRITE";
  }

  Accessor accessor(mmapper_.get());

  // Figure out which word needs to be modified.
  uint64_t word_offset = idx / kWordBits;

  // Grow file (and mmap) if word_offset >= file size / sizeof(Word).
  if (word_offset >= accessor.num_words()) {
    if (!value) {
      // Values beyond the end of file are false by default, don't write it.
      return true;
    }
    // Grow enough to fit word_offset (including the header).
    size_t file_size = sizeof(Header) + (word_offset + 1) * sizeof(Word);
    // Align to kGrowSize.
    file_size = ALIGN_UP(file_size, kGrowSize);
    // accessor remains valid after Grow: it re-reads through mmapper_.
    if (!Grow(file_size)) {
      return false;
    }
  }

  // Set the word in the mmapped region.
  Word *words = accessor.data32();
  Word mask = GetWordBitmask(idx);
  Word old_word = words[word_offset];
  Word new_word = value ? old_word | mask : old_word & ~mask;
  if (new_word != old_word) {
    words[word_offset] = new_word;
    accessor.header()->dirty = true;
  }
  return true;
}
+
+bool IcingFlashBitmap::GetBit(uint64_t idx) const {
+ return GetWord(idx / kWordBits) & GetWordBitmask(idx);
+}
+
+IcingFlashBitmap::Word IcingFlashBitmap::GetWord(uint64_t idx) const {
+ if (!is_initialized()) {
+ ICING_LOG(FATAL) << "Bitmap not initialized";
+ }
+
+ // For non-existing files, always return false.
+ if (mmapper_ == nullptr) {
+ return 0;
+ }
+
+ Accessor accessor(mmapper_.get());
+ // Check that we are within limits.
+ if (idx >= accessor.num_words()) {
+ return 0;
+ }
+ return accessor.data32()[idx];
+}
+
// Returns the number of 32-bit words in the mapped data region.
size_t IcingFlashBitmap::NumWords() const {
  if (!is_initialized()) {
    ICING_LOG(FATAL) << "Bitmap not initialized";
  }

  // For non-existing files, always return 0 words.
  if (mmapper_ == nullptr) {
    return 0;
  }

  return Accessor(mmapper_.get()).num_words();
}
+
+IcingFlashBitmap::Word IcingFlashBitmap::GetWordBitmask(uint64_t idx) {
+ return 1u << (idx % kWordBits);
+}
+
// Clears every bit at position >= idx (the file itself is not shrunk)
// and refreshes the crc so the file stays verifiable.
void IcingFlashBitmap::Truncate(uint64_t idx) {
  if (!is_initialized()) {
    ICING_LOG(FATAL) << "Bitmap not initialized";
  }

  Accessor accessor(mmapper_.get());
  size_t num_words = accessor.num_words();

  uint64_t word_offset = idx / kWordBits;
  if (word_offset >= num_words) {
    // Truncation offset beyond actual file. We're done.
    return;
  }

  Word *words = accessor.data32();

  // Keep only bits < idx in the last word.
  // GetWordBitmask(idx) - 1 has ones exactly at the positions below idx.
  words[word_offset] &= (GetWordBitmask(idx) - 1);

  // Clear everything starting at word_offset + 1
  uint64_t last_word_offset = word_offset + 1;
  if (last_word_offset < num_words) {
    memset(words + last_word_offset, 0,
           (num_words - last_word_offset) * sizeof(Word));
  }
  accessor.header()->dirty = true;
  UpdateCrc();
}
+
// Ors every set bit of the source bitmap into this one, growing this
// bitmap first if the source is larger. Refreshes the crc on success.
bool IcingFlashBitmap::OrBitmap(const IcingFlashBitmap &bitmap) {
  if (!is_initialized()) {
    ICING_LOG(FATAL) << "Bitmap not initialized";
  }

  if (mmapper_ == nullptr || bitmap.mmapper_ == nullptr) {
    // TODO(b/32125196): Figure out how we can get into this state.
    return false;
  }

  // If this bitmap is smaller than the source, then grow the
  // size to match.
  if (mmapper_->len() < bitmap.mmapper_->len()) {
    if (!Grow(bitmap.mmapper_->len())) {
      return false;
    }
  }
  Accessor src_accessor(bitmap.mmapper_.get());
  const Word *src = src_accessor.data32();
  const Word *end = src_accessor.end32();

  // After the Grow above, dst is at least as long as src, so iterating
  // src's words stays within dst's bounds.
  Accessor dst_accessor(mmapper_.get());
  Word *dst = dst_accessor.data32();
  while (src < end) {
    *dst++ |= *src++;
  }
  dst_accessor.header()->dirty = true;
  UpdateCrc();
  return true;
}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/index/icing-flash-bitmap.h b/icing/legacy/index/icing-flash-bitmap.h
new file mode 100644
index 0000000..9abd369
--- /dev/null
+++ b/icing/legacy/index/icing-flash-bitmap.h
@@ -0,0 +1,154 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2012 Google Inc. All Rights Reserved.
+// Author: ulas@google.com (Ulas Kirazci)
+//
+// A disk-backed bitmap.
+//
+// For writing:
+//
+// Init();
+// SetBit(bit_idx, <true|false>) // Automatically grows with SetBit.
+// ...
+// Sync(); // SetBit takes effect immediately but Sync persists to disk.
+//
+// For reading:
+//
+// InitForRead();
+// GetBit(bit_idx);
+// ...
+// Close();
+//
+// InitForRead uses mmap MAP_POPULATE to fault the entire file to
+// memory. Subsequent random GetBits are very fast (nanoseconds).
+//
+// It's ok to call Init after InitForRead. The last "Init" call takes
+// effect.
+
+#ifndef ICING_LEGACY_INDEX_ICING_FLASH_BITMAP_H_
+#define ICING_LEGACY_INDEX_ICING_FLASH_BITMAP_H_
+
+#include <stdint.h>
+
+#include <memory>
+#include <string>
+
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-mmapper.h"
+
+namespace icing {
+namespace lib {
+
class IcingFlashBitmap {
 public:
  // Storage unit of the bitmap.
  using Word = uint32_t;

  // Crc value of an empty (or absent) bitmap file.
  static constexpr uint32_t kEmptyCrc = 0;
  // File growth granularity and minimum mapped size.
  static constexpr size_t kGrowSize = (1u << 12);  // 4KB;
  static constexpr size_t kWordBits = 8 * sizeof(Word);

  // Does not touch the file; call Init() or InitForRead() to open it.
  IcingFlashBitmap(const std::string &filename,
                   const IcingFilesystem *filesystem)
      : filesystem_(filesystem), filename_(filename), open_type_(UNOPENED) {}
  ~IcingFlashBitmap() { Close(); }

  // Init for read and write. Returns true on success. Does not verify
  // the data checksum. To do so call Verify explicitly.
  bool Init();

  // Init for read only. Also faults the entire file into memory with
  // MAP_POPULATE. Does not verify the data checksum. To do so call Verify
  // explicitly.
  bool InitForRead();

  // Verifies the integrity of the data by checking the header values
  // and data checksum. Returns true if opened with InitForRead() and
  // file does not exist.
  bool Verify() const;

  // If either of the init functions was called successfully.
  bool is_initialized() const { return open_type_ != UNOPENED; }

  // Close file and release resources. Leaves the bitmap in uninitialized state.
  void Close();

  // The following functions require is_initialized() with Init()
  // EXCEPT GetBit() which requires Init() or InitForRead().

  // Close and delete the underlying file. Leaves the bitmap in uninitialized
  // state (even if deletion failed).
  bool Delete();

  // Delete the underlying file, and reinitialize it. If successful, the bitmap
  // is initialized.
  bool Clear() { return Delete() && Init(); }

  // Sync the changes to disk.
  bool Sync() const;

  // On-disk footprint of the backing file; 0 when the file does not exist.
  uint64_t GetDiskUsage() const;

  // Set or clear a bit at idx. Automatically resizes the file to fit
  // idx. Returns true on success.
  bool SetBit(uint64_t idx, bool value);

  // Get the value of bit at idx. If idx is out of range, returns false.
  // Can be called with InitForRead().
  bool GetBit(uint64_t idx) const;

  // Get the idx'th word in the bitmap. If idx is out of range, returns zero.
  // Can be called with InitForRead().
  Word GetWord(uint64_t idx) const;
  size_t NumWords() const;

  // Clear all bits starting at idx.
  void Truncate(uint64_t idx);

  // Ors all the set bits from a given bitmap into this bitmap.
  bool OrBitmap(const IcingFlashBitmap &bitmap);

  const std::string &filename() const { return filename_; }

  // If the bitmap is dirty, update the crc and mark it clean.
  uint32_t UpdateCrc() const;

 private:
  class Accessor;
  struct Header;

  // On-disk format identification (see Header in the .cc file).
  static const uint32_t kMagic = 0x394b0698;
  static const uint32_t kCurVersion = 18;

  enum OpenType { UNOPENED, READ_ONLY, READ_WRITE };

  // Returns a word with only the bit corresponding to idx set.
  static Word GetWordBitmask(uint64_t idx);

  // Increase the size of the bitmap file to the new size. Return true
  // on success.
  bool Grow(size_t new_file_size);

  // Upgrade for version 18.
  bool UpgradeTo18();

  const IcingFilesystem *const filesystem_;  // Not owned.
  std::string filename_;
  OpenType open_type_;
  // Null when unopened, or opened for read on a missing/empty file.
  std::unique_ptr<IcingMMapper> mmapper_;
};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_INDEX_ICING_FLASH_BITMAP_H_
diff --git a/icing/legacy/index/icing-lite-index-header.h b/icing/legacy/index/icing-lite-index-header.h
new file mode 100644
index 0000000..ac2d3c0
--- /dev/null
+++ b/icing/legacy/index/icing-lite-index-header.h
@@ -0,0 +1,114 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_LEGACY_INDEX_ICING_LITE_INDEX_HEADER_H_
+#define ICING_LEGACY_INDEX_ICING_LITE_INDEX_HEADER_H_
+
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/index/icing-common-types.h"
+
+namespace icing {
+namespace lib {
+
// A wrapper around the actual mmapped header data.
// Pure interface; IcingLiteIndex_HeaderImpl below provides the concrete
// layout-backed implementation.
class IcingLiteIndex_Header {
 public:
  virtual ~IcingLiteIndex_Header() = default;

  // Returns true if the magic of the header matches the hard-coded magic
  // value associated with this header format.
  virtual bool check_magic() const = 0;

  // Crc over the lite index contents.
  virtual uint32_t lite_index_crc() const = 0;
  virtual void set_lite_index_crc(uint32_t crc) = 0;

  virtual uint32_t last_added_docid() const = 0;
  virtual void set_last_added_docid(uint32_t last_added_docid) = 0;

  virtual uint32_t cur_size() const = 0;
  virtual void set_cur_size(uint32_t cur_size) = 0;

  virtual uint32_t searchable_end() const = 0;
  virtual void set_searchable_end(uint32_t searchable_end) = 0;

  // Crc over the header fields themselves (excluding the stored crcs).
  virtual uint32_t CalculateHeaderCrc() const = 0;

  // Restores all fields to their initial (empty-index) values.
  virtual void Reset() = 0;
};
+
// Header implementation backed directly by the mmapped HeaderData struct.
// Does not own hdr_; the caller keeps the mapping alive.
class IcingLiteIndex_HeaderImpl : public IcingLiteIndex_Header {
 public:
  // On-disk header layout; must stay exactly 24 bytes (see static_assert
  // below).
  struct HeaderData {
    static const uint32_t kMagic = 0x6dfba6a0;

    uint32_t lite_index_crc;
    uint32_t magic;
    // This field is available to be reclaimed for another purpose without
    // forcing a change in header size. NOTE: claiming this fields doesn't
    // guarantee that the newly claimed field will have the proper value. If you
    // are depending on the value of this field then you will have to have a
    // migration - either a one-time event during Upgrade() or Init() or
    // determined by a flag change in Init().
    uint32_t padding;
    uint32_t last_added_docid;
    uint32_t cur_size;
    uint32_t searchable_end;
  };

  explicit IcingLiteIndex_HeaderImpl(HeaderData *hdr) : hdr_(hdr) {}

  bool check_magic() const override {
    return hdr_->magic == HeaderData::kMagic;
  }

  uint32_t lite_index_crc() const override { return hdr_->lite_index_crc; }
  void set_lite_index_crc(uint32_t crc) override { hdr_->lite_index_crc = crc; }

  uint32_t last_added_docid() const override { return hdr_->last_added_docid; }
  void set_last_added_docid(uint32_t last_added_docid) override {
    hdr_->last_added_docid = last_added_docid;
  }

  uint32_t cur_size() const override { return hdr_->cur_size; }
  void set_cur_size(uint32_t cur_size) override { hdr_->cur_size = cur_size; }

  uint32_t searchable_end() const override { return hdr_->searchable_end; }
  void set_searchable_end(uint32_t searchable_end) override {
    hdr_->searchable_end = searchable_end;
  }

  uint32_t CalculateHeaderCrc() const override {
    // The crc covers everything from 'magic' onward; lite_index_crc itself
    // is intentionally excluded.
    return IcingStringUtil::UpdateCrc32(
        0, reinterpret_cast<const char *>(hdr_) + offsetof(HeaderData, magic),
        sizeof(HeaderData) - offsetof(HeaderData, magic));
  }

  void Reset() override {
    // 'padding' is intentionally left untouched (see HeaderData comment).
    hdr_->lite_index_crc = 0;
    hdr_->magic = HeaderData::kMagic;
    hdr_->last_added_docid = kIcingInvalidDocId;
    hdr_->cur_size = 0;
    hdr_->searchable_end = 0;
  }

 private:
  HeaderData *hdr_;  // Not owned; points into the mmapped region.
};
static_assert(24 == sizeof(IcingLiteIndex_HeaderImpl::HeaderData),
              "sizeof(HeaderData) != 24");
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_INDEX_ICING_LITE_INDEX_HEADER_H_
diff --git a/icing/legacy/index/icing-lite-index-options.cc b/icing/legacy/index/icing-lite-index-options.cc
new file mode 100644
index 0000000..4bf0d38
--- /dev/null
+++ b/icing/legacy/index/icing-lite-index-options.cc
@@ -0,0 +1,64 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/legacy/index/icing-lite-index-options.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+uint32_t CalculateHitBufferSize(uint32_t hit_buffer_want_merge_bytes) {
+ constexpr uint32_t kHitBufferSlopMult = 2;
+
+ // Add a 2x slop for the hit buffer. We need to make sure we can at
+ // least fit one document with index variants.
+ // TODO(b/111690435) Move LiteIndex::Element to a separate file so that this
+ // can use sizeof(LiteIndex::Element)
+ uint32_t hit_capacity_elts_with_slop =
+ hit_buffer_want_merge_bytes / sizeof(uint64_t);
+ // Add some slop for index variants on top of max num tokens.
+ hit_capacity_elts_with_slop += kIcingMaxNumHitsPerDocument;
+ hit_capacity_elts_with_slop *= kHitBufferSlopMult;
+
+ return hit_capacity_elts_with_slop;
+}
+
// Sizes the lexicon trie proportionally to the hit buffer capacity.
// NOTE(review): uses std::max — relies on a transitive #include of
// <algorithm>; confirm it is pulled in via icing-dynamic-trie.h.
IcingDynamicTrie::Options CalculateTrieOptions(uint32_t hit_buffer_size) {
  // The default min is 1/5th of the main index lexicon, which can
  // hold >1M terms. We don't need values so value size is 0. We
  // conservatively scale from there.
  //
  // We can give this a lot of headroom because overestimating the
  // requirement has minimal resource impact.
  double scaling_factor =
      std::max(1.0, static_cast<double>(hit_buffer_size) / (100u << 10));
  return IcingDynamicTrie::Options((200u << 10) * scaling_factor,
                                   (200u << 10) * scaling_factor,
                                   (1u << 20) * scaling_factor, 0);
}
+
+} // namespace
+
+IcingLiteIndexOptions::IcingLiteIndexOptions(
+ const std::string& filename_base, uint32_t hit_buffer_want_merge_bytes)
+ : filename_base(filename_base),
+ hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes) {
+ hit_buffer_size = CalculateHitBufferSize(hit_buffer_want_merge_bytes);
+ lexicon_options = CalculateTrieOptions(hit_buffer_size);
+ display_mappings_options = CalculateTrieOptions(hit_buffer_size);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/index/icing-lite-index-options.h b/icing/legacy/index/icing-lite-index-options.h
new file mode 100644
index 0000000..2922621
--- /dev/null
+++ b/icing/legacy/index/icing-lite-index-options.h
@@ -0,0 +1,44 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_LEGACY_INDEX_ICING_LITE_INDEX_OPTIONS_H_
+#define ICING_LEGACY_INDEX_ICING_LITE_INDEX_OPTIONS_H_
+
+#include "icing/legacy/index/icing-common-types.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+
+namespace icing {
+namespace lib {
+
struct IcingLiteIndexOptions {
  IcingLiteIndexOptions() = default;
  // Creates IcingLiteIndexOptions based off of the specified parameters. All
  // other fields are calculated based on the value of
  // hit_buffer_want_merge_bytes and the logic in CalculateHitBufferSize and
  // CalculateTrieOptions.
  IcingLiteIndexOptions(const std::string& filename_base,
                        uint32_t hit_buffer_want_merge_bytes);

  // Sizing for the term lexicon trie.
  IcingDynamicTrie::Options lexicon_options;
  // Sizing for the display-mappings trie (same values as lexicon_options
  // when built via the parameterized constructor).
  IcingDynamicTrie::Options display_mappings_options;

  // Prefix for all files belonging to this lite index.
  std::string filename_base;
  uint32_t hit_buffer_want_merge_bytes = 0;
  // Hit buffer capacity in elements, derived from the merge threshold.
  uint32_t hit_buffer_size = 0;
};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_INDEX_ICING_LITE_INDEX_OPTIONS_H_
diff --git a/icing/legacy/index/icing-mmapper.cc b/icing/legacy/index/icing-mmapper.cc
new file mode 100644
index 0000000..737335c
--- /dev/null
+++ b/icing/legacy/index/icing-mmapper.cc
@@ -0,0 +1,106 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2012 Google Inc. All Rights Reserved.
+// Author: sbanacho@google.com (Scott Banachowski)
+//
+#include "icing/legacy/index/icing-mmapper.h"
+
+#include <errno.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
// Deferred-mapping constructor: the object stays invalid (is_valid() is
// false) until Remap() establishes a mapping.
IcingMMapper::IcingMMapper(bool read_only, int flags)
    : address_(nullptr),
      len_(0),
      flags_(flags),
      location_(0),
      mmap_len_(0),
      mmap_result_(nullptr),
      read_only_(read_only) {}
+
// Maps [location, location + size) of fd immediately. Constructors cannot
// report failure, so callers must check is_valid() afterwards.
IcingMMapper::IcingMMapper(int fd, bool read_only, uint64_t location,
                           size_t size, int flags)
    : address_(nullptr),
      len_(0),
      flags_(flags),
      location_(0),
      mmap_len_(0),
      mmap_result_(nullptr),
      read_only_(read_only) {
  DoMapping(fd, location, size);
}
+
+void IcingMMapper::DoMapping(int fd, uint64_t location, size_t size) {
+ uint64_t aligned_offset =
+ (location / system_page_size()) * system_page_size();
+ size_t alignment_adjustment = location - aligned_offset;
+ size_t mmap_len = alignment_adjustment + size;
+
+ int prot = read_only_ ? (PROT_READ) : (PROT_READ | PROT_WRITE);
+
+ mmap_result_ = mmap(nullptr, mmap_len, prot, flags_, fd, aligned_offset);
+
+ if (mmap_result_ != MAP_FAILED) {
+ len_ = size;
+ location_ = location;
+ mmap_len_ = mmap_len;
+ address_ = reinterpret_cast<uint8_t *>(mmap_result_) + alignment_adjustment;
+ } else {
+ const char *errstr = strerror(errno);
+ ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+ "Could not mmap file for reading: %s", errstr);
+ mmap_result_ = nullptr;
+ }
+}
+
// Moves the mapping to a new fd/location/size. The previous mapping is
// released even if the new one fails; returns is_valid() of the result.
bool IcingMMapper::Remap(int fd, uint64_t location, size_t size) {
  Unmap();
  DoMapping(fd, location, size);
  return is_valid();
}
+
+void IcingMMapper::Unmap() {
+ if (mmap_result_ != nullptr) {
+ munmap(mmap_result_, mmap_len_);
+ }
+ address_ = nullptr;
+ len_ = 0;
+ location_ = 0;
+ mmap_len_ = 0;
+ mmap_result_ = nullptr;
+}
+
// Releases the mapping only; the file descriptor is owned by the caller.
IcingMMapper::~IcingMMapper() { Unmap(); }
+
+bool IcingMMapper::Sync() {
+ if (is_valid() && !read_only_) {
+ if (msync(mmap_result_, mmap_len_, MS_SYNC) != 0) {
+ ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("msync failed: %s",
+ strerror(errno));
+ return false;
+ }
+ }
+ return true;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/index/icing-mmapper.h b/icing/legacy/index/icing-mmapper.h
new file mode 100644
index 0000000..bf62aa5
--- /dev/null
+++ b/icing/legacy/index/icing-mmapper.h
@@ -0,0 +1,94 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2012 Google Inc. All Rights Reserved.
+// Author: sbanacho@google.com (Scott Banachowski)
+//
+// This class is a helper for mmapping a file.
+// Use as a scoped allocator, the memory is mapped
+// on construction and released on destruction.
+
+#ifndef ICING_LEGACY_INDEX_ICING_MMAPPER_H_
+#define ICING_LEGACY_INDEX_ICING_MMAPPER_H_
+
+#include <stdint.h>
+#include <unistd.h>
+
+namespace icing {
+namespace lib {
+
// RAII wrapper around mmap: maps a file region on construction (or on
// Remap) and unmaps on destruction. Does not own the file descriptor.
class IcingMMapper {
 public:
  // Provide a valid, open file description (with matching permissions
  // for read or write). The location into the file you wish to map,
  // and the size. "flags" are passed in directly to mmap.
  IcingMMapper(int fd, bool read_only, uint64_t location, size_t size,
               int flags);

  // Set up Mmapper, but delay mapping until Remap is called.
  IcingMMapper(bool read_only, int flags);

  // Will unmap the region on delete. Does not close the file.
  ~IcingMMapper();

  // Move the location of the mapping to a new location. Returns
  // true if valid.
  bool Remap(int fd, uint64_t location, size_t size);

  // Close the mapping and become invalid.
  void Unmap();

  // Sync the mapped region to the filesystem.
  bool Sync();

  // Check to see if the file was successfully mapped.
  bool is_valid() const { return (address_ != nullptr); }

  // The address in memory of the mapped file, returns NULL if the
  // mapping of the region was unsuccesful.
  const uint8_t *address() const { return address_; }

  uint8_t *address() { return address_; }

  // Length of the mapped region as requested (may be less than the
  // actual mmap length when the location needed page alignment).
  size_t len() const { return len_; }

  uint64_t location() const { return location_; }

  // Cached result of sysconf(_SC_PAGE_SIZE).
  static size_t __attribute__((const)) system_page_size() {
    static const size_t page_size = sysconf(_SC_PAGE_SIZE);
    return page_size;
  }

  // Rounds `size` up to a multiple of the system page size.
  static size_t page_aligned_size(uint32_t size) {
    return ((size + system_page_size() - 1) / system_page_size()) *
           system_page_size();
  }

 private:
  // Shared implementation of both the mapping constructor and Remap().
  void DoMapping(int fd, uint64_t location, size_t size);

  uint8_t *address_;
  size_t len_;         // the requested mapping length
  const int flags_;    // flags passed in to mmap
  uint64_t location_;  // the requested mapping file location
  size_t mmap_len_;    // the actual mapping length
  void *mmap_result_;
  const bool read_only_;
};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_INDEX_ICING_MMAPPER_H_
diff --git a/icing/legacy/index/icing-mock-filesystem.h b/icing/legacy/index/icing-mock-filesystem.h
new file mode 100644
index 0000000..31e012a
--- /dev/null
+++ b/icing/legacy/index/icing-mock-filesystem.h
@@ -0,0 +1,98 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_LEGACY_INDEX_ICING_MOCK_FILESYSTEM_H_
+#define ICING_LEGACY_INDEX_ICING_MOCK_FILESYSTEM_H_
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "icing/legacy/index/icing-filesystem.h"
+#include "gmock/gmock.h"
+
+namespace icing {
+namespace lib {
+
+// gMock test double for IcingFilesystem. Each entry mocks the
+// correspondingly named const method; overloads (GetFileSize, Truncate)
+// are distinguished by their argument lists.
+// NOTE(review): MOCK_CONST_METHODn is the legacy gMock macro family; if
+// the vendored gMock is >= 1.10, consider migrating to
+// MOCK_METHOD(ret, Name, (args), (const)) — confirm version first.
+class IcingMockFilesystem : public IcingFilesystem {
+ public:
+  MOCK_CONST_METHOD1(DeleteFile, bool(const char *file_name));
+
+  MOCK_CONST_METHOD1(DeleteDirectory, bool(const char *dir_name));
+
+  MOCK_CONST_METHOD1(DeleteDirectoryRecursively, bool(const char *dir_name));
+
+  MOCK_CONST_METHOD1(FileExists, bool(const char *file_name));
+
+  MOCK_CONST_METHOD1(DirectoryExists, bool(const char *dir_name));
+
+  MOCK_CONST_METHOD1(GetBasenameIndex, int(const char *file_name));
+
+  MOCK_CONST_METHOD1(GetBasename, std::string(const char *file_name));
+
+  MOCK_CONST_METHOD1(GetDirname, std::string(const char *file_name));
+
+  MOCK_CONST_METHOD2(ListDirectory, bool(const char *dir_name,
+                                         std::vector<std::string> *entries));
+
+  MOCK_CONST_METHOD2(GetMatchingFiles,
+                     bool(const char *glob, std::vector<std::string> *matches));
+
+  MOCK_CONST_METHOD1(OpenForWrite, int(const char *file_name));
+
+  MOCK_CONST_METHOD1(OpenForAppend, int(const char *file_name));
+
+  MOCK_CONST_METHOD1(OpenForRead, int(const char *file_name));
+
+  // Overloaded: by file descriptor or by path.
+  MOCK_CONST_METHOD1(GetFileSize, uint64_t(int fd));
+
+  MOCK_CONST_METHOD1(GetFileSize, uint64_t(const char *filename));
+
+  // Overloaded: by file descriptor or by path.
+  MOCK_CONST_METHOD2(Truncate, bool(int fd, uint64_t new_size));
+
+  MOCK_CONST_METHOD2(Truncate, bool(const char *filename, uint64_t new_size));
+
+  MOCK_CONST_METHOD2(Grow, bool(int fd, uint64_t new_size));
+
+  MOCK_CONST_METHOD3(Write, bool(int fd, const void *data, size_t data_size));
+  MOCK_CONST_METHOD4(PWrite, bool(int fd, off_t offset, const void *data,
+                                  size_t data_size));
+
+  MOCK_CONST_METHOD1(DataSync, bool(int fd));
+
+  MOCK_CONST_METHOD2(RenameFile,
+                     bool(const char *old_name, const char *new_name));
+
+  MOCK_CONST_METHOD2(SwapFiles, bool(const char *one, const char *two));
+
+  MOCK_CONST_METHOD1(CreateDirectory, bool(const char *dir_name));
+
+  MOCK_CONST_METHOD1(CreateDirectoryRecursively, bool(const char *dir_name));
+
+  MOCK_CONST_METHOD2(CopyFile, bool(const char *src, const char *dst));
+
+  MOCK_CONST_METHOD4(ComputeChecksum, bool(int fd, uint32_t *checksum,
+                                           uint64_t offset, uint64_t length));
+
+  MOCK_CONST_METHOD1(GetDiskUsage, uint64_t(const char *path));
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_INDEX_ICING_MOCK_FILESYSTEM_H_
diff --git a/icing/legacy/index/icing-storage-collection.cc b/icing/legacy/index/icing-storage-collection.cc
new file mode 100644
index 0000000..d31f892
--- /dev/null
+++ b/icing/legacy/index/icing-storage-collection.cc
@@ -0,0 +1,120 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/legacy/index/icing-storage-collection.h"
+
+#include "icing/legacy/core/icing-compat.h"
+#include "icing/legacy/index/icing-filesystem.h"
+
+namespace icing {
+namespace lib {
+
+IcingStorageCollection::~IcingStorageCollection() {
+  // The collection owns every IIcingStorage handed to Add()/Swap().
+  for (FileInfo &info : files_) {
+    delete info.file;
+  }
+}
+
+void IcingStorageCollection::Add(IIcingStorage *file,
+                                 bool remove_if_corrupted) {
+  // Construct the FileInfo in place; ownership of |file| is taken.
+  files_.emplace_back(file, remove_if_corrupted);
+}
+
+void IcingStorageCollection::Swap(const IIcingStorage *current_file,
+                                  IIcingStorage *new_file) {
+  // Delete and replace every entry matching |current_file|, keeping
+  // that entry's original remove_if_corrupted policy.
+  for (FileInfo &info : files_) {
+    if (info.file == current_file) {
+      delete info.file;
+      info = FileInfo(new_file, info.remove_if_corrupted);
+    }
+  }
+}
+
+bool IcingStorageCollection::UpgradeTo(int new_version) {
+  // Attempt the upgrade on every file even after a failure, so each
+  // file gets its chance to migrate; succeed only if all succeeded.
+  bool all_ok = true;
+  for (FileInfo &info : files_) {
+    all_ok = info.file->UpgradeTo(new_version) && all_ok;
+  }
+  return all_ok;
+}
+
+// Initializes every nested storage, continuing past failures so that
+// each one is attempted. Returns true only when all of them succeed.
+bool IcingStorageCollection::Init() {
+  bool all_ok = true;
+  for (FileInfo &info : files_) {
+    // remove_if_corrupted selects InitWithRetry, which clears the
+    // underlying data and retries once when the first Init fails.
+    const bool ok = info.remove_if_corrupted
+                        ? IIcingStorage::InitWithRetry(info.file)
+                        : info.file->Init();
+    all_ok = ok && all_ok;
+  }
+  return all_ok;
+}
+
+void IcingStorageCollection::Close() {
+  // Close every nested storage; ownership is retained.
+  for (FileInfo &info : files_) {
+    info.file->Close();
+  }
+}
+
+bool IcingStorageCollection::Remove() {
+  // Every file is attempted even if an earlier removal fails;
+  // report success only when all removals succeeded.
+  bool all_ok = true;
+  for (FileInfo &info : files_) {
+    all_ok = info.file->Remove() && all_ok;
+  }
+  return all_ok;
+}
+
+bool IcingStorageCollection::Sync() {
+  // Sync every file regardless of earlier failures; report success
+  // only when every Sync succeeded.
+  bool all_ok = true;
+  for (FileInfo &info : files_) {
+    all_ok = info.file->Sync() && all_ok;
+  }
+  return all_ok;
+}
+
+uint64_t IcingStorageCollection::GetDiskUsage() const {
+  // Accumulate disk usage over every nested storage (see
+  // IcingFilesystem::IncrementByOrSetInvalid for error propagation).
+  uint64_t total = 0;
+  for (const FileInfo &info : files_) {
+    IcingFilesystem::IncrementByOrSetInvalid(info.file->GetDiskUsage(),
+                                             &total);
+  }
+  return total;
+}
+
+void IcingStorageCollection::OnSleep() {
+  // Forward the sleep notification to every nested storage.
+  for (FileInfo &info : files_) {
+    info.file->OnSleep();
+  }
+}
+
+void IcingStorageCollection::GetDebugInfo(int verbosity,
+                                          std::string *out) const {
+  // Append each nested storage's debug info to |out| in Add() order.
+  for (const FileInfo &info : files_) {
+    info.file->GetDebugInfo(verbosity, out);
+  }
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/index/icing-storage-collection.h b/icing/legacy/index/icing-storage-collection.h
new file mode 100644
index 0000000..dedfe33
--- /dev/null
+++ b/icing/legacy/index/icing-storage-collection.h
@@ -0,0 +1,64 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: sbanacho@google.com (Scott Banachowski)
+// vmarko@google.com (Vladimir Marko)
+//
+// An implementation of IIcingStorage that holds an arbitrary number of nested
+// IIcingStorage items.
+
+#ifndef ICING_LEGACY_INDEX_ICING_STORAGE_COLLECTION_H_
+#define ICING_LEGACY_INDEX_ICING_STORAGE_COLLECTION_H_
+
+#include <string>
+#include <vector>
+
+#include "icing/legacy/index/icing-storage.h"
+
+namespace icing {
+namespace lib {
+
+// Class that owns a list of IDocumentStores.
+// All operations fan out to every nested IIcingStorage; bool-returning
+// operations run on every entry and succeed only if all entries succeed.
+class IcingStorageCollection : public IIcingStorage {
+ public:
+  // Deletes every IIcingStorage previously handed to Add()/Swap().
+  ~IcingStorageCollection() override;
+  // Takes ownership of file.
+  // remove_if_corrupted specifies Init behavior when backing file is
+  // corrupted: if false, Init will fail, else Init will clear the
+  // underlying file and succeed.
+  void Add(IIcingStorage *file, bool remove_if_corrupted);
+  // Deletes current_file and replaces it with new_file (whose ownership
+  // is taken), keeping the entry's remove_if_corrupted setting.
+  void Swap(const IIcingStorage *current_file, IIcingStorage *new_file);
+  bool UpgradeTo(int new_version) override;
+  bool Init() override;
+  void Close() override;
+  bool Remove() override;
+  bool Sync() override;
+  uint64_t GetDiskUsage() const override;
+  void OnSleep() override;
+  void GetDebugInfo(int verbosity, std::string *out) const override;
+
+ private:
+  // Pairs an owned storage pointer with its corruption-recovery policy.
+  struct FileInfo {
+    FileInfo(IIcingStorage *file_in, bool remove_if_corrupted_in)
+        : file(file_in), remove_if_corrupted(remove_if_corrupted_in) {}
+
+    IIcingStorage *file;
+    bool remove_if_corrupted;
+  };
+  std::vector<FileInfo> files_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_INDEX_ICING_STORAGE_COLLECTION_H_
diff --git a/icing/legacy/index/icing-storage-file.cc b/icing/legacy/index/icing-storage-file.cc
new file mode 100644
index 0000000..b27ec67
--- /dev/null
+++ b/icing/legacy/index/icing-storage-file.cc
@@ -0,0 +1,118 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/legacy/index/icing-storage-file.h"
+
+#include <inttypes.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "icing/legacy/core/icing-compat.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/legacy/core/icing-timer.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+// Records the backing filename and filesystem; nothing is opened or
+// created until Init(). |filesystem| must outlive this object, since it
+// is used by Init/Sync/Remove later.
+IcingStorageFile::IcingStorageFile(const std::string &filename,
+                                   const IcingFilesystem *filesystem)
+    : IIcingStorage(), filesystem_(filesystem), filename_(filename) {}
+
+bool IcingStorageFile::Init() {
+  // Idempotent: a second call on an initialized file is a no-op.
+  if (is_initialized_) {
+    return true;
+  }
+  // The parent directory must exist before OnInit can open the file.
+  std::string storage_dir = filesystem_->GetDirname(filename_.c_str());
+  if (!filesystem_->CreateDirectoryRecursively(storage_dir.c_str())) {
+    return false;
+  }
+  is_initialized_ = OnInit();
+  // A successful OnInit is required to have opened fd_.
+  if (is_initialized_ && fd_.get() < 0) {
+    ICING_LOG(FATAL)
+        << "Storage file descriptor not set after initialization";
+  }
+  return is_initialized_;
+}
+
+void IcingStorageFile::Close() {
+  if (!is_initialized_) {
+    return;
+  }
+  // Let the subclass release its resources before the fd is dropped.
+  OnClose();
+  fd_.reset();
+  is_initialized_ = false;
+}
+
+bool IcingStorageFile::Remove() {
+  // Close first so fd_ is released before the file is unlinked.
+  Close();
+  return filesystem_->DeleteFile(filename_.c_str());
+}
+
+// Flush sequence: subclass PreSync, kernel-level DataSync on fd_, then
+// subclass PostSync. Stops and returns false at the first failure.
+// REQUIRES: Init() succeeded (fatal log otherwise).
+bool IcingStorageFile::Sync() {
+  if (!is_initialized_) {
+    ICING_LOG(FATAL) << "Storage file not initialized";
+  }
+
+  IcingTimer timer;
+  // Give the subclass a chance to write buffered state into the file.
+  if (!PreSync()) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Pre-sync %s failed",
+                                                      filename_.c_str());
+    return false;
+  }
+  // Push the file's data down to stable storage.
+  if (!filesystem_->DataSync(fd_.get())) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Sync %s failed",
+                                                      filename_.c_str());
+    return false;
+  }
+  if (!PostSync()) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Post-sync %s failed",
+                                                      filename_.c_str());
+    return false;
+  }
+  // Timer covers the whole PreSync/DataSync/PostSync sequence.
+  ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+      "Syncing %s took %.3fms", filename_.c_str(), timer.Elapsed() * 1000.);
+  return true;
+}
+
+// Disk usage of the single backing file, as reported for fd_ by the
+// filesystem (kBadFileSize on error, per the header contract).
+uint64_t IcingStorageFile::GetDiskUsage() const {
+  return filesystem_->GetDiskUsage(fd_.get());
+}
+
+bool IcingStorageFile::PreSync() {
+  // Default implementation is a no-op; subclasses override to flush
+  // buffered state into the file before the fsync.
+  return true;
+}
+
+bool IcingStorageFile::PostSync() {
+  // Default implementation is a no-op; subclasses override to do work
+  // that must follow a successful fsync.
+  return true;
+}
+
+// Appends the filename and current file size to |out|.
+// REQUIRES: Init() succeeded (fatal log otherwise).
+void IcingStorageFile::GetDebugInfo(int verbosity, std::string *out) const {
+  if (!is_initialized_) {
+    ICING_LOG(FATAL) << "Storage file not initialized";
+  }
+
+  if (verbosity >= 0) {  // Always
+    uint64_t size = filesystem_->GetFileSize(fd_.get());
+    IcingStringUtil::SStringAppendF(
+        out, 1000, "Filename: %s Size: %" PRIu64 "\n", filename_.c_str(), size);
+  }
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/legacy/index/icing-storage-file.h b/icing/legacy/index/icing-storage-file.h
new file mode 100644
index 0000000..94cab21
--- /dev/null
+++ b/icing/legacy/index/icing-storage-file.h
@@ -0,0 +1,96 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Base class for single file-based IIcingStorage implementations.
+
+#ifndef ICING_LEGACY_INDEX_ICING_STORAGE_FILE_H_
+#define ICING_LEGACY_INDEX_ICING_STORAGE_FILE_H_
+
+#include <string>
+
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-storage.h"
+
+namespace icing {
+namespace lib {
+
+// Minimal implementation that is backed by a filename and file descriptor.
+// Subclasses implement OnInit/OnClose (and optionally PreSync/PostSync)
+// around the fd_ lifecycle managed here.
+class IcingStorageFile : virtual public IIcingStorage {
+ public:
+  // |filesystem| must outlive this object; nothing is opened until Init().
+  IcingStorageFile(const std::string &filename,
+                   const IcingFilesystem *filesystem);
+
+  // This must be called before the object is usable.
+  // Returns true if the storage is in a usable state.
+  bool Init() override;
+
+  // Default implementation is a no-op.
+  bool UpgradeTo(int new_version) override { return true; }
+
+  // Closes all files and system resources.
+  // Init() must be called before the object is used again.
+  void Close() override;
+
+  // Closes all system resources, then removes the backing file.
+  // Init() is required before the object is used again.
+  // Returns true on success.
+  bool Remove() override;
+
+  // Syncs any unwritten data to disk.
+  // REQUIRES: is_initialized() == true
+  bool Sync() override;
+
+  // Gets the file size of the underlying file.
+  // Returns kBadFileSize on error.
+  uint64_t GetDiskUsage() const override;
+
+  // True between a successful Init() and the next Close()/Remove().
+  bool is_initialized() const { return is_initialized_; }
+
+  const std::string &filename() const { return filename_; }
+
+  void GetDebugInfo(int verbosity, std::string *out) const override;
+
+ protected:
+  // Implements any initialization, returning true if successful.
+  // The child is responsible for calling open on the fd_ file descriptor,
+  // before returning from OnInit().
+  virtual bool OnInit() = 0;
+
+  // OnClose should remove any resources, other than the file, created
+  // during the Init.
+  // The file itself will be closed after the OnClose.
+  virtual void OnClose() = 0;
+
+  // Called before the file is synced. The child should write
+  // anything it hasn't yet written to the file so that it can be
+  // stored. Default implementation is to do nothing. Return true if
+  // successful.
+  virtual bool PreSync();
+
+  // Called after the file is synced. Default implementation is to do
+  // nothing. Return true if successful.
+  virtual bool PostSync();
+
+  // Non-owning; supplied at construction.
+  const IcingFilesystem *const filesystem_;
+  // Owned descriptor for the backing file; opened by the subclass in OnInit.
+  IcingScopedFd fd_;
+
+ private:
+  const std::string filename_;
+  bool is_initialized_ = false;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_INDEX_ICING_STORAGE_FILE_H_
diff --git a/icing/legacy/index/icing-storage.h b/icing/legacy/index/icing-storage.h
new file mode 100644
index 0000000..cc06c54
--- /dev/null
+++ b/icing/legacy/index/icing-storage.h
@@ -0,0 +1,91 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Author: sbanacho@google.com (Scott Banachowski)
+// vmarko@google.com (Vladimir Marko)
+//
+// Interface class for disk-backed storage.
+
+#ifndef ICING_LEGACY_INDEX_ICING_STORAGE_H_
+#define ICING_LEGACY_INDEX_ICING_STORAGE_H_
+
+#include <cstdint>
+#include <string>
+
+namespace icing {
+namespace lib {
+
+// Abstract base class for interface.
+class IIcingStorage {
+ public:
+  // Any resource that is not removed in the Close() function should
+  // be removed in the child's destructor.
+  virtual ~IIcingStorage() = default;
+
+  // Storage objects are non-copyable. Deleting the copy operations
+  // (instead of privately declaring them without a definition, as
+  // before) turns accidental copies into compile-time errors rather
+  // than link-time errors.
+  IIcingStorage(const IIcingStorage&) = delete;
+  IIcingStorage& operator=(const IIcingStorage&) = delete;
+
+  // This is called to upgrade to a new version.
+  // Returns true if the data store can be upgraded successfully.
+  virtual bool UpgradeTo(int new_version) = 0;
+
+  // This must be called before the object is usable.
+  // Returns true if the storage is in a usable state.
+  virtual bool Init() = 0;
+
+  // Attempts to init the given IIcingStorage. On failure, clears the
+  // underlying data and tries again. Returns false if the second init is
+  // also a failure.
+  static bool InitWithRetry(IIcingStorage* file_in) {
+    if (file_in->Init()) {
+      return true;
+    }
+    return file_in->Remove() && file_in->Init();
+  }
+
+  // Closes all files and system resources.
+  // Init() must be called before the object is used again.
+  virtual void Close() = 0;
+
+  // Closes all system resources, then removes the backing file.
+  // Init() is required before the object is used again.
+  // Returns true on success.
+  virtual bool Remove() = 0;
+
+  // Syncs any unwritten data to disk.
+  virtual bool Sync() = 0;
+
+  // Gets the total amount of disk usage for the object (i.e. the sum of the
+  // bytes of all underlying files).
+  // Note: reported values are estimated via the number of blocks the file
+  // takes up on disk. Sparse files are reported as their physical disk
+  // usage, as opposed to the logical size when read.
+  // Returns kBadFileSize on error.
+  virtual uint64_t GetDiskUsage() const = 0;
+
+  // Optional handler for when our process is entering a vulnerable
+  // state (highly likely to get killed). Default implementation does
+  // nothing.
+  virtual void OnSleep() {}
+
+  virtual void GetDebugInfo(int verbosity, std::string* out) const = 0;
+
+ protected:
+  IIcingStorage() = default;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_LEGACY_INDEX_ICING_STORAGE_H_
diff --git a/icing/legacy/index/icing-variant-map.h b/icing/legacy/index/icing-variant-map.h
new file mode 100644
index 0000000..dc55305
--- /dev/null
+++ b/icing/legacy/index/icing-variant-map.h
@@ -0,0 +1,805 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2013 Google Inc. All Rights Reserved.
+// Author: ulas@google.com (Ulas Kirazci)
+//
+// Generated from translit-table.cpp. Maps the first character of
+// ascii to possible utf8 variants. Also a set of utf8 characters are
+// nullable (e.g., may be skipped in the key). These are listed as
+// "[0]".
+//
+// Then, each mapping can be an exact mapping (a -> à) or must/can
+// match a longer prefix in the key (d or dj -> đ). These are encoded
+// after the mapping and are called restricts.
+//
+// Only includes lowercase and isletter characters.
+
+#ifndef ICING_LEGACY_INDEX_ICING_VARIANT_MAP_H_
+#define ICING_LEGACY_INDEX_ICING_VARIANT_MAP_H_
+
+const char kVariantMapElts[] =
+ "ʰ" // [0] -> [ʰ] \312\260
+ "ʱ" // [0] -> [ʱ] \312\261
+ "ʲ" // [0] -> [ʲ] \312\262
+ "ʳ" // [0] -> [ʳ] \312\263
+ "ʴ" // [0] -> [ʴ] \312\264
+ "ʵ" // [0] -> [ʵ] \312\265
+ "ʶ" // [0] -> [ʶ] \312\266
+ "ʷ" // [0] -> [ʷ] \312\267
+ "ʸ" // [0] -> [ʸ] \312\270
+ "ʹ" // [0] -> [ʹ] \312\271
+ "ʺ" // [0] -> [ʺ] \312\272
+ "ʻ" // [0] -> [ʻ] \312\273
+ "ʼ" // [0] -> [ʼ] \312\274
+ "ʽ" // [0] -> [ʽ] \312\275
+ "ʾ" // [0] -> [ʾ] \312\276
+ "ʿ" // [0] -> [ʿ] \312\277
+ "ˀ" // [0] -> [ˀ] \313\200
+ "ˁ" // [0] -> [ˁ] \313\201
+ "ˆ" // [0] -> [ˆ] \313\206
+ "ˇ" // [0] -> [ˇ] \313\207
+ "ˈ" // [0] -> [ˈ] \313\210
+ "ˉ" // [0] -> [ˉ] \313\211
+ "ˊ" // [0] -> [ˊ] \313\212
+ "ˋ" // [0] -> [ˋ] \313\213
+ "ˌ" // [0] -> [ˌ] \313\214
+ "ˍ" // [0] -> [ˍ] \313\215
+ "ˎ" // [0] -> [ˎ] \313\216
+ "ˏ" // [0] -> [ˏ] \313\217
+ "ː" // [0] -> [ː] \313\220
+ "ˑ" // [0] -> [ˑ] \313\221
+ "ˠ" // [0] -> [ˠ] \313\240
+ "ˡ" // [0] -> [ˡ] \313\241
+ "ˢ" // [0] -> [ˢ] \313\242
+ "ˣ" // [0] -> [ˣ] \313\243
+ "ˤ" // [0] -> [ˤ] \313\244
+ "ˬ" // [0] -> [ˬ] \313\254
+ "ˮ" // [0] -> [ˮ] \313\256
+ "̀" // [0] -> [̀] \314\200
+ "́" // [0] -> [́] \314\201
+ "̂" // [0] -> [̂] \314\202
+ "̃" // [0] -> [̃] \314\203
+ "̄" // [0] -> [̄] \314\204
+ "̅" // [0] -> [̅] \314\205
+ "̆" // [0] -> [̆] \314\206
+ "̇" // [0] -> [̇] \314\207
+ "̈" // [0] -> [̈] \314\210
+ "̉" // [0] -> [̉] \314\211
+ "̊" // [0] -> [̊] \314\212
+ "̋" // [0] -> [̋] \314\213
+ "̌" // [0] -> [̌] \314\214
+ "̍" // [0] -> [̍] \314\215
+ "̎" // [0] -> [̎] \314\216
+ "̏" // [0] -> [̏] \314\217
+ "̐" // [0] -> [̐] \314\220
+ "̑" // [0] -> [̑] \314\221
+ "̒" // [0] -> [̒] \314\222
+ "̓" // [0] -> [̓] \314\223
+ "̔" // [0] -> [̔] \314\224
+ "̕" // [0] -> [̕] \314\225
+ "̖" // [0] -> [̖] \314\226
+ "̗" // [0] -> [̗] \314\227
+ "̘" // [0] -> [̘] \314\230
+ "̙" // [0] -> [̙] \314\231
+ "̚" // [0] -> [̚] \314\232
+ "̛" // [0] -> [̛] \314\233
+ "̜" // [0] -> [̜] \314\234
+ "̝" // [0] -> [̝] \314\235
+ "̞" // [0] -> [̞] \314\236
+ "̟" // [0] -> [̟] \314\237
+ "̠" // [0] -> [̠] \314\240
+ "̡" // [0] -> [̡] \314\241
+ "̢" // [0] -> [̢] \314\242
+ "̣" // [0] -> [̣] \314\243
+ "̤" // [0] -> [̤] \314\244
+ "̥" // [0] -> [̥] \314\245
+ "̦" // [0] -> [̦] \314\246
+ "̧" // [0] -> [̧] \314\247
+ "̨" // [0] -> [̨] \314\250
+ "̩" // [0] -> [̩] \314\251
+ "̪" // [0] -> [̪] \314\252
+ "̫" // [0] -> [̫] \314\253
+ "̬" // [0] -> [̬] \314\254
+ "̭" // [0] -> [̭] \314\255
+ "̮" // [0] -> [̮] \314\256
+ "̯" // [0] -> [̯] \314\257
+ "̰" // [0] -> [̰] \314\260
+ "̱" // [0] -> [̱] \314\261
+ "̲" // [0] -> [̲] \314\262
+ "̳" // [0] -> [̳] \314\263
+ "̴" // [0] -> [̴] \314\264
+ "̵" // [0] -> [̵] \314\265
+ "̶" // [0] -> [̶] \314\266
+ "̷" // [0] -> [̷] \314\267
+ "̸" // [0] -> [̸] \314\270
+ "̹" // [0] -> [̹] \314\271
+ "̺" // [0] -> [̺] \314\272
+ "̻" // [0] -> [̻] \314\273
+ "̼" // [0] -> [̼] \314\274
+ "̽" // [0] -> [̽] \314\275
+ "̾" // [0] -> [̾] \314\276
+ "̿" // [0] -> [̿] \314\277
+ "̀" // [0] -> [̀] \315\200
+ "́" // [0] -> [́] \315\201
+ "͂" // [0] -> [͂] \315\202
+ "̓" // [0] -> [̓] \315\203
+ "̈́" // [0] -> [̈́] \315\204
+ "ͅ" // [0] -> [ͅ] \315\205
+ "͆" // [0] -> [͆] \315\206
+ "͇" // [0] -> [͇] \315\207
+ "͈" // [0] -> [͈] \315\210
+ "͉" // [0] -> [͉] \315\211
+ "͊" // [0] -> [͊] \315\212
+ "͋" // [0] -> [͋] \315\213
+ "͌" // [0] -> [͌] \315\214
+ "͍" // [0] -> [͍] \315\215
+ "͎" // [0] -> [͎] \315\216
+ "͏" // [0] -> [͏] \315\217
+ "͐" // [0] -> [͐] \315\220
+ "͑" // [0] -> [͑] \315\221
+ "͒" // [0] -> [͒] \315\222
+ "͓" // [0] -> [͓] \315\223
+ "͔" // [0] -> [͔] \315\224
+ "͕" // [0] -> [͕] \315\225
+ "͖" // [0] -> [͖] \315\226
+ "͗" // [0] -> [͗] \315\227
+ "͘" // [0] -> [͘] \315\230
+ "͙" // [0] -> [͙] \315\231
+ "͚" // [0] -> [͚] \315\232
+ "͛" // [0] -> [͛] \315\233
+ "͜" // [0] -> [͜] \315\234
+ "͝" // [0] -> [͝] \315\235
+ "͞" // [0] -> [͞] \315\236
+ "͟" // [0] -> [͟] \315\237
+ "͠" // [0] -> [͠] \315\240
+ "͡" // [0] -> [͡] \315\241
+ "͢" // [0] -> [͢] \315\242
+ "ͣ" // [0] -> [ͣ] \315\243
+ "ͤ" // [0] -> [ͤ] \315\244
+ "ͥ" // [0] -> [ͥ] \315\245
+ "ͦ" // [0] -> [ͦ] \315\246
+ "ͧ" // [0] -> [ͧ] \315\247
+ "ͨ" // [0] -> [ͨ] \315\250
+ "ͩ" // [0] -> [ͩ] \315\251
+ "ͪ" // [0] -> [ͪ] \315\252
+ "ͫ" // [0] -> [ͫ] \315\253
+ "ͬ" // [0] -> [ͬ] \315\254
+ "ͭ" // [0] -> [ͭ] \315\255
+ "ͮ" // [0] -> [ͮ] \315\256
+ "ͯ" // [0] -> [ͯ] \315\257
+ "ъ" // [0] -> [ъ] \321\212
+ "ь" // [0] -> [ь] \321\214
+ "֑" // [0] -> [֑] \326\221
+ "֒" // [0] -> [֒] \326\222
+ "֓" // [0] -> [֓] \326\223
+ "֔" // [0] -> [֔] \326\224
+ "֕" // [0] -> [֕] \326\225
+ "֖" // [0] -> [֖] \326\226
+ "֗" // [0] -> [֗] \326\227
+ "֘" // [0] -> [֘] \326\230
+ "֙" // [0] -> [֙] \326\231
+ "֚" // [0] -> [֚] \326\232
+ "֛" // [0] -> [֛] \326\233
+ "֜" // [0] -> [֜] \326\234
+ "֝" // [0] -> [֝] \326\235
+ "֞" // [0] -> [֞] \326\236
+ "֟" // [0] -> [֟] \326\237
+ "֠" // [0] -> [֠] \326\240
+ "֡" // [0] -> [֡] \326\241
+ "֢" // [0] -> [֢] \326\242
+ "֣" // [0] -> [֣] \326\243
+ "֤" // [0] -> [֤] \326\244
+ "֥" // [0] -> [֥] \326\245
+ "֦" // [0] -> [֦] \326\246
+ "֧" // [0] -> [֧] \326\247
+ "֨" // [0] -> [֨] \326\250
+ "֩" // [0] -> [֩] \326\251
+ "֪" // [0] -> [֪] \326\252
+ "֫" // [0] -> [֫] \326\253
+ "֬" // [0] -> [֬] \326\254
+ "֭" // [0] -> [֭] \326\255
+ "֮" // [0] -> [֮] \326\256
+ "֯" // [0] -> [֯] \326\257
+ "ְ" // [0] -> [ְ] \326\260
+ "ֱ" // [0] -> [ֱ] \326\261
+ "ֲ" // [0] -> [ֲ] \326\262
+ "ֳ" // [0] -> [ֳ] \326\263
+ "ִ" // [0] -> [ִ] \326\264
+ "ֵ" // [0] -> [ֵ] \326\265
+ "ֶ" // [0] -> [ֶ] \326\266
+ "ַ" // [0] -> [ַ] \326\267
+ "ָ" // [0] -> [ָ] \326\270
+ "ֹ" // [0] -> [ֹ] \326\271
+ "ֺ" // [0] -> [ֺ] \326\272
+ "ֻ" // [0] -> [ֻ] \326\273
+ "ּ" // [0] -> [ּ] \326\274
+ "ֽ" // [0] -> [ֽ] \326\275
+ "ֿ" // [0] -> [ֿ] \326\277
+ "ׁ" // [0] -> [ׁ] \327\201
+ "ׂ" // [0] -> [ׂ] \327\202
+ "ׄ" // [0] -> [ׄ] \327\204
+ "ׅ" // [0] -> [ׅ] \327\205
+ "ׇ" // [0] -> [ׇ] \327\207
+ "ؐ" // [0] -> [ؐ] \330\220
+ "ؑ" // [0] -> [ؑ] \330\221
+ "ؒ" // [0] -> [ؒ] \330\222
+ "ؓ" // [0] -> [ؓ] \330\223
+ "ؔ" // [0] -> [ؔ] \330\224
+ "ؕ" // [0] -> [ؕ] \330\225
+ "ً" // [0] -> [ً] \331\213
+ "ٌ" // [0] -> [ٌ] \331\214
+ "ٍ" // [0] -> [ٍ] \331\215
+ "َ" // [0] -> [َ] \331\216
+ "ُ" // [0] -> [ُ] \331\217
+ "ِ" // [0] -> [ِ] \331\220
+ "ّ" // [0] -> [ّ] \331\221
+ "ْ" // [0] -> [ْ] \331\222
+ "ٓ" // [0] -> [ٓ] \331\223
+ "ٔ" // [0] -> [ٔ] \331\224
+ "ٕ" // [0] -> [ٕ] \331\225
+ "ٖ" // [0] -> [ٖ] \331\226
+ "ٗ" // [0] -> [ٗ] \331\227
+ "٘" // [0] -> [٘] \331\230
+ "ٙ" // [0] -> [ٙ] \331\231
+ "ٚ" // [0] -> [ٚ] \331\232
+ "ٛ" // [0] -> [ٛ] \331\233
+ "ٜ" // [0] -> [ٜ] \331\234
+ "ٝ" // [0] -> [ٝ] \331\235
+ "ٞ" // [0] -> [ٞ] \331\236
+ "ٰ" // [0] -> [ٰ] \331\260
+ "ۖ" // [0] -> [ۖ] \333\226
+ "ۗ" // [0] -> [ۗ] \333\227
+ "ۘ" // [0] -> [ۘ] \333\230
+ "ۙ" // [0] -> [ۙ] \333\231
+ "ۚ" // [0] -> [ۚ] \333\232
+ "ۛ" // [0] -> [ۛ] \333\233
+ "ۜ" // [0] -> [ۜ] \333\234
+ "۟" // [0] -> [۟] \333\237
+ "۠" // [0] -> [۠] \333\240
+ "ۡ" // [0] -> [ۡ] \333\241
+ "ۢ" // [0] -> [ۢ] \333\242
+ "ۣ" // [0] -> [ۣ] \333\243
+ "ۤ" // [0] -> [ۤ] \333\244
+ "ۧ" // [0] -> [ۧ] \333\247
+ "ۨ" // [0] -> [ۨ] \333\250
+ "۪" // [0] -> [۪] \333\252
+ "۫" // [0] -> [۫] \333\253
+ "۬" // [0] -> [۬] \333\254
+ "ۭ" // [0] -> [ۭ] \333\255
+ "ം" // [0] -> [ം] \340\264\202
+ "ഃ" // [0] -> [ഃ] \340\264\203
+ "്" // [0] -> [്] \340\265\215
+ "่" // [0] -> [่] \340\271\210
+ "้" // [0] -> [้] \340\271\211
+ "๊" // [0] -> [๊] \340\271\212
+ "๋" // [0] -> [๋] \340\271\213
+ "່" // [0] -> [່] \340\273\210
+ "້" // [0] -> [້] \340\273\211
+ "໊" // [0] -> [໊] \340\273\212
+ "໋" // [0] -> [໋] \340\273\213
+ "" // [0] -> [] \342\200\214
+ "" // [0] -> [] \342\200\215
+ "\0" // [0] end
+ "à" // [a] -> [à] \303\240
+ "á" // [a] -> [á] \303\241
+ "â" // [a] -> [â] \303\242
+ "ã" // [a] -> [ã] \303\243
+ "äa\0ae\0" // [a] -> [ä] \303\244
+ "åa\0aa\0" // [a] -> [å] \303\245
+ "æae\0" // [a] -> [æ] \303\246
+ "ā" // [a] -> [ā] \304\201
+ "ă" // [a] -> [ă] \304\203
+ "ą" // [a] -> [ą] \304\205
+ "ά" // [a] -> [ά] \316\254
+ "α" // [a] -> [α] \316\261
+ "а" // [a] -> [а] \320\260
+ "ъ" // [a] -> [ъ] \321\212
+ "ա" // [a] -> [ա] \325\241
+ "ა" // [a] -> [ა] \341\203\220
+ "ḁ" // [a] -> [ḁ] \341\270\201
+ "ẚ" // [a] -> [ẚ] \341\272\232
+ "ạ" // [a] -> [ạ] \341\272\241
+ "ả" // [a] -> [ả] \341\272\243
+ "ấ" // [a] -> [ấ] \341\272\245
+ "ầ" // [a] -> [ầ] \341\272\247
+ "ẩ" // [a] -> [ẩ] \341\272\251
+ "ẫ" // [a] -> [ẫ] \341\272\253
+ "ậ" // [a] -> [ậ] \341\272\255
+ "ắ" // [a] -> [ắ] \341\272\257
+ "ằ" // [a] -> [ằ] \341\272\261
+ "ẳ" // [a] -> [ẳ] \341\272\263
+ "ẵ" // [a] -> [ẵ] \341\272\265
+ "ặ" // [a] -> [ặ] \341\272\267
+ "ἀ" // [a] -> [ἀ] \341\274\200
+ "ἁ" // [a] -> [ἁ] \341\274\201
+ "ἂ" // [a] -> [ἂ] \341\274\202
+ "ἃ" // [a] -> [ἃ] \341\274\203
+ "ἄ" // [a] -> [ἄ] \341\274\204
+ "ἅ" // [a] -> [ἅ] \341\274\205
+ "ἆ" // [a] -> [ἆ] \341\274\206
+ "ἇ" // [a] -> [ἇ] \341\274\207
+ "\0" // [a] end
+ "б" // [b] -> [б] \320\261
+ "բ" // [b] -> [բ] \325\242
+ "ბ" // [b] -> [ბ] \341\203\221
+ "ḃ" // [b] -> [ḃ] \341\270\203
+ "ḅ" // [b] -> [ḅ] \341\270\205
+ "ḇ" // [b] -> [ḇ] \341\270\207
+ "\0" // [b] end
+ "ç" // [c] -> [ç] \303\247
+ "ć" // [c] -> [ć] \304\207
+ "ĉ" // [c] -> [ĉ] \304\211
+ "ċ" // [c] -> [ċ] \304\213
+ "č" // [c] -> [č] \304\215
+ "ц" // [c] -> [ц] \321\206
+ "чc\0ch\0" // [c] -> [ч] \321\207
+ "ћ" // [c] -> [ћ] \321\233
+ "ҹ" // [c] -> [ҹ] \322\271
+ "չch\0" // [c] -> [չ] \325\271
+ "ჩch\0" // [c] -> [ჩ] \341\203\251
+ "ჭch\0" // [c] -> [ჭ] \341\203\255
+ "ḉ" // [c] -> [ḉ] \341\270\211
+ "\0" // [c] end
+ "ð" // [d] -> [ð] \303\260
+ "ď" // [d] -> [ď] \304\217
+ "đd\0dj\0" // [d] -> [đ] \304\221
+ "δ" // [d] -> [δ] \316\264
+ "д" // [d] -> [д] \320\264
+ "ђdj\0" // [d] -> [ђ] \321\222
+ "ѕdz\0" // [d] -> [ѕ] \321\225
+ "џdj\0dz\0" // [d] -> [џ] \321\237
+ "դ" // [d] -> [դ] \325\244
+ "დ" // [d] -> [დ] \341\203\223
+ "ძdz\0" // [d] -> [ძ] \341\203\253
+ "ḋ" // [d] -> [ḋ] \341\270\213
+ "ḍ" // [d] -> [ḍ] \341\270\215
+ "ḏ" // [d] -> [ḏ] \341\270\217
+ "ḑ" // [d] -> [ḑ] \341\270\221
+ "ḓ" // [d] -> [ḓ] \341\270\223
+ "\0" // [d] end
+ "è" // [e] -> [è] \303\250
+ "é" // [e] -> [é] \303\251
+ "ê" // [e] -> [ê] \303\252
+ "ë" // [e] -> [ë] \303\253
+ "ē" // [e] -> [ē] \304\223
+ "ĕ" // [e] -> [ĕ] \304\225
+ "ė" // [e] -> [ė] \304\227
+ "ę" // [e] -> [ę] \304\231
+ "ě" // [e] -> [ě] \304\233
+ "έ" // [e] -> [έ] \316\255
+ "ε" // [e] -> [ε] \316\265
+ "ϵ" // [e] -> [ϵ] \317\265
+ "е" // [e] -> [е] \320\265
+ "э" // [e] -> [э] \321\215
+ "ѐ" // [e] -> [ѐ] \321\220
+ "ё" // [e] -> [ё] \321\221
+ "є" // [e] -> [є] \321\224
+ "ә" // [e] -> [ә] \323\231
+ "ե" // [e] -> [ե] \325\245
+ "է" // [e] -> [է] \325\247
+ "ըeh\0" // [e] -> [ը] \325\250
+ "ևev\0" // [e] -> [և] \326\207
+ "ე" // [e] -> [ე] \341\203\224
+ "ვ" // [e] -> [ვ] \341\203\225
+ "ḕ" // [e] -> [ḕ] \341\270\225
+ "ḗ" // [e] -> [ḗ] \341\270\227
+ "ḙ" // [e] -> [ḙ] \341\270\231
+ "ḛ" // [e] -> [ḛ] \341\270\233
+ "ḝ" // [e] -> [ḝ] \341\270\235
+ "ẹ" // [e] -> [ẹ] \341\272\271
+ "ẻ" // [e] -> [ẻ] \341\272\273
+ "ẽ" // [e] -> [ẽ] \341\272\275
+ "ế" // [e] -> [ế] \341\272\277
+ "ề" // [e] -> [ề] \341\273\201
+ "ể" // [e] -> [ể] \341\273\203
+ "ễ" // [e] -> [ễ] \341\273\205
+ "ệ" // [e] -> [ệ] \341\273\207
+ "ἐ" // [e] -> [ἐ] \341\274\220
+ "ἑ" // [e] -> [ἑ] \341\274\221
+ "ἒ" // [e] -> [ἒ] \341\274\222
+ "ἓ" // [e] -> [ἓ] \341\274\223
+ "ἔ" // [e] -> [ἔ] \341\274\224
+ "ἕ" // [e] -> [ἕ] \341\274\225
+ "\0" // [e] end
+ "φ" // [f] -> [φ] \317\206
+ "ϕ" // [f] -> [ϕ] \317\225
+ "ф" // [f] -> [ф] \321\204
+ "ֆ" // [f] -> [ֆ] \326\206
+ "ḟ" // [f] -> [ḟ] \341\270\237
+ "ẛ" // [f] -> [ẛ] \341\272\233
+ "\0" // [f] end
+ "ĝ" // [g] -> [ĝ] \304\235
+ "ğ" // [g] -> [ğ] \304\237
+ "ġ" // [g] -> [ġ] \304\241
+ "ģ" // [g] -> [ģ] \304\243
+ "γ" // [g] -> [γ] \316\263
+ "г" // [g] -> [г] \320\263
+ "ѓ" // [g] -> [ѓ] \321\223
+ "ґ" // [g] -> [ґ] \322\221
+ "ғ" // [g] -> [ғ] \322\223
+ "ҝ" // [g] -> [ҝ] \322\235
+ "գ" // [g] -> [գ] \325\243
+ "ղgh\0" // [g] -> [ղ] \325\262
+ "ջ" // [g] -> [ջ] \325\273
+ "გ" // [g] -> [გ] \341\203\222
+ "ღgh\0" // [g] -> [ღ] \341\203\246
+ "ḡ" // [g] -> [ḡ] \341\270\241
+ "\0" // [g] end
+ "ĥ" // [h] -> [ĥ] \304\245
+ "ħ" // [h] -> [ħ] \304\247
+ "х" // [h] -> [х] \321\205
+ "һ" // [h] -> [һ] \322\273
+ "հ" // [h] -> [հ] \325\260
+ "ჰ" // [h] -> [ჰ] \341\203\260
+ "ḣ" // [h] -> [ḣ] \341\270\243
+ "ḥ" // [h] -> [ḥ] \341\270\245
+ "ḧ" // [h] -> [ḧ] \341\270\247
+ "ḩ" // [h] -> [ḩ] \341\270\251
+ "ḫ" // [h] -> [ḫ] \341\270\253
+ "ẖ" // [h] -> [ẖ] \341\272\226
+ "\0" // [h] end
+ "ì" // [i] -> [ì] \303\254
+ "í" // [i] -> [í] \303\255
+ "î" // [i] -> [î] \303\256
+ "ï" // [i] -> [ï] \303\257
+ "ý" // [i] -> [ý] \303\275
+ "ĩ" // [i] -> [ĩ] \304\251
+ "ī" // [i] -> [ī] \304\253
+ "ĭ" // [i] -> [ĭ] \304\255
+ "į" // [i] -> [į] \304\257
+ "ı" // [i] -> [ı] \304\261
+ "ΐ" // [i] -> [ΐ] \316\220
+ "ή" // [i] -> [ή] \316\256
+ "ί" // [i] -> [ί] \316\257
+ "ΰ" // [i] -> [ΰ] \316\260
+ "η" // [i] -> [η] \316\267
+ "ι" // [i] -> [ι] \316\271
+ "υ" // [i] -> [υ] \317\205
+ "ϊ" // [i] -> [ϊ] \317\212
+ "ϋ" // [i] -> [ϋ] \317\213
+ "ύ" // [i] -> [ύ] \317\215
+ "и" // [i] -> [и] \320\270
+ "й" // [i] -> [й] \320\271
+ "і" // [i] -> [і] \321\226
+ "ї" // [i] -> [ї] \321\227
+ "ի" // [i] -> [ի] \325\253
+ "ი" // [i] -> [ი] \341\203\230
+ "ḭ" // [i] -> [ḭ] \341\270\255
+ "ḯ" // [i] -> [ḯ] \341\270\257
+ "ỉ" // [i] -> [ỉ] \341\273\211
+ "ị" // [i] -> [ị] \341\273\213
+ "ἠ" // [i] -> [ἠ] \341\274\240
+ "ἡ" // [i] -> [ἡ] \341\274\241
+ "ἢ" // [i] -> [ἢ] \341\274\242
+ "ἣ" // [i] -> [ἣ] \341\274\243
+ "ἤ" // [i] -> [ἤ] \341\274\244
+ "ἥ" // [i] -> [ἥ] \341\274\245
+ "ἦ" // [i] -> [ἦ] \341\274\246
+ "ἧ" // [i] -> [ἧ] \341\274\247
+ "ἰ" // [i] -> [ἰ] \341\274\260
+ "ἱ" // [i] -> [ἱ] \341\274\261
+ "ἲ" // [i] -> [ἲ] \341\274\262
+ "ἳ" // [i] -> [ἳ] \341\274\263
+ "ἴ" // [i] -> [ἴ] \341\274\264
+ "ἵ" // [i] -> [ἵ] \341\274\265
+ "ἶ" // [i] -> [ἶ] \341\274\266
+ "ἷ" // [i] -> [ἷ] \341\274\267
+ "ὐ" // [i] -> [ὐ] \341\275\220
+ "ὑ" // [i] -> [ὑ] \341\275\221
+ "ὒ" // [i] -> [ὒ] \341\275\222
+ "ὓ" // [i] -> [ὓ] \341\275\223
+ "ὔ" // [i] -> [ὔ] \341\275\224
+ "ὕ" // [i] -> [ὕ] \341\275\225
+ "ὖ" // [i] -> [ὖ] \341\275\226
+ "ὗ" // [i] -> [ὗ] \341\275\227
+ "\0" // [i] end
+ "ĵ" // [j] -> [ĵ] \304\265
+ "ж" // [j] -> [ж] \320\266
+ "ј" // [j] -> [ј] \321\230
+ "ժ" // [j] -> [ժ] \325\252
+ "ճ" // [j] -> [ճ] \325\263
+ "ჯ" // [j] -> [ჯ] \341\203\257
+ "\0" // [j] end
+ "ķ" // [k] -> [ķ] \304\267
+ "κ" // [k] -> [κ] \316\272
+ "ϰ" // [k] -> [ϰ] \317\260
+ "к" // [k] -> [к] \320\272
+ "хkh\0" // [k] -> [х] \321\205
+ "ќ" // [k] -> [ќ] \321\234
+ "կ" // [k] -> [կ] \325\257
+ "ք" // [k] -> [ք] \326\204
+ "კ" // [k] -> [კ] \341\203\231
+ "ქkh\0" // [k] -> [ქ] \341\203\245
+ "ḱ" // [k] -> [ḱ] \341\270\261
+ "ḳ" // [k] -> [ḳ] \341\270\263
+ "ḵ" // [k] -> [ḵ] \341\270\265
+ "\0" // [k] end
+ "ĺ" // [l] -> [ĺ] \304\272
+ "ļ" // [l] -> [ļ] \304\274
+ "ľ" // [l] -> [ľ] \304\276
+ "ŀ" // [l] -> [ŀ] \305\200
+ "ł" // [l] -> [ł] \305\202
+ "λ" // [l] -> [λ] \316\273
+ "л" // [l] -> [л] \320\273
+ "љlj\0" // [l] -> [љ] \321\231
+ "լ" // [l] -> [լ] \325\254
+ "ლ" // [l] -> [ლ] \341\203\232
+ "ḷ" // [l] -> [ḷ] \341\270\267
+ "ḹ" // [l] -> [ḹ] \341\270\271
+ "ḻ" // [l] -> [ḻ] \341\270\273
+ "ḽ" // [l] -> [ḽ] \341\270\275
+ "\0" // [l] end
+ "μ" // [m] -> [μ] \316\274
+ "м" // [m] -> [м] \320\274
+ "մ" // [m] -> [մ] \325\264
+ "მ" // [m] -> [მ] \341\203\233
+ "ḿ" // [m] -> [ḿ] \341\270\277
+ "ṁ" // [m] -> [ṁ] \341\271\201
+ "ṃ" // [m] -> [ṃ] \341\271\203
+ "\0" // [m] end
+ "ñ" // [n] -> [ñ] \303\261
+ "ń" // [n] -> [ń] \305\204
+ "ņ" // [n] -> [ņ] \305\206
+ "ň" // [n] -> [ň] \305\210
+ "ŋ" // [n] -> [ŋ] \305\213
+ "ν" // [n] -> [ν] \316\275
+ "н" // [n] -> [н] \320\275
+ "њnj\0" // [n] -> [њ] \321\232
+ "ն" // [n] -> [ն] \325\266
+ "ნ" // [n] -> [ნ] \341\203\234
+ "ṅ" // [n] -> [ṅ] \341\271\205
+ "ṇ" // [n] -> [ṇ] \341\271\207
+ "ṉ" // [n] -> [ṉ] \341\271\211
+ "ṋ" // [n] -> [ṋ] \341\271\213
+ "\0" // [n] end
+ "ò" // [o] -> [ò] \303\262
+ "ó" // [o] -> [ó] \303\263
+ "ô" // [o] -> [ô] \303\264
+ "õ" // [o] -> [õ] \303\265
+ "öo\0oe\0" // [o] -> [ö] \303\266
+ "øo\0oe\0" // [o] -> [ø] \303\270
+ "ō" // [o] -> [ō] \305\215
+ "ŏ" // [o] -> [ŏ] \305\217
+ "ő" // [o] -> [ő] \305\221
+ "œoe\0" // [o] -> [œ] \305\223
+ "ơ" // [o] -> [ơ] \306\241
+ "ο" // [o] -> [ο] \316\277
+ "ω" // [o] -> [ω] \317\211
+ "ό" // [o] -> [ό] \317\214
+ "ώ" // [o] -> [ώ] \317\216
+ "о" // [o] -> [о] \320\276
+ "ө" // [o] -> [ө] \323\251
+ "ո" // [o] -> [ո] \325\270
+ "օ" // [o] -> [օ] \326\205
+ "ო" // [o] -> [ო] \341\203\235
+ "ṍ" // [o] -> [ṍ] \341\271\215
+ "ṏ" // [o] -> [ṏ] \341\271\217
+ "ṑ" // [o] -> [ṑ] \341\271\221
+ "ṓ" // [o] -> [ṓ] \341\271\223
+ "ọ" // [o] -> [ọ] \341\273\215
+ "ỏ" // [o] -> [ỏ] \341\273\217
+ "ố" // [o] -> [ố] \341\273\221
+ "ồ" // [o] -> [ồ] \341\273\223
+ "ổ" // [o] -> [ổ] \341\273\225
+ "ỗ" // [o] -> [ỗ] \341\273\227
+ "ộ" // [o] -> [ộ] \341\273\231
+ "ớ" // [o] -> [ớ] \341\273\233
+ "ờ" // [o] -> [ờ] \341\273\235
+ "ở" // [o] -> [ở] \341\273\237
+ "ỡ" // [o] -> [ỡ] \341\273\241
+ "ợ" // [o] -> [ợ] \341\273\243
+ "ὀ" // [o] -> [ὀ] \341\275\200
+ "ὁ" // [o] -> [ὁ] \341\275\201
+ "ὂ" // [o] -> [ὂ] \341\275\202
+ "ὃ" // [o] -> [ὃ] \341\275\203
+ "ὄ" // [o] -> [ὄ] \341\275\204
+ "ὅ" // [o] -> [ὅ] \341\275\205
+ "ὠ" // [o] -> [ὠ] \341\275\240
+ "ὡ" // [o] -> [ὡ] \341\275\241
+ "ὢ" // [o] -> [ὢ] \341\275\242
+ "\0" // [o] end
+ "π" // [p] -> [π] \317\200
+ "ψps\0" // [p] -> [ψ] \317\210
+ "ϖ" // [p] -> [ϖ] \317\226
+ "п" // [p] -> [п] \320\277
+ "պ" // [p] -> [պ] \325\272
+ "փ" // [p] -> [փ] \326\203
+ "პ" // [p] -> [პ] \341\203\236
+ "ფph\0" // [p] -> [ფ] \341\203\244
+ "ṕ" // [p] -> [ṕ] \341\271\225
+ "ṗ" // [p] -> [ṗ] \341\271\227
+ "\0" // [p] end
+ "г" // [q] -> [г] \320\263
+ "ყ" // [q] -> [ყ] \341\203\247
+ "\0" // [q] end
+ "ŕ" // [r] -> [ŕ] \305\225
+ "ŗ" // [r] -> [ŗ] \305\227
+ "ř" // [r] -> [ř] \305\231
+ "ρ" // [r] -> [ρ] \317\201
+ "ϱ" // [r] -> [ϱ] \317\261
+ "р" // [r] -> [р] \321\200
+ "ռ" // [r] -> [ռ] \325\274
+ "ր" // [r] -> [ր] \326\200
+ "რ" // [r] -> [რ] \341\203\240
+ "ṙ" // [r] -> [ṙ] \341\271\231
+ "ṛ" // [r] -> [ṛ] \341\271\233
+ "ṝ" // [r] -> [ṝ] \341\271\235
+ "ṟ" // [r] -> [ṟ] \341\271\237
+ "\0" // [r] end
+ "ßss\0" // [s] -> [ß] \303\237
+ "ś" // [s] -> [ś] \305\233
+ "ŝ" // [s] -> [ŝ] \305\235
+ "ş" // [s] -> [ş] \305\237
+ "š" // [s] -> [š] \305\241
+ "ș" // [s] -> [ș] \310\231
+ "ς" // [s] -> [ς] \317\202
+ "σ" // [s] -> [σ] \317\203
+ "ϲ" // [s] -> [ϲ] \317\262
+ "с" // [s] -> [с] \321\201
+ "шs\0sh\0" // [s] -> [ш] \321\210
+ "щshch\0sht\0" // [s] -> [щ] \321\211
+ "շsh\0" // [s] -> [շ] \325\267
+ "ս" // [s] -> [ս] \325\275
+ "ს" // [s] -> [ს] \341\203\241
+ "შsh\0" // [s] -> [შ] \341\203\250
+ "ṡ" // [s] -> [ṡ] \341\271\241
+ "ṣ" // [s] -> [ṣ] \341\271\243
+ "ṥ" // [s] -> [ṥ] \341\271\245
+ "ṧ" // [s] -> [ṧ] \341\271\247
+ "ṩ" // [s] -> [ṩ] \341\271\251
+ "\0" // [s] end
+ "þth\0" // [t] -> [þ] \303\276
+ "ţ" // [t] -> [ţ] \305\243
+ "ť" // [t] -> [ť] \305\245
+ "ŧ" // [t] -> [ŧ] \305\247
+ "ț" // [t] -> [ț] \310\233
+ "θth\0" // [t] -> [θ] \316\270
+ "τ" // [t] -> [τ] \317\204
+ "ϑth\0" // [t] -> [ϑ] \317\221
+ "т" // [t] -> [т] \321\202
+ "цts\0" // [t] -> [ц] \321\206
+ "թ" // [t] -> [թ] \325\251
+ "ծts\0" // [t] -> [ծ] \325\256
+ "տ" // [t] -> [տ] \325\277
+ "ցts\0" // [t] -> [ց] \326\201
+ "თ" // [t] -> [თ] \341\203\227
+ "ტ" // [t] -> [ტ] \341\203\242
+ "ცts\0" // [t] -> [ც] \341\203\252
+ "წts\0" // [t] -> [წ] \341\203\254
+ "ṫ" // [t] -> [ṫ] \341\271\253
+ "ṭ" // [t] -> [ṭ] \341\271\255
+ "ṯ" // [t] -> [ṯ] \341\271\257
+ "ṱ" // [t] -> [ṱ] \341\271\261
+ "ẗ" // [t] -> [ẗ] \341\272\227
+ "\0" // [t] end
+ "ù" // [u] -> [ù] \303\271
+ "ú" // [u] -> [ú] \303\272
+ "û" // [u] -> [û] \303\273
+ "üu\0ue\0" // [u] -> [ü] \303\274
+ "ũ" // [u] -> [ũ] \305\251
+ "ū" // [u] -> [ū] \305\253
+ "ŭ" // [u] -> [ŭ] \305\255
+ "ů" // [u] -> [ů] \305\257
+ "ű" // [u] -> [ű] \305\261
+ "ų" // [u] -> [ų] \305\263
+ "ư" // [u] -> [ư] \306\260
+ "у" // [u] -> [у] \321\203
+ "ў" // [u] -> [ў] \321\236
+ "ү" // [u] -> [ү] \322\257
+ "უ" // [u] -> [უ] \341\203\243
+ "ṳ" // [u] -> [ṳ] \341\271\263
+ "ṵ" // [u] -> [ṵ] \341\271\265
+ "ṷ" // [u] -> [ṷ] \341\271\267
+ "ṹ" // [u] -> [ṹ] \341\271\271
+ "ṻ" // [u] -> [ṻ] \341\271\273
+ "ụ" // [u] -> [ụ] \341\273\245
+ "ủ" // [u] -> [ủ] \341\273\247
+ "ứ" // [u] -> [ứ] \341\273\251
+ "ừ" // [u] -> [ừ] \341\273\253
+ "ử" // [u] -> [ử] \341\273\255
+ "ữ" // [u] -> [ữ] \341\273\257
+ "ự" // [u] -> [ự] \341\273\261
+ "\0" // [u] end
+ "β" // [v] -> [β] \316\262
+ "ϐ" // [v] -> [ϐ] \317\220
+ "в" // [v] -> [в] \320\262
+ "վ" // [v] -> [վ] \325\276
+ "ւ" // [v] -> [ւ] \326\202
+ "ṽ" // [v] -> [ṽ] \341\271\275
+ "ṿ" // [v] -> [ṿ] \341\271\277
+ "\0" // [v] end
+ "ŵ" // [w] -> [ŵ] \305\265
+ "ẁ" // [w] -> [ẁ] \341\272\201
+ "ẃ" // [w] -> [ẃ] \341\272\203
+ "ẅ" // [w] -> [ẅ] \341\272\205
+ "ẇ" // [w] -> [ẇ] \341\272\207
+ "ẉ" // [w] -> [ẉ] \341\272\211
+ "ẘ" // [w] -> [ẘ] \341\272\230
+ "\0" // [w] end
+ "ξ" // [x] -> [ξ] \316\276
+ "χ" // [x] -> [χ] \317\207
+ "х" // [x] -> [х] \321\205
+ "խ" // [x] -> [խ] \325\255
+ "ხ" // [x] -> [ხ] \341\203\256
+ "ẋ" // [x] -> [ẋ] \341\272\213
+ "ẍ" // [x] -> [ẍ] \341\272\215
+ "\0" // [x] end
+ "ý" // [y] -> [ý] \303\275
+ "ÿ" // [y] -> [ÿ] \303\277
+ "ŷ" // [y] -> [ŷ] \305\267
+ "й" // [y] -> [й] \320\271
+ "ы" // [y] -> [ы] \321\213
+ "ь" // [y] -> [ь] \321\214
+ "юyu\0" // [y] -> [ю] \321\216
+ "яya\0" // [y] -> [я] \321\217
+ "ј" // [y] -> [ј] \321\230
+ "յ" // [y] -> [յ] \325\265
+ "ẏ" // [y] -> [ẏ] \341\272\217
+ "ẙ" // [y] -> [ẙ] \341\272\231
+ "ỳ" // [y] -> [ỳ] \341\273\263
+ "ỵ" // [y] -> [ỵ] \341\273\265
+ "ỷ" // [y] -> [ỷ] \341\273\267
+ "ỹ" // [y] -> [ỹ] \341\273\271
+ "\0" // [y] end
+ "ź" // [z] -> [ź] \305\272
+ "ż" // [z] -> [ż] \305\274
+ "ž" // [z] -> [ž] \305\276
+ "ƶ" // [z] -> [ƶ] \306\266
+ "ζ" // [z] -> [ζ] \316\266
+ "жz\0zh\0" // [z] -> [ж] \320\266
+ "з" // [z] -> [з] \320\267
+ "զ" // [z] -> [զ] \325\246
+ "ձ" // [z] -> [ձ] \325\261
+ "ზ" // [z] -> [ზ] \341\203\226
+ "ჟzh\0" // [z] -> [ჟ] \341\203\237
+ "ẑ" // [z] -> [ẑ] \341\272\221
+ "ẓ" // [z] -> [ẓ] \341\272\223
+ "ẕ" // [z] -> [ẕ] \341\272\225
+ "\0" // [z] end
+ ;
+
+// Currently only lowercase ascii characters are mapped from. Each
+// entry is the index into the kVariantMapElts string above for the
+// corresponding lowercase ascii key character.
+const int kVariantMapIndex[] = {
+ 536, // a
+ 649, // b
+ 666, // c
+ 710, // d
+ 770, // e
+ 884, // f
+ 899, // g
+ 941, // h
+ 973, // i
+ 1111, // j
+ 1125, // k
+ 1163, // l
+ 1200, // m
+ 1219, // n
+ 1256, // o
+ 1386, // p
+ 1417, // q
+ 1423, // r
+ 1455, // s
+ 1528, // t
+ 1608, // u
+ 1681, // v
+ 1698, // w
+ 1719, // x
+ 1737, // y
+ 1782, // z
+};
+
+#endif // ICING_LEGACY_INDEX_ICING_VARIANT_MAP_H_
diff --git a/icing/legacy/index/proto/icing-dynamic-trie-header.proto b/icing/legacy/index/proto/icing-dynamic-trie-header.proto
new file mode 100644
index 0000000..0e59832
--- /dev/null
+++ b/icing/legacy/index/proto/icing-dynamic-trie-header.proto
@@ -0,0 +1,44 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file contains protos that are only used in native code.
+
+syntax = "proto2";
+
+package icing;
+
+message IcingDynamicTrieHeader {
+ optional uint32 version = 1;
+
+ optional uint32 value_size = 2;
+
+ // Sizes of buffers.
+ optional uint32 max_nodes = 3;
+ optional uint32 max_nexts = 4;
+ optional uint32 max_suffixes_size = 5;
+
+ // Tail indices for buffers.
+ optional uint32 num_nodes = 6;
+ optional uint32 num_nexts = 7;
+ optional uint32 suffixes_size = 8;
+
+ // Next free list.
+ repeated uint32 free_lists = 9;
+
+ // Number of unique keys.
+ optional uint32 num_keys = 10;
+
+ // Flag used to indicate a flush is in progress.
+ optional bool deprecated_is_flushing = 11 [deprecated = true];
+}
diff --git a/icing/legacy/portable/icing-zlib.h b/icing/legacy/portable/icing-zlib.h
new file mode 100644
index 0000000..ed5e0e2
--- /dev/null
+++ b/icing/legacy/portable/icing-zlib.h
@@ -0,0 +1,23 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_LEGACY_PORTABLE_ICING_ZLIB_H_
+#define ICING_LEGACY_PORTABLE_ICING_ZLIB_H_
+
+// Though we use the same zlib header on all platforms, the implementation used
+// is from NDK on android and from third_party/zlib on iOS/linux. See BUILD
+// rule.
+#include <zlib.h> // IWYU pragma: export
+
+#endif // ICING_LEGACY_PORTABLE_ICING_ZLIB_H_
diff --git a/icing/portable/zlib.h b/icing/portable/zlib.h
new file mode 100644
index 0000000..b575427
--- /dev/null
+++ b/icing/portable/zlib.h
@@ -0,0 +1,23 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_PORTABLE_ZLIB_H_
+#define ICING_PORTABLE_ZLIB_H_
+
+// Though we use the same zlib header on all platforms, the implementation used
+// is from NDK on android and from third_party/zlib on iOS/linux. See BUILD
+// rule.
+#include <zlib.h> // IWYU pragma: export
+
+#endif // ICING_PORTABLE_ZLIB_H_
diff --git a/icing/proto/document.proto b/icing/proto/document.proto
index 50c0f7e..07bce0e 100644
--- a/icing/proto/document.proto
+++ b/icing/proto/document.proto
@@ -20,7 +20,7 @@
option java_multiple_files = true;
// Defines a unit of data understood by the IcingSearchEngine.
-// Next tag: 8
+// Next tag: 9
message DocumentProto {
// REQUIRED: Namespace that this Document resides in.
// Namespaces can affect read/write permissions.
@@ -37,10 +37,9 @@
optional string schema = 3;
// OPTIONAL: Seconds since epoch at which the Document was created.
- // Negative values are invalid for this field and may cause unexpected
- // behaviors. If not specified, it will default to when the Icing receives the
- // Document.
- optional fixed64 creation_timestamp_secs = 4;
+ // Negative values will lead to validation errors. If not specified, it will
+ // default to when the Icing receives the Document.
+ optional int64 creation_timestamp_secs = 4;
// REQUIRED: Properties that will be validated against the provided schema.
// The names of these properties should map to one of the properties
@@ -56,6 +55,16 @@
// ranking. Negative values will lead to validation errors. The default is the
// lowest score 0.
optional int32 score = 7 [default = 0];
+
+ // The time-to-live that should be enforced on this Document. Documents get
+ // garbage-collected once the current time exceeds the ttl_secs after the
+ // creation_timestamp_secs. Negative values will lead to validation errors.
+ //
+ // Default value of 0 keeps the Documents till they're explicitly deleted.
+ //
+ // TODO(cassiewang): Benchmark if fixed64 or some other proto type is better
+ // in terms of space/time efficiency. Both for ttl_secs and timestamp fields
+ optional int64 ttl_secs = 8 [default = 0];
}
// Holds a property field of the Document.
diff --git a/icing/proto/schema.proto b/icing/proto/schema.proto
index dbfbfc4..9a66617 100644
--- a/icing/proto/schema.proto
+++ b/icing/proto/schema.proto
@@ -41,13 +41,6 @@
// in http://schema.org. Eg: DigitalDocument, Message, Person, etc.
optional string schema_type = 1;
- // The time-to-live that should be enforced on every Document of this type.
- // Documents get garbage-collected based on their creation-timestamp and the
- // TTL of the schema they belong to.
- //
- // Default value of 0 keeps the Documents till they're explicitly deleted.
- optional int64 ttl_secs = 3;
-
// List of all properties that are supported by Documents of this type.
// An Document should never have properties that are not listed here.
//
@@ -56,7 +49,7 @@
// easier.
repeated PropertyConfigProto properties = 4;
- reserved 2;
+ reserved 2, 3;
}
// Describes how a single property should be indexed.
@@ -65,26 +58,26 @@
// Indicates how the content of this property should be matched in the index.
//
// TermMatchType.Code=UNKNOWN
- // Content in this section will not be tokenized or indexed. Useful if the
+ // Content in this property will not be tokenized or indexed. Useful if the
// data type is not made up of terms (e.g. DOCUMENT or BYTES type). All the
// properties inside the nested property won't be indexed regardless of the
// value of the term_match_type field for the nested properties.
//
// TermMatchType.Code=EXACT_ONLY
- // Content in this section should only be returned for queries matching the
- // exact tokens appearing in this section.
- // Ex. A section with "fool" should NOT match a query for "foo".
+ // Content in this property should only be returned for queries matching the
+ // exact tokens appearing in this property.
+ // Ex. A property with "fool" should NOT match a query for "foo".
//
// TermMatchType.Code=PREFIX
- // Content in this section should be returned for queries that are either
- // exact matches or query matches of the tokens appearing in this section.
- // Ex. A section with "fool" *should* match a query for "foo".
+ // Content in this property should be returned for queries that are either
+ // exact matches or query matches of the tokens appearing in this property.
+ // Ex. A property with "fool" *should* match a query for "foo".
optional TermMatchType.Code term_match_type = 1;
message TokenizerType {
enum Code {
- // It is only valid for tokenizer_type to be 'NONE' if either indexed is
- // also 'NONE' or the data type is DOCUMENT.
+ // It is only valid for tokenizer_type to be 'NONE' if the data type is
+ // DOCUMENT.
NONE = 0;
// Tokenization for plain text.
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
new file mode 100644
index 0000000..4983c33
--- /dev/null
+++ b/icing/query/query-processor.cc
@@ -0,0 +1,351 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/query-processor.h"
+
+#include <deque>
+#include <memory>
+#include <stack>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator-all-document-id.h"
+#include "icing/index/iterator/doc-hit-info-iterator-and.h"
+#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
+#include "icing/index/iterator/doc-hit-info-iterator-not.h"
+#include "icing/index/iterator/doc-hit-info-iterator-or.h"
+#include "icing/index/iterator/doc-hit-info-iterator-section-restrict.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/proto/search.pb.h"
+#include "icing/query/query-terms.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/raw-query-tokenizer.h"
+#include "icing/tokenization/token.h"
+#include "icing/tokenization/tokenizer.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// State frame for the query parser. This is specific to how the raw
+// query is parsed/stored.
+struct ParserStateFrame {
+ std::vector<std::unique_ptr<DocHitInfoIterator>> and_iterators;
+ std::vector<std::unique_ptr<DocHitInfoIterator>> or_iterators;
+
+ // If the last independent token was an OR, then we need to treat the next
+ // resulting iterator as part of an or_iterator
+ bool saw_or = false;
+
+ // If the last independent token was an exclusion, then we need to treat the
+ // next resulting iterator as being excluded.
+ bool saw_exclude = false;
+
+ // If the last independent token was a property/section filter, then we need
+ // to save the section name so we can create a section filter iterator.
+ std::string_view section_restrict = "";
+};
+
+// Combines any OR and AND iterators together into one iterator.
+std::unique_ptr<DocHitInfoIterator> ProcessParserStateFrame(
+ ParserStateFrame parser_state_frame,
+ const DocumentId last_added_document_id) {
+ if (parser_state_frame.and_iterators.empty() &&
+ parser_state_frame.or_iterators.empty()) {
+ // No terms specified, treat an empty query as retrieving all documents.
+ //
+ // We don't use the index_.last_added_document_id here because it's possible
+ // that documents exist in the DocumentStore, but were not successfully
+ // indexed. So to return *all* documents and not just *all indexed*
+ // documents, we use the DocumentStore's last_added_document_id
+ return std::make_unique<DocHitInfoIteratorAllDocumentId>(
+ last_added_document_id);
+ }
+
+ if (!parser_state_frame.or_iterators.empty()) {
+ // Combine all the ORs first since they have higher priority, then add it to
+ // the ANDs.
+ parser_state_frame.and_iterators.push_back(
+ CreateOrIterator(std::move(parser_state_frame.or_iterators)));
+ }
+ return CreateAndIterator(std::move(parser_state_frame.and_iterators));
+}
+
+} // namespace
+
+QueryProcessor::QueryProcessor(Index* index,
+ const LanguageSegmenter* language_segmenter,
+ const Normalizer* normalizer,
+ const DocumentStore* document_store,
+ const SchemaStore* schema_store,
+ const Clock* clock)
+ : index_(*index),
+ language_segmenter_(*language_segmenter),
+ normalizer_(*normalizer),
+ document_store_(*document_store),
+ schema_store_(*schema_store),
+ clock_(*clock) {}
+
+libtextclassifier3::StatusOr<QueryProcessor::QueryResults>
+QueryProcessor::ParseSearch(const SearchSpecProto& search_spec) {
+ ICING_ASSIGN_OR_RETURN(QueryResults results, ParseRawQuery(search_spec));
+
+ DocHitInfoIteratorFilter::Options options;
+
+ if (search_spec.namespace_filters_size() > 0) {
+ options.namespaces =
+ std::vector<std::string_view>(search_spec.namespace_filters().begin(),
+ search_spec.namespace_filters().end());
+ }
+
+ if (search_spec.schema_type_filters_size() > 0) {
+ options.schema_types =
+ std::vector<std::string_view>(search_spec.schema_type_filters().begin(),
+ search_spec.schema_type_filters().end());
+ }
+
+ results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>(
+ std::move(results.root_iterator), &document_store_, &schema_store_,
+ &clock_, options);
+ return results;
+}
+
+// TODO(cassiewang): Collect query stats to populate the SearchResultsProto
+libtextclassifier3::StatusOr<QueryProcessor::QueryResults>
+QueryProcessor::ParseRawQuery(const SearchSpecProto& search_spec) {
+ // Tokenize the incoming raw query
+ //
+ // TODO(cassiewang): Consider caching/creating a tokenizer factory that will
+ // cache the n most recently used tokenizers. So we don't have to recreate
+ // this on every new query, if they'll all be raw queries.
+ std::unique_ptr<Tokenizer> raw_query_tokenizer =
+ std::make_unique<RawQueryTokenizer>(&language_segmenter_);
+ ICING_ASSIGN_OR_RETURN(std::vector<Token> tokens,
+ raw_query_tokenizer->TokenizeAll(search_spec.query()));
+
+ std::stack<ParserStateFrame> frames;
+ frames.emplace();
+
+ QueryResults results;
+ // Process all the tokens
+ for (int i = 0; i < tokens.size(); i++) {
+ const Token& token = tokens.at(i);
+ std::unique_ptr<DocHitInfoIterator> result_iterator;
+
+ // TODO(cassiewang): Handle negation tokens
+ switch (token.type) {
+ case Token::Type::QUERY_LEFT_PARENTHESES: {
+ frames.emplace(ParserStateFrame());
+ break;
+ }
+ case Token::Type::QUERY_RIGHT_PARENTHESES: {
+ if (frames.empty()) {
+ return absl_ports::InternalError(
+ "Encountered empty stack of ParserStateFrames");
+ }
+ result_iterator = ProcessParserStateFrame(
+ std::move(frames.top()), document_store_.last_added_document_id());
+ frames.pop();
+ break;
+ }
+ case Token::Type::QUERY_EXCLUSION: {
+ if (frames.empty()) {
+ return absl_ports::InternalError(
+ "Encountered empty stack of ParserStateFrames");
+ }
+ frames.top().saw_exclude = true;
+ break;
+ }
+ case Token::Type::QUERY_OR: {
+ if (frames.empty()) {
+ return absl_ports::InternalError(
+ "Encountered empty stack of ParserStateFrames");
+ }
+ frames.top().saw_or = true;
+ break;
+ }
+ case Token::Type::QUERY_PROPERTY: {
+ if (frames.empty()) {
+ return absl_ports::InternalError(
+ "Encountered empty stack of ParserStateFrames");
+ }
+
+ frames.top().section_restrict = token.text;
+ break;
+ }
+ case Token::Type::REGULAR: {
+ if (frames.empty()) {
+ return absl_ports::InternalError(
+ "Encountered empty stack of ParserStateFrames");
+ }
+
+ std::string normalized_text = normalizer_.NormalizeTerm(token.text);
+
+ // TODO(cassiewang): Consider removing the use of a section mask in the
+ // term iterator, or constructing a best-effort SectionIdMask based on
+ // the section filter. For some combination of schema type filters and
+ // section filters, we can't encapsulate the perfect
+ // SchemaTypeId-SectionId sets with just a SectionIdMask. So we
+ // over-retrieve hits and have to do a post-filter anyways. With a
+ // SectionIdMask, we might be able to narrow down our SectionIds, but
+ // we'll still over-retrieve hits a bit. So at that point, it's a
+ // tradeoff of
+ //
+ // 1.1 Go to SchemaStore and iterate over the schema to calculate a
+ // SectionIdMask
+ // 1.2 Use SectionIdMask and save some hit buffer memory
+ // 1.3 Do a post-filter to double check SchemaTypeId-SectionId combo
+ //
+ // vs
+ //
+ // 2.1 Use SectionIdMaskAll and use more hit buffer memory
+ // 2.2 Do a post-filter to double check SchemaTypeId-SectionId combo
+ //
+ // We do the same amount of disk reads, so it may be dependent on how
+ // big the schema is and/or how popular schema type filtering and
+ // section filtering is.
+
+ ICING_ASSIGN_OR_RETURN(
+ result_iterator,
+ index_.GetIterator(normalized_text, kSectionIdMaskAll,
+ search_spec.term_match_type()));
+
+ // Add terms to match if this is not a negation term.
+ // WARNING: setting query terms at this point is not compatible with
+ // group-level excludes, group-level sections restricts or excluded
+ // section restricts. Those are not currently supported. If they became
+ // supported, this handling for query terms would need to be altered.
+ if (!frames.top().saw_exclude) {
+ results.query_terms[frames.top().section_restrict].insert(
+ std::move(normalized_text));
+ }
+ break;
+ }
+ case Token::Type::INVALID:
+ U_FALLTHROUGH;
+ default:
+ // This wouldn't happen if tokenizer and query processor both work
+ // correctly. An unknown token indicates inconsistency between tokenizer
+ // and query processor, so we return an internal error here.
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Encountered unknown token while processing query: ", token.text));
+ }
+
+ // Did we get an iterator out of this token?
+ if (result_iterator) {
+ if (frames.empty()) {
+ return absl_ports::InternalError(
+ "Encountered empty stack of ParserStateFrames");
+ }
+
+ // NOTE: Order matters!! We must apply the section restrict first, then
+ // the NOT operator.
+ //
+ // Imagine a query like [-subject:foo] which means we
+ // want to get documents that don't have the term 'foo' in section
+ // 'subject'.
+ //
+ // Assume some Document_0:
+ // { "subject": "foo" }
+ //
+ // And assume some Document_1:
+ // { "subject": "bar" }
+ //
+ // If we use the IteratorNot first, then we'll get DocHitInfos that
+ // represent DocumentIds without any section hits like
+ // DocHitInfo(document_id_1, kSectionIdMaskNone). Then, when we try to
+ // apply the IteratorSectionRestrict, no SectionIds in the mask will match
+ // the SectionId of 'subject' and we won't return any results.
+ //
+ // If we use the IteratorSectionRestrict first, then we'll get a
+ // DocHitInfo for Document_0. Then with the IteratorNot, we can get the
+ // rest of the Documents excluding Document_0, and get Document_1 as a
+ // correct result.
+ //
+ // TODO(cassiewang): The point is a bit moot right now since we don't even
+ // support this functionality. But add tests for this once we do support
+ // more advanced section restricts with grouping, negation, etc.
+ if (!frames.top().section_restrict.empty()) {
+ // We saw a section restrict earlier, wrap the result iterator in
+ // the section restrict
+ result_iterator = std::make_unique<DocHitInfoIteratorSectionRestrict>(
+ std::move(result_iterator), &document_store_, &schema_store_,
+ frames.top().section_restrict);
+
+ frames.top().section_restrict = "";
+ }
+
+ // Check if we need to NOT/exclude this iterator
+ if (frames.top().saw_exclude) {
+ result_iterator = std::make_unique<DocHitInfoIteratorNot>(
+ std::move(result_iterator),
+ document_store_.last_added_document_id());
+ frames.top().saw_exclude = false;
+ }
+
+ if (i < tokens.size() - 1 &&
+ tokens.at(i + 1).type == Token::Type::QUERY_OR) {
+ // This isn't the last token, and the next token is an OR. Then we
+ // should OR this iterator with the next iterator, (e.g. if the query
+ // was "A OR B", we would be processing "A" right now)
+ frames.top().or_iterators.push_back(std::move(result_iterator));
+ } else if (frames.top().saw_or) {
+ // This isn't the first token, and the previous token was an OR. Then
+ // we should OR this iterator with the previous iterator (e.g. if the
+ // query was "A OR (B C)", we would be processing the iterator for "(B
+ // C)" right now)
+ frames.top().or_iterators.push_back(std::move(result_iterator));
+ frames.top().saw_or = false;
+ } else {
+ // If we're not trying to OR this iterator, we AND everything else.
+ if (!frames.top().or_iterators.empty()) {
+ // Accumulate the previous OR iterators if there were any.
+ frames.top().and_iterators.push_back(
+ CreateOrIterator(std::move(frames.top().or_iterators)));
+ frames.top().or_iterators =
+ std::vector<std::unique_ptr<DocHitInfoIterator>>();
+ }
+ frames.top().and_iterators.push_back(std::move(result_iterator));
+ }
+ }
+ }
+
+ // Guaranteed that we have some iterators to return. Need to do one last
+ // combining since we could have ORs and ANDs.
+ if (frames.size() != 1) {
+ return absl_ports::InternalError(
+ "Encountered invalid state of ParserStateFrames stack");
+ }
+ results.root_iterator = ProcessParserStateFrame(
+ std::move(frames.top()), document_store_.last_added_document_id());
+ return results;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/query-processor.h b/icing/query/query-processor.h
new file mode 100644
index 0000000..9d7e3d9
--- /dev/null
+++ b/icing/query/query-processor.h
@@ -0,0 +1,92 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_QUERY_PROCESSOR_H_
+#define ICING_QUERY_QUERY_PROCESSOR_H_
+
+#include <memory>
+
+#include "utils/base/statusor.h"
+#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/proto/search.pb.h"
+#include "icing/query/query-terms.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer.h"
+#include "icing/util/clock.h"
+
+namespace icing {
+namespace lib {
+
+// Processes SearchSpecProtos and retrieves the specified DocHitInfos that
+// satisfy the query and its restrictions. This does not perform any scoring,
+// and returns matched documents in descending DocumentId order.
+class QueryProcessor {
+ public:
+ // Does not take any ownership, and all pointers must refer to valid objects
+ // that outlive the one constructed.
+ // TODO(b/141180665): Add nullptr checks for the raw pointers
+ explicit QueryProcessor(Index* index,
+ const LanguageSegmenter* language_segmenter,
+ const Normalizer* normalizer,
+ const DocumentStore* document_store,
+ const SchemaStore* schema_store, const Clock* clock);
+
+ struct QueryResults {
+ std::unique_ptr<DocHitInfoIterator> root_iterator;
+ // A map from section names to sets of terms restricted to those sections.
+ // Query terms that are not restricted are found at the entry with key "".
+ SectionRestrictQueryTermsMap query_terms;
+ };
+ // Parse the search configurations (including the query, any additional
+ // filters, etc.) in the SearchSpecProto into one DocHitInfoIterator.
+ //
+ // Returns:
+ // On success,
+ // - One iterator that represents the entire query
+ // - A map representing the query terms and any section restrictions
+ // INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized
+ // INTERNAL_ERROR on all other errors
+ libtextclassifier3::StatusOr<QueryResults> ParseSearch(
+ const SearchSpecProto& search_spec);
+
+ private:
+ // Parse the query into one DocHitInfoIterator that represents the root of a
+ // query tree.
+ //
+ // Returns:
+ // On success,
+ // - One iterator that represents the entire query
+ // - A map representing the query terms and any section restrictions
+ // INVALID_ARGUMENT if query syntax is incorrect and cannot be tokenized
+ // INTERNAL_ERROR on all other errors
+ libtextclassifier3::StatusOr<QueryResults> ParseRawQuery(
+ const SearchSpecProto& search_spec);
+
+ // Not const because we could modify/sort the hit buffer in the lite index at
+ // query time.
+ Index& index_;
+ const LanguageSegmenter& language_segmenter_;
+ const Normalizer& normalizer_;
+ const DocumentStore& document_store_;
+ const SchemaStore& schema_store_;
+ const Clock& clock_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_QUERY_PROCESSOR_H_
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
new file mode 100644
index 0000000..40df462
--- /dev/null
+++ b/icing/query/query-processor_benchmark.cc
@@ -0,0 +1,469 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "third_party/absl/flags/flag.h"
+#include "icing/document-builder.h"
+#include "icing/index/index.h"
+#include "icing/proto/term.pb.h"
+#include "icing/query/query-processor.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/logging.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/query:query-processor_benchmark
+//
+// $ blaze-bin/icing/query/query-processor_benchmark
+// --benchmarks=all
+//
+// Run on an Android device:
+// Make target //icing/tokenization:language-segmenter depend on
+// //third_party/icu
+//
+// Make target //icing/transform:normalizer depend on
+// //third_party/icu
+//
+// Download LangId model file from
+// //nlp/saft/components/lang_id/mobile/fb_model:models/latest_model.smfb and
+// put it into your device:
+// $ adb push [your model path] /data/local/tmp/
+//
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/query:query-processor_benchmark
+//
+// $ adb push blaze-bin/icing/query/query-processor_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/query-processor_benchmark --benchmarks=all
+// --adb
+
+// Flag to tell the benchmark that it'll be run on an Android device via adb,
+// the benchmark will set up data files accordingly.
+ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device");
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+void AddTokenToIndex(Index* index, DocumentId document_id, SectionId section_id,
+ TermMatchType::Code term_match_type,
+ const std::string& token) {
+ Index::Editor editor = index->Edit(document_id, section_id, term_match_type);
+ ICING_ASSERT_OK(editor.AddHit(token.c_str()));
+}
+
+std::unique_ptr<Index> CreateIndex(const IcingFilesystem& filesystem,
+ const std::string& index_dir) {
+ Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10);
+ return Index::Create(options, &filesystem).ValueOrDie();
+}
+
+std::unique_ptr<LanguageSegmenter> CreateLanguageSegmenter() {
+ if (absl::GetFlag(FLAGS_adb)) {
+ return LanguageSegmenter::Create("/data/local/tmp/latest_model.smfb")
+ .ValueOrDie();
+ } else {
+ return LanguageSegmenter::Create(GetLangIdModelPath()).ValueOrDie();
+ }
+}
+
+std::unique_ptr<Normalizer> CreateNormalizer() {
+ return Normalizer::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max())
+ .ValueOrDie();
+}
+
+void CleanUp(const Filesystem& filesystem, const std::string& base_dir) {
+ filesystem.DeleteDirectoryRecursively(base_dir.c_str());
+}
+
+void BM_QueryOneTerm(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+ }
+
+ IcingFilesystem icing_filesystem;
+ Filesystem filesystem;
+ const std::string base_dir = GetTestTempDir() + "/query_test";
+ const std::string index_dir = base_dir + "/index";
+ const std::string schema_dir = base_dir + "/schema";
+ const std::string doc_store_dir = base_dir + "/store";
+
+ CleanUp(filesystem, base_dir);
+ if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
+ !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
+ !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
+ ICING_LOG(ERROR) << "Failed to create test directories";
+ }
+
+ std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
+ std::unique_ptr<LanguageSegmenter> language_segmenter =
+ CreateLanguageSegmenter();
+ std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
+ FakeClock fake_clock;
+
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("type1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir));
+ ICING_ASSERT_OK(schema_store->SetSchema(schema));
+
+ std::unique_ptr<DocumentStore> document_store =
+ DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
+ schema_store.get())
+ .ValueOrDie();
+
+ DocumentId document_id = document_store
+ ->Put(DocumentBuilder()
+ .SetKey("icing", "type1")
+ .SetSchema("type1")
+ .Build())
+ .ValueOrDie();
+
+ const std::string input_string(state.range(0), 'A');
+ AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, input_string);
+
+ QueryProcessor query_processor(index.get(), language_segmenter.get(),
+ normalizer.get(), document_store.get(),
+ schema_store.get(), &fake_clock);
+ SearchSpecProto search_spec;
+ search_spec.set_query(input_string);
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ for (auto _ : state) {
+ QueryProcessor::QueryResults results =
+ query_processor.ParseSearch(search_spec).ValueOrDie();
+ while (results.root_iterator->Advance().ok()) {
+ results.root_iterator->doc_hit_info();
+ }
+ }
+
+ // Destroy document store before the whole directory is removed because it
+ // persists data in destructor.
+ document_store.reset();
+ CleanUp(filesystem, base_dir);
+}
+BENCHMARK(BM_QueryOneTerm)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_QueryFiveTerms(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+ }
+
+ IcingFilesystem icing_filesystem;
+ Filesystem filesystem;
+ const std::string base_dir = GetTestTempDir() + "/query_test";
+ const std::string index_dir = base_dir + "/index";
+ const std::string schema_dir = base_dir + "/schema";
+ const std::string doc_store_dir = base_dir + "/store";
+
+ CleanUp(filesystem, base_dir);
+ if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
+ !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
+ !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
+ ICING_LOG(ERROR) << "Failed to create test directories";
+ }
+
+ std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
+ std::unique_ptr<LanguageSegmenter> language_segmenter =
+ CreateLanguageSegmenter();
+ std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
+ FakeClock fake_clock;
+
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("type1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir));
+ ICING_ASSERT_OK(schema_store->SetSchema(schema));
+
+ std::unique_ptr<DocumentStore> document_store =
+ DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
+ schema_store.get())
+ .ValueOrDie();
+
+ DocumentId document_id = document_store
+ ->Put(DocumentBuilder()
+ .SetKey("icing", "type1")
+ .SetSchema("type1")
+ .Build())
+ .ValueOrDie();
+
+ int term_length = state.range(0) / 5;
+
+ const std::string input_string_a(term_length, 'A');
+ const std::string input_string_b(term_length, 'B');
+ const std::string input_string_c(term_length, 'C');
+ const std::string input_string_d(term_length, 'D');
+ const std::string input_string_e(term_length, 'E');
+ AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, input_string_a);
+ AddTokenToIndex(index.get(), document_id, /*section_id=*/1,
+ TermMatchType::EXACT_ONLY, input_string_b);
+ AddTokenToIndex(index.get(), document_id, /*section_id=*/2,
+ TermMatchType::EXACT_ONLY, input_string_c);
+ AddTokenToIndex(index.get(), document_id, /*section_id=*/3,
+ TermMatchType::EXACT_ONLY, input_string_d);
+ AddTokenToIndex(index.get(), document_id, /*section_id=*/4,
+ TermMatchType::EXACT_ONLY, input_string_e);
+
+ QueryProcessor query_processor(index.get(), language_segmenter.get(),
+ normalizer.get(), document_store.get(),
+ schema_store.get(), &fake_clock);
+
+ const std::string query_string = absl_ports::StrCat(
+ input_string_a, " ", input_string_b, " ", input_string_c, " ",
+ input_string_d, " ", input_string_e);
+
+ SearchSpecProto search_spec;
+ search_spec.set_query(query_string);
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ for (auto _ : state) {
+ QueryProcessor::QueryResults results =
+ query_processor.ParseSearch(search_spec).ValueOrDie();
+ while (results.root_iterator->Advance().ok()) {
+ results.root_iterator->doc_hit_info();
+ }
+ }
+
+ // Destroy document store before the whole directory is removed because it
+ // persists data in destructor.
+ document_store.reset();
+ CleanUp(filesystem, base_dir);
+}
+BENCHMARK(BM_QueryFiveTerms)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_QueryDiacriticTerm(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+ }
+
+ IcingFilesystem icing_filesystem;
+ Filesystem filesystem;
+ const std::string base_dir = GetTestTempDir() + "/query_test";
+ const std::string index_dir = base_dir + "/index";
+ const std::string schema_dir = base_dir + "/schema";
+ const std::string doc_store_dir = base_dir + "/store";
+
+ CleanUp(filesystem, base_dir);
+ if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
+ !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
+ !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
+ ICING_LOG(ERROR) << "Failed to create test directories";
+ }
+
+ std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
+ std::unique_ptr<LanguageSegmenter> language_segmenter =
+ CreateLanguageSegmenter();
+ std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
+ FakeClock fake_clock;
+
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("type1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir));
+ ICING_ASSERT_OK(schema_store->SetSchema(schema));
+
+ std::unique_ptr<DocumentStore> document_store =
+ DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
+ schema_store.get())
+ .ValueOrDie();
+
+ DocumentId document_id = document_store
+ ->Put(DocumentBuilder()
+ .SetKey("icing", "type1")
+ .SetSchema("type1")
+ .Build())
+ .ValueOrDie();
+
+ std::string input_string;
+ while (input_string.length() < state.range(0)) {
+ input_string.append("àáâãā");
+ }
+ AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, input_string);
+
+ QueryProcessor query_processor(index.get(), language_segmenter.get(),
+ normalizer.get(), document_store.get(),
+ schema_store.get(), &fake_clock);
+ SearchSpecProto search_spec;
+ search_spec.set_query(input_string);
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ for (auto _ : state) {
+ QueryProcessor::QueryResults results =
+ query_processor.ParseSearch(search_spec).ValueOrDie();
+ while (results.root_iterator->Advance().ok()) {
+ results.root_iterator->doc_hit_info();
+ }
+ }
+
+ // Destroy document store before the whole directory is removed because it
+ // persists data in destructor.
+ document_store.reset();
+ CleanUp(filesystem, base_dir);
+}
+BENCHMARK(BM_QueryDiacriticTerm)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_QueryHiragana(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+ }
+
+ IcingFilesystem icing_filesystem;
+ Filesystem filesystem;
+ const std::string base_dir = GetTestTempDir() + "/query_test";
+ const std::string index_dir = base_dir + "/index";
+ const std::string schema_dir = base_dir + "/schema";
+ const std::string doc_store_dir = base_dir + "/store";
+
+ CleanUp(filesystem, base_dir);
+ if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
+ !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
+ !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
+ ICING_LOG(ERROR) << "Failed to create test directories";
+ }
+
+ std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
+ std::unique_ptr<LanguageSegmenter> language_segmenter =
+ CreateLanguageSegmenter();
+ std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
+ FakeClock fake_clock;
+
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("type1");
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir));
+ ICING_ASSERT_OK(schema_store->SetSchema(schema));
+
+ std::unique_ptr<DocumentStore> document_store =
+ DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
+ schema_store.get())
+ .ValueOrDie();
+
+ DocumentId document_id = document_store
+ ->Put(DocumentBuilder()
+ .SetKey("icing", "type1")
+ .SetSchema("type1")
+ .Build())
+ .ValueOrDie();
+
+ std::string input_string;
+ while (input_string.length() < state.range(0)) {
+ input_string.append("あいうえお");
+ }
+ AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
+ TermMatchType::EXACT_ONLY, input_string);
+
+ QueryProcessor query_processor(index.get(), language_segmenter.get(),
+ normalizer.get(), document_store.get(),
+ schema_store.get(), &fake_clock);
+
+ SearchSpecProto search_spec;
+ search_spec.set_query(input_string);
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+
+ for (auto _ : state) {
+ QueryProcessor::QueryResults results =
+ query_processor.ParseSearch(search_spec).ValueOrDie();
+ while (results.root_iterator->Advance().ok()) {
+ results.root_iterator->doc_hit_info();
+ }
+ }
+
+ // Destroy document store before the whole directory is removed because it
+ // persists data in destructor.
+ document_store.reset();
+ CleanUp(filesystem, base_dir);
+}
+BENCHMARK(BM_QueryHiragana)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
new file mode 100644
index 0000000..fbc83e2
--- /dev/null
+++ b/icing/query/query-processor_test.cc
@@ -0,0 +1,1668 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/query/query-processor.h"
+
+#include <memory>
+#include <string>
+
+#include "utils/base/status.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+using ::testing::Test;
+using ::testing::UnorderedElementsAre;
+
+class QueryProcessorTest : public Test {
+ protected:
+ QueryProcessorTest()
+ : test_dir_(GetTestTempDir() + "/icing"),
+ index_dir_(test_dir_ + "/index"),
+ store_dir_(test_dir_ + "/store") {}
+
+ void SetUp() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(index_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(store_dir_.c_str());
+
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ SetUpICUDataFile("icing/icu.dat"));
+
+ Index::Options options(index_dir_,
+ /*index_merge_size=*/1024 * 1024);
+ ICING_ASSERT_OK_AND_ASSIGN(index_,
+ Index::Create(options, &icing_filesystem_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(language_segmenter_,
+ LanguageSegmenter::Create(GetLangIdModelPath()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_,
+ Normalizer::Create(/*max_term_byte_size=*/1000));
+
+ SchemaProto schema;
+
+ // Message schema
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("message");
+
+ // Add an indexed property so we generate section metadata on it
+ auto property = type_config->add_properties();
+ property->set_property_name("foo");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ property->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ // Add another indexed property so we generate section metadata on it
+ property = type_config->add_properties();
+ property->set_property_name(indexed_property_);
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ property->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ // Since we order indexed properties alphabetically, "foo" gets section id
+ // 0, and "subject" gets section id 1 for messages
+ indexed_message_section_id_ = 1;
+
+ // Email schema
+ type_config = schema.add_types();
+ type_config->set_schema_type("email");
+
+ // Add an indexed property so we generate section metadata on it
+ property = type_config->add_properties();
+ property->set_property_name(indexed_property_);
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ property->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ // First and only indexed property, so it gets the first id of 0
+ indexed_email_section_id_ = 0;
+
+ // Add an unindexed property
+ property = type_config->add_properties();
+ property->set_property_name(unindexed_property_);
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+
+ ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_));
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_store_,
+ DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
+ schema_store_.get()));
+ }
+
+ libtextclassifier3::Status AddTokenToIndex(
+ DocumentId document_id, SectionId section_id,
+ TermMatchType::Code term_match_type, const std::string& token) {
+ Index::Editor editor =
+ index_->Edit(document_id, section_id, term_match_type);
+ return editor.AddHit(token.c_str());
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ std::unique_ptr<Index> index_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<Normalizer> normalizer_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<DocumentStore> document_store_;
+ FakeClock fake_clock_;
+ const std::string indexed_property_ = "subject";
+ const std::string unindexed_property_ = "to";
+ int indexed_email_section_id_;
+ int indexed_message_section_id_;
+
+ private:
+ IcingFilesystem icing_filesystem_;
+ Filesystem filesystem_;
+ const std::string test_dir_;
+ const std::string index_dir_;
+ const std::string store_dir_;
+};
+
+TEST_F(QueryProcessorTest, EmptyGroupMatchAllDocuments) {
+ // We don't need to insert anything in the index since the empty query will
+ // match all DocumentIds from the DocumentStore
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("email")
+ .Build()));
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+ search_spec.set_query("()");
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ // Descending order of valid DocumentIds
+ EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
+ ElementsAre(document_id2, document_id1));
+ EXPECT_THAT(results.query_terms, IsEmpty());
+}
+
+TEST_F(QueryProcessorTest, EmptyQueryMatchAllDocuments) {
+ // We don't need to insert anything in the index since the empty query will
+ // match all DocumentIds from the DocumentStore
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("email")
+ .Build()));
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+ search_spec.set_query("");
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ // Descending order of valid DocumentIds
+ EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
+ ElementsAre(document_id2, document_id1));
+ EXPECT_THAT(results.query_terms, IsEmpty());
+}
+
+TEST_F(QueryProcessorTest, QueryTermNormalized) {
+ SectionId section_id = 0;
+ SectionIdMask section_id_mask = 1U << section_id;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ EXPECT_THAT(
+ AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
+ IsOk());
+ EXPECT_THAT(
+ AddTokenToIndex(document_id, section_id, term_match_type, "world"),
+ IsOk());
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+ search_spec.set_query("hElLo WORLD");
+ search_spec.set_term_match_type(term_match_type);
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ // Descending order of valid DocumentIds
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(DocHitInfo(document_id, section_id_mask)));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
+}
+
+TEST_F(QueryProcessorTest, OneTermPrefixMatch) {
+ SectionId section_id = 0;
+ SectionIdMask section_id_mask = 1U << section_id;
+ TermMatchType::Code term_match_type = TermMatchType::PREFIX;
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ EXPECT_THAT(
+ AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
+ IsOk());
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+ search_spec.set_query("he");
+ search_spec.set_term_match_type(term_match_type);
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ // Descending order of valid DocumentIds
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(DocHitInfo(document_id, section_id_mask)));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
+}
+
+TEST_F(QueryProcessorTest, OneTermExactMatch) {
+ SectionId section_id = 0;
+ SectionIdMask section_id_mask = 1U << section_id;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // These documents don't actually match the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // namespaces populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .Build()));
+
+ EXPECT_THAT(
+ AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
+ IsOk());
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+ search_spec.set_query("hello");
+ search_spec.set_term_match_type(term_match_type);
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ // Descending order of valid DocumentIds
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(DocHitInfo(document_id, section_id_mask)));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello"));
+}
+
+TEST_F(QueryProcessorTest, AndTwoTermExactMatch) {
+ SectionId section_id = 0;
+ SectionIdMask section_id_mask = 1U << section_id;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // These documents don't actually match the tokens in the index. We're just
+ // inserting the documents so that the DocHitInfoIterators will see that the
+ // document exists and not filter out the DocumentId as deleted.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+
+ EXPECT_THAT(
+ AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
+ IsOk());
+ EXPECT_THAT(
+ AddTokenToIndex(document_id, section_id, term_match_type, "world"),
+ IsOk());
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+ search_spec.set_query("hello world");
+ search_spec.set_term_match_type(term_match_type);
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ // Descending order of valid DocumentIds
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(DocHitInfo(document_id, section_id_mask)));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
+}
+
+// Verifies that two implicitly-ANDed PREFIX terms ("he", "wo") both match a
+// document indexed with the longer tokens "hello" and "world", and that the
+// raw (un-expanded) query terms are what get reported in query_terms.
+TEST_F(QueryProcessorTest, AndTwoTermPrefixMatch) {
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+  TermMatchType::Code term_match_type = TermMatchType::PREFIX;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that the DocHitInfoIterators will see that the
+  // document exists and not filter out the DocumentId as deleted.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  EXPECT_THAT(
+      AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
+      IsOk());
+  EXPECT_THAT(
+      AddTokenToIndex(document_id, section_id, term_match_type, "world"),
+      IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+  SearchSpecProto search_spec;
+  search_spec.set_query("he wo");
+  search_spec.set_term_match_type(term_match_type);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor.ParseSearch(search_spec));
+
+  // Descending order of valid DocumentIds
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(DocHitInfo(document_id, section_id_mask)));
+  EXPECT_THAT(results.query_terms, SizeIs(1));
+  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he", "wo"));
+}
+
+// Verifies an ANDed query mixing an exact term ("hello") with a prefix term
+// ("wo") when the search spec requests PREFIX matching: the document indexed
+// with "hello" (exact) and "world" (prefix) matches both.
+TEST_F(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) {
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that the DocHitInfoIterators will see that the
+  // document exists and not filter out the DocumentId as deleted.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  EXPECT_THAT(AddTokenToIndex(document_id, section_id,
+                              TermMatchType::EXACT_ONLY, "hello"),
+              IsOk());
+  EXPECT_THAT(
+      AddTokenToIndex(document_id, section_id, TermMatchType::PREFIX, "world"),
+      IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+  SearchSpecProto search_spec;
+  search_spec.set_query("hello wo");
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor.ParseSearch(search_spec));
+
+  // Descending order of valid DocumentIds
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(DocHitInfo(document_id, section_id_mask)));
+  EXPECT_THAT(results.query_terms, SizeIs(1));
+  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "wo"));
+}
+
+// Verifies the explicit OR operator with exact terms: each of two documents
+// contains only one of the query terms, and both are returned (in descending
+// DocumentId order).
+TEST_F(QueryProcessorTest, OrTwoTermExactMatch) {
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that the DocHitInfoIterators will see that the
+  // document exists and not filter out the DocumentId as deleted.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  EXPECT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "hello"),
+      IsOk());
+  EXPECT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "world"),
+      IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+  SearchSpecProto search_spec;
+  search_spec.set_query("hello OR world");
+  search_spec.set_term_match_type(term_match_type);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor.ParseSearch(search_spec));
+
+  // Descending order of valid DocumentIds
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(DocHitInfo(document_id2, section_id_mask),
+                          DocHitInfo(document_id1, section_id_mask)));
+  EXPECT_THAT(results.query_terms, SizeIs(1));
+  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
+}
+
+// Verifies the OR operator with PREFIX terms: "he OR wo" matches one document
+// indexed with "hello" and another indexed with "world"; both are returned.
+TEST_F(QueryProcessorTest, OrTwoTermPrefixMatch) {
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+  TermMatchType::Code term_match_type = TermMatchType::PREFIX;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that the DocHitInfoIterators will see that the
+  // document exists and not filter out the DocumentId as deleted.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  EXPECT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "hello"),
+      IsOk());
+  EXPECT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "world"),
+      IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+  SearchSpecProto search_spec;
+  search_spec.set_query("he OR wo");
+  search_spec.set_term_match_type(term_match_type);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor.ParseSearch(search_spec));
+
+  // Descending order of valid DocumentIds
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(DocHitInfo(document_id2, section_id_mask),
+                          DocHitInfo(document_id1, section_id_mask)));
+  EXPECT_THAT(results.query_terms, SizeIs(1));
+  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he", "wo"));
+}
+
+// Verifies the OR operator mixing an exact term ("hello") and a prefix term
+// ("wo") under a PREFIX search spec: each document matches one branch of the
+// OR and both are returned.
+TEST_F(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) {
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that the DocHitInfoIterators will see that the
+  // document exists and not filter out the DocumentId as deleted.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  EXPECT_THAT(AddTokenToIndex(document_id1, section_id,
+                              TermMatchType::EXACT_ONLY, "hello"),
+              IsOk());
+  EXPECT_THAT(
+      AddTokenToIndex(document_id2, section_id, TermMatchType::PREFIX, "world"),
+      IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+  SearchSpecProto search_spec;
+  search_spec.set_query("hello OR wo");
+  search_spec.set_term_match_type(TermMatchType::PREFIX);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor.ParseSearch(search_spec));
+
+  // Descending order of valid DocumentIds
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(DocHitInfo(document_id2, section_id_mask),
+                          DocHitInfo(document_id1, section_id_mask)));
+  EXPECT_THAT(results.query_terms, SizeIs(1));
+  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "wo"));
+}
+
+// Verifies operator precedence when AND (implicit, space) and OR are combined
+// in a single query: OR binds tighter than the implicit AND.
+TEST_F(QueryProcessorTest, CombinedAndOrTerms) {
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that the DocHitInfoIterators will see that the
+  // document exists and not filter out the DocumentId as deleted.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  // Document 1 has content "animal puppy dog"
+  EXPECT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
+      IsOk());
+  EXPECT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "puppy"),
+      IsOk());
+  EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
+              IsOk());
+
+  // Document 2 has content "animal kitten cat"
+  EXPECT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
+      IsOk());
+  EXPECT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "kitten"),
+      IsOk());
+  EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
+              IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+
+  {
+    // OR gets precedence over AND, this is parsed as ((puppy OR kitten) AND
+    // dog)
+    SearchSpecProto search_spec;
+    search_spec.set_query("puppy OR kitten dog");
+    search_spec.set_term_match_type(term_match_type);
+
+    ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                               query_processor.ParseSearch(search_spec));
+
+    // Only Document 1 matches since it has puppy AND dog
+    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+                ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+    EXPECT_THAT(results.query_terms, SizeIs(1));
+    EXPECT_THAT(results.query_terms[""],
+                UnorderedElementsAre("puppy", "kitten", "dog"));
+  }
+
+  {
+    // OR gets precedence over AND, this is parsed as (animal AND (puppy OR
+    // kitten))
+    SearchSpecProto search_spec;
+    search_spec.set_query("animal puppy OR kitten");
+    search_spec.set_term_match_type(term_match_type);
+
+    ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                               query_processor.ParseSearch(search_spec));
+
+    // Both Document 1 and 2 match since Document 1 has animal AND puppy, and
+    // Document 2 has animal AND kitten
+    // Descending order of valid DocumentIds
+    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+                ElementsAre(DocHitInfo(document_id2, section_id_mask),
+                            DocHitInfo(document_id1, section_id_mask)));
+    EXPECT_THAT(results.query_terms, SizeIs(1));
+    EXPECT_THAT(results.query_terms[""],
+                UnorderedElementsAre("animal", "puppy", "kitten"));
+  }
+
+  {
+    // OR gets precedence over AND, this is parsed as (kitten AND ((foo OR bar)
+    // OR cat))
+    SearchSpecProto search_spec;
+    search_spec.set_query("kitten foo OR bar OR cat");
+    search_spec.set_term_match_type(term_match_type);
+
+    ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                               query_processor.ParseSearch(search_spec));
+
+    // Only Document 2 matches since it has both kitten and cat
+    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+                ElementsAre(DocHitInfo(document_id2, section_id_mask)));
+    EXPECT_THAT(results.query_terms, SizeIs(1));
+    EXPECT_THAT(results.query_terms[""],
+                UnorderedElementsAre("kitten", "foo", "bar", "cat"));
+  }
+}
+
+// Verifies that parentheses override the default OR-over-AND precedence:
+// "puppy OR (kitten foo)" keeps "kitten foo" together as one AND group.
+TEST_F(QueryProcessorTest, OneGroup) {
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that the DocHitInfoIterators will see that the
+  // document exists and not filter out the DocumentId as deleted.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  // Document 1 has content "puppy dog"
+  EXPECT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "puppy"),
+      IsOk());
+  EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
+              IsOk());
+
+  // Document 2 has content "kitten cat"
+  EXPECT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "kitten"),
+      IsOk());
+  EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
+              IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+
+  // Without grouping, this would be parsed as ((puppy OR kitten) AND foo) and
+  // no documents would match. But with grouping, Document 1 matches puppy
+  SearchSpecProto search_spec;
+  search_spec.set_query("puppy OR (kitten foo)");
+  search_spec.set_term_match_type(term_match_type);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor.ParseSearch(search_spec));
+
+  // Descending order of valid DocumentIds
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+  EXPECT_THAT(results.query_terms, SizeIs(1));
+  EXPECT_THAT(results.query_terms[""],
+              UnorderedElementsAre("puppy", "kitten", "foo"));
+}
+
+// Verifies that two parenthesized AND groups joined by OR are each evaluated
+// as a unit: "(puppy dog) OR (kitten cat)" matches each document on exactly
+// one group.
+TEST_F(QueryProcessorTest, TwoGroups) {
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that the DocHitInfoIterators will see that the
+  // document exists and not filter out the DocumentId as deleted.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  // Document 1 has content "puppy dog"
+  EXPECT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "puppy"),
+      IsOk());
+  EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
+              IsOk());
+
+  // Document 2 has content "kitten cat"
+  EXPECT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "kitten"),
+      IsOk());
+  EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
+              IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+
+  // Without grouping, this would be parsed as (puppy AND (dog OR kitten) AND
+  // cat) and wouldn't match any documents. But with grouping, Document 1
+  // matches (puppy AND dog) and Document 2 matches (kitten and cat).
+  SearchSpecProto search_spec;
+  search_spec.set_query("(puppy dog) OR (kitten cat)");
+  search_spec.set_term_match_type(term_match_type);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor.ParseSearch(search_spec));
+
+  // Descending order of valid DocumentIds
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(DocHitInfo(document_id2, section_id_mask),
+                          DocHitInfo(document_id1, section_id_mask)));
+  EXPECT_THAT(results.query_terms, SizeIs(1));
+  EXPECT_THAT(results.query_terms[""],
+              UnorderedElementsAre("puppy", "dog", "kitten", "cat"));
+}
+
+// Verifies that redundant levels of nested parentheses collapse to the same
+// parse as a single group: "puppy OR ((((kitten foo))))" behaves like
+// "puppy OR (kitten foo)".
+TEST_F(QueryProcessorTest, ManyLevelNestedGrouping) {
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that the DocHitInfoIterators will see that the
+  // document exists and not filter out the DocumentId as deleted.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  // Document 1 has content "puppy dog"
+  EXPECT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "puppy"),
+      IsOk());
+  EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
+              IsOk());
+
+  // Document 2 has content "kitten cat"
+  EXPECT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "kitten"),
+      IsOk());
+  EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
+              IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+
+  // Without grouping, this would be parsed as ((puppy OR kitten) AND foo) and
+  // no documents would match. But with grouping, Document 1 matches puppy
+  SearchSpecProto search_spec;
+  search_spec.set_query("puppy OR ((((kitten foo))))");
+  search_spec.set_term_match_type(term_match_type);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor.ParseSearch(search_spec));
+
+  // Descending order of valid DocumentIds
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+  EXPECT_THAT(results.query_terms, SizeIs(1));
+  EXPECT_THAT(results.query_terms[""],
+              UnorderedElementsAre("puppy", "kitten", "foo"));
+}
+
+// Verifies a group nested inside another group, with no whitespace before the
+// inner parenthesis: "puppy OR (kitten(cat))" parses as
+// (puppy OR (kitten AND (cat))).
+TEST_F(QueryProcessorTest, OneLevelNestedGrouping) {
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that the DocHitInfoIterators will see that the
+  // document exists and not filter out the DocumentId as deleted.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  // Document 1 has content "puppy dog"
+  EXPECT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "puppy"),
+      IsOk());
+  EXPECT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
+              IsOk());
+
+  // Document 2 has content "kitten cat"
+  EXPECT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "kitten"),
+      IsOk());
+  EXPECT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
+              IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+  // Document 1 will match puppy and Document 2 matches (kitten AND (cat))
+  SearchSpecProto search_spec;
+  search_spec.set_query("puppy OR (kitten(cat))");
+  search_spec.set_term_match_type(term_match_type);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor.ParseSearch(search_spec));
+
+  // Descending order of valid DocumentIds
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(DocHitInfo(document_id2, section_id_mask),
+                          DocHitInfo(document_id1, section_id_mask)));
+  EXPECT_THAT(results.query_terms, SizeIs(1));
+  EXPECT_THAT(results.query_terms[""],
+              UnorderedElementsAre("puppy", "kitten", "cat"));
+}
+
+// Verifies the exclusion operator "-term": only documents NOT containing the
+// excluded term are returned, with an empty section mask (exclusion hits are
+// not tied to a section), and excluded terms are not reported in query_terms.
+TEST_F(QueryProcessorTest, ExcludeTerm) {
+  SectionId section_id = 0;
+  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that they'll bump the last_added_document_id,
+  // which will give us the proper exclusion results
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  ASSERT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "hello"),
+      IsOk());
+  ASSERT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "world"),
+      IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+  SearchSpecProto search_spec;
+  search_spec.set_query("-hello");
+  search_spec.set_term_match_type(term_match_type);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor.ParseSearch(search_spec));
+
+  // We don't have the section mask to indicate which section "world" came
+  // from. It doesn't matter which section it was in since the query doesn't
+  // care. It just wanted documents that didn't have "hello"
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(DocHitInfo(document_id2, kSectionIdMaskNone)));
+  EXPECT_THAT(results.query_terms, IsEmpty());
+}
+
+// Verifies that excluding a term absent from the index ("-foo") matches every
+// document, since no document contains the excluded term.
+TEST_F(QueryProcessorTest, ExcludeNonexistentTerm) {
+  SectionId section_id = 0;
+  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that they'll bump the last_added_document_id,
+  // which will give us the proper exclusion results
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  ASSERT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "hello"),
+      IsOk());
+  ASSERT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "world"),
+      IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+  SearchSpecProto search_spec;
+  search_spec.set_query("-foo");
+  search_spec.set_term_match_type(term_match_type);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor.ParseSearch(search_spec));
+
+  // Descending order of valid DocumentIds
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(DocHitInfo(document_id2, kSectionIdMaskNone),
+                          DocHitInfo(document_id1, kSectionIdMaskNone)));
+  EXPECT_THAT(results.query_terms, IsEmpty());
+}
+
+// Verifies exclusion terms combined with implicit AND, both with another
+// exclusion ("-dog -cat") and with an inclusion ("-animal cat").
+TEST_F(QueryProcessorTest, ExcludeAnd) {
+  SectionId section_id = 0;
+  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that they'll bump the last_added_document_id,
+  // which will give us the proper exclusion results
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  // Document 1 has content "animal dog"
+  ASSERT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
+      IsOk());
+  ASSERT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
+              IsOk());
+
+  // Document 2 has content "animal cat"
+  ASSERT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
+      IsOk());
+  ASSERT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
+              IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+  {
+    SearchSpecProto search_spec;
+    search_spec.set_query("-dog -cat");
+    search_spec.set_term_match_type(term_match_type);
+
+    ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                               query_processor.ParseSearch(search_spec));
+
+    // The query is interpreted as "exclude all documents that have dog, and
+    // exclude all documents that have cat". Since Document 1 contains dog and
+    // Document 2 contains cat, there are no results.
+    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+    EXPECT_THAT(results.query_terms, IsEmpty());
+  }
+
+  {
+    SearchSpecProto search_spec;
+    search_spec.set_query("-animal cat");
+    search_spec.set_term_match_type(term_match_type);
+
+    ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                               query_processor.ParseSearch(search_spec));
+
+    // The query is interpreted as "exclude all documents that have animal, and
+    // include all documents that have cat". Since both documents contain
+    // animal, there are no results.
+    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+    EXPECT_THAT(results.query_terms, SizeIs(1));
+    EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("cat"));
+  }
+}
+
+// Verifies exclusion terms combined with OR, both with another exclusion
+// ("-animal OR -cat") and with an inclusion ("animal OR -cat").
+TEST_F(QueryProcessorTest, ExcludeOr) {
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+  // These documents don't actually match to the tokens in the index. We're just
+  // inserting the documents so that they'll bump the last_added_document_id,
+  // which will give us the proper exclusion results
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  // Document 1 has content "animal dog"
+  ASSERT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
+      IsOk());
+  ASSERT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
+              IsOk());
+
+  // Document 2 has content "animal cat"
+  ASSERT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
+      IsOk());
+  ASSERT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
+              IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+  {
+    SearchSpecProto search_spec;
+    search_spec.set_query("-animal OR -cat");
+    search_spec.set_term_match_type(term_match_type);
+
+    ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                               query_processor.ParseSearch(search_spec));
+
+    // Only Document 1 qualifies: it lacks cat. Document 2 has both animal and
+    // cat, so neither exclusion branch admits it.
+    // We don't have a section mask indicating which sections in this document
+    // matched the query since it's not based on section-term matching. It's
+    // more based on the fact that the query excluded all the other documents.
+    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+                ElementsAre(DocHitInfo(document_id1, kSectionIdMaskNone)));
+    EXPECT_THAT(results.query_terms, IsEmpty());
+  }
+
+  {
+    SearchSpecProto search_spec;
+    search_spec.set_query("animal OR -cat");
+    search_spec.set_term_match_type(term_match_type);
+
+    ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                               query_processor.ParseSearch(search_spec));
+
+    // Both documents contain "animal", so both match the inclusion branch and
+    // carry that section's mask.
+    // Descending order of valid DocumentIds
+    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+                ElementsAre(DocHitInfo(document_id2, section_id_mask),
+                            DocHitInfo(document_id1, section_id_mask)));
+    EXPECT_THAT(results.query_terms, SizeIs(1));
+    EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
+  }
+}
+
+// Verifies that hits belonging to a deleted document are filtered out of
+// query results even though the document's tokens remain in the index.
+TEST_F(QueryProcessorTest, DeletedFilter) {
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+  // These documents don't actually match to the tokens in the index. We're
+  // inserting the documents to get the appropriate number of documents and
+  // namespaces populated.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  // Delete Document 1; its hits should no longer surface in results.
+  EXPECT_THAT(document_store_->Delete("namespace", "1"), IsOk());
+
+  // Document 1 has content "animal dog"
+  ASSERT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
+      IsOk());
+  ASSERT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
+              IsOk());
+
+  // Document 2 has content "animal cat"
+  ASSERT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
+      IsOk());
+  ASSERT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
+              IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+  SearchSpecProto search_spec;
+  search_spec.set_query("animal");
+  search_spec.set_term_match_type(term_match_type);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor.ParseSearch(search_spec));
+
+  // Descending order of valid DocumentIds
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(DocHitInfo(document_id2, section_id_mask)));
+  EXPECT_THAT(results.query_terms, SizeIs(1));
+  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
+}
+
+// Verifies that a namespace filter in the SearchSpec restricts results to
+// documents stored under the requested namespace, even when documents in
+// other namespaces also match the query term.
+TEST_F(QueryProcessorTest, NamespaceFilter) {
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+  // These documents don't actually match to the tokens in the index. We're
+  // inserting the documents to get the appropriate number of documents and
+  // namespaces populated.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace1", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace2", "2")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  // Document 1 has content "animal dog"
+  ASSERT_THAT(
+      AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
+      IsOk());
+  ASSERT_THAT(AddTokenToIndex(document_id1, section_id, term_match_type, "dog"),
+              IsOk());
+
+  // Document 2 has content "animal cat"
+  ASSERT_THAT(
+      AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
+      IsOk());
+  ASSERT_THAT(AddTokenToIndex(document_id2, section_id, term_match_type, "cat"),
+              IsOk());
+
+  QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+                                 normalizer_.get(), document_store_.get(),
+                                 schema_store_.get(), &fake_clock_);
+  SearchSpecProto search_spec;
+  search_spec.set_query("animal");
+  search_spec.set_term_match_type(term_match_type);
+  // Restrict results to "namespace1" only; Document 2 lives in "namespace2".
+  search_spec.add_namespace_filters("namespace1");
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor.ParseSearch(search_spec));
+
+  // Descending order of valid DocumentIds
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+  EXPECT_THAT(results.query_terms, SizeIs(1));
+  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
+}
+
+TEST_F(QueryProcessorTest, SchemaTypeFilter) {
+ SectionId section_id = 0;
+ SectionIdMask section_id_mask = 1U << section_id;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
+ .Build()));
+
+ // Document 1 has content "animal dog"
+ ASSERT_THAT(
+ AddTokenToIndex(document_id1, section_id, term_match_type, "animal"),
+ IsOk());
+
+ // Document 2 has content "animal cat"
+ ASSERT_THAT(
+ AddTokenToIndex(document_id2, section_id, term_match_type, "animal"),
+ IsOk());
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+ search_spec.set_query("animal");
+ search_spec.set_term_match_type(term_match_type);
+ search_spec.add_schema_type_filters("email");
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ // Descending order of valid DocumentIds
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
+}
+
+TEST_F(QueryProcessorTest, SectionFilterForOneDocument) {
+ SectionIdMask section_id_mask = 1U << indexed_email_section_id_;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+
+ // Document has content "animal"
+ ASSERT_THAT(AddTokenToIndex(document_id, indexed_email_section_id_,
+ term_match_type, "animal"),
+ IsOk());
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+ // Create a section filter '<section name>:<query term>'
+ search_spec.set_query(indexed_property_ + ":animal");
+ search_spec.set_term_match_type(term_match_type);
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ // Descending order of valid DocumentIds
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(DocHitInfo(document_id, section_id_mask)));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[indexed_property_],
+ UnorderedElementsAre("animal"));
+}
+
+TEST_F(QueryProcessorTest, SectionFilterAcrossSchemaTypes) {
+ SectionIdMask email_section_id_mask = 1U << indexed_email_section_id_;
+ SectionIdMask message_section_id_mask = 1U << indexed_message_section_id_;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
+ .Build()));
+
+ // Email document has content "animal"
+ ASSERT_THAT(AddTokenToIndex(email_document_id, indexed_email_section_id_,
+ term_match_type, "animal"),
+ IsOk());
+
+ // Message document has content "animal"
+ ASSERT_THAT(AddTokenToIndex(message_document_id, indexed_message_section_id_,
+ term_match_type, "animal"),
+ IsOk());
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+ // Create a section filter '<section name>:<query term>'
+ search_spec.set_query(indexed_property_ + ":animal");
+ search_spec.set_term_match_type(term_match_type);
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ // Ordered by descending DocumentId, so message comes first since it was
+ // inserted last
+ EXPECT_THAT(
+ GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(DocHitInfo(message_document_id, message_section_id_mask),
+ DocHitInfo(email_document_id, email_section_id_mask)));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[indexed_property_],
+ UnorderedElementsAre("animal"));
+}
+
+TEST_F(QueryProcessorTest, SectionFilterWithinSchemaType) {
+ SectionIdMask email_section_id_mask = 1U << indexed_email_section_id_;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
+ .Build()));
+
+ // Email document has content "animal"
+ ASSERT_THAT(AddTokenToIndex(email_document_id, indexed_email_section_id_,
+ term_match_type, "animal"),
+ IsOk());
+
+ // Message document has content "animal"
+ ASSERT_THAT(AddTokenToIndex(message_document_id, indexed_message_section_id_,
+ term_match_type, "animal"),
+ IsOk());
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+ // Create a section filter '<section name>:<query term>', but only look within
+ // documents of email schema
+ search_spec.set_query(indexed_property_ + ":animal");
+ search_spec.add_schema_type_filters("email");
+ search_spec.set_term_match_type(term_match_type);
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ // Shouldn't include the message document since we're only looking at email
+ // types
+ EXPECT_THAT(
+ GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(DocHitInfo(email_document_id, email_section_id_mask)));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[indexed_property_],
+ UnorderedElementsAre("animal"));
+}
+
+TEST_F(QueryProcessorTest, SectionFilterRespectsDifferentSectionIds) {
+ SectionIdMask email_section_id_mask = 1U << indexed_email_section_id_;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
+ .Build()));
+
+ // Email document has content "animal"
+ ASSERT_THAT(AddTokenToIndex(email_document_id, indexed_email_section_id_,
+ term_match_type, "animal"),
+ IsOk());
+
+  // Message document has content "animal", but put it in the same section id
+  // as the indexed email section id, the same id as indexed property "foo" in
+  // the message type
+ ASSERT_THAT(AddTokenToIndex(message_document_id, indexed_email_section_id_,
+ term_match_type, "animal"),
+ IsOk());
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+  // Create a section filter '<section name>:<query term>'; no schema type
+  // filter is applied, so both document types are candidates
+ search_spec.set_query(indexed_property_ + ":animal");
+ search_spec.set_term_match_type(term_match_type);
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ // Even though the section id is the same, we should be able to tell that it
+ // doesn't match to the name of the section filter
+ EXPECT_THAT(
+ GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(DocHitInfo(email_document_id, email_section_id_mask)));
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[indexed_property_],
+ UnorderedElementsAre("animal"));
+}
+
+TEST_F(QueryProcessorTest, NonexistentSectionFilterReturnsEmptyResults) {
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+
+ // Email document has content "animal"
+ ASSERT_THAT(AddTokenToIndex(email_document_id, indexed_email_section_id_,
+ term_match_type, "animal"),
+ IsOk());
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+  // Create a section filter '<section name>:<query term>' using a section
+  // name that doesn't exist in any schema type
+ search_spec.set_query("nonexistent.section:animal");
+ search_spec.set_term_match_type(term_match_type);
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+  // Since the section in the filter doesn't exist in any schema type, no
+  // documents should be returned
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms["nonexistent.section"],
+ UnorderedElementsAre("animal"));
+}
+
+TEST_F(QueryProcessorTest, UnindexedSectionFilterReturnsEmptyResults) {
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+
+ // Email document has content "animal"
+ ASSERT_THAT(AddTokenToIndex(email_document_id, indexed_email_section_id_,
+ term_match_type, "animal"),
+ IsOk());
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+  // Create a section filter '<section name>:<query term>' using a property
+  // that exists in the schema but is not indexed
+ search_spec.set_query(unindexed_property_ + ":animal");
+ search_spec.set_term_match_type(term_match_type);
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+  // Since the filtered section is not indexed, there are no hits for it and
+  // no documents should be returned
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+ EXPECT_THAT(results.query_terms, SizeIs(1));
+ EXPECT_THAT(results.query_terms[unindexed_property_],
+ UnorderedElementsAre("animal"));
+}
+
+TEST_F(QueryProcessorTest, SectionFilterTermAndUnrestrictedTerm) {
+ SectionIdMask email_section_id_mask = 1U << indexed_email_section_id_;
+ SectionIdMask message_section_id_mask = 1U << indexed_message_section_id_;
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+
+ // These documents don't actually match to the tokens in the index. We're
+ // inserting the documents to get the appropriate number of documents and
+ // schema types populated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
+ .Build()));
+
+ // Email document has content "animal"
+ ASSERT_THAT(AddTokenToIndex(email_document_id, indexed_email_section_id_,
+ term_match_type, "animal"),
+ IsOk());
+ ASSERT_THAT(AddTokenToIndex(email_document_id, indexed_email_section_id_,
+ term_match_type, "cat"),
+ IsOk());
+
+ // Message document has content "animal"
+ ASSERT_THAT(AddTokenToIndex(message_document_id, indexed_message_section_id_,
+ term_match_type, "animal"),
+ IsOk());
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock_);
+ SearchSpecProto search_spec;
+ // Create a section filter '<section name>:<query term>'
+ search_spec.set_query("cat OR " + indexed_property_ + ":animal");
+ search_spec.set_term_match_type(term_match_type);
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ // Ordered by descending DocumentId, so message comes first since it was
+ // inserted last
+ EXPECT_THAT(
+ GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(DocHitInfo(message_document_id, message_section_id_mask),
+ DocHitInfo(email_document_id, email_section_id_mask)));
+ EXPECT_THAT(results.query_terms, SizeIs(2));
+ EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("cat"));
+ EXPECT_THAT(results.query_terms[indexed_property_],
+ UnorderedElementsAre("animal"));
+}
+
+TEST_F(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(0)
+ .SetTtlSecs(100)
+ .Build()));
+
+ EXPECT_THAT(AddTokenToIndex(document_id, indexed_email_section_id_,
+ term_match_type, "hello"),
+ IsOk());
+
+ // Arbitrary value, just has to be less than the document's creation
+ // timestamp + ttl
+ FakeClock fake_clock;
+ fake_clock.SetSeconds(50);
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock);
+ SearchSpecProto search_spec;
+ search_spec.set_query("hello");
+ search_spec.set_term_match_type(term_match_type);
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ SectionIdMask section_id_mask = 1U << indexed_email_section_id_;
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+ ElementsAre(DocHitInfo(document_id, section_id_mask)));
+}
+
+TEST_F(QueryProcessorTest, DocumentPastTtlFilteredOut) {
+ TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(0)
+ .SetTtlSecs(100)
+ .Build()));
+
+ EXPECT_THAT(AddTokenToIndex(document_id, indexed_email_section_id_,
+ term_match_type, "hello"),
+ IsOk());
+
+ // Arbitrary value, just has to be greater than the document's creation
+ // timestamp + ttl
+ FakeClock fake_clock;
+ fake_clock.SetSeconds(200);
+
+ QueryProcessor query_processor(index_.get(), language_segmenter_.get(),
+ normalizer_.get(), document_store_.get(),
+ schema_store_.get(), &fake_clock);
+ SearchSpecProto search_spec;
+ search_spec.set_query("hello");
+ search_spec.set_term_match_type(term_match_type);
+
+ ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+ query_processor.ParseSearch(search_spec));
+
+ EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/query/query-terms.h b/icing/query/query-terms.h
new file mode 100644
index 0000000..1c5ce02
--- /dev/null
+++ b/icing/query/query-terms.h
@@ -0,0 +1,34 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_QUERY_QUERY_TERMS_H_
+#define ICING_QUERY_QUERY_TERMS_H_
+
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
+
+namespace icing {
+namespace lib {
+
+// A map from section names to sets of terms restricted to those sections.
+// Query terms that are not restricted are found at the entry with key "".
+using SectionRestrictQueryTermsMap =
+ std::unordered_map<std::string_view, std::unordered_set<std::string>>;
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_QUERY_QUERY_TERMS_H_
diff --git a/icing/result-retriever.cc b/icing/result-retriever.cc
new file mode 100644
index 0000000..a80cf96
--- /dev/null
+++ b/icing/result-retriever.cc
@@ -0,0 +1,73 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result-retriever.h"
+
+#include "utils/base/statusor.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::StatusOr<std::vector<SearchResultProto::ResultProto>>
+ResultRetriever::RetrieveResults(
+ const ResultSpecProto& result_spec,
+ const SectionRestrictQueryTermsMap& query_terms,
+ TermMatchType::Code match_type,
+ const std::vector<ScoredDocumentHit>& scored_document_hits) const {
+ const int num_results_returned =
+ std::min(static_cast<int>(scored_document_hits.size()),
+ result_spec.num_to_retrieve());
+ std::vector<SearchResultProto::ResultProto> search_results;
+ search_results.reserve(num_results_returned);
+ for (const auto& scored_document_hit : scored_document_hits) {
+ if (search_results.size() >= result_spec.num_to_retrieve()) {
+ break;
+ }
+ libtextclassifier3::StatusOr<DocumentProto> document_or =
+ doc_store_.Get(scored_document_hit.document_id());
+
+ if (!document_or.ok()) {
+ // Internal errors from document store are IO errors, return directly.
+ if (absl_ports::IsInternal(document_or.status())) {
+ return document_or.status();
+ }
+
+ if (ignore_bad_document_ids_) {
+ continue;
+ } else {
+ return document_or.status();
+ }
+ }
+
+ SearchResultProto::ResultProto result;
+ // Add the snippet if requested.
+ if (result_spec.snippet_spec().num_matches_per_property() > 0 &&
+ result_spec.snippet_spec().num_to_snippet() > search_results.size()) {
+ SnippetProto snippet_proto = snippet_retriever_.RetrieveSnippet(
+ query_terms, match_type, result_spec.snippet_spec(),
+ document_or.ValueOrDie(), scored_document_hit.hit_section_id_mask());
+ *result.mutable_snippet() = std::move(snippet_proto);
+ }
+
+    // Add the document itself.
+ *result.mutable_document() = std::move(document_or).ValueOrDie();
+ search_results.push_back(std::move(result));
+ }
+ return search_results;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result-retriever.h b/icing/result-retriever.h
new file mode 100644
index 0000000..48ff5c7
--- /dev/null
+++ b/icing/result-retriever.h
@@ -0,0 +1,82 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_RESULT_RETRIEVER_H_
+#define ICING_RESULT_RETRIEVER_H_
+
+#include <utility>
+#include <vector>
+
+#include "utils/base/statusor.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/query/query-terms.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/snippet-retriever.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/tokenization/language-segmenter.h"
+
+namespace icing {
+namespace lib {
+
+class ResultRetriever {
+ public:
+ // Does not take any ownership, and all pointers must refer to valid objects
+ // that outlive the one constructed.
+ explicit ResultRetriever(const DocumentStore* doc_store,
+ const SchemaStore* schema_store,
+ const LanguageSegmenter* language_segmenter,
+ bool ignore_bad_document_ids = true)
+ : doc_store_(*doc_store),
+ snippet_retriever_(schema_store, language_segmenter),
+ ignore_bad_document_ids_(ignore_bad_document_ids) {}
+
+  // Gets results (pairs of DocumentProtos and SnippetProtos) for the given
+  // scored document hits from the document store. The order of documents
+  // returned matches the order of scored_document_hits.
+ //
+ // Parameter "ignore_bad_document_ids" indicates whether to ignore invalid and
+ // non-existing document_ids. If it's true, errors on some document_ids will
+ // be ignored and valid documents will be returned, otherwise any error will
+ // be returned immediately. Note that IO errors will always be returned.
+ //
+ // Returns when ignore_bad_document_ids is true:
+ // A list of valid documents on success
+ // INTERNAL_ERROR on IO error
+ //
+ // Returns when ignore_bad_document_ids is false:
+ // A list of documents on success
+ // INVALID_ARGUMENT if any document_id < 0
+ // NOT_FOUND if any doc doesn't exist or has been deleted
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<std::vector<SearchResultProto::ResultProto>>
+ RetrieveResults(
+ const ResultSpecProto& result_spec,
+ const SectionRestrictQueryTermsMap& query_terms,
+ TermMatchType::Code match_type,
+ const std::vector<ScoredDocumentHit>& scored_document_hits) const;
+
+ private:
+ const DocumentStore& doc_store_;
+ const SnippetRetriever snippet_retriever_;
+ const bool ignore_bad_document_ids_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_RESULT_RETRIEVER_H_
diff --git a/icing/result-retriever_test.cc b/icing/result-retriever_test.cc
new file mode 100644
index 0000000..5e22041
--- /dev/null
+++ b/icing/result-retriever_test.cc
@@ -0,0 +1,438 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result-retriever.h"
+
+#include <limits>
+#include <memory>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/query/query-terms.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/snippet-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::Return;
+using ::testing::SizeIs;
+
+class ResultRetrieverTest : public testing::Test {
+ protected:
+ ResultRetrieverTest() : test_dir_(GetTestTempDir() + "/icing") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ test_document1_ = DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetCreationTimestampSecs(1574365086)
+ .Build();
+ test_document2_ = DocumentBuilder()
+ .SetKey("icing", "email/2")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo 2")
+ .AddStringProperty("body", "body bar 2")
+ .SetCreationTimestampSecs(1574365087)
+ .Build();
+ test_document3_ = DocumentBuilder()
+ .SetKey("icing", "email/3")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo 3")
+ .AddStringProperty("body", "body bar 3")
+ .SetCreationTimestampSecs(1574365088)
+ .Build();
+ }
+
+ void SetUp() override {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ SetUpICUDataFile("icing/icu.dat"));
+ ICING_ASSERT_OK_AND_ASSIGN(language_segmenter_,
+ LanguageSegmenter::Create(GetLangIdModelPath()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_));
+
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("email");
+ PropertyConfigProto* prop_config = type_config->add_properties();
+ prop_config->set_property_name("subject");
+ prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
+ prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ prop_config->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ prop_config->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ prop_config = type_config->add_properties();
+ prop_config->set_property_name("body");
+ prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
+ prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ prop_config->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ prop_config->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+ result_spec_no_snippet_ = ResultSpecProto::default_instance();
+
+ result_spec_snippet_.mutable_snippet_spec()->set_num_to_snippet(
+ std::numeric_limits<int>::max());
+ result_spec_snippet_.mutable_snippet_spec()->set_num_matches_per_property(
+ std::numeric_limits<int>::max());
+ result_spec_snippet_.mutable_snippet_spec()->set_max_window_bytes(1024);
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ const Filesystem filesystem_;
+ ResultSpecProto result_spec_no_snippet_;
+ ResultSpecProto result_spec_snippet_;
+ const std::string test_dir_;
+ DocumentProto test_document1_;
+ DocumentProto test_document2_;
+ DocumentProto test_document3_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ FakeClock fake_clock_;
+};
+
+TEST_F(ResultRetrieverTest, Simple) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(test_document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(test_document2_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ doc_store->Put(test_document3_));
+
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+ {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+ {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ auto result_retriever = std::make_unique<ResultRetriever>(
+ doc_store.get(), schema_store_.get(), language_segmenter_.get());
+
+ SearchResultProto::ResultProto result1;
+ *result1.mutable_document() = test_document1_;
+ SearchResultProto::ResultProto result2;
+ *result2.mutable_document() = test_document2_;
+ SearchResultProto::ResultProto result3;
+ *result3.mutable_document() = test_document3_;
+
+ SectionRestrictQueryTermsMap query_terms{};
+ EXPECT_THAT(
+ result_retriever->RetrieveResults(result_spec_no_snippet_, query_terms,
+ TermMatchType::EXACT_ONLY,
+ scored_document_hits),
+ IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2),
+ EqualsProto(result3))));
+}
+
+// Verifies that RetrieveResults honors ResultSpec.num_to_retrieve: with three
+// scored hits but num_to_retrieve=1, only the first hit's document is
+// returned.
+TEST_F(ResultRetrieverTest, OnlyOneResultRequested) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+                            schema_store_.get()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             doc_store->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             doc_store->Put(test_document2_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             doc_store->Put(test_document3_));
+
+  // Restrict retrieval to a single result.
+  result_spec_no_snippet_.set_num_to_retrieve(1);
+
+  std::vector<ScoredDocumentHit> scored_document_hits = {
+      {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+  auto result_retriever = std::make_unique<ResultRetriever>(
+      doc_store.get(), schema_store_.get(), language_segmenter_.get());
+
+  SearchResultProto::ResultProto result1;
+  *result1.mutable_document() = test_document1_;
+
+  SectionRestrictQueryTermsMap query_terms{};
+  EXPECT_THAT(result_retriever->RetrieveResults(
+                  result_spec_no_snippet_, query_terms,
+                  TermMatchType::EXACT_ONLY, scored_document_hits),
+              IsOkAndHolds(ElementsAre(EqualsProto(result1))));
+}
+
+// With ignore_bad_document_ids=true, a hit referencing an invalid DocumentId
+// is silently dropped and the remaining valid documents are still returned.
+TEST_F(ResultRetrieverTest, IgnoreErrors) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+                            schema_store_.get()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             doc_store->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             doc_store->Put(test_document2_));
+
+  // -1 is never a valid DocumentId.
+  DocumentId invalid_document_id = -1;
+  std::vector<ScoredDocumentHit> scored_document_hits = {
+      {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {invalid_document_id, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+  auto result_retriever = std::make_unique<ResultRetriever>(
+      doc_store.get(), schema_store_.get(), language_segmenter_.get(),
+      /*ignore_bad_document_ids=*/true);
+
+  SearchResultProto::ResultProto result1;
+  *result1.mutable_document() = test_document1_;
+  SearchResultProto::ResultProto result2;
+  *result2.mutable_document() = test_document2_;
+
+  SectionRestrictQueryTermsMap query_terms{};
+  // Only the two valid documents come back; the bad hit is skipped.
+  EXPECT_THAT(
+      result_retriever->RetrieveResults(result_spec_no_snippet_, query_terms,
+                                        TermMatchType::EXACT_ONLY,
+                                        scored_document_hits),
+      IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2))));
+}
+
+// With ignore_bad_document_ids=false, bad hits surface as errors: an invalid
+// DocumentId yields INVALID_ARGUMENT, and an id that was never stored yields
+// NOT_FOUND.
+TEST_F(ResultRetrieverTest, NotIgnoreErrors) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+                            schema_store_.get()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             doc_store->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             doc_store->Put(test_document2_));
+
+  // -1 is never a valid DocumentId.
+  DocumentId invalid_document_id = -1;
+  std::vector<ScoredDocumentHit> scored_document_hits = {
+      {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {invalid_document_id, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+  auto result_retriever = std::make_unique<ResultRetriever>(
+      doc_store.get(), schema_store_.get(), language_segmenter_.get(),
+      /*ignore_bad_document_ids=*/false);
+
+  SectionRestrictQueryTermsMap query_terms{};
+  EXPECT_THAT(result_retriever->RetrieveResults(
+                  result_spec_no_snippet_, query_terms,
+                  TermMatchType::EXACT_ONLY, scored_document_hits),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+  // A structurally valid id that was never Put() into the store.
+  DocumentId non_existing_document_id = 4;
+  scored_document_hits = {
+      {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {non_existing_document_id, /*hit_section_id_mask=*/0b00000011,
+       /*score=*/0}};
+  EXPECT_THAT(result_retriever->RetrieveResults(
+                  result_spec_no_snippet_, query_terms,
+                  TermMatchType::EXACT_ONLY, scored_document_hits),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+// Filesystem read failures propagate as INTERNAL, even when
+// ignore_bad_document_ids=true — I/O errors are not "bad document id" errors.
+TEST_F(ResultRetrieverTest, IOError) {
+  MockFilesystem mock_filesystem;
+  // Force every document read attempt to fail.
+  ON_CALL(mock_filesystem, OpenForRead(_)).WillByDefault(Return(false));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&mock_filesystem, test_dir_, &fake_clock_,
+                            schema_store_.get()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             doc_store->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             doc_store->Put(test_document2_));
+
+  std::vector<ScoredDocumentHit> scored_document_hits = {
+      {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+
+  SectionRestrictQueryTermsMap query_terms{};
+  auto result_retriever = std::make_unique<ResultRetriever>(
+      doc_store.get(), schema_store_.get(), language_segmenter_.get(),
+      /*ignore_bad_document_ids=*/true);
+  EXPECT_THAT(result_retriever->RetrieveResults(
+                  result_spec_no_snippet_, query_terms,
+                  TermMatchType::EXACT_ONLY, scored_document_hits),
+              StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+// When the ResultSpec requests no snippets, every returned result carries a
+// default (empty) SnippetProto.
+TEST_F(ResultRetrieverTest, SnippetingDisabled) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+                            schema_store_.get()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             doc_store->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             doc_store->Put(test_document2_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             doc_store->Put(test_document3_));
+
+  std::vector<ScoredDocumentHit> scored_document_hits = {
+      {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+  auto result_retriever = std::make_unique<ResultRetriever>(
+      doc_store.get(), schema_store_.get(), language_segmenter_.get());
+
+  SectionRestrictQueryTermsMap query_terms{};
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::vector<SearchResultProto::ResultProto> results,
+      result_retriever->RetrieveResults(result_spec_no_snippet_, query_terms,
+                                        TermMatchType::EXACT_ONLY,
+                                        scored_document_hits));
+  ASSERT_THAT(results, SizeIs(3));
+  // No snippeting requested, so snippets stay at their default instance.
+  EXPECT_THAT(results.at(0).snippet(),
+              EqualsProto(SnippetProto::default_instance()));
+  EXPECT_THAT(results.at(1).snippet(),
+              EqualsProto(SnippetProto::default_instance()));
+  EXPECT_THAT(results.at(2).snippet(),
+              EqualsProto(SnippetProto::default_instance()));
+}
+
+// With snippeting enabled and query terms {"foo", "bar"}, each result carries
+// a snippet whose window/match for "subject" and "body" reflect the matched
+// terms in that document.
+TEST_F(ResultRetrieverTest, SimpleSnippeted) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+                            schema_store_.get()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             doc_store->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             doc_store->Put(test_document2_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             doc_store->Put(test_document3_));
+
+  std::vector<ScoredDocumentHit> scored_document_hits = {
+      {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+  // Use std::make_unique for consistency with the rest of this file (the
+  // original line used absl::make_unique, which this file doesn't include).
+  auto result_retriever = std::make_unique<ResultRetriever>(
+      doc_store.get(), schema_store_.get(), language_segmenter_.get());
+
+  // Empty section restrict ("") means the terms apply to all sections.
+  SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::vector<SearchResultProto::ResultProto> result,
+      result_retriever->RetrieveResults(result_spec_snippet_, query_terms,
+                                        TermMatchType::EXACT_ONLY,
+                                        scored_document_hits));
+  EXPECT_THAT(result, SizeIs(3));
+  EXPECT_THAT(result[0].document(), EqualsProto(test_document1_));
+  EXPECT_THAT(
+      GetWindow(result[0].document(), result[0].snippet(), "subject", 0),
+      Eq("subject foo"));
+  EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "subject", 0),
+              Eq("foo"));
+  EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0),
+              Eq("body bar"));
+  EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "body", 0),
+              Eq("bar"));
+
+  EXPECT_THAT(result[1].document(), EqualsProto(test_document2_));
+  EXPECT_THAT(
+      GetWindow(result[1].document(), result[1].snippet(), "subject", 0),
+      Eq("subject foo 2"));
+  EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "subject", 0),
+              Eq("foo"));
+  EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "body", 0),
+              Eq("body bar 2"));
+  EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "body", 0),
+              Eq("bar"));
+
+  EXPECT_THAT(result[2].document(), EqualsProto(test_document3_));
+  EXPECT_THAT(
+      GetWindow(result[2].document(), result[2].snippet(), "subject", 0),
+      Eq("subject foo 3"));
+  EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "subject", 0),
+              Eq("foo"));
+  EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "body", 0),
+              Eq("body bar 3"));
+  EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "body", 0),
+              Eq("bar"));
+}
+
+// Verifies SnippetSpec.num_to_snippet: with num_to_snippet=1, only the first
+// result gets a populated snippet; the rest carry the default SnippetProto.
+TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+                            schema_store_.get()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             doc_store->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             doc_store->Put(test_document2_));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+                             doc_store->Put(test_document3_));
+
+  // Only the first retrieved document should be snippeted.
+  result_spec_snippet_.mutable_snippet_spec()->set_num_to_snippet(1);
+
+  std::vector<ScoredDocumentHit> scored_document_hits = {
+      {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
+      {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+  // Use std::make_unique for consistency with the rest of this file (the
+  // original line used absl::make_unique, which this file doesn't include).
+  auto result_retriever = std::make_unique<ResultRetriever>(
+      doc_store.get(), schema_store_.get(), language_segmenter_.get());
+
+  // Empty section restrict ("") means the terms apply to all sections.
+  SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::vector<SearchResultProto::ResultProto> result,
+      result_retriever->RetrieveResults(result_spec_snippet_, query_terms,
+                                        TermMatchType::EXACT_ONLY,
+                                        scored_document_hits));
+  EXPECT_THAT(result, SizeIs(3));
+  EXPECT_THAT(result[0].document(), EqualsProto(test_document1_));
+  EXPECT_THAT(
+      GetWindow(result[0].document(), result[0].snippet(), "subject", 0),
+      Eq("subject foo"));
+  EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "subject", 0),
+              Eq("foo"));
+  EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0),
+              Eq("body bar"));
+  EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "body", 0),
+              Eq("bar"));
+
+  // Results beyond num_to_snippet come back un-snippeted.
+  EXPECT_THAT(result[1].document(), EqualsProto(test_document2_));
+  EXPECT_THAT(result[1].snippet(),
+              EqualsProto(SnippetProto::default_instance()));
+
+  EXPECT_THAT(result[2].document(), EqualsProto(test_document3_));
+  EXPECT_THAT(result[2].snippet(),
+              EqualsProto(SnippetProto::default_instance()));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
new file mode 100644
index 0000000..37cfb3f
--- /dev/null
+++ b/icing/schema/schema-store.cc
@@ -0,0 +1,434 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-store.h"
+
+#include <algorithm>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-proto.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section-manager.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/key-mapper.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+constexpr char kSchemaStoreHeaderFilename[] = "schema_store_header";
+constexpr char kSchemaFilename[] = "schema.pb";
+constexpr char kSchemaTypeMapperFilename[] = "schema_type_mapper";
+
+// A KeyMapper stores its data across 3 arrays internally. Giving each array
+// 128KiB for storage means the entire KeyMapper requires 384KiB.
+constexpr int32_t kSchemaTypeMapperMaxSize = 3 * 128 * 1024; // 384 KiB
+
+// Path of the schema store header file directly under `base_dir`.
+const std::string MakeHeaderFilename(const std::string& base_dir) {
+  return base_dir + "/" + kSchemaStoreHeaderFilename;
+}
+
+// Path of the serialized SchemaProto file directly under `base_dir`.
+const std::string MakeSchemaFilename(const std::string& base_dir) {
+  return base_dir + "/" + kSchemaFilename;
+}
+
+// Path of the schema-type KeyMapper's storage directly under `base_dir`.
+const std::string MakeSchemaTypeMapperFilename(const std::string& base_dir) {
+  return base_dir + "/" + kSchemaTypeMapperFilename;
+}
+
+// Assuming that SchemaTypeIds are assigned to schema types based on their order
+// in the SchemaProto. Check if the schema type->SchemaTypeId mapping would
+// change with the new schema.
+// Assuming that SchemaTypeIds are assigned to schema types based on their
+// order in the SchemaProto, returns the set of OLD SchemaTypeIds whose type
+// moved to a different position (i.e. would get a different id) in the new
+// schema.
+std::unordered_set<SchemaTypeId> SchemaTypeIdsChanged(
+    const SchemaProto& old_schema, const SchemaProto& new_schema) {
+  std::unordered_set<SchemaTypeId> changed_old_ids;
+
+  // Record each type name's position (== its SchemaTypeId) in both schemas.
+  std::unordered_map<std::string, int> old_positions;
+  for (int i = 0; i < old_schema.types_size(); ++i) {
+    old_positions.emplace(old_schema.types(i).schema_type(), i);
+  }
+
+  std::unordered_map<std::string, int> new_positions;
+  for (int i = 0; i < new_schema.types_size(); ++i) {
+    new_positions.emplace(new_schema.types(i).schema_type(), i);
+  }
+
+  // Only types present in both schemas can have a changed id: deleted types
+  // are reported via SetSchemaResult.schema_types_deleted*, and newly added
+  // types require no updates to existing data.
+  for (const auto& [type_name, old_index] : old_positions) {
+    const auto new_iter = new_positions.find(type_name);
+    if (new_iter != new_positions.end() && new_iter->second != old_index) {
+      // The type's position — and therefore its SchemaTypeId — moved.
+      changed_old_ids.emplace(old_index);
+    }
+  }
+
+  return changed_old_ids;
+}
+
+} // namespace
+
+// Factory: builds a SchemaStore and runs Initialize() before handing it out,
+// so callers never see a partially-initialized instance. Constructed via raw
+// `new` because the constructor is presumably private — confirm in the header.
+libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create(
+    const Filesystem* filesystem, const std::string& base_dir) {
+  std::unique_ptr<SchemaStore> schema_store =
+      std::unique_ptr<SchemaStore>(new SchemaStore(filesystem, base_dir));
+  ICING_RETURN_IF_ERROR(schema_store->Initialize());
+  return schema_store;
+}
+
+// NOTE(review): schema_file_ is initialized from base_dir_ (not the `base_dir`
+// parameter, which has been moved from). This is only safe if base_dir_ is
+// declared before schema_file_ in the class, since member init follows
+// declaration order — confirm against schema-store.h.
+SchemaStore::SchemaStore(const Filesystem* filesystem, std::string base_dir)
+    : filesystem_(*filesystem),
+      base_dir_(std::move(base_dir)),
+      schema_file_(*filesystem, MakeSchemaFilename(base_dir_)) {}
+
+// Best-effort flush of in-memory state on destruction. Failures are only
+// logged: destructors cannot return a status, and throwing is not an option.
+SchemaStore::~SchemaStore() {
+  if (initialized_) {
+    if (!PersistToDisk().ok()) {
+      ICING_LOG(ERROR) << "Error persisting to disk in SchemaStore destructor";
+    }
+  }
+}
+
+// Loads the existing schema (if any) and brings the derived files
+// (schema_type_mapper_, section_manager_, type_config_map_) up to date,
+// regenerating them from the ground-truth schema when they are missing or
+// inconsistent.
+libtextclassifier3::Status SchemaStore::Initialize() {
+  auto schema_proto_or = GetSchema();
+  if (absl_ports::IsNotFound(schema_proto_or.status())) {
+    // Don't have an existing schema proto, that's fine
+    // NOTE(review): this early return leaves initialized_ == false, so the
+    // destructor will skip PersistToDisk() for a store that never had a
+    // schema — confirm this is intended.
+    return libtextclassifier3::Status::OK;
+  } else if (!schema_proto_or.ok()) {
+    // Real error when trying to read the existing schema
+    return schema_proto_or.status();
+  }
+
+  // Derived files exist on disk but may be stale/corrupt; fall back to a full
+  // rebuild from the schema proto if they can't be loaded and verified.
+  if (!InitializeDerivedFiles().ok()) {
+    ICING_VLOG(3)
+        << "Couldn't find derived files or failed to initialize them, "
+           "regenerating derived files for SchemaStore.";
+    ICING_RETURN_IF_ERROR(RegenerateDerivedFiles());
+  }
+
+  initialized_ = true;
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Loads the derived files already on disk and validates them against the
+// header's stored checksum. Returns an INTERNAL error on any inconsistency so
+// the caller can regenerate everything from the ground-truth schema.
+libtextclassifier3::Status SchemaStore::InitializeDerivedFiles() {
+  if (!HeaderExists()) {
+    // Without a header, we don't know if things are consistent between each
+    // other so the caller should just regenerate everything from ground truth.
+    return absl_ports::InternalError("SchemaStore header doesn't exist");
+  }
+
+  SchemaStore::Header header;
+  if (!filesystem_.Read(MakeHeaderFilename(base_dir_).c_str(), &header,
+                        sizeof(header))) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Couldn't read: ", MakeHeaderFilename(base_dir_)));
+  }
+
+  // Magic mismatch means the file is corrupt or belongs to something else.
+  if (header.magic != SchemaStore::Header::kMagic) {
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Invalid header kMagic for file: ", MakeHeaderFilename(base_dir_)));
+  }
+
+  ICING_ASSIGN_OR_RETURN(
+      schema_type_mapper_,
+      KeyMapper<SchemaTypeId>::Create(filesystem_,
+                                      MakeSchemaTypeMapperFilename(base_dir_),
+                                      kSchemaTypeMapperMaxSize));
+
+  // Verify that the schema + mapper on disk match the checksum recorded in
+  // the header at the last successful persist.
+  ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
+  if (checksum.Get() != header.checksum) {
+    return absl_ports::InternalError(
+        "Combined checksum of SchemaStore was inconsistent");
+  }
+
+  // Update our in-memory data structures
+  type_config_map_.clear();
+  ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema());
+  for (const SchemaTypeConfigProto& type_config : schema_proto->types()) {
+    // Update our type_config_map_
+    type_config_map_.emplace(type_config.schema_type(), type_config);
+  }
+  ICING_ASSIGN_OR_RETURN(
+      section_manager_,
+      SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Rebuilds all derived state (type_config_map_, schema_type_mapper_,
+// section_manager_) from the ground-truth schema proto, then records a fresh
+// checksum in the header.
+libtextclassifier3::Status SchemaStore::RegenerateDerivedFiles() {
+  ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema());
+
+  ICING_RETURN_IF_ERROR(ResetSchemaTypeMapper());
+  type_config_map_.clear();
+
+  for (const SchemaTypeConfigProto& type_config : schema_proto->types()) {
+    // Update our type_config_map_
+    type_config_map_.emplace(type_config.schema_type(), type_config);
+
+    // Assign a SchemaTypeId to the type. Ids are handed out in proto order:
+    // num_keys() before insertion is exactly the type's index in the schema.
+    ICING_RETURN_IF_ERROR(schema_type_mapper_->Put(
+        type_config.schema_type(), schema_type_mapper_->num_keys()));
+  }
+
+  ICING_ASSIGN_OR_RETURN(
+      section_manager_,
+      SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+  // Write the header
+  ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
+  ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Returns true iff a usable header file is present. A file truncated to zero
+// bytes (or whose size can't be read) is treated the same as a missing one.
+bool SchemaStore::HeaderExists() {
+  const std::string header_filename = MakeHeaderFilename(base_dir_);
+  if (!filesystem_.FileExists(header_filename.c_str())) {
+    return false;
+  }
+
+  const int64_t file_size = filesystem_.GetFileSize(header_filename.c_str());
+  return file_size != 0 && file_size != Filesystem::kBadFileSize;
+}
+
+// Overwrites the on-disk header with the latest combined checksum. The magic
+// value lets InitializeDerivedFiles() detect corrupt/foreign files on reload.
+libtextclassifier3::Status SchemaStore::UpdateHeader(const Crc32& checksum) {
+  // Write the header
+  SchemaStore::Header header;
+  header.magic = SchemaStore::Header::kMagic;
+  header.checksum = checksum.Get();
+
+  // This should overwrite the header.
+  if (!filesystem_.Write(MakeHeaderFilename(base_dir_).c_str(), &header,
+                         sizeof(header))) {
+    // Fixed copy-paste in the error text: this is the SchemaStore header, not
+    // the DocStore header.
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Failed to write SchemaStore header: ", MakeHeaderFilename(base_dir_)));
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+// Deletes the schema-type KeyMapper's on-disk state and recreates it empty.
+// The unique_ptr must be released before Delete so no open handles remain.
+libtextclassifier3::Status SchemaStore::ResetSchemaTypeMapper() {
+  // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
+  schema_type_mapper_.reset();
+  // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+  // that can support error logging.
+  libtextclassifier3::Status status = KeyMapper<SchemaTypeId>::Delete(
+      filesystem_, MakeSchemaTypeMapperFilename(base_dir_));
+  if (!status.ok()) {
+    ICING_LOG(ERROR) << status.error_message()
+                     << "Failed to delete old schema_type mapper";
+    return status;
+  }
+  ICING_ASSIGN_OR_RETURN(
+      schema_type_mapper_,
+      KeyMapper<SchemaTypeId>::Create(filesystem_,
+                                      MakeSchemaTypeMapperFilename(base_dir_),
+                                      kSchemaTypeMapperMaxSize));
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Combined checksum over the schema proto and the schema-type mapper, used to
+// detect inconsistency between the ground truth and the derived files. A
+// store with no schema yet checksums to the default Crc32.
+libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const {
+  Crc32 total_checksum;
+
+  auto schema_proto_or = GetSchema();
+  if (absl_ports::IsNotFound(schema_proto_or.status())) {
+    // Nothing to checksum
+    return total_checksum;
+  } else if (!schema_proto_or.ok()) {
+    // Some real error. Pass it up
+    return schema_proto_or.status();
+  }
+
+  // Guaranteed to have a schema proto now
+  const SchemaProto* schema_proto = schema_proto_or.ValueOrDie();
+  Crc32 schema_checksum;
+  schema_checksum.Append(schema_proto->SerializeAsString());
+
+  Crc32 schema_type_mapper_checksum = schema_type_mapper_->ComputeChecksum();
+
+  // Fold the sub-checksums (as decimal strings) into one combined value.
+  total_checksum.Append(std::to_string(schema_checksum.Get()));
+  total_checksum.Append(std::to_string(schema_type_mapper_checksum.Get()));
+
+  return total_checksum;
+}
+
+// Returns the ground-truth schema from the file-backed proto (pointer owned
+// by schema_file_; do not hold it across a schema update).
+libtextclassifier3::StatusOr<const SchemaProto*> SchemaStore::GetSchema()
+    const {
+  return schema_file_.Read();
+}
+
+// TODO(cassiewang): Consider removing this definition of SetSchema if it's not
+// needed by production code. It's currently being used by our tests, but maybe
+// it's trivial to change our test code to also use the
+// SetSchema(SchemaProto&& new_schema)
+//
+// Copying overload: delegates to the rvalue overload with an explicit copy so
+// the caller's proto is left untouched.
+libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult>
+SchemaStore::SetSchema(const SchemaProto& new_schema,
+                       bool ignore_errors_and_delete_documents) {
+  return SetSchema(SchemaProto(new_schema), ignore_errors_and_delete_documents);
+}
+
+// Validates `new_schema` against the existing schema (if any) and, when the
+// change is acceptable (or errors are explicitly ignored), persists it and
+// regenerates all derived files. The returned SetSchemaResult reports
+// deleted/incompatible types and any SchemaTypeIds that moved.
+libtextclassifier3::StatusOr<const SchemaStore::SetSchemaResult>
+SchemaStore::SetSchema(SchemaProto&& new_schema,
+                       bool ignore_errors_and_delete_documents) {
+  SetSchemaResult result;
+
+  auto schema_proto_or = GetSchema();
+  if (absl_ports::IsNotFound(schema_proto_or.status())) {
+    // We don't have a pre-existing schema, so anything is valid.
+    result.success = true;
+  } else if (!schema_proto_or.ok()) {
+    // Real error
+    return schema_proto_or.status();
+  } else {
+    // At this point, we're guaranteed that we have a schema.
+    // Copied (not referenced) defensively: schema_file_ may be overwritten
+    // below before the last use of old_schema would otherwise occur.
+    const SchemaProto old_schema = *schema_proto_or.ValueOrDie();
+
+    // Assume we can set the schema unless proven otherwise.
+    result.success = true;
+
+    // Serialized-bytes comparison is a cheap exact-equality check for protos.
+    if (new_schema.SerializeAsString() == old_schema.SerializeAsString()) {
+      // Same schema as before. No need to update anything
+      return result;
+    }
+
+    // Different schema, track the differences and see if we can still write it
+    SchemaUtil::SchemaDelta schema_delta =
+        SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema);
+
+    // An incompatible index is fine, we can just reindex
+    result.index_incompatible = schema_delta.index_incompatible;
+
+    for (const auto& schema_type : schema_delta.schema_types_deleted) {
+      // We currently don't support deletions, so mark this as not possible.
+      // This will change once we allow force-set schemas.
+      result.success = false;
+
+      result.schema_types_deleted_by_name.emplace(schema_type);
+
+      ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+                             GetSchemaTypeId(schema_type));
+      result.schema_types_deleted_by_id.emplace(schema_type_id);
+    }
+
+    for (const auto& schema_type : schema_delta.schema_types_incompatible) {
+      // We currently don't support incompatible schemas, so mark this as
+      // not possible. This will change once we allow force-set schemas.
+      result.success = false;
+
+      result.schema_types_incompatible_by_name.emplace(schema_type);
+
+      ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+                             GetSchemaTypeId(schema_type));
+      result.schema_types_incompatible_by_id.emplace(schema_type_id);
+    }
+
+    // SchemaTypeIds changing is fine, we can update the DocumentStore
+    result.old_schema_type_ids_changed =
+        SchemaTypeIdsChanged(old_schema, new_schema);
+  }
+
+  // We can force set the schema if the caller has told us to ignore any errors
+  result.success = result.success || ignore_errors_and_delete_documents;
+
+  if (result.success) {
+    // Write the schema (and potentially overwrite a previous schema).
+    // new_schema is not used after this point, so move it instead of copying;
+    // previously this rvalue-taking overload still copied the whole proto.
+    ICING_RETURN_IF_ERROR(
+        schema_file_.Write(std::make_unique<SchemaProto>(std::move(new_schema))));
+
+    // Rebuild derived files from the freshly-written ground truth.
+    ICING_RETURN_IF_ERROR(RegenerateDerivedFiles());
+  }
+
+  return result;
+}
+
+// Returns a pointer into type_config_map_ for `schema_type`, or NOT_FOUND.
+// The map is rebuilt on every schema change, so entries stay current; callers
+// should not cache the pointer across updates.
+libtextclassifier3::StatusOr<const SchemaTypeConfigProto*>
+SchemaStore::GetSchemaTypeConfig(const std::string& schema_type) const {
+  auto iter = type_config_map_.find(schema_type);
+  if (iter != type_config_map_.end()) {
+    return &iter->second;
+  }
+  return absl_ports::NotFoundError(
+      absl_ports::StrCat("Schema type config '", schema_type, "' not found"));
+}
+
+// Resolves a schema type name to its SchemaTypeId via the persistent mapper;
+// propagates the mapper's status (e.g. not-found) unchanged.
+libtextclassifier3::StatusOr<SchemaTypeId> SchemaStore::GetSchemaTypeId(
+    std::string_view schema_type) const {
+  return schema_type_mapper_->Get(schema_type);
+}
+
+// Thin delegate to SectionManager: content of the section at `section_path`
+// within `document`.
+libtextclassifier3::StatusOr<std::vector<std::string>>
+SchemaStore::GetSectionContent(const DocumentProto& document,
+                               std::string_view section_path) const {
+  return section_manager_->GetSectionContent(document, section_path);
+}
+
+// Thin delegate to SectionManager: content of the section with `section_id`
+// within `document`.
+libtextclassifier3::StatusOr<std::vector<std::string>>
+SchemaStore::GetSectionContent(const DocumentProto& document,
+                               SectionId section_id) const {
+  return section_manager_->GetSectionContent(document, section_id);
+}
+
+// Thin delegate to SectionManager: metadata for (schema_type_id, section_id).
+libtextclassifier3::StatusOr<const SectionMetadata*>
+SchemaStore::GetSectionMetadata(SchemaTypeId schema_type_id,
+                                SectionId section_id) const {
+  return section_manager_->GetSectionMetadata(schema_type_id, section_id);
+}
+
+// Thin delegate to SectionManager: all indexable sections of `document`.
+libtextclassifier3::StatusOr<std::vector<Section>> SchemaStore::ExtractSections(
+    const DocumentProto& document) const {
+  return section_manager_->ExtractSections(document);
+}
+
+// Flushes the schema-type mapper (when present) and rewrites the header with
+// a fresh checksum so a later InitializeDerivedFiles() passes validation.
+libtextclassifier3::Status SchemaStore::PersistToDisk() {
+  if (schema_type_mapper_ != nullptr) {
+    // It's possible we haven't had a schema set yet, so SchemaTypeMapper hasn't
+    // been initialized and is still a nullptr
+    ICING_RETURN_IF_ERROR(schema_type_mapper_->PersistToDisk());
+  }
+
+  // Write the header
+  ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
+  ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
+
+  return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
new file mode 100644
index 0000000..cc65a32
--- /dev/null
+++ b/icing/schema/schema-store.h
@@ -0,0 +1,285 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_SCHEMA_STORE_H_
+#define ICING_SCHEMA_SCHEMA_STORE_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/file/file-backed-proto.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section-manager.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/key-mapper.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// Holds the ground truth schema proto. Tracks compatible changes to the schema
+// and will update any derived data based on the schema proto, such as Sections,
+// SchemaTypeConfigs, PropertyConfigs, and SchemaTypeIds. To ensure they have
+// the most up-to-date data, callers should not save instances themselves and
+// should always call Get* from the SchemaStore.
+class SchemaStore {
+ public:
+  // On-disk header for the SchemaStore. It is read and written as raw bytes
+  // (see UpdateHeader), so any change to this struct's layout invalidates
+  // headers already on disk.
+  struct Header {
+    static constexpr int32_t kMagic = 0x72650d0a;
+
+    // Holds the magic as a quick sanity check against file corruption.
+    int32_t magic;
+
+    // Checksum of the SchemaStore's sub-component's checksums.
+    uint32_t checksum;
+  };
+
+  // Holds information on what may have been affected by the new schema. This is
+  // generally data that other classes may depend on from the SchemaStore,
+  // so that we can know if we should go update those classes as well.
+  struct SetSchemaResult {
+    // Whether we are able to write the schema as determined by SetSchema's
+    // arguments. This boolean reflects SetSchema's logic, and does not reflect
+    // any system level IO errors that may prevent the schema from being written
+    // to file.
+    bool success = false;
+
+    // Whether the new schema changes invalidate the index.
+    bool index_incompatible = false;
+
+    // SchemaTypeIds of schema types can be reassigned new SchemaTypeIds if:
+    //   1. Schema types are added in the middle of the SchemaProto
+    //   2. Schema types are removed from the middle of the SchemaProto
+    //   3. Schema types are reordered in the SchemaProto
+    //
+    // SchemaTypeIds are not changed if schema types are added/removed to the
+    // end of the SchemaProto.
+    std::unordered_set<SchemaTypeId> old_schema_type_ids_changed;
+
+    // Schema types that have been removed from the new schema. Represented by
+    // the `schema_type` field in the SchemaTypeConfigProto.
+    std::unordered_set<std::string> schema_types_deleted_by_name;
+
+    // Schema types that have been removed from the new schema. Represented by
+    // the SchemaTypeId assigned to this SchemaTypeConfigProto in the *old*
+    // schema.
+    std::unordered_set<SchemaTypeId> schema_types_deleted_by_id;
+
+    // Schema types whose SchemaTypeConfigProto has changed in an incompatible
+    // manner in the new schema. Compatibility determined in
+    // SchemaUtil::ComputeCompatibilityDelta. Represented by the `schema_type`
+    // field in the SchemaTypeConfigProto.
+    std::unordered_set<std::string> schema_types_incompatible_by_name;
+
+    // Schema types whose SchemaTypeConfigProto has changed in an incompatible
+    // manner in the new schema. Compatibility determined in
+    // SchemaUtil::ComputeCompatibilityDelta. Represented by the SchemaTypeId
+    // assigned to this SchemaTypeConfigProto in the *old* schema.
+    std::unordered_set<SchemaTypeId> schema_types_incompatible_by_id;
+  };
+
+  // Create a SchemaStore instance. The base_dir must already exist. There does
+  // not need to be an existing schema already.
+  //
+  // Returns:
+  //   unique_ptr to SchemaStore on success
+  //   INTERNAL_ERROR on any IO errors
+  static libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> Create(
+      const Filesystem* filesystem, const std::string& base_dir);
+
+  // Not copyable
+  SchemaStore(const SchemaStore&) = delete;
+  SchemaStore& operator=(const SchemaStore&) = delete;
+
+  // Persists and updates checksum of subcomponents.
+  ~SchemaStore();
+
+  // Retrieve the current schema if it exists. Caller does not get ownership of
+  // the schema proto and modifying the returned pointer does not affect the
+  // underlying schema proto.
+  //
+  // Returns:
+  //   SchemaProto* if exists
+  //   INTERNAL_ERROR on any IO errors
+  //   NOT_FOUND_ERROR if a schema hasn't been set before
+  libtextclassifier3::StatusOr<const SchemaProto*> GetSchema() const;
+
+  // Update our current schema if it's compatible. Does not accept incompatible
+  // schema. Compatibility rules defined by
+  // SchemaUtil::ComputeCompatibilityDelta.
+  //
+  // If ignore_errors_and_delete_documents is set to true, then incompatible
+  // schema are allowed and we'll force set the schema, meaning
+  // SetSchemaResult.success will always be true.
+  //
+  // Returns:
+  //   SetSchemaResult that encapsulates the differences between the old and new
+  //   schema, as well as if the new schema can be set.
+  //   INTERNAL_ERROR on any IO errors
+  //
+  // NOTE(review): StatusOr<const SetSchemaResult> wraps a const value type,
+  // which is unusual and blocks moving the result out; confirm the StatusOr
+  // implementation supports this or drop the const.
+  libtextclassifier3::StatusOr<const SetSchemaResult> SetSchema(
+      const SchemaProto& new_schema,
+      bool ignore_errors_and_delete_documents = false);
+  libtextclassifier3::StatusOr<const SetSchemaResult> SetSchema(
+      SchemaProto&& new_schema,
+      bool ignore_errors_and_delete_documents = false);
+
+  // Get the SchemaTypeConfigProto of schema_type name.
+  //
+  // Returns:
+  //   SchemaTypeConfigProto on success
+  //   NOT_FOUND if schema type name doesn't exist
+  libtextclassifier3::StatusOr<const SchemaTypeConfigProto*>
+  GetSchemaTypeConfig(const std::string& schema_type) const;
+
+  // Returns the SchemaTypeId of the passed in schema type
+  //
+  // Returns:
+  //   SchemaTypeId on success
+  //   NOT_FOUND_ERROR if we don't know about the schema type
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<SchemaTypeId> GetSchemaTypeId(
+      std::string_view schema_type) const;
+
+  // Finds content of a section by section path (e.g. property1.property2)
+  //
+  // Returns:
+  //   A string of content on success
+  //   NOT_FOUND if:
+  //     1. Property is optional and not found in the document
+  //     2. section_path is invalid
+  //     3. Content is empty
+  libtextclassifier3::StatusOr<std::vector<std::string>> GetSectionContent(
+      const DocumentProto& document, std::string_view section_path) const;
+
+  // Finds content of a section by id
+  //
+  // Returns:
+  //   A string of content on success
+  //   INVALID_ARGUMENT if section id is invalid
+  //   NOT_FOUND if type config name of document not found
+  libtextclassifier3::StatusOr<std::vector<std::string>> GetSectionContent(
+      const DocumentProto& document, SectionId section_id) const;
+
+  // Returns the SectionMetadata associated with the SectionId that's in the
+  // SchemaTypeId.
+  //
+  // Returns:
+  //   pointer to SectionMetadata on success
+  //   INVALID_ARGUMENT if schema type id or section is invalid
+  libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata(
+      SchemaTypeId schema_type_id, SectionId section_id) const;
+
+  // Extracts all sections from the given document, sections are sorted by
+  // section id in increasing order. Section ids start from 0. Sections with
+  // empty content won't be returned.
+  //
+  // Returns:
+  //   A list of sections on success
+  //   NOT_FOUND if type config name of document not found
+  libtextclassifier3::StatusOr<std::vector<Section>> ExtractSections(
+      const DocumentProto& document) const;
+
+  // Syncs all the data changes to disk.
+  //
+  // Returns any encountered IO errors.
+  libtextclassifier3::Status PersistToDisk();
+
+  // Computes the combined checksum of the schema store - includes the ground
+  // truth and all derived files.
+  //
+  // Returns:
+  //   Combined checksum on success
+  //   INTERNAL_ERROR on compute error
+  libtextclassifier3::StatusOr<Crc32> ComputeChecksum() const;
+
+ private:
+  // Use SchemaStore::Create instead.
+  // base_dir is taken by value so callers can move a string in.
+  explicit SchemaStore(const Filesystem* filesystem, std::string base_dir);
+
+  // Handles initializing the SchemaStore and regenerating any data if needed.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status Initialize();
+
+  // Creates sub-components and verifies the integrity of each sub-component.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status InitializeDerivedFiles();
+
+  // Populates any derived data structures off of the schema.
+  //
+  // Returns:
+  //   OK on success
+  //   NOT_FOUND_ERROR if a schema proto has not been set
+  //   INTERNAL_ERROR on any IO errors
+  libtextclassifier3::Status RegenerateDerivedFiles();
+
+  // Checks if the header exists already. This does not create the header file
+  // if it doesn't exist.
+  // NOTE(review): does not appear to modify state — could likely be const.
+  bool HeaderExists();
+
+  // Update and replace the header file. Creates the header file if it doesn't
+  // exist.
+  libtextclassifier3::Status UpdateHeader(const Crc32& checksum);
+
+  // Resets the unique_ptr to the schema_type_mapper_, deletes the underlying
+  // file, and re-creates a new instance of the schema_type_mapper_. Does not
+  // populate the schema_type_mapper_.
+  //
+  // Returns any IO errors.
+  libtextclassifier3::Status ResetSchemaTypeMapper();
+
+  const Filesystem& filesystem_;
+  const std::string base_dir_;
+
+  // Used internally to indicate whether the class has been initialized. This is
+  // to guard against cases where the object has been created, but Initialize
+  // fails in the constructor. If we have successfully exited the constructor,
+  // then this field can be ignored. Clients of SchemaStore should not need to
+  // worry about this field.
+  bool initialized_ = false;
+
+  // Cached schema
+  FileBackedProto<SchemaProto> schema_file_;
+
+  // A hash map of (type config name -> type config), allows faster lookup of
+  // type config in schema. The O(1) type config access makes schema-related and
+  // section-related operations faster.
+  SchemaUtil::TypeConfigMap type_config_map_;
+
+  // Maps schema types to a densely-assigned unique id.
+  std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
+
+  // Manager of indexed section related metadata.
+  std::unique_ptr<const SectionManager> section_manager_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_SCHEMA_STORE_H_
diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc
new file mode 100644
index 0000000..410a681
--- /dev/null
+++ b/icing/schema/schema-store_test.cc
@@ -0,0 +1,647 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-store.h"
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section-manager.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::Not;
+using ::testing::Pointee;
+
+// Fixture providing a fresh per-test temp directory plus a default one-type
+// ("email") schema whose single "subject" property is indexed, so section
+// metadata gets generated for it.
+class SchemaStoreTest : public ::testing::Test {
+ protected:
+  SchemaStoreTest() : test_dir_(GetTestTempDir() + "/icing") {
+    filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+
+    auto type = schema_.add_types();
+    type->set_schema_type("email");
+
+    // Add an indexed property so we generate section metadata on it
+    auto property = type->add_properties();
+    property->set_property_name("subject");
+    property->set_data_type(PropertyConfigProto::DataType::STRING);
+    property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+    property->mutable_indexing_config()->set_term_match_type(
+        TermMatchType::EXACT_ONLY);
+    property->mutable_indexing_config()->set_tokenizer_type(
+        IndexingConfig::TokenizerType::PLAIN);
+  }
+
+  // Removes the temp directory so state never leaks between tests.
+  void TearDown() override {
+    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+  }
+
+  const Filesystem filesystem_;
+  const std::string test_dir_;
+  SchemaProto schema_;
+};
+
+// The next three tests corrupt on-disk state in different ways to exercise
+// SchemaStore's initialization-time integrity checks and recovery paths.
+TEST_F(SchemaStoreTest, CorruptSchemaError) {
+  {
+    ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                               SchemaStore::Create(&filesystem_, test_dir_));
+
+    // Set it for the first time
+    SchemaStore::SetSchemaResult result;
+    result.success = true;
+    EXPECT_THAT(schema_store->SetSchema(schema_),
+                IsOkAndHolds(EqualsSetSchemaResult(result)));
+    ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                               schema_store->GetSchema());
+    EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+  }
+
+  // "Corrupt" the ground truth schema by adding new data to it. This will mess
+  // up the checksum of the schema store
+
+  SchemaProto corrupt_schema;
+  auto type = corrupt_schema.add_types();
+  type->set_schema_type("corrupted");
+
+  // NOTE: "/schema.pb" must stay in sync with SchemaStore's internal file name.
+  const std::string schema_file = absl_ports::StrCat(test_dir_, "/schema.pb");
+  const std::string serialized_schema = corrupt_schema.SerializeAsString();
+
+  filesystem_.Write(schema_file.c_str(), serialized_schema.data(),
+                    serialized_schema.size());
+
+  // If ground truth was corrupted, we won't know what to do
+  EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_),
+              StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) {
+  {
+    ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                               SchemaStore::Create(&filesystem_, test_dir_));
+
+    // Set it for the first time
+    SchemaStore::SetSchemaResult result;
+    result.success = true;
+    EXPECT_THAT(schema_store->SetSchema(schema_),
+                IsOkAndHolds(EqualsSetSchemaResult(result)));
+    ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                               schema_store->GetSchema());
+    EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+
+    EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+  }
+
+  // "Corrupt" the derived SchemaTypeIds by deleting the entire directory. This
+  // will mess up the initialization of schema store, causing everything to be
+  // regenerated from ground truth
+
+  const std::string schema_type_mapper_dir =
+      absl_ports::StrCat(test_dir_, "/schema_type_mapper");
+  filesystem_.DeleteDirectoryRecursively(schema_type_mapper_dir.c_str());
+
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  // Everything looks fine, ground truth and derived data
+  ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                             schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+  EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+}
+
+TEST_F(SchemaStoreTest, RecoverBadChecksumOk) {
+  {
+    ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                               SchemaStore::Create(&filesystem_, test_dir_));
+
+    // Set it for the first time
+    SchemaStore::SetSchemaResult result;
+    result.success = true;
+    EXPECT_THAT(schema_store->SetSchema(schema_),
+                IsOkAndHolds(EqualsSetSchemaResult(result)));
+    ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                               schema_store->GetSchema());
+    EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+
+    EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+  }
+
+  // Change the SchemaStore's header combined checksum so that it won't match
+  // the recalculated checksum on initialization. This will force a regeneration
+  // of derived files from ground truth.
+  const std::string header_file =
+      absl_ports::StrCat(test_dir_, "/schema_store_header");
+  SchemaStore::Header header;
+  header.magic = SchemaStore::Header::kMagic;
+  header.checksum = 10;  // Arbitrary garbage checksum
+  filesystem_.DeleteFile(header_file.c_str());
+  // Header is persisted as raw bytes, so we can forge one the same way.
+  filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  // Everything looks fine, ground truth and derived data
+  ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                             schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+  EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+}
+
+// Lifecycle tests: creating a store with and without pre-existing state, and
+// recreating it over the same directory.
+TEST_F(SchemaStoreTest, CreateNoPreviousSchemaOk) {
+  EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_), IsOk());
+}
+
+TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  SchemaStore::SetSchemaResult result;
+  result.success = true;
+  EXPECT_THAT(schema_store->SetSchema(schema_),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+
+  schema_store.reset();
+  EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_), IsOk());
+}
+
+TEST_F(SchemaStoreTest, MultipleCreateOk) {
+  DocumentProto document;
+  document.set_schema("email");
+  auto properties = document.add_properties();
+  properties->set_name("subject");
+  properties->add_string_values("subject_content");
+
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  SchemaStore::SetSchemaResult result;
+  result.success = true;
+  EXPECT_THAT(schema_store->SetSchema(schema_),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+
+  // Verify that our in-memory structures are ok
+  EXPECT_THAT(schema_store->GetSchemaTypeConfig("email"),
+              IsOkAndHolds(Pointee(EqualsProto(schema_.types(0)))));
+  ICING_ASSERT_OK_AND_ASSIGN(std::vector<Section> sections,
+                             schema_store->ExtractSections(document));
+  EXPECT_THAT(sections[0].content, ElementsAre("subject_content"));
+
+  // Verify that our persisted data is ok
+  EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+
+  // Recreate over the same directory: everything must survive the round trip.
+  schema_store.reset();
+  ICING_ASSERT_OK_AND_ASSIGN(schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  // Verify that our in-memory structures are ok
+  EXPECT_THAT(schema_store->GetSchemaTypeConfig("email"),
+              IsOkAndHolds(Pointee(EqualsProto(schema_.types(0)))));
+
+  ICING_ASSERT_OK_AND_ASSIGN(sections, schema_store->ExtractSections(document));
+  EXPECT_THAT(sections[0].content, ElementsAre("subject_content"));
+
+  // Verify that our persisted data is ok
+  EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+}
+
+// Basic SetSchema behavior: first set, idempotent re-set, incompatible set,
+// and a purely additive (compatible) change.
+TEST_F(SchemaStoreTest, SetNewSchemaOk) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  // Set it for the first time
+  SchemaStore::SetSchemaResult result;
+  result.success = true;
+  EXPECT_THAT(schema_store->SetSchema(schema_),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+  ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                             schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+}
+
+TEST_F(SchemaStoreTest, SetSameSchemaOk) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  // Set it for the first time
+  SchemaStore::SetSchemaResult result;
+  result.success = true;
+  EXPECT_THAT(schema_store->SetSchema(schema_),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+  ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                             schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+
+  // And one more for fun
+  EXPECT_THAT(schema_store->SetSchema(schema_),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+  ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+}
+
+TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  // Set it for the first time
+  SchemaStore::SetSchemaResult result;
+  result.success = true;
+  EXPECT_THAT(schema_store->SetSchema(schema_),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+  ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                             schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema_));
+
+  // Make the schema incompatible by removing a type.
+  schema_.clear_types();
+
+  // Set the incompatible schema: call still returns OK, but the result reports
+  // failure plus what was deleted.
+  result.success = false;
+  result.schema_types_deleted_by_name.emplace("email");
+  result.schema_types_deleted_by_id.emplace(0);
+  EXPECT_THAT(schema_store->SetSchema(schema_),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+}
+
+TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  SchemaProto schema;
+  auto type = schema.add_types();
+  type->set_schema_type("email");
+
+  // Set it for the first time
+  SchemaStore::SetSchemaResult result;
+  result.success = true;
+  EXPECT_THAT(schema_store->SetSchema(schema),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+  ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                             schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema));
+
+  // Add a type, shouldn't affect the index or cached SchemaTypeIds
+  type = schema.add_types();
+  type->set_schema_type("new_type");
+
+  // Set the compatible schema
+  EXPECT_THAT(schema_store->SetSchema(schema),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+  ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema));
+}
+
+// SchemaTypeId stability tests: deleting or reordering types shifts ids, which
+// must be reported via old_schema_type_ids_changed.
+TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  SchemaProto schema;
+  auto type = schema.add_types();
+  type->set_schema_type("email");
+  type = schema.add_types();
+  type->set_schema_type("message");
+
+  // Set it for the first time
+  SchemaStore::SetSchemaResult result;
+  result.success = true;
+  EXPECT_THAT(schema_store->SetSchema(schema),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+  ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                             schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema));
+
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id,
+                             schema_store->GetSchemaTypeId("email"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_message_schema_type_id,
+                             schema_store->GetSchemaTypeId("message"));
+
+  // Remove "email" type, this also changes previous SchemaTypeIds
+  schema.Clear();
+  type = schema.add_types();
+  type->set_schema_type("message");
+
+  SchemaStore::SetSchemaResult incompatible_result;
+  incompatible_result.success = false;
+  incompatible_result.old_schema_type_ids_changed.emplace(
+      old_message_schema_type_id);
+  incompatible_result.schema_types_deleted_by_name.emplace("email");
+  incompatible_result.schema_types_deleted_by_id.emplace(
+      old_email_schema_type_id);
+
+  // Can't set the incompatible schema
+  EXPECT_THAT(schema_store->SetSchema(schema),
+              IsOkAndHolds(EqualsSetSchemaResult(incompatible_result)));
+
+  // Same delta, but success is forced to true by the override flag below.
+  SchemaStore::SetSchemaResult force_result;
+  force_result.success = true;
+  force_result.old_schema_type_ids_changed.emplace(old_message_schema_type_id);
+  force_result.schema_types_deleted_by_name.emplace("email");
+  force_result.schema_types_deleted_by_id.emplace(old_email_schema_type_id);
+
+  // Force set the incompatible schema
+  EXPECT_THAT(schema_store->SetSchema(
+                  schema, /*ignore_errors_and_delete_documents=*/true),
+              IsOkAndHolds(EqualsSetSchemaResult(force_result)));
+  ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema));
+}
+
+TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  SchemaProto schema;
+  auto type = schema.add_types();
+  type->set_schema_type("email");
+  type = schema.add_types();
+  type->set_schema_type("message");
+
+  // Set it for the first time
+  SchemaStore::SetSchemaResult result;
+  result.success = true;
+  EXPECT_THAT(schema_store->SetSchema(schema),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+  ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                             schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema));
+
+  // Reorder the types
+  schema.clear_types();
+  type = schema.add_types();
+  type->set_schema_type("message");
+  type = schema.add_types();
+  type->set_schema_type("email");
+
+  // Since we assign SchemaTypeIds based on order in the SchemaProto, this will
+  // cause SchemaTypeIds to change
+  result.old_schema_type_ids_changed.emplace(0);  // Old SchemaTypeId of "email"
+  result.old_schema_type_ids_changed.emplace(
+      1);  // Old SchemaTypeId of "message"
+
+  // Set the compatible schema
+  EXPECT_THAT(schema_store->SetSchema(schema),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+  ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema));
+}
+
+// Property-level change tests: making a property indexed flips
+// index_incompatible; changing its data type makes the whole type incompatible.
+TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  SchemaProto schema;
+  auto type = schema.add_types();
+  type->set_schema_type("email");
+
+  // Add an unindexed property
+  auto property = type->add_properties();
+  property->set_property_name("subject");
+  property->set_data_type(PropertyConfigProto::DataType::STRING);
+  property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+  // Set it for the first time
+  SchemaStore::SetSchemaResult result;
+  result.success = true;
+  EXPECT_THAT(schema_store->SetSchema(schema),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+  ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                             schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema));
+
+  // Make a previously unindexed property indexed
+  property = schema.mutable_types(0)->mutable_properties(0);
+  property->mutable_indexing_config()->set_term_match_type(
+      TermMatchType::EXACT_ONLY);
+  property->mutable_indexing_config()->set_tokenizer_type(
+      IndexingConfig::TokenizerType::PLAIN);
+
+  // With a new indexed property, we'll need to reindex
+  result.index_incompatible = true;
+
+  // Set the compatible schema
+  EXPECT_THAT(schema_store->SetSchema(schema),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+  ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema));
+}
+
+TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  SchemaProto schema;
+  auto type = schema.add_types();
+  type->set_schema_type("email");
+
+  // Add a STRING property
+  auto property = type->add_properties();
+  property->set_property_name("subject");
+  property->set_data_type(PropertyConfigProto::DataType::STRING);
+  property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+  // Set it for the first time
+  SchemaStore::SetSchemaResult result;
+  result.success = true;
+  EXPECT_THAT(schema_store->SetSchema(schema),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+  ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                             schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema));
+
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id,
+                             schema_store->GetSchemaTypeId("email"));
+
+  // Make a previously STRING property into DOUBLE
+  property = schema.mutable_types(0)->mutable_properties(0);
+  property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
+
+  SchemaStore::SetSchemaResult incompatible_result;
+  incompatible_result.success = false;
+  incompatible_result.schema_types_incompatible_by_name.emplace("email");
+  incompatible_result.schema_types_incompatible_by_id.emplace(
+      old_email_schema_type_id);
+
+  // Can't set the incompatible schema
+  EXPECT_THAT(schema_store->SetSchema(schema),
+              IsOkAndHolds(EqualsSetSchemaResult(incompatible_result)));
+
+  // Same delta, but success is forced to true by the override flag below.
+  SchemaStore::SetSchemaResult force_result;
+  force_result.success = true;
+  force_result.schema_types_incompatible_by_name.emplace("email");
+  force_result.schema_types_incompatible_by_id.emplace(
+      old_email_schema_type_id);
+
+  // Force set the incompatible schema
+  EXPECT_THAT(schema_store->SetSchema(
+                  schema, /*ignore_errors_and_delete_documents=*/true),
+              IsOkAndHolds(EqualsSetSchemaResult(force_result)));
+  ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema));
+}
+
+// SchemaTypeIds are densely assigned in SchemaProto declaration order.
+TEST_F(SchemaStoreTest, GetSchemaTypeId) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  // Drop the fixture's default "email" type so ids start from a clean slate.
+  schema_.clear_types();
+
+  // Add a few schema types
+  const std::string first_type = "first";
+  auto type = schema_.add_types();
+  type->set_schema_type(first_type);
+
+  const std::string second_type = "second";
+  type = schema_.add_types();
+  type->set_schema_type(second_type);
+
+  // Set it for the first time
+  SchemaStore::SetSchemaResult result;
+  result.success = true;
+  EXPECT_THAT(schema_store->SetSchema(schema_),
+              IsOkAndHolds(EqualsSetSchemaResult(result)));
+
+  EXPECT_THAT(schema_store->GetSchemaTypeId(first_type), IsOkAndHolds(0));
+  EXPECT_THAT(schema_store->GetSchemaTypeId(second_type), IsOkAndHolds(1));
+}
+
+// Checksum contract: default on empty store, stable across calls and
+// instances, and changed by any schema modification.
+TEST_F(SchemaStoreTest, ComputeChecksumDefaultOnEmptySchemaStore) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  Crc32 default_checksum;
+  EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(default_checksum));
+}
+
+TEST_F(SchemaStoreTest, ComputeChecksumSameBetweenCalls) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  SchemaProto foo_schema;
+  auto type_config = foo_schema.add_types();
+  type_config->set_schema_type("foo");
+
+  ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
+
+  ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum());
+
+  // Calling it again doesn't change the checksum
+  EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(checksum));
+}
+
+TEST_F(SchemaStoreTest, ComputeChecksumSameAcrossInstances) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  SchemaProto foo_schema;
+  auto type_config = foo_schema.add_types();
+  type_config->set_schema_type("foo");
+
+  ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
+
+  ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum());
+
+  // Destroy the previous instance and recreate SchemaStore
+  schema_store.reset();
+
+  ICING_ASSERT_OK_AND_ASSIGN(schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+  EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(checksum));
+}
+
+TEST_F(SchemaStoreTest, ComputeChecksumChangesOnModification) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  SchemaProto foo_schema;
+  auto type_config = foo_schema.add_types();
+  type_config->set_schema_type("foo");
+
+  ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
+
+  ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum());
+
+  // Modifying the SchemaStore changes the checksum
+  SchemaProto foo_bar_schema;
+  type_config = foo_bar_schema.add_types();
+  type_config->set_schema_type("foo");
+  type_config = foo_bar_schema.add_types();
+  type_config->set_schema_type("bar");
+
+  ICING_EXPECT_OK(schema_store->SetSchema(foo_bar_schema));
+
+  EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(Not(Eq(checksum))));
+}
+
+// PersistToDisk contract: safe on an empty store, and all data (including
+// destruction-time persistence) survives reinitialization.
+TEST_F(SchemaStoreTest, PersistToDiskFineForEmptySchemaStore) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  // Persisting is fine and shouldn't affect anything
+  ICING_EXPECT_OK(schema_store->PersistToDisk());
+}
+
+TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+
+  SchemaProto schema;
+  auto type_config = schema.add_types();
+  type_config->set_schema_type("foo");
+
+  ICING_EXPECT_OK(schema_store->SetSchema(schema));
+
+  // Persisting shouldn't change anything
+  ICING_EXPECT_OK(schema_store->PersistToDisk());
+
+  ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
+                             schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema));
+
+  // Modify the schema so that something different is persisted next time
+  type_config = schema.add_types();
+  type_config->set_schema_type("bar");
+  ICING_EXPECT_OK(schema_store->SetSchema(schema));
+
+  // Should also persist on destruction
+  schema_store.reset();
+
+  // And we get the same schema back on reinitialization
+  ICING_ASSERT_OK_AND_ASSIGN(schema_store,
+                             SchemaStore::Create(&filesystem_, test_dir_));
+  ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
+  EXPECT_THAT(*actual_schema, EqualsProto(schema));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
new file mode 100644
index 0000000..96d2575
--- /dev/null
+++ b/icing/schema/schema-util.cc
@@ -0,0 +1,392 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-util.h"
+
+#include <cctype>
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+
+#include "base/logging.h"
+#include "utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Returns true iff every character in `str` is an alphanumeric character
+// per std::isalnum. An empty string is vacuously alphanumeric.
+bool isAlphaNumeric(std::string_view str) {
+  for (char c : str) {
+    // Cast to unsigned char first: passing a negative char value (possible
+    // for non-ASCII bytes where char is signed) to std::isalnum is undefined
+    // behavior.
+    if (!std::isalnum(static_cast<unsigned char>(c))) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+// Returns true iff the new cardinality is equally or less restrictive than
+// the old one. A REQUIRED field may relax to OPTIONAL or REPEATED, but a
+// previously relaxed field may never become more restrictive.
+bool IsCardinalityCompatible(const PropertyConfigProto& old_property,
+                             const PropertyConfigProto& new_property) {
+  if (old_property.cardinality() >= new_property.cardinality()) {
+    return true;
+  }
+  ICING_VLOG(1) << absl_ports::StrCat(
+      "Cardinality is more restrictive than before ",
+      PropertyConfigProto::Cardinality::Code_Name(old_property.cardinality()),
+      "->",
+      PropertyConfigProto::Cardinality::Code_Name(new_property.cardinality()));
+  return false;
+}
+
+// Returns true iff the property's data type is unchanged between the old and
+// new configs. Any data type change invalidates previously stored values.
+bool IsDataTypeCompatible(const PropertyConfigProto& old_property,
+                          const PropertyConfigProto& new_property) {
+  if (old_property.data_type() != new_property.data_type()) {
+    // TODO(cassiewang): Maybe we can be a bit looser with this, e.g. we could
+    // just cast an int64_t to a string. But for now, we'll stick with
+    // simplicity.
+    ICING_VLOG(1) << absl_ports::StrCat(
+        "Data type ",
+        PropertyConfigProto::DataType::Code_Name(old_property.data_type()),
+        "->",
+        PropertyConfigProto::DataType::Code_Name(new_property.data_type()));
+    return false;
+  }
+  return true;
+}
+
+// Returns true iff the property's schema_type field is unchanged. This only
+// matters for DOCUMENT-typed properties, whose nested documents must keep
+// conforming to the same declared type.
+bool IsSchemaTypeCompatible(const PropertyConfigProto& old_property,
+                            const PropertyConfigProto& new_property) {
+  if (old_property.schema_type() == new_property.schema_type()) {
+    return true;
+  }
+  ICING_VLOG(1) << absl_ports::StrCat("Schema type ",
+                                      old_property.schema_type(), "->",
+                                      new_property.schema_type());
+  return false;
+}
+
+// Returns true iff the old and new property configs agree on data type,
+// schema type, and cardinality. Checks short-circuit, so later helpers (and
+// their logging) are skipped once one aspect is known to be incompatible —
+// matching the original &&-chain.
+bool IsPropertyCompatible(const PropertyConfigProto& old_property,
+                          const PropertyConfigProto& new_property) {
+  if (!IsDataTypeCompatible(old_property, new_property)) {
+    return false;
+  }
+  if (!IsSchemaTypeCompatible(old_property, new_property)) {
+    return false;
+  }
+  return IsCardinalityCompatible(old_property, new_property);
+}
+
+// Returns true iff both the term match type and the tokenizer type are
+// unchanged. A change to either means previously indexed content would no
+// longer be found, so the index must be rebuilt.
+bool IsTermMatchTypeCompatible(const IndexingConfig& old_indexed,
+                               const IndexingConfig& new_indexed) {
+  if (old_indexed.term_match_type() != new_indexed.term_match_type()) {
+    return false;
+  }
+  return old_indexed.tokenizer_type() == new_indexed.tokenizer_type();
+}
+
+} // namespace
+
+libtextclassifier3::Status SchemaUtil::Validate(const SchemaProto& schema) {
+  // NOTE: the string_views stored in the sets below point into `schema`'s own
+  // storage, which outlives this function, so no copies are needed.
+
+  // Tracks SchemaTypeConfigs that we've validated already.
+  std::unordered_set<std::string_view> known_schema_types;
+
+  // Tracks SchemaTypeConfigs that have been mentioned (by other
+  // SchemaTypeConfigs), but we haven't validated yet.
+  std::unordered_set<std::string_view> unknown_schema_types;
+
+  // Tracks PropertyConfigs within a SchemaTypeConfig that we've validated
+  // already.
+  std::unordered_set<std::string_view> known_property_names;
+
+  for (const auto& type_config : schema.types()) {
+    std::string_view schema_type(type_config.schema_type());
+    ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type));
+
+    // We can't have duplicate schema_types
+    if (!known_schema_types.insert(schema_type).second) {
+      return absl_ports::AlreadyExistsError(absl_ports::StrCat(
+          "Field 'schema_type' '", schema_type, "' is already defined"));
+    }
+    unknown_schema_types.erase(schema_type);
+
+    // We only care about properties being unique within one type_config
+    known_property_names.clear();
+    for (const auto& property_config : type_config.properties()) {
+      std::string_view property_name(property_config.property_name());
+      ICING_RETURN_IF_ERROR(ValidatePropertyName(property_name, schema_type));
+
+      // Property names must be unique
+      if (!known_property_names.insert(property_name).second) {
+        return absl_ports::AlreadyExistsError(absl_ports::StrCat(
+            "Field 'property_name' '", property_name,
+            "' is already defined for schema '", schema_type, "'"));
+      }
+
+      auto data_type = property_config.data_type();
+      ICING_RETURN_IF_ERROR(
+          ValidateDataType(data_type, schema_type, property_name));
+
+      if (data_type == PropertyConfigProto::DataType::DOCUMENT) {
+        // Need to know what schema_type these Document properties should be
+        // validated against
+        std::string_view property_schema_type(property_config.schema_type());
+        ICING_RETURN_IF_ERROR(ValidatePropertySchemaType(
+            property_schema_type, schema_type, property_name));
+
+        // Need to make sure we eventually see/validate this schema_type
+        if (known_schema_types.count(property_schema_type) == 0) {
+          unknown_schema_types.insert(property_schema_type);
+        }
+      }
+
+      ICING_RETURN_IF_ERROR(ValidateCardinality(property_config.cardinality(),
+                                                schema_type, property_name));
+
+      ICING_RETURN_IF_ERROR(
+          ValidateIndexingConfig(property_config.indexing_config(), data_type));
+    }
+  }
+
+  // A Document property claimed to be of a schema_type that we never
+  // saw/validated
+  if (!unknown_schema_types.empty()) {
+    return absl_ports::UnknownError(
+        absl_ports::StrCat("Undefined 'schema_type's: ",
+                           absl_ports::StrJoin(unknown_schema_types, ",")));
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status SchemaUtil::ValidateSchemaType(
+    std::string_view schema_type) {
+  // A schema_type is mandatory and restricted to alphanumeric characters.
+  if (schema_type.empty()) {
+    return absl_ports::InvalidArgumentError(
+        "Field 'schema_type' cannot be empty.");
+  }
+
+  if (isAlphaNumeric(schema_type)) {
+    return libtextclassifier3::Status::OK;
+  }
+
+  return absl_ports::InvalidArgumentError(
+      absl_ports::StrCat("Field 'schema_type' '", schema_type,
+                         "' can only contain "
+                         "alphanumeric characters."));
+}
+
+libtextclassifier3::Status SchemaUtil::ValidatePropertyName(
+    std::string_view property_name, std::string_view schema_type) {
+  // Require a property_name
+  if (property_name.empty()) {
+    return absl_ports::InvalidArgumentError(
+        absl_ports::StrCat("Field 'property_name' for schema '", schema_type,
+                           "' cannot be empty."));
+  }
+
+  // Only support alphanumeric values. Reuses the shared helper instead of an
+  // inline loop, consistent with ValidateSchemaType.
+  if (!isAlphaNumeric(property_name)) {
+    // Bug fix: report the offending property_name — the previous message
+    // printed the schema_type here, which made the error misleading.
+    return absl_ports::InvalidArgumentError(
+        absl_ports::StrCat("Field 'property_name' '", property_name,
+                           "' can only contain alphanumeric characters."));
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+libtextclassifier3::Status SchemaUtil::ValidateDataType(
+    PropertyConfigProto::DataType::Code data_type, std::string_view schema_type,
+    std::string_view property_name) {
+  // UNKNOWN is the proto default value, kept only for backwards
+  // compatibility; every property must declare an explicit data type.
+  if (data_type != PropertyConfigProto::DataType::UNKNOWN) {
+    return libtextclassifier3::Status::OK;
+  }
+
+  return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+      "Field 'data_type' cannot be UNKNOWN for schema property '",
+      schema_type, " ", property_name, "'"));
+}
+
+libtextclassifier3::Status SchemaUtil::ValidatePropertySchemaType(
+    std::string_view property_schema_type, std::string_view schema_type,
+    std::string_view property_name) {
+  // DOCUMENT-typed properties must say which schema type their nested
+  // documents conform to.
+  if (!property_schema_type.empty()) {
+    return libtextclassifier3::Status::OK;
+  }
+  return absl_ports::InvalidArgumentError(
+      absl_ports::StrCat("Field 'schema_type' is required for DOCUMENT "
+                         "data_types in schema property '",
+                         schema_type, " ", property_name, "'"));
+}
+
+libtextclassifier3::Status SchemaUtil::ValidateCardinality(
+    PropertyConfigProto::Cardinality::Code cardinality,
+    std::string_view schema_type, std::string_view property_name) {
+  // UNKNOWN is the proto default value, kept only for backwards
+  // compatibility; every property must declare an explicit cardinality.
+  if (cardinality != PropertyConfigProto::Cardinality::UNKNOWN) {
+    return libtextclassifier3::Status::OK;
+  }
+
+  return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+      "Field 'cardinality' cannot be UNKNOWN for schema property '",
+      schema_type, " ", property_name, "'"));
+}
+
+libtextclassifier3::Status SchemaUtil::ValidateIndexingConfig(
+    const IndexingConfig& config,
+    PropertyConfigProto::DataType::Code data_type) {
+  // DOCUMENT properties skip this check entirely; for all other data types,
+  // a property that declares a term match type must also declare a real
+  // tokenizer, otherwise its content could never be tokenized for indexing.
+  if (data_type != PropertyConfigProto::DataType::DOCUMENT &&
+      config.term_match_type() != TermMatchType::UNKNOWN &&
+      config.tokenizer_type() == IndexingConfig::TokenizerType::NONE) {
+    return absl_ports::InvalidArgumentError(
+        "TermMatchType properties cannot have a tokenizer type of NONE");
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+void SchemaUtil::BuildTypeConfigMap(
+    const SchemaProto& schema, SchemaUtil::TypeConfigMap* type_config_map) {
+  // Rebuild the map from scratch: schema_type name -> its full type config.
+  type_config_map->clear();
+  for (int i = 0; i < schema.types_size(); ++i) {
+    const SchemaTypeConfigProto& type_config = schema.types(i);
+    type_config_map->emplace(type_config.schema_type(), type_config);
+  }
+}
+
+void SchemaUtil::BuildPropertyConfigMap(
+    const SchemaTypeConfigProto& type_config,
+    std::unordered_map<std::string_view, const PropertyConfigProto*>*
+        property_config_map,
+    int32_t* num_required_properties) {
+  // TODO(samzheng): consider caching property_config_map for some properties,
+  // e.g. using LRU cache. Or changing schema.proto to use go/protomap.
+  property_config_map->clear();
+  int32_t required_count = 0;
+  for (const PropertyConfigProto& property_config : type_config.properties()) {
+    if (property_config.cardinality() ==
+        PropertyConfigProto::Cardinality::REQUIRED) {
+      ++required_count;
+    }
+    // The stored pointers alias `type_config`'s properties; the caller must
+    // keep `type_config` alive while using the map.
+    property_config_map->emplace(property_config.property_name(),
+                                 &property_config);
+  }
+  *num_required_properties = required_count;
+}
+
+// NOTE(review): returning `const SchemaDelta` by value inhibits move
+// semantics at call sites; consider dropping the const (needs a matching
+// header change).
+const SchemaUtil::SchemaDelta SchemaUtil::ComputeCompatibilityDelta(
+    const SchemaProto& old_schema, const SchemaProto& new_schema) {
+  SchemaDelta schema_delta;
+  schema_delta.index_incompatible = false;
+
+  TypeConfigMap new_type_config_map;
+  BuildTypeConfigMap(new_schema, &new_type_config_map);
+
+  // Iterate through and check each field of the old schema
+  for (const auto& old_type_config : old_schema.types()) {
+    auto new_schema_type_and_config =
+        new_type_config_map.find(old_type_config.schema_type());
+
+    if (new_schema_type_and_config == new_type_config_map.end()) {
+      // Didn't find the old schema type in the new schema, all the old
+      // documents of this schema type are invalid without the schema
+      ICING_VLOG(1) << absl_ports::StrCat("Previously defined schema type ",
+                                          old_type_config.schema_type(),
+                                          " was not defined in new schema");
+      schema_delta.schema_types_deleted.insert(old_type_config.schema_type());
+      continue;
+    }
+
+    std::unordered_map<std::string_view, const PropertyConfigProto*>
+        new_property_map;
+    int32_t new_required_properties = 0;
+    BuildPropertyConfigMap(new_schema_type_and_config->second,
+                           &new_property_map, &new_required_properties);
+
+    // We only need to check the old, existing properties to see if they're
+    // compatible since we'll have old data that may be invalidated or need to
+    // be reindexed. New properties don't have any data that would be
+    // invalidated or incompatible, so we blanket accept all new properties.
+    int32_t old_required_properties = 0;
+    for (const auto& old_property_config : old_type_config.properties()) {
+      auto new_property_name_and_config =
+          new_property_map.find(old_property_config.property_name());
+
+      if (new_property_name_and_config == new_property_map.end()) {
+        // Didn't find the old property. Note: such properties are not counted
+        // in old_required_properties below, but the type is already marked
+        // incompatible here, so the REQUIRED-count comparison is moot for it.
+        ICING_VLOG(1) << absl_ports::StrCat("Previously defined property type ",
+                                            old_type_config.schema_type(), ".",
+                                            old_property_config.property_name(),
+                                            " was not defined in new schema");
+        schema_delta.schema_types_incompatible.insert(
+            old_type_config.schema_type());
+        continue;
+      }
+
+      const PropertyConfigProto* new_property_config =
+          new_property_name_and_config->second;
+
+      if (!IsPropertyCompatible(old_property_config, *new_property_config)) {
+        ICING_VLOG(1) << absl_ports::StrCat(
+            "Property ", old_type_config.schema_type(), ".",
+            old_property_config.property_name(), " is incompatible.");
+        schema_delta.schema_types_incompatible.insert(
+            old_type_config.schema_type());
+      }
+
+      if (old_property_config.cardinality() ==
+          PropertyConfigProto::Cardinality::REQUIRED) {
+        ++old_required_properties;
+      }
+
+      // Any change in the indexed property requires a reindexing
+      if (!IsTermMatchTypeCompatible(old_property_config.indexing_config(),
+                                     new_property_config->indexing_config())) {
+        schema_delta.index_incompatible = true;
+      }
+    }
+
+    // We can't have new properties that are REQUIRED since we won't know how
+    // to backfill the data, and the existing data will be invalid. We're
+    // guaranteed from our previous checks that all the old properties are also
+    // present in the new property config, so we can do a simple int comparison
+    // here to detect new required properties.
+    if (new_required_properties > old_required_properties) {
+      ICING_VLOG(1) << absl_ports::StrCat(
+          "New schema ", old_type_config.schema_type(),
+          " has REQUIRED properties that are not "
+          "present in the previously defined schema");
+      schema_delta.schema_types_incompatible.insert(
+          old_type_config.schema_type());
+    }
+  }
+
+  return schema_delta;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h
new file mode 100644
index 0000000..70a9ad2
--- /dev/null
+++ b/icing/schema/schema-util.h
@@ -0,0 +1,153 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_SCHEMA_UTIL_H_
+#define ICING_SCHEMA_SCHEMA_UTIL_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "utils/base/status.h"
+#include "icing/proto/schema.pb.h"
+
+namespace icing {
+namespace lib {
+
+// Stateless helpers for validating a SchemaProto and for diffing two schemas
+// to determine what existing data/index state a schema change invalidates.
+class SchemaUtil {
+ public:
+  // NOTE(review): the mapped type is const, which forbids operator[] and
+  // in-place mutation of entries; confirm this is intentional before relying
+  // on it elsewhere.
+  using TypeConfigMap =
+      std::unordered_map<std::string, const SchemaTypeConfigProto>;
+
+  struct SchemaDelta {
+    // Whether an indexing config has changed, requiring the index to be
+    // regenerated. We don't list out all the types that make the index
+    // incompatible because our index isn't optimized for that. It's much easier
+    // to reset the entire index and reindex every document.
+    bool index_incompatible = false;
+
+    // Which schema types were present in the old schema, but were deleted from
+    // the new schema.
+    std::unordered_set<std::string> schema_types_deleted;
+
+    // Which schema types had their SchemaTypeConfigProto changed in a way that
+    // could invalidate existing Documents of that schema type.
+    std::unordered_set<std::string> schema_types_incompatible;
+
+    bool operator==(const SchemaDelta& other) const {
+      return index_incompatible == other.index_incompatible &&
+             schema_types_deleted == other.schema_types_deleted &&
+             schema_types_incompatible == other.schema_types_incompatible;
+    }
+  };
+
+  // This function validates:
+  //   1. SchemaTypeConfigProto.schema_type's must be unique
+  //   2. Properties within one SchemaTypeConfigProto must be unique
+  //   3. SchemaTypeConfigProtos.schema_type must be non-empty
+  //   4. PropertyConfigProtos.property_name must be non-empty
+  //   5. PropertyConfigProtos.property_name's must be unique within one
+  //      SchemaTypeConfigProto
+  //   6. PropertyConfigProtos.data_type cannot be UNKNOWN
+  //   7. PropertyConfigProtos.data_type of DOCUMENT must also have a
+  //      schema_type
+  //   8. PropertyConfigProtos.cardinality cannot be UNKNOWN
+  //   9. PropertyConfigProtos.schema_type's must correspond to a
+  //      SchemaTypeConfigProto.schema_type
+  //  10. All string fields must be alphanumeric.
+  //
+  // Returns:
+  //   ALREADY_EXISTS for case 1 and 2
+  //   INVALID_ARGUMENT for 3-10
+  //   OK otherwise
+  static libtextclassifier3::Status Validate(const SchemaProto& schema);
+
+  // Creates a mapping of schema type -> schema type config proto. The
+  // type_config_map is cleared, and then each schema-type_config_proto pair is
+  // placed in the given type_config_map parameter.
+  static void BuildTypeConfigMap(const SchemaProto& schema,
+                                 TypeConfigMap* type_config_map);
+
+  // Calculate and return a hash map of (property name -> property config)
+  // from the given type config. The number of required properties will be
+  // assigned to output param num_required_properties. The pointers stored in
+  // the map alias `type_config`, which must outlive the map.
+  static void BuildPropertyConfigMap(
+      const SchemaTypeConfigProto& type_config,
+      std::unordered_map<std::string_view, const PropertyConfigProto*>*
+          property_config_map,
+      int32_t* num_required_properties);
+
+  // Computes the delta between the old and new schema. There are a few
+  // differences that'll be reported:
+  //   1. The derived index would be incompatible. This is held in
+  //      `SchemaDelta.index_incompatible`.
+  //   2. Some schema types existed in the old schema, but have been deleted
+  //      from the new schema. This is held in
+  //      `SchemaDelta.schema_types_deleted`
+  //   3. A schema type's new definition would mean any existing data of the old
+  //      definition is now incompatible.
+  //
+  // For case 1, the two schemas would result in an incompatible index if:
+  //   1.1. The new SchemaProto has a different set of indexed properties than
+  //        the old SchemaProto.
+  //
+  // For case 3, the two schemas would result in incompatible data if:
+  //   3.1. A SchemaTypeConfig exists in the old SchemaProto, but is not in the
+  //        new SchemaProto
+  //   3.2. A property exists in the old SchemaTypeConfig, but is not in the new
+  //        SchemaTypeConfig
+  //   3.3. A property is in the new SchemaTypeConfig and has a REQUIRED
+  //        PropertyConfigProto.cardinality, but is not in the old
+  //        SchemaTypeConfig
+  //   3.4. A property is in both the old and new SchemaTypeConfig, but its
+  //        PropertyConfigProto.data_type is different
+  //   3.5. A property is in both the old and new SchemaTypeConfig, but its
+  //        PropertyConfigProto.schema_type is different
+  //   3.6. A property is in both the old and new SchemaTypeConfig, but its new
+  //        PropertyConfigProto.cardinality is more restrictive. Restrictive
+  //        scale defined as:
+  //        LEAST <REPEATED - OPTIONAL - REQUIRED> MOST
+  //
+  // A property is defined by the combination of the
+  // SchemaTypeConfig.schema_type and the PropertyConfigProto.property_name.
+  //
+  // Returns a SchemaDelta that captures the aforementioned differences.
+  static const SchemaDelta ComputeCompatibilityDelta(
+      const SchemaProto& old_schema, const SchemaProto& new_schema);
+
+ private:
+  static libtextclassifier3::Status ValidateSchemaType(
+      std::string_view schema_type);
+  static libtextclassifier3::Status ValidatePropertyName(
+      std::string_view property_name, std::string_view schema_type);
+  static libtextclassifier3::Status ValidateDataType(
+      PropertyConfigProto::DataType::Code data_type,
+      std::string_view schema_type, std::string_view property_name);
+  static libtextclassifier3::Status ValidatePropertySchemaType(
+      std::string_view property_schema_type, std::string_view schema_type,
+      std::string_view property_name);
+  static libtextclassifier3::Status ValidateCardinality(
+      PropertyConfigProto::Cardinality::Code cardinality,
+      std::string_view schema_type, std::string_view property_name);
+  static libtextclassifier3::Status ValidateIndexingConfig(
+      const IndexingConfig& config,
+      PropertyConfigProto::DataType::Code data_type);
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_SCHEMA_UTIL_H_
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
new file mode 100644
index 0000000..67cfb50
--- /dev/null
+++ b/icing/schema/schema-util_test.cc
@@ -0,0 +1,575 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/schema-util.h"
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+namespace {
+
+using ::testing::Eq;
+
+// Schema type names used throughout these tests
+constexpr char kEmailType[] = "EmailMessage";
+constexpr char kPersonType[] = "Person";
+
+class SchemaUtilTest : public ::testing::Test {
+ protected:
+  // Schema under construction; each test adds types to it before validating.
+  SchemaProto schema_proto_;
+
+  // Builds a SchemaTypeConfigProto named `schema_type` with one property of
+  // each primitive data type (STRING, INT64, DOUBLE, BOOLEAN, BYTES). If
+  // `nested_schema_type` is non-empty, a repeated DOCUMENT property pointing
+  // at that type is appended as well.
+  static SchemaTypeConfigProto CreateSchemaTypeConfig(
+      const std::string_view schema_type,
+      const std::string_view nested_schema_type = "") {
+    SchemaTypeConfigProto type;
+    type.set_schema_type(std::string(schema_type));
+
+    auto string_property = type.add_properties();
+    string_property->set_property_name("string");
+    string_property->set_data_type(PropertyConfigProto::DataType::STRING);
+    string_property->set_cardinality(
+        PropertyConfigProto::Cardinality::REQUIRED);
+
+    auto int_property = type.add_properties();
+    int_property->set_property_name("int");
+    int_property->set_data_type(PropertyConfigProto::DataType::INT64);
+    int_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+    auto double_property = type.add_properties();
+    double_property->set_property_name("double");
+    double_property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
+    double_property->set_cardinality(
+        PropertyConfigProto::Cardinality::REPEATED);
+
+    auto bool_property = type.add_properties();
+    bool_property->set_property_name("boolean");
+    bool_property->set_data_type(PropertyConfigProto::DataType::BOOLEAN);
+    bool_property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+    auto bytes_property = type.add_properties();
+    bytes_property->set_property_name("bytes");
+    bytes_property->set_data_type(PropertyConfigProto::DataType::BYTES);
+    bytes_property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+    if (!nested_schema_type.empty()) {
+      auto document_property = type.add_properties();
+      document_property->set_property_name("document");
+      document_property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+      document_property->set_cardinality(
+          PropertyConfigProto::Cardinality::REPEATED);
+      document_property->set_schema_type(std::string(nested_schema_type));
+    }
+
+    return type;
+  }
+};
+
+TEST_F(SchemaUtilTest, Valid_Empty) {
+  // A schema with no types at all is trivially valid.
+  ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
+}
+
+TEST_F(SchemaUtilTest, Valid_Nested) {
+  // A DOCUMENT property may reference a type defined later in the schema.
+  auto email_type = schema_proto_.add_types();
+  *email_type = CreateSchemaTypeConfig(kEmailType, kPersonType);
+
+  auto person_type = schema_proto_.add_types();
+  *person_type = CreateSchemaTypeConfig(kPersonType);
+
+  ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
+}
+
+TEST_F(SchemaUtilTest, Valid_ClearedPropertyConfigs) {
+  // No property fields is technically ok, but probably not realistic.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+  type->clear_properties();
+
+  ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
+}
+
+TEST_F(SchemaUtilTest, Invalid_ClearedSchemaType) {
+  // Clearing schema_type leaves it empty, which Validate rejects.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+  type->clear_schema_type();
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, Invalid_EmptySchemaType) {
+  // An explicitly empty schema_type string is also rejected.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+  type->set_schema_type("");
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, Invalid_NotAlphanumericSchemaType) {
+  // schema_type is restricted to alphanumeric characters; "_" fails.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+  type->set_schema_type("_");
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, Invalid_ClearedPropertyName) {
+  // Clearing property_name leaves it empty, which Validate rejects.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  auto property = type->add_properties();
+  property->clear_property_name();
+  property->set_data_type(PropertyConfigProto::DataType::STRING);
+  property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, Invalid_EmptyPropertyName) {
+  // An explicitly empty property_name string is also rejected.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  auto property = type->add_properties();
+  property->set_property_name("");
+  property->set_data_type(PropertyConfigProto::DataType::STRING);
+  property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, Invalid_NotAlphanumericPropertyName) {
+  // property_name is restricted to alphanumeric characters; "_" fails.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  auto property = type->add_properties();
+  property->set_property_name("_");
+  property->set_data_type(PropertyConfigProto::DataType::STRING);
+  property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, Invalid_DuplicatePropertyName) {
+  // Two properties with the same name within one type -> ALREADY_EXISTS.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  auto first_property = type->add_properties();
+  first_property->set_property_name("DuplicatedProperty");
+  first_property->set_data_type(PropertyConfigProto::DataType::STRING);
+  first_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+  auto second_property = type->add_properties();
+  second_property->set_property_name("DuplicatedProperty");
+  second_property->set_data_type(PropertyConfigProto::DataType::STRING);
+  second_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::ALREADY_EXISTS));
+}
+
+TEST_F(SchemaUtilTest, Invalid_ClearedDataType) {
+  // Clearing data_type resets it to UNKNOWN, which Validate rejects.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  auto property = type->add_properties();
+  property->set_property_name("NewProperty");
+  property->clear_data_type();
+  property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, Invalid_UnknownDataType) {
+  // An explicitly UNKNOWN data_type is also rejected.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  auto property = type->add_properties();
+  property->set_property_name("NewProperty");
+  property->set_data_type(PropertyConfigProto::DataType::UNKNOWN);
+  property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, Invalid_ClearedCardinality) {
+  // Clearing cardinality resets it to UNKNOWN, which Validate rejects.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  auto property = type->add_properties();
+  property->set_property_name("NewProperty");
+  property->set_data_type(PropertyConfigProto::DataType::STRING);
+  property->clear_cardinality();
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, Invalid_UnknownCardinality) {
+  // An explicitly UNKNOWN cardinality is also rejected.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  auto property = type->add_properties();
+  property->set_property_name("NewProperty");
+  property->set_data_type(PropertyConfigProto::DataType::STRING);
+  property->set_cardinality(PropertyConfigProto::Cardinality::UNKNOWN);
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, Invalid_ClearedPropertySchemaType) {
+  // A DOCUMENT property must name the schema_type of its nested documents.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  auto property = type->add_properties();
+  property->set_property_name("NewProperty");
+  property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+  property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+  property->clear_schema_type();
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, Invalid_EmptyPropertySchemaType) {
+  // An explicitly empty schema_type on a DOCUMENT property is also rejected.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  auto property = type->add_properties();
+  property->set_property_name("NewProperty");
+  property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+  property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+  property->set_schema_type("");
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, Invalid_NoMatchingSchemaType) {
+  // A DOCUMENT property referencing a schema_type never defined anywhere in
+  // the schema fails with UNKNOWN.
+  auto type = schema_proto_.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  auto property = type->add_properties();
+  property->set_property_name("NewProperty");
+  property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+  property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+  property->set_schema_type("NewSchemaType");
+
+  ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+              StatusIs(libtextclassifier3::StatusCode::UNKNOWN));
+}
+
+TEST_F(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
+  // Configure old schema
+  SchemaProto old_schema;
+  auto type = old_schema.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  // Configure new schema with an optional field, not considered incompatible
+  // since it's fine if old data doesn't have this optional field
+  SchemaProto new_schema_with_optional;
+  type = new_schema_with_optional.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  auto property = type->add_properties();
+  property->set_property_name("NewOptional");
+  property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
+  property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+  // Expect a default (empty) delta: nothing deleted, nothing incompatible.
+  SchemaUtil::SchemaDelta schema_delta;
+  EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema,
+                                                    new_schema_with_optional),
+              Eq(schema_delta));
+}
+
+TEST_F(SchemaUtilTest, NewRequiredPropertyIsIncompatible) {
+  // Old schema: a plain email type.
+  SchemaProto old_schema;
+  *old_schema.add_types() = CreateSchemaTypeConfig(kEmailType);
+
+  // New schema adds a REQUIRED field. Old documents cannot contain it, so
+  // the email type must be flagged as incompatible.
+  SchemaProto new_schema_with_required;
+  auto new_type = new_schema_with_required.add_types();
+  *new_type = CreateSchemaTypeConfig(kEmailType);
+
+  auto new_property = new_type->add_properties();
+  new_property->set_property_name("NewRequired");
+  new_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+  new_property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
+
+  SchemaUtil::SchemaDelta expected_delta;
+  expected_delta.schema_types_incompatible.emplace(kEmailType);
+  EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema,
+                                                    new_schema_with_required),
+              Eq(expected_delta));
+}
+
+TEST_F(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) {
+  // Old schema: email type with an optional int64 property.
+  SchemaProto old_schema;
+  auto old_type = old_schema.add_types();
+  *old_type = CreateSchemaTypeConfig(kEmailType);
+
+  auto old_property = old_type->add_properties();
+  old_property->set_property_name("OldOptional");
+  old_property->set_data_type(PropertyConfigProto::DataType::INT64);
+  old_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+  // New schema drops that property entirely. A new schema must keep every
+  // previously defined property, so the type is incompatible.
+  SchemaProto new_schema;
+  *new_schema.add_types() = CreateSchemaTypeConfig(kEmailType);
+
+  SchemaUtil::SchemaDelta expected_delta;
+  expected_delta.schema_types_incompatible.emplace(kEmailType);
+  EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+              Eq(expected_delta));
+}
+
+TEST_F(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
+  // Configure less restrictive schema based on cardinality (REPEATED allows
+  // any number of values).
+  SchemaProto less_restrictive_schema;
+  auto type = less_restrictive_schema.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  auto property = type->add_properties();
+  property->set_property_name("Property");
+  property->set_data_type(PropertyConfigProto::DataType::INT64);
+  property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+  // Configure more restrictive schema based on cardinality (OPTIONAL allows
+  // at most one value).
+  SchemaProto more_restrictive_schema;
+  type = more_restrictive_schema.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+
+  property = type->add_properties();
+  property->set_property_name("Property");
+  property->set_data_type(PropertyConfigProto::DataType::INT64);
+  property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+  // The new schema can't be more restrictive than the old: REPEATED->OPTIONAL
+  // would invalidate old documents that stored multiple values.
+  SchemaUtil::SchemaDelta incompatible_schema_delta;
+  incompatible_schema_delta.schema_types_incompatible.emplace(kEmailType);
+  EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+                  /*old_schema=*/less_restrictive_schema,
+                  /*new_schema=*/more_restrictive_schema),
+              Eq(incompatible_schema_delta));
+
+  // The new schema may be less restrictive: OPTIONAL->REPEATED still accepts
+  // every old document.
+  SchemaUtil::SchemaDelta compatible_schema_delta;
+  EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+                  /*old_schema=*/more_restrictive_schema,
+                  /*new_schema=*/less_restrictive_schema),
+              Eq(compatible_schema_delta));
+}
+
+TEST_F(SchemaUtilTest, DifferentDataTypeIsIncompatible) {
+  // Old schema: email type with a repeated int64 property.
+  SchemaProto old_schema;
+  auto old_type = old_schema.add_types();
+  *old_type = CreateSchemaTypeConfig(kEmailType);
+
+  auto old_property = old_type->add_properties();
+  old_property->set_property_name("Property");
+  old_property->set_data_type(PropertyConfigProto::DataType::INT64);
+  old_property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+  // New schema: same property, but its data type changed to double. Existing
+  // int64 data no longer matches, so the type is incompatible.
+  SchemaProto new_schema;
+  auto new_type = new_schema.add_types();
+  *new_type = CreateSchemaTypeConfig(kEmailType);
+
+  auto new_property = new_type->add_properties();
+  new_property->set_property_name("Property");
+  new_property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
+  new_property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+  SchemaUtil::SchemaDelta expected_delta;
+  expected_delta.schema_types_incompatible.emplace(kEmailType);
+  EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+              Eq(expected_delta));
+}
+
+TEST_F(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
+  // Configure old schema, where Property is supposed to be a Person type.
+  SchemaProto old_schema;
+  auto type = old_schema.add_types();
+  *type = CreateSchemaTypeConfig(kPersonType);
+
+  // BUG FIX: the original assigned the Email config over the just-added
+  // Person entry (missing add_types()), so the schema never contained the
+  // Person type that the property below references. Add Email as its own
+  // entry instead.
+  type = old_schema.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+  auto property = type->add_properties();
+  property->set_property_name("Property");
+  property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+  property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+  property->set_schema_type(kPersonType);
+
+  // Configure new schema, where Property is supposed to be an Email type.
+  SchemaProto new_schema;
+  type = new_schema.add_types();
+  *type = CreateSchemaTypeConfig(kPersonType);
+
+  type = new_schema.add_types();
+  *type = CreateSchemaTypeConfig(kEmailType);
+  property = type->add_properties();
+  property->set_property_name("Property");
+  property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+  property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+  property->set_schema_type(kEmailType);
+
+  // Only the Email type's nested document type changed, so only it is
+  // reported as incompatible.
+  SchemaUtil::SchemaDelta schema_delta;
+  schema_delta.schema_types_incompatible.emplace(kEmailType);
+  EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+              Eq(schema_delta));
+}
+
+TEST_F(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) {
+  // Configure old schema
+  SchemaProto old_schema;
+  auto old_type = old_schema.add_types();
+  *old_type = CreateSchemaTypeConfig(kEmailType, kPersonType);
+
+  auto old_property = old_type->add_properties();
+  old_property->set_property_name("Property");
+  old_property->set_data_type(PropertyConfigProto::DataType::STRING);
+  old_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+  // Configure new schema
+  SchemaProto new_schema;
+  auto new_type = new_schema.add_types();
+  *new_type = CreateSchemaTypeConfig(kEmailType, kPersonType);
+
+  auto new_property = new_type->add_properties();
+  new_property->set_property_name("Property");
+  new_property->set_data_type(PropertyConfigProto::DataType::STRING);
+  new_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+  // Either direction of change (gaining or losing an indexed property) must
+  // set index_incompatible; both assertions reuse this same expected delta.
+  SchemaUtil::SchemaDelta schema_delta;
+  schema_delta.index_incompatible = true;
+
+  // New schema gained a new indexed property (UNKNOWN means not indexed).
+  old_property->mutable_indexing_config()->set_term_match_type(
+      TermMatchType::UNKNOWN);
+  new_property->mutable_indexing_config()->set_term_match_type(
+      TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+              Eq(schema_delta));
+
+  // New schema lost an indexed property. Note this mutates the same protos
+  // in place, so order of these two scenarios matters.
+  old_property->mutable_indexing_config()->set_term_match_type(
+      TermMatchType::EXACT_ONLY);
+  new_property->mutable_indexing_config()->set_term_match_type(
+      TermMatchType::UNKNOWN);
+  EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+              Eq(schema_delta));
+}
+
+TEST_F(SchemaUtilTest, AddingTypeIsCompatible) {
+  // Adding a brand-new type can't invalidate existing data: no stored
+  // document belongs to the new type yet.
+  SchemaProto old_schema;
+  *old_schema.add_types() = CreateSchemaTypeConfig(kEmailType);
+
+  SchemaProto new_schema;
+  *new_schema.add_types() = CreateSchemaTypeConfig(kEmailType);
+  *new_schema.add_types() = CreateSchemaTypeConfig(kPersonType);
+
+  // An empty delta means nothing was deleted or made incompatible.
+  SchemaUtil::SchemaDelta expected_delta;
+  EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+              Eq(expected_delta));
+}
+
+TEST_F(SchemaUtilTest, DeletingTypeIsNoted) {
+  // Removing a type orphans any stored documents of that type, so the
+  // deletion must be surfaced in the delta.
+  SchemaProto old_schema;
+  *old_schema.add_types() = CreateSchemaTypeConfig(kEmailType);
+  *old_schema.add_types() = CreateSchemaTypeConfig(kPersonType);
+
+  SchemaProto new_schema;
+  *new_schema.add_types() = CreateSchemaTypeConfig(kEmailType);
+
+  SchemaUtil::SchemaDelta expected_delta;
+  expected_delta.schema_types_deleted.emplace(kPersonType);
+  EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+              Eq(expected_delta));
+}
+
+TEST_F(SchemaUtilTest, ValidateNoTokenizer) {
+  SchemaProto schema;
+  auto* type = schema.add_types();
+  type->set_schema_type("MyType");
+
+  // An indexed (EXACT_ONLY) string property without a tokenizer is invalid:
+  // the indexer has no way to split the content into terms.
+  auto* prop = type->add_properties();
+  prop->set_property_name("Foo");
+  prop->set_data_type(PropertyConfigProto::DataType::STRING);
+  prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+  prop->mutable_indexing_config()->set_term_match_type(
+      TermMatchType::EXACT_ONLY);
+  EXPECT_THAT(SchemaUtil::Validate(schema),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+  // Setting a tokenizer makes the same schema valid.
+  prop->mutable_indexing_config()->set_tokenizer_type(
+      IndexingConfig::TokenizerType::PLAIN);
+  EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+}
+
+TEST_F(SchemaUtilTest, ValidateDocumentNoTokenizer) {
+  SchemaProto schema;
+  auto* type = schema.add_types();
+  type->set_schema_type("OtherType");
+
+  type = schema.add_types();
+  type->set_schema_type("MyType");
+
+  // Unlike string properties, a DOCUMENT property may be indexed with
+  // tokenizer NONE: the nested document's own sections carry the tokenizers.
+  auto* prop = type->add_properties();
+  prop->set_property_name("SubType");
+  prop->set_schema_type("OtherType");
+  prop->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+  prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+  prop->mutable_indexing_config()->set_term_match_type(
+      TermMatchType::EXACT_ONLY);
+  prop->mutable_indexing_config()->set_tokenizer_type(
+      IndexingConfig::TokenizerType::NONE);
+
+  EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/section-manager.cc b/icing/schema/section-manager.cc
new file mode 100644
index 0000000..85f0768
--- /dev/null
+++ b/icing/schema/section-manager.cc
@@ -0,0 +1,371 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/section-manager.h"
+
+#include <algorithm>
+#include <cinttypes>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/key-mapper.h"
+
+namespace icing {
+namespace lib {
+namespace {
+
+// Maps a type config name to the ordered list of sections assigned to it.
+// NOTE(review): this alias does not appear to be referenced anywhere in this
+// file — verify whether it can be removed.
+using TypeSectionMap =
+    std::unordered_map<std::string, const std::vector<SectionMetadata>>;
+
+// This state helps detect infinite loops (e.g. two type configs referencing
+// each other) when assigning sections. The combination of 'number of sections
+// assigned' and 'current schema name' represents a unique state in the
+// section-assign process. If the same state is seen a second time, that means
+// there's an infinite loop.
+struct SectionAssigningState {
+  // How many sections had already been assigned when this type was visited.
+  size_t num_sections_assigned;
+  // Name of the schema type being visited.
+  std::string current_schema_name;
+
+  SectionAssigningState(size_t num_sections_assigned_in,
+                        std::string&& current_schema_name_in)
+      : num_sections_assigned(num_sections_assigned_in),
+        current_schema_name(std::move(current_schema_name_in)) {}
+};
+
+// Provides a hash value of this struct so that it can be stored in a hash
+// set.
+struct SectionAssigningStateHasher {
+  // FIX: operator() must be const — the Cpp17Hash requirement (and
+  // std::unordered_set, which is instantiated with this hasher below) needs
+  // the call operator to be invocable on a const hasher.
+  size_t operator()(const SectionAssigningState& state) const {
+    size_t str_hash = std::hash<std::string>()(state.current_schema_name);
+    size_t int_hash = std::hash<size_t>()(state.num_sections_assigned);
+    // Combine the two hashes: keep bits 16-31 of the string hash and bits
+    // 0-15 of the integer hash so both values influence the result.
+    return (str_hash & 0xFFFF0000) | (int_hash & 0x0000FFFF);
+  }
+};
+
+// Two states are equal iff both the assigned-section count and the schema
+// name match; required for membership tests in the visited-states hash set.
+bool operator==(const SectionAssigningState& lhs,
+                const SectionAssigningState& rhs) {
+  if (lhs.num_sections_assigned != rhs.num_sections_assigned) {
+    return false;
+  }
+  return lhs.current_schema_name == rhs.current_schema_name;
+}
+
+// Joins a parent section path with the next property name, inserting the
+// property separator only when the parent path is non-empty.
+std::string ConcatenatePath(const std::string& path,
+                            const std::string& next_property_name) {
+  if (!path.empty()) {
+    return absl_ports::StrCat(path, kPropertySeparator, next_property_name);
+  }
+  return next_property_name;
+}
+
+// Helper function to recursively identify sections from a type config and add
+// them to a section metadata list. current_section_path is the dotted path of
+// property names leading to type_config (empty for the top-level type).
+// visited_states detects cycles between type configs; metadata_list is the
+// output, with each section's id equal to its index in the list.
+libtextclassifier3::Status AssignSections(
+    const SchemaTypeConfigProto& type_config,
+    const std::string& current_section_path,
+    const SchemaUtil::TypeConfigMap& type_config_map,
+    std::unordered_set<SectionAssigningState, SectionAssigningStateHasher>*
+        visited_states,
+    std::vector<SectionMetadata>* metadata_list) {
+  if (!visited_states
+           ->emplace(metadata_list->size(),
+                     std::string(type_config.schema_type()))
+           .second) {
+    // Failed to insert, the same state has been seen before, there's an
+    // infinite loop in type configs
+    return absl_ports::InvalidArgumentError(
+        "Infinite loop detected in type configs");
+  }
+
+  // Sorts properties by name's alphabetical order so that order doesn't affect
+  // section assigning. Sorting a copy; the proto itself is not modified.
+  auto sorted_properties = type_config.properties();
+  std::sort(sorted_properties.pointer_begin(), sorted_properties.pointer_end(),
+            [](const PropertyConfigProto* p1, const PropertyConfigProto* p2) {
+              return p1->property_name() < p2->property_name();
+            });
+  for (const auto& property_config : sorted_properties) {
+    if (property_config.indexing_config().term_match_type() ==
+        TermMatchType::UNKNOWN) {
+      // No need to create section for current property (not indexed)
+      continue;
+    }
+
+    // Creates section metadata according to data type
+    if (property_config.data_type() == PropertyConfigProto::DataType::STRING ||
+        property_config.data_type() == PropertyConfigProto::DataType::INT64 ||
+        property_config.data_type() == PropertyConfigProto::DataType::DOUBLE) {
+      // Validates next section id, makes sure that section id is the same as
+      // the list index so that we could find any section metadata by id in O(1)
+      // later.
+      auto new_section_id = static_cast<SectionId>(metadata_list->size());
+      if (!IsSectionIdValid(new_section_id)) {
+        // Max number of sections reached
+        return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+            "Too many properties to be indexed, max number of properties "
+            "allowed: %d",
+            kMaxSectionId - kMinSectionId + 1));
+      }
+      // Creates section metadata from property config
+      metadata_list->emplace_back(
+          new_section_id, property_config.indexing_config().term_match_type(),
+          property_config.indexing_config().tokenizer_type(),
+          ConcatenatePath(current_section_path,
+                          property_config.property_name()));
+    } else if (property_config.data_type() ==
+               PropertyConfigProto::DataType::DOCUMENT) {
+      // Tries to find sections recursively inside the nested document type
+      auto nested_type_config_iter =
+          type_config_map.find(property_config.schema_type());
+      if (nested_type_config_iter == type_config_map.end()) {
+        return absl_ports::NotFoundError(absl_ports::StrCat(
+            "type config not found: ", property_config.schema_type()));
+      }
+      const SchemaTypeConfigProto& nested_type_config =
+          nested_type_config_iter->second;
+      ICING_RETURN_IF_ERROR(
+          AssignSections(nested_type_config,
+                         ConcatenatePath(current_section_path,
+                                         property_config.property_name()),
+                         type_config_map, visited_states, metadata_list));
+    }
+    // NOTE: we don't create sections for BOOLEAN and BYTES data types.
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+// Builds a vector of vectors that holds SectionMetadatas for all the schema
+// types. The outer vector's index corresponds with a type's SchemaTypeId. The
+// inner vector's index corresponds to the section's SectionId.
+//
+// Returns the first error from AssignSections (cycle detected, too many
+// sections, unknown nested type) or from the schema_type_mapper lookup.
+libtextclassifier3::StatusOr<std::vector<std::vector<SectionMetadata>>>
+BuildSectionMetadataCache(const SchemaUtil::TypeConfigMap& type_config_map,
+                          const KeyMapper<SchemaTypeId>& schema_type_mapper) {
+  // Create our vector and reserve the number of schema types we have
+  std::vector<std::vector<SectionMetadata>> section_metadata_cache(
+      schema_type_mapper.num_keys());
+
+  // Reused across iterations and cleared per type: loop detection is scoped
+  // to a single top-level type's section assignment.
+  std::unordered_set<SectionAssigningState, SectionAssigningStateHasher>
+      visited_states;
+  for (const auto& name_and_type : type_config_map) {
+    // Assigns sections for each type config
+    visited_states.clear();
+    const std::string& type_config_name = name_and_type.first;
+    const SchemaTypeConfigProto& type_config = name_and_type.second;
+    std::vector<SectionMetadata> metadata_list;
+    ICING_RETURN_IF_ERROR(
+        AssignSections(type_config, /*current_section_path*/ "",
+                       type_config_map, &visited_states, &metadata_list));
+
+    // Insert the section metadata list at the index of the type's SchemaTypeId
+    ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+                           schema_type_mapper.Get(type_config_name));
+    section_metadata_cache[schema_type_id] = std::move(metadata_list);
+  }
+  return section_metadata_cache;
+}
+
+// Helper function to get string content from a property. Each repeated value
+// becomes its own string in the returned vector (values are NOT joined). We
+// only care about STRING, INT64, and DOUBLE data types; any other type yields
+// an empty vector.
+std::vector<std::string> GetPropertyContent(const PropertyProto& property) {
+  std::vector<std::string> values;
+  if (!property.string_values().empty()) {
+    std::copy(property.string_values().begin(), property.string_values().end(),
+              std::back_inserter(values));
+  } else if (!property.int64_values().empty()) {
+    std::transform(
+        property.int64_values().begin(), property.int64_values().end(),
+        std::back_inserter(values),
+        [](int64_t i) { return IcingStringUtil::StringPrintf("%" PRId64, i); });
+  } else {
+    // NOTE: "%f" formats with a fixed 6 decimal places.
+    std::transform(
+        property.double_values().begin(), property.double_values().end(),
+        std::back_inserter(values),
+        [](double d) { return IcingStringUtil::StringPrintf("%f", d); });
+  }
+  return values;
+}
+
+// Helper function to get the metadata list of a type config by name.
+// Propagates the mapper's error (e.g. NOT_FOUND) if the name is unknown.
+// NOTE(review): the vector is returned by value (copied out of the cache) —
+// consider returning a pointer if this shows up in profiles.
+libtextclassifier3::StatusOr<std::vector<SectionMetadata>> GetMetadataList(
+    const KeyMapper<SchemaTypeId>& schema_type_mapper,
+    const std::vector<std::vector<SectionMetadata>>& section_metadata_cache,
+    const std::string& type_config_name) {
+  ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+                         schema_type_mapper.Get(type_config_name));
+  return section_metadata_cache.at(schema_type_id);
+}
+
+} // namespace
+
+// Private constructor; callers must go through Create(), which supplies a
+// non-null mapper and a fully built metadata cache.
+SectionManager::SectionManager(
+    const KeyMapper<SchemaTypeId>* schema_type_mapper,
+    std::vector<std::vector<SectionMetadata>>&& section_metadata_cache)
+    : schema_type_mapper_(*schema_type_mapper),
+      section_metadata_cache_(std::move(section_metadata_cache)) {}
+
+// Factory: builds the per-type section metadata cache up front, then wraps it
+// in a SectionManager. Errors from BuildSectionMetadataCache (cycles, too
+// many sections, unknown types) are propagated to the caller.
+libtextclassifier3::StatusOr<std::unique_ptr<SectionManager>>
+SectionManager::Create(const SchemaUtil::TypeConfigMap& type_config_map,
+                       const KeyMapper<SchemaTypeId>* schema_type_mapper) {
+  ICING_ASSIGN_OR_RETURN(
+      std::vector<std::vector<SectionMetadata>> section_metadata_cache,
+      BuildSectionMetadataCache(type_config_map, *schema_type_mapper));
+  // std::make_unique can't reach the private constructor, hence raw new.
+  return std::unique_ptr<SectionManager>(new SectionManager(
+      schema_type_mapper, std::move(section_metadata_cache)));
+}
+
+// Resolves a dotted section path (e.g. "sender.name") against a document by
+// recursing through nested document properties, collecting all values at the
+// leaf. Returns NOT_FOUND when the path doesn't resolve or yields no content.
+libtextclassifier3::StatusOr<std::vector<std::string>>
+SectionManager::GetSectionContent(const DocumentProto& document,
+                                  std::string_view section_path) const {
+  // Finds the first property name in section_path
+  size_t separator_position = section_path.find(kPropertySeparator);
+  std::string_view current_property_name =
+      (separator_position == std::string::npos)
+          ? section_path
+          : section_path.substr(0, separator_position);
+
+  // Tries to match the property name with the ones in document
+  auto property_iterator =
+      std::find_if(document.properties().begin(), document.properties().end(),
+                   [current_property_name](const PropertyProto& property) {
+                     return property.name() == current_property_name;
+                   });
+
+  if (property_iterator == document.properties().end()) {
+    // Property name not found, it could be one of the following 2 cases:
+    // 1. The property is optional and it's not in the document
+    // 2. The property name is invalid
+    return absl_ports::NotFoundError(
+        absl_ports::StrCat("Section path ", section_path,
+                           " not found in type config ", document.schema()));
+  }
+
+  if (separator_position == std::string::npos) {
+    // Current property name is the last one in section path
+    std::vector<std::string> content = GetPropertyContent(*property_iterator);
+    if (content.empty()) {
+      // The content of property is explicitly set to empty, we'll treat it as
+      // NOT_FOUND because the index doesn't care about empty strings.
+      return absl_ports::NotFoundError(
+          absl_ports::StrCat("Section path ", section_path,
+                             " not found in type config ", document.schema()));
+    }
+    return content;
+  }
+
+  // Gets section content recursively from every nested document under this
+  // property; values from all of them are flattened into one list. A nested
+  // document that lacks the sub-path is skipped rather than treated as an
+  // error.
+  std::string_view sub_section_path =
+      section_path.substr(separator_position + 1);
+  std::vector<std::string> nested_document_content;
+  for (const auto& nested_document : property_iterator->document_values()) {
+    auto content_or = GetSectionContent(nested_document, sub_section_path);
+    if (content_or.ok()) {
+      std::vector<std::string> content = std::move(content_or).ValueOrDie();
+      std::move(content.begin(), content.end(),
+                std::back_inserter(nested_document_content));
+    }
+  }
+  if (nested_document_content.empty()) {
+    return absl_ports::NotFoundError(
+        absl_ports::StrCat("Section path ", section_path,
+                           " not found in type config ", document.schema()));
+  }
+  return nested_document_content;
+}
+
+// Looks up the section's path from the metadata for the document's type,
+// then delegates to the path-based overload above.
+libtextclassifier3::StatusOr<std::vector<std::string>>
+SectionManager::GetSectionContent(const DocumentProto& document,
+                                  SectionId section_id) const {
+  if (!IsSectionIdValid(section_id)) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Section id %d is greater than the max value %d", section_id,
+        kMaxSectionId));
+  }
+  // NOT_FOUND if the document's schema type is unknown to the mapper.
+  ICING_ASSIGN_OR_RETURN(
+      const std::vector<SectionMetadata>& metadata_list,
+      GetMetadataList(schema_type_mapper_, section_metadata_cache_,
+                      document.schema()));
+  // NOTE(review): signed/unsigned comparison — safe only because
+  // IsSectionIdValid above rules out negative ids; confirm kMinSectionId >= 0.
+  if (section_id >= metadata_list.size()) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Section with id %d doesn't exist in type config %s", section_id,
+        document.schema().c_str()));
+  }
+  // The index of metadata list is the same as the section id, so we can use
+  // section id as the index.
+  return GetSectionContent(document, metadata_list[section_id].path);
+}
+
+// Returns a pointer (owned by this SectionManager) to the SectionMetadata of
+// the given section within the given schema type, or INVALID_ARGUMENT when
+// either id is out of range.
+libtextclassifier3::StatusOr<const SectionMetadata*>
+SectionManager::GetSectionMetadata(SchemaTypeId schema_type_id,
+                                   SectionId section_id) const {
+  if (!IsSectionIdValid(section_id)) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Section id %d is greater than the max value %d", section_id,
+        kMaxSectionId));
+  }
+  // FIX: validate schema_type_id before indexing — the original indexed the
+  // cache unchecked, risking out-of-bounds access, even though the header
+  // documents INVALID_ARGUMENT for an invalid schema type id.
+  if (schema_type_id < 0 ||
+      static_cast<size_t>(schema_type_id) >= section_metadata_cache_.size()) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Invalid schema type id %d", schema_type_id));
+  }
+  const std::vector<SectionMetadata>& section_metadatas =
+      section_metadata_cache_[schema_type_id];
+  if (section_id >= section_metadatas.size()) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Section with id %d doesn't exist in type config with id %d",
+        section_id, schema_type_id));
+  }
+
+  // FIX: the original line read "return &sect;ion_metadatas[...]" — an
+  // HTML-entity-mangled "&section_metadatas" that does not compile.
+  // The index of metadata list is the same as the section id, so we can use
+  // section id as the index.
+  return &section_metadatas[section_id];
+}
+
+// Extracts every indexable section of the document. Iterating metadata_list
+// in order yields sections sorted by ascending section id; sections whose
+// content resolves to nothing are silently omitted.
+libtextclassifier3::StatusOr<std::vector<Section>>
+SectionManager::ExtractSections(const DocumentProto& document) const {
+  // NOT_FOUND if the document's schema type is unknown to the mapper.
+  ICING_ASSIGN_OR_RETURN(
+      const std::vector<SectionMetadata>& metadata_list,
+      GetMetadataList(schema_type_mapper_, section_metadata_cache_,
+                      document.schema()));
+  std::vector<Section> sections;
+  for (const auto& section_metadata : metadata_list) {
+    auto section_content_or =
+        GetSectionContent(document, section_metadata.path);
+    // Adds to result vector if section is found in document; a NOT_FOUND here
+    // just means the (possibly optional) property has no content.
+    if (section_content_or.ok()) {
+      sections.emplace_back(SectionMetadata(section_metadata),
+                            std::move(section_content_or).ValueOrDie());
+    }
+  }
+  return sections;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/section-manager.h b/icing/schema/section-manager.h
new file mode 100644
index 0000000..56045e8
--- /dev/null
+++ b/icing/schema/section-manager.h
@@ -0,0 +1,117 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_SECTION_MANAGER_H_
+#define ICING_SCHEMA_SECTION_MANAGER_H_
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "utils/base/statusor.h"
+#include "icing/proto/document.pb.h"
+#include "icing/schema/schema-util.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/key-mapper.h"
+
+namespace icing {
+namespace lib {
+
+// Separator between property names in a section path, e.g. "sender.name".
+inline constexpr char kPropertySeparator[] = ".";
+
+// This class provides section-related operations. It assigns sections
+// according to type configs and extracts section / sections from documents.
+// Instances are immutable after Create() and hold no I/O resources.
+class SectionManager {
+ public:
+  SectionManager(const SectionManager&) = delete;
+  SectionManager& operator=(const SectionManager&) = delete;
+
+  // Creates a SectionManager from a type config map (type config name -> type
+  // config). schema_type_mapper must be non-null and outlive the returned
+  // SectionManager (only a reference is retained).
+  //
+  // Returns:
+  //   A SectionManager on success
+  //   INVALID_ARGUMENT if infinite loop detected in the type configs
+  //   OUT_OF_RANGE if number of properties need indexing exceeds the max
+  //   number
+  //   NOT_FOUND if any type config name not found in the map
+  static libtextclassifier3::StatusOr<std::unique_ptr<SectionManager>> Create(
+      const SchemaUtil::TypeConfigMap& type_config_map,
+      const KeyMapper<SchemaTypeId>* schema_type_mapper);
+
+  // Finds content of a section by section path (e.g. property1.property2)
+  //
+  // Returns:
+  //   A list of content strings on success (one entry per repeated value)
+  //   NOT_FOUND if:
+  //     1. Property is optional and not found in the document
+  //     2. section_path is invalid
+  //     3. Content is empty
+  libtextclassifier3::StatusOr<std::vector<std::string>> GetSectionContent(
+      const DocumentProto& document, std::string_view section_path) const;
+
+  // Finds content of a section by id
+  //
+  // Returns:
+  //   A list of content strings on success
+  //   INVALID_ARGUMENT if section id is invalid
+  //   NOT_FOUND if type config name of document not found
+  libtextclassifier3::StatusOr<std::vector<std::string>> GetSectionContent(
+      const DocumentProto& document, SectionId section_id) const;
+
+  // Returns the SectionMetadata associated with the SectionId that's in the
+  // SchemaTypeId. The pointer stays valid for the SectionManager's lifetime.
+  //
+  // Returns:
+  //   pointer to SectionMetadata on success
+  //   INVALID_ARGUMENT if schema type id or section is invalid
+  libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata(
+      SchemaTypeId schema_type_id, SectionId section_id) const;
+
+  // Extracts all sections from the given document, sections are sorted by
+  // section id in increasing order. Section ids start from 0. Sections with
+  // empty content won't be returned.
+  //
+  // Returns:
+  //   A list of sections on success
+  //   NOT_FOUND if type config name of document not found
+  libtextclassifier3::StatusOr<std::vector<Section>> ExtractSections(
+      const DocumentProto& document) const;
+
+ private:
+  // Use SectionManager::Create() to instantiate
+  explicit SectionManager(
+      const KeyMapper<SchemaTypeId>* schema_type_mapper,
+      std::vector<std::vector<SectionMetadata>>&& section_metadata_cache);
+
+  // Maps schema types to a densely-assigned unique id. Not owned.
+  const KeyMapper<SchemaTypeId>& schema_type_mapper_;
+
+  // The index of section_metadata_cache_ corresponds to a schema type's
+  // SchemaTypeId. At that SchemaTypeId index, we store an inner vector. The
+  // inner vector's index corresponds to a section's SectionId. At the
+  // SectionId index, we store the SectionMetadata of that section.
+  //
+  // For example, pretend "email" had a SchemaTypeId of 0 and it had a section
+  // called "subject" with a SectionId of 1. Then there would exist a vector
+  // that holds the "subject" property's SectionMetadata at index 1. This
+  // vector would be stored at index 0 of the section_metadata_cache_ vector.
+  const std::vector<std::vector<SectionMetadata>> section_metadata_cache_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_SECTION_MANAGER_H_
diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc
new file mode 100644
index 0000000..38fb8b4
--- /dev/null
+++ b/icing/schema/section-manager_test.cc
@@ -0,0 +1,446 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/schema/section-manager.h"
+
+#include <limits>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/schema-util.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::HasSubstr;
+
+// type and property names of EmailMessage
+constexpr char kTypeEmail[] = "EmailMessage";
+constexpr char kPropertySubject[] = "subject";
+constexpr char kPropertyText[] = "text";
+constexpr char kPropertyTimestamp[] = "timestamp";
+constexpr char kPropertyAttachment[] = "attachment";
+constexpr char kPropertyRecipients[] = "recipients";
+// type and property names of Conversation
+constexpr char kTypeConversation[] = "Conversation";
+constexpr char kPropertyName[] = "name";
+constexpr char kPropertyEmails[] = "emails";
+
+class SectionManagerTest : public ::testing::Test {
+ protected:
+ SectionManagerTest() : test_dir_(GetTestTempDir() + "/icing") {
+ auto email_type = CreateEmailTypeConfig();
+ auto conversation_type = CreateConversationTypeConfig();
+ type_config_map_.emplace(email_type.schema_type(), email_type);
+ type_config_map_.emplace(conversation_type.schema_type(),
+ conversation_type);
+
+ email_document_ =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema(kTypeEmail)
+ .AddStringProperty(kPropertySubject, "the subject")
+ .AddStringProperty(kPropertyText, "the text")
+ .AddInt64Property(kPropertyTimestamp, 1234567890)
+ .AddBytesProperty(kPropertyAttachment, "attachment bytes")
+ .AddStringProperty(kPropertyRecipients, "recipient1", "recipient2",
+ "recipient3")
+ .Build();
+
+ conversation_document_ =
+ DocumentBuilder()
+ .SetKey("icing", "conversation/1")
+ .SetSchema(kTypeConversation)
+ .AddDocumentProperty(kPropertyEmails,
+ DocumentProto(email_document_),
+ DocumentProto(email_document_))
+ .Build();
+ }
+
+ void SetUp() override {
+ // KeyMapper uses 3 internal arrays for bookkeeping. Give each one 128KiB so
+ // the total KeyMapper should get 384KiB
+ int key_mapper_size = 3 * 128 * 1024;
+ ICING_ASSERT_OK_AND_ASSIGN(schema_type_mapper_,
+ KeyMapper<SchemaTypeId>::Create(
+ filesystem_, test_dir_, key_mapper_size));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeEmail, 0));
+ ICING_ASSERT_OK(schema_type_mapper_->Put(kTypeConversation, 1));
+ }
+
+ static SchemaTypeConfigProto CreateEmailTypeConfig() {
+ SchemaTypeConfigProto type;
+ type.set_schema_type(kTypeEmail);
+
+ auto subject = type.add_properties();
+ subject->set_property_name(kPropertySubject);
+ subject->set_data_type(PropertyConfigProto::DataType::STRING);
+ subject->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ subject->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ subject->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ auto text = type.add_properties();
+ text->set_property_name(kPropertyText);
+ text->set_data_type(PropertyConfigProto::DataType::STRING);
+ text->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ text->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::UNKNOWN);
+
+ auto timestamp = type.add_properties();
+ timestamp->set_property_name(kPropertyTimestamp);
+ timestamp->set_data_type(PropertyConfigProto::DataType::INT64);
+ timestamp->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ timestamp->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ timestamp->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ auto attachment = type.add_properties();
+ attachment->set_property_name(kPropertyAttachment);
+ attachment->set_data_type(PropertyConfigProto::DataType::BYTES);
+ attachment->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ attachment->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ attachment->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ auto recipients = type.add_properties();
+ recipients->set_property_name(kPropertyRecipients);
+ recipients->set_data_type(PropertyConfigProto::DataType::STRING);
+ recipients->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+ recipients->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ recipients->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ return type;
+ }
+
+ static SchemaTypeConfigProto CreateConversationTypeConfig() {
+ SchemaTypeConfigProto type;
+ type.set_schema_type(kTypeConversation);
+
+ auto name = type.add_properties();
+ name->set_property_name(kPropertyName);
+ name->set_data_type(PropertyConfigProto::DataType::STRING);
+ name->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ name->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+
+ auto emails = type.add_properties();
+ emails->set_property_name(kPropertyEmails);
+ emails->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ emails->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+ emails->set_schema_type(kTypeEmail);
+ emails->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+
+ return type;
+ }
+
+ Filesystem filesystem_;
+ const std::string test_dir_;
+ SchemaUtil::TypeConfigMap type_config_map_;
+ std::unique_ptr<KeyMapper<SchemaTypeId>> schema_type_mapper_;
+
+ DocumentProto email_document_;
+ DocumentProto conversation_document_;
+};
+
+TEST_F(SectionManagerTest, Create) {
+ {
+ ICING_ASSERT_OK(
+ SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+ }
+ {
+ // Test infinite loop in schema
+ // Creates 2 type configs that reference each other
+ SchemaTypeConfigProto type_config1;
+ type_config1.set_schema_type("type1");
+ auto property1 = type_config1.add_properties();
+ property1->set_property_name("property1");
+ property1->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property1->set_schema_type("type2"); // Here we reference type2
+ property1->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property1->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+
+ SchemaTypeConfigProto type_config2;
+ type_config2.set_schema_type("type2");
+ auto property2 = type_config2.add_properties();
+ property2->set_property_name("property2");
+ property2->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ // Here we reference type1, which references type2 causing the infinite loop
+ property2->set_schema_type("type1");
+ property2->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property2->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace("type1", type_config1);
+ type_config_map.emplace("type2", type_config2);
+
+ EXPECT_THAT(
+ SectionManager::Create(type_config_map, schema_type_mapper_.get()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Infinite loop detected")));
+ }
+ {
+ // Also test infinite loop.
+ // Creates a type config that has a section and references to self.
+ SchemaTypeConfigProto type_config;
+ type_config.set_schema_type("type");
+ auto property1 = type_config.add_properties();
+ property1->set_property_name("property1");
+ property1->set_data_type(PropertyConfigProto::DataType::STRING);
+ property1->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property1->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ auto property2 = type_config.add_properties();
+ property2->set_property_name("property2");
+ property2->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ // Here we're referencing our own type, causing an infinite loop
+ property2->set_schema_type("type");
+ property2->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property2->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace("type", type_config);
+
+ EXPECT_THAT(
+ SectionManager::Create(type_config_map, schema_type_mapper_.get()),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
+ HasSubstr("Too many properties")));
+ }
+ {
+ // Test number of sections that is more than allowed
+ SchemaTypeConfigProto type_config;
+ type_config.set_schema_type("type");
+ // Adds more properties than allowed
+ int max_num_sections_allowed = kMaxSectionId - kMinSectionId + 1;
+ for (int i = 0; i < max_num_sections_allowed + 1; i++) {
+ auto property = type_config.add_properties();
+ property->set_property_name("property" + std::to_string(i));
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ }
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace("type", type_config);
+
+ EXPECT_THAT(
+ SectionManager::Create(type_config_map, schema_type_mapper_.get()),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
+ HasSubstr("Too many properties")));
+ }
+ {
+ // Test unknown schema name
+ SchemaTypeConfigProto type_config;
+ type_config.set_schema_type("type");
+ auto property = type_config.add_properties();
+ property->set_property_name("property");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("unknown_name");
+ property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ property->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+
+ SchemaUtil::TypeConfigMap type_config_map;
+ type_config_map.emplace("type", type_config);
+
+ EXPECT_THAT(
+ SectionManager::Create(type_config_map, schema_type_mapper_.get()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+ HasSubstr("type config not found")));
+ }
+}
+
+TEST_F(SectionManagerTest, GetSectionContent) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto section_manager,
+ SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Test simple section paths
+ EXPECT_THAT(section_manager->GetSectionContent(email_document_,
+ /*section_path*/ "subject"),
+ IsOkAndHolds(ElementsAre("the subject")));
+ EXPECT_THAT(section_manager->GetSectionContent(email_document_,
+ /*section_path*/ "text"),
+ IsOkAndHolds(ElementsAre("the text")));
+
+ // Test repeated values, they are joined into one string
+ ICING_ASSERT_OK_AND_ASSIGN(auto content, section_manager->GetSectionContent(
+ email_document_,
+ /*section_path*/ "recipients"));
+ EXPECT_THAT(content, ElementsAre("recipient1", "recipient2", "recipient3"));
+
+ // Test concatenated section paths: "property1.property2"
+ ICING_ASSERT_OK_AND_ASSIGN(content, section_manager->GetSectionContent(
+ conversation_document_,
+ /*section_path*/ "emails.subject"));
+ EXPECT_THAT(content, ElementsAre("the subject", "the subject"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(content, section_manager->GetSectionContent(
+ conversation_document_,
+ /*section_path*/ "emails.text"));
+ EXPECT_THAT(content, ElementsAre("the text", "the text"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ content,
+ section_manager->GetSectionContent(conversation_document_,
+ /*section_path*/ "emails.recipients"));
+ EXPECT_THAT(content, ElementsAre("recipient1", "recipient2", "recipient3",
+ "recipient1", "recipient2", "recipient3"));
+
+ // Test non-existing paths
+ EXPECT_THAT(section_manager->GetSectionContent(email_document_,
+ /*section_path*/ "name"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(section_manager->GetSectionContent(email_document_,
+ /*section_path*/ "invalid"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(
+ section_manager->GetSectionContent(conversation_document_,
+ /*section_path*/ "emails.invalid"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Test other data types
+ // INT64
+ EXPECT_THAT(section_manager->GetSectionContent(email_document_,
+ /*section_path*/ "timestamp"),
+ IsOkAndHolds(ElementsAre("1234567890")));
+ // BYTES type can't be indexed, so content won't be returned
+ EXPECT_THAT(section_manager->GetSectionContent(email_document_,
+ /*section_path*/ "attachment"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // The following tests are similar to the ones above but use section ids
+ // instead of section paths
+
+  // EmailMessage (section id -> section path): 0 -> recipients, 1 -> subject, 2 -> timestamp
+ SectionId recipients_section_id = 0;
+ SectionId subject_section_id = 1;
+ SectionId timestamp_section_id = 2;
+ SectionId invalid_email_section_id = 3;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ content, section_manager->GetSectionContent(email_document_,
+ recipients_section_id));
+ EXPECT_THAT(content, ElementsAre("recipient1", "recipient2", "recipient3"));
+
+ EXPECT_THAT(
+ section_manager->GetSectionContent(email_document_, subject_section_id),
+ IsOkAndHolds(ElementsAre("the subject")));
+ EXPECT_THAT(
+ section_manager->GetSectionContent(email_document_, timestamp_section_id),
+ IsOkAndHolds(ElementsAre("1234567890")));
+
+ EXPECT_THAT(section_manager->GetSectionContent(email_document_,
+ invalid_email_section_id),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Conversation (section id -> section path):
+ // 0 -> emails.recipients
+ // 1 -> emails.subject
+ // 2 -> emails.timestamp
+ // 3 -> name
+ SectionId emails_recipients_section_id = 0;
+ SectionId emails_subject_section_id = 1;
+ SectionId emails_timestamp_section_id = 2;
+ SectionId name_section_id = 3;
+ SectionId invalid_conversation_section_id = 4;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ content, section_manager->GetSectionContent(
+ conversation_document_, emails_recipients_section_id));
+ EXPECT_THAT(content, ElementsAre("recipient1", "recipient2", "recipient3",
+ "recipient1", "recipient2", "recipient3"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ content, section_manager->GetSectionContent(conversation_document_,
+ emails_subject_section_id));
+ EXPECT_THAT(content, ElementsAre("the subject", "the subject"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ content, section_manager->GetSectionContent(conversation_document_,
+ emails_timestamp_section_id));
+ EXPECT_THAT(content, ElementsAre("1234567890", "1234567890"));
+
+ EXPECT_THAT(section_manager->GetSectionContent(conversation_document_,
+ name_section_id),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(section_manager->GetSectionContent(
+ conversation_document_, invalid_conversation_section_id),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SectionManagerTest, ExtractSections) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto section_manager,
+ SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
+
+ // Extracts all sections from 'EmailMessage' document
+ ICING_ASSERT_OK_AND_ASSIGN(auto sections,
+ section_manager->ExtractSections(email_document_));
+ EXPECT_THAT(sections.size(), Eq(3));
+
+ EXPECT_THAT(sections[0].metadata.id, Eq(0));
+ EXPECT_THAT(sections[0].metadata.path, Eq("recipients"));
+ EXPECT_THAT(sections[0].content,
+ ElementsAre("recipient1", "recipient2", "recipient3"));
+
+ EXPECT_THAT(sections[1].metadata.id, Eq(1));
+ EXPECT_THAT(sections[1].metadata.path, Eq("subject"));
+ EXPECT_THAT(sections[1].content, ElementsAre("the subject"));
+
+ EXPECT_THAT(sections[2].metadata.id, Eq(2));
+ EXPECT_THAT(sections[2].metadata.path, Eq("timestamp"));
+ EXPECT_THAT(sections[2].content, ElementsAre("1234567890"));
+
+ // Extracts all sections from 'Conversation' document
+ ICING_ASSERT_OK_AND_ASSIGN(
+ sections, section_manager->ExtractSections(conversation_document_));
+ EXPECT_THAT(sections.size(), Eq(3));
+
+  // Section id 3 (name) not found in document, so only section ids 0, 1 and 2
+  // are returned below.
+ EXPECT_THAT(sections[0].metadata.id, Eq(0));
+ EXPECT_THAT(sections[0].metadata.path, Eq("emails.recipients"));
+ EXPECT_THAT(sections[0].content,
+ ElementsAre("recipient1", "recipient2", "recipient3",
+ "recipient1", "recipient2", "recipient3"));
+
+ EXPECT_THAT(sections[1].metadata.id, Eq(1));
+ EXPECT_THAT(sections[1].metadata.path, Eq("emails.subject"));
+ EXPECT_THAT(sections[1].content, ElementsAre("the subject", "the subject"));
+
+ EXPECT_THAT(sections[2].metadata.id, Eq(2));
+ EXPECT_THAT(sections[2].metadata.path, Eq("emails.timestamp"));
+ EXPECT_THAT(sections[2].content, ElementsAre("1234567890", "1234567890"));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/schema/section.h b/icing/schema/section.h
new file mode 100644
index 0000000..daf4fd0
--- /dev/null
+++ b/icing/schema/section.h
@@ -0,0 +1,96 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_SECTION_H_
+#define ICING_SCHEMA_SECTION_H_
+
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/term.pb.h"
+
+namespace icing {
+namespace lib {
+
+using SectionId = int8_t;
+// 4 bits for 16 values. NOTE: Increasing this value means that SectionIdMask
+// must increase from an int16_t to an int32_t
+inline constexpr int kSectionIdBits = 4;
+inline constexpr SectionId kInvalidSectionId = (1 << kSectionIdBits);
+inline constexpr SectionId kMaxSectionId = kInvalidSectionId - 1;
+inline constexpr SectionId kMinSectionId = 0;
+constexpr bool IsSectionIdValid(SectionId section_id) {
+ return section_id >= kMinSectionId && section_id <= kMaxSectionId;
+}
+
+using SectionIdMask = int16_t;
+inline constexpr SectionIdMask kSectionIdMaskAll = ~SectionIdMask{0};
+inline constexpr SectionIdMask kSectionIdMaskNone = SectionIdMask{0};
+
+static_assert(
+ kMaxSectionId < 8 * sizeof(SectionIdMask),
+ "SectionIdMask is not large enough to represent all section values!");
+
+// TODO(samzheng): add more metadata when needed, e.g. language of the content.
+struct SectionMetadata {
+ // Dot-joined property names, representing the location of section inside an
+ // document. E.g. "property1.property2"
+ std::string path;
+
+ // A unique id of property within a type config
+ SectionId id;
+
+ // How content in this section should be tokenized. It is invalid for a
+ // section to have tokenizer == 'NONE'.
+ IndexingConfig::TokenizerType::Code tokenizer;
+
+ // How tokens in this section should be matched.
+ //
+ // TermMatchType::UNKNOWN:
+ // Terms will not match anything
+ //
+ // TermMatchType::PREFIX:
+ // Terms will be stored as a prefix match, "fool" matches "foo" and "fool"
+ //
+ // TermMatchType::EXACT_ONLY:
+ // Terms will be only stored as an exact match, "fool" only matches "fool"
+ TermMatchType::Code term_match_type = TermMatchType::UNKNOWN;
+
+ SectionMetadata(SectionId id_in, TermMatchType::Code term_match_type_in,
+ IndexingConfig::TokenizerType::Code tokenizer,
+ std::string&& path_in)
+ : path(std::move(path_in)),
+ id(id_in),
+ tokenizer(tokenizer),
+ term_match_type(term_match_type_in) {}
+};
+
+// Section is an icing internal concept similar to document property but with
+// extra metadata. The content can be a value or the combination of repeated
+// values of a property.
+struct Section {
+ SectionMetadata metadata;
+ std::vector<std::string> content;
+
+ Section(SectionMetadata&& metadata_in, std::vector<std::string>&& content_in)
+ : metadata(std::move(metadata_in)), content(std::move(content_in)) {}
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_SECTION_H_
diff --git a/icing/scoring/ranker.cc b/icing/scoring/ranker.cc
new file mode 100644
index 0000000..e68fbd2
--- /dev/null
+++ b/icing/scoring/ranker.cc
@@ -0,0 +1,154 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/ranker.h"
+
+#include <algorithm>
+#include <vector>
+
+#include "base/logging.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-id.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+// For all the heap manipulations in this file, we use a vector to represent the
+// heap. The element at index 0 is the root node. For any node at index i, its
+// left child node is at 2 * i + 1, its right child node is at 2 * i + 2.
+
+// Helper function to wrap the heapify algorithm, it heapifies the target
+// subtree node in place.
+void Heapify(std::vector<ScoredDocumentHit>* scored_document_hits,
+ int target_subtree_root_index,
+ const ScoredDocumentHitComparator scored_document_hit_comparator) {
+ const int heap_size = scored_document_hits->size();
+ if (target_subtree_root_index >= heap_size) {
+ return;
+ }
+
+ // Initializes subtree root as the current best node.
+ int best = target_subtree_root_index;
+ // If we represent a heap in an array/vector, indices of left and right
+ // children can be calculated.
+ const int left = target_subtree_root_index * 2 + 1;
+ const int right = target_subtree_root_index * 2 + 2;
+
+ // If left child is better than current best
+ if (left < heap_size &&
+ scored_document_hit_comparator(scored_document_hits->at(left),
+ scored_document_hits->at(best))) {
+ best = left;
+ }
+
+ // If right child is better than current best
+ if (right < heap_size &&
+ scored_document_hit_comparator(scored_document_hits->at(right),
+ scored_document_hits->at(best))) {
+ best = right;
+ }
+
+ // If the best is not the subtree root, swap and continue heapifying the lower
+ // level subtree
+ if (best != target_subtree_root_index) {
+ std::swap(scored_document_hits->at(best),
+ scored_document_hits->at(target_subtree_root_index));
+ Heapify(scored_document_hits, best, scored_document_hit_comparator);
+ }
+}
+
+// Helper function to build a heap in place whose root is the best node defined
+// by scored_document_hit_comparator. Time complexity is O(n).
+void BuildHeap(
+ std::vector<ScoredDocumentHit>* scored_document_hits,
+ const ScoredDocumentHitComparator scored_document_hit_comparator) {
+ const int heap_size = scored_document_hits->size();
+ // Since we use a vector to represent the heap, [size / 2 - 1] is the index
+ // of the parent node of the last node.
+ for (int subtree_root_index = heap_size / 2 - 1; subtree_root_index >= 0;
+ subtree_root_index--) {
+ Heapify(scored_document_hits, subtree_root_index,
+ scored_document_hit_comparator);
+ }
+}
+
+// Helper function to extract the root from the heap. The heap structure will be
+// maintained.
+//
+// Returns:
+// The current root element on success
+// RESOURCE_EXHAUSTED_ERROR if heap is empty
+libtextclassifier3::StatusOr<ScoredDocumentHit> ExtractRoot(
+ std::vector<ScoredDocumentHit>* scored_document_hits,
+ ScoredDocumentHitComparator scored_document_hit_comparator) {
+ if (scored_document_hits->empty()) {
+    // Heap is empty; there is no root to extract.
+ return absl_ports::ResourceExhaustedError("Heap is empty");
+ }
+
+ // Steps to extract root from heap:
+ // 1. copy out root
+ ScoredDocumentHit root = scored_document_hits->at(0);
+ const size_t last_node_index = scored_document_hits->size() - 1;
+ // 2. swap root and the last node
+ std::swap(scored_document_hits->at(0),
+ scored_document_hits->at(last_node_index));
+ // 3. remove last node
+ scored_document_hits->pop_back();
+ // 4. heapify root
+ Heapify(scored_document_hits, /*target_subtree_root_index=*/0,
+ scored_document_hit_comparator);
+ return root;
+}
+
+std::vector<ScoredDocumentHit> HeapifyAndProduceTopN(
+ std::vector<ScoredDocumentHit> scored_document_hits, int num_result,
+ bool is_descending) {
+ // Build a heap in place
+ const ScoredDocumentHitComparator scored_document_hit_comparator(
+ is_descending);
+ BuildHeap(&scored_document_hits, scored_document_hit_comparator);
+
+ // Get best nodes from heap one by one
+ std::vector<ScoredDocumentHit> scored_document_hit_result;
+ int result_size =
+ std::min(num_result, static_cast<int>(scored_document_hits.size()));
+ while (result_size-- > 0) {
+ libtextclassifier3::StatusOr<ScoredDocumentHit> next_best_document_hit_or =
+ ExtractRoot(&scored_document_hits, scored_document_hit_comparator);
+ if (next_best_document_hit_or.ok()) {
+ scored_document_hit_result.push_back(
+ std::move(next_best_document_hit_or).ValueOrDie());
+ } else {
+ ICING_VLOG(1) << next_best_document_hit_or.status().error_message();
+ }
+ }
+ return scored_document_hit_result;
+}
+
+} // namespace
+
+std::vector<ScoredDocumentHit> GetTopNFromScoredDocumentHits(
+ std::vector<ScoredDocumentHit> scored_document_hits, int num_result,
+ bool is_descending) {
+ return HeapifyAndProduceTopN(std::move(scored_document_hits), num_result,
+ is_descending);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/ranker.h b/icing/scoring/ranker.h
new file mode 100644
index 0000000..1acd06c
--- /dev/null
+++ b/icing/scoring/ranker.h
@@ -0,0 +1,37 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_RANKER_H_
+#define ICING_SCORING_RANKER_H_
+
+#include <vector>
+
+#include "icing/scoring/scored-document-hit.h"
+
+// Provides functionality to get the top N results from an unsorted vector.
+namespace icing {
+namespace lib {
+
+// Returns the top num_result results from scored_document_hits. The returned
+// vector will be sorted and contain no more than num_result elements.
+// is_descending indicates whether the result is in a descending score order
+// or an ascending score order.
+std::vector<ScoredDocumentHit> GetTopNFromScoredDocumentHits(
+ std::vector<ScoredDocumentHit> scored_document_hits, int num_result,
+ bool is_descending);
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_RANKER_H_
diff --git a/icing/scoring/ranker_benchmark.cc b/icing/scoring/ranker_benchmark.cc
new file mode 100644
index 0000000..f47ea9f
--- /dev/null
+++ b/icing/scoring/ranker_benchmark.cc
@@ -0,0 +1,95 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdlib>
+
+#include "testing/base/public/benchmark.h"
+#include "icing/scoring/ranker.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/util/clock.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/scoring:ranker_benchmark
+//
+// $ blaze-bin/icing/scoring/ranker_benchmark --benchmarks=all
+// --benchmark_memory_usage
+//
+// Run on an Android device:
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/scoring:ranker_benchmark
+//
+// $ adb push blaze-bin/icing/scoring/ranker_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/ranker_benchmark --benchmarks=all
+
+void BM_GetTopN(benchmark::State& state) {
+ int num_to_score = state.range(0);
+ int num_to_return = state.range(1);
+
+ std::vector<ScoredDocumentHit> scored_document_hits;
+ uint seed = Clock().GetCurrentSeconds();
+ for (int i = 0; i < num_to_score; i++) {
+ int score = rand_r(&seed);
+ scored_document_hits.emplace_back(/*document_id=*/0,
+ /*hit_section_id_mask=*/0, score);
+ }
+
+ for (auto _ : state) {
+ auto result =
+ GetTopNFromScoredDocumentHits(scored_document_hits, num_to_return,
+ /*is_descending=*/true);
+ }
+}
+BENCHMARK(BM_GetTopN)
+ ->ArgPair(1000, 10) // (num_to_score, num_to_return)
+ ->ArgPair(3000, 10)
+ ->ArgPair(5000, 10)
+ ->ArgPair(7000, 10)
+ ->ArgPair(9000, 10)
+ ->ArgPair(11000, 10)
+ ->ArgPair(13000, 10)
+ ->ArgPair(15000, 10)
+ ->ArgPair(17000, 10)
+ ->ArgPair(19000, 10)
+ ->ArgPair(1000, 20)
+ ->ArgPair(3000, 20)
+ ->ArgPair(5000, 20)
+ ->ArgPair(7000, 20)
+ ->ArgPair(9000, 20)
+ ->ArgPair(11000, 20)
+ ->ArgPair(13000, 20)
+ ->ArgPair(15000, 20)
+ ->ArgPair(17000, 20)
+ ->ArgPair(19000, 20)
+ ->ArgPair(1000, 30)
+ ->ArgPair(3000, 30)
+ ->ArgPair(5000, 30)
+ ->ArgPair(7000, 30)
+ ->ArgPair(9000, 30)
+ ->ArgPair(11000, 30)
+ ->ArgPair(13000, 30)
+ ->ArgPair(15000, 30)
+ ->ArgPair(17000, 30)
+ ->ArgPair(19000, 30);
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/ranker_test.cc b/icing/scoring/ranker_test.cc
new file mode 100644
index 0000000..3aa94e6
--- /dev/null
+++ b/icing/scoring/ranker_test.cc
@@ -0,0 +1,179 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/ranker.h"
+
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/testing/common-matchers.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+using ::testing::Test;
+
+// Fixture providing five ScoredDocumentHits whose scores (1..5) are
+// deliberately out of step with their document ids, so ordering by score is
+// distinguishable from ordering by id.
+class RankerTest : public Test {
+ protected:
+  RankerTest()
+      : test_scored_document_hits_{
+            ScoredDocumentHit(/*document_id=*/3, /*hit_section_id_mask=*/3,
+                              /*score=*/1),
+            ScoredDocumentHit(/*document_id=*/1, /*hit_section_id_mask=*/1,
+                              /*score=*/2),
+            ScoredDocumentHit(/*document_id=*/2, /*hit_section_id_mask=*/2,
+                              /*score=*/3),
+            ScoredDocumentHit(/*document_id=*/5, /*hit_section_id_mask=*/5,
+                              /*score=*/4),
+            ScoredDocumentHit(/*document_id=*/4, /*hit_section_id_mask=*/4,
+                              /*score=*/5)} {}
+
+  const ScoredDocumentHit& test_scored_document_hit1() {
+    return test_scored_document_hits_[0];
+  }
+
+  const ScoredDocumentHit& test_scored_document_hit2() {
+    return test_scored_document_hits_[1];
+  }
+
+  const ScoredDocumentHit& test_scored_document_hit3() {
+    return test_scored_document_hits_[2];
+  }
+
+  const ScoredDocumentHit& test_scored_document_hit4() {
+    return test_scored_document_hits_[3];
+  }
+
+  const ScoredDocumentHit& test_scored_document_hit5() {
+    return test_scored_document_hits_[4];
+  }
+
+ private:
+  // Hit N (1-based) lives at index N-1 and carries score N.
+  ScoredDocumentHit test_scored_document_hits_[5];
+};
+
+TEST_F(RankerTest, ShouldHandleEmpty) {
+  std::vector<ScoredDocumentHit> empty_hits;
+
+  // Neither a zero nor a positive num_result produces anything from no input.
+  auto top_zero = GetTopNFromScoredDocumentHits(empty_hits, /*num_result=*/0,
+                                                /*is_descending=*/true);
+  EXPECT_THAT(top_zero, IsEmpty());
+
+  auto top_three = GetTopNFromScoredDocumentHits(empty_hits, /*num_result=*/3,
+                                                 /*is_descending=*/true);
+  EXPECT_THAT(top_three, IsEmpty());
+}
+
+TEST_F(RankerTest, ShouldCorrectlySortResults) {
+  std::vector<ScoredDocumentHit> scored_document_hits;
+  scored_document_hits.push_back(test_scored_document_hit2());
+  scored_document_hits.push_back(test_scored_document_hit1());
+  scored_document_hits.push_back(test_scored_document_hit5());
+  scored_document_hits.push_back(test_scored_document_hit4());
+  scored_document_hits.push_back(test_scored_document_hit3());
+
+  // Requesting all five hits yields a full sort, highest score first.
+  auto ranked =
+      GetTopNFromScoredDocumentHits(scored_document_hits, /*num_result=*/5,
+                                    /*is_descending=*/true);
+  EXPECT_THAT(
+      ranked,
+      ElementsAre(EqualsScoredDocumentHit(test_scored_document_hit5()),
+                  EqualsScoredDocumentHit(test_scored_document_hit4()),
+                  EqualsScoredDocumentHit(test_scored_document_hit3()),
+                  EqualsScoredDocumentHit(test_scored_document_hit2()),
+                  EqualsScoredDocumentHit(test_scored_document_hit1())));
+}
+
+TEST_F(RankerTest, ShouldHandleSmallerNumResult) {
+  std::vector<ScoredDocumentHit> scored_document_hits = {
+      test_scored_document_hit2(), test_scored_document_hit1(),
+      test_scored_document_hit5(), test_scored_document_hit4(),
+      test_scored_document_hit3()};
+
+  // num_result = 3, smaller than the size 5
+  // Only the three highest-scored hits come back, still in descending order.
+  EXPECT_THAT(
+      GetTopNFromScoredDocumentHits(scored_document_hits, /*num_result=*/3,
+                                    /*is_descending=*/true),
+      ElementsAre(EqualsScoredDocumentHit(test_scored_document_hit5()),
+                  EqualsScoredDocumentHit(test_scored_document_hit4()),
+                  EqualsScoredDocumentHit(test_scored_document_hit3())));
+}
+
+TEST_F(RankerTest, ShouldHandleGreaterNumResult) {
+  std::vector<ScoredDocumentHit> scored_document_hits = {
+      test_scored_document_hit2(), test_scored_document_hit1(),
+      test_scored_document_hit5(), test_scored_document_hit4(),
+      test_scored_document_hit3()};
+
+  // num_result = 10, greater than the size 5
+  // All five hits are returned (no padding), sorted descending.
+  EXPECT_THAT(
+      GetTopNFromScoredDocumentHits(scored_document_hits, /*num_result=*/10,
+                                    /*is_descending=*/true),
+      ElementsAre(EqualsScoredDocumentHit(test_scored_document_hit5()),
+                  EqualsScoredDocumentHit(test_scored_document_hit4()),
+                  EqualsScoredDocumentHit(test_scored_document_hit3()),
+                  EqualsScoredDocumentHit(test_scored_document_hit2()),
+                  EqualsScoredDocumentHit(test_scored_document_hit1())));
+}
+
+// Renamed from "ShouldHandleAcsendingOrder" to fix the "Acsending" typo so
+// the test can be found by name filters spelled correctly.
+TEST_F(RankerTest, ShouldHandleAscendingOrder) {
+  std::vector<ScoredDocumentHit> scored_document_hits = {
+      test_scored_document_hit2(), test_scored_document_hit1(),
+      test_scored_document_hit5(), test_scored_document_hit4(),
+      test_scored_document_hit3()};
+
+  // With is_descending=false the lowest-scored hit comes first.
+  EXPECT_THAT(
+      GetTopNFromScoredDocumentHits(scored_document_hits, /*num_result=*/5,
+                                    /*is_descending=*/false),
+      ElementsAre(EqualsScoredDocumentHit(test_scored_document_hit1()),
+                  EqualsScoredDocumentHit(test_scored_document_hit2()),
+                  EqualsScoredDocumentHit(test_scored_document_hit3()),
+                  EqualsScoredDocumentHit(test_scored_document_hit4()),
+                  EqualsScoredDocumentHit(test_scored_document_hit5())));
+}
+
+TEST_F(RankerTest, ShouldRespectDocumentIdWhenScoresAreEqual) {
+  // All three hits share score 100, so ordering falls back to document id.
+  ScoredDocumentHit scored_document_hit1(
+      /*document_id=*/1, /*hit_section_id_mask=*/0, /*score=*/100);
+  ScoredDocumentHit scored_document_hit2(
+      /*document_id=*/2, /*hit_section_id_mask=*/0, /*score=*/100);
+  ScoredDocumentHit scored_document_hit3(
+      /*document_id=*/3, /*hit_section_id_mask=*/0, /*score=*/100);
+
+  std::vector<ScoredDocumentHit> scored_document_hits = {
+      scored_document_hit3, scored_document_hit1, scored_document_hit2};
+
+  // Descending: larger document id is considered "better" on a score tie.
+  EXPECT_THAT(
+      GetTopNFromScoredDocumentHits(scored_document_hits, /*num_result=*/3,
+                                    /*is_descending=*/true),
+      ElementsAre(EqualsScoredDocumentHit(scored_document_hit3),
+                  EqualsScoredDocumentHit(scored_document_hit2),
+                  EqualsScoredDocumentHit(scored_document_hit1)));
+
+  // Ascending: the tie-break is reversed along with the score order.
+  EXPECT_THAT(
+      GetTopNFromScoredDocumentHits(scored_document_hits, /*num_result=*/3,
+                                    /*is_descending=*/false),
+      ElementsAre(EqualsScoredDocumentHit(scored_document_hit1),
+                  EqualsScoredDocumentHit(scored_document_hit2),
+                  EqualsScoredDocumentHit(scored_document_hit3)));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/scored-document-hit.h b/icing/scoring/scored-document-hit.h
new file mode 100644
index 0000000..efe0e2a
--- /dev/null
+++ b/icing/scoring/scored-document-hit.h
@@ -0,0 +1,84 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_SCORED_DOCUMENT_HIT_H_
+#define ICING_SCORING_SCORED_DOCUMENT_HIT_H_
+
+#include <type_traits>
+
+#include "icing/legacy/core/icing-packed-pod.h"
+#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
+
+namespace icing {
+namespace lib {
+
+// A data class containing information about the document, hit sections, and a
+// score. The score is calculated against both the document and the hit
+// sections.
+class ScoredDocumentHit {
+ public:
+  ScoredDocumentHit(DocumentId document_id, SectionIdMask hit_section_id_mask,
+                    float score)
+      : document_id_(document_id),
+        hit_section_id_mask_(hit_section_id_mask),
+        score_(score) {}
+
+  // Orders by score, with ties broken by document id, so any two hits with
+  // comparable scores order deterministically.
+  // NOTE(review): if score_ can ever be NaN, both `<` / `>` checks below are
+  // false and the comparison silently degenerates to the document-id
+  // comparison — confirm scores are always real numbers.
+  bool operator<(const ScoredDocumentHit& other) const {
+    if (score() < other.score()) return true;
+    if (score() > other.score()) return false;
+    return document_id() < other.document_id();
+  }
+
+  DocumentId document_id() const { return document_id_; }
+
+  SectionIdMask hit_section_id_mask() const { return hit_section_id_mask_; }
+
+  float score() const { return score_; }
+
+ private:
+  DocumentId document_id_;
+  SectionIdMask hit_section_id_mask_;
+  float score_;
+} __attribute__((packed));  // Layout is pinned by the static_asserts below.
+
+static_assert(sizeof(ScoredDocumentHit) == 10,
+              "Size of ScoredDocHit should be 10");
+static_assert(icing_is_packed_pod<ScoredDocumentHit>::value, "go/icing-ubsan");
+
+// A custom comparator for ScoredDocumentHit that determines which
+// ScoredDocumentHit is better (should come first) based off of
+// ScoredDocumentHit itself and the order of its score.
+//
+// Returns true if left is better than right according to score and order.
+// Comparison is based off of score with ties broken by
+// ScoredDocumentHit.document_id().
+class ScoredDocumentHitComparator {
+ public:
+  explicit ScoredDocumentHitComparator(bool is_descending = true)
+      : is_descending_(is_descending) {}
+
+  bool operator()(const ScoredDocumentHit& lhs,
+                  const ScoredDocumentHit& rhs) const {
+    // This comparator must be a strict weak ordering to be usable with
+    // std::sort and friends. The previous form `is_descending_ == !(lhs <
+    // rhs)` returned true for equivalent elements in descending mode, which
+    // violates irreflexivity (comp(a, a) must be false) and is undefined
+    // behavior in STL sorting algorithms. Reversing the operands instead
+    // keeps the ordering strict in both directions.
+    if (is_descending_) {
+      return rhs < lhs;
+    }
+    return lhs < rhs;
+  }
+
+ private:
+  bool is_descending_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_SCORED_DOCUMENT_HIT_H_
diff --git a/icing/scoring/scorer.cc b/icing/scoring/scorer.cc
new file mode 100644
index 0000000..20e4690
--- /dev/null
+++ b/icing/scoring/scorer.cc
@@ -0,0 +1,87 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/scorer.h"
+
+#include <memory>
+
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/store/document-associated-score-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+// Scorer that ranks by the score stored with each document
+// (DocumentAssociatedScoreData::document_score).
+class DocumentScoreScorer : public Scorer {
+ public:
+  // `document_store` must be non-null and must outlive this scorer; only a
+  // reference is retained.
+  explicit DocumentScoreScorer(const DocumentStore* document_store,
+                               float default_score)
+      : document_store_(*document_store), default_score_(default_score) {}
+
+  float GetScore(DocumentId document_id) override {
+    // If the score data lookup fails (e.g. unknown document id), the macro
+    // returns default_score_ from this function instead.
+    ICING_ASSIGN_OR_RETURN_VAL(
+        DocumentAssociatedScoreData score_data,
+        document_store_.GetDocumentAssociatedScoreData(document_id),
+        default_score_);
+
+    return static_cast<float>(score_data.document_score());
+  }
+
+ private:
+  const DocumentStore& document_store_;
+  float default_score_;
+};
+
+// Scorer that ranks by a document's creation timestamp (seconds), so newer
+// documents score higher.
+class DocumentCreationTimestampScorer : public Scorer {
+ public:
+  // `document_store` must be non-null and must outlive this scorer; only a
+  // reference is retained.
+  explicit DocumentCreationTimestampScorer(const DocumentStore* document_store,
+                                           float default_score)
+      : document_store_(*document_store), default_score_(default_score) {}
+
+  float GetScore(DocumentId document_id) override {
+    // If the score data lookup fails (e.g. unknown document id), the macro
+    // returns default_score_ from this function instead.
+    ICING_ASSIGN_OR_RETURN_VAL(
+        DocumentAssociatedScoreData score_data,
+        document_store_.GetDocumentAssociatedScoreData(document_id),
+        default_score_);
+
+    return score_data.creation_timestamp_secs();
+  }
+
+ private:
+  const DocumentStore& document_store_;
+  float default_score_;
+};
+
+// Creates the concrete Scorer matching the requested ranking strategy.
+// Returns INVALID_ARGUMENT for NONE and for unrecognized strategy values.
+libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Scorer::Create(
+    ScoringSpecProto::RankingStrategy::Code rank_by, float default_score,
+    const DocumentStore* document_store) {
+  switch (rank_by) {
+    case ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE:
+      return std::make_unique<DocumentScoreScorer>(document_store,
+                                                   default_score);
+    case ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP:
+      return std::make_unique<DocumentCreationTimestampScorer>(document_store,
+                                                               default_score);
+    case ScoringSpecProto::RankingStrategy::NONE:
+      return absl_ports::InvalidArgumentError(
+          "RankingStrategy NONE not supported");
+  }
+  // Proto enums are open: a value outside the declared enumerators would fall
+  // through the switch above, and flowing off the end of a non-void function
+  // is undefined behavior. Report such values explicitly instead.
+  return absl_ports::InvalidArgumentError("Unknown ranking strategy");
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/scorer.h b/icing/scoring/scorer.h
new file mode 100644
index 0000000..e6a9f17
--- /dev/null
+++ b/icing/scoring/scorer.h
@@ -0,0 +1,63 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_SCORER_H_
+#define ICING_SCORING_SCORER_H_
+
+#include <memory>
+
+#include "utils/base/statusor.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+// Scorer calculates scores for documents.
+//
+// This is an abstract interface; concrete implementations are selected by the
+// Create() factory based on the requested ranking strategy.
+class Scorer {
+ public:
+  virtual ~Scorer() = default;
+
+  // Factory function to create a Scorer according to the ranking strategy and
+  // default score. The default score will be returned only if the scorer fails
+  // to find or calculate a score for the document.
+  //
+  // `document_store` must outlive the returned scorer.
+  //
+  // Returns:
+  //   A Scorer on success
+  //   INVALID_ARGUMENT if fails to create an instance
+  static libtextclassifier3::StatusOr<std::unique_ptr<Scorer>> Create(
+      ScoringSpecProto::RankingStrategy::Code rank_by, float default_score,
+      const DocumentStore* document_store);
+
+  // Returns a non-negative score of a document. The score can be a
+  // document-associated score which comes from the DocumentProto directly, an
+  // accumulated score, or even an inferred score. If it fails to find or
+  // calculate a score, the user-provided default score will be returned.
+  //
+  // Some examples of possible scores:
+  // 1. Document-associated scores: document score, creation timestamp score.
+  // 2. Accumulated scores: usage count score.
+  // 3. Inferred scores: a score calculated by a machine learning model.
+  //
+  // NOTE: This method is performance-sensitive as it's called for every
+  // potential result document. We're trying to avoid returning StatusOr<float>
+  // to save a little more time and memory.
+  virtual float GetScore(DocumentId document_id) = 0;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_SCORER_H_
diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc
new file mode 100644
index 0000000..af1d2bc
--- /dev/null
+++ b/icing/scoring/scorer_test.cc
@@ -0,0 +1,193 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/scorer.h"
+
+#include <memory>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-id.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+using ::testing::Eq;
+using ::testing::Test;
+
+// Fixture that stands up a real SchemaStore/DocumentStore under a temp
+// directory with a minimal "email" schema, plus two fake clocks carrying
+// distinct timestamps for creation-time scoring tests.
+class ScorerTest : public Test {
+ protected:
+  ScorerTest()
+      : test_dir_(GetTestTempDir() + "/icing"),
+        doc_store_dir_(test_dir_ + "/doc_store"),
+        schema_store_dir_(test_dir_ + "/schema_store") {}
+
+  void SetUp() override {
+    // Start each test from a clean directory tree.
+    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+    filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str());
+    filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+    // Two distinct fixed timestamps so tests can tell the clocks apart.
+    fake_clock1_.SetSeconds(1571100000);
+    fake_clock2_.SetSeconds(1572200000);
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        schema_store_, SchemaStore::Create(&filesystem_, schema_store_dir_));
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        document_store_,
+        DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock1_,
+                              schema_store_.get()));
+
+    // Creates a simple email schema
+    SchemaProto test_email_schema;
+    auto type_config = test_email_schema.add_types();
+    type_config->set_schema_type("email");
+    auto subject = type_config->add_properties();
+    subject->set_property_name("subject");
+    subject->set_data_type(PropertyConfigProto::DataType::STRING);
+    subject->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+    ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
+  }
+
+  void TearDown() override {
+    // Release the stores before deleting their backing files.
+    document_store_.reset();
+    schema_store_.reset();
+    filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+  }
+
+  DocumentStore* document_store() { return document_store_.get(); }
+
+  const FakeClock& fake_clock1() { return fake_clock1_; }
+
+  const FakeClock& fake_clock2() { return fake_clock2_; }
+
+ private:
+  const std::string test_dir_;
+  const std::string doc_store_dir_;
+  const std::string schema_store_dir_;
+  Filesystem filesystem_;
+  std::unique_ptr<SchemaStore> schema_store_;
+  std::unique_ptr<DocumentStore> document_store_;
+  FakeClock fake_clock1_;
+  FakeClock fake_clock2_;
+};
+
+TEST_F(ScorerTest, ShouldFailToCreate) {
+  // RankingStrategy::NONE has no scorer implementation; creation must fail
+  // with INVALID_ARGUMENT.
+  EXPECT_THAT(Scorer::Create(ScoringSpecProto::RankingStrategy::NONE,
+                             /*default_score=*/0, document_store()),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(ScorerTest, ShouldGetDefaultScore) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<Scorer> scorer,
+      Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
+                     /*default_score=*/10, document_store()));
+
+  // Nothing has been put into the store, so this id misses and the
+  // caller-provided default score is returned.
+  constexpr DocumentId kNonExistingDocumentId = 1;
+  EXPECT_THAT(scorer->GetScore(kNonExistingDocumentId), Eq(10));
+}
+
+TEST_F(ScorerTest, ShouldGetDefaultDocumentScore) {
+  // Creates a test document with the default document score 0
+  // (no SetScore() call, so the proto field keeps its default).
+  DocumentProto test_document =
+      DocumentBuilder()
+          .SetKey("icing", "email/1")
+          .SetSchema("email")
+          .AddStringProperty("subject", "subject foo")
+          .SetCreationTimestampSecs(fake_clock1().GetCurrentSeconds())
+          .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+                             document_store()->Put(test_document));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<Scorer> scorer,
+      Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
+                     /*default_score=*/10, document_store()));
+
+  // The stored document score 0 is returned, not the scorer's default of 10:
+  // the default only applies when the lookup itself fails.
+  EXPECT_THAT(scorer->GetScore(document_id), Eq(0));
+}
+
+TEST_F(ScorerTest, ShouldGetCorrectDocumentScore) {
+  // Puts a document whose DocumentProto carries an explicit score of 5.
+  DocumentProto email =
+      DocumentBuilder()
+          .SetScore(5)
+          .SetKey("icing", "email/1")
+          .SetSchema("email")
+          .AddStringProperty("subject", "subject foo")
+          .SetCreationTimestampSecs(fake_clock2().GetCurrentSeconds())
+          .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+                             document_store()->Put(email));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<Scorer> scorer,
+      Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
+                     /*default_score=*/0, document_store()));
+
+  // The stored document score is returned instead of the default of 0.
+  EXPECT_THAT(scorer->GetScore(document_id), Eq(5));
+}
+
+TEST_F(ScorerTest, ShouldGetCorrectCreationTimestampScore) {
+  // Creates test_document1 with fake timestamp1
+  DocumentProto test_document1 =
+      DocumentBuilder()
+          .SetKey("icing", "email/1")
+          .SetSchema("email")
+          .AddStringProperty("subject", "subject foo")
+          .SetCreationTimestampSecs(fake_clock1().GetCurrentSeconds())
+          .Build();
+  // Creates test_document2 with fake timestamp2
+  DocumentProto test_document2 =
+      DocumentBuilder()
+          .SetKey("icing", "email/2")
+          .SetSchema("email")
+          .AddStringProperty("subject", "subject foo 2")
+          .SetCreationTimestampSecs(fake_clock2().GetCurrentSeconds())
+          .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             document_store()->Put(test_document1));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             document_store()->Put(test_document2));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<Scorer> scorer,
+      Scorer::Create(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP,
+                     /*default_score=*/0, document_store()));
+
+  // Each document's score equals the creation timestamp it was stored with.
+  EXPECT_THAT(scorer->GetScore(document_id1),
+              Eq(fake_clock1().GetCurrentSeconds()));
+  EXPECT_THAT(scorer->GetScore(document_id2),
+              Eq(fake_clock2().GetCurrentSeconds()));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/scoring-processor.cc b/icing/scoring/scoring-processor.cc
new file mode 100644
index 0000000..8bf2ce9
--- /dev/null
+++ b/icing/scoring/scoring-processor.cc
@@ -0,0 +1,86 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/scoring-processor.h"
+
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/scoring/ranker.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/scoring/scorer.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+// Score used when a document's real score can't be retrieved (see
+// Scorer::Create's default_score). Chosen per sort order so failed lookups
+// rank last: the minimum useful value when sorting descending, the maximum
+// when sorting ascending.
+constexpr float kDefaultScoreInDescendingOrder = 0;
+constexpr float kDefaultScoreInAscendingOrder =
+    std::numeric_limits<float>::max();
+}  // namespace
+
+// Builds a ScoringProcessor (and its Scorer) from the scoring spec.
+// Returns INVALID_ARGUMENT if the spec's ranking strategy is unsupported.
+libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>>
+ScoringProcessor::Create(const ScoringSpecProto& scoring_spec,
+                         const DocumentStore* document_store) {
+  const bool is_descending_order =
+      scoring_spec.order_by() == ScoringSpecProto::Order::DESC;
+
+  // Failed score lookups should always sort last, so the fallback score
+  // depends on the requested order.
+  const float default_score = is_descending_order
+                                  ? kDefaultScoreInDescendingOrder
+                                  : kDefaultScoreInAscendingOrder;
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<Scorer> scorer,
+      Scorer::Create(scoring_spec.rank_by(), default_score, document_store));
+
+  // Using `new` to access a non-public constructor.
+  return std::unique_ptr<ScoringProcessor>(
+      new ScoringProcessor(std::move(scorer), is_descending_order));
+}
+
+// Consumes doc_hit_info_iterator, scores every document it yields, and
+// returns at most num_to_return ScoredDocumentHits, best first according to
+// is_descending_.
+std::vector<ScoredDocumentHit> ScoringProcessor::ScoreAndRank(
+    std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator,
+    int num_to_return) {
+  std::vector<ScoredDocumentHit> scored_document_hits;
+
+  if (num_to_return <= 0) {
+    return scored_document_hits;  // Nothing requested; skip scoring entirely.
+  }
+
+  // TODO(b/145025400) Determine if we want to score all DocHitInfo or enforce
+  // an upper limit.
+  while (doc_hit_info_iterator->Advance().ok()) {
+    const DocHitInfo& doc_hit_info = doc_hit_info_iterator->doc_hit_info();
+    // TODO(b/144955274) Calculate hit demotion factor from HitScore
+    float hit_demotion_factor = 1.0;
+    // The final score of the doc_hit_info = score of doc * demotion factor of
+    // hit.
+    float score =
+        scorer_->GetScore(doc_hit_info.document_id()) * hit_demotion_factor;
+    scored_document_hits.emplace_back(
+        doc_hit_info.document_id(), doc_hit_info.hit_section_ids_mask(), score);
+  }
+
+  return GetTopNFromScoredDocumentHits(std::move(scored_document_hits),
+                                       num_to_return, is_descending_);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/scoring/scoring-processor.h b/icing/scoring/scoring-processor.h
new file mode 100644
index 0000000..b472c14
--- /dev/null
+++ b/icing/scoring/scoring-processor.h
@@ -0,0 +1,65 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_SCORING_PROCESSOR_H_
+#define ICING_SCORING_SCORING_PROCESSOR_H_
+
+#include <memory>
+#include <utility>
+#include <vector>
+
+#include "utils/base/statusor.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/scoring/scored-document-hit.h"
+#include "icing/scoring/scorer.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+// ScoringProcessor is the top-level class that handles scoring.
+class ScoringProcessor {
+ public:
+  // Factory function to create a ScoringProcessor with its subcomponents
+  // according to the scoring spec.
+  //
+  // `document_store` must outlive the returned processor.
+  //
+  // Returns:
+  //   A ScoringProcessor on success
+  //   INVALID_ARGUMENT if unable to create what the spec specifies
+  static libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>> Create(
+      const ScoringSpecProto& scoring_spec,
+      const DocumentStore* document_store);
+
+  // Returns a vector of ScoredDocumentHits sorted by their scores. The size
+  // of it is no more than num_to_return. The iterator is consumed in the
+  // process.
+  std::vector<ScoredDocumentHit> ScoreAndRank(
+      std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator,
+      int num_to_return);
+
+ private:
+  explicit ScoringProcessor(std::unique_ptr<Scorer> scorer, bool is_descending)
+      : scorer_(std::move(scorer)), is_descending_(is_descending) {}
+
+  // Performs the per-document scoring.
+  std::unique_ptr<Scorer> scorer_;
+
+  // If true, the final result will be sorted in a descending order, otherwise
+  // ascending.
+  bool is_descending_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCORING_SCORING_PROCESSOR_H_
diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc
new file mode 100644
index 0000000..5f61cb6
--- /dev/null
+++ b/icing/scoring/scoring-processor_test.cc
@@ -0,0 +1,541 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/scoring-processor.h"
+
+#include <cstdint>
+
+#include "utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/scoring.pb.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+using ::testing::Test;
+
+class ScoringProcessorTest : public Test {
+ protected:
+ ScoringProcessorTest()
+ : test_dir_(GetTestTempDir() + "/icing"),
+ doc_store_dir_(test_dir_ + "/doc_store"),
+ schema_store_dir_(test_dir_ + "/schema_store") {}
+
+ void SetUp() override {
+ // Creates file directories
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_store_,
+ DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ // Creates a simple email schema
+ SchemaProto test_email_schema;
+ auto type_config = test_email_schema.add_types();
+ type_config->set_schema_type("email");
+ auto subject = type_config->add_properties();
+ subject->set_property_name("subject");
+ subject->set_data_type(PropertyConfigProto::DataType::STRING);
+ subject->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
+ }
+
+ void TearDown() override {
+ document_store_.reset();
+ schema_store_.reset();
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ DocumentStore* document_store() { return document_store_.get(); }
+
+ private:
+ const std::string test_dir_;
+ const std::string doc_store_dir_;
+ const std::string schema_store_dir_;
+ Filesystem filesystem_;
+ FakeClock fake_clock_;
+ std::unique_ptr<DocumentStore> document_store_;
+ std::unique_ptr<SchemaStore> schema_store_;
+};
+
+constexpr int kDefaultScore = 0;
+constexpr int64_t kDefaultCreationTimestampSecs = 1571100001;
+
+DocumentProto CreateDocument(const std::string& name_space,
+ const std::string& uri, int score,
+ int64_t creation_timestamp_secs) {
+ return DocumentBuilder()
+ .SetKey(name_space, uri)
+ .SetSchema("email")
+ .SetScore(score)
+ .SetCreationTimestampSecs(creation_timestamp_secs)
+ .Build();
+}
+
+libtextclassifier3::StatusOr<
+ std::pair<std::vector<DocHitInfo>, std::vector<ScoredDocumentHit>>>
+CreateAndInsertsDocumentsWithScores(DocumentStore* document_store,
+ const std::vector<int>& scores) {
+ std::vector<DocHitInfo> doc_hit_infos;
+ std::vector<ScoredDocumentHit> scored_document_hits;
+ for (int i = 0; i < scores.size(); i++) {
+ ICING_ASSIGN_OR_RETURN(DocumentId document_id,
+ document_store->Put(CreateDocument(
+ "icing", "email/" + std::to_string(i),
+ scores.at(i), kDefaultCreationTimestampSecs)));
+ doc_hit_infos.emplace_back(document_id);
+ scored_document_hits.emplace_back(document_id, kSectionIdMaskNone,
+ scores.at(i));
+ }
+ return std::pair(doc_hit_infos, scored_document_hits);
+}
+
+TEST_F(ScoringProcessorTest, FailToCreateOnInvalidRankingStrategy) {
+ ScoringSpecProto spec_proto;
+ EXPECT_THAT(ScoringProcessor::Create(spec_proto, document_store()),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(ScoringProcessorTest, ShouldCreateInstance) {
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ ICING_EXPECT_OK(ScoringProcessor::Create(spec_proto, document_store()));
+}
+
+TEST_F(ScoringProcessorTest, ShouldHandleEmptyDocHitIterator) {
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Creates an empty DocHitInfoIterator
+ std::vector<DocHitInfo> doc_hit_infos = {};
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Creates a ScoringProcessor
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store()));
+
+ EXPECT_THAT(scoring_processor->ScoreAndRank(std::move(doc_hit_info_iterator),
+ /*num_to_return=*/5),
+ IsEmpty());
+}
+
+TEST_F(ScoringProcessorTest, ShouldHandleNonPositiveNumToReturn) {
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Sets up documents
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store()->Put(CreateDocument("icing", "email/1", /*score=*/1,
+ kDefaultCreationTimestampSecs)));
+ DocHitInfo doc_hit_info1(document_id1);
+
+ // Creates a dummy DocHitInfoIterator
+ std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1};
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Creates a ScoringProcessor
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store()));
+
+ EXPECT_THAT(scoring_processor->ScoreAndRank(std::move(doc_hit_info_iterator),
+ /*num_to_return=*/-1),
+ IsEmpty());
+
+ doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ EXPECT_THAT(scoring_processor->ScoreAndRank(std::move(doc_hit_info_iterator),
+ /*num_to_return=*/0),
+ IsEmpty());
+}
+
+TEST_F(ScoringProcessorTest, ShouldRespectNumToReturn) {
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Sets up documents
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto doc_hit_result_pair,
+ CreateAndInsertsDocumentsWithScores(document_store(), {1, 2, 3}));
+ std::vector<DocHitInfo> doc_hit_infos = std::move(doc_hit_result_pair.first);
+
+  // Shuffles doc_hit_infos so the input order can't mask ranking bugs
+ std::swap(doc_hit_infos.at(0), doc_hit_infos.at(1));
+ std::swap(doc_hit_infos.at(1), doc_hit_infos.at(2));
+
+ // Creates a dummy DocHitInfoIterator with 3 results
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Creates a ScoringProcessor
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store()));
+
+ EXPECT_THAT(scoring_processor->ScoreAndRank(std::move(doc_hit_info_iterator),
+ /*num_to_return=*/2),
+ SizeIs(2));
+
+ doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+ EXPECT_THAT(scoring_processor->ScoreAndRank(std::move(doc_hit_info_iterator),
+ /*num_to_return=*/4),
+ SizeIs(3));
+}
+
+TEST_F(ScoringProcessorTest, ShouldRankByDocumentScoreDesc) {
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Sets up documents, guaranteed relationship:
+ // document1 < document2 < document3
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto doc_hit_result_pair,
+ CreateAndInsertsDocumentsWithScores(document_store(), {1, 2, 3}));
+ std::vector<DocHitInfo> doc_hit_infos = std::move(doc_hit_result_pair.first);
+ std::vector<ScoredDocumentHit> scored_document_hits =
+ std::move(doc_hit_result_pair.second);
+
+  // Shuffles doc_hit_infos so the input order can't mask ranking bugs
+ std::swap(doc_hit_infos.at(0), doc_hit_infos.at(1));
+ std::swap(doc_hit_infos.at(1), doc_hit_infos.at(2));
+
+ // Creates a dummy DocHitInfoIterator with 3 results
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Creates a ScoringProcessor
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store()));
+
+ EXPECT_THAT(scoring_processor->ScoreAndRank(std::move(doc_hit_info_iterator),
+ /*num_to_return=*/3),
+ ElementsAre(EqualsScoredDocumentHit(scored_document_hits.at(2)),
+ EqualsScoredDocumentHit(scored_document_hits.at(1)),
+ EqualsScoredDocumentHit(scored_document_hits.at(0))));
+}
+
+TEST_F(ScoringProcessorTest, ShouldRankByDocumentScoreAsc) {
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ spec_proto.set_order_by(ScoringSpecProto::Order::ASC);
+
+ // Sets up documents, guaranteed relationship:
+ // document1 < document2 < document3
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto doc_hit_result_pair,
+ CreateAndInsertsDocumentsWithScores(document_store(), {1, 2, 3}));
+ std::vector<DocHitInfo> doc_hit_infos = std::move(doc_hit_result_pair.first);
+ std::vector<ScoredDocumentHit> scored_document_hits =
+ std::move(doc_hit_result_pair.second);
+
+  // Shuffles doc_hit_infos so the input order can't mask ranking bugs
+ std::swap(doc_hit_infos.at(0), doc_hit_infos.at(1));
+ std::swap(doc_hit_infos.at(1), doc_hit_infos.at(2));
+
+ // Creates a dummy DocHitInfoIterator with 3 results
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Creates a ScoringProcessor
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store()));
+
+ EXPECT_THAT(scoring_processor->ScoreAndRank(std::move(doc_hit_info_iterator),
+ /*num_to_return=*/3),
+ ElementsAre(EqualsScoredDocumentHit(scored_document_hits.at(0)),
+ EqualsScoredDocumentHit(scored_document_hits.at(1)),
+ EqualsScoredDocumentHit(scored_document_hits.at(2))));
+}
+
+TEST_F(ScoringProcessorTest, ShouldRankByCreationTimestampDesc) {
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+ // Sets up documents, guaranteed relationship:
+ // document1 < document2 < document3
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_secs=*/1571100001);
+ DocumentProto document2 =
+ CreateDocument("icing", "email/2", kDefaultScore,
+ /*creation_timestamp_secs=*/1571100002);
+ DocumentProto document3 =
+ CreateDocument("icing", "email/3", kDefaultScore,
+ /*creation_timestamp_secs=*/1571100003);
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store()->Put(document1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store()->Put(document2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store()->Put(document3));
+ DocHitInfo doc_hit_info1(document_id1);
+ DocHitInfo doc_hit_info2(document_id2);
+ DocHitInfo doc_hit_info3(document_id3);
+ ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
+ document1.creation_timestamp_secs());
+ ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
+ document2.creation_timestamp_secs());
+ ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
+ document3.creation_timestamp_secs());
+
+ // Creates a dummy DocHitInfoIterator with 3 results
+ std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info2, doc_hit_info3,
+ doc_hit_info1};
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Creates a ScoringProcessor which ranks in descending order
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store()));
+
+ EXPECT_THAT(scoring_processor->ScoreAndRank(std::move(doc_hit_info_iterator),
+ /*num_to_return=*/3),
+ ElementsAre(EqualsScoredDocumentHit(scored_document_hit3),
+ EqualsScoredDocumentHit(scored_document_hit2),
+ EqualsScoredDocumentHit(scored_document_hit1)));
+}
+
+TEST_F(ScoringProcessorTest, ShouldRankByCreationTimestampAsc) {
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ spec_proto.set_order_by(ScoringSpecProto::Order::ASC);
+
+ // Sets up documents, guaranteed relationship:
+ // document1 < document2 < document3
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_secs=*/1571100001);
+ DocumentProto document2 =
+ CreateDocument("icing", "email/2", kDefaultScore,
+ /*creation_timestamp_secs=*/1571100002);
+ DocumentProto document3 =
+ CreateDocument("icing", "email/3", kDefaultScore,
+ /*creation_timestamp_secs=*/1571100003);
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store()->Put(document1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store()->Put(document2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store()->Put(document3));
+ DocHitInfo doc_hit_info1(document_id1);
+ DocHitInfo doc_hit_info2(document_id2);
+ DocHitInfo doc_hit_info3(document_id3);
+ ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
+ document1.creation_timestamp_secs());
+ ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
+ document2.creation_timestamp_secs());
+ ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
+ document3.creation_timestamp_secs());
+
+ // Creates a dummy DocHitInfoIterator with 3 results
+ std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info2, doc_hit_info3,
+ doc_hit_info1};
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  // Creates a ScoringProcessor which ranks in ascending order
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store()));
+
+ EXPECT_THAT(scoring_processor->ScoreAndRank(std::move(doc_hit_info_iterator),
+ /*num_to_return=*/3),
+ ElementsAre(EqualsScoredDocumentHit(scored_document_hit1),
+ EqualsScoredDocumentHit(scored_document_hit2),
+ EqualsScoredDocumentHit(scored_document_hit3)));
+}
+
+TEST_F(ScoringProcessorTest, ShouldHandleSameScoresDesc) {
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Creates 3 documents with the same score.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto doc_hit_result_pair,
+ CreateAndInsertsDocumentsWithScores(document_store(), {100, 100, 100}));
+ std::vector<DocHitInfo> doc_hit_infos = std::move(doc_hit_result_pair.first);
+ std::vector<ScoredDocumentHit> scored_document_hits =
+ std::move(doc_hit_result_pair.second);
+
+  // Shuffles doc_hit_infos so the input order can't mask ranking bugs
+ std::swap(doc_hit_infos.at(0), doc_hit_infos.at(1));
+ std::swap(doc_hit_infos.at(1), doc_hit_infos.at(2));
+
+ // Creates a dummy DocHitInfoIterator with 3 results
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Creates a ScoringProcessor which ranks in descending order
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store()));
+
+ // Results should be ranked in descending document id order.
+ EXPECT_THAT(scoring_processor->ScoreAndRank(std::move(doc_hit_info_iterator),
+ /*num_to_return=*/3),
+ ElementsAre(EqualsScoredDocumentHit(scored_document_hits.at(2)),
+ EqualsScoredDocumentHit(scored_document_hits.at(1)),
+ EqualsScoredDocumentHit(scored_document_hits.at(0))));
+}
+
+TEST_F(ScoringProcessorTest, ShouldHandleSameScoresAsc) {
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ spec_proto.set_order_by(ScoringSpecProto::Order::ASC);
+
+ // Creates 3 documents with the same score.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto doc_hit_result_pair,
+ CreateAndInsertsDocumentsWithScores(document_store(), {100, 100, 100}));
+ std::vector<DocHitInfo> doc_hit_infos = std::move(doc_hit_result_pair.first);
+ std::vector<ScoredDocumentHit> scored_document_hits =
+ std::move(doc_hit_result_pair.second);
+
+  // Shuffles doc_hit_infos so the input order can't mask ranking bugs
+ std::swap(doc_hit_infos.at(0), doc_hit_infos.at(1));
+ std::swap(doc_hit_infos.at(1), doc_hit_infos.at(2));
+
+ // Creates a dummy DocHitInfoIterator with 3 results
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ // Creates a ScoringProcessor which ranks in ascending order
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store()));
+
+ // Results should be ranked in ascending document id order.
+ EXPECT_THAT(scoring_processor->ScoreAndRank(std::move(doc_hit_info_iterator),
+ /*num_to_return=*/3),
+ ElementsAre(EqualsScoredDocumentHit(scored_document_hits.at(0)),
+ EqualsScoredDocumentHit(scored_document_hits.at(1)),
+ EqualsScoredDocumentHit(scored_document_hits.at(2))));
+}
+
+TEST_F(ScoringProcessorTest, ShouldHandleNoScoresDesc) {
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Sets up documents, guaranteed relationship:
+ // document1 < document2 < document3
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto doc_hit_result_pair,
+ CreateAndInsertsDocumentsWithScores(document_store(), {1, 2, 3}));
+ std::vector<DocHitInfo> doc_hit_infos = std::move(doc_hit_result_pair.first);
+ std::vector<ScoredDocumentHit> scored_document_hits =
+ std::move(doc_hit_result_pair.second);
+
+  // Shuffles doc_hit_infos so the input order can't mask ranking bugs
+ std::swap(doc_hit_infos.at(0), doc_hit_infos.at(1));
+ std::swap(doc_hit_infos.at(1), doc_hit_infos.at(2));
+
+  // Creates a dummy DocHitInfoIterator with 4 results, one of which doesn't have
+ // a score.
+ doc_hit_infos.emplace(doc_hit_infos.begin(), /*document_id_in=*/4,
+ kSectionIdMaskNone);
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  // The document hit without a score will be assigned the default score 0 in
+ // a descending order.
+ ScoredDocumentHit scored_document_hit_default_desc =
+ ScoredDocumentHit(4, kSectionIdMaskNone, /*score=*/0.0);
+
+ // Creates a ScoringProcessor which ranks in descending order
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store()));
+ EXPECT_THAT(
+ scoring_processor->ScoreAndRank(std::move(doc_hit_info_iterator),
+ /*num_to_return=*/4),
+ ElementsAre(EqualsScoredDocumentHit(scored_document_hits.at(2)),
+ EqualsScoredDocumentHit(scored_document_hits.at(1)),
+ EqualsScoredDocumentHit(scored_document_hits.at(0)),
+ EqualsScoredDocumentHit(scored_document_hit_default_desc)));
+}
+
+TEST_F(ScoringProcessorTest, ShouldHandleNoScoresAsc) {
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ spec_proto.set_order_by(ScoringSpecProto::Order::ASC);
+
+ // Sets up documents, guaranteed relationship:
+ // document1 < document2 < document3
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto doc_hit_result_pair,
+ CreateAndInsertsDocumentsWithScores(document_store(), {1, 2, 3}));
+ std::vector<DocHitInfo> doc_hit_infos = std::move(doc_hit_result_pair.first);
+ std::vector<ScoredDocumentHit> scored_document_hits =
+ std::move(doc_hit_result_pair.second);
+
+  // Shuffles doc_hit_infos so the input order can't mask ranking bugs
+ std::swap(doc_hit_infos.at(0), doc_hit_infos.at(1));
+ std::swap(doc_hit_infos.at(1), doc_hit_infos.at(2));
+
+  // Creates a dummy DocHitInfoIterator with 4 results, one of which doesn't have
+ // a score.
+ doc_hit_infos.emplace(doc_hit_infos.begin(), /*document_id_in=*/4,
+ kSectionIdMaskNone);
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+  // The document hit without a score will be assigned the default score
+ // max of float in an ascending order.
+ ScoredDocumentHit scored_document_hit_default_asc = ScoredDocumentHit(
+ 4, kSectionIdMaskNone, /*score=*/std::numeric_limits<float>::max());
+
+ // Creates a ScoringProcessor which ranks in ascending order
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store()));
+ EXPECT_THAT(
+ scoring_processor->ScoreAndRank(std::move(doc_hit_info_iterator),
+ /*num_to_return=*/4),
+ ElementsAre(EqualsScoredDocumentHit(scored_document_hits.at(0)),
+ EqualsScoredDocumentHit(scored_document_hits.at(1)),
+ EqualsScoredDocumentHit(scored_document_hits.at(2)),
+ EqualsScoredDocumentHit(scored_document_hit_default_asc)));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/snippet-retriever.cc b/icing/snippet-retriever.cc
new file mode 100644
index 0000000..d9242a3
--- /dev/null
+++ b/icing/snippet-retriever.cc
@@ -0,0 +1,341 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/snippet-retriever.h"
+
+#include <algorithm>
+#include <cctype>
+#include <memory>
+#include <string_view>
+#include <unordered_set>
+
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/proto/term.pb.h"
+#include "icing/schema/section.h"
+#include "icing/tokenization/tokenizer-factory.h"
+#include "icing/tokenization/tokenizer.h"
+#include "icing/util/i18n-utils.h"
+#include "unicode/utf8.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+class TokenMatcher {
+ public:
+ virtual ~TokenMatcher() = default;
+ virtual bool Matches(Token token) const = 0;
+};
+
+class TokenMatcherExact : public TokenMatcher {
+ public:
+ explicit TokenMatcherExact(
+ const std::unordered_set<std::string>& unrestricted_query_terms,
+ const std::unordered_set<std::string>& restricted_query_terms)
+ : unrestricted_query_terms_(unrestricted_query_terms),
+ restricted_query_terms_(restricted_query_terms) {}
+
+ bool Matches(Token token) const override {
+ // TODO(tjbarron) : Add normalization of token.
+ std::string s(token.text);
+ return (unrestricted_query_terms_.count(s) > 0) ||
+ (restricted_query_terms_.count(s) > 0);
+ }
+
+ private:
+ const std::unordered_set<std::string>& unrestricted_query_terms_;
+ const std::unordered_set<std::string>& restricted_query_terms_;
+};
+
+class TokenMatcherPrefix : public TokenMatcher {
+ public:
+ explicit TokenMatcherPrefix(
+ const std::unordered_set<std::string>& unrestricted_query_terms,
+ const std::unordered_set<std::string>& restricted_query_terms)
+ : unrestricted_query_terms_(unrestricted_query_terms),
+ restricted_query_terms_(restricted_query_terms) {}
+
+ bool Matches(Token token) const override {
+ if (std::any_of(unrestricted_query_terms_.begin(),
+ unrestricted_query_terms_.end(),
+ [&token](const std::string& term) {
+ return term.length() <= token.text.length() &&
+ token.text.compare(0, term.length(), term) == 0;
+ })) {
+ return true;
+ }
+ return std::any_of(restricted_query_terms_.begin(),
+ restricted_query_terms_.end(),
+ [token](const std::string& term) {
+ return term.length() <= token.text.length() &&
+ token.text.compare(0, term.length(), term) == 0;
+ });
+ }
+
+ private:
+ const std::unordered_set<std::string>& unrestricted_query_terms_;
+ const std::unordered_set<std::string>& restricted_query_terms_;
+};
+
+libtextclassifier3::StatusOr<std::unique_ptr<TokenMatcher>> CreateTokenMatcher(
+ TermMatchType::Code match_type,
+ const std::unordered_set<std::string>& unrestricted_query_terms,
+ const std::unordered_set<std::string>& restricted_query_terms) {
+ switch (match_type) {
+ case TermMatchType::EXACT_ONLY:
+ return std::make_unique<TokenMatcherExact>(unrestricted_query_terms,
+ restricted_query_terms);
+ case TermMatchType::PREFIX:
+ return std::make_unique<TokenMatcherPrefix>(unrestricted_query_terms,
+ restricted_query_terms);
+ case TermMatchType::UNKNOWN:
+ U_FALLTHROUGH;
+ default:
+ return absl_ports::InvalidArgumentError("Invalid match type provided.");
+ }
+}
+
+// Determines the start position of the snippet window around a match,
+// bounded by the snippet spec's max_window_bytes.
+
+// Returns:
+// the position of the window start if successful
+// INTERNAL_ERROR - if a tokenizer error is encountered
+libtextclassifier3::StatusOr<int> DetermineWindowStart(
+ const ResultSpecProto::SnippetSpecProto& snippet_spec,
+ std::string_view value, int match_mid, Tokenizer::Iterator* iterator) {
+ int window_start_min =
+ std::max((match_mid - snippet_spec.max_window_bytes() / 2), 0);
+ if (window_start_min == 0) {
+ return 0;
+ }
+ if (!iterator->ResetToTokenAfter(window_start_min - 1)) {
+ return absl_ports::InternalError(
+ "Couldn't reset tokenizer to determine snippet window!");
+ }
+ return iterator->GetToken().text.data() - value.data();
+}
+
+// Increments window_end_exclusive so long as the character at the position
+// of window_end_exclusive is punctuation and does not exceed
+// window_end_max_exclusive.
+int IncludeTrailingPunctuation(std::string_view value, int window_end_exclusive,
+ int window_end_max_exclusive) {
+ while (window_end_exclusive < window_end_max_exclusive) {
+ int char_len = 0;
+ if (!i18n_utils::IsPunctuationAt(value, window_end_exclusive, &char_len)) {
+ break;
+ }
+ if (window_end_exclusive + char_len > window_end_max_exclusive) {
+ // This is punctuation, but it goes beyond the window end max. Don't
+ // include it.
+ break;
+ }
+ // Expand window by char_len and check the next character.
+ window_end_exclusive += char_len;
+ }
+ return window_end_exclusive;
+}
+
+// Returns:
+// the position of the window end if successful
+// INTERNAL_ERROR - if a tokenizer error is encountered
+libtextclassifier3::StatusOr<int> DetermineWindowEnd(
+ const ResultSpecProto::SnippetSpecProto& snippet_spec,
+ std::string_view value, int match_mid, Tokenizer::Iterator* iterator) {
+ int window_end_max_exclusive =
+ std::min((match_mid + snippet_spec.max_window_bytes() / 2),
+ static_cast<int>(value.length()));
+ if (window_end_max_exclusive == value.length()) {
+ return window_end_max_exclusive;
+ }
+ if (!iterator->ResetToTokenBefore(window_end_max_exclusive)) {
+ return absl_ports::InternalError(
+ "Couldn't reset tokenizer to determine snippet window!");
+ }
+ int window_end_exclusive = iterator->GetToken().text.data() - value.data() +
+ iterator->GetToken().text.length();
+ return IncludeTrailingPunctuation(value, window_end_exclusive,
+ window_end_max_exclusive);
+}
+
+struct SectionData {
+ std::string_view section_name;
+ std::string_view section_subcontent;
+ // Identifies which subsection of the section content, section_subcontent has
+ // come from.
+ // Ex. "recipient.address" :
+ // ["foo@google.com", "bar@google.com", "baz@google.com"]
+ // The subcontent_index of "bar@google.com" is 1.
+ int subcontent_index;
+};
+
+libtextclassifier3::StatusOr<SnippetMatchProto> RetrieveMatch(
+ const ResultSpecProto::SnippetSpecProto& snippet_spec,
+ const SectionData& value, Tokenizer::Iterator* iterator) {
+ SnippetMatchProto snippet_match;
+ snippet_match.set_values_index(value.subcontent_index);
+
+ Token match = iterator->GetToken();
+ int match_pos = match.text.data() - value.section_subcontent.data();
+ int match_mid = match_pos + match.text.length() / 2;
+
+ snippet_match.set_exact_match_position(match_pos);
+ snippet_match.set_exact_match_bytes(match.text.length());
+
+ if (snippet_spec.max_window_bytes() > match.text.length()) {
+ // Find the beginning of the window.
+ ICING_ASSIGN_OR_RETURN(
+ int window_start,
+ DetermineWindowStart(snippet_spec, value.section_subcontent, match_mid,
+ iterator));
+ snippet_match.set_window_position(window_start);
+
+ // Find the end of the window.
+ ICING_ASSIGN_OR_RETURN(
+ int window_end_exclusive,
+ DetermineWindowEnd(snippet_spec, value.section_subcontent, match_mid,
+ iterator));
+ snippet_match.set_window_bytes(window_end_exclusive - window_start);
+
+ // Reset the iterator back to the original position.
+ if (!iterator->ResetToTokenAfter(match_pos - 1)) {
+ return absl_ports::InternalError(
+ "Couldn't reset tokenizer to determine snippet window!");
+ }
+ }
+
+ return snippet_match;
+}
+
+struct MatchOptions {
+ const ResultSpecProto::SnippetSpecProto& snippet_spec;
+ int max_matches_remaining;
+};
+
+libtextclassifier3::StatusOr<SnippetProto::EntryProto> RetrieveMatches(
+ const TokenMatcher* matcher, const MatchOptions& match_options,
+ const SectionData& value, const Tokenizer* tokenizer) {
+ SnippetProto::EntryProto snippet_entry;
+ snippet_entry.set_property_name(std::string(value.section_name));
+ ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator,
+ tokenizer->Tokenize(value.section_subcontent));
+ while (iterator->Advance()) {
+ if (snippet_entry.snippet_matches_size() >=
+ match_options.max_matches_remaining) {
+ break;
+ }
+ Token token = iterator->GetToken();
+ if (matcher->Matches(token)) {
+ // If there was an error while retrieving the match, the tokenizer
+ // iterator is probably in an invalid state. There's nothing we can do
+ // here, so just return.
+ ICING_ASSIGN_OR_RETURN(
+ SnippetMatchProto match,
+ RetrieveMatch(match_options.snippet_spec, value, iterator.get()));
+ snippet_entry.mutable_snippet_matches()->Add(std::move(match));
+ }
+ }
+ if (snippet_entry.snippet_matches().empty()) {
+ return absl_ports::NotFoundError("No matches found in value!");
+ }
+ return snippet_entry;
+}
+
+} // namespace
+
+SnippetProto SnippetRetriever::RetrieveSnippet(
+ const SectionRestrictQueryTermsMap& query_terms,
+ TermMatchType::Code match_type,
+ const ResultSpecProto::SnippetSpecProto& snippet_spec,
+ const DocumentProto& document, SectionIdMask section_id_mask) const {
+ SnippetProto snippet_proto;
+ ICING_ASSIGN_OR_RETURN_VAL(SchemaTypeId type_id,
+ schema_store_.GetSchemaTypeId(document.schema()),
+ snippet_proto);
+ const std::unordered_set<std::string> empty_set;
+ auto itr = query_terms.find("");
+ const std::unordered_set<std::string>& unrestricted_set =
+ (itr != query_terms.end()) ? itr->second : empty_set;
+ while (section_id_mask != kSectionIdMaskNone) {
+ SectionId section_id = __builtin_ctz(section_id_mask);
+ // Remove this section from the mask.
+ section_id_mask &= ~(1u << section_id);
+
+ // Determine the section name and match type.
+ auto section_metadata_or =
+ schema_store_.GetSectionMetadata(type_id, section_id);
+ if (!section_metadata_or.ok()) {
+ continue;
+ }
+ const SectionMetadata* metadata = section_metadata_or.ValueOrDie();
+ MatchOptions match_options = {snippet_spec};
+ // Match type must be as restrictive as possible. Prefix matches for a
+ // snippet should only be included if both the query is Prefix and the
+ // section has prefixes enabled.
+ TermMatchType::Code section_match_type = TermMatchType::EXACT_ONLY;
+ if (match_type == TermMatchType::PREFIX &&
+ metadata->term_match_type == TermMatchType::PREFIX) {
+ section_match_type = TermMatchType::PREFIX;
+ }
+
+ itr = query_terms.find(metadata->path);
+ const std::unordered_set<std::string>& restricted_set =
+ (itr != query_terms.end()) ? itr->second : empty_set;
+ libtextclassifier3::StatusOr<std::unique_ptr<TokenMatcher>> matcher_or =
+ CreateTokenMatcher(section_match_type, unrestricted_set,
+ restricted_set);
+ if (!matcher_or.ok()) {
+ continue;
+ }
+ match_options.max_matches_remaining =
+ snippet_spec.num_matches_per_property();
+
+ // Retrieve values and snippet them.
+ auto values_or = schema_store_.GetSectionContent(document, metadata->path);
+ if (!values_or.ok()) {
+ continue;
+ }
+ auto tokenizer_or = tokenizer_factory::CreateIndexingTokenizer(
+ metadata->tokenizer, &language_segmenter_);
+ if (!tokenizer_or.ok()) {
+ // If we couldn't create the tokenizer properly, just skip this section.
+ continue;
+ }
+ std::vector<std::string> values = values_or.ValueOrDie();
+ for (int value_index = 0; value_index < values.size(); ++value_index) {
+ if (match_options.max_matches_remaining <= 0) {
+ break;
+ }
+ SectionData value = {metadata->path, values.at(value_index), value_index};
+ auto entry_or =
+ RetrieveMatches(matcher_or.ValueOrDie().get(), match_options, value,
+ tokenizer_or.ValueOrDie().get());
+
+ // Drop any entries that encountered errors or didn't find any matches.
+ if (entry_or.ok()) {
+ match_options.max_matches_remaining -=
+ entry_or.ValueOrDie().snippet_matches_size();
+ snippet_proto.mutable_entries()->Add(std::move(entry_or).ValueOrDie());
+ }
+ }
+ }
+ return snippet_proto;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/snippet-retriever.h b/icing/snippet-retriever.h
new file mode 100644
index 0000000..879b322
--- /dev/null
+++ b/icing/snippet-retriever.h
@@ -0,0 +1,65 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SNIPPET_RETRIEVER_H_
+#define ICING_SNIPPET_RETRIEVER_H_
+
+#include "icing/proto/document.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/query/query-terms.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/tokenization/language-segmenter.h"
+
+namespace icing {
+namespace lib {
+
+// This class provides functions to retrieve snippets from documents. Snippets
+// are retrieved anywhere that content in the document matches query_terms
+// according to match_type. The behavior of snippet population is determined by
+// the SnippetSpecProto.
+//
+// This class does not take ownership of any of the provided pointers. The only
+// constraint for the lifecycle of this class is that it must be shorter than
+// that of the provided pointers.
+class SnippetRetriever {
+ public:
+ // Does not take any ownership, and all pointers must refer to valid objects
+ // that outlive the one constructed.
+ explicit SnippetRetriever(const SchemaStore* schema_store,
+ const LanguageSegmenter* language_segmenter)
+ : schema_store_(*schema_store),
+ language_segmenter_(*language_segmenter) {}
+
+ // Retrieve the snippet information for content in document. terms in
+ // query_terms are matched to content in document according to match_type.
+ // Only sections identified in section_id_mask are considered.
+ //
+ // Returns an empty SnippetProto if no snippets were found.
+ SnippetProto RetrieveSnippet(
+ const SectionRestrictQueryTermsMap& query_terms,
+ TermMatchType::Code match_type,
+ const ResultSpecProto::SnippetSpecProto& snippet_spec,
+ const DocumentProto& document, SectionIdMask section_id_mask) const;
+
+ private:
+ const SchemaStore& schema_store_;
+ const LanguageSegmenter& language_segmenter_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SNIPPET_RETRIEVER_H_
diff --git a/icing/snippet-retriever_test.cc b/icing/snippet-retriever_test.cc
new file mode 100644
index 0000000..4c53fa3
--- /dev/null
+++ b/icing/snippet-retriever_test.cc
@@ -0,0 +1,560 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/snippet-retriever.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/search.pb.h"
+#include "icing/query/query-terms.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section-manager.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/snippet-helpers.h"
+#include "icing/testing/test-data.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/i18n-utils.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+class SnippetRetrieverTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ test_dir_ = GetTestTempDir() + "/icing";
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ SetUpICUDataFile("icing/icu.dat"));
+ ICING_ASSERT_OK_AND_ASSIGN(language_segmenter_,
+ LanguageSegmenter::Create(GetLangIdModelPath()));
+
+ // Setup the schema
+ ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_));
+ SchemaProto schema;
+ SchemaTypeConfigProto* type_config = schema.add_types();
+ type_config->set_schema_type("email");
+ PropertyConfigProto* prop_config = type_config->add_properties();
+ prop_config->set_property_name("subject");
+ prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
+ prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ prop_config->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ prop_config->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ prop_config = type_config->add_properties();
+ prop_config->set_property_name("body");
+ prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
+ prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ prop_config->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ prop_config->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ ICING_ASSERT_OK(schema_store_->SetSchema(schema));
+
+ snippet_retriever_ = std::make_unique<SnippetRetriever>(
+ schema_store_.get(), language_segmenter_.get());
+
+ // Set limits to max - effectively no limit. Enable matching and request a
+ // window of 64 bytes.
+ snippet_spec_.set_num_to_snippet(std::numeric_limits<int32_t>::max());
+ snippet_spec_.set_num_matches_per_property(
+ std::numeric_limits<int32_t>::max());
+ snippet_spec_.set_max_window_bytes(64);
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ Filesystem filesystem_;
+ std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<LanguageSegmenter> language_segmenter_;
+ std::unique_ptr<SnippetRetriever> snippet_retriever_;
+ ResultSpecProto::SnippetSpecProto snippet_spec_;
+ std::string test_dir_;
+};
+
+TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeSmallerThanMatch) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
+
+ // Window starts at the beginning of "three" and ends in the middle of
+ // "three". len=4, orig_window="thre"
+ snippet_spec_.set_max_window_bytes(4);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
+ section_mask);
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
+ Eq(""));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsInWhitespace) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
+
+ // Window starts at the space between "one" and "two". Window ends in the
+ // middle of "four".
+ // len=14, orig_window=" two three fou"
+ snippet_spec_.set_max_window_bytes(14);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
+ section_mask);
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
+ Eq("two three"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsMidToken) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
+
+ // Window starts in the middle of "one" and ends at the end of "four".
+ // len=16, orig_window="e two three four"
+ snippet_spec_.set_max_window_bytes(16);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
+ section_mask);
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
+ Eq("two three four"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInPunctuation) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
+
+ // Window ends in the middle of all the punctuation and window starts at 0.
+ // len=20, orig_window="one two three four.."
+ snippet_spec_.set_max_window_bytes(20);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
+ section_mask);
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
+ Eq("one two three four.."));
+}
+
+TEST_F(SnippetRetrieverTest,
+ SnippetingWindowMaxWindowEndsInMiddleOfMultiBytePunctuation) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body",
+ "Is everything upside down in Australia¿ Crikey!")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"in"}}};
+
+ // Window ends in the middle of the two-byte character "¿" (\xC2\xBF).
+ // len=24, orig_window="pside down in Australia\xC2"
+ snippet_spec_.set_max_window_bytes(24);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
+ section_mask);
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
+ Eq("down in Australia"));
+}
+
+TEST_F(SnippetRetrieverTest,
+ SnippetingWindowMaxWindowEndsInMultiBytePunctuation) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body",
+ "Is everything upside down in Australia¿ Crikey!")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"in"}}};
+
+ // Window ends exactly at the end of the two-byte character "¿" (\xC2\xBF).
+ // len=26, orig_window="upside down in Australia\xC2\xBF"
+ snippet_spec_.set_max_window_bytes(26);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
+ section_mask);
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
+ Eq("upside down in Australia¿"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsBeforeValueStart) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
+
+ // Window starts before 0.
+ // len=22, orig_window="one two three four..."
+ snippet_spec_.set_max_window_bytes(22);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
+ section_mask);
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
+ Eq("one two three four..."));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInWhitespace) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
+
+ // Window ends before "five" but after all the punctuation
+ // len=26, orig_window="one two three four.... "
+ snippet_spec_.set_max_window_bytes(26);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
+ section_mask);
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
+ Eq("one two three four...."));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsMidToken) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
+
+ // Window ends in the middle of "five"
+ // len=32, orig_window="one two three four.... fiv"
+ snippet_spec_.set_max_window_bytes(32);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
+ section_mask);
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
+ Eq("one two three four...."));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeEqualToValueSize) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
+
+ // Max window size equals the size of the value.
+ // len=34, orig_window="one two three four.... five"
+ snippet_spec_.set_max_window_bytes(34);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
+ section_mask);
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
+ Eq("one two three four.... five"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeLargerThanValueSize) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
+
+ // Max window size exceeds the size of the value.
+ // len=36, orig_window="one two three four.... five"
+ snippet_spec_.set_max_window_bytes(36);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
+ section_mask);
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
+ Eq("one two three four.... five"));
+}
+
+TEST_F(SnippetRetrieverTest, PrefixSnippeting) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "Only a fool would match this content.")
+ .Build();
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"f"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::PREFIX, snippet_spec_, document,
+ section_mask);
+
+ // Check the snippets. 'f' should match prefix-enabled property 'subject', but
+ // not exact-only property 'body'
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
+ EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+}
+
+TEST_F(SnippetRetrieverTest, ExactSnippeting) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "Only a fool would match this content.")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"f"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
+ section_mask);
+
+ // Check the snippets
+ EXPECT_THAT(snippet.entries(), IsEmpty());
+}
+
+TEST_F(SnippetRetrieverTest, SimpleSnippetingNoWindowing) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "Only a fool would match this content.")
+ .Build();
+
+ snippet_spec_.set_max_window_bytes(0);
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"foo"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
+ section_mask);
+
+ // Check the snippets
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(GetWindow(document, snippet, "subject", 0), IsEmpty());
+ EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingMultipleMatches) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body",
+ "Concerning the subject of foo, we need to begin "
+ "considering our options regarding body bar.")
+ .Build();
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::PREFIX, snippet_spec_, document,
+ section_mask);
+
+ // Check the snippets
+ EXPECT_THAT(snippet.entries(), SizeIs(2));
+ EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
+ EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(
+ GetWindow(document, snippet, "body", 0),
+ Eq("Concerning the subject of foo, we need to begin considering"));
+ EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo"));
+ EXPECT_THAT(GetWindow(document, snippet, "body", 1),
+ Eq("our options regarding body bar."));
+ EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("bar"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrict) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body",
+ "Concerning the subject of foo, we need to begin "
+ "considering our options regarding body bar.")
+ .Build();
+ // Section 1 "subject" is not in the section_mask, so no snippet information
+ // from that section should be returned by the SnippetRetriever.
+ SectionIdMask section_mask = 0b00000001;
+ SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::PREFIX, snippet_spec_, document,
+ section_mask);
+
+ // Check the snippets
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(
+ GetWindow(document, snippet, "body", 0),
+ Eq("Concerning the subject of foo, we need to begin considering"));
+ EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo"));
+ EXPECT_THAT(GetWindow(document, snippet, "body", 1),
+ Eq("our options regarding body bar."));
+ EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("bar"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrictedTerm) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body",
+ "Concerning the subject of foo, we need to begin "
+ "considering our options regarding body bar.")
+ .Build();
+ SectionIdMask section_mask = 0b00000011;
+ // "subject" should match in both sections, but "foo" is restricted to "body"
+ // so it should only match in the 'body' section and not the 'subject'
+ // section.
+ SectionRestrictQueryTermsMap query_terms{{"", {"subject"}},
+ {"body", {"foo"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::PREFIX, snippet_spec_, document,
+ section_mask);
+
+ // Check the snippets
+ EXPECT_THAT(snippet.entries(), SizeIs(2));
+ // 'subject' section should only have the one match for "subject".
+ EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
+ EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("subject"));
+ EXPECT_THAT(GetWindow(document, snippet, "subject", 1), IsEmpty());
+ EXPECT_THAT(GetMatch(document, snippet, "subject", 1), IsEmpty());
+
+ // 'body' section should have matches for "subject" and "foo".
+ EXPECT_THAT(GetWindow(document, snippet, "body", 0),
+ Eq("Concerning the subject of foo, we need to begin"));
+ EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("subject"));
+ EXPECT_THAT(
+ GetWindow(document, snippet, "body", 1),
+ Eq("Concerning the subject of foo, we need to begin considering"));
+ EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("foo"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesOneMatchPerProperty) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body",
+ "Concerning the subject of foo, we need to begin "
+ "considering our options regarding body bar.")
+ .Build();
+
+ snippet_spec_.set_num_matches_per_property(1);
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, TermMatchType::PREFIX, snippet_spec_, document,
+ section_mask);
+
+ // Check the snippets
+ EXPECT_THAT(snippet.entries(), SizeIs(2));
+ EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
+ EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(
+ GetWindow(document, snippet, "body", 0),
+ Eq("Concerning the subject of foo, we need to begin considering"));
+ EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo"));
+ EXPECT_THAT(GetWindow(document, snippet, "body", 1), IsEmpty());
+ EXPECT_THAT(GetMatch(document, snippet, "body", 1), IsEmpty());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/document-associated-score-data.h b/icing/store/document-associated-score-data.h
new file mode 100644
index 0000000..65b35e1
--- /dev/null
+++ b/icing/store/document-associated-score-data.h
@@ -0,0 +1,62 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_DOCUMENT_ASSOCIATED_SCORE_DATA_H_
+#define ICING_STORE_DOCUMENT_ASSOCIATED_SCORE_DATA_H_
+
+#include <cstdint>
+#include <type_traits>
+
+#include "icing/legacy/core/icing-packed-pod.h"
+
+namespace icing {
+namespace lib {
+
+// This is the cache entity of document-associated scores. It contains scores
+// that are related to the document itself. The ground-truth data is stored
+// somewhere else. The cache includes:
+// 1. Document score. It's defined in and passed from DocumentProto.score.
+// Positive values are required.
+// 2. Document creation timestamp. Unix timestamp of when the document is
+// created and inserted into Icing.
+class DocumentAssociatedScoreData {
+ public:
+ explicit DocumentAssociatedScoreData(int document_score,
+ int64_t creation_timestamp_secs)
+ : document_score_(document_score),
+ creation_timestamp_secs_(creation_timestamp_secs) {}
+
+ bool operator==(const DocumentAssociatedScoreData& other) const {
+ return document_score_ == other.document_score() &&
+ creation_timestamp_secs_ == other.creation_timestamp_secs();
+ }
+
+ int document_score() const { return document_score_; }
+
+ int64_t creation_timestamp_secs() const { return creation_timestamp_secs_; }
+
+ private:
+ int document_score_;
+ int64_t creation_timestamp_secs_;
+} __attribute__((packed));
+
+static_assert(sizeof(DocumentAssociatedScoreData) == 12,
+ "Size of DocumentAssociatedScoreData should be 12");
+static_assert(icing_is_packed_pod<DocumentAssociatedScoreData>::value,
+ "go/icing-ubsan");
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_DOCUMENT_ASSOCIATED_SCORE_DATA_H_
diff --git a/icing/store/document-filter-data.h b/icing/store/document-filter-data.h
new file mode 100644
index 0000000..86d0efd
--- /dev/null
+++ b/icing/store/document-filter-data.h
@@ -0,0 +1,67 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_DOCUMENT_FILTER_DATA_H_
+#define ICING_STORE_DOCUMENT_FILTER_DATA_H_
+
+#include <cstdint>
+#include <type_traits>
+
+#include "icing/legacy/core/icing-packed-pod.h"
+
+namespace icing {
+namespace lib {
+
+using NamespaceId = int16_t;
+using SchemaTypeId = int16_t;
+
+class DocumentFilterData {
+ public:
+ explicit DocumentFilterData(NamespaceId namespace_id,
+ SchemaTypeId schema_type_id,
+ int64_t expiration_timestamp_secs)
+ : expiration_timestamp_secs_(expiration_timestamp_secs),
+ namespace_id_(namespace_id),
+ schema_type_id_(schema_type_id) {}
+
+ bool operator==(const DocumentFilterData& other) const {
+ return namespace_id_ == other.namespace_id() &&
+ schema_type_id_ == other.schema_type_id() &&
+ expiration_timestamp_secs_ == other.expiration_timestamp_secs();
+ }
+
+ NamespaceId namespace_id() const { return namespace_id_; }
+
+ SchemaTypeId schema_type_id() const { return schema_type_id_; }
+ void set_schema_type_id(SchemaTypeId schema_type_id) {
+ schema_type_id_ = schema_type_id;
+ }
+
+ int64_t expiration_timestamp_secs() const {
+ return expiration_timestamp_secs_;
+ }
+
+ private:
+ int64_t expiration_timestamp_secs_;
+ NamespaceId namespace_id_;
+ SchemaTypeId schema_type_id_;
+} __attribute__((packed));
+
+static_assert(sizeof(DocumentFilterData) == 12, "");
+static_assert(icing_is_packed_pod<DocumentFilterData>::value, "go/icing-ubsan");
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_DOCUMENT_FILTER_DATA_H_
diff --git a/icing/store/document-id.h b/icing/store/document-id.h
new file mode 100644
index 0000000..cbe9959
--- /dev/null
+++ b/icing/store/document-id.h
@@ -0,0 +1,40 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_DOCUMENT_ID_H_
+#define ICING_STORE_DOCUMENT_ID_H_
+
+#include <cstdint>
+
+namespace icing {
+namespace lib {
+
+// Id of a document
+using DocumentId = int32_t;
+
+// We use 20 bits to encode document_ids and use the largest value (1M - 1) to
+// represent an invalid document_id.
+inline constexpr int kDocumentIdBits = 20;
+inline constexpr DocumentId kInvalidDocumentId = (1u << kDocumentIdBits) - 1;
+inline constexpr DocumentId kMinDocumentId = 0;
+inline constexpr DocumentId kMaxDocumentId = kInvalidDocumentId - 1;
+
+constexpr bool IsDocumentIdValid(DocumentId document_id) {
+ return document_id >= kMinDocumentId && document_id <= kMaxDocumentId;
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_DOCUMENT_ID_H_
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
new file mode 100644
index 0000000..b9b6738
--- /dev/null
+++ b/icing/store/document-store.cc
@@ -0,0 +1,1214 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/document-store.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "utils/hash/farmhash.h"
+#include "icing/absl_ports/annotate.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-proto-log.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-associated-score-data.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/util/clock.h"
+#include "icing/util/crc32.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Sentinel value stored in the DocumentId mapper to mark a document as
+// deleted; real entries hold a non-negative log file offset.
+constexpr int64_t kDocDeletedFlag = -1;
+constexpr char kDocumentLogFilename[] = "document_log";
+constexpr char kDocumentIdMapperFilename[] = "document_id_mapper";
+constexpr char kDocumentStoreHeaderFilename[] = "document_store_header";
+constexpr char kScoreCacheFilename[] = "score_cache";
+constexpr char kFilterCacheFilename[] = "filter_cache";
+constexpr char kNamespaceMapperFilename[] = "namespace_mapper";
+
+constexpr int32_t kUriMapperMaxSize = 12 * 1024 * 1024; // 12 MiB
+
+// 384 KiB for a KeyMapper would allow each internal array to have a max of
+// 128 KiB for storage.
+constexpr int32_t kNamespaceMapperMaxSize = 3 * 128 * 1024; // 384 KiB
+
+// Wraps a DocumentProto into the DocumentWrapper envelope written to the
+// proto log. The document is moved in, not copied.
+DocumentWrapper CreateDocumentWrapper(DocumentProto&& document) {
+  DocumentWrapper document_wrapper;
+  *document_wrapper.mutable_document() = std::move(document);
+  return document_wrapper;
+}
+
+// Tombstone for a single document, identified by (namespace, uri). Recovery
+// (RegenerateDerivedFiles) distinguishes tombstone kinds by which of these
+// fields are non-empty.
+DocumentWrapper CreateDocumentTombstone(std::string_view document_namespace,
+                                        std::string_view document_uri) {
+  DocumentWrapper document_wrapper;
+  document_wrapper.set_deleted(true);
+  DocumentProto* document = document_wrapper.mutable_document();
+  document->set_namespace_(std::string(document_namespace));
+  document->set_uri(std::string(document_uri));
+  return document_wrapper;
+}
+
+// Tombstone for an entire namespace: deleted bit set, namespace set, uri left
+// empty so recovery can tell it apart from a single-document tombstone.
+DocumentWrapper CreateNamespaceTombstone(std::string_view document_namespace) {
+  DocumentWrapper document_wrapper;
+  document_wrapper.set_deleted(true);
+  DocumentProto* document = document_wrapper.mutable_document();
+  document->set_namespace_(std::string(document_namespace));
+  return document_wrapper;
+}
+
+// Tombstone for all documents of a schema type: deleted bit set, only the
+// schema field populated (namespace and uri empty).
+DocumentWrapper CreateSchemaTypeTombstone(
+    std::string_view document_schema_type) {
+  DocumentWrapper document_wrapper;
+  document_wrapper.set_deleted(true);
+  DocumentProto* document = document_wrapper.mutable_document();
+  document->set_schema(std::string(document_schema_type));
+  return document_wrapper;
+}
+
+// Filename helpers: every derived file lives directly under base_dir.
+std::string MakeHeaderFilename(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", kDocumentStoreHeaderFilename);
+}
+
+std::string MakeDocumentIdMapperFilename(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", kDocumentIdMapperFilename);
+}
+
+std::string MakeDocumentLogFilename(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", kDocumentLogFilename);
+}
+
+std::string MakeScoreCacheFilename(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", kScoreCacheFilename);
+}
+
+std::string MakeFilterCacheFilename(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", kFilterCacheFilename);
+}
+
+std::string MakeNamespaceMapperFilename(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", kNamespaceMapperFilename);
+}
+
+// TODO(adorokhine): This class internally uses an 8-byte fingerprint of the
+// Key and stores the key/value in a file-backed-trie that adds an ~80 byte
+// overhead per key. As we know that these fingerprints are always 8-bytes in
+// length and that they're random, we might be able to store them more
+// compactly.
+std::string MakeFingerprint(std::string_view name_space, std::string_view uri) {
+  // Using a 64-bit fingerprint to represent the key could lead to collisions.
+  // But, even with 200K unique keys, the probability of collision is about
+  // one-in-a-billion (https://en.wikipedia.org/wiki/Birthday_attack).
+  uint64_t fprint =
+      tc3farmhash::Fingerprint64(absl_ports::StrCat(name_space, uri));
+
+  std::string encoded_fprint;
+  // DynamicTrie cannot handle keys with '0' as bytes. So, we encode it in
+  // base128 and add 1 to make sure that no byte is '0'. This increases the
+  // size of the encoded_fprint from 8-bytes to 10-bytes.
+  while (fprint) {
+    encoded_fprint.push_back((fprint & 0x7F) + 1);
+    fprint >>= 7;
+  }
+  return encoded_fprint;
+}
+
+// Returns creation_timestamp_secs + ttl_secs, saturating at int64_t max.
+// A ttl of 0 means "never expires".
+int64_t CalculateExpirationTimestampSecs(int64_t creation_timestamp_secs,
+                                         int64_t ttl_secs) {
+  if (ttl_secs == 0) {
+    // Special case where a TTL of 0 indicates the document should never
+    // expire. int64_t max, interpreted as seconds since epoch, represents
+    // some point in the year 292,277,026,596. So we're probably ok to use
+    // this as "never reaching this point".
+    return std::numeric_limits<int64_t>::max();
+  }
+
+  int64_t expiration_timestamp_secs;
+  if (__builtin_add_overflow(creation_timestamp_secs, ttl_secs,
+                             &expiration_timestamp_secs)) {
+    // Overflow detected. Treat overflow as the same behavior of just int64_t
+    // max
+    return std::numeric_limits<int64_t>::max();
+  }
+
+  return expiration_timestamp_secs;
+}
+
+} // namespace
+
+// Constructor does no I/O; all file-backed members are created by
+// Initialize(), which Create() invokes before handing the store to callers.
+DocumentStore::DocumentStore(const Filesystem* filesystem,
+                             const std::string_view base_dir,
+                             const Clock* clock,
+                             const SchemaStore* schema_store)
+    : filesystem_(filesystem),
+      base_dir_(base_dir),
+      clock_(*clock),
+      schema_store_(schema_store),
+      document_validator_(schema_store) {}
+
+// Copy overload: makes an explicit copy and delegates to the move overload,
+// which holds the real Put logic.
+libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
+    const DocumentProto& document) {
+  return Put(DocumentProto(document));
+}
+
+DocumentStore::~DocumentStore() {
+  // Best-effort flush on destruction; a destructor cannot return a status, so
+  // a failed PersistToDisk() can only be logged. Skipped entirely if
+  // Initialize() never succeeded.
+  if (initialized_) {
+    if (!PersistToDisk().ok()) {
+      ICING_LOG(ERROR)
+          << "Error persisting to disk in DocumentStore destructor";
+    }
+  }
+}
+
+// Factory: constructs a store and runs Initialize() so callers never receive
+// a partially-initialized instance. Uses `new` directly because the
+// constructor is private to this pattern (not reachable by make_unique).
+libtextclassifier3::StatusOr<std::unique_ptr<DocumentStore>>
+DocumentStore::Create(const Filesystem* filesystem, const std::string& base_dir,
+                      const Clock* clock, const SchemaStore* schema_store) {
+  auto document_store = std::unique_ptr<DocumentStore>(
+      new DocumentStore(filesystem, base_dir, clock, schema_store));
+  ICING_RETURN_IF_ERROR(document_store->Initialize());
+  return document_store;
+}
+
+// Opens the ground-truth document log, then either loads the derived files
+// (key mapper, id mapper, caches) or — on data loss / load failure —
+// regenerates them from the log. Sets initialized_ only on full success.
+libtextclassifier3::Status DocumentStore::Initialize() {
+  auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
+      filesystem_, MakeDocumentLogFilename(base_dir_),
+      FileBackedProtoLog<DocumentWrapper>::Options(
+          /*compress_in=*/true));
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  if (!create_result_or.ok()) {
+    ICING_LOG(ERROR) << create_result_or.status().error_message()
+                     << "Failed to initialize DocumentLog";
+    return create_result_or.status();
+  }
+  FileBackedProtoLog<DocumentWrapper>::CreateResult create_result =
+      std::move(create_result_or).ValueOrDie();
+  document_log_ = std::move(create_result.proto_log);
+
+  if (create_result.data_loss) {
+    // Log was truncated/corrupted: derived files can't be trusted, rebuild
+    // them from what remains of the ground truth.
+    ICING_LOG(WARNING)
+        << "Data loss in document log, regenerating derived files.";
+    libtextclassifier3::Status status = RegenerateDerivedFiles();
+    if (!status.ok()) {
+      ICING_LOG(ERROR)
+          << "Failed to regenerate derived files for DocumentStore";
+      return status;
+    }
+  } else {
+    // Derived-file load failure here is recoverable (regenerate), so it is
+    // logged at VLOG rather than treated as an error.
+    if (!InitializeDerivedFiles().ok()) {
+      ICING_VLOG(1)
+          << "Couldn't find derived files or failed to initialize them, "
+             "regenerating derived files for DocumentStore.";
+      libtextclassifier3::Status status = RegenerateDerivedFiles();
+      if (!status.ok()) {
+        ICING_LOG(ERROR)
+            << "Failed to regenerate derived files for DocumentStore";
+        return status;
+      }
+    }
+  }
+
+  initialized_ = true;
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Loads all derived files from disk and validates them against the persisted
+// header (magic + combined checksum). Any failure here signals the caller
+// (Initialize()) to fall back to RegenerateDerivedFiles().
+libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() {
+  if (!HeaderExists()) {
+    // Without a header, we don't know if things are consistent between each
+    // other so the caller should just regenerate everything from ground
+    // truth.
+    return absl_ports::InternalError("DocumentStore header doesn't exist");
+  }
+
+  DocumentStore::Header header;
+  if (!filesystem_->Read(MakeHeaderFilename(base_dir_).c_str(), &header,
+                         sizeof(header))) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Couldn't read: ", MakeHeaderFilename(base_dir_)));
+  }
+
+  if (header.magic != DocumentStore::Header::kMagic) {
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Invalid header kMagic for file: ", MakeHeaderFilename(base_dir_)));
+  }
+
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  auto document_key_mapper_or =
+      KeyMapper<DocumentId>::Create(*filesystem_, base_dir_, kUriMapperMaxSize);
+  if (!document_key_mapper_or.ok()) {
+    ICING_LOG(ERROR) << document_key_mapper_or.status().error_message()
+                     << "Failed to initialize KeyMapper";
+    return document_key_mapper_or.status();
+  }
+  document_key_mapper_ = std::move(document_key_mapper_or).ValueOrDie();
+
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  auto document_id_mapper_or = FileBackedVector<int64_t>::Create(
+      *filesystem_, MakeDocumentIdMapperFilename(base_dir_),
+      MemoryMappedFile::READ_WRITE_AUTO_SYNC);
+  if (!document_id_mapper_or.ok()) {
+    ICING_LOG(ERROR) << document_id_mapper_or.status().error_message()
+                     << "Failed to initialize DocumentIdMapper";
+    return document_id_mapper_or.status();
+  }
+  document_id_mapper_ = std::move(document_id_mapper_or).ValueOrDie();
+
+  ICING_ASSIGN_OR_RETURN(score_cache_,
+                         FileBackedVector<DocumentAssociatedScoreData>::Create(
+                             *filesystem_, MakeScoreCacheFilename(base_dir_),
+                             MemoryMappedFile::READ_WRITE_AUTO_SYNC));
+
+  ICING_ASSIGN_OR_RETURN(filter_cache_,
+                         FileBackedVector<DocumentFilterData>::Create(
+                             *filesystem_, MakeFilterCacheFilename(base_dir_),
+                             MemoryMappedFile::READ_WRITE_AUTO_SYNC));
+
+  ICING_ASSIGN_OR_RETURN(
+      namespace_mapper_,
+      KeyMapper<NamespaceId>::Create(*filesystem_,
+                                     MakeNamespaceMapperFilename(base_dir_),
+                                     kNamespaceMapperMaxSize));
+
+  // Cross-check: recomputed combined checksum must match the one persisted in
+  // the header, otherwise some derived file is stale/corrupt.
+  ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
+  if (checksum.Get() != header.checksum) {
+    return absl_ports::InternalError(
+        "Combined checksum of DocStore was inconsistent");
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Rebuilds every derived file (key mapper, id mapper, score/filter caches,
+// namespace mapper) by replaying the ground-truth document log from the
+// beginning, then persists a fresh header. Tombstones are dispatched by which
+// fields are set: uri => single document, namespace only => whole namespace,
+// schema only => schema type.
+libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() {
+  ICING_RETURN_IF_ERROR(ResetDocumentKeyMapper());
+  ICING_RETURN_IF_ERROR(ResetDocumentIdMapper());
+  ICING_RETURN_IF_ERROR(ResetDocumentAssociatedScoreCache());
+  ICING_RETURN_IF_ERROR(ResetFilterCache());
+  ICING_RETURN_IF_ERROR(ResetNamespaceMapper());
+
+  // Iterates through document log
+  auto iterator = document_log_->GetIterator();
+  auto iterator_status = iterator.Advance();
+  while (iterator_status.ok()) {
+    ICING_ASSIGN_OR_RETURN(DocumentWrapper document_wrapper,
+                           document_log_->ReadProto(iterator.GetOffset()));
+    if (document_wrapper.deleted()) {
+      if (!document_wrapper.document().uri().empty()) {
+        // Individual document deletion.
+        auto document_id_or =
+            GetDocumentId(document_wrapper.document().namespace_(),
+                          document_wrapper.document().uri());
+        // Updates document_id mapper with deletion
+        if (document_id_or.ok()) {
+          ICING_RETURN_IF_ERROR(document_id_mapper_->Set(
+              document_id_or.ValueOrDie(), kDocDeletedFlag));
+        } else if (!absl_ports::IsNotFound(document_id_or.status())) {
+          // Real error
+          return absl_ports::Annotate(
+              document_id_or.status(),
+              absl_ports::StrCat("Failed to find document id. namespace: ",
+                                 document_wrapper.document().namespace_(),
+                                 ", uri: ", document_wrapper.document().uri()));
+        }
+        // NOT_FOUND is intentionally ignored: a tombstone for a document we
+        // never indexed during replay is a no-op.
+      } else if (!document_wrapper.document().namespace_().empty()) {
+        // Namespace deletion.
+        ICING_RETURN_IF_ERROR(UpdateDerivedFilesNamespaceDeleted(
+            document_wrapper.document().namespace_()));
+
+      } else if (!document_wrapper.document().schema().empty()) {
+        // SchemaType deletion.
+        auto schema_type_id_or = schema_store_->GetSchemaTypeId(
+            document_wrapper.document().schema());
+
+        if (schema_type_id_or.ok()) {
+          ICING_RETURN_IF_ERROR(UpdateDerivedFilesSchemaTypeDeleted(
+              schema_type_id_or.ValueOrDie()));
+        } else {
+          // The deleted schema type doesn't have a SchemaTypeId we can refer
+          // to in the FilterCache.
+          //
+          // TODO(cassiewang): We could avoid reading out all the documents.
+          // When we see a schema type doesn't have a SchemaTypeId, assign the
+          // unknown schema type a unique, temporary SchemaTypeId and store
+          // that in the FilterCache. Then, when we see the schema type
+          // tombstone here, we can look up its temporary SchemaTypeId and
+          // just iterate through the FilterCache to mark those documents as
+          // deleted.
+          int size = document_id_mapper_->num_elements();
+          for (DocumentId document_id = 0; document_id < size; document_id++) {
+            auto document_or = Get(document_id);
+            if (absl_ports::IsNotFound(document_or.status())) {
+              // Skip nonexistent documents
+              continue;
+            } else if (!document_or.ok()) {
+              // Real error, pass up
+              return absl_ports::Annotate(
+                  document_or.status(),
+                  IcingStringUtil::StringPrintf(
+                      "Failed to retrieve Document for DocumentId %d",
+                      document_id));
+            }
+
+            // Guaranteed to have a document now.
+            DocumentProto document = document_or.ValueOrDie();
+
+            if (document.schema() == document_wrapper.document().schema()) {
+              ICING_RETURN_IF_ERROR(
+                  document_id_mapper_->Set(document_id, kDocDeletedFlag));
+            }
+          }
+        }
+      } else {
+        return absl_ports::InternalError(
+            "Encountered an invalid tombstone during recovery!");
+      }
+    } else {
+      // Updates key mapper and document_id mapper with the new document
+      DocumentId new_document_id = document_id_mapper_->num_elements();
+      ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
+          MakeFingerprint(document_wrapper.document().namespace_(),
+                          document_wrapper.document().uri()),
+          new_document_id));
+      ICING_RETURN_IF_ERROR(
+          document_id_mapper_->Set(new_document_id, iterator.GetOffset()));
+
+      ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
+          new_document_id,
+          DocumentAssociatedScoreData(
+              document_wrapper.document().score(),
+              document_wrapper.document().creation_timestamp_secs())));
+
+      SchemaTypeId schema_type_id;
+      auto schema_type_id_or =
+          schema_store_->GetSchemaTypeId(document_wrapper.document().schema());
+      if (absl_ports::IsNotFound(schema_type_id_or.status())) {
+        // Didn't find a SchemaTypeId. This means that the DocumentStore and
+        // the SchemaStore are out of sync. But DocumentStore can't do
+        // anything about it so just ignore this for now. This should be
+        // detected/handled by the owner of DocumentStore. Set it to some
+        // arbitrary invalid value for now, it'll get updated to the correct
+        // ID later.
+        schema_type_id = -1;
+      } else if (!schema_type_id_or.ok()) {
+        // Real error. Pass it up
+        return schema_type_id_or.status();
+      } else {
+        // We're guaranteed that SchemaTypeId is valid now
+        schema_type_id = schema_type_id_or.ValueOrDie();
+      }
+
+      ICING_ASSIGN_OR_RETURN(
+          NamespaceId namespace_id,
+          namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(),
+                                      namespace_mapper_->num_keys()));
+
+      int64_t expiration_timestamp_secs = CalculateExpirationTimestampSecs(
+          document_wrapper.document().creation_timestamp_secs(),
+          document_wrapper.document().ttl_secs());
+
+      ICING_RETURN_IF_ERROR(UpdateFilterCache(
+          new_document_id, DocumentFilterData(namespace_id, schema_type_id,
+                                              expiration_timestamp_secs)));
+    }
+    iterator_status = iterator.Advance();
+  }
+
+  // OUT_OF_RANGE is the iterator's normal end-of-log signal; anything else is
+  // a genuine iteration failure.
+  if (!absl_ports::IsOutOfRange(iterator_status)) {
+    ICING_LOG(WARNING)
+        << "Failed to iterate through proto log while regenerating "
+           "derived files";
+    return absl_ports::Annotate(iterator_status,
+                                "Failed to iterate through proto log.");
+  }
+
+  // Write the header
+  ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
+  ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Discards the on-disk key mapper (fingerprint -> DocumentId) and recreates
+// it empty.
+libtextclassifier3::Status DocumentStore::ResetDocumentKeyMapper() {
+  // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
+  document_key_mapper_.reset();
+  // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+  // that can support error logging.
+  libtextclassifier3::Status status =
+      KeyMapper<DocumentId>::Delete(*filesystem_, base_dir_);
+  if (!status.ok()) {
+    ICING_LOG(ERROR) << status.error_message()
+                     << "Failed to delete old key mapper";
+    return status;
+  }
+
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  auto document_key_mapper_or =
+      KeyMapper<DocumentId>::Create(*filesystem_, base_dir_, kUriMapperMaxSize);
+  if (!document_key_mapper_or.ok()) {
+    ICING_LOG(ERROR) << document_key_mapper_or.status().error_message()
+                     << "Failed to re-init key mapper";
+    return document_key_mapper_or.status();
+  }
+  document_key_mapper_ = std::move(document_key_mapper_or).ValueOrDie();
+  return libtextclassifier3::Status::OK;
+}
+
+// Discards the on-disk DocumentId -> log-offset mapper and recreates it empty.
+libtextclassifier3::Status DocumentStore::ResetDocumentIdMapper() {
+  // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
+  document_id_mapper_.reset();
+  // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+  // that can support error logging.
+  libtextclassifier3::Status status = FileBackedVector<int64_t>::Delete(
+      *filesystem_, MakeDocumentIdMapperFilename(base_dir_));
+  if (!status.ok()) {
+    ICING_LOG(ERROR) << status.error_message()
+                     << "Failed to delete old document_id mapper";
+    return status;
+  }
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  auto document_id_mapper_or = FileBackedVector<int64_t>::Create(
+      *filesystem_, MakeDocumentIdMapperFilename(base_dir_),
+      MemoryMappedFile::READ_WRITE_AUTO_SYNC);
+  if (!document_id_mapper_or.ok()) {
+    ICING_LOG(ERROR) << document_id_mapper_or.status().error_message()
+                     << "Failed to re-init document_id mapper";
+    return document_id_mapper_or.status();
+  }
+  document_id_mapper_ = std::move(document_id_mapper_or).ValueOrDie();
+  return libtextclassifier3::Status::OK;
+}
+
+// Discards the on-disk score cache and recreates it empty.
+libtextclassifier3::Status DocumentStore::ResetDocumentAssociatedScoreCache() {
+  // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
+  score_cache_.reset();
+  ICING_RETURN_IF_ERROR(FileBackedVector<DocumentAssociatedScoreData>::Delete(
+      *filesystem_, MakeScoreCacheFilename(base_dir_)));
+  ICING_ASSIGN_OR_RETURN(score_cache_,
+                         FileBackedVector<DocumentAssociatedScoreData>::Create(
+                             *filesystem_, MakeScoreCacheFilename(base_dir_),
+                             MemoryMappedFile::READ_WRITE_AUTO_SYNC));
+  return libtextclassifier3::Status::OK;
+}
+
+// Discards the on-disk filter cache and recreates it empty.
+libtextclassifier3::Status DocumentStore::ResetFilterCache() {
+  // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
+  filter_cache_.reset();
+  ICING_RETURN_IF_ERROR(FileBackedVector<DocumentFilterData>::Delete(
+      *filesystem_, MakeFilterCacheFilename(base_dir_)));
+  ICING_ASSIGN_OR_RETURN(filter_cache_,
+                         FileBackedVector<DocumentFilterData>::Create(
+                             *filesystem_, MakeFilterCacheFilename(base_dir_),
+                             MemoryMappedFile::READ_WRITE_AUTO_SYNC));
+  return libtextclassifier3::Status::OK;
+}
+
+// Discards the on-disk namespace mapper and recreates it empty.
+libtextclassifier3::Status DocumentStore::ResetNamespaceMapper() {
+  // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
+  namespace_mapper_.reset();
+  // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+  // that can support error logging.
+  libtextclassifier3::Status status = KeyMapper<NamespaceId>::Delete(
+      *filesystem_, MakeNamespaceMapperFilename(base_dir_));
+  if (!status.ok()) {
+    ICING_LOG(ERROR) << status.error_message()
+                     << "Failed to delete old namespace_id mapper";
+    return status;
+  }
+  ICING_ASSIGN_OR_RETURN(
+      namespace_mapper_,
+      KeyMapper<NamespaceId>::Create(*filesystem_,
+                                     MakeNamespaceMapperFilename(base_dir_),
+                                     kNamespaceMapperMaxSize));
+  return libtextclassifier3::Status::OK;
+}
+
+// Computes a combined Crc32 over the document log and every derived file.
+// The append order below is part of the persisted format (the result is
+// compared against the header checksum), so it must not change.
+libtextclassifier3::StatusOr<Crc32> DocumentStore::ComputeChecksum() const {
+  Crc32 total_checksum;
+
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  auto checksum_or = document_log_->ComputeChecksum();
+  if (!checksum_or.ok()) {
+    ICING_LOG(ERROR) << checksum_or.status().error_message()
+                     << "Failed to compute checksum of DocumentLog";
+    return checksum_or.status();
+  }
+  Crc32 document_log_checksum = std::move(checksum_or).ValueOrDie();
+
+  Crc32 document_key_mapper_checksum = document_key_mapper_->ComputeChecksum();
+
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  checksum_or = document_id_mapper_->ComputeChecksum();
+  if (!checksum_or.ok()) {
+    ICING_LOG(ERROR) << checksum_or.status().error_message()
+                     << "Failed to compute checksum of DocumentIdMapper";
+    return checksum_or.status();
+  }
+  Crc32 document_id_mapper_checksum = std::move(checksum_or).ValueOrDie();
+
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  checksum_or = score_cache_->ComputeChecksum();
+  if (!checksum_or.ok()) {
+    ICING_LOG(ERROR) << checksum_or.status().error_message()
+                     << "Failed to compute checksum of score cache";
+    return checksum_or.status();
+  }
+  Crc32 score_cache_checksum = std::move(checksum_or).ValueOrDie();
+
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  checksum_or = filter_cache_->ComputeChecksum();
+  if (!checksum_or.ok()) {
+    ICING_LOG(ERROR) << checksum_or.status().error_message()
+                     << "Failed to compute checksum of filter cache";
+    return checksum_or.status();
+  }
+  Crc32 filter_cache_checksum = std::move(checksum_or).ValueOrDie();
+
+  Crc32 namespace_mapper_checksum = namespace_mapper_->ComputeChecksum();
+
+  // Each component checksum is folded in via its decimal string form.
+  total_checksum.Append(std::to_string(document_log_checksum.Get()));
+  total_checksum.Append(std::to_string(document_key_mapper_checksum.Get()));
+  total_checksum.Append(std::to_string(document_id_mapper_checksum.Get()));
+  total_checksum.Append(std::to_string(score_cache_checksum.Get()));
+  total_checksum.Append(std::to_string(filter_cache_checksum.Get()));
+  total_checksum.Append(std::to_string(namespace_mapper_checksum.Get()));
+
+  return total_checksum;
+}
+
+// Returns true only if the header file exists AND has non-zero, readable
+// size; a zero-length (truncated) header is treated as absent.
+bool DocumentStore::HeaderExists() {
+  if (!filesystem_->FileExists(MakeHeaderFilename(base_dir_).c_str())) {
+    return false;
+  }
+
+  int64_t file_size =
+      filesystem_->GetFileSize(MakeHeaderFilename(base_dir_).c_str());
+
+  // If it's been truncated to size 0 before, we consider it to be a new file
+  return file_size != 0 && file_size != Filesystem::kBadFileSize;
+}
+
+// Persists the header (magic + combined checksum) to disk, overwriting any
+// previous header file.
+libtextclassifier3::Status DocumentStore::UpdateHeader(const Crc32& checksum) {
+  // Write the header
+  DocumentStore::Header header;
+  header.magic = DocumentStore::Header::kMagic;
+  header.checksum = checksum.Get();
+
+  // This should overwrite the header.
+  if (!filesystem_->Write(MakeHeaderFilename(base_dir_).c_str(), &header,
+                          sizeof(header))) {
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Failed to write DocStore header: ", MakeHeaderFilename(base_dir_)));
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+// Validates and stores `document`, assigning it a fresh DocumentId. The
+// ground-truth log is always written first; derived files are updated after.
+// If a document with the same (namespace, uri) already exists, its old id is
+// marked deleted, so a Put acts as replace. Returns the new DocumentId.
+libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
+    DocumentProto&& document) {
+  ICING_RETURN_IF_ERROR(document_validator_.Validate(document));
+
+  // Copy fields needed before they are moved
+  std::string name_space = document.namespace_();
+  std::string uri = document.uri();
+  std::string schema = document.schema();
+  int document_score = document.score();
+  int64_t creation_timestamp_secs = document.creation_timestamp_secs();
+
+  // Sets the creation timestamp if caller hasn't specified.
+  if (document.creation_timestamp_secs() == 0) {
+    creation_timestamp_secs = clock_.GetCurrentSeconds();
+    document.set_creation_timestamp_secs(creation_timestamp_secs);
+  }
+
+  int64_t expiration_timestamp_secs = CalculateExpirationTimestampSecs(
+      creation_timestamp_secs, document.ttl_secs());
+
+  // Update ground truth first
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  auto offset_or =
+      document_log_->WriteProto(CreateDocumentWrapper(std::move(document)));
+  if (!offset_or.ok()) {
+    ICING_LOG(ERROR) << offset_or.status().error_message()
+                     << "Failed to write document";
+    return offset_or.status();
+  }
+  int64_t file_offset = std::move(offset_or).ValueOrDie();
+
+  // Get existing document id
+  auto old_document_id_or = GetDocumentId(name_space, uri);
+  if (!old_document_id_or.ok() &&
+      !absl_ports::IsNotFound(old_document_id_or.status())) {
+    return absl_ports::InternalError("Failed to read from key mapper");
+  }
+
+  // Creates a new document id, updates key mapper and document_id mapper
+  DocumentId new_document_id = document_id_mapper_->num_elements();
+  ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
+      MakeFingerprint(name_space, uri), new_document_id));
+  ICING_RETURN_IF_ERROR(document_id_mapper_->Set(new_document_id, file_offset));
+
+  ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
+      new_document_id,
+      DocumentAssociatedScoreData(document_score, creation_timestamp_secs)));
+
+  // Update namespace maps
+  ICING_ASSIGN_OR_RETURN(
+      NamespaceId namespace_id,
+      namespace_mapper_->GetOrPut(name_space, namespace_mapper_->num_keys()));
+
+  ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+                         schema_store_->GetSchemaTypeId(schema));
+
+  ICING_RETURN_IF_ERROR(UpdateFilterCache(
+      new_document_id, DocumentFilterData(namespace_id, schema_type_id,
+                                          expiration_timestamp_secs)));
+
+  if (old_document_id_or.ok()) {
+    // Mark the old document id as deleted.
+    ICING_RETURN_IF_ERROR(document_id_mapper_->Set(
+        old_document_id_or.ValueOrDie(), kDocDeletedFlag));
+  }
+
+  return new_document_id;
+}
+
+// Looks up a document by (namespace, uri); delegates to the DocumentId
+// overload after resolving the id.
+libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
+    const std::string_view name_space, const std::string_view uri) const {
+  ICING_ASSIGN_OR_RETURN(DocumentId document_id,
+                         GetDocumentId(name_space, uri));
+  return Get(document_id);
+}
+
+// Reads the document at `document_id` from the log. Returns NOT_FOUND (via
+// DoesDocumentExistAndGetFileOffset) for deleted or expired documents.
+libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
+    DocumentId document_id) const {
+  ICING_ASSIGN_OR_RETURN(int64_t document_log_offset,
+                         DoesDocumentExistAndGetFileOffset(document_id));
+
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  auto document_wrapper_or = document_log_->ReadProto(document_log_offset);
+  if (!document_wrapper_or.ok()) {
+    ICING_LOG(ERROR) << document_wrapper_or.status().error_message()
+                     << "Failed to read from document log";
+    return document_wrapper_or.status();
+  }
+  DocumentWrapper document_wrapper =
+      std::move(document_wrapper_or).ValueOrDie();
+
+  // Move the inner document out of the wrapper to avoid a copy.
+  return std::move(*document_wrapper.mutable_document());
+}
+
+// Resolves (namespace, uri) to a DocumentId via the fingerprint key mapper.
+// Note: this only consults the key mapper — it does not check whether the
+// document has since been deleted or expired.
+libtextclassifier3::StatusOr<DocumentId> DocumentStore::GetDocumentId(
+    const std::string_view name_space, const std::string_view uri) const {
+  auto document_id_or =
+      document_key_mapper_->Get(MakeFingerprint(name_space, uri));
+  if (!document_id_or.ok()) {
+    return absl_ports::Annotate(
+        document_id_or.status(),
+        absl_ports::StrCat("Failed to find DocumentId by key: ", name_space,
+                           ", ", uri));
+  }
+
+  // Guaranteed to have a DocumentId now
+  return document_id_or.ValueOrDie();
+}
+
+// Returns the log file offset of a live document, or:
+//  - INVALID_ARGUMENT for an out-of-range DocumentId,
+//  - NOT_FOUND if the document was deleted, never existed, or has expired.
+libtextclassifier3::StatusOr<int64_t>
+DocumentStore::DoesDocumentExistAndGetFileOffset(DocumentId document_id) const {
+  if (!IsDocumentIdValid(document_id)) {
+    return absl_ports::InvalidArgumentError(
+        IcingStringUtil::StringPrintf("DocumentId %d is invalid", document_id));
+  }
+
+  auto file_offset_or = document_id_mapper_->Get(document_id);
+
+  bool deleted =
+      file_offset_or.ok() && *file_offset_or.ValueOrDie() == kDocDeletedFlag;
+  if (deleted || absl_ports::IsOutOfRange(file_offset_or.status())) {
+    // Document has been deleted or doesn't exist
+    return absl_ports::NotFoundError(
+        IcingStringUtil::StringPrintf("Document %d not found", document_id));
+  }
+
+  // TTL check: expiry is enforced lazily at read time from the filter cache.
+  ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
+                         filter_cache_->Get(document_id));
+  if (clock_.GetCurrentSeconds() >= filter_data->expiration_timestamp_secs()) {
+    // Past the expiration time, so also return NOT FOUND since it *shouldn't*
+    // exist anymore.
+    return absl_ports::NotFoundError(
+        IcingStringUtil::StringPrintf("Document %d not found", document_id));
+  }
+
+  // Propagate any remaining (non-OUT_OF_RANGE) mapper error before deref.
+  ICING_RETURN_IF_ERROR(file_offset_or.status());
+  return *file_offset_or.ValueOrDie();
+}
+
+// Convenience predicate over DoesDocumentExistAndGetFileOffset(); note that
+// any error status (not just NOT_FOUND) is reported as "does not exist".
+bool DocumentStore::DoesDocumentExist(DocumentId document_id) const {
+  // If we can successfully get the document log offset, the document exists.
+  return DoesDocumentExistAndGetFileOffset(document_id).ok();
+}
+
+// Deletes the document at (namespace, uri). Idempotent: deleting a document
+// that doesn't exist (or was already deleted) returns OK. The ground truth is
+// updated first by appending a tombstone; the physical bytes are reclaimed
+// later during Optimize().
+libtextclassifier3::Status DocumentStore::Delete(
+    const std::string_view name_space, const std::string_view uri) {
+  // Try to get the DocumentId first
+  auto document_id_or = GetDocumentId(name_space, uri);
+  if (absl_ports::IsNotFound(document_id_or.status())) {
+    // No need to delete nonexistent (name_space, uri)
+    return libtextclassifier3::Status::OK;
+  } else if (!document_id_or.ok()) {
+    // Real error
+    return absl_ports::Annotate(
+        document_id_or.status(),
+        absl_ports::StrCat("Failed to delete Document. namespace: ", name_space,
+                           ", uri: ", uri));
+  }
+
+  // Check if the DocumentId's Document still exists.
+  DocumentId document_id = document_id_or.ValueOrDie();
+  auto file_offset_or = DoesDocumentExistAndGetFileOffset(document_id);
+  if (absl_ports::IsNotFound(file_offset_or.status())) {
+    // No need to delete nonexistent documents
+    return libtextclassifier3::Status::OK;
+  } else if (!file_offset_or.ok()) {
+    // Real error, pass it up
+    return absl_ports::Annotate(
+        file_offset_or.status(),
+        IcingStringUtil::StringPrintf(
+            "Failed to retrieve file offset for DocumentId %d", document_id));
+  }
+
+  // Update ground truth first.
+  // To delete a proto we don't directly remove it. Instead, we mark it as
+  // deleted first by appending a tombstone of it and actually remove it from
+  // file later in Optimize()
+  // TODO(b/144458732): Implement a more robust version of ICING_RETURN_IF_ERROR
+  // that can support error logging.
+  libtextclassifier3::Status status =
+      document_log_->WriteProto(CreateDocumentTombstone(name_space, uri))
+          .status();
+  if (!status.ok()) {
+    ICING_LOG(ERROR) << status.error_message()
+                     << "Failed to delete Document. namespace: " << name_space
+                     << ", uri: " << uri;
+    return status;
+  }
+
+  // Then mark the id deleted in the derived mapper.
+  ICING_RETURN_IF_ERROR(
+      document_id_mapper_->Set(document_id_or.ValueOrDie(), kDocDeletedFlag));
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Thin pass-through to the namespace mapper; propagates its status
+// (including NOT_FOUND for unknown namespaces) unchanged.
+libtextclassifier3::StatusOr<NamespaceId> DocumentStore::GetNamespaceId(
+    std::string_view name_space) const {
+  return namespace_mapper_->Get(name_space);
+}
+
+// Looks up the cached score data for document_id in score_cache_. Does NOT
+// check whether the document still exists; deleted documents' entries are
+// returned as-is (see header comment).
+libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
+DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const {
+  auto score_data_or = score_cache_->Get(document_id);
+  if (!score_data_or.ok()) {
+    // Log the underlying error message as well; previously the log line
+    // started with a bare " while trying...", dropping the actual cause.
+    ICING_LOG(ERROR) << score_data_or.status().error_message()
+                     << " while trying to access DocumentId " << document_id
+                     << " from score_cache_";
+    return score_data_or.status();
+  }
+  // Get() returns a pointer into the cache; copy the value out for the
+  // caller.
+  return *std::move(score_data_or).ValueOrDie();
+}
+
+// Looks up the cached filter data for document_id in filter_cache_. Does NOT
+// check whether the document still exists; deleted documents' entries are
+// returned as-is (see header comment).
+libtextclassifier3::StatusOr<DocumentFilterData>
+DocumentStore::GetDocumentFilterData(DocumentId document_id) const {
+  auto filter_data_or = filter_cache_->Get(document_id);
+  if (!filter_data_or.ok()) {
+    // Log the underlying error message as well; previously the log line
+    // started with a bare " while trying...", dropping the actual cause.
+    ICING_LOG(ERROR) << filter_data_or.status().error_message()
+                     << " while trying to access DocumentId " << document_id
+                     << " from filter_cache_";
+    return filter_data_or.status();
+  }
+  // Get() returns a pointer into the cache; copy the value out for the
+  // caller.
+  return *std::move(filter_data_or).ValueOrDie();
+}
+
+// Deletes every document in name_space by (1) appending a namespace tombstone
+// to the ground-truth log, then (2) flagging the matching ids in the derived
+// files. Ground truth must be written first so the derived files can always
+// be regenerated from the log after a crash.
+libtextclassifier3::Status DocumentStore::DeleteByNamespace(
+    std::string_view name_space) {
+  auto namespace_id_or = namespace_mapper_->Get(name_space);
+  if (absl_ports::IsNotFound(namespace_id_or.status())) {
+    // Namespace doesn't exist. Don't need to delete anything.
+    return libtextclassifier3::Status::OK;
+  } else if (!namespace_id_or.ok()) {
+    // Real error, pass it up.
+    return namespace_id_or.status();
+  }
+
+  // Update ground truth first.
+  // To delete an entire namespace, we append a tombstone that only contains
+  // the deleted bit and the name of the deleted namespace.
+  // TODO(b/144458732): Implement a more robust version of
+  // ICING_RETURN_IF_ERROR that can support error logging.
+  libtextclassifier3::Status status =
+      document_log_->WriteProto(CreateNamespaceTombstone(name_space)).status();
+  if (!status.ok()) {
+    ICING_LOG(ERROR) << status.error_message()
+                     << "Failed to delete namespace. namespace = "
+                     << name_space;
+    return status;
+  }
+
+  // Tombstone is durably recorded; now mark matching documents as deleted in
+  // the derived files (this re-resolves name_space to its NamespaceId).
+  return UpdateDerivedFilesNamespaceDeleted(name_space);
+}
+
+// Flags, in the derived files only, every document belonging to name_space as
+// deleted. Assumes the namespace tombstone has already been written to the
+// ground-truth log; space is reclaimed later by Optimize().
+libtextclassifier3::Status DocumentStore::UpdateDerivedFilesNamespaceDeleted(
+    std::string_view name_space) {
+  auto namespace_id_or = namespace_mapper_->Get(name_space);
+  if (absl_ports::IsNotFound(namespace_id_or.status())) {
+    // Unknown namespace; nothing in the derived files to update.
+    return libtextclassifier3::Status::OK;
+  }
+  if (!namespace_id_or.ok()) {
+    // Any other error is real; propagate it.
+    return namespace_id_or.status();
+  }
+
+  // Guaranteed to have a NamespaceId now.
+  const NamespaceId deleted_namespace_id = namespace_id_or.ValueOrDie();
+
+  // Scan the filter cache and flag every document of the deleted namespace in
+  // the document id mapper.
+  const auto num_documents = filter_cache_->num_elements();
+  for (DocumentId document_id = 0; document_id < num_documents;
+       ++document_id) {
+    // Get() can only fail for ids outside [0, num_elements), which the loop
+    // bounds rule out, so this error SHOULD NEVER HAPPEN.
+    ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
+                           filter_cache_->Get(document_id));
+    if (filter_data->namespace_id() != deleted_namespace_id) {
+      continue;
+    }
+    // Set() can only fail for out-of-range ids; document_id_mapper_ always
+    // covers at least as many ids as filter_cache_, so this SHOULD NEVER
+    // HAPPEN either.
+    ICING_RETURN_IF_ERROR(
+        document_id_mapper_->Set(document_id, kDocDeletedFlag));
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Deletes every document of schema_type by (1) appending a schema-type
+// tombstone to the ground-truth log, then (2) flagging the matching ids in
+// the derived files. Ground truth is written first so derived files can be
+// regenerated from the log after a crash.
+libtextclassifier3::Status DocumentStore::DeleteBySchemaType(
+    std::string_view schema_type) {
+  auto schema_type_id_or = schema_store_->GetSchemaTypeId(schema_type);
+  if (absl_ports::IsNotFound(schema_type_id_or.status())) {
+    // SchemaType doesn't exist. Don't need to delete anything.
+    return libtextclassifier3::Status::OK;
+  } else if (!schema_type_id_or.ok()) {
+    // Real error, pass it up.
+    return schema_type_id_or.status();
+  }
+
+  // Update ground truth first.
+  // To delete an entire schema type, we append a tombstone that only contains
+  // the deleted bit and the name of the deleted schema type.
+  // TODO(b/144458732): Implement a more robust version of
+  // ICING_RETURN_IF_ERROR that can support error logging.
+  libtextclassifier3::Status status =
+      document_log_->WriteProto(CreateSchemaTypeTombstone(schema_type))
+          .status();
+  if (!status.ok()) {
+    ICING_LOG(ERROR) << status.error_message()
+                     << "Failed to delete schema_type. schema_type = "
+                     << schema_type;
+    return status;
+  }
+
+  // Guaranteed to have a SchemaTypeId now
+  SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie();
+
+  // Tombstone is durably recorded; mark matching documents as deleted in the
+  // derived files.
+  ICING_RETURN_IF_ERROR(UpdateDerivedFilesSchemaTypeDeleted(schema_type_id));
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Flags, in the derived files only, every document whose type matches
+// schema_type_id as deleted. Assumes the schema-type tombstone has already
+// been written to the ground-truth log; space is reclaimed by Optimize().
+libtextclassifier3::Status DocumentStore::UpdateDerivedFilesSchemaTypeDeleted(
+    SchemaTypeId schema_type_id) {
+  // Scan the filter cache and flag every document of the deleted schema type
+  // in the document id mapper.
+  const auto num_documents = filter_cache_->num_elements();
+  for (DocumentId document_id = 0; document_id < num_documents;
+       ++document_id) {
+    // Get() can only fail for ids outside [0, num_elements), which the loop
+    // bounds rule out, so this error SHOULD NEVER HAPPEN.
+    ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
+                           filter_cache_->Get(document_id));
+    if (filter_data->schema_type_id() != schema_type_id) {
+      continue;
+    }
+    // Set() can only fail for out-of-range ids; document_id_mapper_ always
+    // covers at least as many ids as filter_cache_, so this SHOULD NEVER
+    // HAPPEN either.
+    ICING_RETURN_IF_ERROR(
+        document_id_mapper_->Set(document_id, kDocDeletedFlag));
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Flushes the ground-truth log and every derived file to disk, then stamps
+// the header with the combined checksum. Any failure aborts the sequence and
+// is returned to the caller.
+libtextclassifier3::Status DocumentStore::PersistToDisk() {
+  ICING_RETURN_IF_ERROR(document_log_->PersistToDisk());
+  ICING_RETURN_IF_ERROR(document_key_mapper_->PersistToDisk());
+  ICING_RETURN_IF_ERROR(document_id_mapper_->PersistToDisk());
+  ICING_RETURN_IF_ERROR(score_cache_->PersistToDisk());
+  ICING_RETURN_IF_ERROR(filter_cache_->PersistToDisk());
+  ICING_RETURN_IF_ERROR(namespace_mapper_->PersistToDisk());
+
+  // Update the combined checksum and write to header file.
+  ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
+  ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Computes the store's total on-disk footprint by querying each sub-component
+// in turn; the first component that fails short-circuits the computation.
+libtextclassifier3::StatusOr<int64_t> DocumentStore::GetDiskUsage() const {
+  int64_t total_disk_usage = 0;
+
+  ICING_ASSIGN_OR_RETURN(const int64_t log_usage,
+                         document_log_->GetDiskUsage());
+  total_disk_usage += log_usage;
+
+  ICING_ASSIGN_OR_RETURN(const int64_t key_mapper_usage,
+                         document_key_mapper_->GetDiskUsage());
+  total_disk_usage += key_mapper_usage;
+
+  ICING_ASSIGN_OR_RETURN(const int64_t id_mapper_usage,
+                         document_id_mapper_->GetDiskUsage());
+  total_disk_usage += id_mapper_usage;
+
+  ICING_ASSIGN_OR_RETURN(const int64_t score_cache_usage,
+                         score_cache_->GetDiskUsage());
+  total_disk_usage += score_cache_usage;
+
+  ICING_ASSIGN_OR_RETURN(const int64_t filter_cache_usage,
+                         filter_cache_->GetDiskUsage());
+  total_disk_usage += filter_cache_usage;
+
+  ICING_ASSIGN_OR_RETURN(const int64_t namespace_mapper_usage,
+                         namespace_mapper_->GetDiskUsage());
+  total_disk_usage += namespace_mapper_usage;
+
+  return total_disk_usage;
+}
+
+// Rewires this store to a new SchemaStore and revalidates every document
+// against it: compatible documents get their SchemaTypeId refreshed in the
+// filter cache; incompatible ones are deleted. May be expensive — reads every
+// live document from the log.
+libtextclassifier3::Status DocumentStore::UpdateSchemaStore(
+    const SchemaStore* schema_store) {
+  // Update all references to the SchemaStore
+  schema_store_ = schema_store;
+  document_validator_.UpdateSchemaStore(schema_store);
+
+  // Iterate over every DocumentId ever assigned, live or deleted.
+  int size = document_id_mapper_->num_elements();
+  for (DocumentId document_id = 0; document_id < size; document_id++) {
+    auto document_or = Get(document_id);
+    if (absl_ports::IsNotFound(document_or.status())) {
+      // Skip nonexistent documents
+      continue;
+    } else if (!document_or.ok()) {
+      // Real error, pass up
+      return absl_ports::Annotate(
+          document_or.status(),
+          IcingStringUtil::StringPrintf(
+              "Failed to retrieve Document for DocumentId %d", document_id));
+    }
+
+    // Guaranteed to have a document now.
+    DocumentProto document = document_or.ValueOrDie();
+
+    // Revalidate that this document is still compatible
+    if (document_validator_.Validate(document).ok()) {
+      // Update the SchemaTypeId for this entry; the type may have been
+      // assigned a different id in the new SchemaStore.
+      ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+                             schema_store_->GetSchemaTypeId(document.schema()));
+      filter_cache_->mutable_array()[document_id].set_schema_type_id(
+          schema_type_id);
+    } else {
+      // Document is no longer valid with the new SchemaStore. Mark as
+      // deleted
+      ICING_RETURN_IF_ERROR(Delete(document.namespace_(), document.uri()));
+    }
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+// Like UpdateSchemaStore, but uses set_schema_result to touch only the
+// documents actually affected by the schema change: deleted types get one
+// shared tombstone, and documents are read from the log only when their type
+// id changed or needs revalidation.
+libtextclassifier3::Status DocumentStore::OptimizedUpdateSchemaStore(
+    const SchemaStore* schema_store,
+    const SchemaStore::SetSchemaResult& set_schema_result) {
+  if (!set_schema_result.success) {
+    // No new schema was set, no work to be done
+    return libtextclassifier3::Status::OK;
+  }
+
+  // Update all references to the SchemaStore
+  schema_store_ = schema_store;
+  document_validator_.UpdateSchemaStore(schema_store);
+
+  // Append a tombstone for each deleted schema type. This way, we don't have
+  // to read out each document, check if the schema type has been deleted, and
+  // append a tombstone per-document.
+  for (const auto& schema_type :
+       set_schema_result.schema_types_deleted_by_name) {
+    // TODO(b/144458732): Implement a more robust version of
+    // ICING_RETURN_IF_ERROR that can support error logging.
+    libtextclassifier3::Status status =
+        document_log_->WriteProto(CreateSchemaTypeTombstone(schema_type))
+            .status();
+    if (!status.ok()) {
+      ICING_LOG(ERROR) << status.error_message()
+                       << "Failed to delete schema_type. schema_type = "
+                       << schema_type;
+      return status;
+    }
+  }
+
+  // Iterate over every DocumentId ever assigned, live or deleted.
+  int size = document_id_mapper_->num_elements();
+  for (DocumentId document_id = 0; document_id < size; document_id++) {
+    auto exists_or = DoesDocumentExistAndGetFileOffset(document_id);
+    if (absl_ports::IsNotFound(exists_or.status())) {
+      // Skip nonexistent documents
+      continue;
+    } else if (!exists_or.ok()) {
+      // Real error, pass up
+      return absl_ports::Annotate(
+          exists_or.status(),
+          IcingStringUtil::StringPrintf("Failed to retrieve DocumentId %d",
+                                        document_id));
+    }
+
+    // Guaranteed that the document exists now.
+    ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
+                           filter_cache_->Get(document_id));
+
+    if (set_schema_result.schema_types_deleted_by_id.count(
+            filter_data->schema_type_id()) != 0) {
+      // We already created a tombstone for this deleted type. Just update the
+      // derived files now.
+      ICING_RETURN_IF_ERROR(
+          document_id_mapper_->Set(document_id, kDocDeletedFlag));
+      continue;
+    }
+
+    // Check if we need to update the FilterCache entry for this document. It
+    // may have been assigned a different SchemaTypeId in the new SchemaStore.
+    bool update_filter_cache =
+        set_schema_result.old_schema_type_ids_changed.count(
+            filter_data->schema_type_id()) != 0;
+
+    // Check if we need to revalidate this document if the type is now
+    // incompatible
+    bool revalidate_document =
+        set_schema_result.schema_types_incompatible_by_id.count(
+            filter_data->schema_type_id()) != 0;
+
+    // Only pay the cost of reading the document from the log if one of the
+    // two updates above is actually needed.
+    if (update_filter_cache || revalidate_document) {
+      ICING_ASSIGN_OR_RETURN(DocumentProto document, Get(document_id));
+
+      if (update_filter_cache) {
+        ICING_ASSIGN_OR_RETURN(
+            SchemaTypeId schema_type_id,
+            schema_store_->GetSchemaTypeId(document.schema()));
+        filter_cache_->mutable_array()[document_id].set_schema_type_id(
+            schema_type_id);
+      }
+
+      if (revalidate_document) {
+        if (!document_validator_.Validate(document).ok()) {
+          // Document is no longer valid with the new SchemaStore. Mark as
+          // deleted
+          ICING_RETURN_IF_ERROR(Delete(document.namespace_(), document.uri()));
+        }
+      }
+    }
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+// TODO(b/121227117): Implement Optimize()
+libtextclassifier3::Status DocumentStore::Optimize() {
+  // Currently a no-op: space for deleted/expired documents is not yet
+  // reclaimed in place. See OptimizeInto() for the copy-based alternative.
+  return libtextclassifier3::Status::OK;
+}
+
+// Compacts the store by copying only live documents into a fresh
+// DocumentStore rooted at new_directory. DocumentIds are reassigned in the
+// new store, so id-based references held elsewhere become stale.
+libtextclassifier3::Status DocumentStore::OptimizeInto(
+    const std::string& new_directory) {
+  // Validates directory
+  if (new_directory == base_dir_) {
+    return absl_ports::InvalidArgumentError(
+        "New directory is the same as the current one.");
+  }
+
+  // Spin up a brand-new store in the target directory sharing our filesystem,
+  // clock, and schema store.
+  ICING_ASSIGN_OR_RETURN(auto new_doc_store,
+                         DocumentStore::Create(filesystem_, new_directory,
+                                               &clock_, schema_store_));
+
+  // Writes all valid docs into new document store (new directory)
+  int size = document_id_mapper_->num_elements();
+  for (DocumentId document_id = 0; document_id < size; document_id++) {
+    auto document_or = Get(document_id);
+    if (absl_ports::IsNotFound(document_or.status())) {
+      // Skip nonexistent documents
+      continue;
+    } else if (!document_or.ok()) {
+      // Real error, pass up
+      return absl_ports::Annotate(
+          document_or.status(),
+          IcingStringUtil::StringPrintf(
+              "Failed to retrieve Document for DocumentId %d", document_id));
+    }
+
+    // Guaranteed to have a document now.
+    DocumentProto document_to_keep = document_or.ValueOrDie();
+    // TODO(b/144458732): Implement a more robust version of
+    // ICING_RETURN_IF_ERROR that can support error logging.
+    libtextclassifier3::Status status =
+        new_doc_store->Put(std::move(document_to_keep)).status();
+    if (!status.ok()) {
+      ICING_LOG(ERROR) << status.error_message()
+                       << "Failed to write into new document store";
+      return status;
+    }
+  }
+
+  // Make sure everything copied so far is durable before the caller swaps
+  // directories.
+  ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk());
+  return libtextclassifier3::Status::OK;
+}
+
+// Overwrites the score-cache entry for document_id; errors from the
+// underlying FileBackedVector (e.g. out-of-range id) are passed through.
+libtextclassifier3::Status DocumentStore::UpdateDocumentAssociatedScoreCache(
+    DocumentId document_id, const DocumentAssociatedScoreData& score_data) {
+  return score_cache_->Set(document_id, score_data);
+}
+
+// Overwrites the filter-cache entry for document_id; errors from the
+// underlying FileBackedVector (e.g. out-of-range id) are passed through.
+libtextclassifier3::Status DocumentStore::UpdateFilterCache(
+    DocumentId document_id, const DocumentFilterData& filter_data) {
+  return filter_cache_->Set(document_id, filter_data);
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
new file mode 100644
index 0000000..018e19e
--- /dev/null
+++ b/icing/store/document-store.h
@@ -0,0 +1,450 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_DOCUMENT_STORE_H_
+#define ICING_STORE_DOCUMENT_STORE_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/file/file-backed-proto-log.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/document_wrapper.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-associated-score-data.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/store/key-mapper.h"
+#include "icing/util/clock.h"
+#include "icing/util/crc32.h"
+#include "icing/util/document-validator.h"
+
+namespace icing {
+namespace lib {
+
+// Provides storage interfaces for documents.
+class DocumentStore {
+ public:
+  struct Header {
+    static constexpr int32_t kMagic = 0x746f7265;
+
+    // Holds the magic as a quick sanity check against file corruption.
+    int32_t magic;
+
+    // Checksum of the DocumentStore's sub-component's checksums.
+    uint32_t checksum;
+  };
+
+  // Not copyable
+  DocumentStore(const DocumentStore&) = delete;
+  DocumentStore& operator=(const DocumentStore&) = delete;
+
+  // Persists and updates checksum of subcomponents.
+  ~DocumentStore();
+
+  // Factory method to create, initialize, and return a DocumentStore. The base
+  // directory is used to persist document store files. If document store was
+  // previously initialized with this directory, it will reload the files saved
+  // by the last instance.
+  //
+  // Does not take any ownership, and all pointers must refer to valid objects
+  // that outlive the one constructed.
+  //
+  // TODO(cassiewang): Consider returning a status indicating that derived
+  // files were regenerated. This may be helpful in logs.
+  //
+  // Returns:
+  //   A valid document store on success
+  //   INTERNAL_ERROR on IO error
+  static libtextclassifier3::StatusOr<std::unique_ptr<DocumentStore>> Create(
+      const Filesystem* filesystem, const std::string& base_dir,
+      const Clock* clock, const SchemaStore* schema_store);
+
+  // Returns the maximum DocumentId that the DocumentStore has assigned. If
+  // there has not been any DocumentIds assigned, i.e. the DocumentStore is
+  // empty, then kInvalidDocumentId is returned. This does not filter out
+  // DocumentIds of deleted documents.
+  DocumentId last_added_document_id() const {
+    if (document_id_mapper_->num_elements() == 0) {
+      return kInvalidDocumentId;
+    }
+    return document_id_mapper_->num_elements() - 1;
+  }
+
+  // Puts the document into document store.
+  //
+  // Returns:
+  //   A newly generated document id on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<DocumentId> Put(const DocumentProto& document);
+  libtextclassifier3::StatusOr<DocumentId> Put(DocumentProto&& document);
+
+  // Finds and returns the document identified by the given key (namespace +
+  // uri)
+  //
+  // Returns:
+  //   The document found on success
+  //   NOT_FOUND if the key doesn't exist or document has been deleted
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<DocumentProto> Get(std::string_view name_space,
+                                                  std::string_view uri) const;
+
+  // Finds and returns the document identified by the given document id
+  //
+  // Returns:
+  //   The document found on success
+  //   INVALID_ARGUMENT if document_id is less than 0 or greater than the
+  //     maximum value
+  //   NOT_FOUND if the document doesn't exist or has been deleted
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<DocumentProto> Get(DocumentId document_id) const;
+
+  // Returns true if there's an existing document associated with the given
+  // document id.
+  bool DoesDocumentExist(DocumentId document_id) const;
+
+  // Deletes the document identified by the given namespace and uri
+  //
+  // NOTE: Space is not reclaimed for deleted documents until Optimize() is
+  // called.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status Delete(std::string_view name_space,
+                                    std::string_view uri);
+
+  // Returns the NamespaceId of the string namespace
+  //
+  // Returns:
+  //   NamespaceId on success
+  //   NOT_FOUND if the namespace doesn't exist
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<NamespaceId> GetNamespaceId(
+      std::string_view name_space) const;
+
+  // Returns the DocumentAssociatedScoreData of the document specified by the
+  // DocumentId.
+  //
+  // NOTE: This does not check if the document exists and will return the
+  // DocumentAssociatedScoreData of the document even if it has been deleted.
+  // Users should check DoesDocumentExist(document_id) if they only want
+  // existing documents' DocumentAssociatedScoreData.
+  //
+  // Returns:
+  //   DocumentAssociatedScoreData on success
+  //   OUT_OF_RANGE if document_id is negative or exceeds previously seen
+  //     DocumentIds
+  libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
+  GetDocumentAssociatedScoreData(DocumentId document_id) const;
+
+  // Returns the DocumentFilterData of the document specified by the
+  // DocumentId.
+  //
+  // NOTE: This does not check if the document exists and will return the
+  // DocumentFilterData of the document even if it has been deleted. Users
+  // should check DoesDocumentExist(document_id) if they only want existing
+  // documents' DocumentFilterData.
+  //
+  // Returns:
+  //   DocumentFilterData on success
+  //   OUT_OF_RANGE if document_id is negative or exceeds previously seen
+  //     DocumentIds
+  libtextclassifier3::StatusOr<DocumentFilterData> GetDocumentFilterData(
+      DocumentId document_id) const;
+
+  // Deletes all documents belonging to the given namespace.
+  //
+  // NOTE: Space is not reclaimed for deleted documents until Optimize() is
+  // called.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status DeleteByNamespace(std::string_view name_space);
+
+  // Deletes all documents belonging to the given schema type
+  //
+  // NOTE: Space is not reclaimed for deleted documents until Optimize() is
+  // called.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status DeleteBySchemaType(std::string_view schema_type);
+
+  // Syncs all the data and metadata changes to disk.
+  // Returns any encountered IO errors.
+  libtextclassifier3::Status PersistToDisk();
+
+  // Calculates and returns the disk usage in bytes.
+  //
+  // Returns:
+  //   Disk usage on success
+  //   INTERNAL_ERROR on IO error
+  //
+  // TODO(samzheng): consider returning a struct which has the breakdown of
+  // each component.
+  libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+
+  // Update any derived data off of the SchemaStore with the new SchemaStore.
+  // This may include pointers, SchemaTypeIds, etc.
+  //
+  // NOTE: This function may delete documents. A document may be invalidated by
+  // the new SchemaStore, such as failing validation or having its schema type
+  // deleted from the schema.
+  //
+  // This is best used if the caller is unsure about what's changed in the
+  // SchemaStore, and wants to update all information no matter what. If the
+  // caller does know what has changed, then it's recommended to call
+  // OptimizedUpdateSchemaStore.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status UpdateSchemaStore(const SchemaStore* schema_store);
+
+  // Performs the same functionality as UpdateSchemaStore, but this can be more
+  // optimized in terms of less disk reads and less work if we know exactly
+  // what's changed between the old and new SchemaStore.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status OptimizedUpdateSchemaStore(
+      const SchemaStore* schema_store,
+      const SchemaStore::SetSchemaResult& set_schema_result);
+
+  // Reduces internal file sizes by reclaiming space of deleted documents and
+  // regenerating derived files.
+  //
+  // NOTE: The tasks in this method are too expensive to be executed in
+  // real-time. The caller should decide how frequently and when to call this
+  // method based on device usage.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status Optimize();
+
+  // Copy data from current base directory into a new directory. Any outdated
+  // or deleted data won't be copied. During the process, document ids will be
+  // reassigned so any files / classes that are based on old document ids may
+  // be outdated.
+  //
+  // NOTE: The tasks in this method are too expensive to be executed in
+  // real-time. The caller should decide how frequently and when to call this
+  // method based on device usage.
+  //
+  // Returns:
+  //   OK on success
+  //   INVALID_ARGUMENT if new_directory is same as current base directory
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status OptimizeInto(const std::string& new_directory);
+
+  // Computes the combined checksum of the document store - includes the ground
+  // truth and all derived files.
+  //
+  // Returns:
+  //   Combined checksum on success
+  //   INTERNAL_ERROR on compute error
+  libtextclassifier3::StatusOr<Crc32> ComputeChecksum() const;
+
+ private:
+  // Use DocumentStore::Create() to instantiate.
+  DocumentStore(const Filesystem* filesystem, std::string_view base_dir,
+                const Clock* clock, const SchemaStore* schema_store);
+
+  const Filesystem* const filesystem_;
+  const std::string base_dir_;
+  const Clock& clock_;
+
+  // Handles the ground truth schema and all of the derived data off of the
+  // schema
+  const SchemaStore* schema_store_;
+
+  // Used to validate incoming documents
+  DocumentValidator document_validator_;
+
+  // A log used to store all documents, it serves as a ground truth of doc
+  // store. key_mapper_ and document_id_mapper_ can be regenerated from it.
+  std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log_;
+
+  // Key (namespace + uri) to DocumentId mapping
+  std::unique_ptr<KeyMapper<DocumentId>> document_key_mapper_;
+
+  // DocumentId to file offset mapping
+  std::unique_ptr<FileBackedVector<int64_t>> document_id_mapper_;
+
+  // A cache of document associated scores. The ground truth of the scores is
+  // DocumentProto stored in document_log_. This cache contains:
+  //   - Document score
+  //   - Document creation timestamp in seconds
+  std::unique_ptr<FileBackedVector<DocumentAssociatedScoreData>> score_cache_;
+
+  // A cache of data, indexed by DocumentId, used to filter documents.
+  // Currently contains:
+  //   - NamespaceId
+  //   - SchemaTypeId
+  //   - Expiration timestamp in seconds
+  std::unique_ptr<FileBackedVector<DocumentFilterData>> filter_cache_;
+
+  // Maps namespaces to a densely-assigned unique id. Namespaces are assigned
+  // an id when the first document belonging to that namespace is added to the
+  // DocumentStore. Namespaces may be removed from the mapper during
+  // compaction.
+  std::unique_ptr<KeyMapper<NamespaceId>> namespace_mapper_;
+
+  // Used internally to indicate whether the class has been initialized. This
+  // is to guard against cases where the object has been created, but
+  // Initialize fails in the constructor. If we have successfully exited the
+  // constructor, then this field can be ignored. Clients of DocumentStore
+  // should not need to worry about this field.
+  bool initialized_ = false;
+
+  libtextclassifier3::Status Initialize();
+
+  // Creates sub-components and verifies the integrity of each sub-component.
+  //
+  // Returns an error if subcomponents failed to initialize successfully.
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status InitializeDerivedFiles();
+
+  // Re-generates all files derived from the ground truth: the document log.
+  //
+  // NOTE: if this function fails, the only thing we can do is to retry it
+  // until it succeeds or prevent the initialization of a DocumentStore. The
+  // DocumentStore object wouldn't work reliably if this fails.
+  //
+  // Steps:
+  //   1. Delete all derived files.
+  //   2. Iterate through document log, put data into new key mapper and
+  //      document_id mapper.
+  //   3. Create header and store the updated combined checksum
+  libtextclassifier3::Status RegenerateDerivedFiles();
+
+  // Resets the unique_ptr to the document_key_mapper, deletes the underlying
+  // file, and re-creates a new instance of the document_key_mapper.
+  //
+  // Returns OK or any IO errors.
+  libtextclassifier3::Status ResetDocumentKeyMapper();
+
+  // Resets the unique_ptr to the document_id_mapper, deletes the underlying
+  // file, and re-creates a new instance of the document_id_mapper.
+  //
+  // Returns OK or any IO errors.
+  libtextclassifier3::Status ResetDocumentIdMapper();
+
+  // Resets the unique_ptr to the score_cache, deletes the underlying file, and
+  // re-creates a new instance of the score_cache.
+  //
+  // Returns OK or any IO errors.
+  libtextclassifier3::Status ResetDocumentAssociatedScoreCache();
+
+  // Resets the unique_ptr to the filter_cache, deletes the underlying file,
+  // and re-creates a new instance of the filter_cache.
+  //
+  // Returns OK or any IO errors.
+  libtextclassifier3::Status ResetFilterCache();
+
+  // Resets the unique_ptr to the namespace_mapper, deletes the underlying
+  // file, and re-creates a new instance of the namespace_mapper.
+  //
+  // Returns OK or any IO errors.
+  libtextclassifier3::Status ResetNamespaceMapper();
+
+  // Checks if the header exists already. This does not create the header file
+  // if it doesn't exist.
+  bool HeaderExists();
+
+  // Update and replace the header file. Creates the header file if it doesn't
+  // exist.
+  libtextclassifier3::Status UpdateHeader(const Crc32& checksum);
+
+  // Update derived files that `name_space` has been deleted. This is primarily
+  // useful if we're trying to update derived files when we've already seen a
+  // namespace tombstone, and don't need to write another tombstone.
+  //
+  // NOTE: Space is not reclaimed in the derived files until Optimize() is
+  // called.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status UpdateDerivedFilesNamespaceDeleted(
+      std::string_view name_space);
+
+  // Update derived files that the schema type schema_type_id has been deleted.
+  // This is primarily useful if we're trying to update derived files when
+  // we've already seen a schema type tombstone, and don't need to write
+  // another tombstone.
+  //
+  // NOTE: Space is not reclaimed in the derived files until Optimize() is
+  // called.
+  //
+  // Returns:
+  //   OK on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::Status UpdateDerivedFilesSchemaTypeDeleted(
+      SchemaTypeId schema_type_id);
+
+  // Helper method to find a DocumentId that is associated with the given
+  // namespace and uri.
+  //
+  // NOTE: The DocumentId may refer to a invalid document (deleted
+  // or expired). Callers can call DoesDocumentExist(document_id) to ensure it
+  // refers to a valid Document.
+  //
+  // Returns:
+  //   A DocumentId on success
+  //   NOT_FOUND if the key doesn't exist
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<DocumentId> GetDocumentId(
+      std::string_view name_space, std::string_view uri) const;
+
+  // Helper method to validate the document id and return the file offset of
+  // the associated document in document_log_.
+  //
+  // This can be a more informative call than just DoesDocumentExist because it
+  // can return more status errors on whether the Document actually doesn't
+  // exist or if there was an internal error while accessing files.
+  //
+  // Returns:
+  //   The file offset on success
+  //   INVALID_ARGUMENT if document_id is less than 0 or greater than the
+  //     maximum value
+  //   NOT_FOUND if the document doesn't exist (i.e. deleted or expired)
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<int64_t> DoesDocumentExistAndGetFileOffset(
+      DocumentId document_id) const;
+
+  // Updates the entry in the score cache for document_id.
+  libtextclassifier3::Status UpdateDocumentAssociatedScoreCache(
+      DocumentId document_id, const DocumentAssociatedScoreData& score_data);
+
+  // Updates the entry in the filter cache for document_id.
+  libtextclassifier3::Status UpdateFilterCache(
+      DocumentId document_id, const DocumentFilterData& filter_data);
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_DOCUMENT_STORE_H_
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
new file mode 100644
index 0000000..45e2b9c
--- /dev/null
+++ b/icing/store/document-store_test.cc
@@ -0,0 +1,1886 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/document-store.h"
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/document-builder.h"
+#include "icing/file/file-backed-vector.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-filter-data.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::_;
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::IsFalse;
+using ::testing::IsTrue;
+using ::testing::Not;
+using ::testing::Return;
+
+class DocumentStoreTest : public ::testing::Test {
+ protected:
+ DocumentStoreTest()
+ : test_dir_(GetTestTempDir() + "/icing"),
+ document_store_dir_(test_dir_ + "/document_store"),
+ schema_store_dir_(test_dir_ + "/schema_store") {
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+ test_document1_ =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampSecs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlSecs(document1_ttl_)
+ .Build();
+ test_document2_ =
+ DocumentBuilder()
+ .SetKey("icing", "email/2")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo 2")
+ .AddStringProperty("body", "body bar 2")
+ .SetScore(document2_score_)
+ .SetCreationTimestampSecs(
+ document2_creation_timestamp_) // A random timestamp
+ .SetTtlSecs(document2_ttl_)
+ .Build();
+ }
+
+ void SetUp() override {
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("email");
+
+ auto subject = type_config->add_properties();
+ subject->set_property_name("subject");
+ subject->set_data_type(PropertyConfigProto::DataType::STRING);
+ subject->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ subject->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ subject->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ auto body = type_config->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ body->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ body->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_, SchemaStore::Create(&filesystem_, schema_store_dir_));
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ }
+
+ const Filesystem filesystem_;
+ const std::string test_dir_;
+ FakeClock fake_clock_;
+ const std::string document_store_dir_;
+ const std::string schema_store_dir_;
+ DocumentProto test_document1_;
+ DocumentProto test_document2_;
+ std::unique_ptr<SchemaStore> schema_store_;
+
+ // Document1 values
+ const int document1_score_ = 1;
+ const int64_t document1_creation_timestamp_ = 1;
+ const int64_t document1_ttl_ = 0;
+ const int64_t document1_expiration_timestamp_ =
+      std::numeric_limits<int64_t>::max();  // special case where ttl=0
+
+ // Document2 values
+ const int document2_score_ = 2;
+ const int64_t document2_creation_timestamp_ = 2;
+ const int64_t document2_ttl_ = 1;
+ const int64_t document2_expiration_timestamp_ = 3; // creation + ttl
+};
+
+TEST_F(DocumentStoreTest, InitializationFailure) {
+ MockFilesystem mock_filesystem;
+ ON_CALL(mock_filesystem, OpenForWrite(_)).WillByDefault(Return(false));
+
+ EXPECT_THAT(DocumentStore::Create(&mock_filesystem, document_store_dir_,
+ &fake_clock_, schema_store_.get()),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(DocumentStoreTest, PutAndGetInSameNamespaceOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ // Both documents have namespace of "icing"
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(test_document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(DocumentProto(test_document2_)));
+
+ EXPECT_THAT(doc_store->Get(document_id1),
+ IsOkAndHolds(EqualsProto(test_document1_)));
+ EXPECT_THAT(doc_store->Get(document_id2),
+ IsOkAndHolds(EqualsProto(test_document2_)));
+}
+
+TEST_F(DocumentStoreTest, PutAndGetAcrossNamespacesOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ // Can handle different namespaces with same url
+ DocumentProto foo_document = DocumentBuilder()
+ .SetKey("foo", "1")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(0)
+ .Build();
+ DocumentProto bar_document = DocumentBuilder()
+ .SetKey("bar", "1")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(0)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(foo_document));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(DocumentProto(bar_document)));
+
+ EXPECT_THAT(doc_store->Get(document_id1),
+ IsOkAndHolds(EqualsProto(foo_document)));
+ EXPECT_THAT(doc_store->Get(document_id2),
+ IsOkAndHolds(EqualsProto(bar_document)));
+}
+
+// Validates that putting a document with the same key will overwrite previous
+// document and old doc ids are not getting reused.
+TEST_F(DocumentStoreTest, PutSameKey) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ // Creates two documents with the same key (namespace + uri)
+ DocumentProto document1 = DocumentProto(test_document1_);
+ DocumentProto document2 = DocumentProto(test_document1_);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document2));
+ EXPECT_THAT(document_id1, Not(document_id2));
+ // document2 overrides document1, so document_id1 becomes invalid
+ EXPECT_THAT(doc_store->Get(document_id1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(doc_store->Get(document_id2),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ // Makes sure that old doc ids are not getting reused.
+ DocumentProto document3 = DocumentProto(test_document1_);
+ document3.set_uri("another/uri/1");
+ EXPECT_THAT(doc_store->Put(document3), IsOkAndHolds(Not(document_id1)));
+}
+
+TEST_F(DocumentStoreTest, IsDocumentExisting) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(DocumentProto(test_document1_)));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(DocumentProto(test_document2_)));
+
+ EXPECT_THAT(doc_store->DoesDocumentExist(document_id1), IsTrue());
+ EXPECT_THAT(doc_store->DoesDocumentExist(document_id2), IsTrue());
+
+ DocumentId invalid_document_id_negative = -1;
+ EXPECT_THAT(doc_store->DoesDocumentExist(invalid_document_id_negative),
+ IsFalse());
+
+ DocumentId invalid_document_id_greater_than_max = kMaxDocumentId + 2;
+ EXPECT_THAT(
+ doc_store->DoesDocumentExist(invalid_document_id_greater_than_max),
+ IsFalse());
+
+ EXPECT_THAT(doc_store->DoesDocumentExist(kInvalidDocumentId), IsFalse());
+
+ DocumentId invalid_document_id_out_of_range = document_id2 + 1;
+ EXPECT_THAT(doc_store->DoesDocumentExist(invalid_document_id_out_of_range),
+ IsFalse());
+}
+
+TEST_F(DocumentStoreTest, GetDeletedDocumentNotFound) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_EXPECT_OK(document_store->Put(DocumentProto(test_document1_)));
+ EXPECT_THAT(
+ document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
+ IsOkAndHolds(EqualsProto(test_document1_)));
+
+ ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
+ test_document1_.uri()));
+ EXPECT_THAT(
+ document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(DocumentStoreTest, GetExpiredDocumentNotFound) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(10)
+ .SetTtlSecs(100)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_EXPECT_OK(document_store->Put(document));
+ EXPECT_THAT(document_store->Get("namespace", "uri"),
+ IsOkAndHolds(EqualsProto(document)));
+
+ // Some arbitrary time before the document's creation time (10) + ttl (100)
+ fake_clock_.SetSeconds(109);
+ EXPECT_THAT(document_store->Get("namespace", "uri"),
+ IsOkAndHolds(EqualsProto(document)));
+
+ // Some arbitrary time equal to the document's creation time (10) + ttl (100)
+ fake_clock_.SetSeconds(110);
+ EXPECT_THAT(document_store->Get("namespace", "uri"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Some arbitrary time past the document's creation time (10) + ttl (100)
+ fake_clock_.SetSeconds(200);
+ EXPECT_THAT(document_store->Get("namespace", "uri"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(DocumentStoreTest, GetInvalidDocumentId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(DocumentProto(test_document1_)));
+
+ DocumentId invalid_document_id_negative = -1;
+ EXPECT_THAT(doc_store->Get(invalid_document_id_negative),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ DocumentId invalid_document_id_greater_than_max = kMaxDocumentId + 2;
+ EXPECT_THAT(doc_store->Get(invalid_document_id_greater_than_max),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ EXPECT_THAT(doc_store->Get(kInvalidDocumentId),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ DocumentId invalid_document_id_out_of_range = document_id + 1;
+ EXPECT_THAT(doc_store->Get(invalid_document_id_out_of_range),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(DocumentStoreTest, Delete) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ // Get() after Delete() returns NOT_FOUND
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(DocumentProto(test_document1_)));
+ EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
+ EXPECT_THAT(doc_store->Get(document_id),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Validates that deleting something non-existing won't append anything to
+ // ground truth
+ int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ // icing + email/1 has already been deleted.
+ EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
+ int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+}
+
+TEST_F(DocumentStoreTest, DeleteByNamespaceOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ DocumentProto document1 = test_document1_;
+ document1.set_namespace_("namespace.1");
+ document1.set_uri("uri1");
+ ICING_ASSERT_OK(doc_store->Put(document1));
+
+ DocumentProto document2 = test_document1_;
+ document2.set_namespace_("namespace.2");
+ document2.set_uri("uri1");
+ ICING_ASSERT_OK(doc_store->Put(document2));
+
+ DocumentProto document3 = test_document1_;
+ document3.set_namespace_("namespace.3");
+ document3.set_uri("uri1");
+ ICING_ASSERT_OK(doc_store->Put(document3));
+
+ DocumentProto document4 = test_document1_;
+ document4.set_namespace_("namespace.1");
+ document4.set_uri("uri2");
+ ICING_ASSERT_OK(doc_store->Put(document4));
+
+  // DELETE namespace.1. document1 and document4 should be deleted. document2
+ // and document3 should still be retrievable.
+ ICING_EXPECT_OK(doc_store->DeleteByNamespace("namespace.1"));
+ EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(doc_store->Get(document2.namespace_(), document2.uri()),
+ IsOkAndHolds(EqualsProto(document2)));
+ EXPECT_THAT(doc_store->Get(document3.namespace_(), document3.uri()),
+ IsOkAndHolds(EqualsProto(document3)));
+ EXPECT_THAT(doc_store->Get(document4.namespace_(), document4.uri()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ // Validates that deleting something non-existing won't append anything to
+ // ground truth
+ int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+
+ ICING_EXPECT_OK(doc_store->DeleteByNamespace("nonexistent_namespace"));
+
+ int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+}
+
+TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
+ DocumentProto document1 = test_document1_;
+ document1.set_namespace_("namespace.1");
+ document1.set_uri("uri1");
+
+ DocumentProto document2 = test_document1_;
+ document2.set_namespace_("namespace.2");
+ document2.set_uri("uri1");
+
+ DocumentProto document3 = test_document1_;
+ document3.set_namespace_("namespace.3");
+ document3.set_uri("uri1");
+
+ DocumentProto document4 = test_document1_;
+ document4.set_namespace_("namespace.1");
+ document4.set_uri("uri2");
+
+ int64_t ground_truth_size_before;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK(doc_store->Put(document1));
+ ICING_ASSERT_OK(doc_store->Put(document2));
+ ICING_ASSERT_OK(doc_store->Put(document3));
+ ICING_ASSERT_OK(doc_store->Put(document4));
+
+    // DELETE namespace.1. document1 and document4 should be deleted. document2
+ // and document3 should still be retrievable.
+ ICING_EXPECT_OK(doc_store->DeleteByNamespace("namespace.1"));
+
+ ground_truth_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ } // Destructors should update checksum and persist all data to file.
+
+ // Change the DocStore's header combined checksum so that it won't match the
+ // recalculated checksum on initialization. This will force a regeneration of
+ // derived files from ground truth.
+ const std::string header_file =
+ absl_ports::StrCat(document_store_dir_, "/document_store_header");
+ DocumentStore::Header header;
+ header.magic = DocumentStore::Header::kMagic;
+ header.checksum = 10; // Arbitrary garbage checksum
+ filesystem_.DeleteFile(header_file.c_str());
+ filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+
+ // Successfully recover from a corrupt derived file issue.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ // Make sure we didn't add anything to the ground truth after we recovered.
+ int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+
+ EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(doc_store->Get(document2.namespace_(), document2.uri()),
+ IsOkAndHolds(EqualsProto(document2)));
+ EXPECT_THAT(doc_store->Get(document3.namespace_(), document3.uri()),
+ IsOkAndHolds(EqualsProto(document3)));
+ EXPECT_THAT(doc_store->Get(document4.namespace_(), document4.uri()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("email");
+ type_config = schema.add_types();
+ type_config->set_schema_type("message");
+ type_config = schema.add_types();
+ type_config->set_schema_type("person");
+
+ std::string schema_store_dir = schema_store_dir_ + "_custom";
+ filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir));
+
+ ICING_ASSERT_OK(schema_store->SetSchema(schema));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+
+ DocumentProto email_document_1 = DocumentBuilder()
+ .SetKey("namespace1", "1")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(1)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_1_document_id,
+ document_store->Put(email_document_1));
+
+ DocumentProto email_document_2 = DocumentBuilder()
+ .SetKey("namespace2", "2")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(1)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_2_document_id,
+ document_store->Put(email_document_2));
+
+ DocumentProto message_document = DocumentBuilder()
+ .SetKey("namespace", "3")
+ .SetSchema("message")
+ .SetCreationTimestampSecs(1)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store->Put(message_document));
+
+ DocumentProto person_document = DocumentBuilder()
+ .SetKey("namespace", "4")
+ .SetSchema("person")
+ .SetCreationTimestampSecs(1)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id,
+ document_store->Put(person_document));
+
+ // Delete the "email" type and ensure that it works across both
+ // email_document's namespaces. And that other documents aren't affected.
+ ICING_EXPECT_OK(document_store->DeleteBySchemaType("email"));
+ EXPECT_THAT(document_store->Get(email_1_document_id),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(email_2_document_id),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(message_document_id),
+ IsOkAndHolds(EqualsProto(message_document)));
+ EXPECT_THAT(document_store->Get(person_document_id),
+ IsOkAndHolds(EqualsProto(person_document)));
+
+ // Delete the "message" type and check that other documents aren't affected
+ ICING_EXPECT_OK(document_store->DeleteBySchemaType("message"));
+ EXPECT_THAT(document_store->Get(email_1_document_id),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(email_2_document_id),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(message_document_id),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(person_document_id),
+ IsOkAndHolds(EqualsProto(person_document)));
+}
+
+TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ // Validates that deleting something non-existing won't append anything to
+ // ground truth
+ int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+
+ ICING_EXPECT_OK(document_store->DeleteBySchemaType("nonexistent_type"));
+
+ int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+
+ EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+}
+
+TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("email");
+ type_config = schema.add_types();
+ type_config->set_schema_type("message");
+
+ std::string schema_store_dir = schema_store_dir_ + "_custom";
+ filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir));
+
+ ICING_ASSERT_OK(schema_store->SetSchema(schema));
+
+ DocumentId email_document_id;
+ DocumentId message_document_id;
+
+ DocumentProto email_document = DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(1)
+ .Build();
+
+ DocumentProto message_document = DocumentBuilder()
+ .SetKey("namespace", "2")
+ .SetSchema("message")
+ .SetCreationTimestampSecs(1)
+ .Build();
+ int64_t ground_truth_size_before;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(email_document_id,
+ document_store->Put(email_document));
+ ICING_ASSERT_OK_AND_ASSIGN(message_document_id,
+ document_store->Put(message_document));
+
+ // Delete "email". "message" documents should still be retrievable.
+ ICING_EXPECT_OK(document_store->DeleteBySchemaType("email"));
+
+ ground_truth_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ } // Destructors should update checksum and persist all data to file.
+
+ // Change the DocumentStore's header combined checksum so that it won't match
+ // the recalculated checksum on initialization. This will force a regeneration
+ // of derived files from ground truth.
+ const std::string header_file =
+ absl_ports::StrCat(document_store_dir_, "/document_store_header");
+ DocumentStore::Header header;
+ header.magic = DocumentStore::Header::kMagic;
+ header.checksum = 10; // Arbitrary garbage checksum
+ filesystem_.DeleteFile(header_file.c_str());
+ filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+
+ // Successfully recover from a corrupt derived file issue.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+
+ // Make sure we didn't add anything to the ground truth after we recovered.
+ int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+
+ EXPECT_THAT(document_store->Get(email_document_id),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(message_document_id),
+ IsOkAndHolds(EqualsProto(message_document)));
+}
+
+TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("email");
+ type_config = schema.add_types();
+ type_config->set_schema_type("message");
+
+ std::string schema_store_dir = schema_store_dir_ + "_custom";
+ filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir));
+
+ ICING_ASSERT_OK(schema_store->SetSchema(schema));
+
+ DocumentId email_document_id;
+ DocumentId message_document_id;
+
+ DocumentProto email_document = DocumentBuilder()
+ .SetKey("namespace", "email")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(1)
+ .Build();
+
+ DocumentProto message_document = DocumentBuilder()
+ .SetKey("namespace", "message")
+ .SetSchema("message")
+ .SetCreationTimestampSecs(1)
+ .Build();
+ int64_t ground_truth_size_before;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(email_document_id,
+ document_store->Put(email_document));
+ ICING_ASSERT_OK_AND_ASSIGN(message_document_id,
+ document_store->Put(message_document));
+
+ // Delete "email". "message" documents should still be retrievable.
+ ICING_EXPECT_OK(document_store->DeleteBySchemaType("email"));
+
+ EXPECT_THAT(document_store->Get(email_document_id),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(message_document_id),
+ IsOkAndHolds(EqualsProto(message_document)));
+
+ ground_truth_size_before = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ } // Destructors should update checksum and persist all data to file.
+
+ // Change the DocumentStore's header combined checksum so that it won't match
+ // the recalculated checksum on initialization. This will force a regeneration
+ // of derived files from ground truth.
+ const std::string header_file =
+ absl_ports::StrCat(document_store_dir_, "/document_store_header");
+ DocumentStore::Header header;
+ header.magic = DocumentStore::Header::kMagic;
+ header.checksum = 10; // Arbitrary garbage checksum
+ filesystem_.DeleteFile(header_file.c_str());
+ filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+
+ SchemaProto new_schema;
+ type_config = new_schema.add_types();
+ type_config->set_schema_type("message");
+
+ ICING_EXPECT_OK(schema_store->SetSchema(
+ new_schema, /*ignore_errors_and_delete_documents=*/true));
+
+ // Successfully recover from a corrupt derived file issue.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+
+ // Make sure we didn't add anything to the ground truth after we recovered.
+ int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
+ EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+
+ EXPECT_THAT(document_store->Get(email_document_id),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(document_store->Get(message_document_id),
+ IsOkAndHolds(EqualsProto(message_document)));
+}
+
+TEST_F(DocumentStoreTest, OptimizeInto) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(100)
+ .SetTtlSecs(1000)
+ .Build();
+
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(100)
+ .SetTtlSecs(1000)
+ .Build();
+
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(100)
+ .SetTtlSecs(100)
+ .Build();
+
+ // Nothing should have expired yet.
+ fake_clock_.SetSeconds(100);
+
+ ICING_ASSERT_OK(doc_store->Put(document1));
+ ICING_ASSERT_OK(doc_store->Put(document2));
+ ICING_ASSERT_OK(doc_store->Put(document3));
+
+ std::string original_document_log = document_store_dir_ + "/document_log";
+ int64_t original_size =
+ filesystem_.GetFileSize(original_document_log.c_str());
+
+ // Optimizing into the same directory is not allowed
+ EXPECT_THAT(doc_store->OptimizeInto(document_store_dir_),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("directory is the same")));
+
+ std::string optimized_dir = document_store_dir_ + "_optimize";
+ std::string optimized_document_log = optimized_dir + "/document_log";
+
+ // Validates that the optimized document log has the same size if nothing is
+ // deleted
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ ICING_ASSERT_OK(doc_store->OptimizeInto(optimized_dir));
+ int64_t optimized_size1 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_EQ(original_size, optimized_size1);
+
+ // Validates that the optimized document log has a smaller size if something
+ // is deleted
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ ICING_ASSERT_OK(doc_store->Delete("namespace", "uri1"));
+ ICING_ASSERT_OK(doc_store->OptimizeInto(optimized_dir));
+ int64_t optimized_size2 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(original_size, Gt(optimized_size2));
+
+ // Document3 has expired since this is past its creation (100) + ttl (100).
+ // But document1 and document2 should be fine since their ttl's were 1000.
+ fake_clock_.SetSeconds(300);
+
+ // Validates that the optimized document log has a smaller size if something
+ // expired
+ ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
+ ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
+ ICING_ASSERT_OK(doc_store->OptimizeInto(optimized_dir));
+ int64_t optimized_size3 =
+ filesystem_.GetFileSize(optimized_document_log.c_str());
+ EXPECT_THAT(optimized_size2, Gt(optimized_size3));
+}
+
+// Verifies that DocumentStore::Create survives a corrupted tail in the
+// ground-truth document log: un-checksummed bytes appended to the log are
+// discarded on initialization, and all state written before the corruption
+// (documents, deletions, derived filter/score caches) is preserved.
+TEST_F(DocumentStoreTest, ShouldRecoverFromDataLoss) {
+  DocumentId document_id1, document_id2;
+  {
+    // Can put and delete fine.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<DocumentStore> doc_store,
+        DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                              schema_store_.get()));
+    ICING_ASSERT_OK_AND_ASSIGN(document_id1,
+                               doc_store->Put(DocumentProto(test_document1_)));
+    ICING_ASSERT_OK_AND_ASSIGN(document_id2,
+                               doc_store->Put(DocumentProto(test_document2_)));
+    EXPECT_THAT(doc_store->Get(document_id1),
+                IsOkAndHolds(EqualsProto(test_document1_)));
+    EXPECT_THAT(doc_store->Get(document_id2),
+                IsOkAndHolds(EqualsProto(test_document2_)));
+    EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
+    EXPECT_THAT(doc_store->Get(document_id1),
+                StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+    EXPECT_THAT(doc_store->Get(document_id2),
+                IsOkAndHolds(EqualsProto(test_document2_)));
+  }
+
+  // "Corrupt" the content written in the log by adding non-checksummed data to
+  // it. This will mess up the checksum of the proto log, forcing it to rewind
+  // to the last saved point.
+  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+  const std::string serialized_document = document.SerializeAsString();
+
+  const std::string document_log_file =
+      absl_ports::StrCat(document_store_dir_, "/document_log");
+  int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
+  filesystem_.PWrite(document_log_file.c_str(), file_size,
+                     serialized_document.data(), serialized_document.size());
+
+  // Successfully recover from a data loss issue.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+  EXPECT_THAT(doc_store->Get(document_id1),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  EXPECT_THAT(doc_store->Get(document_id2),
+              IsOkAndHolds(EqualsProto(test_document2_)));
+
+  // Checks derived filter cache
+  EXPECT_THAT(doc_store->GetDocumentFilterData(document_id2),
+              IsOkAndHolds(DocumentFilterData(
+                  /*namespace_id=*/0,
+                  /*schema_type_id=*/0, document2_expiration_timestamp_)));
+  // Checks derived score cache
+  EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id2),
+              IsOkAndHolds(DocumentAssociatedScoreData(
+                  document2_score_, document2_creation_timestamp_)));
+}
+
+// Verifies that DocumentStore::Create recovers when a derived file (here the
+// document-id mapper) has been tampered with: the corrupt derived file is
+// detected and regenerated from the ground-truth document log, leaving the
+// documents, deletions, and derived caches in their pre-corruption state.
+TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
+  DocumentId document_id1, document_id2;
+  {
+    // Can put and delete fine.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<DocumentStore> doc_store,
+        DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                              schema_store_.get()));
+    ICING_ASSERT_OK_AND_ASSIGN(document_id1,
+                               doc_store->Put(DocumentProto(test_document1_)));
+    ICING_ASSERT_OK_AND_ASSIGN(document_id2,
+                               doc_store->Put(DocumentProto(test_document2_)));
+    EXPECT_THAT(doc_store->Get(document_id1),
+                IsOkAndHolds(EqualsProto(test_document1_)));
+    EXPECT_THAT(doc_store->Get(document_id2),
+                IsOkAndHolds(EqualsProto(test_document2_)));
+    EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
+    EXPECT_THAT(doc_store->Get(document_id1),
+                StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+    EXPECT_THAT(doc_store->Get(document_id2),
+                IsOkAndHolds(EqualsProto(test_document2_)));
+  }
+
+  // "Corrupt" one of the derived files by adding non-checksummed data to
+  // it. This will mess up the checksum and throw an error on the derived file's
+  // initialization.
+  const std::string document_id_mapper_file =
+      absl_ports::StrCat(document_store_dir_, "/document_id_mapper");
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<FileBackedVector<int64_t>> document_id_mapper,
+      FileBackedVector<int64_t>::Create(
+          filesystem_, document_id_mapper_file,
+          MemoryMappedFile::READ_WRITE_AUTO_SYNC));
+  // Arbitrary garbage mapping written behind the DocumentStore's back.
+  int64_t corrupt_document_id = 3;
+  int64_t corrupt_offset = 3;
+  EXPECT_THAT(document_id_mapper->Set(corrupt_document_id, corrupt_offset),
+              IsOk());
+
+  // Successfully recover from a corrupt derived file issue.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+  EXPECT_THAT(doc_store->Get(document_id1),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  EXPECT_THAT(doc_store->Get(document_id2),
+              IsOkAndHolds(EqualsProto(test_document2_)));
+
+  // Checks derived filter cache
+  EXPECT_THAT(doc_store->GetDocumentFilterData(document_id2),
+              IsOkAndHolds(DocumentFilterData(
+                  /*namespace_id=*/0,
+                  /*schema_type_id=*/0, document2_expiration_timestamp_)));
+  // Checks derived score cache
+  EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id2),
+              IsOkAndHolds(DocumentAssociatedScoreData(
+                  document2_score_, document2_creation_timestamp_)));
+}
+
+// Verifies that DocumentStore::Create recovers when the store's header carries
+// a stale/garbage combined checksum: the mismatch forces a regeneration of all
+// derived files from the ground-truth document log, and the resulting state
+// matches what was persisted before the header was clobbered.
+TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
+  DocumentId document_id1, document_id2;
+  {
+    // Can put and delete fine.
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<DocumentStore> doc_store,
+        DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                              schema_store_.get()));
+    ICING_ASSERT_OK_AND_ASSIGN(document_id1,
+                               doc_store->Put(DocumentProto(test_document1_)));
+    ICING_ASSERT_OK_AND_ASSIGN(document_id2,
+                               doc_store->Put(DocumentProto(test_document2_)));
+    EXPECT_THAT(doc_store->Get(document_id1),
+                IsOkAndHolds(EqualsProto(test_document1_)));
+    EXPECT_THAT(doc_store->Get(document_id2),
+                IsOkAndHolds(EqualsProto(test_document2_)));
+    EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
+    EXPECT_THAT(doc_store->Get(document_id1),
+                StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+    EXPECT_THAT(doc_store->Get(document_id2),
+                IsOkAndHolds(EqualsProto(test_document2_)));
+  }
+
+  // Change the DocStore's header combined checksum so that it won't match the
+  // recalculated checksum on initialization. This will force a regeneration of
+  // derived files from ground truth.
+  const std::string header_file =
+      absl_ports::StrCat(document_store_dir_, "/document_store_header");
+  DocumentStore::Header header;
+  header.magic = DocumentStore::Header::kMagic;
+  header.checksum = 10;  // Arbitrary garbage checksum
+  filesystem_.DeleteFile(header_file.c_str());
+  filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+
+  // Successfully recover from the bad header checksum; derived files are
+  // regenerated from the document log.
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+  EXPECT_THAT(doc_store->Get(document_id1),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  EXPECT_THAT(doc_store->Get(document_id2),
+              IsOkAndHolds(EqualsProto(test_document2_)));
+
+  // Checks derived filter cache
+  EXPECT_THAT(doc_store->GetDocumentFilterData(document_id2),
+              IsOkAndHolds(DocumentFilterData(
+                  /*namespace_id=*/0,
+                  /*schema_type_id=*/0, document2_expiration_timestamp_)));
+  // Checks derived score cache
+  EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id2),
+              IsOkAndHolds(DocumentAssociatedScoreData(
+                  document2_score_, document2_creation_timestamp_)));
+}
+
+// Verifies that GetDiskUsage reports a positive, monotonically growing size as
+// documents are added, and surfaces INTERNAL when the filesystem reports a bad
+// file size.
+TEST_F(DocumentStoreTest, GetDiskUsage) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+  // Even an empty store occupies some disk space (log + derived files).
+  ICING_ASSERT_OK_AND_ASSIGN(int64_t size1, doc_store->GetDiskUsage());
+  EXPECT_THAT(size1, Gt(0));
+
+  ICING_ASSERT_OK(doc_store->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(int64_t size2, doc_store->GetDiskUsage());
+  EXPECT_THAT(size2, Gt(size1));
+
+  ICING_ASSERT_OK(doc_store->Put(test_document2_));
+  EXPECT_THAT(doc_store->GetDiskUsage(), IsOkAndHolds(Gt(size2)));
+  // Destroy the store before re-opening the same directory with a mock.
+  doc_store.reset();
+
+  // Bad file system
+  MockFilesystem mock_filesystem;
+  ON_CALL(mock_filesystem, GetDiskUsage(A<const char *>()))
+      .WillByDefault(Return(Filesystem::kBadFileSize));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem,
+      DocumentStore::Create(&mock_filesystem, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+  EXPECT_THAT(doc_store_with_mock_filesystem->GetDiskUsage(),
+              StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+// Verifies last_added_document_id(): kInvalidDocumentId on an empty store,
+// then always the most recently assigned DocumentId — even after that
+// document is deleted.
+TEST_F(DocumentStoreTest, MaxDocumentId) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+
+  // Since the DocumentStore is empty, we get an invalid DocumentId
+  EXPECT_THAT(doc_store->last_added_document_id(), Eq(kInvalidDocumentId));
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             doc_store->Put(DocumentProto(test_document1_)));
+  EXPECT_THAT(doc_store->last_added_document_id(), Eq(document_id1));
+
+  // Still returns the last DocumentId even if it was deleted
+  ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
+  EXPECT_THAT(doc_store->last_added_document_id(), Eq(document_id1));
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             doc_store->Put(DocumentProto(test_document2_)));
+  EXPECT_THAT(doc_store->last_added_document_id(), Eq(document_id2));
+}
+
+// Verifies that NamespaceIds are assigned sequentially in the order each
+// namespace is first seen, and that the mapping survives document deletion.
+TEST_F(DocumentStoreTest, GetNamespaceId) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+
+  DocumentProto document_namespace1 =
+      DocumentBuilder().SetKey("namespace1", "1").SetSchema("email").Build();
+  DocumentProto document_namespace2 =
+      DocumentBuilder().SetKey("namespace2", "2").SetSchema("email").Build();
+
+  ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_namespace1)));
+  ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_namespace2)));
+
+  // NamespaceId of 0 since it was the first namespace seen by the DocumentStore
+  EXPECT_THAT(doc_store->GetNamespaceId("namespace1"), IsOkAndHolds(Eq(0)));
+
+  // NamespaceId of 1 since it was the second namespace seen by the
+  // DocumentStore
+  EXPECT_THAT(doc_store->GetNamespaceId("namespace2"), IsOkAndHolds(Eq(1)));
+
+  // NamespaceMapper doesn't care if the document has been deleted
+  EXPECT_THAT(doc_store->GetNamespaceId("namespace1"), IsOkAndHolds(Eq(0)));
+}
+
+// Verifies that two documents sharing one namespace reuse the same
+// NamespaceId — the namespace is only registered once.
+TEST_F(DocumentStoreTest, GetDuplicateNamespaceId) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+
+  DocumentProto document1 =
+      DocumentBuilder().SetKey("namespace", "1").SetSchema("email").Build();
+  DocumentProto document2 =
+      DocumentBuilder().SetKey("namespace", "2").SetSchema("email").Build();
+
+  ICING_ASSERT_OK(doc_store->Put(document1));
+  ICING_ASSERT_OK(doc_store->Put(document2));
+
+  // NamespaceId of 0 since it was the first namespace seen by the DocumentStore
+  EXPECT_THAT(doc_store->GetNamespaceId("namespace"), IsOkAndHolds(Eq(0)));
+}
+
+// Verifies that looking up a namespace that was never seen by the store
+// returns NOT_FOUND rather than fabricating an id.
+TEST_F(DocumentStoreTest, NonexistentNamespaceNotFound) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+
+  EXPECT_THAT(doc_store->GetNamespaceId("nonexistent_namespace"),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+// Verifies that GetDocumentFilterData keeps returning the same
+// DocumentFilterData for a DocumentId after the document is deleted — the
+// filter cache is indexed by id and is not purged on delete.
+TEST_F(DocumentStoreTest, FilterCacheHoldsDeletedDocumentData) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+                             doc_store->Put(test_document1_));
+
+  EXPECT_THAT(
+      doc_store->GetDocumentFilterData(document_id),
+      IsOkAndHolds(DocumentFilterData(
+          /*namespace_id=*/0,
+          /*schema_type_id=*/0,
+          /*expiration_timestamp_secs=*/document1_expiration_timestamp_)));
+
+  // FilterCache doesn't care if the document has been deleted
+  ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
+  EXPECT_THAT(
+      doc_store->GetDocumentFilterData(document_id),
+      IsOkAndHolds(DocumentFilterData(
+          /*namespace_id=*/0,
+          /*schema_type_id=*/0,
+          /*expiration_timestamp_secs=*/document1_expiration_timestamp_)));
+}
+
+// Verifies expiration math for the common case: with creation_timestamp=100
+// and ttl=1000, the cached expiration timestamp is their sum, 1100.
+TEST_F(DocumentStoreTest,
+       ExpirationTimestampIsSumOfNonZeroTtlAndCreationTimestamp) {
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("namespace1", "1")
+                               .SetSchema("email")
+                               .SetCreationTimestampSecs(100)
+                               .SetTtlSecs(1000)
+                               .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
+
+  EXPECT_THAT(
+      doc_store->GetDocumentFilterData(document_id),
+      IsOkAndHolds(DocumentFilterData(/*namespace_id=*/0,
+                                      /*schema_type_id=*/0,
+                                      /*expiration_timestamp_secs=*/1100)));
+}
+
+// Verifies that a ttl of 0 means "never expires": the cached expiration
+// timestamp is int64 max rather than creation_timestamp + 0.
+TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxIfTtlIsZero) {
+  DocumentProto document = DocumentBuilder()
+                               .SetKey("namespace1", "1")
+                               .SetSchema("email")
+                               .SetCreationTimestampSecs(100)
+                               .SetTtlSecs(0)
+                               .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
+
+  EXPECT_THAT(
+      doc_store->GetDocumentFilterData(document_id),
+      IsOkAndHolds(DocumentFilterData(
+          /*namespace_id=*/0,
+          /*schema_type_id=*/0,
+          /*expiration_timestamp_secs=*/std::numeric_limits<int64_t>::max())));
+}
+
+// Verifies that creation_timestamp + ttl saturates to int64 max instead of
+// overflowing when the sum would exceed the int64 range.
+TEST_F(DocumentStoreTest, ExpirationTimestampIsInt64MaxOnOverflow) {
+  DocumentProto document =
+      DocumentBuilder()
+          .SetKey("namespace1", "1")
+          .SetSchema("email")
+          .SetCreationTimestampSecs(std::numeric_limits<int64_t>::max() - 1)
+          .SetTtlSecs(std::numeric_limits<int64_t>::max() - 1)
+          .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
+
+  EXPECT_THAT(
+      doc_store->GetDocumentFilterData(document_id),
+      IsOkAndHolds(DocumentFilterData(
+          /*namespace_id=*/0,
+          /*schema_type_id=*/0,
+          /*expiration_timestamp_secs=*/std::numeric_limits<int64_t>::max())));
+}
+
+// Verifies that Put stamps the clock's current time onto a document whose
+// creation_timestamp_secs was left unset, and that the stamped value is
+// visible on a subsequent Get.
+TEST_F(DocumentStoreTest, CreationTimestampShouldBePopulated) {
+  // Creates a document without a given creation timestamp
+  DocumentProto document_without_creation_timestamp =
+      DocumentBuilder()
+          .SetKey("icing", "email/1")
+          .SetSchema("email")
+          .AddStringProperty("subject", "subject foo")
+          .AddStringProperty("body", "body bar")
+          .Build();
+
+  std::time_t fake_real_time = 100;
+  fake_clock_.SetSeconds(fake_real_time);
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id,
+      doc_store->Put(document_without_creation_timestamp));
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentProto document_with_creation_timestamp,
+                             doc_store->Get(document_id));
+
+  // Now the creation timestamp should be set by document store.
+  EXPECT_THAT(document_with_creation_timestamp.creation_timestamp_secs(),
+              Eq(fake_real_time));
+}
+
+// Verifies that the score cache records the default score (0) for a document
+// without an explicit score and the given score (5) for one that sets it.
+// NOTE(review): document1 and document2 share the same key
+// ("icing", "email/1"), so the second Put supersedes the first; the score
+// data for document_id1 is still retrievable from the cache. Confirm the
+// duplicate key is intentional rather than a typo for "email/2".
+TEST_F(DocumentStoreTest, ShouldWriteAndReadScoresCorrectly) {
+  DocumentProto document1 = DocumentBuilder()
+                                .SetKey("icing", "email/1")
+                                .SetSchema("email")
+                                .AddStringProperty("subject", "subject foo")
+                                // With default doc score 0
+                                .Build();
+  DocumentProto document2 = DocumentBuilder()
+                                .SetKey("icing", "email/1")
+                                .SetSchema("email")
+                                .AddStringProperty("subject", "subject foo")
+                                .SetScore(5)
+                                .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> doc_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+                             doc_store->Put(document1));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+                             doc_store->Put(document2));
+
+  EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id1),
+              IsOkAndHolds(DocumentAssociatedScoreData(
+                  /*document_score=*/0, /*creation_timestamp_secs=*/0)));
+
+  EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id2),
+              IsOkAndHolds(DocumentAssociatedScoreData(
+                  /*document_score=*/5, /*creation_timestamp_secs=*/0)));
+}
+
+// Verifies that ComputeChecksum is deterministic: two consecutive calls with
+// no intervening mutation return the same Crc32.
+TEST_F(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> document_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+
+  ICING_EXPECT_OK(document_store->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum());
+
+  // Calling ComputeChecksum again shouldn't change anything
+  EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum));
+}
+
+// Verifies that the checksum is a property of the persisted state, not the
+// in-memory instance: destroying and recreating the DocumentStore over the
+// same directory yields the same Crc32.
+TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> document_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+
+  ICING_EXPECT_OK(document_store->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum());
+
+  // Destroy the previous instance and recreate DocumentStore
+  document_store.reset();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      document_store, DocumentStore::Create(&filesystem_, document_store_dir_,
+                                            &fake_clock_, schema_store_.get()));
+
+  EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum));
+}
+
+// Verifies that mutating the store (adding another document) changes the
+// computed Crc32 — the checksum actually covers the stored content.
+TEST_F(DocumentStoreTest, ComputeChecksumChangesOnModification) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> document_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+
+  ICING_EXPECT_OK(document_store->Put(test_document1_));
+  ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum());
+
+  ICING_EXPECT_OK(document_store->Put(test_document2_));
+  EXPECT_THAT(document_store->ComputeChecksum(),
+              IsOkAndHolds(Not(Eq(checksum))));
+}
+
+// Verifies that regenerating derived files (forced here by a garbage header
+// checksum) tolerates documents whose schema type no longer exists in the
+// SchemaStore: the known "email" document keeps a valid SchemaTypeId while the
+// now-unknown "message" document gets SchemaTypeId -1, and all other filter
+// data (namespace id, expiration) is preserved for both.
+TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
+  const std::string schema_store_dir = schema_store_dir_ + "_custom";
+
+  DocumentId email_document_id;
+  NamespaceId email_namespace_id;
+  int64_t email_expiration_timestamp;
+  DocumentProto email_document = DocumentBuilder()
+                                     .SetKey("namespace", "email_uri")
+                                     .SetSchema("email")
+                                     .SetCreationTimestampSecs(0)
+                                     .Build();
+
+  DocumentId message_document_id;
+  NamespaceId message_namespace_id;
+  int64_t message_expiration_timestamp;
+  DocumentProto message_document = DocumentBuilder()
+                                       .SetKey("namespace", "message_uri")
+                                       .SetSchema("message")
+                                       .SetCreationTimestampSecs(0)
+                                       .Build();
+
+  {
+    // Set a schema with "email" and "message"
+    filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+    filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<SchemaStore> schema_store,
+        SchemaStore::Create(&filesystem_, schema_store_dir));
+    SchemaProto schema;
+    auto type_config = schema.add_types();
+    type_config->set_schema_type("email");
+    type_config = schema.add_types();
+    type_config->set_schema_type("message");
+    ICING_EXPECT_OK(schema_store->SetSchema(schema));
+
+    ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
+                               schema_store->GetSchemaTypeId("email"));
+    ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId message_schema_type_id,
+                               schema_store->GetSchemaTypeId("message"));
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        std::unique_ptr<DocumentStore> document_store,
+        DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                              schema_store.get()));
+
+    // Insert and verify an "email" document
+    ICING_ASSERT_OK_AND_ASSIGN(
+        email_document_id, document_store->Put(DocumentProto(email_document)));
+    EXPECT_THAT(document_store->Get(email_document_id),
+                IsOkAndHolds(EqualsProto(email_document)));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        DocumentFilterData email_data,
+        document_store->GetDocumentFilterData(email_document_id));
+    EXPECT_THAT(email_data.schema_type_id(), Eq(email_schema_type_id));
+    // Remember the pre-regeneration values for comparison below.
+    email_namespace_id = email_data.namespace_id();
+    email_expiration_timestamp = email_data.expiration_timestamp_secs();
+
+    // Insert and verify a "message" document
+    ICING_ASSERT_OK_AND_ASSIGN(
+        message_document_id,
+        document_store->Put(DocumentProto(message_document)));
+    EXPECT_THAT(document_store->Get(message_document_id),
+                IsOkAndHolds(EqualsProto(message_document)));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        DocumentFilterData message_data,
+        document_store->GetDocumentFilterData(message_document_id));
+    EXPECT_THAT(message_data.schema_type_id(), Eq(message_schema_type_id));
+    message_namespace_id = message_data.namespace_id();
+    message_expiration_timestamp = message_data.expiration_timestamp_secs();
+  }  // Everything destructs and commits changes to file
+
+  // Change the DocumentStore's header combined checksum so that it won't match
+  // the recalculated checksum on initialization. This will force a regeneration
+  // of derived files from ground truth.
+  const std::string header_file =
+      absl_ports::StrCat(document_store_dir_, "/document_store_header");
+  DocumentStore::Header header;
+  header.magic = DocumentStore::Header::kMagic;
+  header.checksum = 10;  // Arbitrary garbage checksum
+  filesystem_.DeleteFile(header_file.c_str());
+  filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+
+  // Change the schema so that we don't know of the Document's type anymore.
+  // Since we can't set backwards incompatible changes, we do some file-level
+  // hacks to "reset" the schema. Without a previously existing schema, the new
+  // schema isn't considered backwards incompatible
+  filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+  filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir));
+  SchemaProto schema;
+  auto type_config = schema.add_types();
+  type_config->set_schema_type("email");
+  ICING_EXPECT_OK(schema_store->SetSchema(schema));
+
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
+                             schema_store->GetSchemaTypeId("email"));
+
+  // Successfully recover from a corrupt derived file issue. We don't fail just
+  // because the "message" schema type is missing
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> document_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store.get()));
+
+  // "email" document is fine
+  EXPECT_THAT(document_store->Get(email_document_id),
+              IsOkAndHolds(EqualsProto(email_document)));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentFilterData email_data,
+      document_store->GetDocumentFilterData(email_document_id));
+  EXPECT_THAT(email_data.schema_type_id(), Eq(email_schema_type_id));
+  // Make sure that all the other fields are still valid/the same
+  EXPECT_THAT(email_data.namespace_id(), Eq(email_namespace_id));
+  EXPECT_THAT(email_data.expiration_timestamp_secs(),
+              Eq(email_expiration_timestamp));
+
+  // "message" document has an invalid SchemaTypeId
+  EXPECT_THAT(document_store->Get(message_document_id),
+              IsOkAndHolds(EqualsProto(message_document)));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentFilterData message_data,
+      document_store->GetDocumentFilterData(message_document_id));
+  EXPECT_THAT(message_data.schema_type_id(), Eq(-1));
+  // Make sure that all the other fields are still valid/the same
+  EXPECT_THAT(message_data.namespace_id(), Eq(message_namespace_id));
+  EXPECT_THAT(message_data.expiration_timestamp_secs(),
+              Eq(message_expiration_timestamp));
+}
+
+// Verifies that UpdateSchemaStore refreshes the SchemaTypeIds cached in the
+// filter cache after the schema's type ordering changes: reordering "email"
+// and "message" reassigns their SchemaTypeIds, and the documents' cached ids
+// follow suit.
+TEST_F(DocumentStoreTest, UpdateSchemaStoreUpdatesSchemaTypeIds) {
+  const std::string schema_store_dir = test_dir_ + "_custom";
+  filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+  filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+
+  // Set a schema
+  SchemaProto schema;
+  auto type_config = schema.add_types();
+  type_config->set_schema_type("email");
+  type_config = schema.add_types();
+  type_config->set_schema_type("message");
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir));
+  ICING_EXPECT_OK(schema_store->SetSchema(schema));
+
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id,
+                             schema_store->GetSchemaTypeId("email"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_message_schema_type_id,
+                             schema_store->GetSchemaTypeId("message"));
+
+  DocumentProto email_document = DocumentBuilder()
+                                     .SetNamespace("namespace")
+                                     .SetUri("email_uri")
+                                     .SetSchema("email")
+                                     .Build();
+
+  DocumentProto message_document = DocumentBuilder()
+                                       .SetNamespace("namespace")
+                                       .SetUri("message_uri")
+                                       .SetSchema("message")
+                                       .Build();
+
+  // Add the documents and check SchemaTypeIds match
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> document_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store.get()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+                             document_store->Put(email_document));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentFilterData email_data,
+      document_store->GetDocumentFilterData(email_document_id));
+  EXPECT_THAT(email_data.schema_type_id(), Eq(old_email_schema_type_id));
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+                             document_store->Put(message_document));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentFilterData message_data,
+      document_store->GetDocumentFilterData(message_document_id));
+  EXPECT_THAT(message_data.schema_type_id(), Eq(old_message_schema_type_id));
+
+  // Rearrange the schema types. Since SchemaTypeId is assigned based on order,
+  // this should change the SchemaTypeIds.
+  schema.clear_types();
+  type_config = schema.add_types();
+  type_config->set_schema_type("message");
+  type_config = schema.add_types();
+  type_config->set_schema_type("email");
+
+  ICING_EXPECT_OK(schema_store->SetSchema(schema));
+
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId new_email_schema_type_id,
+                             schema_store->GetSchemaTypeId("email"));
+  ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId new_message_schema_type_id,
+                             schema_store->GetSchemaTypeId("message"));
+
+  // SchemaTypeIds should have changed.
+  EXPECT_NE(old_email_schema_type_id, new_email_schema_type_id);
+  EXPECT_NE(old_message_schema_type_id, new_message_schema_type_id);
+
+  ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get()));
+
+  // Check that the FilterCache holds the new SchemaTypeIds
+  ICING_ASSERT_OK_AND_ASSIGN(
+      email_data, document_store->GetDocumentFilterData(email_document_id));
+  EXPECT_THAT(email_data.schema_type_id(), Eq(new_email_schema_type_id));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      message_data, document_store->GetDocumentFilterData(message_document_id));
+  EXPECT_THAT(message_data.schema_type_id(), Eq(new_message_schema_type_id));
+}
+
+// Verifies that UpdateSchemaStore deletes documents invalidated by a
+// backwards-incompatible schema change: after "subject" goes from OPTIONAL to
+// REQUIRED, the email lacking a subject is removed while the one with a
+// subject is untouched.
+TEST_F(DocumentStoreTest, UpdateSchemaStoreDeletesInvalidDocuments) {
+  const std::string schema_store_dir = test_dir_ + "_custom";
+  filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+  filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+
+  // Set a schema
+  SchemaProto schema;
+  auto type_config = schema.add_types();
+  type_config->set_schema_type("email");
+
+  auto property_config = type_config->add_properties();
+  property_config->set_property_name("subject");
+  property_config->set_data_type(PropertyConfigProto::DataType::STRING);
+  property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+  property_config->mutable_indexing_config()->set_term_match_type(
+      TermMatchType::EXACT_ONLY);
+  property_config->mutable_indexing_config()->set_tokenizer_type(
+      IndexingConfig::TokenizerType::PLAIN);
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir));
+  ICING_EXPECT_OK(schema_store->SetSchema(schema));
+
+  // Add two documents, with and without a subject
+  DocumentProto email_without_subject = DocumentBuilder()
+                                            .SetNamespace("namespace")
+                                            .SetUri("email_uri_without_subject")
+                                            .SetSchema("email")
+                                            .SetCreationTimestampSecs(0)
+                                            .Build();
+
+  DocumentProto email_with_subject = DocumentBuilder()
+                                         .SetNamespace("namespace")
+                                         .SetUri("email_uri_with_subject")
+                                         .SetSchema("email")
+                                         .AddStringProperty("subject", "foo")
+                                         .SetCreationTimestampSecs(0)
+                                         .Build();
+
+  // Insert documents and check they're ok
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> document_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store.get()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_without_subject_document_id,
+                             document_store->Put(email_without_subject));
+  EXPECT_THAT(document_store->Get(email_without_subject_document_id),
+              IsOkAndHolds(EqualsProto(email_without_subject)));
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_with_subject_document_id,
+                             document_store->Put(email_with_subject));
+  EXPECT_THAT(document_store->Get(email_with_subject_document_id),
+              IsOkAndHolds(EqualsProto(email_with_subject)));
+
+  // Changing an OPTIONAL field to REQUIRED is backwards incompatible, and will
+  // invalidate all documents that don't have this property set
+  schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
+      PropertyConfigProto::Cardinality::REQUIRED);
+
+  ICING_EXPECT_OK(schema_store->SetSchema(
+      schema, /*ignore_errors_and_delete_documents=*/true));
+
+  ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get()));
+
+  // The email without a subject should be marked as deleted
+  EXPECT_THAT(document_store->Get(email_without_subject_document_id),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+  // The email with a subject should be unaffected
+  EXPECT_THAT(document_store->Get(email_with_subject_document_id),
+              IsOkAndHolds(EqualsProto(email_with_subject)));
+}
+
+// Verifies that UpdateSchemaStore deletes documents whose schema type was
+// removed entirely: after a new schema drops "email", the email document is
+// deleted while the "message" document is untouched.
+TEST_F(DocumentStoreTest,
+       UpdateSchemaStoreDeletesDocumentsByDeletedSchemaType) {
+  const std::string schema_store_dir = test_dir_ + "_custom";
+  filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+  filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+
+  // Set a schema
+  SchemaProto schema;
+  auto type_config = schema.add_types();
+  type_config->set_schema_type("email");
+  type_config = schema.add_types();
+  type_config->set_schema_type("message");
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, schema_store_dir));
+  ICING_EXPECT_OK(schema_store->SetSchema(schema));
+
+  // Add a "email" and "message" document
+  DocumentProto email_document = DocumentBuilder()
+                                     .SetNamespace("namespace")
+                                     .SetUri("email_uri")
+                                     .SetSchema("email")
+                                     .SetCreationTimestampSecs(0)
+                                     .Build();
+
+  DocumentProto message_document = DocumentBuilder()
+                                       .SetNamespace("namespace")
+                                       .SetUri("message_uri")
+                                       .SetSchema("message")
+                                       .SetCreationTimestampSecs(0)
+                                       .Build();
+
+  // Insert documents and check they're ok
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<DocumentStore> document_store,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store.get()));
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+                             document_store->Put(email_document));
+  EXPECT_THAT(document_store->Get(email_document_id),
+              IsOkAndHolds(EqualsProto(email_document)));
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+                             document_store->Put(message_document));
+  EXPECT_THAT(document_store->Get(message_document_id),
+              IsOkAndHolds(EqualsProto(message_document)));
+
+  // New schema drops the "email" type; removing a type is backwards
+  // incompatible, so errors must be explicitly ignored.
+  SchemaProto new_schema;
+  type_config = new_schema.add_types();
+  type_config->set_schema_type("message");
+
+  ICING_EXPECT_OK(
+      schema_store->SetSchema(new_schema,
+                              /*ignore_errors_and_delete_documents=*/true));
+
+  ICING_EXPECT_OK(document_store->UpdateSchemaStore(schema_store.get()));
+
+  // The "email" type is unknown now, so the "email" document should be deleted
+  EXPECT_THAT(document_store->Get(email_document_id),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+  // The "message" document should be unaffected
+  EXPECT_THAT(document_store->Get(message_document_id),
+              IsOkAndHolds(EqualsProto(message_document)));
+}
+
+TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreUpdatesSchemaTypeIds) {
+ const std::string schema_store_dir = test_dir_ + "_custom";
+ filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+
+ // Set a schema
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("email");
+ type_config = schema.add_types();
+ type_config->set_schema_type("message");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir));
+ ICING_EXPECT_OK(schema_store->SetSchema(schema));
+
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id,
+ schema_store->GetSchemaTypeId("email"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_message_schema_type_id,
+ schema_store->GetSchemaTypeId("message"));
+
+ DocumentProto email_document = DocumentBuilder()
+ .SetNamespace("namespace")
+ .SetUri("email_uri")
+ .SetSchema("email")
+ .Build();
+
+ DocumentProto message_document = DocumentBuilder()
+ .SetNamespace("namespace")
+ .SetUri("message_uri")
+ .SetSchema("message")
+ .Build();
+
+ // Add the documents and check SchemaTypeIds match
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store->Put(email_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentFilterData email_data,
+ document_store->GetDocumentFilterData(email_document_id));
+ EXPECT_THAT(email_data.schema_type_id(), Eq(old_email_schema_type_id));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store->Put(message_document));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentFilterData message_data,
+ document_store->GetDocumentFilterData(message_document_id));
+ EXPECT_THAT(message_data.schema_type_id(), Eq(old_message_schema_type_id));
+
+ // Rearrange the schema types. Since SchemaTypeId is assigned based on order,
+ // this should change the SchemaTypeIds.
+ schema.clear_types();
+ type_config = schema.add_types();
+ type_config->set_schema_type("message");
+ type_config = schema.add_types();
+ type_config->set_schema_type("email");
+
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaStore::SetSchemaResult set_schema_result,
+ schema_store->SetSchema(schema));
+
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId new_email_schema_type_id,
+ schema_store->GetSchemaTypeId("email"));
+ ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId new_message_schema_type_id,
+ schema_store->GetSchemaTypeId("message"));
+
+ // SchemaTypeIds should have changed.
+ EXPECT_NE(old_email_schema_type_id, new_email_schema_type_id);
+ EXPECT_NE(old_message_schema_type_id, new_message_schema_type_id);
+
+ ICING_EXPECT_OK(document_store->OptimizedUpdateSchemaStore(
+ schema_store.get(), set_schema_result));
+
+ // Check that the FilterCache holds the new SchemaTypeIds
+ ICING_ASSERT_OK_AND_ASSIGN(
+ email_data, document_store->GetDocumentFilterData(email_document_id));
+ EXPECT_THAT(email_data.schema_type_id(), Eq(new_email_schema_type_id));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ message_data, document_store->GetDocumentFilterData(message_document_id));
+ EXPECT_THAT(message_data.schema_type_id(), Eq(new_message_schema_type_id));
+}
+
+TEST_F(DocumentStoreTest, OptimizedUpdateSchemaStoreDeletesInvalidDocuments) {
+ const std::string schema_store_dir = test_dir_ + "_custom";
+ filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+
+ // Set a schema
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("email");
+
+ auto property_config = type_config->add_properties();
+ property_config->set_property_name("subject");
+ property_config->set_data_type(PropertyConfigProto::DataType::STRING);
+ property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property_config->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ property_config->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir));
+ ICING_EXPECT_OK(schema_store->SetSchema(schema));
+
+ // Add two documents, with and without a subject
+ DocumentProto email_without_subject = DocumentBuilder()
+ .SetNamespace("namespace")
+ .SetUri("email_uri_without_subject")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(0)
+ .Build();
+
+ DocumentProto email_with_subject = DocumentBuilder()
+ .SetNamespace("namespace")
+ .SetUri("email_uri_with_subject")
+ .SetSchema("email")
+ .AddStringProperty("subject", "foo")
+ .SetCreationTimestampSecs(0)
+ .Build();
+
+ // Insert documents and check they're ok
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_without_subject_document_id,
+ document_store->Put(email_without_subject));
+ EXPECT_THAT(document_store->Get(email_without_subject_document_id),
+ IsOkAndHolds(EqualsProto(email_without_subject)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_with_subject_document_id,
+ document_store->Put(email_with_subject));
+ EXPECT_THAT(document_store->Get(email_with_subject_document_id),
+ IsOkAndHolds(EqualsProto(email_with_subject)));
+
+ // Changing an OPTIONAL field to REQUIRED is backwards incompatible, and will
+ // invalidate all documents that don't have this property set
+ schema.mutable_types(0)->mutable_properties(0)->set_cardinality(
+ PropertyConfigProto::Cardinality::REQUIRED);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaStore::SetSchemaResult set_schema_result,
+ schema_store->SetSchema(schema,
+ /*ignore_errors_and_delete_documents=*/true));
+
+ ICING_EXPECT_OK(document_store->OptimizedUpdateSchemaStore(
+ schema_store.get(), set_schema_result));
+
+ // The email without a subject should be marked as deleted
+ EXPECT_THAT(document_store->Get(email_without_subject_document_id),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // The email with a subject should be unaffected
+ EXPECT_THAT(document_store->Get(email_with_subject_document_id),
+ IsOkAndHolds(EqualsProto(email_with_subject)));
+}
+
+TEST_F(DocumentStoreTest,
+ OptimizedUpdateSchemaStoreDeletesDocumentsByDeletedSchemaType) {
+ const std::string schema_store_dir = test_dir_ + "_custom";
+ filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
+
+ // Set a schema
+ SchemaProto schema;
+ auto type_config = schema.add_types();
+ type_config->set_schema_type("email");
+ type_config = schema.add_types();
+ type_config->set_schema_type("message");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir));
+ ICING_EXPECT_OK(schema_store->SetSchema(schema));
+
+  // Add an "email" document and a "message" document
+ DocumentProto email_document = DocumentBuilder()
+ .SetNamespace("namespace")
+ .SetUri("email_uri")
+ .SetSchema("email")
+ .SetCreationTimestampSecs(0)
+ .Build();
+
+ DocumentProto message_document = DocumentBuilder()
+ .SetNamespace("namespace")
+ .SetUri("message_uri")
+ .SetSchema("message")
+ .SetCreationTimestampSecs(0)
+ .Build();
+
+ // Insert documents and check they're ok
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
+ document_store->Put(email_document));
+ EXPECT_THAT(document_store->Get(email_document_id),
+ IsOkAndHolds(EqualsProto(email_document)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
+ document_store->Put(message_document));
+ EXPECT_THAT(document_store->Get(message_document_id),
+ IsOkAndHolds(EqualsProto(message_document)));
+
+ SchemaProto new_schema;
+ type_config = new_schema.add_types();
+ type_config->set_schema_type("message");
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ SchemaStore::SetSchemaResult set_schema_result,
+ schema_store->SetSchema(new_schema,
+ /*ignore_errors_and_delete_documents=*/true));
+
+ ICING_EXPECT_OK(document_store->OptimizedUpdateSchemaStore(
+ schema_store.get(), set_schema_result));
+
+ // The "email" type is unknown now, so the "email" document should be deleted
+ EXPECT_THAT(document_store->Get(email_document_id),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // The "message" document should be unaffected
+ EXPECT_THAT(document_store->Get(message_document_id),
+ IsOkAndHolds(EqualsProto(message_document)));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/key-mapper.h b/icing/store/key-mapper.h
new file mode 100644
index 0000000..1d2d455
--- /dev/null
+++ b/icing/store/key-mapper.h
@@ -0,0 +1,267 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_KEY_MAPPER_H_
+#define ICING_STORE_KEY_MAPPER_H_
+
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <type_traits>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/legacy/index/icing-dynamic-trie.h"
+#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/util/crc32.h"
+
+namespace icing {
+namespace lib {
+
+// File-backed mapping between the string key and a trivially copyable value
+// type.
+//
+// KeyMapper is thread-compatible
+template <typename T>
+class KeyMapper {
+ public:
+ // Returns an initialized instance of KeyMapper that can immediately handle
+ // read/write operations.
+ // Returns any encountered IO errors.
+ //
+ // base_dir : Base directory used to save all the files required to persist
+ // KeyMapper. If this base_dir was previously used to create a
+ // KeyMapper, then this existing data would be loaded. Otherwise,
+ // an empty KeyMapper would be created.
+ // maximum_size_bytes : The maximum allowable size of the key mapper storage.
+ static libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>> Create(
+ const Filesystem& filesystem, std::string_view base_dir,
+ int maximum_size_bytes);
+
+  // Deletes all the files associated with the KeyMapper. Returns success or
+  // any encountered IO errors.
+ //
+ // base_dir : Base directory used to save all the files required to persist
+ // KeyMapper. Should be the same as passed into Create().
+ static libtextclassifier3::Status Delete(const Filesystem& filesystem,
+ std::string_view base_dir);
+
+ ~KeyMapper() = default;
+
+ // Inserts/Updates value for key.
+ // Returns any encountered IO errors.
+ //
+ // NOTE: Put() doesn't automatically flush changes to disk and relies on
+ // either explicit calls to PersistToDisk() or a clean shutdown of the class.
+ libtextclassifier3::Status Put(std::string_view key, T value);
+
+ // Finds the current value for key and returns it. If key is not present, it
+ // is inserted with next_value and next_value is returned.
+ //
+ // Returns any IO errors that may occur during Put.
+ libtextclassifier3::StatusOr<T> GetOrPut(std::string_view key, T next_value);
+
+ // Returns the value corresponding to the key.
+ //
+ // Returns NOT_FOUND error if the key was missing.
+ // Returns any encountered IO errors.
+ libtextclassifier3::StatusOr<T> Get(std::string_view key) const;
+
+ // Count of unique keys stored in the KeyMapper.
+ int32_t num_keys() const { return trie_.size(); }
+
+ // Syncs all the changes made to the KeyMapper to disk.
+ // Returns any encountered IO errors.
+ //
+ // NOTE: To control disk-churn, Put() doesn't automatically persist every
+ // change to disk. The caller should explicitly call PersistToDisk() to make
+ // sure that the data is durable.
+ libtextclassifier3::Status PersistToDisk();
+
+ // Calculates and returns the disk usage in bytes.
+ //
+ // Returns:
+ // Disk usage on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+
+ // Computes and returns the checksum of the header and contents.
+ Crc32 ComputeChecksum();
+
+ private:
+ static constexpr char kKeyMapperDir[] = "key_mapper_dir";
+ static constexpr char kKeyMapperPrefix[] = "key_mapper";
+
+ // Use KeyMapper::Create() to instantiate.
+ explicit KeyMapper(std::string_view key_mapper_dir);
+
+ // Load any existing KeyMapper data from disk, or creates a new instance
+ // of KeyMapper on disk and gets ready to process read/write operations.
+ //
+ // Returns any encountered IO errors.
+ libtextclassifier3::Status Initialize(int maximum_size_bytes);
+
+ const std::string file_prefix_;
+
+  // TODO(adorokhine) Filesystem is a forked class that's available in two
+  // different namespaces. We will need icing::Filesystem in order
+  // to use IcingDynamicTrie. Filesystem class should be fully refactored
+  // to have a single definition across both namespaces. Such a class should
+  // use icing (and general google3) coding conventions and behave like
+  // a proper C++ class.
+ const IcingFilesystem icing_filesystem_;
+ IcingDynamicTrie trie_;
+
+ static_assert(std::is_trivially_copyable<T>::value,
+ "T must be trivially copyable");
+};
+
+template <typename T>
+libtextclassifier3::StatusOr<std::unique_ptr<KeyMapper<T>>>
+KeyMapper<T>::Create(const Filesystem& filesystem, std::string_view base_dir,
+ int maximum_size_bytes) {
+ // We create a subdirectory since the trie creates and stores multiple files.
+ // This makes it easier to isolate the trie files away from other files that
+ // could potentially be in the same base_dir, and makes it easier to delete.
+ const std::string key_mapper_dir =
+ absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
+ if (!filesystem.CreateDirectoryRecursively(key_mapper_dir.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to create KeyMapper directory: ", key_mapper_dir));
+ }
+ auto mapper = std::unique_ptr<KeyMapper<T>>(new KeyMapper<T>(key_mapper_dir));
+ ICING_RETURN_IF_ERROR(mapper->Initialize(maximum_size_bytes));
+ return mapper;
+}
+
+template <typename T>
+libtextclassifier3::Status KeyMapper<T>::Delete(const Filesystem& filesystem,
+ std::string_view base_dir) {
+ std::string key_mapper_dir = absl_ports::StrCat(base_dir, "/", kKeyMapperDir);
+ if (!filesystem.DeleteDirectoryRecursively(key_mapper_dir.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to delete KeyMapper directory: ", key_mapper_dir));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+KeyMapper<T>::KeyMapper(std::string_view key_mapper_dir)
+ : file_prefix_(absl_ports::StrCat(key_mapper_dir, "/", kKeyMapperPrefix)),
+ trie_(file_prefix_,
+ IcingDynamicTrie::RuntimeOptions().set_storage_policy(
+ IcingDynamicTrie::RuntimeOptions::kMapSharedWithCrc),
+ &icing_filesystem_) {}
+
+template <typename T>
+libtextclassifier3::Status KeyMapper<T>::Initialize(int maximum_size_bytes) {
+ IcingDynamicTrie::Options options;
+ // Divide the max space between the three internal arrays: nodes, nexts and
+ // suffixes. MaxNodes and MaxNexts are in units of their own data structures.
+ // MaxSuffixesSize is in units of bytes.
+ options.max_nodes = maximum_size_bytes / (3 * sizeof(IcingDynamicTrie::Node));
+ options.max_nexts = options.max_nodes;
+ options.max_suffixes_size =
+ sizeof(IcingDynamicTrie::Node) * options.max_nodes;
+ options.value_size = sizeof(T);
+
+ if (!trie_.CreateIfNotExist(options)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to create KeyMapper file: ", file_prefix_));
+ }
+ if (!trie_.Init()) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to init KeyMapper file: ", file_prefix_));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+libtextclassifier3::StatusOr<T> KeyMapper<T>::GetOrPut(std::string_view key,
+ T next_value) {
+ std::string string_key(key);
+ uint32_t value_index;
+ if (!trie_.Insert(string_key.c_str(), &next_value, &value_index,
+ /*replace=*/false)) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Unable to insert key ", key, " into KeyMapper ", file_prefix_, "."));
+ }
+ // This memory address could be unaligned since we're just grabbing the value
+ // from somewhere in the trie's suffix array. The suffix array is filled with
+ // chars, so the address might not be aligned to T values.
+ const T* unaligned_value =
+ static_cast<const T*>(trie_.GetValueAtIndex(value_index));
+
+ // memcpy the value to ensure that the returned value here is in a T-aligned
+ // address
+ T aligned_value;
+ memcpy(&aligned_value, unaligned_value, sizeof(T));
+ return aligned_value;
+}
+
+template <typename T>
+libtextclassifier3::Status KeyMapper<T>::Put(std::string_view key, T value) {
+ std::string string_key(key);
+ if (!trie_.Insert(string_key.c_str(), &value)) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Unable to insert key ", key, " into KeyMapper ", file_prefix_, "."));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+libtextclassifier3::StatusOr<T> KeyMapper<T>::Get(std::string_view key) const {
+ std::string string_key(key);
+ T value;
+ if (!trie_.Find(string_key.c_str(), &value)) {
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Key not found ", key, " in KeyMapper ", file_prefix_, "."));
+ }
+ return value;
+}
+
+template <typename T>
+libtextclassifier3::Status KeyMapper<T>::PersistToDisk() {
+ if (!trie_.Sync()) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to sync KeyMapper file: ", file_prefix_));
+ }
+
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename T>
+libtextclassifier3::StatusOr<int64_t> KeyMapper<T>::GetDiskUsage() const {
+ int64_t size = trie_.GetDiskUsage();
+ if (size == IcingFilesystem::kBadFileSize || size < 0) {
+ return absl_ports::InternalError("Failed to get disk usage of key mapper");
+ }
+ return size;
+}
+
+template <typename T>
+Crc32 KeyMapper<T>::ComputeChecksum() {
+ return Crc32(trie_.UpdateCrc());
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_KEY_MAPPER_H_
diff --git a/icing/store/key-mapper_test.cc b/icing/store/key-mapper_test.cc
new file mode 100644
index 0000000..c75c203
--- /dev/null
+++ b/icing/store/key-mapper_test.cc
@@ -0,0 +1,168 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/store/key-mapper.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/store/document-id.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+using ::testing::_;
+using ::testing::HasSubstr;
+
+namespace icing {
+namespace lib {
+namespace {
+constexpr int kMaxKeyMapperSize = 3 * 1024 * 1024; // 3 MiB
+
+class KeyMapperTest : public testing::Test {
+ protected:
+ void SetUp() override { base_dir_ = GetTestTempDir() + "/key_mapper"; }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(base_dir_.c_str());
+ }
+
+ std::string base_dir_;
+ Filesystem filesystem_;
+};
+
+TEST_F(KeyMapperTest, InvalidBaseDir) {
+ ASSERT_THAT(
+ KeyMapper<DocumentId>::Create(filesystem_, "/dev/null", kMaxKeyMapperSize)
+ .status()
+ .error_message(),
+ HasSubstr("Failed to create KeyMapper"));
+}
+
+TEST_F(KeyMapperTest, NegativeMaxKeyMapperSizeReturnsInternalError) {
+ ASSERT_THAT(KeyMapper<DocumentId>::Create(filesystem_, base_dir_, -1),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(KeyMapperTest, TooLargeMaxKeyMapperSizeReturnsInternalError) {
+ ASSERT_THAT(KeyMapper<DocumentId>::Create(filesystem_, base_dir_,
+ std::numeric_limits<int>::max()),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(KeyMapperTest, CreateNewKeyMapper) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
+ KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ EXPECT_THAT(key_mapper->num_keys(), 0);
+}
+
+TEST_F(KeyMapperTest, CanUpdateSameKeyMultipleTimes) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
+ KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+
+ ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
+ ICING_EXPECT_OK(key_mapper->Put("default-youtube.com", 50));
+
+ EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(100));
+
+ ICING_EXPECT_OK(key_mapper->Put("default-google.com", 200));
+ EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(200));
+ EXPECT_THAT(key_mapper->num_keys(), 2);
+
+ ICING_EXPECT_OK(key_mapper->Put("default-google.com", 300));
+ EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(300));
+ EXPECT_THAT(key_mapper->num_keys(), 2);
+}
+
+TEST_F(KeyMapperTest, GetOrPutOk) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
+ KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+
+ EXPECT_THAT(key_mapper->Get("foo"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(key_mapper->GetOrPut("foo", 1), IsOkAndHolds(1));
+ EXPECT_THAT(key_mapper->Get("foo"), IsOkAndHolds(1));
+}
+
+TEST_F(KeyMapperTest, CanPersistToDiskRegularly) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
+ KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ // Can persist an empty KeyMapper.
+ ICING_EXPECT_OK(key_mapper->PersistToDisk());
+ EXPECT_THAT(key_mapper->num_keys(), 0);
+
+ // Can persist the smallest KeyMapper.
+ ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
+ ICING_EXPECT_OK(key_mapper->PersistToDisk());
+ EXPECT_THAT(key_mapper->num_keys(), 1);
+ EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(100));
+
+ // Can continue to add keys after PersistToDisk().
+ ICING_EXPECT_OK(key_mapper->Put("default-youtube.com", 200));
+ EXPECT_THAT(key_mapper->num_keys(), 2);
+ EXPECT_THAT(key_mapper->Get("default-youtube.com"), IsOkAndHolds(200));
+
+ // Can continue to update the same key after PersistToDisk().
+ ICING_EXPECT_OK(key_mapper->Put("default-google.com", 300));
+ EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(300));
+ EXPECT_THAT(key_mapper->num_keys(), 2);
+}
+
+TEST_F(KeyMapperTest, CanUseAcrossMultipleInstances) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
+ KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
+ ICING_EXPECT_OK(key_mapper->PersistToDisk());
+
+ key_mapper.reset();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ key_mapper,
+ KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ EXPECT_THAT(key_mapper->num_keys(), 1);
+ EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(100));
+
+ // Can continue to read/write to the KeyMapper.
+ ICING_EXPECT_OK(key_mapper->Put("default-youtube.com", 200));
+ ICING_EXPECT_OK(key_mapper->Put("default-google.com", 300));
+ EXPECT_THAT(key_mapper->num_keys(), 2);
+ EXPECT_THAT(key_mapper->Get("default-youtube.com"), IsOkAndHolds(200));
+ EXPECT_THAT(key_mapper->Get("default-google.com"), IsOkAndHolds(300));
+}
+
+TEST_F(KeyMapperTest, CanDeleteAndRestartKeyMapping) {
+ // Can delete even if there's nothing there
+ ICING_EXPECT_OK(KeyMapper<DocumentId>::Delete(filesystem_, base_dir_));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<KeyMapper<DocumentId>> key_mapper,
+ KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
+ ICING_EXPECT_OK(key_mapper->PersistToDisk());
+ ICING_EXPECT_OK(KeyMapper<DocumentId>::Delete(filesystem_, base_dir_));
+
+ key_mapper.reset();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ key_mapper,
+ KeyMapper<DocumentId>::Create(filesystem_, base_dir_, kMaxKeyMapperSize));
+ EXPECT_THAT(key_mapper->num_keys(), 0);
+ ICING_EXPECT_OK(key_mapper->Put("default-google.com", 100));
+ EXPECT_THAT(key_mapper->num_keys(), 1);
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
new file mode 100644
index 0000000..3e007d1
--- /dev/null
+++ b/icing/testing/common-matchers.h
@@ -0,0 +1,295 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_COMMON_MATCHERS_H_
+#define ICING_TESTING_COMMON_MATCHERS_H_
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_join.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+
+namespace icing {
+namespace lib {
+
+// Used to match Token(Token::Type type, std::string_view text)
+MATCHER_P2(EqualsToken, type, text, "") {
+ if (arg.type != type || arg.text != text) {
+ *result_listener << IcingStringUtil::StringPrintf(
+ "(Expected: type=%d, text=\"%s\". Actual: type=%d, text=\"%s\")", type,
+ &text[0], arg.type, arg.text.data());
+ return false;
+ }
+ return true;
+}
+
+// Used to match a DocHitInfo
+MATCHER_P2(EqualsDocHitInfo, document_id, section_ids, "") {
+ const DocHitInfo& actual = arg;
+ SectionIdMask section_mask = kSectionIdMaskNone;
+ for (SectionId section_id : section_ids) {
+ section_mask |= 1U << section_id;
+ }
+ *result_listener << IcingStringUtil::StringPrintf(
+ "(actual is {document_id=%d, section_mask=%d}, but expected was "
+ "{document_id=%d, section_mask=%d}.)",
+ actual.document_id(), actual.hit_section_ids_mask(), document_id,
+ section_mask);
+ return actual.document_id() == document_id &&
+ actual.hit_section_ids_mask() == section_mask;
+}
+
+// Used to match a ScoredDocumentHit
+MATCHER_P(EqualsScoredDocumentHit, expected_scored_document_hit, "") {
+ if (arg.document_id() != expected_scored_document_hit.document_id() ||
+ arg.hit_section_id_mask() !=
+ expected_scored_document_hit.hit_section_id_mask() ||
+ arg.score() != expected_scored_document_hit.score()) {
+ *result_listener << IcingStringUtil::StringPrintf(
+ "Expected: document_id=%d, hit_section_id_mask=%d, score=%.2f. Actual: "
+ "document_id=%d, hit_section_id_mask=%d, score=%.2f",
+ expected_scored_document_hit.document_id(),
+ expected_scored_document_hit.hit_section_id_mask(),
+ expected_scored_document_hit.score(), arg.document_id(),
+ arg.hit_section_id_mask(), arg.score());
+ return false;
+ }
+ return true;
+}
+
+MATCHER_P(EqualsSetSchemaResult, expected, "") {
+ const SchemaStore::SetSchemaResult& actual = arg;
+
+ if (actual.success == expected.success &&
+ actual.index_incompatible == expected.index_incompatible &&
+ actual.old_schema_type_ids_changed ==
+ expected.old_schema_type_ids_changed &&
+ actual.schema_types_deleted_by_name ==
+ expected.schema_types_deleted_by_name &&
+ actual.schema_types_deleted_by_id ==
+ expected.schema_types_deleted_by_id &&
+ actual.schema_types_incompatible_by_name ==
+ expected.schema_types_incompatible_by_name &&
+ actual.schema_types_incompatible_by_id ==
+ expected.schema_types_incompatible_by_id) {
+ return true;
+ }
+
+ // Format schema_type_ids_changed
+ std::string actual_old_schema_type_ids_changed = absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(actual.old_schema_type_ids_changed, ",",
+ absl_ports::NumberFormatter()),
+ "]");
+
+ std::string expected_old_schema_type_ids_changed = absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(expected.old_schema_type_ids_changed, ",",
+ absl_ports::NumberFormatter()),
+ "]");
+
+ // Format schema_types_deleted_by_name
+ std::string actual_schema_types_deleted_by_name = absl_ports::StrCat(
+ "[", absl_ports::StrJoin(actual.schema_types_deleted_by_name, ","), "]");
+
+ std::string expected_schema_types_deleted_by_name = absl_ports::StrCat(
+ "[", absl_ports::StrJoin(expected.schema_types_deleted_by_name, ","),
+ "]");
+
+ // Format schema_types_deleted_by_id
+ std::string actual_schema_types_deleted_by_id = absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(actual.schema_types_deleted_by_id, ",",
+ absl_ports::NumberFormatter()),
+ "]");
+
+ std::string expected_schema_types_deleted_by_id = absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(expected.schema_types_deleted_by_id, ",",
+ absl_ports::NumberFormatter()),
+ "]");
+
+ // Format schema_types_incompatible_by_name
+ std::string actual_schema_types_incompatible_by_name = absl_ports::StrCat(
+ "[", absl_ports::StrJoin(actual.schema_types_incompatible_by_name, ","),
+ "]");
+
+ std::string expected_schema_types_incompatible_by_name = absl_ports::StrCat(
+ "[", absl_ports::StrJoin(expected.schema_types_incompatible_by_name, ","),
+ "]");
+
+ // Format schema_types_incompatible_by_id
+ std::string actual_schema_types_incompatible_by_id = absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(actual.schema_types_incompatible_by_id, ",",
+ absl_ports::NumberFormatter()),
+ "]");
+
+ std::string expected_schema_types_incompatible_by_id = absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(expected.schema_types_incompatible_by_id, ",",
+ absl_ports::NumberFormatter()),
+ "]");
+
+ *result_listener << IcingStringUtil::StringPrintf(
+ "\nExpected {\n"
+ "\tsuccess=%d,\n"
+ "\tindex_incompatible=%d,\n"
+ "\told_schema_type_ids_changed=%s,\n"
+ "\tschema_types_deleted_by_name=%s,\n"
+ "\tschema_types_deleted_by_id=%s,\n"
+ "\tschema_types_incompatible_by_name=%s,\n"
+ "\tschema_types_incompatible_by_id=%s\n"
+ "}\n"
+ "Actual {\n"
+ "\tsuccess=%d,\n"
+ "\tindex_incompatible=%d,\n"
+ "\told_schema_type_ids_changed=%s,\n"
+ "\tschema_types_deleted_by_name=%s,\n"
+ "\tschema_types_deleted_by_id=%s,\n"
+ "\tschema_types_incompatible_by_name=%s,\n"
+ "\tschema_types_incompatible_by_id=%s\n"
+ "}\n",
+ expected.success, expected.index_incompatible,
+ expected_old_schema_type_ids_changed.c_str(),
+ expected_schema_types_deleted_by_name.c_str(),
+ expected_schema_types_deleted_by_id.c_str(),
+ expected_schema_types_incompatible_by_name.c_str(),
+ expected_schema_types_incompatible_by_id.c_str(), actual.success,
+ actual.index_incompatible, actual_old_schema_type_ids_changed.c_str(),
+ actual_schema_types_deleted_by_name.c_str(),
+ actual_schema_types_deleted_by_id.c_str(),
+ actual_schema_types_incompatible_by_name.c_str(),
+ actual_schema_types_incompatible_by_id.c_str());
+
+ return false;
+}
+
+std::string StatusCodeToString(libtextclassifier3::StatusCode code) {  // Human-readable code name for matcher failure messages.
+  switch (code) {
+    case libtextclassifier3::StatusCode::OK:
+      return "OK";
+    case libtextclassifier3::StatusCode::CANCELLED:
+      return "CANCELLED";
+    case libtextclassifier3::StatusCode::UNKNOWN:
+      return "UNKNOWN";
+    case libtextclassifier3::StatusCode::INVALID_ARGUMENT:
+      return "INVALID_ARGUMENT";
+    case libtextclassifier3::StatusCode::DEADLINE_EXCEEDED:
+      return "DEADLINE_EXCEEDED";
+    case libtextclassifier3::StatusCode::NOT_FOUND:
+      return "NOT_FOUND";
+    case libtextclassifier3::StatusCode::ALREADY_EXISTS:
+      return "ALREADY_EXISTS";
+    case libtextclassifier3::StatusCode::PERMISSION_DENIED:
+      return "PERMISSION_DENIED";
+    case libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED:
+      return "RESOURCE_EXHAUSTED";
+    case libtextclassifier3::StatusCode::FAILED_PRECONDITION:
+      return "FAILED_PRECONDITION";
+    case libtextclassifier3::StatusCode::ABORTED:
+      return "ABORTED";
+    case libtextclassifier3::StatusCode::OUT_OF_RANGE:
+      return "OUT_OF_RANGE";
+    case libtextclassifier3::StatusCode::UNIMPLEMENTED:
+      return "UNIMPLEMENTED";
+    case libtextclassifier3::StatusCode::INTERNAL:
+      return "INTERNAL";
+    case libtextclassifier3::StatusCode::UNAVAILABLE:
+      return "UNAVAILABLE";
+    case libtextclassifier3::StatusCode::DATA_LOSS:
+      return "DATA_LOSS";
+    case libtextclassifier3::StatusCode::UNAUTHENTICATED:
+      return "UNAUTHENTICATED";
+    default:
+      return "";  // Unknown/unlisted codes render as an empty string rather than crashing.
+  }
+}
+
+MATCHER(IsOk, "") {  // Matches any status-like arg (Status or StatusOr) that is OK.
+  absl_ports::StatusAdapter adapter(arg);  // Adapts arg to a plain Status view.
+  if (adapter.status().ok()) {
+    return true;
+  }
+  *result_listener << IcingStringUtil::StringPrintf(
+      "Expected OK, actual was (%s:%s)",
+      StatusCodeToString(adapter.status().CanonicalCode()).c_str(),
+      adapter.status().error_message().c_str());
+  return false;
+}
+
+MATCHER_P(IsOkAndHolds, matcher, "") {  // Matches an OK StatusOr whose value satisfies `matcher`.
+  if (!arg.ok()) {
+    *result_listener << IcingStringUtil::StringPrintf(
+        "Expected OK, actual was (%s:%s)",
+        StatusCodeToString(arg.status().CanonicalCode()).c_str(),
+        arg.status().error_message().c_str());
+    return false;
+  }
+  return ExplainMatchResult(matcher, arg.ValueOrDie(), result_listener);  // Delegate to the inner matcher on the held value.
+}
+
+MATCHER_P(StatusIs, status_code, "") {  // Canonical code must equal status_code; message is ignored.
+  absl_ports::StatusAdapter adapter(arg);
+  if (adapter.status().CanonicalCode() == status_code) {
+    return true;
+  }
+  *result_listener << IcingStringUtil::StringPrintf(
+      "Expected (%s:), actual was (%s:%s)",
+      StatusCodeToString(status_code).c_str(),
+      StatusCodeToString(adapter.status().CanonicalCode()).c_str(),
+      adapter.status().error_message().c_str());
+  return false;
+}
+
+MATCHER_P2(StatusIs, status_code, error_matcher, "") {  // Code must match AND message must satisfy error_matcher.
+  absl_ports::StatusAdapter adapter(arg);
+  if (adapter.status().CanonicalCode() != status_code) {
+    *result_listener << IcingStringUtil::StringPrintf(
+        "Expected (%s:), actual was (%s:%s)",
+        StatusCodeToString(status_code).c_str(),
+        StatusCodeToString(adapter.status().CanonicalCode()).c_str(),
+        adapter.status().error_message().c_str());
+    return false;
+  }
+  return ExplainMatchResult(error_matcher, adapter.status().error_message(),
+                            result_listener);
+}
+
+// TODO(tjbarron) Remove this once icing has switched to depend on TC3 Status
+#define ICING_STATUS_MACROS_CONCAT_NAME(x, y) \
+  ICING_STATUS_MACROS_CONCAT_IMPL(x, y)  // Extra indirection so __COUNTER__ expands before pasting.
+#define ICING_STATUS_MACROS_CONCAT_IMPL(x, y) x##y  // Token-paste to build a unique temp name.
+
+#define ICING_EXPECT_OK(func) EXPECT_THAT(func, IsOk())
+#define ICING_ASSERT_OK(func) ASSERT_THAT(func, IsOk())
+#define ICING_ASSERT_OK_AND_ASSIGN(lhs, rexpr) \
+  ICING_ASSERT_OK_AND_ASSIGN_IMPL( \
+      ICING_STATUS_MACROS_CONCAT_NAME(_status_or_value, __COUNTER__), lhs, \
+      rexpr)
+#define ICING_ASSERT_OK_AND_ASSIGN_IMPL(statusor, lhs, rexpr) \
+  auto statusor = (rexpr); \
+  ICING_ASSERT_OK(statusor.status()); \
+  lhs = std::move(statusor).ValueOrDie()  // No trailing semicolon: supplied at the call site.
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_COMMON_MATCHERS_H_
diff --git a/icing/testing/fake-clock.h b/icing/testing/fake-clock.h
new file mode 100644
index 0000000..c3b3af5
--- /dev/null
+++ b/icing/testing/fake-clock.h
@@ -0,0 +1,40 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_FAKE_CLOCK_H_
+#define ICING_TESTING_FAKE_CLOCK_H_
+
+#include <ctime>
+
+#include "icing/util/clock.h"
+
+namespace icing {
+namespace lib {
+
+// Test-only Clock whose "current" time is a fixed value set by the test via
+// SetSeconds(); inject it into the class under test to control time.
+class FakeClock : public Clock {
+ public:
+  std::time_t GetCurrentSeconds() const override { return seconds_; }  // Returns the last value set (default 0); never advances.
+
+  void SetSeconds(std::time_t seconds) { seconds_ = seconds; }
+
+ private:
+  std::time_t seconds_ = 0;  // Fake "current" time in seconds.
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_FAKE_CLOCK_H_
diff --git a/icing/testing/fake-clock_test.cc b/icing/testing/fake-clock_test.cc
new file mode 100644
index 0000000..3e85b35
--- /dev/null
+++ b/icing/testing/fake-clock_test.cc
@@ -0,0 +1,41 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/testing/fake-clock.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+
+TEST(FakeClockTest, GetSetOk) {
+  FakeClock fake_clock;
+  EXPECT_THAT(fake_clock.GetCurrentSeconds(), Eq(0));  // Default is 0 before any SetSeconds().
+
+  fake_clock.SetSeconds(10);
+  EXPECT_THAT(fake_clock.GetCurrentSeconds(), Eq(10));
+
+  fake_clock.SetSeconds(-1);  // Negative values are stored verbatim; no validation.
+  EXPECT_THAT(fake_clock.GetCurrentSeconds(), Eq(-1));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/testing/i18n-test-utils.cc b/icing/testing/i18n-test-utils.cc
new file mode 100644
index 0000000..3839dc8
--- /dev/null
+++ b/icing/testing/i18n-test-utils.cc
@@ -0,0 +1,46 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/testing/i18n-test-utils.h"
+
+#include <cstdint>
+#include <string>
+
+#include "icing/util/logging.h"
+#include "unicode/umachine.h"
+#include "unicode/utf8.h"
+
+namespace icing {
+namespace lib {
+
+std::string UcharToString(UChar32 uchar) {  // Encodes one code point as UTF-8; "" on failure.
+  std::string result;
+  uint8_t utf8_buffer[4];  // U8_APPEND writes 0 to 4 bytes
+
+  int utf8_index = 0;
+  UBool has_error = FALSE;
+
+  // utf8_index is advanced to the end of the contents if successful
+  U8_APPEND(utf8_buffer, utf8_index, sizeof(utf8_buffer), uchar, has_error);
+
+  if (has_error) {  // Set by U8_APPEND for code points it cannot encode.
+    ICING_VLOG(1) << "Error converting UChar32 to UTF8";
+    return "";
+  }
+  result.append(reinterpret_cast<char*>(utf8_buffer), utf8_index);
+  return result;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/testing/i18n-test-utils.h b/icing/testing/i18n-test-utils.h
new file mode 100644
index 0000000..4e8a3b8
--- /dev/null
+++ b/icing/testing/i18n-test-utils.h
@@ -0,0 +1,30 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_I18N_TEST_UTILS_H_
+#define ICING_TESTING_I18N_TEST_UTILS_H_
+
+#include <string>
+
+#include "unicode/umachine.h"
+
+namespace icing {
+namespace lib {
+
+std::string UcharToString(UChar32 uchar);
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_I18N_TEST_UTILS_H_
diff --git a/icing/testing/random-string.h b/icing/testing/random-string.h
new file mode 100644
index 0000000..1510e15
--- /dev/null
+++ b/icing/testing/random-string.h
@@ -0,0 +1,49 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_RANDOM_STRING_H_
+#define ICING_TESTING_RANDOM_STRING_H_
+
+#include <algorithm>
+#include <random>
+#include <string>
+#include <string_view>
+
+namespace icing {
+namespace lib {
+
+// 62-character alphanumeric alphabet for random test identifiers.
+inline constexpr std::string_view kAlNumAlphabet =
+    "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+// Returns a string of `len` characters drawn uniformly at random from
+// `alphabet`, using the caller-provided generator `gen`.
+template <typename Gen>
+std::string RandomString(const std::string_view alphabet, size_t len,
+                         Gen* gen) {
+  // uniform_int_distribution samples the CLOSED range [a, b]; the upper
+  // bound must be size() - 1 or alphabet[uniform(*gen)] can index one past
+  // the end of the view, which is undefined behavior.
+  std::uniform_int_distribution<size_t> uniform(0u, alphabet.size() - 1);
+  std::string result(len, '\0');
+  std::generate(
+      std::begin(result), std::end(result),
+      [&gen, &alphabet, &uniform]() { return alphabet[uniform(*gen)]; });
+
+  return result;
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_RANDOM_STRING_H_
diff --git a/icing/testing/snippet-helpers.cc b/icing/testing/snippet-helpers.cc
new file mode 100644
index 0000000..fde0004
--- /dev/null
+++ b/icing/testing/snippet-helpers.cc
@@ -0,0 +1,80 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/testing/snippet-helpers.h"
+
+#include <algorithm>
+#include <string_view>
+
+#include "icing/proto/search.pb.h"
+
+namespace icing {
+namespace lib {
+
+const SnippetMatchProto* GetSnippetMatch(const SnippetProto& snippet_proto,
+                                         const std::string& property_name,
+                                         int snippet_index) {
+  auto iterator = std::find_if(
+      snippet_proto.entries().begin(), snippet_proto.entries().end(),
+      [&property_name](const SnippetProto::EntryProto& entry) {
+        return entry.property_name() == property_name;
+      });
+  if (iterator == snippet_proto.entries().end() ||
+      iterator->snippet_matches_size() <= snippet_index) {  // No entry, or index out of range.
+    return nullptr;
+  }
+  return &iterator->snippet_matches(snippet_index);
+}
+
+const PropertyProto* GetProperty(const DocumentProto& document,
+                                 const std::string& property_name) {
+  const PropertyProto* property = nullptr;
+  for (const PropertyProto& prop : document.properties()) {
+    if (prop.name() == property_name) {
+      property = &prop;  // No break: the LAST matching property wins.
+    }
+  }
+  return property;  // nullptr when no property has this name.
+}
+
+std::string GetWindow(const DocumentProto& document,
+                      const SnippetProto& snippet_proto,
+                      const std::string& property_name, int snippet_index) {
+  const SnippetMatchProto* match =
+      GetSnippetMatch(snippet_proto, property_name, snippet_index);
+  const PropertyProto* property = GetProperty(document, property_name);
+  if (match == nullptr || property == nullptr) {
+    return "";  // No such snippet match or property.
+  }
+  std::string_view value = property->string_values(match->values_index());
+  return std::string(
+      value.substr(match->window_position(), match->window_bytes()));  // Offsets are bytes, per field names.
+}
+
+std::string GetMatch(const DocumentProto& document,
+                     const SnippetProto& snippet_proto,
+                     const std::string& property_name, int snippet_index) {
+  const SnippetMatchProto* match =
+      GetSnippetMatch(snippet_proto, property_name, snippet_index);
+  const PropertyProto* property = GetProperty(document, property_name);
+  if (match == nullptr || property == nullptr) {
+    return "";  // No such snippet match or property.
+  }
+  std::string_view value = property->string_values(match->values_index());
+  return std::string(
+      value.substr(match->exact_match_position(), match->exact_match_bytes()));  // Exact-match span, not the window.
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/testing/snippet-helpers.h b/icing/testing/snippet-helpers.h
new file mode 100644
index 0000000..124e421
--- /dev/null
+++ b/icing/testing/snippet-helpers.h
@@ -0,0 +1,60 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_SNIPPET_HELPERS_H_
+#define ICING_TESTING_SNIPPET_HELPERS_H_
+
+#include <string>
+
+#include "icing/proto/document.pb.h"
+#include "icing/proto/search.pb.h"
+
+namespace icing {
+namespace lib {
+
+// Retrieve pointer to the snippet_index'th SnippetMatchProto within the
+// EntryProto identified by property_name within snippet_proto.
+// Returns nullptr
+// - if there is no EntryProto within snippet_proto corresponding to
+// property_name.
+// - if there is no SnippetMatchProto at snippet_index within the EntryProto
+const SnippetMatchProto* GetSnippetMatch(const SnippetProto& snippet_proto,
+ const std::string& property_name,
+ int snippet_index);
+
+// Retrieve pointer to the PropertyProto identified by property_name.
+// Returns nullptr if no such property exists.
+const PropertyProto* GetProperty(const DocumentProto& document,
+ const std::string& property_name);
+
+// Retrieves the window defined by the SnippetMatchProto returned by
+// GetSnippetMatch(snippet_proto, property_name, snippet_index) for the property
+// returned by GetProperty(document, property_name).
+// Returns "" if no such property, snippet or window exists.
+std::string GetWindow(const DocumentProto& document,
+ const SnippetProto& snippet_proto,
+ const std::string& property_name, int snippet_index);
+
+// Retrieves the match defined by the SnippetMatchProto returned by
+// GetSnippetMatch(snippet_proto, property_name, snippet_index) for the property
+// returned by GetProperty(document, property_name).
+// Returns "" if no such property or snippet exists.
+std::string GetMatch(const DocumentProto& document,
+ const SnippetProto& snippet_proto,
+ const std::string& property_name, int snippet_index);
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_SNIPPET_HELPERS_H_
diff --git a/icing/testing/test-data.cc b/icing/testing/test-data.cc
new file mode 100644
index 0000000..9e74531
--- /dev/null
+++ b/icing/testing/test-data.cc
@@ -0,0 +1,78 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/testing/test-data.h"
+
+#include <sys/mman.h>
+
+#include <cstdint>
+
+#include "devtools/build/runtime/get_runfiles_dir.h"
+#include "utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "unicode/udata.h"
+#include "unicode/utypes.h"
+
+namespace icing {
+namespace lib {
+namespace {
+constexpr char kGoogle3LangIdModelPath[] =
+    "nlp/saft/components/lang_id/mobile/fb_model/models/latest_model.smfb";
+} // namespace
+
+std::string GetTestFilePath(const std::string& google3_relative_file_path) {
+  return absl_ports::StrCat(devtools_build::testonly::GetTestSrcdir(),
+                            "/google3/", google3_relative_file_path);
+}
+
+std::string GetLangIdModelPath() {
+  return GetTestFilePath(kGoogle3LangIdModelPath);
+}
+
+libtextclassifier3::Status SetUpICUDataFile(
+    const std::string& icu_data_file_relative_path) {
+  const std::string& file_path = GetTestFilePath(icu_data_file_relative_path);
+
+  Filesystem filesystem;
+  int64_t file_size = filesystem.GetFileSize(file_path.c_str());
+  ScopedFd fd(filesystem.OpenForRead(file_path.c_str()));
+
+  // TODO(samzheng): figure out why icing::MemoryMappedFile causes
+  // segmentation fault here.
+  const void* data =
+      mmap(nullptr, file_size, PROT_READ, MAP_PRIVATE, fd.get(), 0);
+  if (data == MAP_FAILED) {
+    // Also covers a missing/unreadable file: mmap fails (EBADF/EINVAL) when
+    // the fd or size is bad, instead of handing ICU a garbage pointer.
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to mmap ICU data file: ", file_path));
+  }
+
+  UErrorCode status = U_ZERO_ERROR;
+  // ICU retains `data` for the life of the process; the mapping is
+  // intentionally never munmap'd.
+  udata_setCommonData(data, &status);
+
+  if (U_FAILURE(status)) {
+    return absl_ports::InternalError(
+        "Failed to set up ICU data, please check if you have the data file at "
+        "the given path.");
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/testing/test-data.h b/icing/testing/test-data.h
new file mode 100644
index 0000000..c780f0e
--- /dev/null
+++ b/icing/testing/test-data.h
@@ -0,0 +1,50 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_TEST_DATA_H_
+#define ICING_TESTING_TEST_DATA_H_
+
+#include <string>
+
+#include "utils/base/status.h"
+
+// This file provides functions for getting / setting up absolute test file
+// paths. They are specific to Blaze and Google3 and should be changed when used
+// in AOSP / Gerrit.
+namespace icing {
+namespace lib {
+
+// The input path should be a relative path under google3. The function returns
+// an absolute path to the file during unit testing. Before calling this
+// function, please make sure the test file is added in "data" attribute in
+// portable_cc_test or any other test build rules.
+std::string GetTestFilePath(const std::string& google3_relative_file_path);
+
+// Returns the latest LangId model in Google3.
+std::string GetLangIdModelPath();
+
+// This is for unit testing in Google3. The library binary doesn't contain any
+// ICU data files, so we generate a .dat file at compile time and here make ICU
+// use that file.
+//
+// Returns:
+// Ok on success
+// INTERNAL_ERROR if failed on any errors
+libtextclassifier3::Status SetUpICUDataFile(
+ const std::string& icu_data_file_relative_path);
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_TEST_DATA_H_
diff --git a/icing/testing/tmp-directory.cc b/icing/testing/tmp-directory.cc
new file mode 100644
index 0000000..ea25fe2
--- /dev/null
+++ b/icing/testing/tmp-directory.cc
@@ -0,0 +1,49 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/testing/tmp-directory.h"
+
+#include <cstdlib>
+#include <string>
+
+#include "icing/absl_ports/str_cat.h"
+
+namespace icing {
+namespace lib {
+
+// Some other options for getting a tmp directory:
+// 1. FLAGS_test_tmpdir. We don't use FLAGS_test_tmpdir because it only exists
+//    in the internal version of googletest.h (as of June 2019)
+// 2. ::testing::TempDir(). It returns "/sdcard" for Android emulators in which
+//    the sdcard file format is FAT32
+//    (https://developer.android.com/studio/command-line/mksdcard). FAT32
+//    doesn't support sparse files so that it fails some tests in
+//    //icing/file/filesystem_test.cc.
+//    The sparse file related methods are mostly for reporting/logging purposes
+//    and not affecting any system behaviors.
+std::string GetTestTempDir() {
+#ifdef __ANDROID__
+  return "/data/local/tmp";
+#elif defined(__APPLE__)
+  // $HOME can be unset (e.g. some sandboxed runners); fall back to /tmp
+  // rather than passing nullptr to StrCat, which is undefined behavior.
+  const char* home = getenv("HOME");
+  return home != nullptr ? absl_ports::StrCat(home, "/tmp") : "/tmp";
+#else
+  return "/tmp";
+#endif // __ANDROID__
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/testing/tmp-directory.h b/icing/testing/tmp-directory.h
new file mode 100644
index 0000000..0999007
--- /dev/null
+++ b/icing/testing/tmp-directory.h
@@ -0,0 +1,30 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_TMP_DIRECTORY_H_
+#define ICING_TESTING_TMP_DIRECTORY_H_
+
+#include <string>
+
+namespace icing {
+namespace lib {
+
+// Returns an absolute path to a tmpdir on the test running the test.
+// The caller should clean up all files that it created in this dir.
+std::string GetTestTempDir();
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_TMP_DIRECTORY_H_
diff --git a/icing/tokenization/language-detector.cc b/icing/tokenization/language-detector.cc
new file mode 100644
index 0000000..aa29fc3
--- /dev/null
+++ b/icing/tokenization/language-detector.cc
@@ -0,0 +1,71 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/tokenization/language-detector.h"
+
+#include "utils/base/statusor.h"
+#include "nlp/saft/components/lang_id/mobile/fb_model/lang-id-from-fb.h"
+#include "nlp/saft/components/lang_id/mobile/lang-id.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+
+namespace icing {
+namespace lib {
+using ::nlp_saft::mobile::lang_id::GetLangIdFromFlatbufferFile;
+using ::nlp_saft::mobile::lang_id::LangId;
+
+class LanguageDetectorWithLangId : public LanguageDetector {  // LangId-backed implementation; contract documented in language-detector.h.
+ public:
+  static libtextclassifier3::StatusOr<
+      std::unique_ptr<LanguageDetectorWithLangId>>
+  Create(const std::string& lang_id_model_path) {
+    auto language_detector = std::unique_ptr<LanguageDetectorWithLangId>(
+        new LanguageDetectorWithLangId(lang_id_model_path));
+    if (language_detector->is_valid()) {
+      return language_detector;
+    }
+    return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+        "Failed to create a language detector with LangId model path: ",
+        lang_id_model_path));
+  }
+
+  libtextclassifier3::StatusOr<std::string> DetectLanguage(
+      std::string_view text) const override {
+    const std::string& lang_found =
+        lang_id_->FindLanguage(text.data(), text.length());
+    if (lang_found == LangId::kUnknownLanguageCode) {  // Model could not classify the text.
+      return absl_ports::NotFoundError(
+          absl_ports::StrCat("Language not found in text: ", text));
+    }
+    return lang_found;
+  }
+
+ private:
+  // TODO(samzheng): Use GetLangIdWithParamsFromCc() as a fallback when it's
+  // available in AOSP
+  explicit LanguageDetectorWithLangId(const std::string& lang_id_model_path)
+      : lang_id_(GetLangIdFromFlatbufferFile(lang_id_model_path)) {}
+
+  std::unique_ptr<LangId> lang_id_;
+
+  bool is_valid() { return lang_id_->is_valid(); }  // NOTE(review): assumes GetLangIdFromFlatbufferFile never returns null -- verify.
+};
+
+libtextclassifier3::StatusOr<std::unique_ptr<LanguageDetector>>
+LanguageDetector::CreateWithLangId(const std::string& lang_id_model_path) {
+  return LanguageDetectorWithLangId::Create(lang_id_model_path);  // Concrete type stays private to this file.
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/tokenization/language-detector.h b/icing/tokenization/language-detector.h
new file mode 100644
index 0000000..07b31ff
--- /dev/null
+++ b/icing/tokenization/language-detector.h
@@ -0,0 +1,53 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TOKENIZATION_LANGUAGE_DETECTOR_H_
+#define ICING_TOKENIZATION_LANGUAGE_DETECTOR_H_
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "utils/base/statusor.h"
+
+namespace icing {
+namespace lib {
+
+class LanguageDetector {
+ public:
+ virtual ~LanguageDetector() = default;
+
+ // Creates a language detector that uses the given LangId model.
+ //
+ // Returns:
+ // A LanguageDetector on success
+ // INVALID_ARGUMENT if fails to load model
+ static libtextclassifier3::StatusOr<std::unique_ptr<LanguageDetector>>
+ CreateWithLangId(const std::string& lang_id_model_path);
+
+ // Detects the language of the given text, if there're multiple languages, the
+ // one with the biggest possibility will be returned. The two-letter language
+ // code uses the ISO-639 standard (https://en.wikipedia.org/wiki/ISO_639).
+ //
+ // Returns:
+ // language code on success
+ // NOT_FOUND if no language detected
+ virtual libtextclassifier3::StatusOr<std::string> DetectLanguage(
+ std::string_view text) const = 0;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TOKENIZATION_LANGUAGE_DETECTOR_H_
diff --git a/icing/tokenization/language-detector_test.cc b/icing/tokenization/language-detector_test.cc
new file mode 100644
index 0000000..5958e5a
--- /dev/null
+++ b/icing/tokenization/language-detector_test.cc
@@ -0,0 +1,81 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/tokenization/language-detector.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/test-data.h"
+
+namespace icing {
+namespace lib {
+namespace {
+using ::testing::Eq;
+
+TEST(LanguageDetectorTest, BadFilePath) {
+ EXPECT_THAT(LanguageDetector::CreateWithLangId("Bad file path"),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// TODO(samzheng): more tests for other languages and mixed languages
+TEST(LanguageDetectorTest, DetectLanguage) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto language_detector,
+ LanguageDetector::CreateWithLangId(GetLangIdModelPath()));
+
+ EXPECT_THAT(language_detector->DetectLanguage(" , "),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(language_detector->DetectLanguage("hello world"),
+ IsOkAndHolds(Eq("en"))); // English
+
+ EXPECT_THAT(language_detector->DetectLanguage("Selam Dünya"),
+ IsOkAndHolds(Eq("tr"))); // Turkish
+
+ EXPECT_THAT(language_detector->DetectLanguage("Bonjour le monde"),
+ IsOkAndHolds(Eq("fr"))); // French
+
+ EXPECT_THAT(language_detector->DetectLanguage("你好世界"),
+ IsOkAndHolds(Eq("zh"))); // Chinese
+
+ EXPECT_THAT(language_detector->DetectLanguage("こんにちは世界"),
+ IsOkAndHolds(Eq("ja"))); // Japanese
+
+ EXPECT_THAT(language_detector->DetectLanguage("สวัสดีชาวโลก"),
+ IsOkAndHolds(Eq("th"))); // Thai
+
+ EXPECT_THAT(language_detector->DetectLanguage("안녕 세상"),
+ IsOkAndHolds(Eq("ko"))); // Korean
+
+ EXPECT_THAT(language_detector->DetectLanguage("Hallo Wereld"),
+ IsOkAndHolds(Eq("nl"))); // Dutch
+
+ EXPECT_THAT(language_detector->DetectLanguage("Hola Mundo"),
+ IsOkAndHolds(Eq("es"))); // Spanish
+
+ EXPECT_THAT(language_detector->DetectLanguage("नमस्ते दुनिया"),
+ IsOkAndHolds(Eq("hi"))); // Hindi
+
+ EXPECT_THAT(language_detector->DetectLanguage("مرحبا بالعالم"),
+ IsOkAndHolds(Eq("ar"))); // Arabic
+
+ EXPECT_THAT(language_detector->DetectLanguage("Привет, мир"),
+ IsOkAndHolds(Eq("ru"))); // Russian
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/tokenization/language-segmenter.cc b/icing/tokenization/language-segmenter.cc
new file mode 100644
index 0000000..8c64f96
--- /dev/null
+++ b/icing/tokenization/language-segmenter.cc
@@ -0,0 +1,196 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/tokenization/language-segmenter.h"
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/tokenization/language-detector.h"
+#include "icing/util/i18n-utils.h"
+#include "unicode/ubrk.h"
+#include "unicode/uchar.h"
+#include "unicode/umachine.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+// Space character used by Iterator::GetTerm() to collapse runs of ASCII
+// whitespace into a single-space term (Rule 3 in language-segmenter.h).
+constexpr char kASCIISpace = ' ';
+}  // namespace
+
+// Takes ownership of the detector and of the default locale string.
+// The locale parameter is deliberately non-const: top-level const on a
+// by-value parameter is not part of the function signature (so this still
+// matches the header declaration), and dropping it lets std::move below
+// perform a real move instead of silently copying.
+LanguageSegmenter::LanguageSegmenter(
+    std::unique_ptr<LanguageDetector> language_detector,
+    std::string default_locale)
+    : language_detector_(std::move(language_detector)),
+      default_locale_(std::move(default_locale)) {}
+
+// Factory: builds the LangId-based language detector first, then wires it
+// into a new LanguageSegmenter. Failures from the detector factory are
+// propagated to the caller unchanged.
+libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>>
+LanguageSegmenter::Create(const std::string& lang_id_model_path,
+                          const std::string& default_locale) {
+  ICING_ASSIGN_OR_RETURN(std::unique_ptr<LanguageDetector> detector,
+                         LanguageDetector::CreateWithLangId(lang_id_model_path));
+  return std::unique_ptr<LanguageSegmenter>(
+      new LanguageSegmenter(std::move(detector), default_locale));
+}
+
+// Segments `text` using a locale chosen by language detection, falling back
+// to default_locale_ when no language can be detected (e.g. for input that
+// is only punctuation).
+libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
+LanguageSegmenter::Segment(const std::string_view text) const {
+  // TODO(b/143769125): Remove LangId for now.
+  libtextclassifier3::StatusOr<std::string> language_or =
+      language_detector_->DetectLanguage(text);
+  const std::string& locale =
+      language_or.ok() ? language_or.ValueOrDie() : default_locale_;
+  return LanguageSegmenter::Iterator::Create(text, locale);
+}
+
+// Convenience wrapper around Segment(): drains the iterator and collects
+// every term into a vector. The returned string_views point into the
+// caller's `text` buffer.
+libtextclassifier3::StatusOr<std::vector<std::string_view>>
+LanguageSegmenter::GetAllTerms(const std::string_view text) const {
+  ICING_ASSIGN_OR_RETURN(std::unique_ptr<Iterator> term_iterator,
+                         Segment(text));
+  std::vector<std::string_view> all_terms;
+  while (term_iterator->Advance()) {
+    all_terms.push_back(term_iterator->GetTerm());
+  }
+  return all_terms;
+}
+
+// Creates and initializes an iterator over `text` using ICU word-break rules
+// for `locale`. Returns INTERNAL if the underlying ICU objects cannot be
+// set up.
+//
+// NOTE(review): `locale` is a const by-value parameter, so the std::move
+// below degrades to a copy; fixing it requires also changing the private
+// Iterator constructor's `const std::string&&` parameter in the header.
+libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
+LanguageSegmenter::Iterator::Create(std::string_view text,
+                                    const std::string locale) {
+  std::unique_ptr<Iterator> iterator(new Iterator(text, std::move(locale)));
+  if (iterator->Initialize()) {
+    return iterator;
+  }
+  return absl_ports::InternalError("Unable to create a term iterator");
+}
+
+// Members start in an empty/unstarted state; the ICU objects are created
+// later in Initialize(), not here.
+//
+// NOTE(review): a const rvalue reference cannot actually be moved from, so
+// std::move(locale) below performs a copy — consider changing the parameter
+// to `std::string locale` (header declaration must change too).
+LanguageSegmenter::Iterator::Iterator(const std::string_view text,
+                                      const std::string&& locale)
+    : break_iterator_(nullptr),
+      text_(text),
+      locale_(std::move(locale)),
+      u_text_(UTEXT_INITIALIZER),
+      term_start_index_(0),
+      term_end_index_exclusive_(0) {}
+
+// Releases the ICU break iterator and the UText wrapper created in
+// Initialize(). NOTE(review): if Initialize() was never called or ubrk_open
+// failed, break_iterator_ is nullptr here — verify that ubrk_close()
+// tolerates a null argument.
+LanguageSegmenter::Iterator::~Iterator() {
+  ubrk_close(break_iterator_);
+  utext_close(&u_text_);
+}
+
+// Opens the UText wrapper over text_ and an ICU word-break iterator for
+// locale_, then binds them together. Returns true on success.
+//
+// All three ICU calls share one UErrorCode: each call is a no-op if a
+// previous call has already failed, so a single check at the end suffices.
+bool LanguageSegmenter::Iterator::Initialize() {
+  UErrorCode status = U_ZERO_ERROR;
+  // Pass the explicit byte length: text_ is a string_view and is not
+  // guaranteed to be NUL-terminated, which the previous length of -1
+  // (meaning "NUL-terminated") required.
+  utext_openUTF8(&u_text_, text_.data(), /*length=*/text_.length(), &status);
+  break_iterator_ = ubrk_open(UBRK_WORD, locale_.c_str(), /*text=*/nullptr,
+                              /*textLength=*/0, &status);
+  ubrk_setUText(break_iterator_, &u_text_, &status);
+  return !U_FAILURE(status);
+}
+
+// Moves the iterator to the next term that should be reported to callers,
+// skipping non-ASCII, non-alphabetic segments (see Rules 1 and 2 in
+// language-segmenter.h). Returns false when the end of text is reached.
+//
+// Implemented as a loop rather than the previous tail recursion so that a
+// long run of skipped segments (e.g. pages of full-width punctuation) cannot
+// overflow the stack.
+bool LanguageSegmenter::Iterator::Advance() {
+  while (true) {
+    // Prerequisite check: a previous call already hit the end.
+    if (term_end_index_exclusive_ == UBRK_DONE) {
+      return false;
+    }
+
+    if (term_end_index_exclusive_ == 0) {
+      // First Advance() call
+      term_start_index_ = ubrk_first(break_iterator_);
+    } else {
+      term_start_index_ = term_end_index_exclusive_;
+    }
+    term_end_index_exclusive_ = ubrk_next(break_iterator_);
+
+    // Reached the end
+    if (term_end_index_exclusive_ == UBRK_DONE) {
+      return false;
+    }
+
+    // Rule 1: all ASCII terms will be returned.
+    // We know it's an ASCII term by checking the first char.
+    if (i18n_utils::IsAscii(text_[term_start_index_])) {
+      return true;
+    }
+
+    UChar32 uchar32 = i18n_utils::GetUChar32At(text_.data(), text_.length(),
+                                               term_start_index_);
+    // Rule 2: for non-ASCII terms, only the alphabetic terms are returned.
+    // We know it's an alphabetic term by checking the first unicode character.
+    if (u_isUAlphabetic(uchar32)) {
+      return true;
+    }
+    // Otherwise skip this segment and continue with the next one.
+  }
+}
+
+// Returns a view into text_ for the current term. Only valid after Advance()
+// has returned true; the view stays valid as long as the original text does.
+std::string_view LanguageSegmenter::Iterator::GetTerm() const {
+  if (text_[term_start_index_] == kASCIISpace) {
+    // Rule 3: multiple continuous whitespaces are treated as one.
+    return std::string_view(&text_[term_start_index_], 1);
+  }
+  return text_.substr(term_start_index_,
+                      term_end_index_exclusive_ - term_start_index_);
+}
+
+// Repositions the iterator to the first break boundary following `offset`
+// and advances once past it to establish the term's end. Returns the new
+// term start, or NOT_FOUND when no boundary exists after offset.
+//
+// NOTE(review): unlike Advance(), this does not apply the ASCII/alphabetic
+// filtering rules, so it may land on a segment Advance() would have
+// skipped — confirm whether that is intended.
+libtextclassifier3::StatusOr<int32_t>
+LanguageSegmenter::Iterator::ResetToTermStartingAfter(int32_t offset) {
+  term_start_index_ = ubrk_following(break_iterator_, offset);
+  if (term_start_index_ == UBRK_DONE) {
+    return absl_ports::NotFoundError("");
+  }
+  term_end_index_exclusive_ = ubrk_next(break_iterator_);
+  if (term_end_index_exclusive_ == UBRK_DONE) {
+    return absl_ports::NotFoundError("");
+  }
+  return term_start_index_;
+}
+
+// Repositions the iterator to the last break boundary preceding `offset`,
+// then advances once to establish the term's end. Returns NOT_FOUND when no
+// boundary exists before offset.
+//
+// NOTE(review): like ResetToTermStartingAfter(), this bypasses the
+// filtering rules that Advance() applies.
+libtextclassifier3::Status
+LanguageSegmenter::Iterator::ResetToTermStartingBefore(int32_t offset) {
+  term_start_index_ = ubrk_preceding(break_iterator_, offset);
+  if (term_start_index_ == UBRK_DONE) {
+    return absl_ports::NotFoundError("");
+  }
+  term_end_index_exclusive_ = ubrk_next(break_iterator_);
+  if (term_end_index_exclusive_ == UBRK_DONE) {
+    return absl_ports::NotFoundError("");
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+// Resets to the last term that ends at or before `offset`: first jumps to
+// the term starting before offset, and if that term runs past offset, steps
+// back one more term. Returns the resulting term start.
+libtextclassifier3::StatusOr<int32_t>
+LanguageSegmenter::Iterator::ResetToTermEndingBefore(int32_t offset) {
+  ICING_RETURN_IF_ERROR(ResetToTermStartingBefore(offset));
+  if (term_end_index_exclusive_ > offset) {
+    // This term ends after offset. So we need to get the term just before this
+    // one.
+    ICING_RETURN_IF_ERROR(ResetToTermStartingBefore(term_start_index_));
+  }
+  return term_start_index_;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/tokenization/language-segmenter.h b/icing/tokenization/language-segmenter.h
new file mode 100644
index 0000000..2b8b9aa
--- /dev/null
+++ b/icing/tokenization/language-segmenter.h
@@ -0,0 +1,192 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TOKENIZATION_LANGUAGE_SEGMENTER_H_
+#define ICING_TOKENIZATION_LANGUAGE_SEGMENTER_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <vector>
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/tokenization/language-detector.h"
+#include "unicode/ubrk.h"
+#include "unicode/uloc.h"
+
+namespace icing {
+namespace lib {
+
+// This class is used to segment sentences into words based on rules
+// (https://unicode.org/reports/tr29/#Word_Boundaries) and language
+// understanding. Based on the basic segmentation done by UBreakIterator,
+// some extra rules are applied in this class:
+//
+// 1. All ASCII terms will be returned.
+// 2. For non-ASCII terms, only the alphabetic terms are returned, which means
+// non-ASCII punctuation and special characters are left out.
+// 3. Multiple continuous whitespaces are treated as one.
+//
+// The rules above are common to the high-level tokenizers that might use this
+// class. Other special tokenization logic will be in each tokenizer.
+class LanguageSegmenter {
+ public:
+  LanguageSegmenter(const LanguageSegmenter&) = delete;
+  LanguageSegmenter& operator=(const LanguageSegmenter&) = delete;
+
+  // Creates a language segmenter that uses the given LangId model. Default
+  // locale is used when language can't be detected.
+  //
+  // Returns:
+  //   A LanguageSegmenter on success
+  //   INVALID_ARGUMENT if fails to load model
+  static libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>>
+  Create(const std::string& lang_id_model_path,
+         const std::string& default_locale = ULOC_US);
+
+  // An iterator helping to find terms in the input text.
+  // Example usage:
+  //
+  // while (iterator.Advance()) {
+  //   const std::string_view term = iterator.GetTerm();
+  //   // Do something
+  // }
+  class Iterator {
+   public:
+    // Factory function to create a segment iterator based on the given locale.
+    //
+    // Returns:
+    //   An iterator on success
+    //   INTERNAL_ERROR if unable to create
+    static libtextclassifier3::StatusOr<
+        std::unique_ptr<LanguageSegmenter::Iterator>>
+    Create(std::string_view text, const std::string locale);
+
+    // Iterator owns raw ICU resources (break_iterator_ and u_text_) that are
+    // released exactly once in the destructor, so copying must be disallowed
+    // to prevent a double close.
+    Iterator(const Iterator&) = delete;
+    Iterator& operator=(const Iterator&) = delete;
+
+    ~Iterator();
+
+    // Advances to the next term. Returns false if it has reached the end.
+    bool Advance();
+
+    // Returns the current term. It can be called only when Advance() returns
+    // true.
+    std::string_view GetTerm() const;
+
+    // Resets the iterator to point to the first term that starts after offset.
+    // GetTerm will now return that term.
+    //
+    // Returns:
+    //   On success, the starting position of the first term that starts after
+    //   offset.
+    //   NOT_FOUND if an error occurred or there are no terms that start after
+    //   offset.
+    libtextclassifier3::StatusOr<int32_t> ResetToTermStartingAfter(
+        int32_t offset);
+
+    // Resets the iterator to point to the first term that ends before offset.
+    // GetTerm will now return that term.
+    //
+    // Returns:
+    //   On success, the starting position of the first term that ends before
+    //   offset.
+    //   NOT_FOUND if an error occurred or there are no terms that ends before
+    //   offset.
+    libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBefore(
+        int32_t offset);
+
+   private:
+    Iterator(std::string_view text, const std::string&& locale);
+
+    // Returns true on success
+    bool Initialize();
+
+    // Resets the iterator to point to the first term that starts before offset.
+    // GetTerm will now return that term.
+    //
+    // Returns:
+    //   OK on success
+    //   NOT_FOUND if an error occurred or there are no terms that start before
+    //   offset.
+    libtextclassifier3::Status ResetToTermStartingBefore(int32_t offset);
+
+    // The underlying class that does the segmentation, ubrk_close() must be
+    // called after using.
+    UBreakIterator* break_iterator_;
+
+    // Text to be segmented
+    const std::string_view text_;
+
+    // Locale of the input text, used to help segment more accurately. If a
+    // wrong locale is set, text could probably still be segmented correctly
+    // because the default break iterator behavior is used for most locales.
+    const std::string locale_;
+
+    // A thin wrapper around the input UTF8 text, needed by break_iterator_.
+    // utext_close() must be called after using.
+    UText u_text_;
+
+    // The start and end indices are used to track the positions of current
+    // term.
+    int term_start_index_;
+    int term_end_index_exclusive_;
+  };
+
+  // Segments the input text into terms. The segmentation depends on the
+  // language detected in the input text.
+  //
+  // Returns:
+  //   An iterator of terms on success
+  //   INTERNAL_ERROR if any error occurs
+  //
+  // Note: The underlying char* data of the input string won't be copied but
+  // shared with the return strings, so please make sure the input string
+  // outlives the returned iterator.
+  //
+  // Note: It could happen that the language detected from text is wrong, then
+  // there would be a small chance that the text is segmented incorrectly.
+  libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter::Iterator>>
+  Segment(std::string_view text) const;
+
+  // Segments and returns all terms in the input text. The segmentation depends
+  // on the language detected in the input text.
+  //
+  // Returns:
+  //   A list of terms on success
+  //   INTERNAL_ERROR if any error occurs
+  //
+  // Note: The underlying char* data of the input string won't be copied but
+  // shared with the return strings, so please make sure the input string
+  // outlives the returned terms.
+  //
+  // Note: It could happen that the language detected from text is wrong, then
+  // there would be a small chance that the text is segmented incorrectly.
+  libtextclassifier3::StatusOr<std::vector<std::string_view>> GetAllTerms(
+      std::string_view text) const;
+
+ private:
+  LanguageSegmenter(std::unique_ptr<LanguageDetector> language_detector,
+                    const std::string default_locale);
+
+  // Used to detect languages in text
+  const std::unique_ptr<LanguageDetector> language_detector_;
+
+  // Used as default locale when language can't be detected in text
+  const std::string default_locale_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TOKENIZATION_LANGUAGE_SEGMENTER_H_
diff --git a/icing/tokenization/language-segmenter_benchmark.cc b/icing/tokenization/language-segmenter_benchmark.cc
new file mode 100644
index 0000000..889763b
--- /dev/null
+++ b/icing/tokenization/language-segmenter_benchmark.cc
@@ -0,0 +1,181 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/test-data.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/transform/normalizer.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/tokenization:language-segmenter_benchmark
+//
+// $ blaze-bin/icing/tokenization/language-segmenter_benchmark
+// --benchmarks=all
+//
+// Run on an Android device:
+// Make target //icing/tokenization:language-segmenter depend on
+// //third_party/icu
+//
+// Download LangId model file from
+// //nlp/saft/components/lang_id/mobile/fb_model:models/latest_model.smfb and
+// put it into your device:
+// $ adb push [your model path] /data/local/tmp/
+//
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/tokenization:language-segmenter_benchmark
+//
+// $ adb push
+// blaze-bin/icing/tokenization/language-segmenter_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/language-segmenter_benchmark --benchmarks=all
+// --adb
+
+// Flag to tell the benchmark that it'll be run on an Android device via adb,
+// the benchmark will set up data files accordingly.
+ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device");
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Builds a segmenter using the on-device model path when running under adb,
+// otherwise the test-data model path. Crashes on failure, which is
+// acceptable for a benchmark binary.
+std::unique_ptr<LanguageSegmenter> CreateLanguageSegmenter() {
+  if (absl::GetFlag(FLAGS_adb)) {
+    return LanguageSegmenter::Create("/data/local/tmp/latest_model.smfb")
+        .ValueOrDie();
+  }
+  return LanguageSegmenter::Create(GetLangIdModelPath()).ValueOrDie();
+}
+
+// Benchmarks segmentation of one long ASCII run with no delimiters;
+// state.range(0) is the input length in bytes.
+void BM_SegmentNoSpace(benchmark::State& state) {
+  bool run_via_adb = absl::GetFlag(FLAGS_adb);
+  if (!run_via_adb) {
+    ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+  }
+
+  std::unique_ptr<LanguageSegmenter> language_segmenter =
+      CreateLanguageSegmenter();
+
+  // A single token of repeated 'A's, so the segmenter sees no boundaries.
+  std::string input_string(state.range(0), 'A');
+
+  for (auto _ : state) {
+    std::unique_ptr<LanguageSegmenter::Iterator> iterator =
+        language_segmenter->Segment(input_string).ValueOrDie();
+    while (iterator->Advance()) {
+      iterator->GetTerm();
+    }
+  }
+}
+BENCHMARK(BM_SegmentNoSpace)
+    ->Arg(1000)
+    ->Arg(2000)
+    ->Arg(4000)
+    ->Arg(8000)
+    ->Arg(16000)
+    ->Arg(32000)
+    ->Arg(64000)
+    ->Arg(128000)
+    ->Arg(256000)
+    ->Arg(384000)
+    ->Arg(512000)
+    ->Arg(1024000)
+    ->Arg(2048000)
+    ->Arg(4096000);
+
+// Benchmarks segmentation of alternating letter/space input;
+// state.range(0) is the input length in bytes.
+void BM_SegmentWithSpaces(benchmark::State& state) {
+  bool run_via_adb = absl::GetFlag(FLAGS_adb);
+  if (!run_via_adb) {
+    ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+  }
+
+  std::unique_ptr<LanguageSegmenter> language_segmenter =
+      CreateLanguageSegmenter();
+
+  std::string input_string(state.range(0), 'A');
+  // Turn every other character into a space. Use the string's unsigned size
+  // type for the index to avoid a signed/unsigned comparison with length().
+  for (std::string::size_type i = 1; i < input_string.length(); i += 2) {
+    input_string[i] = ' ';
+  }
+
+  for (auto _ : state) {
+    std::unique_ptr<LanguageSegmenter::Iterator> iterator =
+        language_segmenter->Segment(input_string).ValueOrDie();
+    while (iterator->Advance()) {
+      iterator->GetTerm();
+    }
+  }
+}
+BENCHMARK(BM_SegmentWithSpaces)
+    ->Arg(1000)
+    ->Arg(2000)
+    ->Arg(4000)
+    ->Arg(8000)
+    ->Arg(16000)
+    ->Arg(32000)
+    ->Arg(64000)
+    ->Arg(128000)
+    ->Arg(256000)
+    ->Arg(384000)
+    ->Arg(512000)
+    ->Arg(1024000)
+    ->Arg(2048000)
+    ->Arg(4096000);
+
+// Benchmarks segmentation of CJK text (no whitespace word delimiters);
+// state.range(0) is the minimum input length in bytes.
+void BM_SegmentCJK(benchmark::State& state) {
+  bool run_via_adb = absl::GetFlag(FLAGS_adb);
+  if (!run_via_adb) {
+    ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+  }
+
+  std::unique_ptr<LanguageSegmenter> language_segmenter =
+      CreateLanguageSegmenter();
+
+  std::string input_string;
+  // Cast the benchmark argument to the string's unsigned size type to avoid
+  // a signed/unsigned comparison with length().
+  const auto target_length =
+      static_cast<std::string::size_type>(state.range(0));
+  while (input_string.length() < target_length) {
+    input_string.append("你好こんにちは안녕하세요");
+  }
+
+  for (auto _ : state) {
+    std::unique_ptr<LanguageSegmenter::Iterator> iterator =
+        language_segmenter->Segment(input_string).ValueOrDie();
+    while (iterator->Advance()) {
+      iterator->GetTerm();
+    }
+  }
+}
+BENCHMARK(BM_SegmentCJK)
+    ->Arg(1000)
+    ->Arg(2000)
+    ->Arg(4000)
+    ->Arg(8000)
+    ->Arg(16000)
+    ->Arg(32000)
+    ->Arg(64000)
+    ->Arg(128000)
+    ->Arg(256000)
+    ->Arg(384000)
+    ->Arg(512000)
+    ->Arg(1024000)
+    ->Arg(2048000)
+    ->Arg(4096000);
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/tokenization/language-segmenter_test.cc b/icing/tokenization/language-segmenter_test.cc
new file mode 100644
index 0000000..d87dca4
--- /dev/null
+++ b/icing/tokenization/language-segmenter_test.cc
@@ -0,0 +1,314 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/tokenization/language-segmenter.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/i18n-test-utils.h"
+#include "icing/testing/test-data.h"
+
+namespace icing {
+namespace lib {
+namespace {
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::IsEmpty;
+
+// Test fixture that loads the ICU data file (required by the ICU break
+// iterator) before each test runs.
+class LanguageSegmenterTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    ICING_ASSERT_OK(
+        // File generated via icu_data_file rule in //icing/BUILD.
+        SetUpICUDataFile("icing/icu.dat"));
+  }
+};
+
+// Creating a segmenter with a nonexistent LangId model must fail with
+// INVALID_ARGUMENT rather than crash.
+TEST_F(LanguageSegmenterTest, BadModelPath) {
+  EXPECT_THAT(LanguageSegmenter::Create("Bad Model Path"),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// An empty input produces an empty term list, not an error.
+TEST_F(LanguageSegmenterTest, EmptyText) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  EXPECT_THAT(language_segmenter->GetAllTerms(""), IsOkAndHolds(IsEmpty()));
+}
+
+// ASCII words are split on whitespace, and the whitespace itself is also
+// returned as a term.
+TEST_F(LanguageSegmenterTest, SimpleText) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  EXPECT_THAT(language_segmenter->GetAllTerms("Hello World"),
+              IsOkAndHolds(ElementsAre("Hello", " ", "World")));
+}
+
+// Rule 1: ASCII punctuation marks are returned as their own terms.
+TEST_F(LanguageSegmenterTest, ASCII_Punctuation) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  // ASCII punctuation marks are kept
+  EXPECT_THAT(
+      language_segmenter->GetAllTerms("Hello, World!!!"),
+      IsOkAndHolds(ElementsAre("Hello", ",", " ", "World", "!", "!", "!")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("Open-source project"),
+              IsOkAndHolds(ElementsAre("Open", "-", "source", " ", "project")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("100%"),
+              IsOkAndHolds(ElementsAre("100", "%")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("A&B"),
+              IsOkAndHolds(ElementsAre("A", "&", "B")));
+}
+
+// Rule 1: ASCII special characters (incl. control chars like tab) are kept.
+TEST_F(LanguageSegmenterTest, ASCII_SpecialCharacter) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  // ASCII special characters are kept
+  EXPECT_THAT(language_segmenter->GetAllTerms("Pay $1000"),
+              IsOkAndHolds(ElementsAre("Pay", " ", "$", "1000")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("A+B"),
+              IsOkAndHolds(ElementsAre("A", "+", "B")));
+  // 0x0009 is the unicode for tab (within ASCII range).
+  std::string text_with_tab = absl_ports::StrCat(
+      "Hello", UcharToString(0x0009), UcharToString(0x0009), "World");
+  EXPECT_THAT(language_segmenter->GetAllTerms(text_with_tab),
+              IsOkAndHolds(ElementsAre("Hello", UcharToString(0x0009),
+                                       UcharToString(0x0009), "World")));
+}
+
+// Rule 2: non-ASCII, non-alphabetic segments are dropped from the output.
+TEST_F(LanguageSegmenterTest, Non_ASCII_Non_Alphabetic) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  // Full-width (non-ASCII) punctuation marks and special characters are left
+  // out.
+  EXPECT_THAT(language_segmenter->GetAllTerms("。?·Hello!×"),
+              IsOkAndHolds(ElementsAre("Hello")));
+}
+
+// Dotted acronyms stay together except for a trailing period, while commas
+// and spaces still split.
+TEST_F(LanguageSegmenterTest, Acronym) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  EXPECT_THAT(language_segmenter->GetAllTerms("U.S. Bank"),
+              IsOkAndHolds(ElementsAre("U.S", ".", " ", "Bank")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("I.B.M."),
+              IsOkAndHolds(ElementsAre("I.B.M", ".")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("I,B,M"),
+              IsOkAndHolds(ElementsAre("I", ",", "B", ",", "M")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("I B M"),
+              IsOkAndHolds(ElementsAre("I", " ", "B", " ", "M")));
+}
+
+// Verifies which punctuation characters act as word connectors (keep the
+// surrounding word together) versus ordinary separators.
+TEST_F(LanguageSegmenterTest, WordConnector) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  // According to unicode word break rules
+  // WB6(https://unicode.org/reports/tr29/#WB6),
+  // WB7(https://unicode.org/reports/tr29/#WB7), and a few others, some
+  // punctuation characters are used as word connectors. That is, words don't
+  // break before and after them. Here we just test some that we care about.
+
+  // Word connectors
+  EXPECT_THAT(language_segmenter->GetAllTerms("com.google.android"),
+              IsOkAndHolds(ElementsAre("com.google.android")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("com:google:android"),
+              IsOkAndHolds(ElementsAre("com:google:android")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("com'google'android"),
+              IsOkAndHolds(ElementsAre("com'google'android")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("com_google_android"),
+              IsOkAndHolds(ElementsAre("com_google_android")));
+
+  // Word connectors can be mixed
+  EXPECT_THAT(language_segmenter->GetAllTerms("com.google.android:icing"),
+              IsOkAndHolds(ElementsAre("com.google.android:icing")));
+
+  // Any heading and trailing characters are not connectors
+  EXPECT_THAT(language_segmenter->GetAllTerms(".com.google.android."),
+              IsOkAndHolds(ElementsAre(".", "com.google.android", ".")));
+
+  // Not word connectors
+  EXPECT_THAT(language_segmenter->GetAllTerms("com,google,android"),
+              IsOkAndHolds(ElementsAre("com", ",", "google", ",", "android")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("com-google-android"),
+              IsOkAndHolds(ElementsAre("com", "-", "google", "-", "android")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("com+google+android"),
+              IsOkAndHolds(ElementsAre("com", "+", "google", "+", "android")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("com*google*android"),
+              IsOkAndHolds(ElementsAre("com", "*", "google", "*", "android")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("com@google@android"),
+              IsOkAndHolds(ElementsAre("com", "@", "google", "@", "android")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("com^google^android"),
+              IsOkAndHolds(ElementsAre("com", "^", "google", "^", "android")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("com&google&android"),
+              IsOkAndHolds(ElementsAre("com", "&", "google", "&", "android")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("com|google|android"),
+              IsOkAndHolds(ElementsAre("com", "|", "google", "|", "android")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("com/google/android"),
+              IsOkAndHolds(ElementsAre("com", "/", "google", "/", "android")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("com;google;android"),
+              IsOkAndHolds(ElementsAre("com", ";", "google", ";", "android")));
+  EXPECT_THAT(
+      language_segmenter->GetAllTerms("com\"google\"android"),
+      IsOkAndHolds(ElementsAre("com", "\"", "google", "\"", "android")));
+}
+
+// Mid-word apostrophes keep contractions together; leading/trailing
+// apostrophes split off as their own terms.
+TEST_F(LanguageSegmenterTest, Apostrophes) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  EXPECT_THAT(language_segmenter->GetAllTerms("It's ok."),
+              IsOkAndHolds(ElementsAre("It's", " ", "ok", ".")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("He'll be back."),
+              IsOkAndHolds(ElementsAre("He'll", " ", "be", " ", "back", ".")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("'Hello 'World."),
+              IsOkAndHolds(ElementsAre("'", "Hello", " ", "'", "World", ".")));
+  EXPECT_THAT(language_segmenter->GetAllTerms("The dogs' bone"),
+              IsOkAndHolds(ElementsAre("The", " ", "dogs", "'", " ", "bone")));
+  // 0x2019 is the single right quote, should be treated the same as "'"
+  std::string token_with_quote =
+      absl_ports::StrCat("He", UcharToString(0x2019), "ll");
+  std::string text_with_quote =
+      absl_ports::StrCat(token_with_quote, " be back.");
+  EXPECT_THAT(
+      language_segmenter->GetAllTerms(text_with_quote),
+      IsOkAndHolds(ElementsAre(token_with_quote, " ", "be", " ", "back", ".")));
+}
+
+// Parentheses always split off as separate terms.
+TEST_F(LanguageSegmenterTest, Parentheses) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+
+  EXPECT_THAT(language_segmenter->GetAllTerms("(Hello)"),
+              IsOkAndHolds(ElementsAre("(", "Hello", ")")));
+
+  EXPECT_THAT(language_segmenter->GetAllTerms(")Hello("),
+              IsOkAndHolds(ElementsAre(")", "Hello", "(")));
+}
+
+// Surrounding quotes (double or single) split off as separate terms.
+TEST_F(LanguageSegmenterTest, Quotes) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+
+  EXPECT_THAT(language_segmenter->GetAllTerms("\"Hello\""),
+              IsOkAndHolds(ElementsAre("\"", "Hello", "\"")));
+
+  EXPECT_THAT(language_segmenter->GetAllTerms("'Hello'"),
+              IsOkAndHolds(ElementsAre("'", "Hello", "'")));
+}
+
+// Mixed letter/digit tokens are kept as single terms.
+TEST_F(LanguageSegmenterTest, Alphanumeric) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+
+  // Alphanumeric terms are allowed
+  EXPECT_THAT(language_segmenter->GetAllTerms("Se7en A4 3a"),
+              IsOkAndHolds(ElementsAre("Se7en", " ", "A4", " ", "3a")));
+}
+
+// Decimal points and thousands separators stay inside a number, but a
+// leading minus sign splits off.
+TEST_F(LanguageSegmenterTest, Number) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+
+  // Alphanumeric terms are allowed
+  EXPECT_THAT(
+      language_segmenter->GetAllTerms("3.141592653589793238462643383279"),
+      IsOkAndHolds(ElementsAre("3.141592653589793238462643383279")));
+
+  EXPECT_THAT(language_segmenter->GetAllTerms("3,456.789"),
+              IsOkAndHolds(ElementsAre("3,456.789")));
+
+  EXPECT_THAT(language_segmenter->GetAllTerms("-123"),
+              IsOkAndHolds(ElementsAre("-", "123")));
+}
+
+// Rule 3: a run of whitespace collapses to a single one-space term.
+TEST_F(LanguageSegmenterTest, ContinuousWhitespaces) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  // Multiple continuous whitespaces are treated as one.
+  const int kNumSeparators = 256;
+  const std::string text_with_spaces =
+      absl_ports::StrCat("Hello", std::string(kNumSeparators, ' '), "World");
+  EXPECT_THAT(language_segmenter->GetAllTerms(text_with_spaces),
+              IsOkAndHolds(ElementsAre("Hello", " ", "World")));
+}
+
+// Languages without whitespace word delimiters are segmented by the ICU
+// dictionary-based rules.
+TEST_F(LanguageSegmenterTest, CJKT) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  // CJKT (Chinese, Japanese, Khmer, Thai) are the 4 main languages that don't
+  // have whitespaces as word delimiter.
+
+  // Chinese
+  EXPECT_THAT(language_segmenter->GetAllTerms("我每天走路去上班。"),
+              IsOkAndHolds(ElementsAre("我", "每天", "走路", "去", "上班")));
+  // Japanese
+  EXPECT_THAT(language_segmenter->GetAllTerms("私は毎日仕事に歩いています。"),
+              IsOkAndHolds(ElementsAre("私", "は", "毎日", "仕事", "に", "歩",
+                                       "い", "てい", "ます")));
+  // Khmer
+  EXPECT_THAT(language_segmenter->GetAllTerms("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"),
+              IsOkAndHolds(ElementsAre("ញុំ", "ដើរទៅ", "ធ្វើការ", "រាល់ថ្ងៃ")));
+  // Thai
+  EXPECT_THAT(
+      language_segmenter->GetAllTerms("ฉันเดินไปทำงานทุกวัน"),
+      IsOkAndHolds(ElementsAre("ฉัน", "เดิน", "ไป", "ทำงาน", "ทุก", "วัน")));
+}
+
+// Accented Latin letters are alphabetic (Rule 2) and stay in one term.
+TEST_F(LanguageSegmenterTest, LatinLettersWithAccents) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  EXPECT_THAT(language_segmenter->GetAllTerms("āăąḃḅḇčćç"),
+              IsOkAndHolds(ElementsAre("āăąḃḅḇčćç")));
+}
+
+// TODO(samzheng): test cases for more languages (e.g. top 20 in the world)
+TEST_F(LanguageSegmenterTest, WhitespaceSplitLanguages) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  // Turkish
+  EXPECT_THAT(language_segmenter->GetAllTerms("merhaba dünya"),
+              IsOkAndHolds(ElementsAre("merhaba", " ", "dünya")));
+  // Korean
+  EXPECT_THAT(
+      language_segmenter->GetAllTerms("나는 매일 출근합니다."),
+      IsOkAndHolds(ElementsAre("나는", " ", "매일", " ", "출근합니다", ".")));
+}
+
+// TODO(samzheng): more mixed languages test cases
+// Scripts can change mid-string; each script's segmentation rules apply to
+// its own span.
+TEST_F(LanguageSegmenterTest, MixedLanguages) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  EXPECT_THAT(language_segmenter->GetAllTerms("How are you你好吗お元気ですか"),
+              IsOkAndHolds(ElementsAre("How", " ", "are", " ", "you", "你好",
+                                       "吗", "お", "元気", "です", "か")));
+}
+
+// The returned string_views must alias the caller's buffer (zero-copy
+// contract documented on GetAllTerms()).
+TEST_F(LanguageSegmenterTest, NotCopyStrings) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  // Validates that the input strings are not copied
+  const std::string text = "Hello World";
+  const char* word1_address = text.c_str();
+  const char* word2_address = text.c_str() + 6;
+  ICING_ASSERT_OK_AND_ASSIGN(std::vector<std::string_view> terms,
+                             language_segmenter->GetAllTerms(text));
+  ASSERT_THAT(terms, ElementsAre("Hello", " ", "World"));
+  const char* word1_result_address = terms.at(0).data();
+  const char* word2_result_address = terms.at(2).data();
+
+  // The underlying char* should be the same
+  EXPECT_THAT(word1_address, Eq(word1_result_address));
+  EXPECT_THAT(word2_address, Eq(word2_result_address));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/tokenization/plain-tokenizer.cc b/icing/tokenization/plain-tokenizer.cc
new file mode 100644
index 0000000..9cf6a0b
--- /dev/null
+++ b/icing/tokenization/plain-tokenizer.cc
@@ -0,0 +1,125 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/tokenization/plain-tokenizer.h"
+
+#include <cstdint>
+
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/util/i18n-utils.h"
+#include "unicode/umachine.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+// Helper function to validate a term.
+// A term is valid if:
+// 1. it's not empty
+// 2. it's not a whitespace
+// 3. it's not a punctuation mark
+//
+// Inspecting only the first code point is sufficient because the language
+// segmenter appears to emit whitespace runs and punctuation as their own
+// terms (see the segmenter tests) -- confirm if that contract ever changes.
+//
+// TODO(b/141007791): figure out how we'd like to support special characters
+// like "+", "&", "@", "#" in indexing and query tokenizers.
+bool IsValidTerm(std::string_view term) {
+  if (term.empty()) {
+    return false;
+  }
+  // Gets the first unicode character. We can know what the whole term is by
+  // checking only the first character.
+  UChar32 uchar32 = i18n_utils::GetUChar32At(term.data(), term.length(), 0);
+  return !u_isUWhiteSpace(uchar32) && !u_ispunct(uchar32);
+}
+}  // namespace
+
+// Plain tokenizer applies its rules to the results from language segmenter. It
+// simply filters out invalid terms from language segmenter and returns
+// everything else as tokens. Please refer to IsValidTerm() above for what terms
+// are valid.
+class PlainTokenIterator : public Tokenizer::Iterator {
+ public:
+  explicit PlainTokenIterator(
+      std::unique_ptr<LanguageSegmenter::Iterator> base_iterator)
+      : base_iterator_(std::move(base_iterator)) {}
+
+  // Advances to the next valid term, skipping whitespace/punctuation terms.
+  // Returns false once the underlying segmenter iterator is exhausted.
+  bool Advance() override {
+    bool found_next_valid_term = false;
+    while (!found_next_valid_term && base_iterator_->Advance()) {
+      current_term_ = base_iterator_->GetTerm();
+      found_next_valid_term = IsValidTerm(current_term_);
+    }
+    return found_next_valid_term;
+  }
+
+  // Returns the current term as a REGULAR token, or an INVALID token if no
+  // term has been produced yet (current_term_ is still empty).
+  Token GetToken() const override {
+    if (current_term_.empty()) {
+      return Token(Token::INVALID);
+    }
+    return Token(Token::REGULAR, current_term_);
+  }
+
+  // Repositions to the first valid token starting after 'offset'.
+  // NOTE(review): if the underlying reset fails, current_term_ keeps its
+  // previous value, so a subsequent GetToken() returns the stale token --
+  // confirm callers always check the return value first.
+  bool ResetToTokenAfter(int32_t offset) override {
+    if (!base_iterator_->ResetToTermStartingAfter(offset).ok()) {
+      return false;
+    }
+    current_term_ = base_iterator_->GetTerm();
+    if (!IsValidTerm(current_term_)) {
+      // If the current value isn't valid, advance to the next valid value.
+      return Advance();
+    }
+    return true;
+  }
+
+  // Repositions to the last valid token ending before 'offset', walking
+  // backwards through invalid (whitespace/punctuation) terms. The macro
+  // assigns the new offset on success and makes this method return false if
+  // the underlying reset fails (i.e. nothing precedes 'offset').
+  bool ResetToTokenBefore(int32_t offset) override {
+    ICING_ASSIGN_OR_RETURN_VAL(
+        offset, base_iterator_->ResetToTermEndingBefore(offset), false);
+    current_term_ = base_iterator_->GetTerm();
+    while (!IsValidTerm(current_term_)) {
+      // Haven't found a valid term yet. Retrieve the term prior to this one
+      // from the segmenter.
+      ICING_ASSIGN_OR_RETURN_VAL(
+          offset, base_iterator_->ResetToTermEndingBefore(offset), false);
+      current_term_ = base_iterator_->GetTerm();
+    }
+    return true;
+  }
+
+ private:
+  std::unique_ptr<LanguageSegmenter::Iterator> base_iterator_;
+  // View into the text being tokenized (not a copy); only valid while that
+  // text outlives this iterator.
+  std::string_view current_term_;
+};
+
+// Creates an iterator that lazily filters the language segmenter's terms
+// down to valid tokens (see IsValidTerm() above).
+//
+// Returns:
+//   A PlainTokenIterator on success
+//   Propagates any error from LanguageSegmenter::Segment()
+libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>>
+PlainTokenizer::Tokenize(std::string_view text) const {
+  ICING_ASSIGN_OR_RETURN(
+      std::unique_ptr<LanguageSegmenter::Iterator> base_iterator,
+      language_segmenter_.Segment(text));
+  return std::make_unique<PlainTokenIterator>(std::move(base_iterator));
+}
+
+// Convenience wrapper around Tokenize(): eagerly drains the iterator and
+// returns every token in order of appearance in 'text'.
+//
+// Returns:
+//   All tokens on success (possibly empty for all-invalid input)
+//   Propagates any error from Tokenize()
+libtextclassifier3::StatusOr<std::vector<Token>> PlainTokenizer::TokenizeAll(
+    std::string_view text) const {
+  ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator,
+                         Tokenize(text));
+  std::vector<Token> tokens;
+  while (iterator->Advance()) {
+    tokens.push_back(iterator->GetToken());
+  }
+  return tokens;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/tokenization/plain-tokenizer.h b/icing/tokenization/plain-tokenizer.h
new file mode 100644
index 0000000..cc3fe2e
--- /dev/null
+++ b/icing/tokenization/plain-tokenizer.h
@@ -0,0 +1,45 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TOKENIZATION_PLAIN_TOKENIZER_H_
+#define ICING_TOKENIZATION_PLAIN_TOKENIZER_H_
+
+#include "utils/base/statusor.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/tokenizer.h"
+
+namespace icing {
+namespace lib {
+
+// Provides basic tokenization on input text: runs the language segmenter and
+// keeps every segment except whitespace and punctuation (see the filtering
+// rules in plain-tokenizer.cc).
+class PlainTokenizer : public Tokenizer {
+ public:
+  // Does not take ownership; 'language_segmenter' must be non-null (it is
+  // dereferenced here) and must outlive this tokenizer, which stores only a
+  // reference.
+  explicit PlainTokenizer(const LanguageSegmenter* language_segmenter)
+      : language_segmenter_(*language_segmenter) {}
+
+  libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize(
+      std::string_view text) const override;
+
+  libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll(
+      std::string_view text) const override;
+
+ private:
+  // Used to segment input texts based on language understanding
+  const LanguageSegmenter& language_segmenter_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TOKENIZATION_PLAIN_TOKENIZER_H_
diff --git a/icing/tokenization/plain-tokenizer_test.cc b/icing/tokenization/plain-tokenizer_test.cc
new file mode 100644
index 0000000..a3790f9
--- /dev/null
+++ b/icing/tokenization/plain-tokenizer_test.cc
@@ -0,0 +1,313 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/tokenization/plain-tokenizer.h"
+
+#include <string_view>
+
+#include "icing/absl_ports/str_cat.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/i18n-test-utils.h"
+#include "icing/testing/test-data.h"
+#include "gmock/gmock.h"
+
+namespace icing {
+namespace lib {
+namespace {
+using ::testing::ElementsAre;
+using ::testing::IsEmpty;
+
+// Test fixture: loads the ICU data file before each test so the language
+// segmenter underneath the tokenizer can do Unicode-aware segmentation.
+class PlainTokenizerTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    ICING_ASSERT_OK(
+        // File generated via icu_data_file rule in //icing/BUILD.
+        SetUpICUDataFile("icing/icu.dat"));
+  }
+};
+
+// Basic English tokenization: empty input yields no tokens; whitespace and
+// punctuation are dropped while the words come back as REGULAR tokens.
+TEST_F(PlainTokenizerTest, Simple) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  std::unique_ptr<Tokenizer> plain_tokenizer =
+      std::make_unique<PlainTokenizer>(language_segmenter.get());
+
+  EXPECT_THAT(plain_tokenizer->TokenizeAll(""), IsOkAndHolds(IsEmpty()));
+
+  EXPECT_THAT(plain_tokenizer->TokenizeAll("Hello World"),
+              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"),
+                                       EqualsToken(Token::REGULAR, "World"))));
+
+  EXPECT_THAT(
+      plain_tokenizer->TokenizeAll(
+          "Lorem ipsum dolor sit amet, consectetur adipiscing elit. "
+          "Duis efficitur iaculis auctor."),
+      IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Lorem"),
+                               EqualsToken(Token::REGULAR, "ipsum"),
+                               EqualsToken(Token::REGULAR, "dolor"),
+                               EqualsToken(Token::REGULAR, "sit"),
+                               EqualsToken(Token::REGULAR, "amet"),
+                               EqualsToken(Token::REGULAR, "consectetur"),
+                               EqualsToken(Token::REGULAR, "adipiscing"),
+                               EqualsToken(Token::REGULAR, "elit"),
+                               EqualsToken(Token::REGULAR, "Duis"),
+                               EqualsToken(Token::REGULAR, "efficitur"),
+                               EqualsToken(Token::REGULAR, "iaculis"),
+                               EqualsToken(Token::REGULAR, "auctor"))));
+}
+
+// Verifies that non-space Unicode whitespace characters also separate tokens
+// and are themselves filtered out.
+TEST_F(PlainTokenizerTest, Whitespace) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  std::unique_ptr<Tokenizer> plain_tokenizer =
+      std::make_unique<PlainTokenizer>(language_segmenter.get());
+
+  // There are many Unicode whitespace characters; tabs stand in for the rest
+  // here.
+
+  // 0x0009 is horizontal tab, considered as a whitespace
+  std::string text_with_horizontal_tab =
+      absl_ports::StrCat("Hello", UcharToString(0x0009), "World");
+  EXPECT_THAT(plain_tokenizer->TokenizeAll(text_with_horizontal_tab),
+              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"),
+                                       EqualsToken(Token::REGULAR, "World"))));
+
+  // 0x000B is vertical tab, considered as a whitespace
+  std::string text_with_vertical_tab =
+      absl_ports::StrCat("Hello", UcharToString(0x000B), "World");
+  EXPECT_THAT(plain_tokenizer->TokenizeAll(text_with_vertical_tab),
+              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"),
+                                       EqualsToken(Token::REGULAR, "World"))));
+}
+
+// Verifies that both half-width (ASCII) and full-width (CJK) punctuation
+// marks are dropped from the token stream.
+TEST_F(PlainTokenizerTest, Punctuation) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  std::unique_ptr<Tokenizer> plain_tokenizer =
+      std::make_unique<PlainTokenizer>(language_segmenter.get());
+
+  // Half-width punctuation marks are filtered out.
+  EXPECT_THAT(plain_tokenizer->TokenizeAll(
+                  "Hello, World! Hello: World. \"Hello\" World?"),
+              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"),
+                                       EqualsToken(Token::REGULAR, "World"),
+                                       EqualsToken(Token::REGULAR, "Hello"),
+                                       EqualsToken(Token::REGULAR, "World"),
+                                       EqualsToken(Token::REGULAR, "Hello"),
+                                       EqualsToken(Token::REGULAR, "World"))));
+
+  // Full-width punctuation marks are filtered out.
+  EXPECT_THAT(
+      plain_tokenizer->TokenizeAll("你好,世界!你好:世界。“你好”世界?"),
+      IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "你好"),
+                               EqualsToken(Token::REGULAR, "世界"),
+                               EqualsToken(Token::REGULAR, "你好"),
+                               EqualsToken(Token::REGULAR, "世界"),
+                               EqualsToken(Token::REGULAR, "你好"),
+                               EqualsToken(Token::REGULAR, "世界"))));
+}
+
+// Documents current behavior for symbols like "+" and "$": they are not
+// punctuation or whitespace, so they come through as their own tokens.
+// See TODO(b/141007791) in plain-tokenizer.cc about future special handling.
+TEST_F(PlainTokenizerTest, SpecialCharacters) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  std::unique_ptr<Tokenizer> plain_tokenizer =
+      std::make_unique<PlainTokenizer>(language_segmenter.get());
+
+  // Right now we don't have special logic for these characters, just output
+  // them as tokens.
+
+  EXPECT_THAT(plain_tokenizer->TokenizeAll("1+1"),
+              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "1"),
+                                       EqualsToken(Token::REGULAR, "+"),
+                                       EqualsToken(Token::REGULAR, "1"))));
+
+  EXPECT_THAT(plain_tokenizer->TokenizeAll("$50"),
+              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "$"),
+                                       EqualsToken(Token::REGULAR, "50"))));
+}
+
+// Sanity-checks CJKT scripts end to end. The plain tokenizer treats CJKT
+// terms like any others; the expected splits below reflect the language
+// segmenter's output with punctuation removed.
+TEST_F(PlainTokenizerTest, CJKT) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  std::unique_ptr<Tokenizer> plain_tokenizer =
+      std::make_unique<PlainTokenizer>(language_segmenter.get());
+
+  // In plain tokenizer, CJKT characters are handled the same way as non-CJKT
+  // characters, just add these tests as sanity checks.
+
+  // Chinese
+  EXPECT_THAT(plain_tokenizer->TokenizeAll("我每天走路去上班。"),
+              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "我"),
+                                       EqualsToken(Token::REGULAR, "每天"),
+                                       EqualsToken(Token::REGULAR, "走路"),
+                                       EqualsToken(Token::REGULAR, "去"),
+                                       EqualsToken(Token::REGULAR, "上班"))));
+  // Japanese
+  EXPECT_THAT(
+      plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"),
+      IsOkAndHolds(ElementsAre(
+          EqualsToken(Token::REGULAR, "私"), EqualsToken(Token::REGULAR, "は"),
+          EqualsToken(Token::REGULAR, "毎日"),
+          EqualsToken(Token::REGULAR, "仕事"),
+          EqualsToken(Token::REGULAR, "に"), EqualsToken(Token::REGULAR, "歩"),
+          EqualsToken(Token::REGULAR, "い"),
+          EqualsToken(Token::REGULAR, "てい"),
+          EqualsToken(Token::REGULAR, "ます"))));
+  // Khmer
+  EXPECT_THAT(plain_tokenizer->TokenizeAll("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"),
+              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "ញុំ"),
+                                       EqualsToken(Token::REGULAR, "ដើរទៅ"),
+                                       EqualsToken(Token::REGULAR, "ធ្វើការ"),
+                                       EqualsToken(Token::REGULAR, "រាល់ថ្ងៃ"))));
+  // Korean
+  EXPECT_THAT(
+      plain_tokenizer->TokenizeAll("나는 매일 출근합니다."),
+      IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "나는"),
+                               EqualsToken(Token::REGULAR, "매일"),
+                               EqualsToken(Token::REGULAR, "출근합니다"))));
+
+  // Thai
+  EXPECT_THAT(plain_tokenizer->TokenizeAll("ฉันเดินไปทำงานทุกวัน"),
+              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "ฉัน"),
+                                       EqualsToken(Token::REGULAR, "เดิน"),
+                                       EqualsToken(Token::REGULAR, "ไป"),
+                                       EqualsToken(Token::REGULAR, "ทำงาน"),
+                                       EqualsToken(Token::REGULAR, "ทุก"),
+                                       EqualsToken(Token::REGULAR, "วัน"))));
+}
+
+// Minimal ResetToTokenAfter() check on "f b": after offset 0 ("f") the next
+// token is "b"; after offset 2 (the last character) there is nothing left.
+TEST_F(PlainTokenizerTest, ResetToTokenAfterSimple) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  std::unique_ptr<Tokenizer> plain_tokenizer =
+      std::make_unique<PlainTokenizer>(language_segmenter.get());
+  constexpr std::string_view kText = "f b";
+  auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
+
+  EXPECT_TRUE(iterator->ResetToTokenAfter(0));
+  EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "b"));
+
+  EXPECT_FALSE(iterator->ResetToTokenAfter(2));
+}
+
+// Minimal ResetToTokenBefore() check on "f b": before offset 2 ("b") the
+// previous token is "f"; before offset 0 (the first character) nothing exists.
+TEST_F(PlainTokenizerTest, ResetToTokenBeforeSimple) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  std::unique_ptr<Tokenizer> plain_tokenizer =
+      std::make_unique<PlainTokenizer>(language_segmenter.get());
+  constexpr std::string_view kText = "f b";
+  auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
+
+  EXPECT_TRUE(iterator->ResetToTokenBefore(2));
+  EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "f"));
+
+  EXPECT_FALSE(iterator->ResetToTokenBefore(0));
+}
+
+// Exhaustively walks every offset of kText and checks that
+// ResetToTokenAfter(i) lands on the first valid token that starts after
+// offset i (per the expected_text table), and fails once no token follows.
+TEST_F(PlainTokenizerTest, ResetToTokenAfter) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  std::unique_ptr<Tokenizer> plain_tokenizer =
+      std::make_unique<PlainTokenizer>(language_segmenter.get());
+
+  constexpr std::string_view kText = " foo . bar baz.. bat ";
+  EXPECT_THAT(plain_tokenizer->TokenizeAll(kText),
+              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "foo"),
+                                       EqualsToken(Token::REGULAR, "bar"),
+                                       EqualsToken(Token::REGULAR, "baz"),
+                                       EqualsToken(Token::REGULAR, "bat"))));
+  // Index i holds the token expected after offset i; the trailing comment
+  // shows a window of kText starting at that offset.
+  std::vector<std::string> expected_text = {
+      "foo",  // 0: " foo . bar"
+      "bar",  // 1: "foo . bar "
+      "bar",  // 2: "oo . bar b"
+      "bar",  // 3: "o . bar ba"
+      "bar",  // 4: " . bar baz"
+      "bar",  // 5: ". bar baz."
+      "bar",  // 6: " bar baz.."
+      "baz",  // 7: "bar baz.. b"
+      "baz",  // 8: "ar baz.. ba"
+      "baz",  // 9: "r baz.. bat"
+      "baz",  // 10: " baz.. bat"
+      "bat",  // 11: "baz.. bat"
+      "bat",  // 12: "az.. bat"
+      "bat",  // 13: "z.. bat"
+      "bat",  // 14: ".. bat"
+      "bat",  // 15: ". bat"
+      "bat",  // 16: " bat"
+  };
+
+  auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
+  EXPECT_TRUE(iterator->Advance());
+  EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "foo"));
+  // NOTE(review): 'int i' vs size_t length()/size() triggers -Wsign-compare;
+  // consider size_t (or a cast) if warnings are treated as errors.
+  for (int i = 0; i < kText.length(); ++i) {
+    if (i < expected_text.size()) {
+      EXPECT_TRUE(iterator->ResetToTokenAfter(i));
+      EXPECT_THAT(iterator->GetToken(),
+                  EqualsToken(Token::REGULAR, expected_text[i]));
+    } else {
+      EXPECT_FALSE(iterator->ResetToTokenAfter(i));
+    }
+  }
+}
+
+// Mirror of ResetToTokenAfter: walks offsets from the end of kText backwards
+// and checks that ResetToTokenBefore(i) lands on the last valid token ending
+// before offset i, failing once no token precedes the offset.
+TEST_F(PlainTokenizerTest, ResetToTokenBefore) {
+  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
+                             LanguageSegmenter::Create(GetLangIdModelPath()));
+  std::unique_ptr<Tokenizer> plain_tokenizer =
+      std::make_unique<PlainTokenizer>(language_segmenter.get());
+
+  constexpr std::string_view kText = " foo . bar baz.. bat ";
+  EXPECT_THAT(plain_tokenizer->TokenizeAll(kText),
+              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "foo"),
+                                       EqualsToken(Token::REGULAR, "bar"),
+                                       EqualsToken(Token::REGULAR, "baz"),
+                                       EqualsToken(Token::REGULAR, "bat"))));
+  // Entries are ordered by descending offset i (shown in the comments); the
+  // loop below maps i to an index from the end of kText.
+  std::vector<std::string> expected_text = {
+      "bat",  // 20: "baz.. bat "
+      "baz",  // 19: " baz.. bat"
+      "baz",  // 18: "r baz.. ba"
+      "baz",  // 17: "ar baz.. b"
+      "baz",  // 16: "bar baz.. "
+      "baz",  // 15: " bar baz.."
+      "baz",  // 14: ". bar baz."
+      "bar",  // 13: " . bar baz"
+      "bar",  // 12: "o . bar ba"
+      "bar",  // 11: "oo . bar b"
+      "bar",  // 10: "foo . bar "
+      "foo",  // 9: "foo . bar"
+      "foo",  // 8: "foo . ba"
+      "foo",  // 7: "foo . b"
+      "foo",  // 6: "foo . "
+      "foo",  // 5: "foo ."
+      "foo",  // 4: "foo "
+  };
+
+  auto iterator = plain_tokenizer->Tokenize(kText).ValueOrDie();
+  EXPECT_TRUE(iterator->Advance());
+  EXPECT_THAT(iterator->GetToken(), EqualsToken(Token::REGULAR, "foo"));
+  // NOTE(review): mixed int/size_t comparisons here trigger -Wsign-compare;
+  // consider casting kText.length() once to int.
+  for (int i = kText.length() - 1; i >= 0; --i) {
+    int expected_index = kText.length() - 1 - i;
+    if (expected_index < expected_text.size()) {
+      EXPECT_TRUE(iterator->ResetToTokenBefore(i));
+      EXPECT_THAT(iterator->GetToken(),
+                  EqualsToken(Token::REGULAR, expected_text[expected_index]));
+    } else {
+      EXPECT_FALSE(iterator->ResetToTokenBefore(i));
+    }
+  }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/tokenization/raw-query-tokenizer.cc b/icing/tokenization/raw-query-tokenizer.cc
new file mode 100644
index 0000000..779a555
--- /dev/null
+++ b/icing/tokenization/raw-query-tokenizer.cc
@@ -0,0 +1,552 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/tokenization/raw-query-tokenizer.h"
+
+#include "utils/base/status.h"
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/tokenization/tokenizer.h"
+#include "icing/util/i18n-utils.h"
+
+// This file provides rules that tell the tokenizer what to do when it sees a
+// term.
+//
+// Some definitions:
+//
+// 1. State: We treat raw query tokenizer as a state machine, it has different
+// states when processing different terms.
+// 2. TermType: type of input terms from language segmenter
+// 3. Rule: a rule here is the combination of State and TermType, a rule tells
+// the raw query tokenizer what to do when it's in a certain State and
+// sees a certain TermType.
+//
+// There are 2 kinds of rules here:
+// 3.1 State transition rule: it tells the raw query tokenizer what new state
+// to transition into.
+// 3.2 Action rule: it tells the raw query tokenizer whether to output the
+// current term as a token or skip.
+//
+// Then a rule can be described as:
+// [current state] + [next term type] -> [new state] + [action]
+//
+// Since there are currently 9 states and 8 term types, we need 9 * 8 = 72
+// rules to cover all possible cases for both state transition and action.
+//
+// Besides the 72 rules, there are 4 extra rules that we handle separately:
+// 1. Property name must be in ASCII.
+// 2. "OR" is ignored if there's no valid token on its left.
+// 3. "OR" is ignored if there's no valid token on its right.
+// 4. Parentheses must appear in pairs.
+namespace icing {
+namespace lib {
+
+namespace {
+// Single ASCII characters and the "OR" keyword that carry special meaning in
+// the raw query syntax (see the state machine description above).
+constexpr char kWhitespace = ' ';
+constexpr char kColon = ':';
+constexpr char kLeftParentheses = '(';
+constexpr char kRightParentheses = ')';
+constexpr char kExclusion = '-';
+constexpr char kOrOperator[] = "OR";
+
+// NOTE: the numeric values below are used as row indices into
+// state_transition_rules and action_rules; keep them dense, in order, and in
+// sync with STATE_COUNT.
+enum State {
+  // Ready to process any terms
+  READY = 0,
+
+  // When seeing an alphanumeric term
+  PROCESSING_ALPHANUMERIC_TERM = 1,
+
+  // When seeing an exclusion operator "-"
+  PROCESSING_EXCLUSION = 2,
+
+  // When seeing an exclusion operator + alphanumeric term
+  PROCESSING_EXCLUSION_TERM = 3,
+
+  // When seeing ASCII alphanumeric term + colon
+  PROCESSING_PROPERTY_RESTRICT = 4,
+
+  // When seeing ASCII alphanumeric term + colon + alphanumeric term
+  PROCESSING_PROPERTY_TERM = 5,
+
+  // When seeing OR operator
+  PROCESSING_OR = 6,
+
+  // When seeing left parentheses
+  OPENING_PARENTHESES = 7,
+
+  // When seeing right parentheses
+  CLOSING_PARENTHESES = 8,
+
+  // Valid state count
+  STATE_COUNT = 9,
+
+  // Sentinel: not a table index; transitioning here is a syntax error.
+  INVALID = 10
+};
+
+// NOTE: the numeric values below are used as column indices into
+// state_transition_rules and action_rules; keep them dense, in order, and in
+// sync with TYPE_COUNT.
+enum TermType {
+  // " "
+  WHITESPACE = 0,
+
+  // A term that consists of unicode alphabetic and numeric characters
+  ALPHANUMERIC_TERM = 1,
+
+  // "("
+  LEFT_PARENTHESES = 2,
+
+  // ")"
+  RIGHT_PARENTHESES = 3,
+
+  // "-"
+  EXCLUSION_OPERATOR = 4,
+
+  // "OR"
+  OR_OPERATOR = 5,
+
+  // ":"
+  COLON = 6,
+
+  // All the other characters seen that are not the types above
+  OTHER = 7,
+
+  TYPE_COUNT = 8
+};
+
+// Actions to take on the current term, or error codes. ProcessTerm() treats
+// any value >= ERROR_UNKNOWN as an error and converts it to a status via
+// GetErrorMessage().
+enum ActionOrError {
+  // Output the current term as token
+  OUTPUT = 0,
+
+  // Do nothing and wait for more information as it's not clear what the current
+  // term is used for.
+  KEEP = 1,
+
+  // Ignore / throw away the current term
+  IGNORE = 2,
+
+  // Errors
+  ERROR_UNKNOWN = 100,
+  ERROR_NO_WHITESPACE_AROUND_OR = 101,
+  ERROR_GROUP_AFTER_EXCLUSION = 102,
+  ERROR_GROUP_AS_PROPERTY_NAME = 103,
+  ERROR_GROUP_AFTER_PROPERTY_RESTRICTION = 104,
+  ERROR_EXCLUSION_PROPERTY_TOGETHER = 105,
+  ERROR_EXCLUSION_OR_TOGETHER = 106,
+  ERROR_PROPERTY_OR_TOGETHER = 107,
+};
+
+// Maps an ActionOrError to a human-readable message. Non-error action values
+// (and unrecognized codes) map to an empty string.
+std::string_view GetErrorMessage(ActionOrError maybe_error) {
+  switch (maybe_error) {
+    case ERROR_UNKNOWN:
+      return "Unknown error";
+    case ERROR_NO_WHITESPACE_AROUND_OR:
+      return "No whitespaces before or after OR operator";
+    case ERROR_GROUP_AFTER_EXCLUSION:
+      return "Exclusion on groups is not supported";
+    case ERROR_GROUP_AS_PROPERTY_NAME:
+      return "Property name can't be a group";
+    case ERROR_GROUP_AFTER_PROPERTY_RESTRICTION:
+      return "Property restriction on groups is not supported";
+    case ERROR_EXCLUSION_PROPERTY_TOGETHER:
+      return "Exclusion and property restriction operators can't be used "
+             "together";
+    case ERROR_EXCLUSION_OR_TOGETHER:
+      return "Exclusion and OR operators can't be used together";
+    case ERROR_PROPERTY_OR_TOGETHER:
+      return "Property restriction and OR operators can't be used together";
+    default:
+      return "";
+  }
+}
+
+// The following state transition table uses numbers to represent states and
+// letters to represent actions:
+//
+// States:
+//
+// READY = 0
+// PROCESSING_ALPHANUMERIC_TERM = 1
+// PROCESSING_EXCLUSION = 2
+// PROCESSING_EXCLUSION_TERM = 3
+// PROCESSING_PROPERTY_RESTRICT = 4
+// PROCESSING_PROPERTY_TERM = 5
+// PROCESSING_OR = 6
+// OPENING_PARENTHESES = 7
+// CLOSING_PARENTHESES = 8
+//
+// Actions:
+//
+// OUTPUT = a
+// KEEP = b
+// IGNORE = c
+//
+// ========================================================
+// Transition Table || 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
+// ===========================================================================
+// WHITESPACE || 0,c | 0,a | 0,c | 0,a | 0,a | 0,a | 0,a | 0,a | 0,a |
+// ALPHANUMERIC_TERM || 1,c | 1,a | 3,a | 1,a | 5,a | 1,a |ERROR| 1,a | 1,a |
+// LEFT_PARENTHESES || 7,c | 7,a |ERROR| 7,a |ERROR| 7,a | 7,a | 7,a | 7,a |
+// RIGHT_PARENTHESES || 8,c | 8,a | 8,c | 8,a | 8,a | 8,a | 8,c | 8,a | 8,a |
+// EXCLUSION_OPERATOR || 2,c | 0,a | 2,c | 0,a |ERROR| 0,a |ERROR| 2,a | 2,a |
+// OR_OPERATOR || 6,c |ERROR|ERROR|ERROR|ERROR|ERROR|ERROR| 7,b | 6,a |
+// COLON || 0,c | 4,b |ERROR|ERROR| 4,b | 0,a |ERROR| 0,a |ERROR|
+// OTHER || 0,c | 0,a | 0,c | 0,a | 0,a | 0,a | 0,a | 0,a | 0,a |
+//
+// Each cell is a rule that consists of 4 things:
+// [current state] + [next term type] -> [new state] + [action]
+//
+// E.g. the cell at intersection of "0" and "ALPHANUMERIC_TERM" means that when
+// we're at state 0 (READY) and seeing a new term with type "ALPHANUMERIC_TERM",
+// we'll transition into a new state 1 (PROCESSING_ALPHANUMERIC_TERM) and take
+// action c (IGNORE the current term).
+
+// We use a 2D array to encode the state transition rules,
+// The value of state_transition_rules[state1][term_type1] means "what state we
+// need to transition into when the current state is state1 and the next term
+// type is term_type1".
+//
+// Rows are indexed by State; each row lists the target state per TermType in
+// declaration order: WHITESPACE, ALPHANUMERIC_TERM, LEFT_PARENTHESES,
+// RIGHT_PARENTHESES, EXCLUSION_OPERATOR, OR_OPERATOR, COLON, OTHER.
+//
+// NOTE: Please update the state transition table above if this is updated.
+//
+// TODO(samzheng): support syntax "-property1:term1", right now we don't allow
+// exclusion and property restriction applied on the same term.
+// TODO(b/141007791): figure out how we'd like to support special characters
+// like "+", "&", "@", "#" in indexing and query tokenizers.
+constexpr State state_transition_rules[STATE_COUNT][TYPE_COUNT] = {
+    /*State: Ready*/
+    {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+     CLOSING_PARENTHESES, PROCESSING_EXCLUSION, PROCESSING_OR, READY, READY},
+    /*State: PROCESSING_ALPHANUMERIC_TERM*/
+    {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+     CLOSING_PARENTHESES, READY, INVALID, PROCESSING_PROPERTY_RESTRICT, READY},
+    /*State: PROCESSING_EXCLUSION*/
+    {READY, PROCESSING_EXCLUSION_TERM, INVALID, CLOSING_PARENTHESES,
+     PROCESSING_EXCLUSION, INVALID, INVALID, READY},
+    /*State: PROCESSING_EXCLUSION_TERM*/
+    {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+     CLOSING_PARENTHESES, READY, INVALID, INVALID, READY},
+    /*State: PROCESSING_PROPERTY_RESTRICT*/
+    {READY, PROCESSING_PROPERTY_TERM, INVALID, CLOSING_PARENTHESES, INVALID,
+     INVALID, PROCESSING_PROPERTY_RESTRICT, READY},
+    /*State: PROCESSING_PROPERTY_TERM*/
+    {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+     CLOSING_PARENTHESES, READY, INVALID, READY, READY},
+    /*State: PROCESSING_OR*/
+    {READY, INVALID, OPENING_PARENTHESES, CLOSING_PARENTHESES, INVALID, INVALID,
+     INVALID, READY},
+    /*State: OPENING_PARENTHESES*/
+    {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+     CLOSING_PARENTHESES, PROCESSING_EXCLUSION, OPENING_PARENTHESES, READY,
+     READY},
+    /*State: CLOSING_PARENTHESES*/
+    {READY, PROCESSING_ALPHANUMERIC_TERM, OPENING_PARENTHESES,
+     CLOSING_PARENTHESES, PROCESSING_EXCLUSION, PROCESSING_OR, INVALID, READY}};
+
+// We use a 2D array to encode the action rules,
+// The value of action_rules[state1][term_type1] means "what action we need to
+// take when the current state is state1 and the next term type is term_type1".
+//
+// Rows are indexed by State; columns follow TermType declaration order, the
+// same layout as state_transition_rules above.
+//
+// NOTE: Please update the state transition table above if this is updated.
+constexpr ActionOrError action_rules[STATE_COUNT][TYPE_COUNT] = {
+    /*State: Ready*/
+    {IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE, IGNORE},
+    /*State: PROCESSING_ALPHANUMERIC_TERM*/
+    {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR,
+     KEEP, OUTPUT},
+    /*State: PROCESSING_EXCLUSION*/
+    {IGNORE, OUTPUT, ERROR_GROUP_AFTER_EXCLUSION, IGNORE, IGNORE,
+     ERROR_EXCLUSION_OR_TOGETHER, ERROR_EXCLUSION_PROPERTY_TOGETHER, IGNORE},
+    /*State: PROCESSING_EXCLUSION_TERM*/
+    {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR,
+     ERROR_EXCLUSION_PROPERTY_TOGETHER, OUTPUT},
+    /*State: PROCESSING_PROPERTY_RESTRICT*/
+    {OUTPUT, OUTPUT, ERROR_GROUP_AFTER_PROPERTY_RESTRICTION, OUTPUT,
+     ERROR_EXCLUSION_PROPERTY_TOGETHER, ERROR_PROPERTY_OR_TOGETHER, KEEP,
+     OUTPUT},
+    /*State: PROCESSING_PROPERTY_TERM*/
+    {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR,
+     OUTPUT, OUTPUT},
+    /*State: PROCESSING_OR*/
+    {OUTPUT, ERROR_NO_WHITESPACE_AROUND_OR, OUTPUT, IGNORE,
+     ERROR_NO_WHITESPACE_AROUND_OR, ERROR_NO_WHITESPACE_AROUND_OR,
+     ERROR_NO_WHITESPACE_AROUND_OR, OUTPUT},
+    /*State: OPENING_PARENTHESES*/
+    {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, KEEP, OUTPUT, OUTPUT},
+    /*State: CLOSING_PARENTHESES*/
+    {OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT, OUTPUT,
+     ERROR_GROUP_AS_PROPERTY_NAME, OUTPUT}};
+
+// Helper function to get the TermType of the input term.
+// Single-character terms are matched against the special ASCII operators
+// first; a two-character term equal to "OR" is the OR operator; everything
+// else is classified by its first character only.
+TermType GetTermType(std::string_view term) {
+  if (term.length() == 1) {
+    // Must be an ASCII char
+    const char& first_term_char = term[0];
+    if (first_term_char == kWhitespace) {
+      return WHITESPACE;
+    } else if (first_term_char == kColon) {
+      return COLON;
+    } else if (first_term_char == kLeftParentheses) {
+      return LEFT_PARENTHESES;
+    } else if (first_term_char == kRightParentheses) {
+      return RIGHT_PARENTHESES;
+    } else if (first_term_char == kExclusion) {
+      return EXCLUSION_OPERATOR;
+    }
+  } else if (term.length() == 2 && term == kOrOperator) {
+    return OR_OPERATOR;
+  }
+  // Checks the first char to see if it's an ASCII term
+  if (i18n_utils::IsAscii(term[0])) {
+    // Passing the raw char is safe here: it's known ASCII, so promotion to
+    // the wider ICU character type preserves the value.
+    if (u_isalnum(term[0])) {
+      return ALPHANUMERIC_TERM;
+    }
+    return OTHER;
+  }
+  // All non-ASCII terms are alphabetic since language segmenter already
+  // filters out non-ASCII and non-alphabetic terms
+  return ALPHANUMERIC_TERM;
+}
+
+// Helper function to remove the last token if it's OR operator. This is used to
+// correct the queries where there're no valid tokens after "OR", e.g. [cat OR]
+// and [(cat OR)]. This helps assert extra rule 3: "OR" is ignored if there's no
+// valid token on its right.
+void RemoveLastTokenIfOrOperator(std::vector<Token>* tokens) {
+  if (!tokens->empty() && tokens->back().type == Token::QUERY_OR) {
+    tokens->pop_back();
+  }
+}
+
+// Helper function to output an "OR" token while asserting the extra rule 2:
+// "OR" is ignored if there's no valid token on its left.
+//
+// Returns:
+//   OK always -- an "OR" with no valid left operand is silently dropped
+//   rather than treated as an error.
+libtextclassifier3::Status OutputOrOperatorToken(std::vector<Token>* tokens) {
+  if (tokens->empty()) {
+    // Ignores "OR" because it's the first token.
+    return libtextclassifier3::Status::OK;
+  }
+  Token::Type last_token_type = tokens->back().type;
+  switch (last_token_type) {
+    case Token::REGULAR:
+    case Token::QUERY_RIGHT_PARENTHESES:
+      tokens->emplace_back(Token::QUERY_OR);
+      break;
+    case Token::QUERY_OR:
+      // Ignores "OR" because there's already an "OR", e.g. "term1 OR OR term2"
+      break;
+    default:
+      // Ignores "OR" because there isn't a valid token on its left.
+      break;
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+// Helper function to output a token according to current term and new state.
+// The new token will be added to 'tokens'.
+//
+// NOTE: how we output the current term is depending on the new state and not
+// the current state. E.g. for these two queries: [property1: ] and
+// [property1:term], "property1" is a regular term in the first query but a
+// property name in the second. The meaning of "property1" is determined when
+// we read the content after the colon. That's why we need to get the new state
+// here.
+//
+// Returns:
+//   OK on success
+//   INVALID_ARGUMENT with error message on invalid query syntax
+libtextclassifier3::Status OutputToken(State new_state,
+                                       std::string_view current_term,
+                                       TermType current_term_type,
+                                       std::vector<Token>* tokens) {
+  switch (current_term_type) {
+    case ALPHANUMERIC_TERM:
+      if (new_state == PROCESSING_PROPERTY_TERM) {
+        // Asserts extra rule 1: property name must be in ASCII
+        if (!i18n_utils::IsAscii(current_term[0])) {
+          return absl_ports::InvalidArgumentError(
+              "Characters in property name must all be ASCII.");
+        }
+        tokens->emplace_back(Token::QUERY_PROPERTY, current_term);
+      } else {
+        tokens->emplace_back(Token::REGULAR, current_term);
+      }
+      break;
+    case LEFT_PARENTHESES:
+      tokens->emplace_back(Token::QUERY_LEFT_PARENTHESES);
+      break;
+    case RIGHT_PARENTHESES:
+      // Ignores "OR" if it's followed by right parentheses.
+      RemoveLastTokenIfOrOperator(tokens);
+      tokens->emplace_back(Token::QUERY_RIGHT_PARENTHESES);
+      break;
+    case EXCLUSION_OPERATOR:
+      tokens->emplace_back(Token::QUERY_EXCLUSION);
+      break;
+    case OR_OPERATOR:
+      return OutputOrOperatorToken(tokens);
+    default:
+      // WHITESPACE, COLON, and OTHER produce no token.
+      break;
+  }
+  return libtextclassifier3::Status::OK;
+}
+
// Helper function to apply proper rules on current state and next term type.
// 'current_state' and other output parameters will be modified to new values,
// new token will be added to 'tokens' if possible.
//
// The transition is fully table-driven: 'state_transition_rules' yields the
// new state and 'action_rules' yields what to do with the buffered
// 'current_term' (both tables are indexed by [state][term type]).
//
// Returns:
//   OK on success
//   INVALID_ARGUMENT with error message on invalid query syntax
libtextclassifier3::Status ProcessTerm(State* current_state,
                                       std::string_view* current_term,
                                       TermType* current_term_type,
                                       int* unclosed_parentheses_count,
                                       const std::string_view next_term,
                                       TermType next_term_type,
                                       std::vector<Token>* tokens) {
  // Asserts extra rule 4: parentheses must appear in pairs.
  // The count is updated eagerly, before the state tables are consulted, so
  // an unmatched ")" is reported even if the state machine would otherwise
  // accept it.
  if (next_term_type == LEFT_PARENTHESES) {
    ++(*unclosed_parentheses_count);
  } else if (next_term_type == RIGHT_PARENTHESES &&
             --(*unclosed_parentheses_count) < 0) {
    return absl_ports::InvalidArgumentError("Too many right parentheses.");
  }
  // Asks the rules what action to take and what the new state is based on
  // current state and next term.
  ActionOrError action_or_error = action_rules[*current_state][next_term_type];
  State new_state = state_transition_rules[*current_state][next_term_type];
  // Sanity check
  // NOTE(review): when new_state == INVALID but action_or_error is a plain
  // action (< ERROR_UNKNOWN), GetErrorMessage receives a non-error code —
  // presumably it returns a generic message then; verify.
  if (action_or_error >= ERROR_UNKNOWN || new_state == INVALID) {
    return absl_ports::InvalidArgumentError(GetErrorMessage(action_or_error));
  }
  switch (action_or_error) {
    case OUTPUT:
      // Flush the buffered term as a token, then (fall through) buffer the
      // new term.
      ICING_RETURN_IF_ERROR(
          OutputToken(new_state, *current_term, *current_term_type, tokens));
      U_FALLTHROUGH;
    case IGNORE:
      // Discard (or, after OUTPUT, replace) the buffered term with the new
      // one.
      *current_term = next_term;
      *current_term_type = next_term_type;
      break;
    case KEEP:
      // Keep the buffered term; the new term is dropped.
      break;
    default:
      return absl_ports::InvalidArgumentError(GetErrorMessage(ERROR_UNKNOWN));
  }
  *current_state = new_state;
  return libtextclassifier3::Status::OK;
}
+
+// Processes all the terms from base iterator and produces a list of tokens
+// based on the raw query syntax rules.
+//
+// Returns:
+// A list of tokens on success
+// INVALID_ARGUMENT with error message on invalid query syntax
+libtextclassifier3::StatusOr<std::vector<Token>> ProcessTerms(
+ std::unique_ptr<LanguageSegmenter::Iterator> base_iterator) {
+ std::vector<Token> tokens;
+ State current_state = READY;
+ std::string_view current_term;
+ TermType current_term_type;
+ int unclosed_parentheses_count = 0;
+ while (base_iterator->Advance()) {
+ const std::string_view next_term = base_iterator->GetTerm();
+ size_t colon_position = next_term.find(kColon);
+ // Since colon ":" is a word connector per ICU's rule
+ // (https://unicode.org/reports/tr29/#Word_Boundaries), strings like
+ // "foo:bar" are returned by LanguageSegmenter as one term. Here we're
+ // trying to find the first colon as it represents property restriction in
+ // raw query.
+ if (colon_position == std::string_view::npos) {
+ // No colon found
+ ICING_RETURN_IF_ERROR(ProcessTerm(¤t_state, ¤t_term,
+ ¤t_term_type,
+ &unclosed_parentheses_count, next_term,
+ GetTermType(next_term), &tokens));
+ } else if (next_term.size() == 1 && next_term[0] == kColon) {
+ // The whole term is a colon
+ ICING_RETURN_IF_ERROR(
+ ProcessTerm(¤t_state, ¤t_term, ¤t_term_type,
+ &unclosed_parentheses_count, next_term, COLON, &tokens));
+ } else {
+ // String before the colon is the property name
+ std::string_view property_name = next_term.substr(0, colon_position);
+ ICING_RETURN_IF_ERROR(
+ ProcessTerm(¤t_state, ¤t_term, ¤t_term_type,
+ &unclosed_parentheses_count, property_name,
+ GetTermType(property_name), &tokens));
+ ICING_RETURN_IF_ERROR(
+ ProcessTerm(¤t_state, ¤t_term, ¤t_term_type,
+ &unclosed_parentheses_count, std::string_view(&kColon, 1),
+ COLON, &tokens));
+ // String after the colon is the term that property restriction is applied
+ // on.
+ std::string_view property_term = next_term.substr(colon_position + 1);
+ ICING_RETURN_IF_ERROR(
+ ProcessTerm(¤t_state, ¤t_term, ¤t_term_type,
+ &unclosed_parentheses_count, property_term,
+ GetTermType(property_term), &tokens));
+ }
+ }
+ // Adds a fake whitespace at the end to flush the last term.
+ ICING_RETURN_IF_ERROR(
+ ProcessTerm(¤t_state, ¤t_term, ¤t_term_type,
+ &unclosed_parentheses_count,
+ std::string_view(&kWhitespace, 1), WHITESPACE, &tokens));
+ if (unclosed_parentheses_count > 0) {
+ return absl_ports::InvalidArgumentError("Unclosed left parentheses.");
+ }
+ // Ignores "OR" if it's at the end.
+ RemoveLastTokenIfOrOperator(&tokens);
+ return tokens;
+}
+
+// For raw query, it's easier to produce all the tokens together one time and
+// pass them to the iterator because the meaning of each term may relate to the
+// terms before or after it.
+class RawQueryTokenIterator : public Tokenizer::Iterator {
+ public:
+ explicit RawQueryTokenIterator(std::vector<Token>&& tokens)
+ : tokens_(std::move(tokens)) {}
+
+ bool Advance() override { return ++current_ < tokens_.size(); }
+
+ Token GetToken() const override {
+ if (current_ < 0 || current_ >= tokens_.size()) {
+ return Token(Token::INVALID);
+ }
+ return tokens_.at(current_);
+ }
+
+ private:
+ const std::vector<Token> tokens_;
+ int current_ = -1;
+};
+
+} // namespace
+
+libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>>
+RawQueryTokenizer::Tokenize(std::string_view text) const {
+ ICING_ASSIGN_OR_RETURN(std::vector<Token> tokens, TokenizeAll(text));
+ return std::make_unique<RawQueryTokenIterator>(std::move(tokens));
+}
+
+libtextclassifier3::StatusOr<std::vector<Token>> RawQueryTokenizer::TokenizeAll(
+ std::string_view text) const {
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<LanguageSegmenter::Iterator> base_iterator,
+ language_segmenter_.Segment(text));
+ return ProcessTerms(std::move(base_iterator));
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/tokenization/raw-query-tokenizer.h b/icing/tokenization/raw-query-tokenizer.h
new file mode 100644
index 0000000..570a652
--- /dev/null
+++ b/icing/tokenization/raw-query-tokenizer.h
@@ -0,0 +1,46 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TOKENIZATION_RAW_QUERY_TOKENIZER_H_
+#define ICING_TOKENIZATION_RAW_QUERY_TOKENIZER_H_
+
+#include <string_view>
+
+#include "utils/base/statusor.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/tokenizer.h"
+
+namespace icing {
+namespace lib {
+
// Tokenizer for raw search queries, e.g. [foo OR (bar -baz)]. In addition to
// regular terms it recognizes the operators OR, exclusion ("-"), property
// restriction (":") and parentheses (see the .cc for the full syntax rules).
class RawQueryTokenizer : public Tokenizer {
 public:
  // Does not take ownership of 'language_segmenter', which must be non-null
  // and outlive this tokenizer.
  explicit RawQueryTokenizer(const LanguageSegmenter* language_segmenter)
      : language_segmenter_(*language_segmenter) {}

  // Returns an iterator over the tokens of 'text'.
  //
  // Returns:
  //   An iterator on success
  //   INVALID_ARGUMENT with error message on invalid query syntax
  libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer::Iterator>> Tokenize(
      std::string_view text) const override;

  // Tokenizes all of 'text' eagerly.
  //
  // Returns:
  //   A list of tokens on success
  //   INVALID_ARGUMENT with error message on invalid query syntax
  libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll(
      std::string_view text) const override;

 private:
  // Used to segment input texts based on language understanding
  const LanguageSegmenter& language_segmenter_;
};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TOKENIZATION_RAW_QUERY_TOKENIZER_H_
diff --git a/icing/tokenization/raw-query-tokenizer_test.cc b/icing/tokenization/raw-query-tokenizer_test.cc
new file mode 100644
index 0000000..38420c2
--- /dev/null
+++ b/icing/tokenization/raw-query-tokenizer_test.cc
@@ -0,0 +1,536 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/tokenization/raw-query-tokenizer.h"
+
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/test-data.h"
+#include "icing/tokenization/tokenizer.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace icing {
+namespace lib {
+namespace {
+using ::testing::ElementsAre;
+using ::testing::HasSubstr;
+
// Test fixture that loads the ICU data file (needed by LanguageSegmenter for
// word-boundary rules) before each test runs.
class RawQueryTokenizerTest : public ::testing::Test {
 protected:
  void SetUp() override {
    ICING_ASSERT_OK(
        // File generated via icu_data_file rule in //icing/BUILD.
        SetUpICUDataFile("icing/icu.dat"));
  }
};
+
// A plain query becomes REGULAR tokens; punctuation ("!") is dropped.
TEST_F(RawQueryTokenizerTest, Simple) {
  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
                             LanguageSegmenter::Create(GetLangIdModelPath()));
  std::unique_ptr<Tokenizer> raw_query_tokenizer =
      std::make_unique<RawQueryTokenizer>(language_segmenter.get());

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("Hello World!"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "Hello"),
                                       EqualsToken(Token::REGULAR, "World"))));
}
+
// Parentheses produce QUERY_LEFT/RIGHT_PARENTHESES tokens, may nest, need no
// surrounding whitespace, and must be balanced.
TEST_F(RawQueryTokenizerTest, Parentheses) {
  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
                             LanguageSegmenter::Create(GetLangIdModelPath()));
  std::unique_ptr<Tokenizer> raw_query_tokenizer =
      std::make_unique<RawQueryTokenizer>(language_segmenter.get());

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("()"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("( )"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 term2)"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::REGULAR, "term2"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // Nested groups.
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("((term1 term2) (term3 term4))"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::REGULAR, "term2"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term3"),
                  EqualsToken(Token::REGULAR, "term4"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // No whitespace is required around parentheses.
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1(term2)"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term2"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("(term1)term2"),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                               EqualsToken(Token::REGULAR, "term1"),
                               EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
                               EqualsToken(Token::REGULAR, "term2"))));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)(term2)"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term2"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // Operators directly after ")" still work.
  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("(term1)-term2"),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                               EqualsToken(Token::REGULAR, "term1"),
                               EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
                               EqualsToken(Token::QUERY_EXCLUSION, ""),
                               EqualsToken(Token::REGULAR, "term2"))));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("(term1)OR term2"),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                               EqualsToken(Token::REGULAR, "term1"),
                               EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
                               EqualsToken(Token::QUERY_OR, ""),
                               EqualsToken(Token::REGULAR, "term2"))));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1)OR(term2)"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
                  EqualsToken(Token::QUERY_OR, ""),
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term2"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // Error cases: property restriction on a group, and unbalanced parentheses.
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1):term2"),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
                       HasSubstr("Property name can't be a group")));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("((term1)"),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
                       HasSubstr("Unclosed left parentheses")));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1))"),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
                       HasSubstr("Too many right parentheses")));
}
+
// Exclusion operator "-": applies only to the single term immediately after
// it, is ignored when dangling, and can't be combined with groups, OR, or
// property restriction.
// NOTE(review): "Exclustion" is a typo of "Exclusion"; renaming would change
// the gtest filter id, so it is only flagged here.
TEST_F(RawQueryTokenizerTest, Exclustion) {
  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
                             LanguageSegmenter::Create(GetLangIdModelPath()));
  std::unique_ptr<Tokenizer> raw_query_tokenizer =
      std::make_unique<RawQueryTokenizer>(language_segmenter.get());

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-term1"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""),
                                       EqualsToken(Token::REGULAR, "term1"))));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(-term1)"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::QUERY_EXCLUSION, ""),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // Exclusion operator is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("- term1"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));

  // Exclusion operator is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1- term2"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"),
                                       EqualsToken(Token::REGULAR, "term2"))));

  // Exclusion operator is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 -)"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // First exclusion operator is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("--term1"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""),
                                       EqualsToken(Token::REGULAR, "term1"))));

  // First "-" is exclusion operator, second is not and will be discarded.
  // In other words, exclusion only applies to the term right after it.
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-term1-term2"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""),
                                       EqualsToken(Token::REGULAR, "term1"),
                                       EqualsToken(Token::REGULAR, "term2"))));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-(term1)"),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
                       HasSubstr("Exclusion on groups is not supported")));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("-OR"),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
               HasSubstr("Exclusion and OR operators can't be used together")));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-:term1"),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
                       HasSubstr("Exclusion and property restriction operators "
                                 "can't be used together")));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-property1:term1"),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
                       HasSubstr("Exclusion and property restriction operators "
                                 "can't be used together")));
}
+
// Property restriction "property:term": the name before the first colon
// becomes a QUERY_PROPERTY token; dangling colons are ignored; groups, OR
// and exclusion can't be the restricted operand.
TEST_F(RawQueryTokenizerTest, PropertyRestriction) {
  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
                             LanguageSegmenter::Create(GetLangIdModelPath()));
  std::unique_ptr<Tokenizer> raw_query_tokenizer =
      std::make_unique<RawQueryTokenizer>(language_segmenter.get());

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("property1:term1"),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
                               EqualsToken(Token::REGULAR, "term1"))));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(property1:term1)"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::QUERY_PROPERTY, "property1"),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // Colon is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll(":term1"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));

  // Colon is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(:term1)"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // Colon is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1:"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));

  // property name can be a path
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("email.title:hello"),
              IsOkAndHolds(
                  ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "email.title"),
                              EqualsToken(Token::REGULAR, "hello"))));

  // The first colon ":" triggers property restriction, the second colon is used
  // as a word connector per ICU's rule
  // (https://unicode.org/reports/tr29/#Word_Boundaries).
  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("property:foo:bar"),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property"),
                               EqualsToken(Token::REGULAR, "foo:bar"))));

  // Property restriction only applies to the term right after it.
  // Note: "term1:term2" is not a term but 2 terms because word connectors
  // don't apply to numbers and alphabets.
  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("property1:term1:term2"),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
                               EqualsToken(Token::REGULAR, "term1"),
                               EqualsToken(Token::REGULAR, "term2"))));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("property1:term1-"),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
                               EqualsToken(Token::REGULAR, "term1"))));

  // Multiple continuous colons will still be recognized as a property
  // restriction operator
  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("property1::term1"),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
                               EqualsToken(Token::REGULAR, "term1"))));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("property1:(term1)"),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
               HasSubstr("Property restriction on groups is not supported")));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("property1:OR"),
      StatusIs(
          libtextclassifier3::StatusCode::INVALID_ARGUMENT,
          HasSubstr(
              "Property restriction and OR operators can't be used together")));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:-term1"),
              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
                       HasSubstr("Exclusion and property restriction operators "
                                 "can't be used together")));
}
+
// The OR operator: must be all uppercase, is deduplicated when repeated,
// is dropped when dangling, and needs whitespace (or parentheses) around it.
TEST_F(RawQueryTokenizerTest, OR) {
  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
                             LanguageSegmenter::Create(GetLangIdModelPath()));
  std::unique_ptr<Tokenizer> raw_query_tokenizer =
      std::make_unique<RawQueryTokenizer>(language_segmenter.get());

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR term2"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"),
                                       EqualsToken(Token::QUERY_OR, ""),
                                       EqualsToken(Token::REGULAR, "term2"))));

  // Two continuous "OR"s are treated as one
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR OR term2"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"),
                                       EqualsToken(Token::QUERY_OR, ""),
                                       EqualsToken(Token::REGULAR, "term2"))));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("(term1) OR term2"),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                               EqualsToken(Token::REGULAR, "term1"),
                               EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
                               EqualsToken(Token::QUERY_OR, ""),
                               EqualsToken(Token::REGULAR, "term2"))));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR (term2)"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_OR, ""),
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term2"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("((term1) OR (term2))"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
                  EqualsToken(Token::QUERY_OR, ""),
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term2"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // Only "OR" (all in uppercase) is the operator
  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("term1 or term2 Or term3 oR term4"),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"),
                               EqualsToken(Token::REGULAR, "or"),
                               EqualsToken(Token::REGULAR, "term2"),
                               EqualsToken(Token::REGULAR, "Or"),
                               EqualsToken(Token::REGULAR, "term3"),
                               EqualsToken(Token::REGULAR, "oR"),
                               EqualsToken(Token::REGULAR, "term4"))));

  // "OR" is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("OR term1"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));

  // "OR" is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));

  // "OR" is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(OR term1)"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // "OR" is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("( OR term1)"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // "OR" is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 OR)"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // "OR" is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(term1 OR )"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // "OR" is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("( OR )"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // A left parenthesis right after OR is allowed (acts like whitespace).
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1 OR(term2)"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_OR, ""),
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term2"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("term1 OR-term2"),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
               HasSubstr("No whitespaces before or after OR operator")));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("term1 OR:term2"),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
               HasSubstr("No whitespaces before or after OR operator")));
}
+
// CJKT are treated the same way by language segmenter and raw tokenizer, so
// here we test Chinese and Japanese to represent CJKT.
TEST_F(RawQueryTokenizerTest, CJKT) {
  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
                             LanguageSegmenter::Create(GetLangIdModelPath()));
  std::unique_ptr<Tokenizer> raw_query_tokenizer =
      std::make_unique<RawQueryTokenizer>(language_segmenter.get());

  // Exclusion only applies to the term right after it.
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-今天天气很好"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""),
                                       EqualsToken(Token::REGULAR, "今天"),
                                       EqualsToken(Token::REGULAR, "天气"),
                                       EqualsToken(Token::REGULAR, "很好"))));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("property1:你好"),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
                               EqualsToken(Token::REGULAR, "你好"))));

  // Property names must be ASCII (extra rule 1).
  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("标题:你好"),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
               HasSubstr("Characters in property name must all be ASCII")));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("cat OR ねこ"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "cat"),
                                       EqualsToken(Token::QUERY_OR, ""),
                                       EqualsToken(Token::REGULAR, "ねこ"))));

  // OR still requires whitespace even next to CJKT terms.
  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("cat ORねこ"),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
               HasSubstr("No whitespaces before or after OR operator")));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("ねこOR cat"),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
               HasSubstr("No whitespaces before or after OR operator")));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("-ねこOR cat"),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
               HasSubstr("No whitespaces before or after OR operator")));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("property:ねこOR cat"),
      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
               HasSubstr("No whitespaces before or after OR operator")));
}
+
// Raw tokenizer identifies all characters that it doesn't know as OTHER type,
// so we can choose comma "," to represent all OTHER characters.
TEST_F(RawQueryTokenizerTest, OtherChars) {
  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
                             LanguageSegmenter::Create(GetLangIdModelPath()));
  std::unique_ptr<Tokenizer> raw_query_tokenizer =
      std::make_unique<RawQueryTokenizer>(language_segmenter.get());

  // Comma is ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll(",term1, ,"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));

  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("(,term1),"),
              IsOkAndHolds(ElementsAre(
                  EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
                  EqualsToken(Token::REGULAR, "term1"),
                  EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));

  // Exclusion operator and comma are ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-,term1"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"))));

  // A trailing comma does not disturb a valid exclusion.
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-term1,"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""),
                                       EqualsToken(Token::REGULAR, "term1"))));

  // Colon and comma are ignored
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:,term1"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "property1"),
                                       EqualsToken(Token::REGULAR, "term1"))));

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll("property1:term1,term2"),
      IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
                               EqualsToken(Token::REGULAR, "term1"),
                               EqualsToken(Token::REGULAR, "term2"))));

  // This is a special case for OR, unknown chars are treated the same as
  // whitespaces before and after OR.
  EXPECT_THAT(raw_query_tokenizer->TokenizeAll("term1,OR,term2"),
              IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "term1"),
                                       EqualsToken(Token::QUERY_OR, ""),
                                       EqualsToken(Token::REGULAR, "term2"))));
}
+
// End-to-end check mixing Japanese, English, Chinese and Thai with property
// restriction, OR, grouping and exclusion in a single query.
TEST_F(RawQueryTokenizerTest, Mix) {
  ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
                             LanguageSegmenter::Create(GetLangIdModelPath()));
  std::unique_ptr<Tokenizer> raw_query_tokenizer =
      std::make_unique<RawQueryTokenizer>(language_segmenter.get());

  EXPECT_THAT(
      raw_query_tokenizer->TokenizeAll(
          "こんにちはgood afternoon, title:今天 OR (ในวันนี้ -B12)"),
      IsOkAndHolds(ElementsAre(
          EqualsToken(Token::REGULAR, "こんにちは"),
          EqualsToken(Token::REGULAR, "good"),
          EqualsToken(Token::REGULAR, "afternoon"),
          EqualsToken(Token::QUERY_PROPERTY, "title"),
          EqualsToken(Token::REGULAR, "今天"), EqualsToken(Token::QUERY_OR, ""),
          EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
          EqualsToken(Token::REGULAR, "ใน"), EqualsToken(Token::REGULAR, "วัน"),
          EqualsToken(Token::REGULAR, "นี้"),
          EqualsToken(Token::QUERY_EXCLUSION, ""),
          EqualsToken(Token::REGULAR, "B12"),
          EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/tokenization/token.h b/icing/tokenization/token.h
new file mode 100644
index 0000000..0bb3aaf
--- /dev/null
+++ b/icing/tokenization/token.h
@@ -0,0 +1,58 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TOKENIZATION_TOKEN_H_
+#define ICING_TOKENIZATION_TOKEN_H_
+
+#include <string_view>
+
+namespace icing {
+namespace lib {
+
// TODO(samzheng) Add group id support if needed. Right now in raw query we
// don't need group ids since all our query operators (OR, Exclusion, Property
// Restriction) only apply to the token right after them (vs. applying to
// multiple tokens after them). The "groups" of tokens can be easily recognized.
struct Token {
  enum Type {
    // Common types
    REGULAR,  // A token without special meanings, the value of it will be
              // indexed or searched directly

    // Types only used in raw query
    QUERY_OR,                 // Indicates OR logic between its left and right tokens
    QUERY_EXCLUSION,          // Indicates exclusion operation on next token
    QUERY_PROPERTY,           // Indicates property restrict on next token
    QUERY_LEFT_PARENTHESES,   // Left parentheses
    QUERY_RIGHT_PARENTHESES,  // Right parentheses

    // Indicates errors
    INVALID,
  };

  // The input text should outlive the Token instance: 'text' is a view into
  // it, not a copy.
  explicit Token(Type type_in, std::string_view text_in = "")
      : type(type_in), text(text_in) {}

  // The type of token.
  // NOTE: members are const, so Token is copy-constructible but not
  // assignable.
  const Type type;

  // The content of token. Empty for operator/parenthesis tokens.
  const std::string_view text;
};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TOKENIZATION_TOKEN_H_
diff --git a/icing/tokenization/tokenizer-factory.cc b/icing/tokenization/tokenizer-factory.cc
new file mode 100644
index 0000000..09bf2d1
--- /dev/null
+++ b/icing/tokenization/tokenizer-factory.cc
@@ -0,0 +1,44 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/tokenization/tokenizer-factory.h"
+
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/tokenization/plain-tokenizer.h"
+
+namespace icing {
+namespace lib {
+
+namespace tokenizer_factory {
+
+libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>>
+CreateIndexingTokenizer(IndexingConfig::TokenizerType::Code type,
+ const LanguageSegmenter* lang_segmenter) {
+ switch (type) {
+ case IndexingConfig::TokenizerType::PLAIN:
+ return std::make_unique<PlainTokenizer>(lang_segmenter);
+ case IndexingConfig::TokenizerType::NONE:
+ U_FALLTHROUGH;
+ default:
+ // This should never happen.
+ return absl_ports::InvalidArgumentError(
+ "Invalid tokenizer type for an indexed section");
+ }
+}
+
+} // namespace tokenizer_factory
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/tokenization/tokenizer-factory.h b/icing/tokenization/tokenizer-factory.h
new file mode 100644
index 0000000..8a22f29
--- /dev/null
+++ b/icing/tokenization/tokenizer-factory.h
@@ -0,0 +1,40 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TOKENIZATION_TOKENIZER_FACTORY_H_
+#define ICING_TOKENIZATION_TOKENIZER_FACTORY_H_
+
+#include <memory>
+
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/tokenizer.h"
+
+namespace icing {
+namespace lib {
+
+namespace tokenizer_factory {
+
// Creates an indexing tokenizer of the given type. The returned tokenizer
// does not take ownership of 'lang_segmenter', which must outlive it.
//
// Returns:
//   A tokenizer on success
//   INVALID_ARGUMENT if 'type' is not a valid tokenizer type for an indexed
//     section
libtextclassifier3::StatusOr<std::unique_ptr<Tokenizer>>
CreateIndexingTokenizer(IndexingConfig::TokenizerType::Code type,
                        const LanguageSegmenter* lang_segmenter);
+
+} // namespace tokenizer_factory
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TOKENIZATION_TOKENIZER_FACTORY_H_
diff --git a/icing/tokenization/tokenizer.h b/icing/tokenization/tokenizer.h
new file mode 100644
index 0000000..96e3231
--- /dev/null
+++ b/icing/tokenization/tokenizer.h
@@ -0,0 +1,115 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TOKENIZATION_TOKENIZER_H_
+#define ICING_TOKENIZATION_TOKENIZER_H_
+
+#include <cstdint>
+
+#include "utils/base/statusor.h"
+#include "icing/tokenization/token.h"
+namespace icing {
+namespace lib {
+
// A virtual class that all other tokenizers should inherit. It provides
// interfaces that allow callers to tokenize text. The return value could be an
// iterator or a list of tokens. Example usage:
//
// std::unique_ptr<Tokenizer> tokenizer = GetTokenizer();
// ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iter,
//                        tokenizer->Tokenize(text));
// ICING_ASSIGN_OR_RETURN(std::vector<Token> tokens,
//                        tokenizer->TokenizeAll(text));
//
// NOTE(review): this header relies on transitive includes for
// std::unique_ptr and std::vector — consider adding <memory> and <vector>.
class Tokenizer {
 public:
  virtual ~Tokenizer() = default;

  // Identifies the concrete tokenizer implementation.
  enum Type {
    // Index tokenizers
    PLAIN,  // Used to tokenize plain text input

    // Query tokenizers
    RAW_QUERY,  // Used to tokenize raw queries
  };

  // An iterator helping to get tokens.
  // Example usage:
  //
  // while (iterator.Advance()) {
  //   const Token& token = iterator.GetToken();
  //   // Do something
  // }
  class Iterator {
   public:
    virtual ~Iterator() = default;

    // Advances to the next token. Returns false if it has reached the end.
    virtual bool Advance() = 0;

    // Returns the current token. It can be called only when Advance() returns
    // true, otherwise an invalid token could be returned.
    virtual Token GetToken() const = 0;

    // Sets the tokenizer to point at the first token that *starts* *after*
    // offset. Returns false if there are no valid tokens starting after
    // offset.
    // Ex.
    // auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie();
    // iterator.ResetToTokenAfter(4);
    // // The first full token starting after position 4 (the 'b' in "bar") is
    // // "baz".
    // PrintToken(iterator.GetToken()); // prints "baz"
    //
    // The default implementation signals that resetting is unsupported by
    // always returning false; subclasses may override.
    virtual bool ResetToTokenAfter(int32_t offset) { return false; }

    // Sets the tokenizer to point at the first token that *ends* *before*
    // offset. Returns false if there are no valid tokens ending
    // before offset.
    // Ex.
    // auto iterator = tokenizer.Tokenize("foo bar baz").ValueOrDie();
    // iterator.ResetToTokenBefore(4);
    // // The first full token ending before position 4 (the 'b' in "bar") is
    // // "foo".
    // PrintToken(iterator.GetToken()); // prints "foo"
    //
    // Like ResetToTokenAfter(), unsupported by default (returns false).
    virtual bool ResetToTokenBefore(int32_t offset) { return false; }
  };

  // Tokenizes the input text. The input text should outlive the returned
  // iterator.
  //
  // Returns:
  //   A token iterator on success
  //   INVALID_ARGUMENT with error message if input text has a wrong syntax
  //                    according to implementations of different tokenizer
  //                    types.
  //   INTERNAL_ERROR if any other errors occur
  virtual libtextclassifier3::StatusOr<std::unique_ptr<Iterator>> Tokenize(
      std::string_view text) const = 0;

  // Tokenizes and returns all tokens in the input text. The input text should
  // outlive the returned vector.
  //
  // Returns:
  //   A list of tokens on success
  //   INVALID_ARGUMENT with error message if input text has a wrong syntax
  //                    according to implementations of different tokenizer
  //                    types.
  //   INTERNAL_ERROR if any other errors occur
  virtual libtextclassifier3::StatusOr<std::vector<Token>> TokenizeAll(
      std::string_view text) const = 0;
};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TOKENIZATION_TOKENIZER_H_
diff --git a/icing/tools/document-store-dump.cc b/icing/tools/document-store-dump.cc
new file mode 100644
index 0000000..45c9bf5
--- /dev/null
+++ b/icing/tools/document-store-dump.cc
@@ -0,0 +1,119 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/tools/document-store-dump.h"
+
+#include <cinttypes>
+
+#include "icing/absl_ports/str_cat.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+namespace {
+
// Appends a human-readable, proto-text-like rendering of 'doc' to *output.
// The output format exists only for debugging dumps and is not a stable
// interface.
// NOTE(review): created_timestamp_ms is cast to int64_t but printed with
// PRIu64 — consider PRId64 for consistency.
void AppendDocumentProto(DocId document_id, const Document& doc,
                         std::string* output) {
  absl_ports::StrAppend(
      output, IcingStringUtil::StringPrintf(
                  "Document {\n document_id: %d\n corpus_id: %d\n uri: "
                  "'%s'\n score: %d\n created_timestamp_ms: %" PRIu64 "\n",
                  static_cast<int>(document_id), doc.corpus_id(),
                  doc.uri().c_str(), static_cast<int>(doc.score()),
                  static_cast<int64_t>(doc.created_timestamp_ms())));
  // One "section { ... }" entry per section, including its config.
  for (const auto& section : doc.sections()) {
    absl_ports::StrAppend(
        output, IcingStringUtil::StringPrintf(
                    " section {\n id: %d\n indexed_length: "
                    "%d\n content: '%s'\n snippet: '%s'\n",
                    static_cast<int>(section.id()),
                    static_cast<int>(section.indexed_length()),
                    section.content().c_str(), section.snippet().c_str()));
    for (int64_t extracted_number : section.extracted_numbers()) {
      absl_ports::StrAppend(output, IcingStringUtil::StringPrintf(
                                        " extracted_numbers: %" PRId64 "\n",
                                        extracted_number));
    }
    for (const std::string& annotation_token : section.annotation_tokens()) {
      absl_ports::StrAppend(
          output, IcingStringUtil::StringPrintf(" annotation_tokens: '%s'\n",
                                                annotation_token.c_str()));
    }
    // StringPrintf has no bool conversion, so spell booleans out by hand.
    std::string indexed = (section.config().indexed()) ? "true" : "false";
    std::string index_prefixes =
        (section.config().index_prefixes()) ? "true" : "false";
    absl_ports::StrAppend(
        output,
        IcingStringUtil::StringPrintf(
            " config {\n name: '%s'\n indexed: %s\n "
            "tokenizer: %d\n weight: %d\n index_prefixes: %s\n "
            "subsection_separator: '%s'\n",
            section.config().name().c_str(), indexed.c_str(),
            section.config().tokenizer(),
            static_cast<int>(section.config().weight()), index_prefixes.c_str(),
            section.config().subsection_separator().c_str()));
    for (const auto& variant_generator :
         section.config().variant_generators()) {
      absl_ports::StrAppend(
          output, IcingStringUtil::StringPrintf(
                      " variant_generators: %d\n", variant_generator));
    }
    absl_ports::StrAppend(
        output,
        IcingStringUtil::StringPrintf(
            " common_term_legacy_hit_score: %d\n "
            "rfc822_host_name_term_legacy_hit_score: %d\n "
            "semantic_property: '%s'\n universal_section_id: %d\n "
            "omnibox_section_type: %d\n st_section_type: %d\n }\n }\n",
            section.config().common_term_legacy_hit_score(),
            section.config().rfc822_host_name_term_legacy_hit_score(),
            section.config().semantic_property().c_str(),
            section.config().universal_section_id(),
            section.config().omnibox_section_type(),
            section.config().st_section_type()));
  }
  // One "languages { ... }" entry per detected language.
  for (const auto& language : doc.languages()) {
    std::string used_classifier =
        (language.used_classifier()) ? "true" : "false";
    absl_ports::StrAppend(
        output, IcingStringUtil::StringPrintf(
                    " languages {\n language: %d\n score: %d\n "
                    "used_classifier: %s\n }\n",
                    language.language(), static_cast<int>(language.score()),
                    used_classifier.c_str()));
  }
  absl_ports::StrAppend(
      output, IcingStringUtil::StringPrintf(
                  " ANNOTATIONS PRINTING NOT IMPLEMENTED YET IN ICING-TOOL\n"));
}
+
+} // namespace
+
+std::string GetDocumentStoreDump(const DocumentStore& document_store) {
+ std::string output;
+ for (DocId document_id = 0; document_id < document_store.num_documents();
+ document_id++) {
+ Document doc;
+ if (!document_store.ReadDocument(document_id, &doc)) {
+ ICING_LOG(FATAL) << "Failed to read document";
+ }
+
+ AppendDocumentProto(document_id, doc, &output);
+ }
+ return output;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/tools/document-store-dump.h b/icing/tools/document-store-dump.h
new file mode 100644
index 0000000..023b301
--- /dev/null
+++ b/icing/tools/document-store-dump.h
@@ -0,0 +1,35 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TOOLS_DOCUMENT_STORE_DUMP_H_
+#define ICING_TOOLS_DOCUMENT_STORE_DUMP_H_
+
+#include <string>
+
+#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/document-store.h"
+
+namespace icing {
+namespace lib {
+
+// Utility function for dumping the complete document store content.
+// This provides a human-readable representation of the document store, mainly
+// provided for easier understandability for developers.
+// The output of this class should only be available on cmdline-tool-level
+// (with root access), or unit tests. In other words it should not be possible
+// to trigger this on a release key device, for data protection reasons.
+std::string GetDocumentStoreDump(const DocumentStore& document_store);
+
+} // namespace lib
+} // namespace icing
+#endif // ICING_TOOLS_DOCUMENT_STORE_DUMP_H_
diff --git a/icing/tools/icing-tool.cc b/icing/tools/icing-tool.cc
new file mode 100644
index 0000000..72a11e9
--- /dev/null
+++ b/icing/tools/icing-tool.cc
@@ -0,0 +1,306 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Copyright 2012 Google Inc. All Rights Reserved.
+// Author: ulas@google.com (Ulas Kirazci)
+//
+// A tool to debug the native index.
+
+#include <getopt.h>
+#include <unistd.h>
+
+#include <string>
+
+#include "java/com/google/android/gmscore/integ/modules/icing/jni/core/string-util.h"
+#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/doc-property-filter.h"
+#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/document-store.h"
+#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/dynamic-trie.h"
+#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/filesystem.h"
+#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/mobstore.h"
+#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/native-index-impl.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/tools/document-store-dump.h"
+#include "icing/util/logging.h"
+
+using std::vector;
+using ::wireless_android_play_playlog::icing::IndexRestorationStats;
+
+namespace icing {
+namespace lib {
+
+// 256KB for debugging.
+const size_t kMaxDocumentSizeForDebugging = 1u << 18;
+// Dump dynamic trie stats and contents.
+void ProcessDynamicTrie(const char* filename) {
+ Filesystem filesystem;
+ DynamicTrie trie(filename, DynamicTrie::RuntimeOptions(), &filesystem);
+ if (!trie.Init()) {
+ ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Opening trie %s failed",
+ filename);
+ return;
+ }
+
+ std::string out;
+ trie.GetDebugInfo(true, &out);
+ printf("Stats:\n%s", out.c_str());
+
+ std::ostringstream contents;
+ vector<std::string> keys;
+ trie.DumpTrie(&contents, &keys);
+ printf("Contents:\n%s", contents.str().c_str());
+}
+
// Opens the native index rooted at 'root_dir', restores its in-memory state,
// and returns it. Crashes (FATAL) if initialization fails.
//
// Ownership of the returned NativeIndexImpl transfers to the caller (callers
// in this file wrap it in std::unique_ptr immediately).
NativeIndexImpl* MakeIndex(const char* root_dir) {
  NativeConfig native_config;
  native_config.set_max_document_size(kMaxDocumentSizeForDebugging);
  FlashIndexOptions flash_index_options(
      NativeIndexImpl::GetNativeIndexDir(root_dir));
  NativeIndexImpl* ni =
      new NativeIndexImpl(root_dir, native_config, flash_index_options);
  InitStatus init_status;
  if (!ni->Init(&init_status)) {
    ICING_LOG(FATAL) << "Failed to initialize legacy native index impl";
  }

  // Bring the index up to date before serving queries; the restoration
  // stats are not needed by this tool.
  IndexRestorationStats unused;
  ni->RestoreIndex(IndexRequestSpec::default_instance(), &unused);
  return ni;
}
+
+void RunQuery(NativeIndexImpl* ni, const std::string& query, int start,
+ int num_results) {
+ // Pull out corpusids and uris.
+ QueryRequestSpec spec;
+ spec.set_no_corpus_filter(true);
+ spec.set_want_uris(true);
+ spec.set_scoring_verbosity_level(1);
+ spec.set_prefix_match(true);
+
+ QueryResponse response;
+ ni->ExecuteQuery(query, spec, 10000, start, num_results, &response);
+
+ ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+ "Query [%s] num results %u", query.c_str(), response.num_results());
+
+ for (int i = 0, uri_offset = 0; i < response.num_results(); i++) {
+ ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+ "%d: (cid=%u) uri %.*s", i, response.corpus_ids(i),
+ response.uri_lengths(i), response.uri_buffer().data() + uri_offset);
+ uri_offset += response.uri_lengths(i);
+ }
+}
+
+void RunSuggest(NativeIndexImpl* ni, const std::string& prefix,
+ int num_results) {
+ SuggestionResponse results;
+ ni->Suggest(prefix, num_results, vector<CorpusId>(), &results);
+
+ ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+ "Query [%s] num results %zu", prefix.c_str(),
+ static_cast<size_t>(results.suggestions_size()));
+
+ for (size_t i = 0; i < results.suggestions_size(); i++) {
+ ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+ "Sugg: [%s] display text [%s]", results.suggestions(i).query().c_str(),
+ results.suggestions(i).display_text().c_str());
+ }
+}
+
+int IcingTool(int argc, char** argv) {
+ auto file_storage = CreatePosixFileStorage();
+ enum Options {
+ OPT_FILENAME,
+ OPT_OP,
+ OPT_QUERY,
+ NUM_OPT,
+ };
+ static const option kOptions[NUM_OPT + 1] = {
+ {"filename", 1, nullptr, 0},
+ {"op", 1, nullptr, 0},
+ {"query", 1, nullptr, 0},
+ {nullptr, 0, nullptr, 0},
+ };
+ const char* opt_values[NUM_OPT];
+ memset(opt_values, 0, sizeof(opt_values));
+
+ while (true) {
+ int opt_idx = -1;
+ int ret = getopt_long(argc, argv, "", kOptions, &opt_idx);
+ if (ret != 0) break;
+
+ if (opt_idx >= 0 && opt_idx < NUM_OPT) {
+ opt_values[opt_idx] = optarg;
+ }
+ }
+
+ if (!opt_values[OPT_OP]) {
+ ICING_LOG(ERROR) << "No op specified";
+ return -1;
+ }
+
+ if (!opt_values[OPT_FILENAME]) {
+ ICING_LOG(ERROR) << "No filename specified";
+ return -1;
+ }
+ if (!strncmp(
+ opt_values[OPT_FILENAME],
+ "/data/data/com.google.android.gms/files/AppDataSearch",
+ strlen("/data/data/com.google.android.gms/files/AppDataSearch"))) {
+ ICING_LOG(ERROR)
+ << "Should not read directly from the file in gmscore - "
+ "icing-tool also commits writes as side-effects which corrupts "
+ "the index on concurrent modification";
+ return -1;
+ }
+
+ const char* op = opt_values[OPT_OP];
+ DocumentStore::Options options(file_storage.get(),
+ kMaxDocumentSizeForDebugging);
+ if (!strcmp(op, "dyntrie")) {
+ std::string full_file_path =
+ absl_ports::StrCat(opt_values[OPT_FILENAME], "/idx.lexicon");
+ ProcessDynamicTrie(full_file_path.c_str());
+ } else if (!strcmp(op, "verify")) {
+ std::unique_ptr<NativeIndexImpl> ni(MakeIndex(opt_values[OPT_FILENAME]));
+ ni->CheckVerify();
+ } else if (!strcmp(op, "query")) {
+ if (opt_values[OPT_QUERY] == nullptr) {
+ ICING_LOG(FATAL) << "Opt value is null";
+ }
+
+ std::unique_ptr<NativeIndexImpl> ni(MakeIndex(opt_values[OPT_FILENAME]));
+ RunQuery(ni.get(), opt_values[OPT_QUERY], 0, 100);
+ } else if (!strcmp(op, "suggest")) {
+ if (opt_values[OPT_QUERY] == nullptr) {
+ ICING_LOG(FATAL) << "Opt value is null";
+ }
+
+ std::unique_ptr<NativeIndexImpl> ni(MakeIndex(opt_values[OPT_FILENAME]));
+ RunSuggest(ni.get(), opt_values[OPT_QUERY], 100);
+ } else if (!strcmp(op, "dump-all-docs")) {
+ DocumentStore ds(opt_values[OPT_FILENAME], options);
+ if (!ds.Init()) {
+ ICING_LOG(FATAL) << "Legacy document store failed to initialize";
+ }
+
+ printf(
+ "------ Document Store Dump Start ------\n"
+ "%s\n"
+ "------ Document Store Dump End ------\n",
+ GetDocumentStoreDump(ds).c_str());
+ } else if (!strcmp(op, "dump-uris")) {
+ CorpusId corpus_id = kInvalidCorpusId;
+ if (opt_values[OPT_QUERY]) {
+ // Query is corpus id.
+ corpus_id = atoi(opt_values[OPT_QUERY]); // NOLINT
+ }
+ DocumentStore ds(opt_values[OPT_FILENAME], options);
+ if (!ds.Init()) {
+ ICING_LOG(FATAL) << "Legacy document store failed to initialize";
+ }
+
+ DocPropertyFilter dpf;
+ ds.AddDeletedTagFilter(&dpf);
+
+ // Dump with format "<corpusid> <uri> <tagname>*".
+ int filtered = 0;
+ vector<std::string> tagnames;
+ for (DocId document_id = 0; document_id < ds.num_documents();
+ document_id++) {
+ Document doc;
+ if (!ds.ReadDocument(document_id, &doc)) {
+ ICING_LOG(FATAL) << "Failed to read document.";
+ }
+
+ if (corpus_id != kInvalidCorpusId && corpus_id != doc.corpus_id()) {
+ filtered++;
+ continue;
+ }
+ if (dpf.Match(0, document_id)) {
+ filtered++;
+ continue;
+ }
+
+ tagnames.clear();
+ ds.GetAllSetUserTagNames(document_id, &tagnames);
+
+ printf("%d %s %s\n", doc.corpus_id(), doc.uri().c_str(),
+ StringUtil::JoinStrings("/", tagnames).c_str());
+ }
+ ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+ "Processed %u filtered %d", ds.num_documents(), filtered);
+ } else if (!strcmp(op, "dump-docs")) {
+ std::string out_filename = opt_values[OPT_FILENAME];
+ out_filename.append("/docs-dump");
+ CorpusId corpus_id = kInvalidCorpusId;
+ if (opt_values[OPT_QUERY]) {
+ // Query is corpus id.
+ corpus_id = atoi(opt_values[OPT_QUERY]); // NOLINT
+ out_filename.push_back('.');
+ out_filename.append(opt_values[OPT_QUERY]);
+ }
+ DocumentStore ds(opt_values[OPT_FILENAME], options);
+ if (!ds.Init()) {
+ ICING_LOG(FATAL) << "Legacy document store failed to initialize";
+ }
+
+ DocPropertyFilter dpf;
+ ds.AddDeletedTagFilter(&dpf);
+
+ // Dump with format (<32-bit length><serialized content>)*.
+ FILE* fp = fopen(out_filename.c_str(), "w");
+ int filtered = 0;
+ for (DocId document_id = 0; document_id < ds.num_documents();
+ document_id++) {
+ Document doc;
+ if (!ds.ReadDocument(document_id, &doc)) {
+ ICING_LOG(FATAL) << "Failed to read document.";
+ }
+
+ if (corpus_id != kInvalidCorpusId && corpus_id != doc.corpus_id()) {
+ filtered++;
+ continue;
+ }
+ if (dpf.Match(0, document_id)) {
+ filtered++;
+ continue;
+ }
+
+ std::string serialized = doc.SerializeAsString();
+ uint32_t length = serialized.size();
+ if (fwrite(&length, 1, sizeof(length), fp) != sizeof(length)) {
+ ICING_LOG(FATAL) << "Failed to write length information to file";
+ }
+
+ if (fwrite(serialized.data(), 1, serialized.size(), fp) !=
+ serialized.size()) {
+ ICING_LOG(FATAL) << "Failed to write document to file";
+ }
+ }
+ ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+ "Processed %u filtered %d", ds.num_documents(), filtered);
+ fclose(fp);
+ } else {
+ ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unknown op %s", op);
+ return -1;
+ }
+
+ return 0;
+}
+
+} // namespace lib
+} // namespace icing
+
+int main(int argc, char** argv) { return icing::lib::IcingTool(argc, argv); }
diff --git a/icing/transform/normalizer.cc b/icing/transform/normalizer.cc
new file mode 100644
index 0000000..7553e28
--- /dev/null
+++ b/icing/transform/normalizer.cc
@@ -0,0 +1,229 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/transform/normalizer.h"
+
+#include <cctype>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/util/i18n-utils.h"
+#include "icing/util/logging.h"
+#include "unicode/umachine.h"
+#include "unicode/unorm2.h"
+#include "unicode/utrans.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// The following is the compound id used to tell UTransliterator how to
+// transform terms. The basic normalization forms NFD (canonical normalization
+// form decomposition) and NFKC (compatible normalization form composition)
+// are applied as well as some other rules we need. More information at
+// http://www.unicode.org/reports/tr15/
+// TODO(samzheng) Figure out if we need to support small hiragana to katakana
+// transformation.
+constexpr UChar kTransformRulesUtf16[] =
+ u"Lower; " // Lowercase
+ "Latin-ASCII; " // Map Latin characters to ASCII characters
+ "Hiragana-Katakana; " // Map hiragana to katakana
+ "[:Latin:] NFD; " // Decompose Latin letters
+ "[:Nonspacing Mark:] Remove; " // Remove accent / diacritic marks
+ "NFKC"; // Decompose and compose everything
+
+// Length of the transform rules excluding the terminating NULL.
+constexpr int kTransformRulesLength =
+ sizeof(kTransformRulesUtf16) / sizeof(kTransformRulesUtf16[0]) - 1;
+
+// An invalid value defined by Unicode.
+constexpr UChar32 kInvalidUchar32 = 0xFFFD;
+} // namespace
+
// Creates a Normalizer with a valid TermTransformer instance.
//
// Note: UTokenizer2 is also an option to normalize Unicode strings, but since
// we need some custom transform rules other than NFC/NFKC we have to use
// TermTransformer as a custom transform rule executor.
//
// Returns:
//   A Normalizer on success
//   INVALID_ARGUMENT if max_term_byte_size <= 0
//   INTERNAL_ERROR if the underlying ICU transliterator can't be created
//     (propagated from TermTransformer::Create())
libtextclassifier3::StatusOr<std::unique_ptr<Normalizer>> Normalizer::Create(
    int max_term_byte_size) {
  if (max_term_byte_size <= 0) {
    return absl_ports::InvalidArgumentError(
        "max_term_byte_size must be greater than zero.");
  }

  ICING_ASSIGN_OR_RETURN(
      std::unique_ptr<Normalizer::TermTransformer> term_transformer,
      Normalizer::TermTransformer::Create());

  // The constructor is invoked directly (not make_unique) because it is not
  // publicly accessible; Create() is the only way to build a Normalizer.
  return std::unique_ptr<Normalizer>(
      new Normalizer(std::move(term_transformer), max_term_byte_size));
}
+
// Constructor used by Create(), which has already validated both arguments
// (non-null transformer, positive max_term_byte_size).
Normalizer::Normalizer(
    std::unique_ptr<Normalizer::TermTransformer> term_transformer,
    int max_term_byte_size)
    : term_transformer_(std::move(term_transformer)),
      max_term_byte_size_(max_term_byte_size) {}
+
+std::string Normalizer::NormalizeTerm(const std::string_view term) const {
+ std::string normalized_text;
+
+ if (term.empty()) {
+ return normalized_text;
+ }
+
+ UErrorCode status = U_ZERO_ERROR;
+ // ICU manages the singleton instance
+ const UNormalizer2* normalizer2 = unorm2_getNFCInstance(&status);
+ if (U_FAILURE(status)) {
+ ICING_LOG(WARNING) << "Failed to create a UNormalizer2 instance";
+ }
+
+ // Checks if the first character is within ASCII range or can be transformed
+ // into an ASCII char. Since the term is tokenized, we know that the whole
+ // term can be transformed into ASCII if the first character can.
+ UChar32 first_uchar32 =
+ i18n_utils::GetUChar32At(term.data(), term.length(), 0);
+ if (normalizer2 != nullptr && first_uchar32 != kInvalidUchar32 &&
+ i18n_utils::DiacriticCharToAscii(normalizer2, first_uchar32, nullptr)) {
+ // This is a faster method to normalize Latin terms.
+ normalized_text = NormalizeLatin(normalizer2, term);
+ } else {
+ normalized_text = term_transformer_->Transform(term);
+ }
+
+ if (normalized_text.length() > max_term_byte_size_) {
+ i18n_utils::SafeTruncateUtf8(&normalized_text, max_term_byte_size_);
+ }
+
+ return normalized_text;
+}
+
+std::string Normalizer::NormalizeLatin(const UNormalizer2* normalizer2,
+ const std::string_view term) const {
+ std::string result;
+ result.reserve(term.length());
+ for (int i = 0; i < term.length(); i++) {
+ if (i18n_utils::IsAscii(term[i])) {
+ result.push_back(std::tolower(term[i]));
+ } else if (i18n_utils::IsLeadUtf8Byte(term[i])) {
+ UChar32 uchar32 = i18n_utils::GetUChar32At(term.data(), term.length(), i);
+ if (uchar32 == kInvalidUchar32) {
+ ICING_LOG(WARNING) << "Unable to get uchar32 from " << term
+ << " at position" << i;
+ continue;
+ }
+ char ascii_char;
+ if (i18n_utils::DiacriticCharToAscii(normalizer2, uchar32, &ascii_char)) {
+ result.push_back(std::tolower(ascii_char));
+ } else {
+ // We don't know how to transform / decompose this Unicode character, it
+ // probably means that some other Unicode characters are mixed with
+ // Latin characters. This shouldn't happen if input term is properly
+ // tokenized. We handle it here in case there're something wrong with
+ // the tokenizers.
+ int utf8_length = i18n_utils::GetUtf8Length(uchar32);
+ absl_ports::StrAppend(&result, term.substr(i, utf8_length));
+ }
+ }
+ }
+
+ return result;
+}
+
// Creates a TermTransformer wrapping an ICU transliterator compiled from the
// compound rules in kTransformRulesUtf16.
//
// Returns:
//   A TermTransformer on success
//   INTERNAL_ERROR if the ICU transliterator can't be created
libtextclassifier3::StatusOr<std::unique_ptr<Normalizer::TermTransformer>>
Normalizer::TermTransformer::Create() {
  UErrorCode status = U_ZERO_ERROR;
  UTransliterator* term_transformer = utrans_openU(
      kTransformRulesUtf16, kTransformRulesLength, UTRANS_FORWARD,
      /*rules=*/nullptr, /*rulesLength=*/0, /*parseError=*/nullptr, &status);

  if (U_FAILURE(status)) {
    return absl_ports::InternalError("Failed to create UTransliterator.");
  }

  return std::unique_ptr<Normalizer::TermTransformer>(
      new Normalizer::TermTransformer(term_transformer));
}
+
// Takes ownership of 'u_transliterator'; it is closed in the destructor.
Normalizer::TermTransformer::TermTransformer(UTransliterator* u_transliterator)
    : u_transliterator_(u_transliterator) {}
+
// Releases the owned ICU transliterator, if any.
Normalizer::TermTransformer::~TermTransformer() {
  if (u_transliterator_ != nullptr) {
    utrans_close(u_transliterator_);
  }
}
+
// Runs the full ICU transliteration over 'term': converts UTF8 -> UTF16,
// transforms in place (growing the buffer once if the result needs more
// UTF16 units than the input), then converts back to UTF8. On any conversion
// or transform failure the original term is returned unchanged.
std::string Normalizer::TermTransformer::Transform(
    const std::string_view term) const {
  auto utf16_term_or = i18n_utils::Utf8ToUtf16(term);
  if (!utf16_term_or.ok()) {
    ICING_VLOG(0) << "Failed to convert UTF8 term '" << term << "' to UTF16";
    return std::string(term);
  }
  std::u16string utf16_term = std::move(utf16_term_or).ValueOrDie();
  UErrorCode status = U_ZERO_ERROR;
  // In/out parameter: on input the text length, on output the length the
  // transformed text has (or would need, on buffer overflow).
  int utf16_term_desired_length = utf16_term.length();
  int limit = utf16_term.length();
  utrans_transUChars(u_transliterator_, &utf16_term[0],
                     &utf16_term_desired_length, utf16_term.length(),
                     /*start=*/0, &limit, &status);

  // For most cases, one Unicode character is normalized to exact one Unicode
  // character according to our transformation rules. However, there could be
  // some rare cases where the normalized text is longer than the original
  // one. E.g. "¼" (1 character) -> "1/4" (3 characters). That causes a buffer
  // overflow error and we need to increase our buffer size and try again.
  if (status == U_BUFFER_OVERFLOW_ERROR) {
    // 'utf16_term_desired_length' has already been set to the desired value
    // by utrans_transUChars(), here we increase the buffer size to that
    // value.
    //
    // NOTE: we need to call resize() but not reserve() because values can't
    // be set at positions after length().
    int original_content_length = utf16_term.length();
    utf16_term.resize(utf16_term_desired_length);
    utf16_term_desired_length = original_content_length;
    limit = original_content_length;
    status = U_ZERO_ERROR;
    utrans_transUChars(u_transliterator_, &utf16_term[0],
                       &utf16_term_desired_length, utf16_term.length(),
                       /*start=*/0, &limit, &status);
  }

  if (U_FAILURE(status)) {
    // Failed to transform, return its original form.
    ICING_LOG(WARNING) << "Failed to normalize UTF8 term: " << term;
    return std::string(term);
  }

  auto utf8_term_or = i18n_utils::Utf16ToUtf8(utf16_term);
  if (!utf8_term_or.ok()) {
    ICING_VLOG(0) << "Failed to convert UTF16 term '" << term << "' to UTF8";
    return std::string(term);
  }
  return std::move(utf8_term_or).ValueOrDie();
}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/transform/normalizer.h b/icing/transform/normalizer.h
new file mode 100644
index 0000000..7f6350a
--- /dev/null
+++ b/icing/transform/normalizer.h
@@ -0,0 +1,114 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TRANSFORM_NORMALIZER_H_
+#define ICING_TRANSFORM_NORMALIZER_H_
+
+#include <memory>
+#include <string>
+#include <string_view>
+
+#include "utils/base/statusor.h"
+#include "unicode/unorm2.h"
+#include "unicode/utrans.h"
+
+namespace icing {
+namespace lib {
+
+// Used to normalize UTF8 strings for text matching. It enforces a set of rules:
+// 1. Transforms text to be lowercase UTF8.
+// 2. Transforms full-width Latin characters to ASCII characters if possible.
+// 3. Transforms hiragana to katakana.
+// 4. Removes accent / diacritic marks on Latin characters.
+// 5. Normalized text must be less than or equal to max_term_byte_size,
+// otherwise it will be truncated.
+//
+// There are some other rules from ICU not listed here, please see .cc file for
+// details.
+//
+// Example use:
+// ICING_ASSIGN_OR_RETURN(auto normalizer,
+// Normalizer::Create(/*max_term_byte_size=*/5));
+//
+// std::string normalized_text = normalizer->NormalizeTerm("HELLO!");
+// ICING_LOG(INFO) << normalized_text; // prints "hello"
+class Normalizer {
+ public:
+ Normalizer(const Normalizer&) = delete;
+ Normalizer& operator=(const Normalizer&) = delete;
+
+ // Creates a normalizer with the subcomponents it needs. max_term_byte_size
+ // enforces the max size of text after normalization, text will be truncated
+ // if exceeds the max size.
+ //
+ // Returns:
+ // A normalizer on success
+ // INVALID_ARGUMENT if max_term_byte_size <= 0
+ // INTERNAL_ERROR if failed to create any subcomponent
+ static libtextclassifier3::StatusOr<std::unique_ptr<Normalizer>> Create(
+ int max_term_byte_size);
+
+ // Normalizes the input term based on rules. See .cc file for rule details.
+ //
+ // NOTE: Term should not mix Latin and non-Latin characters. Doing so may
+ // result in the non-Latin characters not properly being normalized
+ std::string NormalizeTerm(std::string_view term) const;
+
+ private:
+ // A handler class that helps manage the lifecycle of UTransliterator. It's
+ // used in Normalizer to transform terms into the formats we need.
+ class TermTransformer {
+ public:
+ // Creates TermTransformer with a valid UTransliterator instance
+ //
+ // Returns:
+ // A term transformer on success
+ // INTERNAL_ERROR if failed to create any subcomponent
+ static libtextclassifier3::StatusOr<std::unique_ptr<TermTransformer>>
+ Create();
+
+ // Closes the UTransliterator instance
+ ~TermTransformer();
+
+ // Transforms the text based on our rules described at top of this file
+ std::string Transform(std::string_view term) const;
+
+ private:
+ explicit TermTransformer(UTransliterator* u_transliterator);
+
+ // An ICU class to execute custom term transformation / normalization rules.
+ // utrans_close() must be called after use.
+ UTransliterator* u_transliterator_;
+ };
+
+ explicit Normalizer(std::unique_ptr<TermTransformer> term_transformer,
+ int max_term_byte_size);
+
+ // Helper method to normalize Latin terms only. Rules applied:
+ // 1. Uppercase to lowercase
+ // 2. Remove diacritic (accent) marks
+ std::string NormalizeLatin(const UNormalizer2* normalizer2,
+ std::string_view term) const;
+
+ // Used to transform terms into their normalized forms.
+ std::unique_ptr<TermTransformer> term_transformer_;
+
+ // The maximum term length allowed after normalization.
+ const int max_term_byte_size_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TRANSFORM_NORMALIZER_H_
diff --git a/icing/transform/normalizer_benchmark.cc b/icing/transform/normalizer_benchmark.cc
new file mode 100644
index 0000000..53adac7
--- /dev/null
+++ b/icing/transform/normalizer_benchmark.cc
@@ -0,0 +1,157 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/test-data.h"
+#include "icing/transform/normalizer.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/transform:normalizer_benchmark
+//
+// $ blaze-bin/icing/transform/normalizer_benchmark
+// --benchmarks=all
+//
+// Run on an Android device:
+// Make target //icing/transform:normalizer depend on
+// //third_party/icu
+//
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/transform:normalizer_benchmark
+//
+// $ adb push blaze-bin/icing/transform/normalizer_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/normalizer_benchmark --benchmarks=all --adb
+
+// Flag to tell the benchmark that it'll be run on an Android device via adb,
+// the benchmark will set up data files accordingly.
+ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device");
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+void BM_NormalizeUppercase(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Normalizer> normalizer,
+ Normalizer::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+ std::string input_string(state.range(0), 'A');
+ // DoNotOptimize keeps the returned string from being optimized away.
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(normalizer->NormalizeTerm(input_string));
+ }
+}
+BENCHMARK(BM_NormalizeUppercase)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_NormalizeAccent(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Normalizer> normalizer,
+ Normalizer::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+ std::string input_string;
+ while (input_string.length() < static_cast<size_t>(state.range(0))) {
+ input_string.append("àáâãā");
+ }
+ // DoNotOptimize keeps the returned string from being optimized away.
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(normalizer->NormalizeTerm(input_string));
+ }
+}
+BENCHMARK(BM_NormalizeAccent)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+void BM_NormalizeHiragana(benchmark::State& state) {
+ bool run_via_adb = absl::GetFlag(FLAGS_adb);
+ if (!run_via_adb) {
+ ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
+ }
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Normalizer> normalizer,
+ Normalizer::Create(
+ /*max_term_byte_size=*/std::numeric_limits<int>::max()));
+
+ std::string input_string;
+ while (input_string.length() < static_cast<size_t>(state.range(0))) {
+ input_string.append("あいうえお");
+ }
+ // DoNotOptimize keeps the returned string from being optimized away.
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(normalizer->NormalizeTerm(input_string));
+ }
+}
+BENCHMARK(BM_NormalizeHiragana)
+ ->Arg(1000)
+ ->Arg(2000)
+ ->Arg(4000)
+ ->Arg(8000)
+ ->Arg(16000)
+ ->Arg(32000)
+ ->Arg(64000)
+ ->Arg(128000)
+ ->Arg(256000)
+ ->Arg(384000)
+ ->Arg(512000)
+ ->Arg(1024000)
+ ->Arg(2048000)
+ ->Arg(4096000);
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/transform/normalizer_test.cc b/icing/transform/normalizer_test.cc
new file mode 100644
index 0000000..ec0a782
--- /dev/null
+++ b/icing/transform/normalizer_test.cc
@@ -0,0 +1,164 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/transform/normalizer.h"
+
+#include <memory>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/i18n-test-utils.h"
+#include "icing/testing/test-data.h"
+
+namespace icing {
+namespace lib {
+namespace {
+using ::testing::Eq;
+
+class NormalizerTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ SetUpICUDataFile("icing/icu.dat"));
+
+ ICING_ASSERT_OK_AND_ASSIGN(normalizer_, Normalizer::Create(
+ /*max_term_byte_size=*/1024));
+ }
+
+ std::unique_ptr<Normalizer> normalizer_;
+};
+
+TEST_F(NormalizerTest, Creation) {
+ EXPECT_THAT(Normalizer::Create(5), IsOk());
+ EXPECT_THAT(Normalizer::Create(0),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ EXPECT_THAT(Normalizer::Create(-1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+// Strings that are already normalized won't change if normalized again.
+TEST_F(NormalizerTest, AlreadyNormalized) {
+ EXPECT_THAT(normalizer_->NormalizeTerm(""), Eq(""));
+ EXPECT_THAT(normalizer_->NormalizeTerm("hello world"), Eq("hello world"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("你好"), Eq("你好"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("キャンパス"), Eq("キャンパス"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("안녕하세요"), Eq("안녕하세요"));
+}
+
+TEST_F(NormalizerTest, UppercaseToLowercase) {
+ EXPECT_THAT(normalizer_->NormalizeTerm("MDI"), Eq("mdi"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("Icing"), Eq("icing"));
+}
+
+TEST_F(NormalizerTest, LatinLetterRemoveAccent) {
+ EXPECT_THAT(normalizer_->NormalizeTerm("Zürich"), Eq("zurich"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("après-midi"), Eq("apres-midi"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("Buenos días"), Eq("buenos dias"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("āăąḃḅḇčćç"), Eq("aaabbbccc"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ÁȦÄḂḄḆĆČḈ"), Eq("aaabbbccc"));
+}
+
+// Accent / diacritic marks won't be removed in non-latin chars, e.g. in
+// Japanese and Greek
+TEST_F(NormalizerTest, NonLatinLetterNotRemoveAccent) {
+ EXPECT_THAT(normalizer_->NormalizeTerm("ダヂヅデド"), Eq("ダヂヅデド"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("kαλημέρα"), Eq("kαλημέρα"));
+}
+
+TEST_F(NormalizerTest, FullWidthCharsToASCII) {
+ // Full-width punctuation to ASCII punctuation
+ EXPECT_THAT(normalizer_->NormalizeTerm("。,!?:”"), Eq(".,!?:\""));
+ // 0xff10 is the full-width number 0
+ EXPECT_THAT(normalizer_->NormalizeTerm(UcharToString(0xff10)), Eq("0"));
+ // 0xff21 is the full-width letter A
+ EXPECT_THAT(normalizer_->NormalizeTerm(UcharToString(0xff21)), Eq("a"));
+ // 0xff41 is the full-width letter a
+ EXPECT_THAT(normalizer_->NormalizeTerm(UcharToString(0xff41)), Eq("a"));
+}
+
+// For Katakana, each character is normalized to its full-width version.
+TEST_F(NormalizerTest, KatakanaHalfWidthToFullWidth) {
+ EXPECT_THAT(normalizer_->NormalizeTerm("カ"), Eq("カ"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ォ"), Eq("ォ"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("サ"), Eq("サ"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ホ"), Eq("ホ"));
+}
+
+TEST_F(NormalizerTest, HiraganaToKatakana) {
+ EXPECT_THAT(normalizer_->NormalizeTerm("あいうえお"), Eq("アイウエオ"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("かきくけこ"), Eq("カキクケコ"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ばびぶべぼ"), Eq("バビブベボ"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("がぎぐげご"), Eq("ガギグゲゴ"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("ぎゃぎゅぎょ"), Eq("ギャギュギョ"));
+}
+
+TEST_F(NormalizerTest, SuperscriptAndSubscriptToASCII) {
+ EXPECT_THAT(normalizer_->NormalizeTerm("⁹"), Eq("9"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("₉"), Eq("9"));
+}
+
+TEST_F(NormalizerTest, CircledCharsToASCII) {
+ EXPECT_THAT(normalizer_->NormalizeTerm("①"), Eq("1"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("Ⓐ"), Eq("a"));
+}
+
+TEST_F(NormalizerTest, RotatedCharsToASCII) {
+ EXPECT_THAT(normalizer_->NormalizeTerm("︷"), Eq("{"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("︸"), Eq("}"));
+}
+
+TEST_F(NormalizerTest, SquaredCharsToASCII) {
+ EXPECT_THAT(normalizer_->NormalizeTerm("㌀"), Eq("アパート"));
+}
+
+TEST_F(NormalizerTest, FractionsToASCII) {
+ EXPECT_THAT(normalizer_->NormalizeTerm("¼"), Eq(" 1/4"));
+ EXPECT_THAT(normalizer_->NormalizeTerm("⅚"), Eq(" 5/6"));
+}
+
+TEST_F(NormalizerTest, Truncate) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(auto normalizer,
+ Normalizer::Create(/*max_term_byte_size=*/5));
+
+ // Terms within the 5-byte limit are left untouched.
+ EXPECT_THAT(normalizer->NormalizeTerm("hi"), Eq("hi"));
+ EXPECT_THAT(normalizer->NormalizeTerm("hello"), Eq("hello"));
+
+ // Truncated to length 5.
+ EXPECT_THAT(normalizer->NormalizeTerm("hello!"), Eq("hello"));
+
+ // Each Japanese character has 3 bytes, so truncating to length 5 results in
+ // only 1 character.
+ EXPECT_THAT(normalizer->NormalizeTerm("キャンパス"), Eq("キ"));
+
+ // Each Greek character has 2 bytes, so truncating to length 5 results in 2
+ // characters.
+ EXPECT_THAT(normalizer->NormalizeTerm("αβγδε"), Eq("αβ"));
+ }
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(auto normalizer,
+ Normalizer::Create(/*max_term_byte_size=*/2));
+ // The Japanese character has 3 bytes, truncating it results in an empty
+ // string.
+ EXPECT_THAT(normalizer->NormalizeTerm("キ"), Eq(""));
+ }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/bit-util.h b/icing/util/bit-util.h
new file mode 100644
index 0000000..e2bb817
--- /dev/null
+++ b/icing/util/bit-util.h
@@ -0,0 +1,68 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_BIT_UTIL_H_
+#define ICING_UTIL_BIT_UTIL_H_
+
+#include <cstdint>
+
+namespace icing {
+namespace lib {
+
+namespace bit_util {
+
+// Manipulating bit fields.
+//
+// x value containing the bit field(s)
+// offset offset of bit field in x
+// len len of bit field in x
+//
+// REQUIREMENTS
+//
+// - x an unsigned integer <= 64 bits
+// - offset + len <= sizeof(x) * 8
+//
+// There is no error checking so you will get garbage if you don't
+// ensure the above.
+//
+// BitfieldSet() only ORs bits in, so to overwrite a field clear it first.
+
+// Shifting by more than the word length is undefined (on ARM it has the
+// intended effect, but on Intel it shifts by % word length), so check the
+// length.
+inline uint64_t BitfieldMask(uint32_t len) {
+ return ((len == 0) ? 0U : ((~uint64_t{0}) >> (64 - (len))));
+}
+inline uint64_t BitfieldGet(uint64_t mask, uint32_t lsb_offset, uint32_t len) {
+ return ((mask) >> (lsb_offset)) & BitfieldMask(len);
+}
+inline void BitfieldSet(uint32_t value, uint32_t lsb_offset, uint32_t len,
+ uint32_t* mask) {
+ // Conservatively mask 'value' to 'len' bits so '*mask' is not corrupted
+ // if value >= 1 << len.
+ *mask |= (uint64_t{value} & BitfieldMask(len)) << (lsb_offset);
+}
+inline void BitfieldSet(uint64_t value, uint32_t lsb_offset, uint32_t len,
+ uint64_t* mask) {
+ // Conservatively mask 'value' to 'len' bits so '*mask' is not corrupted
+ // if value >= 1 << len.
+ *mask |= (value & BitfieldMask(len)) << (lsb_offset);
+}
+
+} // namespace bit_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_BIT_UTIL_H_
diff --git a/icing/util/clock.h b/icing/util/clock.h
new file mode 100644
index 0000000..09cb375
--- /dev/null
+++ b/icing/util/clock.h
@@ -0,0 +1,49 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_CLOCK_H_
+#define ICING_UTIL_CLOCK_H_
+
+#include <ctime>
+
+namespace icing {
+namespace lib {
+
+// Wrapper around real-time clock functions. This is separated primarily so
+// tests can override this clock and inject it into the class under test.
+//
+// A few things to note about std::time_t :
+// From cppreference:
+// "Although not defined, this is almost always an integral value holding the
+// number of seconds (not counting leap seconds) since 00:00, Jan 1 1970 UTC,
+// corresponding to POSIX time"
+//
+// From Wikipedia:
+// "ISO C defines time_t as an arithmetic type, but does not specify any
+// particular type, range, resolution, or encoding for it. Also unspecified
+// are the meanings of arithmetic operations applied to time values."
+class Clock {
+ public:
+ virtual ~Clock() = default;
+
+ // Returns:
+ // The current time defined by the clock on success
+ // std::time_t(-1) on error
+ virtual std::time_t GetCurrentSeconds() const { return std::time(nullptr); }
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_CLOCK_H_
diff --git a/icing/util/crc32.cc b/icing/util/crc32.cc
new file mode 100644
index 0000000..8b2243a
--- /dev/null
+++ b/icing/util/crc32.cc
@@ -0,0 +1,96 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/crc32.h"
+
+#include <cstdint>
+
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/portable/zlib.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+uint32_t UpdateCrc32(uint32_t crc, const std::string_view str) {
+ if (str.length() > 0) {
+ // crc32() already includes a pre- and post-condition of taking the one's
+ // complement of the value.
+ crc =
+ ~crc32(~crc, reinterpret_cast<const Bytef*>(str.data()), str.length());
+ }
+ return crc;
+}
+} // namespace
+
+uint32_t Crc32::Get() const { return crc_; }
+
+uint32_t Crc32::Append(const std::string_view str) {
+ crc_ = UpdateCrc32(crc_, str);
+ return crc_;
+}
+
+libtextclassifier3::StatusOr<uint32_t> Crc32::UpdateWithXor(
+ const std::string_view xored_str, int full_data_size, int position) {
+ // For appending new data at the end, use Append() instead.
+ if (position + xored_str.length() > full_data_size) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "offset position %d + length %zu > full data size %d", position,
+ xored_str.length(), full_data_size));
+ }
+
+ // We have CRC(A|U|B) and we want CRC(A|V|B) where U is the slice
+ // that updated to V.
+ //
+ // xored_str = X = U ^ V
+ //
+ // Some terminology:
+ // `|`: denotes concatenation, NOT the bitwise operator OR
+ //
+ // (A|U|B): a concatenated string of A+U+B
+ //
+ // CRC(A|U|B): The crc of a concatenated string of A+U+B
+ //
+ // 0_lenA: a string of 0's of the length of string A
+ //
+ //
+ // (A|V|B) = (0_lenA|X|0_lenB) ^ (A|U|B)
+ //
+ // since CRC(D) = CRC(E) ^ CRC(F), where D = E ^ F:
+ // CRC(A|V|B)
+ // = CRC(0_lenA|X|0_lenB) ^ CRC(A|U|B)
+ //
+ // and CRC(D|E) = CRC_COMBINE(D, E), so
+ // = CRC_COMBINE(CRC(0_lenA), CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
+ //
+ // and CRC(0) = 0, so
+ // = CRC_COMBINE(0, CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
+ //
+ // and CRC(0|B) = CRC(B), so
+ // = CRC_COMBINE(CRC(X), CRC(0_lenB)) ^ CRC(A|U|B)
+ //
+ // For more details, see this post by Mark Adler, one of the authors of zlib:
+ // https://stackoverflow.com/questions/23122312/crc-calculation-of-a-mostly-static-data-stream/23126768#23126768
+
+ uint32_t update_crc = UpdateCrc32(0, xored_str);
+ update_crc = crc32_combine(update_crc, 0,
+ full_data_size - (position + xored_str.length()));
+ crc_ ^= update_crc;
+ return crc_;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/crc32.h b/icing/util/crc32.h
new file mode 100644
index 0000000..5f7a71b
--- /dev/null
+++ b/icing/util/crc32.h
@@ -0,0 +1,108 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_CRC32_H_
+#define ICING_UTIL_CRC32_H_
+
+#include <cstdint>
+#include <string_view>
+
+#include "utils/base/statusor.h"
+
+namespace icing {
+namespace lib {
+
+// Efficient mechanism to incrementally compute checksum of a file and keep it
+// updated when its content changes. Internally uses zlib based crc32()
+// implementation.
+//
+// See https://www.zlib.net/manual.html#Checksum for more details.
+//
+// TODO (samzheng): investigate/benchmark swapping zlib crc32 with
+// util/hash/crc32c.h. Regarding util/hash/crc32c.h, CRC32C::Extend crashes as
+// described in b/145837799.
+class Crc32 {
+ public:
+ // Default to the checksum of an empty string, that is "0".
+ Crc32() : crc_(0) {}
+
+ explicit Crc32(uint32_t init_crc) : crc_(init_crc) {}
+
+ inline bool operator==(const Crc32& other) const {
+ return crc_ == other.Get();
+ }
+
+ // Returns the checksum of all the data that has been processed so far.
+ uint32_t Get() const;
+
+ // Incrementally update the current checksum to reflect the fact that the
+ // underlying data has been appended with 'str'. It calculates a new crc32
+ // based on the current crc value and the newly appended string.
+ //
+ // NOTE: As this method accepts incremental appends, all three of these
+ // produce the same checksum:
+ // 1) crc32.Append("AAA"); crc32.Append("BBB");
+ // 2) crc32.Append("AAABBB");
+ // 3) crc32.Append("AA"); crc32.Append("AB"); crc32.Append("BB");
+ //
+ // NOTE: While this class internally uses zlib's crc32(),
+ // Crc32(base_crc).Append(str) is not the same as zlib::crc32(base_crc, str);
+ uint32_t Append(std::string_view str);
+
+ // Update a string's rolling crc when some content is modified in the middle
+ // at an offset. We need the xored_str, which is the new value xored with the
+ // original value.
+ //
+ // Original string:
+ // string(original_start | original_mid | original_end)
+ // -------------------------------------------> full_data_size
+ // ^ offset position
+ //
+ // Modified string:
+ // string(original_start | changed_mid | original_end)
+ // ^ offset position
+ //
+ // And where
+ // xored_str = changed_mid ^ original_mid
+ // xored_len = length(xored_str)
+ // full_data_size = the length of all the strings that have been Appended to
+ // generate the current checksum
+ //
+ // REQUIRES: offset position + xored_len <= full_data_size.
+ //
+ // E.g.
+ // Old data: ABCDEF; New data: ABXYZF
+ //
+ // Crc32 crc32; crc32.Append("ABCDEF");
+ // crc32.UpdateWithXor("CDE" xor "XYZ", 6, 2);
+ //
+ // This is the same as
+ // Crc32 crc32; crc32.Append("ABXYZF");
+ //
+ // See .cc file for implementation notes.
+ //
+ // Returns:
+ // Updated crc on success
+ // INVALID_ARGUMENT if offset position + xored_len > full_data_size
+ libtextclassifier3::StatusOr<uint32_t> UpdateWithXor(
+ std::string_view xored_str, int full_data_size, int position);
+
+ private:
+ uint32_t crc_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_CRC32_H_
diff --git a/icing/util/crc32_test.cc b/icing/util/crc32_test.cc
new file mode 100644
index 0000000..ab8582a
--- /dev/null
+++ b/icing/util/crc32_test.cc
@@ -0,0 +1,108 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/crc32.h"
+
+#include <cstdint>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/portable/zlib.h"
+#include "icing/testing/common-matchers.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+using ::testing::Eq;
+
+void UpdateAtRandomOffset(std::string* buf, uint32_t* update_xor, int* offset) {
+ // rand() only produces values in [0, 2^31 - 1], while uint32_t can hold up
+ // to 2^32 - 1 (4294967295), so rand() * rand() is used to spread values
+ // across the full uint32_t range (unsigned wrap-around here is intended).
+ *offset = (static_cast<uint32_t>(rand()) *
+ static_cast<uint32_t>(rand())) % // NOLINT
+ (buf->size() - sizeof(uint32_t));
+ *update_xor =
+ static_cast<uint32_t>(rand()) * static_cast<uint32_t>(rand()); // NOLINT
+ const unsigned char* update_xor_buf =
+ reinterpret_cast<const unsigned char*>(update_xor);
+
+ // XOR update_xor at offset.
+ for (size_t j = 0; j < sizeof(*update_xor); j++) {
+ (*buf)[*offset + j] ^= update_xor_buf[j];
+ }
+}
+
+TEST(Crc32Test, Get) {
+ Crc32 crc32_test{10};
+ Crc32 crc32_test_empty{};
+ EXPECT_THAT(crc32_test.Get(), Eq(10));
+ EXPECT_THAT(crc32_test_empty.Get(), Eq(0));
+}
+
+TEST(Crc32Test, Append) {
+ // Test the complement logic inside Append()
+ const uLong kCrcInitZero = crc32(0L, nullptr, 0);
+ uint32_t foo_crc =
+ crc32(kCrcInitZero, reinterpret_cast<const Bytef*>("foo"), 3);
+ uint32_t foobar_crc =
+ crc32(kCrcInitZero, reinterpret_cast<const Bytef*>("foobar"), 6);
+
+ Crc32 crc32_test(~foo_crc);
+ ASSERT_THAT(~crc32_test.Append("bar"), Eq(foobar_crc));
+
+ // Test Append() that appending things separately should be the same as
+ // appending in one shot
+ Crc32 crc32_foobar{};
+ crc32_foobar.Append("foobar");
+ Crc32 crc32_foo_and_bar{};
+ crc32_foo_and_bar.Append("foo");
+ crc32_foo_and_bar.Append("bar");
+
+ EXPECT_THAT(crc32_foo_and_bar.Get(), Eq(crc32_foobar.Get()));
+}
+
+TEST(Crc32Test, UpdateAtPosition) {
+ std::string buf;
+ buf.resize(1000);
+ for (size_t i = 0; i < buf.size(); i++) {
+ buf[i] = i * 2;
+ }
+ Crc32 crc32_test{};
+ crc32_test.Append(buf);
+
+ for (int i = 0; i < 100; i++) {
+ uint32_t update_xor;
+ int offset;
+ UpdateAtRandomOffset(&buf, &update_xor, &offset);
+
+ // Compute crc from scratch and compare against update.
+ uint32_t new_crc =
+ ~crc32(~0, reinterpret_cast<const Bytef*>(buf.data()), buf.size());
+ const std::string_view xored_str(reinterpret_cast<const char*>(&update_xor),
+ sizeof(update_xor));
+ EXPECT_THAT(crc32_test.UpdateWithXor(xored_str, buf.size(), offset),
+ IsOkAndHolds(new_crc));
+ }
+
+ // Wrong string length
+ EXPECT_THAT(crc32_test.UpdateWithXor("12345", buf.size(), buf.size() - 1),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/document-validator.cc b/icing/util/document-validator.cc
new file mode 100644
index 0000000..6e1bf8b
--- /dev/null
+++ b/icing/util/document-validator.cc
@@ -0,0 +1,177 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/document-validator.h"
+
+#include <cstdint>
+#include <unordered_set>
+
+#include "utils/base/status.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/status_macros.h"
+#include "icing/schema/schema-util.h"
+
+namespace icing {
+namespace lib {
+
+using PropertyConfigMap =
+ std::unordered_map<std::string_view, const PropertyConfigProto*>;
+
+// Does not take ownership of schema_store; the pointer is dereferenced in
+// Validate(), so the store must outlive this instance.
+DocumentValidator::DocumentValidator(const SchemaStore* schema_store)
+    : schema_store_(schema_store) {}
+
+// Validates `document` against the schema held by schema_store_. Cheap
+// top-level field checks run first, then per-property checks against the
+// type's property configs, and finally a recursive pass over nested
+// documents (the most expensive step). See the header for the full list of
+// checks and returned error codes.
+libtextclassifier3::Status DocumentValidator::Validate(
+    const DocumentProto& document) {
+  if (document.namespace_().empty()) {
+    return absl_ports::InvalidArgumentError("Field 'namespace' is empty.");
+  }
+
+  if (document.uri().empty()) {
+    return absl_ports::InvalidArgumentError("Field 'uri' is empty.");
+  }
+
+  if (document.schema().empty()) {
+    return absl_ports::InvalidArgumentError(
+        absl_ports::StrCat("Field 'schema' is empty for key: (",
+                           document.namespace_(), ", ", document.uri(), ")."));
+  }
+
+  if (document.score() < 0) {
+    return absl_ports::InvalidArgumentError("Field 'score' is negative.");
+  }
+
+  if (document.creation_timestamp_secs() < 0) {
+    return absl_ports::InvalidArgumentError(
+        "Field 'creation_timestamp_secs' is negative.");
+  }
+
+  if (document.ttl_secs() < 0) {
+    return absl_ports::InvalidArgumentError("Field 'ttl_secs' is negative.");
+  }
+
+  // TODO(b/144458732): Implement a more robust version of
+  // ICING_ASSIGN_OR_RETURN that can support error logging.
+  auto type_config_or = schema_store_->GetSchemaTypeConfig(document.schema());
+  if (!type_config_or.ok()) {
+    // ": " separates the schema store's error message from our annotation so
+    // the two strings don't run together in the log.
+    ICING_LOG(ERROR) << type_config_or.status().error_message()
+                     << ": Error while validating document ("
+                     << document.namespace_() << ", " << document.uri() << ")";
+    return type_config_or.status();
+  }
+  const SchemaTypeConfigProto* type_config =
+      std::move(type_config_or).ValueOrDie();
+
+  // Counts of REQUIRED-cardinality properties: how many the schema declares
+  // vs. how many this document actually provides.
+  int32_t num_required_properties_expected = 0;
+  int32_t num_required_properties_actual = 0;
+  PropertyConfigMap property_config_map;
+  SchemaUtil::BuildPropertyConfigMap(*type_config, &property_config_map,
+                                     &num_required_properties_expected);
+  // Tracks property names seen so far so duplicates can be rejected.
+  std::unordered_set<std::string_view> unique_properties;
+
+  for (const PropertyProto& property : document.properties()) {
+    if (property.name().empty()) {
+      return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+          "Field 'name' is empty in PropertyProto for key: (",
+          document.namespace_(), ", ", document.uri(), ")."));
+    }
+
+    if (!unique_properties.insert(property.name()).second) {
+      // Failed to insert because of duplicate property name
+      return absl_ports::AlreadyExistsError(absl_ports::StrCat(
+          "Property name '", property.name(), "' already exists for key: (",
+          document.namespace_(), ", ", document.uri(), ")."));
+    }
+
+    const auto property_iter = property_config_map.find(property.name());
+    if (property_iter == property_config_map.end()) {
+      return absl_ports::NotFoundError(absl_ports::StrCat(
+          "Property config '", property.name(), "' not found for key: (",
+          document.namespace_(), ", ", document.uri(), ")."));
+    }
+    const PropertyConfigProto& property_config = *property_iter->second;
+
+    // Get the property value size according to data type.
+    // TODO (samzheng): make sure values of other data types are empty.
+    int value_size = 0;
+    if (property_config.data_type() == PropertyConfigProto::DataType::STRING) {
+      value_size = property.string_values_size();
+    } else if (property_config.data_type() ==
+               PropertyConfigProto::DataType::INT64) {
+      value_size = property.int64_values_size();
+    } else if (property_config.data_type() ==
+               PropertyConfigProto::DataType::DOUBLE) {
+      value_size = property.double_values_size();
+    } else if (property_config.data_type() ==
+               PropertyConfigProto::DataType::BOOLEAN) {
+      value_size = property.boolean_values_size();
+    } else if (property_config.data_type() ==
+               PropertyConfigProto::DataType::BYTES) {
+      value_size = property.bytes_values_size();
+    } else if (property_config.data_type() ==
+               PropertyConfigProto::DataType::DOCUMENT) {
+      value_size = property.document_values_size();
+    }
+
+    // OPTIONAL allows 0 or 1 value; REQUIRED allows exactly 1. REPEATED has
+    // no size constraint.
+    if (property_config.cardinality() ==
+        PropertyConfigProto::Cardinality::OPTIONAL) {
+      if (value_size != 0 && value_size != 1) {
+        return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+            "Property '%s' is optional but %d elements are "
+            "found for key: (%s, %s).",
+            property.name().c_str(), value_size, document.namespace_().c_str(),
+            document.uri().c_str()));
+      }
+    } else if (property_config.cardinality() ==
+               PropertyConfigProto::Cardinality::REQUIRED) {
+      if (value_size != 1) {
+        return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+            "Property '%s' with only 1 value is required but "
+            "%d elements are found for key: (%s, %s).",
+            property.name().c_str(), value_size, document.namespace_().c_str(),
+            document.uri().c_str()));
+      }
+      num_required_properties_actual++;
+    }
+
+    // We put the validation for nested DocumentProto at last separately
+    // because it takes longer time to run. If any of the previous validations
+    // fail, we don't need to validate the extra documents.
+    if (property_config.data_type() ==
+        PropertyConfigProto::DataType::DOCUMENT) {
+      const std::string_view nested_type_expected =
+          property_config.schema_type();
+      for (const DocumentProto& nested_document : property.document_values()) {
+        if (nested_type_expected.compare(nested_document.schema()) != 0) {
+          return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+              "Property '", property.name(), "' should have type '",
+              nested_type_expected,
+              "' but actual "
+              "value has type '",
+              nested_document.schema(), "' for key: (", document.namespace_(),
+              ", ", document.uri(), ")."));
+        }
+        // Nested documents are validated with the same rules, recursively.
+        ICING_RETURN_IF_ERROR(Validate(nested_document));
+      }
+    }
+  }
+  if (num_required_properties_actual < num_required_properties_expected) {
+    return absl_ports::InvalidArgumentError(
+        absl_ports::StrCat("One or more required fields missing for key: (",
+                           document.namespace_(), ", ", document.uri(), ")."));
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/document-validator.h b/icing/util/document-validator.h
new file mode 100644
index 0000000..c684cb5
--- /dev/null
+++ b/icing/util/document-validator.h
@@ -0,0 +1,79 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_DOCUMENT_VALIDATOR_H_
+#define ICING_UTIL_DOCUMENT_VALIDATOR_H_
+
+#include "utils/base/status.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/schema-store.h"
+
+namespace icing {
+namespace lib {
+
+// This class validates DocumentProto based on the corresponding
+// SchemaTypeConfigProto in the given type config map.
+// This class validates DocumentProto based on the corresponding
+// SchemaTypeConfigProto in the given type config map.
+class DocumentValidator {
+ public:
+  // Does not take ownership of schema_store, which must outlive this
+  // instance.
+  explicit DocumentValidator(const SchemaStore* schema_store);
+  DocumentValidator() = delete;
+
+  // This function validates:
+  //  1. DocumentProto.namespace is not empty
+  //  2. DocumentProto.uri is not empty
+  //  3. DocumentProto.schema is not empty
+  //  4. DocumentProto.schema matches one of SchemaTypeConfigProto.schema_type
+  //     in the given SchemaProto in constructor
+  //  5. Each PropertyProto.name in DocumentProto.properties is not empty
+  //  6. Each PropertyProto.name is unique
+  //  7. Each PropertyProto.name matches one of
+  //     PropertyConfigProto.property_name in the given SchemaProto in
+  //     constructor
+  //  8. For each PropertyProto, the size of repeated value field matches
+  //     PropertyConfigProto.cardinality defined in the given SchemaProto in
+  //     constructor (e.g. OPTIONAL means 0 or 1, REQUIRED means 1)
+  //  9. For each PropertyProto with nested DocumentProto,
+  //     DocumentProto.schema (nested) matches the current
+  //     PropertyConfigProto.schema_type
+  // 10. All PropertyProto with REQUIRED cardinality in the corresponding
+  //     PropertyConfigProto present in the DocumentProto
+  // 11. DocumentProto.score is not negative
+  // 12. DocumentProto.creation_timestamp_secs is not negative
+  // 13. DocumentProto.ttl_secs is not negative
+  //
+  // In addition, all nested DocumentProto will also be validated towards the
+  // requirements above.
+  //
+  // DocumentProto.custom_properties are not validated.
+  //
+  // Returns:
+  //   OK on success
+  //   INVALID_ARGUMENT if any of case 1, 2, 3, 5, 8, 9, 10, 11, 12, 13 fails
+  //   NOT_FOUND if case 4 or 7 fails
+  //   ALREADY_EXISTS if case 6 fails
+  libtextclassifier3::Status Validate(const DocumentProto& document);
+
+  // Points the validator at a different SchemaStore (e.g. after a schema
+  // update). Does not take ownership; schema_store must outlive this
+  // instance.
+  void UpdateSchemaStore(const SchemaStore* schema_store) {
+    schema_store_ = schema_store;
+  }
+
+ private:
+  // Not owned.
+  const SchemaStore* schema_store_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_DOCUMENT_VALIDATOR_H_
diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc
new file mode 100644
index 0000000..7a43f6b
--- /dev/null
+++ b/icing/util/document-validator_test.cc
@@ -0,0 +1,449 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/document-validator.h"
+
+#include <cstdint>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema/schema-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+using ::testing::HasSubstr;
+
+// type and property names of EmailMessage
+constexpr char kTypeEmail[] = "EmailMessage";
+constexpr char kPropertySubject[] = "subject";
+constexpr char kPropertyText[] = "text";
+constexpr char kPropertyRecipients[] = "recipients";
+// type and property names of Conversation
+constexpr char kTypeConversation[] = "Conversation";
+constexpr char kPropertyName[] = "name";
+constexpr char kPropertyEmails[] = "emails";
+// Other values
+constexpr char kDefaultNamespace[] = "icing";
+constexpr char kDefaultString[] = "This is a string.";
+
+// Test fixture that registers two schema types in a fresh SchemaStore:
+// "EmailMessage" (string properties only) and "Conversation" (a string name
+// plus repeated nested EmailMessage documents), then builds a validator over
+// that store.
+class DocumentValidatorTest : public ::testing::Test {
+ protected:
+  DocumentValidatorTest() {}
+
+  void SetUp() override {
+    SchemaProto schema;
+    auto type_config = schema.add_types();
+    CreateEmailTypeConfig(type_config);
+
+    type_config = schema.add_types();
+    CreateConversationTypeConfig(type_config);
+
+    ICING_ASSERT_OK_AND_ASSIGN(
+        schema_store_, SchemaStore::Create(&filesystem_, GetTestTempDir()));
+    ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+    document_validator_ =
+        std::make_unique<DocumentValidator>(schema_store_.get());
+  }
+
+  // EmailMessage: required 'subject', optional 'text', repeated 'recipients'
+  // (all strings).
+  static void CreateEmailTypeConfig(SchemaTypeConfigProto* type_config) {
+    type_config->set_schema_type(kTypeEmail);
+
+    auto subject = type_config->add_properties();
+    subject->set_property_name(kPropertySubject);
+    subject->set_data_type(PropertyConfigProto::DataType::STRING);
+    subject->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+    auto text = type_config->add_properties();
+    text->set_property_name(kPropertyText);
+    text->set_data_type(PropertyConfigProto::DataType::STRING);
+    text->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+    auto recipients = type_config->add_properties();
+    recipients->set_property_name(kPropertyRecipients);
+    recipients->set_data_type(PropertyConfigProto::DataType::STRING);
+    recipients->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+  }
+
+  // Returns a builder whose Build() yields a document that satisfies the
+  // EmailMessage schema; tests mutate it to trigger specific failures.
+  static DocumentBuilder SimpleEmailBuilder() {
+    return DocumentBuilder()
+        .SetKey(kDefaultNamespace, "email/1")
+        .SetSchema(kTypeEmail)
+        .AddStringProperty(kPropertySubject, kDefaultString)
+        .AddStringProperty(kPropertyText, kDefaultString)
+        .AddStringProperty(kPropertyRecipients, kDefaultString, kDefaultString,
+                           kDefaultString);
+  }
+
+  // Conversation: required 'name' (string), repeated nested 'emails' of type
+  // EmailMessage.
+  static void CreateConversationTypeConfig(SchemaTypeConfigProto* type_config) {
+    type_config->set_schema_type(kTypeConversation);
+
+    auto name = type_config->add_properties();
+    name->set_property_name(kPropertyName);
+    name->set_data_type(PropertyConfigProto::DataType::STRING);
+    name->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+    auto emails = type_config->add_properties();
+    emails->set_property_name(kPropertyEmails);
+    emails->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+    emails->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+    emails->set_schema_type(kTypeEmail);
+  }
+
+  // Returns a builder whose Build() yields a valid Conversation document
+  // containing three valid nested EmailMessage documents.
+  static DocumentBuilder SimpleConversationBuilder() {
+    return DocumentBuilder()
+        .SetKey(kDefaultNamespace, "conversation/1")
+        .SetSchema(kTypeConversation)
+        .AddStringProperty(kPropertyName, kDefaultString)
+        .AddDocumentProperty(kPropertyEmails, SimpleEmailBuilder().Build(),
+                             SimpleEmailBuilder().Build(),
+                             SimpleEmailBuilder().Build());
+  }
+
+  std::unique_ptr<DocumentValidator> document_validator_;
+  std::unique_ptr<SchemaStore> schema_store_;
+  Filesystem filesystem_;
+};
+
+// Baseline: the unmodified builder outputs pass validation for both types.
+TEST_F(DocumentValidatorTest, ValidateSimpleSchemasOk) {
+  DocumentProto email = SimpleEmailBuilder().Build();
+  EXPECT_THAT(document_validator_->Validate(email), IsOk());
+
+  DocumentProto conversation = SimpleConversationBuilder().Build();
+  EXPECT_THAT(document_validator_->Validate(conversation), IsOk());
+}
+
+// Empty top-level key fields are rejected as INVALID_ARGUMENT.
+TEST_F(DocumentValidatorTest, ValidateEmptyNamespaceInvalid) {
+  DocumentProto email = SimpleEmailBuilder().SetNamespace("").Build();
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("'namespace' is empty")));
+}
+
+TEST_F(DocumentValidatorTest, ValidateEmptyUriInvalid) {
+  DocumentProto email = SimpleEmailBuilder().SetUri("").Build();
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("'uri' is empty")));
+}
+
+TEST_F(DocumentValidatorTest, ValidateEmptySchemaInvalid) {
+  DocumentProto email = SimpleEmailBuilder().SetSchema("").Build();
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("'schema' is empty")));
+}
+
+// A schema type not registered in the SchemaStore yields NOT_FOUND.
+TEST_F(DocumentValidatorTest, ValidateNonexistentSchemaNotFound) {
+  DocumentProto email =
+      SimpleEmailBuilder().SetSchema("WrongEmailType").Build();
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+                       HasSubstr("'WrongEmailType' not found")));
+}
+
+// A property with an empty name is rejected as INVALID_ARGUMENT.
+TEST_F(DocumentValidatorTest, ValidateEmptyPropertyInvalid) {
+  DocumentProto email =
+      SimpleEmailBuilder().AddStringProperty("", kDefaultString).Build();
+
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("'name' is empty")));
+}
+
+// The same property name appearing twice in one document yields
+// ALREADY_EXISTS.
+TEST_F(DocumentValidatorTest, ValidateDuplicatePropertyAlreadyExists) {
+  DocumentProto email = SimpleEmailBuilder()
+                            .ClearProperties()
+                            .AddStringProperty(kPropertySubject, kDefaultString)
+                            .AddStringProperty(kPropertySubject, kDefaultString)
+                            .Build();
+
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::ALREADY_EXISTS,
+                       HasSubstr("'subject' already exists")));
+}
+
+// A property name with no matching PropertyConfigProto yields NOT_FOUND.
+TEST_F(DocumentValidatorTest, ValidateNonexistentPropertyNotFound) {
+  DocumentProto email =
+      SimpleEmailBuilder()
+          .AddStringProperty("WrongPropertyName", kDefaultString)
+          .Build();
+
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+                       HasSubstr("'WrongPropertyName' not found")));
+}
+
+// Custom properties are exempt from validation: entries that would fail the
+// name/type checks as regular properties are accepted here.
+TEST_F(DocumentValidatorTest, ValidateAllCustomPropertyOk) {
+  DocumentProto email =
+      SimpleEmailBuilder()
+          // A nonexistent property, would've triggered a NotFound message
+          .AddCustomStringProperty("WrongPropertyName", kDefaultString)
+          // 'subject' property should've been a string according to the schema
+          .AddCustomBooleanProperty(kPropertySubject, false, true)
+          .Build();
+
+  EXPECT_THAT(document_validator_->Validate(email), IsOk());
+}
+
+TEST_F(DocumentValidatorTest, ValidateExactlyOneRequiredValueOk) {
+  // Required property should have exactly 1 value
+  DocumentProto email =
+      SimpleEmailBuilder()
+          .ClearProperties()
+          .AddStringProperty(kPropertySubject, kDefaultString)  // 1 value
+          .Build();
+
+  EXPECT_THAT(document_validator_->Validate(email), IsOk());
+}
+
+// REQUIRED cardinality rejects both 0 and more-than-1 values.
+TEST_F(DocumentValidatorTest, ValidateInvalidNumberOfRequiredValues) {
+  // Required property should have exactly 1 value
+  DocumentProto email = SimpleEmailBuilder()
+                            .ClearProperties()
+                            .AddStringProperty(kPropertySubject)  // 0 values
+                            .Build();
+
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("'subject' with only 1 value is required "
+                                 "but 0 elements are found")));
+
+  email =
+      SimpleEmailBuilder()
+          .ClearProperties()
+          .AddStringProperty(kPropertySubject, kDefaultString, kDefaultString)
+          .Build();
+
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("'subject' with only 1 value is required "
+                                 "but 2 elements are found")));
+}
+
+// OPTIONAL cardinality accepts 0 or 1 values.
+TEST_F(DocumentValidatorTest, ValidateZeroOrOneOptionalValueOk) {
+  DocumentProto email = SimpleEmailBuilder()
+                            .ClearProperties()
+                            .AddStringProperty(kPropertySubject, kDefaultString)
+                            .AddStringProperty(kPropertyText)  // 0 values
+                            .Build();
+
+  EXPECT_THAT(document_validator_->Validate(email), IsOk());
+
+  email = SimpleEmailBuilder()
+              .ClearProperties()
+              .AddStringProperty(kPropertySubject, kDefaultString)
+              .AddStringProperty(kPropertyText, kDefaultString)  // 1 value
+              .Build();
+
+  EXPECT_THAT(document_validator_->Validate(email), IsOk());
+}
+
+// OPTIONAL cardinality rejects more than 1 value.
+TEST_F(DocumentValidatorTest, ValidateInvalidNumberOfOptionalValues) {
+  DocumentProto email =
+      SimpleEmailBuilder()
+          .ClearProperties()
+          .AddStringProperty(kPropertySubject, kDefaultString)
+          .AddStringProperty(kPropertyText, kDefaultString, kDefaultString)
+          .Build();
+
+  EXPECT_THAT(
+      document_validator_->Validate(email),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+               HasSubstr("'text' is optional but 2 elements are found")));
+}
+
+TEST_F(DocumentValidatorTest, ValidateMissingRequiredPropertyInvalid) {
+  // All required properties should be present in document
+  DocumentProto email = SimpleEmailBuilder()
+                            .ClearProperties()
+                            .AddStringProperty(kPropertyText, kDefaultString)
+                            .Build();
+
+  // The required property 'subject' isn't added in email.
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("One or more required fields missing")));
+}
+
+TEST_F(DocumentValidatorTest,
+       ValidateNestedPropertyDoesntMatchSchemaTypeInvalid) {
+  // Nested DocumentProto should have the expected schema type
+  DocumentProto conversation =
+      SimpleConversationBuilder()
+          .ClearProperties()
+          .AddStringProperty(kPropertyName, kDefaultString)
+          .AddDocumentProperty(
+              kPropertyEmails, SimpleEmailBuilder().Build(),
+              SimpleConversationBuilder().Build(),  // Wrong document type
+              SimpleEmailBuilder().Build())
+          .Build();
+
+  EXPECT_THAT(document_validator_->Validate(conversation),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("'emails' should have type 'EmailMessage' but "
+                                 "actual value has type 'Conversation'")));
+}
+
+// Validation recurses into nested documents, so a defect inside a nested
+// EmailMessage fails the enclosing Conversation.
+TEST_F(DocumentValidatorTest, ValidateNestedPropertyInvalid) {
+  // Issues in nested DocumentProto should be detected
+  DocumentProto conversation =
+      SimpleConversationBuilder()
+          .ClearProperties()
+          .AddStringProperty(kPropertyName, kDefaultString)
+          .AddDocumentProperty(kPropertyEmails,
+                               SimpleEmailBuilder()
+                                   .SetNamespace("")
+                                   .Build())  // Bad nested document
+          .Build();
+
+  EXPECT_THAT(document_validator_->Validate(conversation),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("'namespace' is empty")));
+}
+
+// Verifies the validator picks up schema changes made to the SchemaStore it
+// points at, without being reconstructed.
+TEST_F(DocumentValidatorTest, HandleTypeConfigMapChangesOk) {
+  SchemaProto email_schema;
+  auto type_config = email_schema.add_types();
+  CreateEmailTypeConfig(type_config);
+
+  // Create a custom directory so we don't collide with the test's preset schema
+  // in SetUp
+  const std::string custom_schema_dir = GetTestTempDir() + "/custom_schema";
+  filesystem_.DeleteDirectoryRecursively(custom_schema_dir.c_str());
+  filesystem_.CreateDirectoryRecursively(custom_schema_dir.c_str());
+
+  // Set a schema with only the 'Email' type
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<SchemaStore> schema_store,
+      SchemaStore::Create(&filesystem_, custom_schema_dir));
+  ASSERT_THAT(schema_store->SetSchema(email_schema), IsOk());
+
+  DocumentValidator document_validator(schema_store.get());
+
+  DocumentProto conversation = SimpleConversationBuilder().Build();
+
+  // Schema doesn't know about the 'Conversation' type yet
+  EXPECT_THAT(document_validator.Validate(conversation),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+                       HasSubstr("'Conversation' not found")));
+
+  // Add the 'Conversation' type
+  SchemaProto email_and_conversation_schema = email_schema;
+  type_config = email_and_conversation_schema.add_types();
+  CreateConversationTypeConfig(type_config);
+
+  // DocumentValidator should be able to handle the SchemaStore getting updated
+  // separately
+  ASSERT_THAT(schema_store->SetSchema(email_and_conversation_schema), IsOk());
+
+  ICING_EXPECT_OK(document_validator.Validate(conversation));
+}
+
+// Score: any non-negative value is accepted, up to int32 max.
+TEST_F(DocumentValidatorTest, PositiveDocumentScoreOk) {
+  DocumentProto email = SimpleEmailBuilder().SetScore(1).Build();
+  ICING_EXPECT_OK(document_validator_->Validate(email));
+
+  email = SimpleEmailBuilder()
+              .SetScore(std::numeric_limits<int32_t>::max())
+              .Build();
+  ICING_EXPECT_OK(document_validator_->Validate(email));
+}
+
+// Score: any negative value, down to int32 min, is rejected.
+TEST_F(DocumentValidatorTest, NegativeDocumentScoreInvalid) {
+  DocumentProto email = SimpleEmailBuilder().SetScore(-1).Build();
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("is negative")));
+
+  email = SimpleEmailBuilder()
+              .SetScore(std::numeric_limits<int32_t>::min())
+              .Build();
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("is negative")));
+}
+
+// Creation timestamp: positive values up to int32 max are accepted.
+TEST_F(DocumentValidatorTest, PositiveDocumentCreationTimestampSecsOk) {
+  DocumentProto email =
+      SimpleEmailBuilder().SetCreationTimestampSecs(1).Build();
+  ICING_EXPECT_OK(document_validator_->Validate(email));
+
+  email = SimpleEmailBuilder()
+              .SetCreationTimestampSecs(std::numeric_limits<int32_t>::max())
+              .Build();
+  ICING_EXPECT_OK(document_validator_->Validate(email));
+}
+
+// Zero is a valid creation timestamp; only negative values are rejected.
+// (Fixes the "Secss" typo in the original test name.)
+TEST_F(DocumentValidatorTest, ZeroDocumentCreationTimestampSecsOk) {
+  DocumentProto email =
+      SimpleEmailBuilder().SetCreationTimestampSecs(0).Build();
+  ICING_EXPECT_OK(document_validator_->Validate(email));
+}
+
+// Creation timestamp: any negative value, down to int32 min, is rejected.
+TEST_F(DocumentValidatorTest, NegativeDocumentCreationTimestampSecsInvalid) {
+  DocumentProto email =
+      SimpleEmailBuilder().SetCreationTimestampSecs(-1).Build();
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("is negative")));
+
+  email = SimpleEmailBuilder()
+              .SetCreationTimestampSecs(std::numeric_limits<int32_t>::min())
+              .Build();
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("is negative")));
+}
+
+// TTL: positive values up to int32 max are accepted.
+TEST_F(DocumentValidatorTest, PositiveDocumentTtlSecsOk) {
+  DocumentProto email = SimpleEmailBuilder().SetTtlSecs(1).Build();
+  ICING_EXPECT_OK(document_validator_->Validate(email));
+
+  email = SimpleEmailBuilder()
+              .SetTtlSecs(std::numeric_limits<int32_t>::max())
+              .Build();
+  ICING_EXPECT_OK(document_validator_->Validate(email));
+}
+
+// TTL: zero (meaning no expiration-related rejection here) is accepted.
+TEST_F(DocumentValidatorTest, ZeroDocumentTtlSecsOk) {
+  DocumentProto email = SimpleEmailBuilder().SetTtlSecs(0).Build();
+  ICING_EXPECT_OK(document_validator_->Validate(email));
+}
+
+// TTL: any negative value, down to int32 min, is rejected.
+TEST_F(DocumentValidatorTest, NegativeDocumentTtlSecsInvalid) {
+  DocumentProto email = SimpleEmailBuilder().SetTtlSecs(-1).Build();
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("is negative")));
+
+  email = SimpleEmailBuilder()
+              .SetTtlSecs(std::numeric_limits<int32_t>::min())
+              .Build();
+  EXPECT_THAT(document_validator_->Validate(email),
+              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+                       HasSubstr("is negative")));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/i18n-utils.cc b/icing/util/i18n-utils.cc
new file mode 100644
index 0000000..e8b109a
--- /dev/null
+++ b/icing/util/i18n-utils.cc
@@ -0,0 +1,160 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/i18n-utils.h"
+
+#include <sys/types.h>
+
+#include <cctype>
+#include <string>
+#include <string_view>
+
+#include "utils/base/statusor.h"
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "unicode/umachine.h"
+#include "unicode/unorm2.h"
+#include "unicode/ustring.h"
+#include "unicode/utf8.h"
+
+namespace icing {
+namespace lib {
+namespace i18n_utils {
+
+// Converts a UTF-16 string to UTF-8 via ICU's u_strToUTF8.
+// Returns the converted string, or INTERNAL if ICU reports a failure.
+libtextclassifier3::StatusOr<std::string> Utf16ToUtf8(
+    const std::u16string& utf16_string) {
+  std::string utf8_string;
+  // Allocates the maximum possible UTF8 string length:
+  // 3 UTF-8 bytes per UTF16 code unit, plus one for the terminating NUL.
+  //
+  // NOTE: we need to call resize() but not reserve() because values can't be
+  // set at positions after length().
+  utf8_string.resize(utf16_string.length() * 3 + 1);
+
+  int result_length = 0;
+  UErrorCode status = U_ZERO_ERROR;
+  u_strToUTF8(&utf8_string[0], utf8_string.length(), &result_length,
+              utf16_string.data(), utf16_string.length(), &status);
+  // Corrects the length
+  // (u_strToUTF8 reports the number of UTF-8 bytes actually produced via
+  // result_length).
+  utf8_string.resize(result_length);
+
+  if (U_FAILURE(status)) {
+    return absl_ports::InternalError("Failed to convert UTF16 string to UTF8");
+  }
+  return utf8_string;
+}
+
+// Converts a UTF-8 string to UTF-16 via ICU's u_strFromUTF8.
+// Returns the converted string, or INTERNAL if ICU reports a failure.
+libtextclassifier3::StatusOr<std::u16string> Utf8ToUtf16(
+    std::string_view utf8_string) {
+  std::u16string utf16_result;
+  // The UTF16 string won't be longer than its UTF8 format
+  // (every UTF-8 byte maps to at most one UTF-16 code unit).
+  //
+  // NOTE: we need to call resize() but not reserve() because values can't be
+  // set at positions after length().
+  utf16_result.resize(utf8_string.length());
+
+  int result_length = 0;
+  UErrorCode status = U_ZERO_ERROR;
+  u_strFromUTF8(&utf16_result[0], utf16_result.length(), &result_length,
+                utf8_string.data(), utf8_string.length(), &status);
+  // Corrects the length
+  // (u_strFromUTF8 reports the number of UTF-16 code units actually produced
+  // via result_length).
+  utf16_result.resize(result_length);
+
+  if (U_FAILURE(status)) {
+    return absl_ports::InternalError(absl_ports::StrCat(
+        "Failed to convert UTF8 string '", utf8_string, "' to UTF16"));
+  }
+  return utf16_result;
+}
+
+// Returns the Unicode code point starting at byte offset `position`.
+// U8_NEXT_OR_FFFD yields U+FFFD (the replacement character) for ill-formed
+// byte sequences rather than signaling an error.
+UChar32 GetUChar32At(const char* data, int length, int position) {
+  UChar32 uchar32;
+  U8_NEXT_OR_FFFD(data, position, length, uchar32);
+  return uchar32;
+}
+
+// Truncates *str in place to at most truncate_to_length bytes without
+// splitting a multi-byte UTF-8 character: if the byte at the cut point is a
+// continuation byte, the cut moves left to the start of that character.
+//
+// A null str, a negative truncate_to_length, or a truncate_to_length >=
+// str->length() leaves the string unchanged. The explicit negative check
+// replaces the original's implicit signed/unsigned conversion (a negative int
+// compared against size_t converts to a huge unsigned value); the observable
+// behavior is identical, without the -Wsign-compare hazard.
+void SafeTruncateUtf8(std::string* str, int truncate_to_length) {
+  if (str == nullptr || truncate_to_length < 0 ||
+      static_cast<size_t>(truncate_to_length) >= str->length()) {
+    return;
+  }
+
+  while (truncate_to_length > 0) {
+    if (IsLeadUtf8Byte(str->at(truncate_to_length))) {
+      str->resize(truncate_to_length);
+      return;
+    }
+    truncate_to_length--;
+  }
+
+  // No character boundary found at or before the cut point; truncates to an
+  // empty string
+  str->resize(0);
+}
+
+// A byte is ASCII iff it is a complete single-byte UTF-8 sequence.
+// NOTE(review): `u_int8_t` is the BSD/POSIX spelling from <sys/types.h>;
+// consider the standard `uint8_t` — confirm portability requirements.
+bool IsAscii(char c) { return U8_IS_SINGLE((u_int8_t)c); }
+
+// A code point is ASCII iff it encodes to exactly one UTF-8 byte.
+bool IsAscii(UChar32 c) { return U8_LENGTH(c) == 1; }
+
+// Number of bytes in the UTF-8 encoding of code point c (0 if invalid).
+int GetUtf8Length(UChar32 c) { return U8_LENGTH(c); }
+
+// A byte starts a UTF-8 character iff it is ASCII or a multi-byte lead byte,
+// i.e. it is not a continuation byte.
+bool IsLeadUtf8Byte(char c) { return IsAscii(c) || U8_IS_LEAD((u_int8_t)c); }
+
+// Reports whether the character starting at byte offset `position` is
+// punctuation: std::ispunct for single-byte (ASCII) characters, ICU's
+// u_ispunct otherwise. If char_len_out is non-null it receives the
+// character's UTF-8 length in bytes.
+// NOTE(review): `position` is assumed to be a valid in-bounds character
+// boundary within `input` — confirm callers guarantee this; there is no
+// bounds check here.
+bool IsPunctuationAt(std::string_view input, int position, int* char_len_out) {
+  if (IsAscii(input[position])) {
+    if (char_len_out != nullptr) {
+      *char_len_out = 1;
+    }
+    // Safe to pass to std::ispunct: an ASCII byte is non-negative.
+    return std::ispunct(input[position]);
+  }
+  UChar32 c = GetUChar32At(input.data(), input.length(), position);
+  if (char_len_out != nullptr) {
+    *char_len_out = U8_LENGTH(c);
+  }
+  return u_ispunct(c);
+}
+
+// Attempts to reduce a character with diacritics to its ASCII base character
+// via its canonical Unicode decomposition (whose first element is the base
+// character when one exists). Returns true and writes the ASCII char to
+// *char_out (if non-null) on success; returns false if the character has no
+// decomposition starting with an ASCII char.
+bool DiacriticCharToAscii(const UNormalizer2* normalizer2, UChar32 uchar32_in,
+                          char* char_out) {
+  if (IsAscii(uchar32_in)) {
+    // The Unicode character is within ASCII range
+    if (char_out != nullptr) {
+      *char_out = uchar32_in;
+    }
+    return true;
+  }
+
+  // Maximum number of pieces a Unicode character can be decomposed into.
+  // TODO(samzheng) figure out if this number is proper.
+  constexpr int kDecompositionBufferCapacity = 5;
+
+  // A buffer used to store Unicode decomposition mappings of only one
+  // character.
+  UChar decomposition_buffer[kDecompositionBufferCapacity];
+
+  // Decomposes the Unicode character, trying to get an ASCII char and some
+  // diacritic chars.
+  UErrorCode status = U_ZERO_ERROR;
+  if (unorm2_getDecomposition(normalizer2, uchar32_in, &decomposition_buffer[0],
+                              kDecompositionBufferCapacity, &status) > 0 &&
+      !U_FAILURE(status) && i18n_utils::IsAscii(decomposition_buffer[0])) {
+    if (char_out != nullptr) {
+      *char_out = decomposition_buffer[0];
+    }
+    return true;
+  }
+  return false;
+}
+
+} // namespace i18n_utils
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/i18n-utils.h b/icing/util/i18n-utils.h
new file mode 100644
index 0000000..04edbc7
--- /dev/null
+++ b/icing/util/i18n-utils.h
@@ -0,0 +1,88 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_I18N_UTILS_H_
+#define ICING_UTIL_I18N_UTILS_H_
+
+#include <string>
+#include <string_view>
+
+#include "utils/base/statusor.h"
+#include "unicode/umachine.h"
+#include "unicode/unorm2.h"
+
+namespace icing {
+namespace lib {
+namespace i18n_utils {
+
+// Converts a UTF16 string to a UTF8 string.
+//
+// Returns:
+// A UTF8 string on success
+// INTERNAL_ERROR on any failures
+libtextclassifier3::StatusOr<std::string> Utf16ToUtf8(
+ const std::u16string& utf16_string);
+
+// Converts a UTF8 string to a UTF16 string.
+//
+// Returns:
+// A UTF16 string on success
+// INTERNAL_ERROR on any failures
+libtextclassifier3::StatusOr<std::u16string> Utf8ToUtf16(
+ std::string_view utf8_string);
+
+// Returns the Unicode char at the given position. If anything goes wrong, the
+// Unicode replacement character 0xFFFD (U+FFFD) is returned.
+UChar32 GetUChar32At(const char* data, int length, int position);
+
+// Safely truncates a UTF8 string so that multi-byte UTF8 characters are not cut
+// in the middle. The string will be truncated in place.
+void SafeTruncateUtf8(std::string* str, int truncate_to_length);
+
+// Checks if the single char is within ASCII range.
+bool IsAscii(char c);
+
+// Checks if the Unicode char is within ASCII range.
+bool IsAscii(UChar32 c);
+
+// Returns how many code units (bytes) are used for the UTF-8 encoding of this
+// Unicode character. Returns 0 if not valid.
+int GetUtf8Length(UChar32 c);
+
+// Checks if the single char is the first byte of a UTF8 character, note
+// that a single ASCII char is also considered a lead byte.
+bool IsLeadUtf8Byte(char c);
+
+// Checks if the character at position is punctuation. Assigns the length of the
+// character at position to *char_len_out if the character at position is valid
+// punctuation and char_len_out is not null.
+bool IsPunctuationAt(std::string_view input, int position,
+ int* char_len_out = nullptr);
+
+// Transforms a Unicode character with diacritics to its counterpart in ASCII
+// range. E.g. "ü" -> "u". Result will be set to char_out. Returns true if
+// the transformation is successful.
+//
+// NOTE: According to our convention this function should have returned
+// StatusOr<char>. However, this function is performance-sensitive because it
+// could be called on every Latin character in normalization, so we make it
+// return a bool here to save a bit more time and memory.
+bool DiacriticCharToAscii(const UNormalizer2* normalizer2, UChar32 uchar32_in,
+ char* char_out);
+
+} // namespace i18n_utils
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_I18N_UTILS_H_
diff --git a/icing/util/logging.h b/icing/util/logging.h
new file mode 100644
index 0000000..1916e53
--- /dev/null
+++ b/icing/util/logging.h
@@ -0,0 +1,30 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_LOGGING_H_
+#define ICING_UTIL_LOGGING_H_
+
+#include "base/logging.h"
+
+namespace icing {
+namespace lib {
+
+// TODO(samzheng): Change to TC3_VLOG and TC3_LOG
+// NOTE: These are preprocessor macros, so the surrounding namespaces have no
+// scoping effect on them — they are visible file-wide to every includer.
+#define ICING_VLOG(severity) VLOG(severity)
+#define ICING_LOG(severity) LOG(severity)
+
+}  // namespace lib
+}  // namespace icing
+
+#endif // ICING_UTIL_LOGGING_H_
diff --git a/icing/util/math-util.h b/icing/util/math-util.h
new file mode 100644
index 0000000..fc11a09
--- /dev/null
+++ b/icing/util/math-util.h
@@ -0,0 +1,81 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_MATH_UTIL_H_
+#define ICING_UTIL_MATH_UTIL_H_
+
+#include <limits>
+
+namespace icing {
+namespace lib {
+
+namespace math_util {
+
+// Divides first by second, but returns +infinity instead of performing a
+// division by zero when second == 0 (for any value of first, including 0
+// and negatives).
+inline double SafeDivide(double first, double second) {
+  return (second == 0) ? std::numeric_limits<double>::infinity()
+                       : first / second;
+}
+
+// Returns the maximum integer value which is a multiple of rounding_value,
+// and less than or equal to input_value.
+//
+// Returns 0 if input_value is less than or equal to zero, or if
+// rounding_value is less than or equal to zero (which also guards the
+// division below against dividing by zero).
+template <typename IntType>
+static IntType RoundDownTo(IntType input_value, IntType rounding_value) {
+  // Fixed copy-paste in the assertion message: it previously named
+  // RoundUpTo().
+  static_assert(std::numeric_limits<IntType>::is_integer,
+                "RoundDownTo() operation type is not integer");
+
+  if (input_value <= 0) {
+    return 0;
+  }
+
+  if (rounding_value <= 0) {
+    return 0;
+  }
+
+  // Integer division truncates toward zero, which for positive operands is
+  // exactly "round down".
+  return (input_value / rounding_value) * rounding_value;
+}
+
+// Returns the minimum integer value which is a multiple of rounding_value,
+// and greater than or equal to input_value.
+//
+// Returns 0 if input_value is less than or equal to zero, or if
+// rounding_value is less than or equal to zero.
+//
+// NOTE(review): input_value - remainder + rounding_value can overflow when
+// input_value is near std::numeric_limits<IntType>::max() — confirm callers
+// stay in range.
+template <typename IntType>
+static IntType RoundUpTo(IntType input_value, IntType rounding_value) {
+  static_assert(std::numeric_limits<IntType>::is_integer,
+                "RoundUpTo() operation type is not integer");
+
+  if (input_value <= 0 || rounding_value <= 0) {
+    return 0;
+  }
+
+  const IntType remainder = input_value % rounding_value;
+  if (remainder == 0) {
+    // Already an exact multiple; nothing to round.
+    return input_value;
+  }
+  return input_value - remainder + rounding_value;
+}
+
+} // namespace math_util
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_MATH_UTIL_H_