Update Icing from upstream.

Notable changes in this sync:
- Put() tokenizes documents via TokenizedDocument before writing to the
  document store, so the store records a token count and the index
  processor reuses the same tokenization (also applied when replaying
  documents to restore the index).
- Get() accepts a GetResultSpecProto and applies type property masks
  (projection) to the returned document, with a "*" wildcard fallback.
- ScoringProcessor::Score() receives the query term iterators, used by
  the new Bm25fRelevanceScoring tests with
  ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE.
- DocumentStore::OptimizeInto() takes the language segmenter.
- New SetSchema() tests cover schema type versioning.
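
For reference, a minimal sketch of the new projection API, as exercised
by the GetDocumentProjection* tests below:

  GetResultSpecProto result_spec;
  TypePropertyMask* mask = result_spec.add_type_property_masks();
  mask->set_schema_type("Email");  // or "*" for the wildcard mask
  mask->add_paths("sender.name");  // keep only the listed properties
  mask->add_paths("subject");
  GetResultProto result = icing.Get("namespace", "uri1", result_spec);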

Change-Id: I43038a59e7170fb8ecbaf6098a37221b3682ce09
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index d915d65..791368a 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -43,6 +43,8 @@
 #include "icing/proto/search.pb.h"
 #include "icing/proto/status.pb.h"
 #include "icing/query/query-processor.h"
+#include "icing/result/projection-tree.h"
+#include "icing/result/projector.h"
 #include "icing/result/result-retriever.h"
 #include "icing/schema/schema-store.h"
 #include "icing/schema/schema-util.h"
@@ -60,6 +62,7 @@
 #include "icing/util/crc32.h"
 #include "icing/util/logging.h"
 #include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
 #include "unicode/uloc.h"
 
 namespace icing {
@@ -693,7 +696,21 @@
     return result_proto;
   }
 
-  auto document_id_or = document_store_->Put(document, put_document_stats);
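+  // Tokenize the document up front: the document store records the token
+  // count, and the index processor below reuses the same tokenization.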
+  auto tokenized_document_or = TokenizedDocument::Create(
+      schema_store_.get(), language_segmenter_.get(), std::move(document));
+  if (!tokenized_document_or.ok()) {
+    TransformStatus(tokenized_document_or.status(), result_status);
+    put_document_stats->set_latency_ms(put_timer->GetElapsedMilliseconds());
+    return result_proto;
+  }
+  TokenizedDocument tokenized_document(
+      std::move(tokenized_document_or).ValueOrDie());
+
+  auto document_id_or =
+      document_store_->Put(tokenized_document.document(),
+                           tokenized_document.num_tokens(), put_document_stats);
   if (!document_id_or.ok()) {
     TransformStatus(document_id_or.status(), result_status);
     put_document_stats->set_latency_ms(put_timer->GetElapsedMilliseconds());
@@ -702,8 +717,8 @@
   DocumentId document_id = document_id_or.ValueOrDie();
 
   auto index_processor_or = IndexProcessor::Create(
-      schema_store_.get(), language_segmenter_.get(), normalizer_.get(),
-      index_.get(), CreateIndexProcessorOptions(options_), clock_.get());
+      normalizer_.get(), index_.get(), CreateIndexProcessorOptions(options_),
+      clock_.get());
   if (!index_processor_or.ok()) {
     TransformStatus(index_processor_or.status(), result_status);
     put_document_stats->set_latency_ms(put_timer->GetElapsedMilliseconds());
@@ -712,8 +727,8 @@
   std::unique_ptr<IndexProcessor> index_processor =
       std::move(index_processor_or).ValueOrDie();
 
-  auto status =
-      index_processor->IndexDocument(document, document_id, put_document_stats);
+  auto status = index_processor->IndexDocument(tokenized_document, document_id,
+                                               put_document_stats);
 
   TransformStatus(status, result_status);
   put_document_stats->set_latency_ms(put_timer->GetElapsedMilliseconds());
@@ -721,7 +736,8 @@
 }
 
 GetResultProto IcingSearchEngine::Get(const std::string_view name_space,
-                                      const std::string_view uri) {
+                                      const std::string_view uri,
+                                      const GetResultSpecProto& result_spec) {
   GetResultProto result_proto;
   StatusProto* result_status = result_proto.mutable_status();
 
@@ -738,8 +754,31 @@
     return result_proto;
   }
 
+  DocumentProto document = std::move(document_or).ValueOrDie();
+  std::unique_ptr<ProjectionTree> type_projection_tree;
+  std::unique_ptr<ProjectionTree> wildcard_projection_tree;
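+  // Find the projection masks that may apply to this document: one for its
+  // exact schema type and one for the wildcard type ("*"), if present.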
+  for (const TypePropertyMask& type_field_mask :
+       result_spec.type_property_masks()) {
+    if (type_field_mask.schema_type() == document.schema()) {
+      type_projection_tree = std::make_unique<ProjectionTree>(type_field_mask);
+    } else if (type_field_mask.schema_type() ==
+               ProjectionTree::kSchemaTypeWildcard) {
+      wildcard_projection_tree =
+          std::make_unique<ProjectionTree>(type_field_mask);
+    }
+  }
+
+  // Apply projection; a type-specific mask takes precedence over the wildcard.
+  if (type_projection_tree != nullptr) {
+    projector::Project(type_projection_tree->root().children, &document);
+  } else if (wildcard_projection_tree != nullptr) {
+    projector::Project(wildcard_projection_tree->root().children, &document);
+  }
+
   result_status->set_code(StatusProto::OK);
-  *result_proto.mutable_document() = std::move(document_or).ValueOrDie();
+  *result_proto.mutable_document() = std::move(document);
   return result_proto;
 }
 
@@ -1237,7 +1274,8 @@
       std::move(scoring_processor_or).ValueOrDie();
   std::vector<ScoredDocumentHit> result_document_hits =
       scoring_processor->Score(std::move(query_results.root_iterator),
-                               performance_configuration_.num_to_score);
+                               performance_configuration_.num_to_score,
+                               &query_results.query_term_iterators);
   query_stats->set_scoring_latency_ms(
       component_timer->GetElapsedMilliseconds());
   query_stats->set_num_documents_scored(result_document_hits.size());
@@ -1416,7 +1454,8 @@
   }
 
   // Copies valid document data to tmp directory
-  auto optimize_status = document_store_->OptimizeInto(temporary_document_dir);
+  auto optimize_status = document_store_->OptimizeInto(
+      temporary_document_dir, language_segmenter_.get());
 
   // Handles error if any
   if (!optimize_status.ok()) {
@@ -1523,9 +1562,9 @@
 
   ICING_ASSIGN_OR_RETURN(
       std::unique_ptr<IndexProcessor> index_processor,
-      IndexProcessor::Create(
-          schema_store_.get(), language_segmenter_.get(), normalizer_.get(),
-          index_.get(), CreateIndexProcessorOptions(options_), clock_.get()));
+      IndexProcessor::Create(normalizer_.get(), index_.get(),
+                             CreateIndexProcessorOptions(options_),
+                             clock_.get()));
 
   ICING_VLOG(1) << "Restoring index by replaying documents from document id "
                 << first_document_to_reindex << " to document id "
@@ -1546,9 +1585,22 @@
         return document_or.status();
       }
     }
+    DocumentProto document(std::move(document_or).ValueOrDie());
+
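+    // Re-tokenize the stored document; IndexDocument() now consumes a
+    // TokenizedDocument rather than a raw DocumentProto.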
+    libtextclassifier3::StatusOr<TokenizedDocument> tokenized_document_or =
+        TokenizedDocument::Create(schema_store_.get(),
+                                  language_segmenter_.get(),
+                                  std::move(document));
+    if (!tokenized_document_or.ok()) {
+      return tokenized_document_or.status();
+    }
+    TokenizedDocument tokenized_document(
+        std::move(tokenized_document_or).ValueOrDie());
 
     libtextclassifier3::Status status =
-        index_processor->IndexDocument(document_or.ValueOrDie(), document_id);
+        index_processor->IndexDocument(tokenized_document, document_id);
     if (!status.ok()) {
       if (!absl_ports::IsDataLoss(status)) {
         // Real error. Stop recovering and pass it up.
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index b2bb4f1..dfe56c4 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -213,7 +214,8 @@
   //   NOT_FOUND if the key doesn't exist or doc has been deleted
   //   FAILED_PRECONDITION IcingSearchEngine has not been initialized yet
   //   INTERNAL_ERROR on IO error
-  GetResultProto Get(std::string_view name_space, std::string_view uri);
+  GetResultProto Get(std::string_view name_space, std::string_view uri,
+                     const GetResultSpecProto& result_spec);
 
   // Reports usage. The corresponding usage scores of the specified document in
   // the report will be updated.
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index f4249f3..8c64614 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -54,10 +54,13 @@
 
 using ::icing::lib::portable_equals_proto::EqualsProto;
 using ::testing::_;
+using ::testing::ElementsAre;
 using ::testing::Eq;
+using ::testing::Ge;
 using ::testing::Gt;
 using ::testing::HasSubstr;
 using ::testing::IsEmpty;
+using ::testing::Le;
 using ::testing::Lt;
 using ::testing::Matcher;
 using ::testing::Ne;
@@ -112,7 +115,6 @@
       ICING_ASSERT_OK(
           icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
     }
-
     filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
   }
 
@@ -156,6 +158,19 @@
       .Build();
 }
 
+DocumentProto CreateEmailDocument(const std::string& name_space,
+                                  const std::string& uri, int score,
+                                  const std::string& subject_content,
+                                  const std::string& body_content) {
+  return DocumentBuilder()
+      .SetKey(name_space, uri)
+      .SetSchema("Email")
+      .SetScore(score)
+      .AddStringProperty("subject", subject_content)
+      .AddStringProperty("body", body_content)
+      .Build();
+}
+
 SchemaProto CreateMessageSchema() {
   SchemaProto schema;
   auto type = schema.add_types();
@@ -265,6 +280,17 @@
   return usage_report;
 }
 
+std::vector<std::string> GetUrisFromSearchResults(
+    SearchResultProto& search_result_proto) {
+  std::vector<std::string> result_uris;
+  result_uris.reserve(search_result_proto.results_size());
+  for (int i = 0; i < search_result_proto.results_size(); i++) {
+    result_uris.push_back(
+        search_result_proto.mutable_results(i)->document().uri());
+  }
+  return result_uris;
+}
+
 TEST_F(IcingSearchEngineTest, SimpleInitialization) {
   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -287,12 +313,14 @@
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   *expected_get_result_proto.mutable_document() = document;
 
-  ASSERT_THAT(icing.Get("namespace", "uri"),
-              EqualsProto(expected_get_result_proto));
+  ASSERT_THAT(
+      icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-  EXPECT_THAT(icing.Get("namespace", "uri"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 }
 
 TEST_F(IcingSearchEngineTest, MaxIndexMergeSizeReturnsInvalidArgument) {
@@ -670,6 +698,195 @@
   }
 }
 
+TEST_F(IcingSearchEngineTest, SetSchemaUnsetVersionIsZero) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+  // 1. Create a schema with an Email type and no version set.
+  SchemaProto schema;
+  SchemaTypeConfigProto* type = schema.add_types();
+  type->set_schema_type("Email");
+  PropertyConfigProto* property = type->add_properties();
+  property->set_property_name("title");
+  property->set_data_type(PropertyConfigProto::DataType::STRING);
+  property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+  EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+  EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(0));
+}
+
+TEST_F(IcingSearchEngineTest, SetSchemaCompatibleVersionUpdateSucceeds) {
+  {
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // 1. Create a schema with an Email type with version 1
+    SchemaProto schema;
+    SchemaTypeConfigProto* type = schema.add_types();
+    type->set_version(1);
+    type->set_schema_type("Email");
+    PropertyConfigProto* property = type->add_properties();
+    property->set_property_name("title");
+    property->set_data_type(PropertyConfigProto::DataType::STRING);
+    property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+    EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+    EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+  }
+
+  {
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // 2. Create schema that adds a new optional property and updates version.
+    SchemaProto schema;
+    SchemaTypeConfigProto* type = schema.add_types();
+    type->set_version(2);
+    type->set_schema_type("Email");
+    PropertyConfigProto* property = type->add_properties();
+    property->set_property_name("title");
+    property->set_data_type(PropertyConfigProto::DataType::STRING);
+    property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+    property = type->add_properties();
+    property->set_property_name("body");
+    property->set_data_type(PropertyConfigProto::DataType::STRING);
+    property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+    // 3. SetSchema should succeed and the version number should be updated.
+    EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+    EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
+  }
+}
+
+TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleVersionUpdateFails) {
+  {
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // 1. Create a schema with an Email type with version 1
+    SchemaProto schema;
+    SchemaTypeConfigProto* type = schema.add_types();
+    type->set_version(1);
+    type->set_schema_type("Email");
+    PropertyConfigProto* property = type->add_properties();
+    property->set_property_name("title");
+    property->set_data_type(PropertyConfigProto::DataType::STRING);
+    property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+    EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+    EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+  }
+
+  {
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // 2. Create schema that makes an incompatible change (OPTIONAL -> REQUIRED)
+    SchemaProto schema;
+    SchemaTypeConfigProto* type = schema.add_types();
+    type->set_version(2);
+    type->set_schema_type("Email");
+    PropertyConfigProto* property = type->add_properties();
+    property->set_property_name("title");
+    property->set_data_type(PropertyConfigProto::DataType::STRING);
+    property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+    // 3. SetSchema should fail and the version number should NOT be updated.
+    EXPECT_THAT(icing.SetSchema(schema).status(),
+                ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+
+    EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+  }
+}
+
+TEST_F(IcingSearchEngineTest,
+       SetSchemaIncompatibleVersionUpdateForceOverrideSucceeds) {
+  {
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // 1. Create a schema with an Email type with version 1
+    SchemaProto schema;
+    SchemaTypeConfigProto* type = schema.add_types();
+    type->set_version(1);
+    type->set_schema_type("Email");
+    PropertyConfigProto* property = type->add_properties();
+    property->set_property_name("title");
+    property->set_data_type(PropertyConfigProto::DataType::STRING);
+    property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+    EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+    EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+  }
+
+  {
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // 2. Create schema that makes an incompatible change (OPTIONAL -> REQUIRED)
+    // with force override to true.
+    SchemaProto schema;
+    SchemaTypeConfigProto* type = schema.add_types();
+    type->set_version(2);
+    type->set_schema_type("Email");
+    PropertyConfigProto* property = type->add_properties();
+    property->set_property_name("title");
+    property->set_data_type(PropertyConfigProto::DataType::STRING);
+    property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+    // 3. SetSchema should succeed and the version number should be updated.
+    EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());
+
+    EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
+  }
+}
+
+TEST_F(IcingSearchEngineTest, SetSchemaNoChangeVersionUpdateSucceeds) {
+  {
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // 1. Create a schema with an Email type with version 1
+    SchemaProto schema;
+    SchemaTypeConfigProto* type = schema.add_types();
+    type->set_version(1);
+    type->set_schema_type("Email");
+    PropertyConfigProto* property = type->add_properties();
+    property->set_property_name("title");
+    property->set_data_type(PropertyConfigProto::DataType::STRING);
+    property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+    EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+    EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(1));
+  }
+
+  {
+    IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+    ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+    // 2. Create schema that only changes the version.
+    SchemaProto schema;
+    SchemaTypeConfigProto* type = schema.add_types();
+    type->set_version(2);
+    type->set_schema_type("Email");
+    PropertyConfigProto* property = type->add_properties();
+    property->set_property_name("title");
+    property->set_data_type(PropertyConfigProto::DataType::STRING);
+    property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+    // 3. SetSchema should succeed and the version number should be updated.
+    EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+    EXPECT_THAT(icing.GetSchema().schema().types(0).version(), Eq(2));
+  }
+}
+
 TEST_F(IcingSearchEngineTest, SetSchemaDuplicateTypesReturnsAlreadyExists) {
   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
   ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -894,7 +1110,8 @@
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   *expected_get_result_proto.mutable_document() = email_document_with_subject;
 
-  EXPECT_THAT(icing.Get("namespace", "with_subject"),
+  EXPECT_THAT(icing.Get("namespace", "with_subject",
+                        GetResultSpecProto::default_instance()),
               EqualsProto(expected_get_result_proto));
 
   // The document without a subject got deleted because it failed validation
@@ -904,7 +1121,8 @@
       "Document (namespace, without_subject) not found.");
   expected_get_result_proto.clear_document();
 
-  EXPECT_THAT(icing.Get("namespace", "without_subject"),
+  EXPECT_THAT(icing.Get("namespace", "without_subject",
+                        GetResultSpecProto::default_instance()),
               EqualsProto(expected_get_result_proto));
 }
 
@@ -962,7 +1180,8 @@
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   *expected_get_result_proto.mutable_document() = email_document;
 
-  EXPECT_THAT(icing.Get("namespace", "email_uri"),
+  EXPECT_THAT(icing.Get("namespace", "email_uri",
+                        GetResultSpecProto::default_instance()),
               EqualsProto(expected_get_result_proto));
 
   // "message" document got deleted
@@ -971,7 +1190,8 @@
       "Document (namespace, message_uri) not found.");
   expected_get_result_proto.clear_document();
 
-  EXPECT_THAT(icing.Get("namespace", "message_uri"),
+  EXPECT_THAT(icing.Get("namespace", "message_uri",
+                        GetResultSpecProto::default_instance()),
               EqualsProto(expected_get_result_proto));
 }
 
@@ -1035,8 +1255,9 @@
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   *expected_get_result_proto.mutable_document() =
       CreateMessageDocument("namespace", "uri");
-  ASSERT_THAT(icing.Get("namespace", "uri"),
-              EqualsProto(expected_get_result_proto));
+  ASSERT_THAT(
+      icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // Put an invalid document
   PutResultProto put_result_proto = icing.Put(DocumentProto());
@@ -1050,7 +1271,208 @@
   expected_get_result_proto.mutable_status()->set_message(
       "Document (wrong, uri) not found.");
   expected_get_result_proto.clear_document();
-  ASSERT_THAT(icing.Get("wrong", "uri"),
+  ASSERT_THAT(icing.Get("wrong", "uri", GetResultSpecProto::default_instance()),
+              EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineTest, GetDocumentProjectionEmpty) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  DocumentProto document = CreateMessageDocument("namespace", "uri");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+  GetResultSpecProto result_spec;
+  TypePropertyMask* mask = result_spec.add_type_property_masks();
+  mask->set_schema_type(document.schema());
+  mask->add_paths("");
+
+  GetResultProto expected_get_result_proto;
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_get_result_proto.mutable_document() = document;
+  expected_get_result_proto.mutable_document()->clear_properties();
+  ASSERT_THAT(icing.Get("namespace", "uri", result_spec),
+              EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineTest, GetDocumentWildCardProjectionEmpty) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+  DocumentProto document = CreateMessageDocument("namespace", "uri");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+  GetResultSpecProto result_spec;
+  TypePropertyMask* mask = result_spec.add_type_property_masks();
+  mask->set_schema_type("*");
+  mask->add_paths("");
+
+  GetResultProto expected_get_result_proto;
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_get_result_proto.mutable_document() = document;
+  expected_get_result_proto.mutable_document()->clear_properties();
+  ASSERT_THAT(icing.Get("namespace", "uri", result_spec),
+              EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineTest, GetDocumentProjectionMultipleFieldPaths) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+              ProtoIsOk());
+
+  // 1. Add an email document
+  DocumentProto document =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Email")
+          .AddDocumentProperty(
+              "sender",
+              DocumentBuilder()
+                  .SetKey("namespace", "uri1")
+                  .SetSchema("Person")
+                  .AddStringProperty("name", "Meg Ryan")
+                  .AddStringProperty("emailAddress", "shopgirl@aol.com")
+                  .Build())
+          .AddStringProperty("subject", "Hello World!")
+          .AddStringProperty(
+              "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+          .Build();
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+  GetResultSpecProto result_spec;
+  TypePropertyMask* mask = result_spec.add_type_property_masks();
+  mask->set_schema_type("Email");
+  mask->add_paths("sender.name");
+  mask->add_paths("subject");
+
+  // 2. Verify that the returned result only contains the 'sender.name'
+  // property and the 'subject' property.
+  GetResultProto expected_get_result_proto;
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_get_result_proto.mutable_document() =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Email")
+          .AddDocumentProperty("sender",
+                               DocumentBuilder()
+                                   .SetKey("namespace", "uri1")
+                                   .SetSchema("Person")
+                                   .AddStringProperty("name", "Meg Ryan")
+                                   .Build())
+          .AddStringProperty("subject", "Hello World!")
+          .Build();
+  ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
+              EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineTest, GetDocumentWildcardProjectionMultipleFieldPaths) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+              ProtoIsOk());
+
+  // 1. Add an email document
+  DocumentProto document =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Email")
+          .AddDocumentProperty(
+              "sender",
+              DocumentBuilder()
+                  .SetKey("namespace", "uri1")
+                  .SetSchema("Person")
+                  .AddStringProperty("name", "Meg Ryan")
+                  .AddStringProperty("emailAddress", "shopgirl@aol.com")
+                  .Build())
+          .AddStringProperty("subject", "Hello World!")
+          .AddStringProperty(
+              "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+          .Build();
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+  GetResultSpecProto result_spec;
+  TypePropertyMask* mask = result_spec.add_type_property_masks();
+  mask->set_schema_type("*");
+  mask->add_paths("sender.name");
+  mask->add_paths("subject");
+
+  // 2. Verify that the returned result only contains the 'sender.name'
+  // property and the 'subject' property.
+  GetResultProto expected_get_result_proto;
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_get_result_proto.mutable_document() =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Email")
+          .AddDocumentProperty("sender",
+                               DocumentBuilder()
+                                   .SetKey("namespace", "uri1")
+                                   .SetSchema("Person")
+                                   .AddStringProperty("name", "Meg Ryan")
+                                   .Build())
+          .AddStringProperty("subject", "Hello World!")
+          .Build();
+  ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
+              EqualsProto(expected_get_result_proto));
+}
+
+TEST_F(IcingSearchEngineTest,
+       GetDocumentSpecificProjectionOverridesWildcardProjection) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+              ProtoIsOk());
+
+  // 1. Add an email document
+  DocumentProto document =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Email")
+          .AddDocumentProperty(
+              "sender",
+              DocumentBuilder()
+                  .SetKey("namespace", "uri1")
+                  .SetSchema("Person")
+                  .AddStringProperty("name", "Meg Ryan")
+                  .AddStringProperty("emailAddress", "shopgirl@aol.com")
+                  .Build())
+          .AddStringProperty("subject", "Hello World!")
+          .AddStringProperty(
+              "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+          .Build();
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+  // 2. Add type property masks for the wildcard and the specific type of the
+  // document 'Email'. The wildcard should be ignored and only the 'Email'
+  // projection should apply.
+  GetResultSpecProto result_spec;
+  TypePropertyMask* mask = result_spec.add_type_property_masks();
+  mask->set_schema_type("*");
+  mask->add_paths("subject");
+  mask = result_spec.add_type_property_masks();
+  mask->set_schema_type("Email");
+  mask->add_paths("body");
+
+  // 3. Verify that the returned result only contains the 'body' property.
+  GetResultProto expected_get_result_proto;
+  expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
+  *expected_get_result_proto.mutable_document() =
+      DocumentBuilder()
+          .SetKey("namespace", "uri1")
+          .SetCreationTimestampMs(1000)
+          .SetSchema("Email")
+          .AddStringProperty(
+              "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+          .Build();
+  ASSERT_THAT(icing.Get("namespace", "uri1", result_spec),
               EqualsProto(expected_get_result_proto));
 }
 
@@ -1593,16 +2015,18 @@
         filesystem()->GetFileSize(document_log_path.c_str());
 
     // Validates that document can't be found right after Optimize()
-    EXPECT_THAT(icing.Get("namespace", "uri1"),
-                EqualsProto(expected_get_result_proto));
+    EXPECT_THAT(
+        icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+        EqualsProto(expected_get_result_proto));
     // Validates that document is actually removed from document log
     EXPECT_THAT(document_log_size_after, Lt(document_log_size_before));
   }  // Destroys IcingSearchEngine to make sure nothing is cached.
 
   IcingSearchEngine icing(icing_options, GetTestJniCache());
   EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-  EXPECT_THAT(icing.Get("namespace", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 }
 
 TEST_F(IcingSearchEngineTest, OptimizationShouldDeleteTemporaryDirectory) {
@@ -1712,19 +2136,22 @@
     ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
 
     // Validates that Get() and Put() are good right after Optimize()
-    EXPECT_THAT(icing.Get("namespace", "uri1"),
-                EqualsProto(expected_get_result_proto));
+    EXPECT_THAT(
+        icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+        EqualsProto(expected_get_result_proto));
     EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
   }  // Destroys IcingSearchEngine to make sure nothing is cached.
 
   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
   EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-  EXPECT_THAT(icing.Get("namespace", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   *expected_get_result_proto.mutable_document() = document2;
-  EXPECT_THAT(icing.Get("namespace", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   EXPECT_THAT(icing.Put(document3).status(), ProtoIsOk());
 }
@@ -1748,14 +2175,16 @@
         StatusProto::NOT_FOUND);
     expected_get_result_proto.mutable_status()->set_message(
         "Document (namespace, uri1) not found.");
-    EXPECT_THAT(icing.Get("namespace", "uri1"),
-                EqualsProto(expected_get_result_proto));
+    EXPECT_THAT(
+        icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+        EqualsProto(expected_get_result_proto));
 
     expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
     expected_get_result_proto.mutable_status()->clear_message();
     *expected_get_result_proto.mutable_document() = document2;
-    EXPECT_THAT(icing.Get("namespace", "uri2"),
-                EqualsProto(expected_get_result_proto));
+    EXPECT_THAT(
+        icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
+        EqualsProto(expected_get_result_proto));
   }  // Destroys IcingSearchEngine to make sure nothing is cached.
 
   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
@@ -1766,13 +2195,15 @@
   expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
   expected_get_result_proto.mutable_status()->set_message(
       "Document (namespace, uri1) not found.");
-  EXPECT_THAT(icing.Get("namespace", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   expected_get_result_proto.mutable_status()->set_message(
       "Document (namespace, uri2) not found.");
-  EXPECT_THAT(icing.Get("namespace", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 }
 
 TEST_F(IcingSearchEngineTest, OptimizationFailureUninitializesIcing) {
@@ -1832,7 +2263,10 @@
               ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
   EXPECT_THAT(icing.Put(simple_doc).status(),
               ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
-  EXPECT_THAT(icing.Get(simple_doc.namespace_(), simple_doc.uri()).status(),
+  EXPECT_THAT(icing
+                  .Get(simple_doc.namespace_(), simple_doc.uri(),
+                       GetResultSpecProto::default_instance())
+                  .status(),
               ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
   EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
               ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
@@ -1841,7 +2275,10 @@
   EXPECT_THAT(icing.Reset().status(), ProtoIsOk());
   EXPECT_THAT(icing.SetSchema(simple_schema).status(), ProtoIsOk());
   EXPECT_THAT(icing.Put(simple_doc).status(), ProtoIsOk());
-  EXPECT_THAT(icing.Get(simple_doc.namespace_(), simple_doc.uri()).status(),
+  EXPECT_THAT(icing
+                  .Get(simple_doc.namespace_(), simple_doc.uri(),
+                       GetResultSpecProto::default_instance())
+                  .status(),
               ProtoIsOk());
   EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
               ProtoIsOk());
@@ -1900,12 +2337,14 @@
   GetResultProto expected_get_result_proto;
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   *expected_get_result_proto.mutable_document() = document1;
-  EXPECT_THAT(icing.Get("namespace1", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   *expected_get_result_proto.mutable_document() = document2;
-  EXPECT_THAT(icing.Get("namespace2", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // Delete the first type. The first doc should be irretrievable. The
   // second should still be present.
@@ -1922,14 +2361,16 @@
   expected_get_result_proto.mutable_status()->set_message(
       "Document (namespace1, uri1) not found.");
   expected_get_result_proto.clear_document();
-  EXPECT_THAT(icing.Get("namespace1", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   expected_get_result_proto.mutable_status()->clear_message();
   *expected_get_result_proto.mutable_document() = document2;
-  EXPECT_THAT(icing.Get("namespace2", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // Search for "message", only document2 should show up.
   SearchResultProto expected_search_result_proto;
@@ -1976,12 +2417,14 @@
   GetResultProto expected_get_result_proto;
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   *expected_get_result_proto.mutable_document() = document1;
-  EXPECT_THAT(icing.Get("namespace1", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   *expected_get_result_proto.mutable_document() = document2;
-  EXPECT_THAT(icing.Get("namespace2", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // Delete the first type. The first doc should be irretrievable. The
   // second should still be present.
@@ -1993,14 +2436,16 @@
   expected_get_result_proto.mutable_status()->set_message(
       "Document (namespace1, uri1) not found.");
   expected_get_result_proto.clear_document();
-  EXPECT_THAT(icing.Get("namespace1", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   expected_get_result_proto.mutable_status()->clear_message();
   *expected_get_result_proto.mutable_document() = document2;
-  EXPECT_THAT(icing.Get("namespace2", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   search_spec = SearchSpecProto::default_instance();
   search_spec.set_query("message");
@@ -2055,16 +2500,19 @@
   GetResultProto expected_get_result_proto;
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   *expected_get_result_proto.mutable_document() = document1;
-  EXPECT_THAT(icing.Get("namespace1", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   *expected_get_result_proto.mutable_document() = document2;
-  EXPECT_THAT(icing.Get("namespace1", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   *expected_get_result_proto.mutable_document() = document3;
-  EXPECT_THAT(icing.Get("namespace3", "uri3"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // Delete namespace1. Document1 and document2 should be irretrievable.
   // Document3 should still be present.
@@ -2081,21 +2529,24 @@
   expected_get_result_proto.mutable_status()->set_message(
       "Document (namespace1, uri1) not found.");
   expected_get_result_proto.clear_document();
-  EXPECT_THAT(icing.Get("namespace1", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
   expected_get_result_proto.mutable_status()->set_message(
       "Document (namespace1, uri2) not found.");
   expected_get_result_proto.clear_document();
-  EXPECT_THAT(icing.Get("namespace1", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   expected_get_result_proto.mutable_status()->clear_message();
   *expected_get_result_proto.mutable_document() = document3;
-  EXPECT_THAT(icing.Get("namespace3", "uri3"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace3", "uri3", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // Search for "message", only document3 should show up.
   SearchResultProto expected_search_result_proto;
@@ -2137,12 +2588,14 @@
   GetResultProto expected_get_result_proto;
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   *expected_get_result_proto.mutable_document() = document1;
-  EXPECT_THAT(icing.Get("namespace1", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   *expected_get_result_proto.mutable_document() = document2;
-  EXPECT_THAT(icing.Get("namespace2", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // Delete the first namespace. The first doc should be irretrievable. The
   // second should still be present.
@@ -2154,14 +2607,16 @@
   expected_get_result_proto.mutable_status()->set_message(
       "Document (namespace1, uri1) not found.");
   expected_get_result_proto.clear_document();
-  EXPECT_THAT(icing.Get("namespace1", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   expected_get_result_proto.mutable_status()->clear_message();
   *expected_get_result_proto.mutable_document() = document2;
-  EXPECT_THAT(icing.Get("namespace2", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   search_spec = SearchSpecProto::default_instance();
   search_spec.set_query("message");
@@ -2208,12 +2663,14 @@
   GetResultProto expected_get_result_proto;
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   *expected_get_result_proto.mutable_document() = document1;
-  EXPECT_THAT(icing.Get("namespace1", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   *expected_get_result_proto.mutable_document() = document2;
-  EXPECT_THAT(icing.Get("namespace2", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // Delete all docs containing 'body1'. The first doc should be irretrievable.
   // The second should still be present.
@@ -2232,14 +2689,16 @@
   expected_get_result_proto.mutable_status()->set_message(
       "Document (namespace1, uri1) not found.");
   expected_get_result_proto.clear_document();
-  EXPECT_THAT(icing.Get("namespace1", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   expected_get_result_proto.mutable_status()->clear_message();
   *expected_get_result_proto.mutable_document() = document2;
-  EXPECT_THAT(icing.Get("namespace2", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   search_spec = SearchSpecProto::default_instance();
   search_spec.set_query("message");
@@ -2281,12 +2740,14 @@
   GetResultProto expected_get_result_proto;
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   *expected_get_result_proto.mutable_document() = document1;
-  EXPECT_THAT(icing.Get("namespace1", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   *expected_get_result_proto.mutable_document() = document2;
-  EXPECT_THAT(icing.Get("namespace2", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // Delete all docs containing 'foo', which should be none of them. Both docs
   // should still be present.
@@ -2299,14 +2760,16 @@
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   expected_get_result_proto.mutable_status()->clear_message();
   *expected_get_result_proto.mutable_document() = document1;
-  EXPECT_THAT(icing.Get("namespace1", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace1", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   expected_get_result_proto.mutable_status()->clear_message();
   *expected_get_result_proto.mutable_document() = document2;
-  EXPECT_THAT(icing.Get("namespace2", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace2", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   search_spec = SearchSpecProto::default_instance();
   search_spec.set_query("message");
@@ -2428,8 +2891,9 @@
   GetResultProto expected_get_result_proto;
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   *expected_get_result_proto.mutable_document() = document1;
-  EXPECT_THAT(icing.Get("namespace", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
 
@@ -2484,8 +2948,9 @@
   expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
   expected_get_result_proto.mutable_status()->set_message(
       "Document (namespace, uri) not found.");
-  EXPECT_THAT(icing.Get("namespace", "uri"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   DocumentProto new_document =
       DocumentBuilder()
@@ -2556,8 +3021,9 @@
   expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
   expected_get_result_proto.mutable_status()->set_message(
       "Document (namespace, uri) not found.");
-  EXPECT_THAT(icing.Get("namespace", "uri"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   DocumentProto new_document =
       DocumentBuilder()
@@ -2804,8 +3270,9 @@
     EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
     EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
                 ProtoIsOk());
-    EXPECT_THAT(icing.Get("namespace", "uri"),
-                EqualsProto(expected_get_result_proto));
+    EXPECT_THAT(
+        icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+        EqualsProto(expected_get_result_proto));
     SearchResultProto search_result_proto =
         icing.Search(search_spec, GetDefaultScoringSpec(),
                      ResultSpecProto::default_instance());
@@ -2820,8 +3287,9 @@
   EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
 
   // Checks that DocumentLog is still ok
-  EXPECT_THAT(icing.Get("namespace", "uri"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // Checks that the index is still ok so we can search over it
   SearchResultProto search_result_proto =
@@ -2857,8 +3325,9 @@
     EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
     EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
                 ProtoIsOk());
-    EXPECT_THAT(icing.Get("namespace", "uri"),
-                EqualsProto(expected_get_result_proto));
+    EXPECT_THAT(
+        icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+        EqualsProto(expected_get_result_proto));
     SearchResultProto search_result_proto =
         icing.Search(search_spec, GetDefaultScoringSpec(),
                      ResultSpecProto::default_instance());
@@ -2877,8 +3346,9 @@
   EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
 
   // Checks that DocumentLog is still ok
-  EXPECT_THAT(icing.Get("namespace", "uri"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // Checks that the index is still ok so we can search over it
   SearchResultProto search_result_proto =
@@ -2914,8 +3384,9 @@
     EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
     EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
                 ProtoIsOk());
-    EXPECT_THAT(icing.Get("namespace", "uri"),
-                EqualsProto(expected_get_result_proto));
+    EXPECT_THAT(
+        icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+        EqualsProto(expected_get_result_proto));
     SearchResultProto search_result_proto =
         icing.Search(search_spec, GetDefaultScoringSpec(),
                      ResultSpecProto::default_instance());
@@ -2935,8 +3406,9 @@
   EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
 
   // Checks that DocumentLog is still ok
-  EXPECT_THAT(icing.Get("namespace", "uri"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // Checks that the index is still ok so we can search over it
   SearchResultProto search_result_proto =
@@ -2964,8 +3436,9 @@
     *expected_get_result_proto.mutable_document() =
         CreateMessageDocument("namespace", "uri");
 
-    EXPECT_THAT(icing.Get("namespace", "uri"),
-                EqualsProto(expected_get_result_proto));
+    EXPECT_THAT(
+        icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+        EqualsProto(expected_get_result_proto));
   }  // This should shut down IcingSearchEngine and persist anything it needs to
 
   const std::string schema_file =
@@ -2993,8 +3466,9 @@
     *expected_get_result_proto.mutable_document() =
         CreateMessageDocument("namespace", "uri");
 
-    EXPECT_THAT(icing.Get("namespace", "uri"),
-                EqualsProto(expected_get_result_proto));
+    EXPECT_THAT(
+        icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+        EqualsProto(expected_get_result_proto));
   }  // This should shut down IcingSearchEngine and persist anything it needs to
 
   const std::string document_log_file =
@@ -3118,7 +3592,8 @@
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   *expected_get_result_proto.mutable_document() = email_document;
 
-  EXPECT_THAT(icing.Get("namespace", "email_uri"),
+  EXPECT_THAT(icing.Get("namespace", "email_uri",
+                        GetResultSpecProto::default_instance()),
               EqualsProto(expected_get_result_proto));
 
   SearchSpecProto search_spec;
@@ -3184,12 +3659,14 @@
   *expected_get_result_proto.mutable_document() = document1;
 
   // DocumentStore kept the additional document
-  EXPECT_THAT(icing.Get("namespace", "uri1"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri1", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   *expected_get_result_proto.mutable_document() = document2;
-  EXPECT_THAT(icing.Get("namespace", "uri2"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri2", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // We indexed the additional document
   SearchSpecProto search_spec;
@@ -3666,6 +4143,392 @@
               EqualsSearchResultIgnoreStats(expected_search_result_proto));
 }
 
+TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringOneNamespace) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+  // Create and index documents in namespace "namespace1".
+  DocumentProto document = CreateEmailDocument(
+      "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+      "fresh fish. inexpensive. good sushi.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+      "indian food. buffet. spicy food. kadai chicken.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+                                 "panda express",
+                                 "chinese food. cheap. inexpensive. kung pao.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+                                 "speederia pizza",
+                                 "thin-crust pizza. good and fast.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+                                 "whole foods",
+                                 "salads. pizza. organic food. expensive.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+      "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+      "bulk. cheap whole beans. frozen fish. food samples.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+                                 "starbucks coffee",
+                                 "habit. birthday rewards. good coffee");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+  search_spec.set_query("coffee OR food");
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+  SearchResultProto search_result_proto = icing.Search(
+      search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+  // Result should be in descending score order
+  EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+  // Both doc5 and doc7 have "coffee" in name and text sections.
+  // However, doc5 has more matches in the text section.
+  // Documents with "food" are ranked lower as the term "food" is commonly
+  // present in this corpus, and thus, has a lower IDF.
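+  // A rough sketch of the BM25 per-term score these expectations assume
+  // (the standard formula; k1 and b are the usual tuning knobs, not values
+  // taken from this change):
+  //   score(t, d) = IDF(t) * tf * (k1 + 1)
+  //                 / (tf + k1 * (1 - b + b * |d| / avgdl))
+  //   IDF(t) = ln(1 + (N - n(t) + 0.5) / (n(t) + 0.5))
+  // A term like "food" appears in many documents, so n(t) is large, IDF(t) is
+  // small, and each "food" match contributes less than a "coffee" match does.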
+  EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+              ElementsAre("namespace1/uri5",    // 'coffee' 3 times
+                          "namespace1/uri7",    // 'coffee' 2 times
+                          "namespace1/uri1",    // 'food' 2 times
+                          "namespace1/uri4",    // 'food' 2 times
+                          "namespace1/uri2",    // 'food' 1 time
+                          "namespace1/uri6"));  // 'food' 1 time
+}
+
+TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringOneNamespaceNotOperator) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+  // Create and index documents in namespace "namespace1".
+  DocumentProto document = CreateEmailDocument(
+      "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+      "fresh fish. inexpensive. good sushi.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+      "indian food. buffet. spicy food. kadai chicken.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+                                 "panda express",
+                                 "chinese food. cheap. inexpensive. kung pao.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri3", /*score=*/23, "speederia pizza",
+      "thin-crust pizza. good and fast. nice coffee");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+                                 "whole foods",
+                                 "salads. pizza. organic food. expensive.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+      "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+      "bulk. cheap whole beans. frozen fish. food samples.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+                                 "starbucks coffee",
+                                 "habit. birthday rewards. good coffee");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+  search_spec.set_query("coffee -starbucks");
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+  SearchResultProto search_result_proto = icing.Search(
+      search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+  // Result should be in descending score order
+  EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+  EXPECT_THAT(
+      GetUrisFromSearchResults(search_result_proto),
+      ElementsAre("namespace1/uri5",    // 'coffee' 3 times, 'starbucks' 0 times
+                  "namespace1/uri3"));  // 'coffee' 1 times, 'starbucks' 0 times
+}
+
+TEST_F(IcingSearchEngineTest,
+       Bm25fRelevanceScoringOneNamespaceSectionRestrict) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+  // Create and index documents in namespace "namespace1".
+  DocumentProto document = CreateEmailDocument(
+      "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+      "fresh fish. inexpensive. good sushi.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+      "indian food. buffet. spicy food. kadai chicken.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+                                 "panda express",
+                                 "chinese food. cheap. inexpensive. kung pao.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+                                 "speederia pizza",
+                                 "thin-crust pizza. good and fast.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+                                 "whole foods",
+                                 "salads. pizza. organic food. expensive.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document =
+      CreateEmailDocument("namespace1", "namespace1/uri5", /*score=*/18,
+                          "peets coffee, best coffee",
+                          "espresso. decaf. whole beans. excellent coffee.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+      "bulk. cheap whole beans. frozen fish. food samples.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri7", /*score=*/4, "starbucks",
+      "habit. birthday rewards. good coffee. brewed coffee");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+  search_spec.set_query("body:coffee OR body:food");
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+  SearchResultProto search_result_proto = icing.Search(
+      search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+  // Results should be in descending score order; the section restrict doesn't
+  // impact the BM25F score.
+  EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+  // Both doc5 and doc7 have "coffee" in name and text sections.
+  // However, doc5 has more matches.
+  // Documents with "food" are ranked lower as the term "food" is commonly
+  // present in this corpus, and thus, has a lower IDF.
+  EXPECT_THAT(
+      GetUrisFromSearchResults(search_result_proto),
+      ElementsAre("namespace1/uri5",    // 'coffee' 2 times in section subject,
+                                        // 1 time in section body
+                  "namespace1/uri7",    // 'coffee' 2 times in section body
+                  "namespace1/uri1",    // 'food' 2 times in section body
+                  "namespace1/uri4",    // 'food' 2 times in section body
+                  "namespace1/uri2",    // 'food' 1 time in section body
+                  "namespace1/uri6"));  // 'food' 1 time in section body
+}
+
+TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringTwoNamespaces) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+  // Create and index documents in namespace "namespace1".
+  DocumentProto document = CreateEmailDocument(
+      "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+      "fresh fish. inexpensive. good sushi.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+      "indian food. buffet. spicy food. kadai chicken.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+                                 "panda express",
+                                 "chinese food. cheap. inexpensive. kung pao.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+                                 "speederia pizza",
+                                 "thin-crust pizza. good and fast.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+                                 "whole foods",
+                                 "salads. pizza. organic food. expensive.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+      "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+      "bulk. cheap whole beans. frozen fish. food samples.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+                                 "starbucks coffee",
+                                 "habit. birthday rewards. good coffee");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+  // Create and index documents in namespace "namespace2".
+  document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10,
+                                 "sushi belmont",
+                                 "fresh fish. inexpensive. good sushi.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander",
+      "indian food. buffet. spicy food. kadai chicken.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4,
+                                 "panda express",
+                                 "chinese food. cheap. inexpensive. kung pao.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23,
+                                 "speederia pizza",
+                                 "thin-crust pizza. good and fast.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8,
+                                 "whole foods",
+                                 "salads. pizza. organic food. expensive.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee",
+      "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace2", "namespace2/uri6", /*score=*/4, "costco",
+      "bulk. cheap whole beans. frozen fish. food samples.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4,
+                                 "starbucks coffee", "good coffee");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+  search_spec.set_query("coffee OR food");
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+  ResultSpecProto result_spec_proto;
+  result_spec_proto.set_num_per_page(16);
+  SearchResultProto search_result_proto =
+      icing.Search(search_spec, scoring_spec, result_spec_proto);
+
+  // Result should be in descending score order
+  EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+  // The two corpora have the same documents except for document 7, which in
+  // "namespace2" is much shorter than the average dcoument length, so it is
+  // boosted.
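+  // This is BM25F's length normalization: in the usual formulation the
+  // denominator scales with (1 - b + b * |d| / avgdl), which shrinks for
+  // shorter-than-average documents, so the same term frequency earns a
+  // higher score (a sketch; the exact parameters are internal to Icing).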
+  EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+              ElementsAre("namespace2/uri7",    // 'coffee' 2 times, short doc
+                          "namespace1/uri5",    // 'coffee' 3 times
+                          "namespace2/uri5",    // 'coffee' 3 times
+                          "namespace1/uri7",    // 'coffee' 2 times
+                          "namespace1/uri1",    // 'food' 2 times
+                          "namespace2/uri1",    // 'food' 2 times
+                          "namespace1/uri4",    // 'food' 2 times
+                          "namespace2/uri4",    // 'food' 2 times
+                          "namespace1/uri2",    // 'food' 1 time
+                          "namespace2/uri2",    // 'food' 1 time
+                          "namespace1/uri6",    // 'food' 1 time
+                          "namespace2/uri6"));  // 'food' 1 time
+}
+
+TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringWithNamespaceFilter) {
+  IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+  EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+  EXPECT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
+
+  // Create and index documents in namespace "namespace1".
+  DocumentProto document = CreateEmailDocument(
+      "namespace1", "namespace1/uri0", /*score=*/10, "sushi belmont",
+      "fresh fish. inexpensive. good sushi.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri1", /*score=*/13, "peacock koriander",
+      "indian food. buffet. spicy food. kadai chicken.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri2", /*score=*/4,
+                                 "panda express",
+                                 "chinese food. cheap. inexpensive. kung pao.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri3", /*score=*/23,
+                                 "speederia pizza",
+                                 "thin-crust pizza. good and fast.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri4", /*score=*/8,
+                                 "whole foods",
+                                 "salads. pizza. organic food. expensive.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri5", /*score=*/18, "peets coffee",
+      "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace1", "namespace1/uri6", /*score=*/4, "costco",
+      "bulk. cheap whole beans. frozen fish. food samples.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace1", "namespace1/uri7", /*score=*/4,
+                                 "starbucks coffee",
+                                 "habit. birthday rewards. good coffee");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+  // Create and index documents in namespace "namespace2".
+  document = CreateEmailDocument("namespace2", "namespace2/uri0", /*score=*/10,
+                                 "sushi belmont",
+                                 "fresh fish. inexpensive. good sushi.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace2", "namespace2/uri1", /*score=*/13, "peacock koriander",
+      "indian food. buffet. spicy food. kadai chicken.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace2", "namespace2/uri2", /*score=*/4,
+                                 "panda express",
+                                 "chinese food. cheap. inexpensive. kung pao.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace2", "namespace2/uri3", /*score=*/23,
+                                 "speederia pizza",
+                                 "thin-crust pizza. good and fast.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace2", "namespace2/uri4", /*score=*/8,
+                                 "whole foods",
+                                 "salads. pizza. organic food. expensive.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace2", "namespace2/uri5", /*score=*/18, "peets coffee",
+      "espresso. decaf. brewed coffee. whole beans. excellent coffee.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument(
+      "namespace2", "namespace2/uri6", /*score=*/4, "costco",
+      "bulk. cheap whole beans. frozen fish. food samples.");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+  document = CreateEmailDocument("namespace2", "namespace2/uri7", /*score=*/4,
+                                 "starbucks coffee", "good coffee");
+  ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+  SearchSpecProto search_spec;
+  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+  search_spec.set_query("coffee OR food");
+  // Now query only the second corpus, "namespace2".
+  search_spec.add_namespace_filters("namespace2");
+  ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+  scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+  SearchResultProto search_result_proto = icing.Search(
+      search_spec, scoring_spec, ResultSpecProto::default_instance());
+
+  // Result from namespace "namespace2" should be in descending score order
+  EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
+  // Both doc5 and doc7 have "coffee" in name and text sections.
+  // Even though doc5 has more matches in the text section, doc7 is much
+  // shorter than the corpus's average document length, so it is boosted.
+  // Documents with "food" are ranked lower as the term "food" is commonly
+  // present in this corpus, and thus, has a lower IDF.
+  EXPECT_THAT(GetUrisFromSearchResults(search_result_proto),
+              ElementsAre("namespace2/uri7",    // 'coffee' 2 times, short doc
+                          "namespace2/uri5",    // 'coffee' 3 times
+                          "namespace2/uri1",    // 'food' 2 times
+                          "namespace2/uri4",    // 'food' 2 times
+                          "namespace2/uri2",    // 'food' 1 time
+                          "namespace2/uri6"));  // 'food' 1 time
+}
+
 TEST_F(IcingSearchEngineTest,
        SearchResultShouldHaveDefaultOrderWithoutUsageTimestamp) {
   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
@@ -3922,8 +4785,9 @@
     expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
     *expected_get_result_proto.mutable_document() = document;
 
-    ASSERT_THAT(icing.Get("namespace", "uri"),
-                EqualsProto(expected_get_result_proto));
+    ASSERT_THAT(
+        icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+        EqualsProto(expected_get_result_proto));
 
     // Can search for it
     SearchResultProto expected_search_result_proto;
@@ -3950,8 +4814,9 @@
   expected_get_result_proto.mutable_status()->set_message(
       "Document (namespace, uri) not found.");
 
-  EXPECT_THAT(icing.Get("namespace", "uri"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 
   // Can't search for it
   SearchResultProto empty_result;
@@ -3978,14 +4843,16 @@
     // Persisting shouldn't affect anything
     EXPECT_THAT(icing.PersistToDisk().status(), ProtoIsOk());
 
-    EXPECT_THAT(icing.Get("namespace", "uri"),
-                EqualsProto(expected_get_result_proto));
+    EXPECT_THAT(
+        icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+        EqualsProto(expected_get_result_proto));
   }  // Destructing persists as well
 
   IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
   EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-  EXPECT_THAT(icing.Get("namespace", "uri"),
-              EqualsProto(expected_get_result_proto));
+  EXPECT_THAT(
+      icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
+      EqualsProto(expected_get_result_proto));
 }
 
 TEST_F(IcingSearchEngineTest, ResetOk) {
@@ -4044,7 +4911,8 @@
   GetResultProto expected_get_result_proto;
   expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
   *expected_get_result_proto.mutable_document() = document;
-  EXPECT_THAT(icing.Get(document.namespace_(), document.uri()),
+  EXPECT_THAT(icing.Get(document.namespace_(), document.uri(),
+                        GetResultSpecProto::default_instance()),
               EqualsProto(expected_get_result_proto));
 
   // Can add new data.
@@ -4275,7 +5143,10 @@
   DocumentProto doc = CreateMessageDocument("namespace", "uri");
   EXPECT_THAT(icing.Put(doc).status(),
               ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
-  EXPECT_THAT(icing.Get(doc.namespace_(), doc.uri()).status(),
+  EXPECT_THAT(icing
+                  .Get(doc.namespace_(), doc.uri(),
+                       GetResultSpecProto::default_instance())
+                  .status(),
               ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
   EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(),
               ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
@@ -5310,11 +6181,13 @@
 }
 
 TEST_F(IcingSearchEngineTest, PutDocumentShouldLogDocumentStoreStats) {
-  DocumentProto document = DocumentBuilder()
-                               .SetKey("icing", "fake_type/0")
-                               .SetSchema("Message")
-                               .AddStringProperty("body", "message body")
-                               .Build();
+  DocumentProto document =
+      DocumentBuilder()
+          .SetKey("icing", "fake_type/0")
+          .SetSchema("Message")
+          .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+          .AddStringProperty("body", "message body")
+          .Build();
 
   auto fake_clock = std::make_unique<FakeClock>();
   fake_clock->SetTimerElapsedMilliseconds(10);
@@ -5330,8 +6203,11 @@
   EXPECT_THAT(
       put_result_proto.native_put_document_stats().document_store_latency_ms(),
       Eq(10));
-  EXPECT_THAT(put_result_proto.native_put_document_stats().document_size(),
-              Eq(document.ByteSizeLong()));
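+  // The logged size can exceed the input proto's size, presumably because the
+  // document store attaches internal fields before persisting; hence the
+  // range check (bounded by sizeof(DocumentProto::InternalFields)) instead of
+  // strict equality.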
+  size_t document_size =
+      put_result_proto.native_put_document_stats().document_size();
+  EXPECT_THAT(document_size, Ge(document.ByteSizeLong()));
+  EXPECT_THAT(document_size, Le(document.ByteSizeLong() +
+                                sizeof(DocumentProto::InternalFields)));
 }
 
 TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexingStats) {
@@ -5483,8 +6359,7 @@
   // Retrieve only one result at a time to make sure that projection works when
   // retrieving all pages.
   result_spec.set_num_per_page(1);
-  ResultSpecProto::TypePropertyMask* email_field_mask =
-      result_spec.add_type_property_masks();
+  TypePropertyMask* email_field_mask = result_spec.add_type_property_masks();
   email_field_mask->set_schema_type("Email");
   email_field_mask->add_paths("");
 
@@ -5568,8 +6443,7 @@
   // Retrieve only one result at a time to make sure that projection works when
   // retrieving all pages.
   result_spec.set_num_per_page(1);
-  ResultSpecProto::TypePropertyMask* email_field_mask =
-      result_spec.add_type_property_masks();
+  TypePropertyMask* email_field_mask = result_spec.add_type_property_masks();
   email_field_mask->set_schema_type("Email");
   email_field_mask->add_paths("sender.name");
   email_field_mask->add_paths("subject");
diff --git a/icing/index/hit/doc-hit-info.h b/icing/index/hit/doc-hit-info.h
index 8171960..0be87d6 100644
--- a/icing/index/hit/doc-hit-info.h
+++ b/icing/index/hit/doc-hit-info.h
@@ -25,7 +25,7 @@
 namespace icing {
 namespace lib {
 
-// DocHitInfo provides a collapsed view of all hits for a specific term and doc.
+// DocHitInfo provides a collapsed view of all hits for a specific doc.
 // Hits contain a document_id, section_id and a term frequency. The
 // information in multiple hits is collapsed into a DocHitInfo by providing a
 // SectionIdMask of all sections that contained a hit for this term as well as
@@ -36,7 +36,7 @@
                       SectionIdMask hit_section_ids_mask = kSectionIdMaskNone)
       : document_id_(document_id_in),
         hit_section_ids_mask_(hit_section_ids_mask) {
-    memset(hit_term_frequency_, Hit::kDefaultTermFrequency,
+    memset(hit_term_frequency_, Hit::kNoTermFrequency,
            sizeof(hit_term_frequency_));
   }
 
diff --git a/icing/index/hit/doc-hit-info_test.cc b/icing/index/hit/doc-hit-info_test.cc
index 15c0de9..36c1a06 100644
--- a/icing/index/hit/doc-hit-info_test.cc
+++ b/icing/index/hit/doc-hit-info_test.cc
@@ -34,13 +34,13 @@
 TEST(DocHitInfoTest, InitialMaxHitTermFrequencies) {
   DocHitInfo info(kSomeDocumentId);
   for (SectionId i = 0; i <= kMaxSectionId; ++i) {
-    EXPECT_THAT(info.hit_term_frequency(i), Eq(Hit::kDefaultTermFrequency));
+    EXPECT_THAT(info.hit_term_frequency(i), Eq(Hit::kNoTermFrequency));
   }
 }
 
 TEST(DocHitInfoTest, UpdateHitTermFrequenciesForTheFirstTime) {
   DocHitInfo info(kSomeDocumentId);
-  ASSERT_THAT(info.hit_term_frequency(3), Eq(Hit::kDefaultTermFrequency));
+  ASSERT_THAT(info.hit_term_frequency(3), Eq(Hit::kNoTermFrequency));
 
   // Updating a section for the first time should change its hit
   // term_frequency
diff --git a/icing/index/hit/hit.h b/icing/index/hit/hit.h
index 525a5e5..ee1f64b 100644
--- a/icing/index/hit/hit.h
+++ b/icing/index/hit/hit.h
@@ -58,6 +58,7 @@
   static constexpr TermFrequency kMaxTermFrequency =
       std::numeric_limits<TermFrequency>::max();
   static constexpr TermFrequency kDefaultTermFrequency = 1;
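+  // Sentinel meaning "no hit in this section": it lets a DocHitInfo
+  // distinguish untouched entries from real hits, whose term frequency
+  // defaults to 1.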
+  static constexpr TermFrequency kNoTermFrequency = 0;
 
   explicit Hit(Value value = kInvalidValue,
                TermFrequency term_frequency = kDefaultTermFrequency)
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
index 892263b..d2f9d41 100644
--- a/icing/index/index-processor.cc
+++ b/icing/index/index-processor.cc
@@ -31,34 +31,30 @@
 #include "icing/schema/section-manager.h"
 #include "icing/schema/section.h"
 #include "icing/store/document-id.h"
-#include "icing/tokenization/language-segmenter.h"
 #include "icing/tokenization/token.h"
 #include "icing/tokenization/tokenizer-factory.h"
 #include "icing/tokenization/tokenizer.h"
 #include "icing/transform/normalizer.h"
 #include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
 
 namespace icing {
 namespace lib {
 
 libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>>
-IndexProcessor::Create(const SchemaStore* schema_store,
-                       const LanguageSegmenter* lang_segmenter,
-                       const Normalizer* normalizer, Index* index,
+IndexProcessor::Create(const Normalizer* normalizer, Index* index,
                        const IndexProcessor::Options& options,
                        const Clock* clock) {
-  ICING_RETURN_ERROR_IF_NULL(schema_store);
-  ICING_RETURN_ERROR_IF_NULL(lang_segmenter);
   ICING_RETURN_ERROR_IF_NULL(normalizer);
   ICING_RETURN_ERROR_IF_NULL(index);
   ICING_RETURN_ERROR_IF_NULL(clock);
 
-  return std::unique_ptr<IndexProcessor>(new IndexProcessor(
-      schema_store, lang_segmenter, normalizer, index, options, clock));
+  return std::unique_ptr<IndexProcessor>(
+      new IndexProcessor(normalizer, index, options, clock));
 }
 
 libtextclassifier3::Status IndexProcessor::IndexDocument(
-    const DocumentProto& document, DocumentId document_id,
+    const TokenizedDocument& tokenized_document, DocumentId document_id,
     NativePutDocumentStats* put_document_stats) {
   std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
 
@@ -68,54 +64,45 @@
         "DocumentId %d must be greater than last added document_id %d",
         document_id, index_->last_added_document_id()));
   }
-  ICING_ASSIGN_OR_RETURN(std::vector<Section> sections,
-                         schema_store_.ExtractSections(document));
   uint32_t num_tokens = 0;
   libtextclassifier3::Status overall_status;
-  for (const Section& section : sections) {
+  for (const TokenizedSection& section : tokenized_document.sections()) {
     // TODO(b/152934343): pass real namespace ids in
     Index::Editor editor =
         index_->Edit(document_id, section.metadata.id,
                      section.metadata.term_match_type, /*namespace_id=*/0);
-    for (std::string_view subcontent : section.content) {
-      ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer> tokenizer,
-                             tokenizer_factory::CreateIndexingTokenizer(
-                                 section.metadata.tokenizer, &lang_segmenter_));
-      ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> itr,
-                             tokenizer->Tokenize(subcontent));
-      while (itr->Advance()) {
-        if (++num_tokens > options_.max_tokens_per_document) {
-          // Index all tokens buffered so far.
-          editor.IndexAllBufferedTerms();
-          if (put_document_stats != nullptr) {
-            put_document_stats->mutable_tokenization_stats()
-                ->set_exceeded_max_token_num(true);
-            put_document_stats->mutable_tokenization_stats()
-                ->set_num_tokens_indexed(options_.max_tokens_per_document);
-          }
-          switch (options_.token_limit_behavior) {
-            case Options::TokenLimitBehavior::kReturnError:
-              return absl_ports::ResourceExhaustedError(
-                  "Max number of tokens reached!");
-            case Options::TokenLimitBehavior::kSuppressError:
-              return overall_status;
-          }
+    for (std::string_view token : section.token_sequence) {
+      if (++num_tokens > options_.max_tokens_per_document) {
+        // Index all tokens buffered so far.
+        editor.IndexAllBufferedTerms();
+        if (put_document_stats != nullptr) {
+          put_document_stats->mutable_tokenization_stats()
+              ->set_exceeded_max_token_num(true);
+          put_document_stats->mutable_tokenization_stats()
+              ->set_num_tokens_indexed(options_.max_tokens_per_document);
         }
-        std::string term = normalizer_.NormalizeTerm(itr->GetToken().text);
-        // Add this term to Hit buffer. Even if adding this hit fails, we keep
-        // trying to add more hits because it's possible that future hits could
-        // still be added successfully. For instance if the lexicon is full, we
-        // might fail to add a hit for a new term, but should still be able to
-        // add hits for terms that are already in the index.
-        auto status = editor.BufferTerm(term.c_str());
-        if (overall_status.ok() && !status.ok()) {
-          // If we've succeeded to add everything so far, set overall_status to
-          // represent this new failure. If we've already failed, no need to
-          // update the status - we're already going to return a resource
-          // exhausted error.
-          overall_status = status;
+        switch (options_.token_limit_behavior) {
+          case Options::TokenLimitBehavior::kReturnError:
+            return absl_ports::ResourceExhaustedError(
+                "Max number of tokens reached!");
+          case Options::TokenLimitBehavior::kSuppressError:
+            return overall_status;
         }
       }
+      std::string term = normalizer_.NormalizeTerm(token);
+      // Add this term to Hit buffer. Even if adding this hit fails, we keep
+      // trying to add more hits because it's possible that future hits could
+      // still be added successfully. For instance if the lexicon is full, we
+      // might fail to add a hit for a new term, but should still be able to
+      // add hits for terms that are already in the index.
+      auto status = editor.BufferTerm(term.c_str());
+      if (overall_status.ok() && !status.ok()) {
+        // If we've succeeded in adding everything so far, set overall_status to
+        // represent this new failure. If we've already failed, no need to
+        // update the status - we're already going to return a resource
+        // exhausted error.
+        overall_status = status;
+      }
     }
     // Add all the seen terms to the index with their term frequency.
     auto status = editor.IndexAllBufferedTerms();
diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h
index 2eb4ad8..9fc7c46 100644
--- a/icing/index/index-processor.h
+++ b/icing/index/index-processor.h
@@ -21,12 +21,11 @@
 #include "icing/text_classifier/lib3/utils/base/status.h"
 #include "icing/index/index.h"
 #include "icing/proto/document.pb.h"
-#include "icing/schema/schema-store.h"
 #include "icing/schema/section-manager.h"
 #include "icing/store/document-id.h"
-#include "icing/tokenization/language-segmenter.h"
 #include "icing/tokenization/token.h"
 #include "icing/transform/normalizer.h"
+#include "icing/util/tokenized-document.h"
 
 namespace icing {
 namespace lib {
@@ -58,14 +57,13 @@
   //   An IndexProcessor on success
   //   FAILED_PRECONDITION if any of the pointers is null.
   static libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>> Create(
-      const SchemaStore* schema_store, const LanguageSegmenter* lang_segmenter,
       const Normalizer* normalizer, Index* index, const Options& options,
       const Clock* clock);
 
-  // Add document to the index, associated with document_id. If the number of
-  // tokens in the document exceeds max_tokens_per_document, then only the first
-  // max_tokens_per_document will be added to the index. All tokens of length
-  // exceeding max_token_length will be shortened to max_token_length.
+  // Add tokenized document to the index, associated with document_id. If the
+  // number of tokens in the document exceeds max_tokens_per_document, then only
+  // the first max_tokens_per_document will be added to the index. All tokens of
+  // length exceeding max_token_length will be shortened to max_token_length.
   //
   // Indexing a document *may* trigger an index merge. If a merge fails, then
   // all content in the index will be lost.
@@ -82,25 +80,19 @@
   //   NOT_FOUND if there is no definition for the document's schema type.
   //   INTERNAL_ERROR if any other errors occur
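+  //
+  // Callers now tokenize up front; a sketch of the pattern used throughout
+  // this change (names are illustrative):
+  //   ICING_ASSIGN_OR_RETURN(
+  //       TokenizedDocument tokenized_document,
+  //       TokenizedDocument::Create(schema_store, lang_segmenter, document));
+  //   return index_processor->IndexDocument(tokenized_document, document_id);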
   libtextclassifier3::Status IndexDocument(
-      const DocumentProto& document, DocumentId document_id,
+      const TokenizedDocument& tokenized_document, DocumentId document_id,
       NativePutDocumentStats* put_document_stats = nullptr);
 
  private:
-  IndexProcessor(const SchemaStore* schema_store,
-                 const LanguageSegmenter* lang_segmenter,
-                 const Normalizer* normalizer, Index* index,
+  IndexProcessor(const Normalizer* normalizer, Index* index,
                  const Options& options, const Clock* clock)
-      : schema_store_(*schema_store),
-        lang_segmenter_(*lang_segmenter),
-        normalizer_(*normalizer),
+      : normalizer_(*normalizer),
         index_(index),
         options_(options),
         clock_(*clock) {}
 
   std::string NormalizeToken(const Token& token);
 
-  const SchemaStore& schema_store_;
-  const LanguageSegmenter& lang_segmenter_;
   const Normalizer& normalizer_;
   Index* const index_;
   const Options options_;
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index 96a390b..afeac4d 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -31,6 +31,7 @@
 #include "icing/transform/normalizer-factory.h"
 #include "icing/transform/normalizer.h"
 #include "icing/util/logging.h"
+#include "icing/util/tokenized-document.h"
 #include "unicode/uloc.h"
 
 // Run on a Linux workstation:
@@ -168,16 +169,13 @@
 }
 
 std::unique_ptr<IndexProcessor> CreateIndexProcessor(
-    const SchemaStore* schema_store,
-    const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
-    Index* index, const Clock* clock) {
+    const Normalizer* normalizer, Index* index, const Clock* clock) {
   IndexProcessor::Options processor_options{};
   processor_options.max_tokens_per_document = 1024 * 1024 * 10;
   processor_options.token_limit_behavior =
       IndexProcessor::Options::TokenLimitBehavior::kReturnError;
 
-  return IndexProcessor::Create(schema_store, language_segmenter, normalizer,
-                                index, processor_options, clock)
+  return IndexProcessor::Create(normalizer, index, processor_options, clock)
       .ValueOrDie();
 }
 
@@ -203,15 +201,18 @@
   Clock clock;
   std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
   std::unique_ptr<IndexProcessor> index_processor =
-      CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
-                           normalizer.get(), index.get(), &clock);
+      CreateIndexProcessor(normalizer.get(), index.get(), &clock);
 
   DocumentProto input_document = CreateDocumentWithOneProperty(state.range(0));
+  TokenizedDocument tokenized_document(std::move(
+      TokenizedDocument::Create(schema_store.get(), language_segmenter.get(),
+                                input_document)
+          .ValueOrDie()));
 
   DocumentId document_id = 0;
   for (auto _ : state) {
     ICING_ASSERT_OK(
-        index_processor->IndexDocument(input_document, document_id++));
+        index_processor->IndexDocument(tokenized_document, document_id++));
   }
 
   CleanUp(filesystem, index_dir);
@@ -254,16 +255,19 @@
   Clock clock;
   std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
   std::unique_ptr<IndexProcessor> index_processor =
-      CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
-                           normalizer.get(), index.get(), &clock);
+      CreateIndexProcessor(normalizer.get(), index.get(), &clock);
 
   DocumentProto input_document =
       CreateDocumentWithTenProperties(state.range(0));
+  TokenizedDocument tokenized_document(std::move(
+      TokenizedDocument::Create(schema_store.get(), language_segmenter.get(),
+                                input_document)
+          .ValueOrDie()));
 
   DocumentId document_id = 0;
   for (auto _ : state) {
     ICING_ASSERT_OK(
-        index_processor->IndexDocument(input_document, document_id++));
+        index_processor->IndexDocument(tokenized_document, document_id++));
   }
 
   CleanUp(filesystem, index_dir);
@@ -306,16 +310,19 @@
   Clock clock;
   std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
   std::unique_ptr<IndexProcessor> index_processor =
-      CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
-                           normalizer.get(), index.get(), &clock);
+      CreateIndexProcessor(normalizer.get(), index.get(), &clock);
 
   DocumentProto input_document =
       CreateDocumentWithDiacriticLetters(state.range(0));
+  TokenizedDocument tokenized_document(std::move(
+      TokenizedDocument::Create(schema_store.get(), language_segmenter.get(),
+                                input_document)
+          .ValueOrDie()));
 
   DocumentId document_id = 0;
   for (auto _ : state) {
     ICING_ASSERT_OK(
-        index_processor->IndexDocument(input_document, document_id++));
+        index_processor->IndexDocument(tokenized_document, document_id++));
   }
 
   CleanUp(filesystem, index_dir);
@@ -358,15 +365,18 @@
   Clock clock;
   std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
   std::unique_ptr<IndexProcessor> index_processor =
-      CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
-                           normalizer.get(), index.get(), &clock);
+      CreateIndexProcessor(normalizer.get(), index.get(), &clock);
 
   DocumentProto input_document = CreateDocumentWithHiragana(state.range(0));
+  TokenizedDocument tokenized_document(std::move(
+      TokenizedDocument::Create(schema_store.get(), language_segmenter.get(),
+                                input_document)
+          .ValueOrDie()));
 
   DocumentId document_id = 0;
   for (auto _ : state) {
     ICING_ASSERT_OK(
-        index_processor->IndexDocument(input_document, document_id++));
+        index_processor->IndexDocument(tokenized_document, document_id++));
   }
 
   CleanUp(filesystem, index_dir);
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index bdd9575..e6bb615 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -53,6 +53,7 @@
 #include "icing/tokenization/language-segmenter.h"
 #include "icing/transform/normalizer-factory.h"
 #include "icing/transform/normalizer.h"
+#include "icing/util/tokenized-document.h"
 #include "unicode/uloc.h"
 
 namespace icing {
@@ -140,8 +141,7 @@
 
     ICING_ASSERT_OK_AND_ASSIGN(
         index_processor_,
-        IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
-                               normalizer_.get(), index_.get(),
+        IndexProcessor::Create(normalizer_.get(), index_.get(),
                                processor_options, &fake_clock_));
     mock_icing_filesystem_ = std::make_unique<IcingMockFilesystem>();
   }
@@ -195,7 +195,7 @@
     type_config->set_schema_type(std::string(kFakeType));
 
     AddStringProperty(std::string(kExactProperty), DataType::STRING,
-                      Cardinality::REQUIRED, TermMatchType::EXACT_ONLY,
+                      Cardinality::OPTIONAL, TermMatchType::EXACT_ONLY,
                       type_config);
 
     AddStringProperty(std::string(kPrefixedProperty), DataType::STRING,
@@ -244,25 +244,11 @@
   processor_options.token_limit_behavior =
       IndexProcessor::Options::TokenLimitBehavior::kReturnError;
 
-  EXPECT_THAT(
-      IndexProcessor::Create(/*schema_store=*/nullptr, lang_segmenter_.get(),
-                             normalizer_.get(), index_.get(), processor_options,
-                             &fake_clock_),
-      StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-
-  EXPECT_THAT(
-      IndexProcessor::Create(schema_store_.get(), /*lang_segmenter=*/nullptr,
-                             normalizer_.get(), index_.get(), processor_options,
-                             &fake_clock_),
-      StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
-
-  EXPECT_THAT(IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
-                                     /*normalizer=*/nullptr, index_.get(),
+  EXPECT_THAT(IndexProcessor::Create(/*normalizer=*/nullptr, index_.get(),
                                      processor_options, &fake_clock_),
               StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
 
-  EXPECT_THAT(IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
-                                     normalizer_.get(), /*index=*/nullptr,
+  EXPECT_THAT(IndexProcessor::Create(normalizer_.get(), /*index=*/nullptr,
                                      processor_options, &fake_clock_),
               StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
 }
@@ -276,7 +262,12 @@
           .AddBytesProperty(std::string(kUnindexedProperty2),
                             "attachment bytes")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
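+  // With no indexed string properties, the tokenized document presumably has
+  // no sections; indexing succeeds as a no-op and last_added_document_id is
+  // left untouched.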
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
 }
 
@@ -287,7 +278,12 @@
           .SetSchema(std::string(kFakeType))
           .AddStringProperty(std::string(kExactProperty), "hello world")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
 
   ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
@@ -313,7 +309,12 @@
           .AddStringProperty(std::string(kExactProperty), "hello world")
           .AddStringProperty(std::string(kPrefixedProperty), "good night moon!")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
 
   std::string coffeeRepeatedString = "coffee";
@@ -329,7 +330,12 @@
           .AddStringProperty(std::string(kPrefixedProperty),
                              "mr. world world wide")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId1),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
 
   ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
@@ -372,11 +378,18 @@
           .AddDocumentProperty(
               std::string(kSubProperty),
               DocumentBuilder()
+                  .SetKey("icing", "nested_type/1")
+                  .SetSchema(std::string(kNestedType))
                   .AddStringProperty(std::string(kNestedProperty),
                                      "rocky raccoon")
                   .Build())
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
 
   ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
@@ -396,7 +409,12 @@
           .AddStringProperty(std::string(kRepeatedProperty), "rocky",
                              "italian stallion")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
 
   ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
@@ -417,8 +435,7 @@
 
   ICING_ASSERT_OK_AND_ASSIGN(
       index_processor_,
-      IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
-                             normalizer_.get(), index_.get(), options,
+      IndexProcessor::Create(normalizer_.get(), index_.get(), options,
                              &fake_clock_));
 
   DocumentProto document =
@@ -428,7 +445,11 @@
           .AddStringProperty(std::string(kExactProperty), "hello world")
           .AddStringProperty(std::string(kPrefixedProperty), "good night moon!")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0),
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
               StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
 
@@ -457,8 +478,7 @@
 
   ICING_ASSERT_OK_AND_ASSIGN(
       index_processor_,
-      IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
-                             normalizer_.get(), index_.get(), options,
+      IndexProcessor::Create(normalizer_.get(), index_.get(), options,
                              &fake_clock_));
 
   DocumentProto document =
@@ -468,7 +488,12 @@
           .AddStringProperty(std::string(kExactProperty), "hello world")
           .AddStringProperty(std::string(kPrefixedProperty), "good night moon!")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
 
   // "night" should have been indexed.
@@ -498,8 +523,7 @@
 
   ICING_ASSERT_OK_AND_ASSIGN(
       index_processor_,
-      IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
-                             normalizer.get(), index_.get(), options,
+      IndexProcessor::Create(normalizer.get(), index_.get(), options,
                              &fake_clock_));
 
   DocumentProto document =
@@ -509,7 +533,12 @@
           .AddStringProperty(std::string(kExactProperty), "hello world")
           .AddStringProperty(std::string(kPrefixedProperty), "good night moon!")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
 
   // "good" should have been indexed normally.
@@ -542,7 +571,12 @@
           .SetSchema(std::string(kFakeType))
           .AddStringProperty(std::string(kExactProperty), "best rocky movies")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
 
   document =
@@ -551,7 +585,12 @@
           .SetSchema(std::string(kFakeType))
           .AddStringProperty(std::string(kPrefixedProperty), "rocky raccoon")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId1),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
 
   // Only document_id 1 should surface in a prefix query for "Rock"
@@ -570,7 +609,12 @@
           .SetSchema(std::string(kFakeType))
           .AddStringProperty(std::string(kExactProperty), "ALL UPPER CASE")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
 
   document =
@@ -579,7 +623,12 @@
           .SetSchema(std::string(kFakeType))
           .AddStringProperty(std::string(kExactProperty), "all lower case")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId1),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
 
   ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
@@ -600,7 +649,12 @@
           .SetSchema(std::string(kFakeType))
           .AddStringProperty(std::string(kExactProperty), "ALL UPPER CASE")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId1),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
 
   // Indexing a document with document_id < last_added_document_id should cause
@@ -611,11 +665,15 @@
           .SetSchema(std::string(kFakeType))
           .AddStringProperty(std::string(kExactProperty), "all lower case")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0),
+  ICING_ASSERT_OK_AND_ASSIGN(
+      tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
               StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
 
   // As should indexing a document with document_id == last_added_document_id.
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0),
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
               StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
 
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
@@ -635,8 +693,7 @@
 
   ICING_ASSERT_OK_AND_ASSIGN(
       index_processor_,
-      IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
-                             normalizer_.get(), index_.get(),
+      IndexProcessor::Create(normalizer_.get(), index_.get(),
                              processor_options, &fake_clock_));
 
   DocumentProto document =
@@ -646,7 +703,12 @@
           .AddStringProperty(std::string(kExactProperty),
                              "你好,世界!你好:世界。“你好”世界?")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
 
   ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
@@ -666,8 +728,7 @@
 
   ICING_ASSERT_OK_AND_ASSIGN(
       index_processor_,
-      IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
-                             normalizer_.get(), index_.get(), processor_options,
+      IndexProcessor::Create(normalizer_.get(), index_.get(), processor_options,
                              &fake_clock_));
 
   // This is the maximum token length that an empty lexicon constructed for a
@@ -684,7 +745,11 @@
                              absl_ports::StrCat(enormous_string, " foo"))
           .AddStringProperty(std::string(kPrefixedProperty), "bar baz")
           .Build();
-  EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0),
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
               StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
   EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
 
@@ -715,6 +780,10 @@
           .SetSchema(std::string(kFakeType))
           .AddStringProperty(std::string(kExactProperty), kIpsumText)
           .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
   Index::Options options(index_dir_,
                          /*index_merge_size=*/document.ByteSizeLong() * 100);
   ICING_ASSERT_OK_AND_ASSIGN(
@@ -727,8 +796,7 @@
 
   ICING_ASSERT_OK_AND_ASSIGN(
       index_processor_,
-      IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
-                             normalizer_.get(), index_.get(), processor_options,
+      IndexProcessor::Create(normalizer_.get(), index_.get(), processor_options,
                              &fake_clock_));
   DocumentId doc_id = 0;
   // Have determined experimentally that indexing 3373 documents with this text
@@ -737,10 +805,12 @@
   // empties the LiteIndex.
   constexpr int kNumDocsLiteIndexExhaustion = 3373;
   for (; doc_id < kNumDocsLiteIndexExhaustion; ++doc_id) {
-    EXPECT_THAT(index_processor_->IndexDocument(document, doc_id), IsOk());
+    EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, doc_id),
+                IsOk());
     EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id));
   }
-  EXPECT_THAT(index_processor_->IndexDocument(document, doc_id), IsOk());
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, doc_id),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id));
 }
 
@@ -768,6 +838,10 @@
           .SetSchema(std::string(kFakeType))
           .AddStringProperty(std::string(kPrefixedProperty), kIpsumText)
           .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(
+      TokenizedDocument tokenized_document,
+      TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+                                document));
 
   // 2. Recreate the index with the mock filesystem and a merge size that will
   // only allow one document to be added before requiring a merge.
@@ -784,25 +858,26 @@
 
   ICING_ASSERT_OK_AND_ASSIGN(
       index_processor_,
-      IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
-                             normalizer_.get(), index_.get(), processor_options,
+      IndexProcessor::Create(normalizer_.get(), index_.get(), processor_options,
                              &fake_clock_));
 
   // 3. Index one document. This should fit in the LiteIndex without requiring a
   // merge.
   DocumentId doc_id = 0;
-  EXPECT_THAT(index_processor_->IndexDocument(document, doc_id), IsOk());
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, doc_id),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id));
 
   // 4. Add one more document to trigger a merge, which should fail and result
   // in a Reset.
   ++doc_id;
-  EXPECT_THAT(index_processor_->IndexDocument(document, doc_id),
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, doc_id),
               StatusIs(libtextclassifier3::StatusCode::DATA_LOSS));
   EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
 
   // 5. Indexing a new document should succeed.
-  EXPECT_THAT(index_processor_->IndexDocument(document, doc_id), IsOk());
+  EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, doc_id),
+              IsOk());
   EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id));
 }
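
The tests above all follow the same two-step flow introduced by this change: segment the document once with TokenizedDocument::Create, then pass the tokenized result to IndexProcessor::IndexDocument. A minimal sketch of that flow outside the test fixture (schema_store, lang_segmenter, index_processor, and document_id stand in for the fixture members and are assumptions here):

    // Sketch only: tokenization is now decoupled from IndexProcessor, so the
    // document is segmented exactly once and its token count can be reused
    // elsewhere (e.g. when putting the document into the DocumentStore).
    ICING_ASSIGN_OR_RETURN(
        TokenizedDocument tokenized_document,
        TokenizedDocument::Create(schema_store, lang_segmenter, document));
    ICING_RETURN_IF_ERROR(
        index_processor->IndexDocument(tokenized_document, document_id));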
 
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.h b/icing/index/iterator/doc-hit-info-iterator-and.h
index 4618fb9..faca785 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.h
+++ b/icing/index/iterator/doc-hit-info-iterator-and.h
@@ -46,6 +46,16 @@
 
   std::string ToString() const override;
 
+  void PopulateMatchedTermsStats(
+      std::vector<TermMatchInfo> *matched_terms_stats) const override {
+    if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+      // Current hit isn't valid, return.
+      return;
+    }
+    short_->PopulateMatchedTermsStats(matched_terms_stats);
+    long_->PopulateMatchedTermsStats(matched_terms_stats);
+  }
+
  private:
   std::unique_ptr<DocHitInfoIterator> short_;
   std::unique_ptr<DocHitInfoIterator> long_;
@@ -67,6 +77,17 @@
 
   std::string ToString() const override;
 
+  void PopulateMatchedTermsStats(
+      std::vector<TermMatchInfo> *matched_terms_stats) const override {
+    if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+      // Current hit isn't valid, return.
+      return;
+    }
+    for (size_t i = 0; i < iterators_.size(); ++i) {
+      iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats);
+    }
+  }
+
  private:
   std::vector<std::unique_ptr<DocHitInfoIterator>> iterators_;
 };
diff --git a/icing/index/iterator/doc-hit-info-iterator-and_test.cc b/icing/index/iterator/doc-hit-info-iterator-and_test.cc
index 35574b7..783e937 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-and_test.cc
@@ -32,8 +32,10 @@
 namespace {
 
 using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
 using ::testing::Eq;
 using ::testing::IsEmpty;
+using ::testing::SizeIs;
 
 TEST(CreateAndIteratorTest, And) {
   // Basic test that we can create a working And iterator. Further testing of
@@ -196,6 +198,125 @@
   EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
 }
 
+TEST(DocHitInfoIteratorAndTest, PopulateMatchedTermsStats) {
+  {
+    // Arbitrary section ids for the documents in the DocHitInfoIterators.
+    // Created to test correct section_id_mask behavior.
+    SectionIdMask section_id_mask1 = 0b01010101;  // hits in sections 0, 2, 4, 6
+    std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies1{
+        1, 0, 2, 0, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0};
+    SectionIdMask section_id_mask2 = 0b00000110;  // hits in sections 1, 2
+    std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies2{
+        0, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+    DocHitInfo doc_hit_info1 = DocHitInfo(4);
+    doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+    doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+    doc_hit_info1.UpdateSection(/*section_id=*/4, /*hit_term_frequency=*/3);
+    doc_hit_info1.UpdateSection(/*section_id=*/6, /*hit_term_frequency=*/4);
+
+    DocHitInfo doc_hit_info2 = DocHitInfo(4);
+    doc_hit_info2.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
+    doc_hit_info2.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/6);
+
+    std::vector<DocHitInfo> first_vector = {doc_hit_info1};
+    std::vector<DocHitInfo> second_vector = {doc_hit_info2};
+
+    auto first_iter =
+        std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+    first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+
+    auto second_iter =
+        std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
+    second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+
+    DocHitInfoIteratorAnd and_iter(std::move(first_iter),
+                                   std::move(second_iter));
+    std::vector<TermMatchInfo> matched_terms_stats;
+    and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+    EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+    ICING_EXPECT_OK(and_iter.Advance());
+    EXPECT_THAT(and_iter.doc_hit_info().document_id(), Eq(4));
+
+    and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+    ASSERT_THAT(matched_terms_stats, SizeIs(2));  // 2 terms
+    EXPECT_EQ(matched_terms_stats.at(0).term, "hi");
+    EXPECT_EQ(matched_terms_stats.at(1).term, "hello");
+    EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+                ElementsAreArray(term_frequencies1));
+    EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask1);
+    EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+                ElementsAreArray(term_frequencies2));
+    EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask2);
+
+    EXPECT_FALSE(and_iter.Advance().ok());
+  }
+  {
+    // Arbitrary section ids for the documents in the DocHitInfoIterators.
+    // Created to test correct section_id_mask behavior.
+    SectionIdMask section_id_mask1 = 0b00000101;  // hits in sections 0, 2
+    std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies1{
+        1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+    DocHitInfo doc_hit_info1 = DocHitInfo(4);
+    doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+    doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+
+    std::vector<DocHitInfo> first_vector = {doc_hit_info1};
+    std::vector<DocHitInfo> second_vector = {doc_hit_info1};
+
+    auto first_iter =
+        std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+    first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+
+    auto second_iter =
+        std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hi");
+    second_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+
+    DocHitInfoIteratorAnd and_iter(std::move(first_iter),
+                                   std::move(second_iter));
+    std::vector<TermMatchInfo> matched_terms_stats;
+    and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+    EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+    ICING_EXPECT_OK(and_iter.Advance());
+    EXPECT_THAT(and_iter.doc_hit_info().document_id(), Eq(4));
+
+    and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+    ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+    EXPECT_EQ(matched_terms_stats.at(0).term, "hi");
+    EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+                ElementsAreArray(term_frequencies1));
+    EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask1);
+
+    EXPECT_FALSE(and_iter.Advance().ok());
+  }
+}
+
+TEST(DocHitInfoIteratorAndTest, PopulateMatchedTermsStats_NoMatchingDocument) {
+  DocHitInfo doc_hit_info1 = DocHitInfo(4);
+  doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+
+  DocHitInfo doc_hit_info2 = DocHitInfo(5);
+  doc_hit_info2.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
+  doc_hit_info2.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/6);
+
+  std::vector<DocHitInfo> first_vector = {doc_hit_info1};
+  std::vector<DocHitInfo> second_vector = {doc_hit_info2};
+
+  auto first_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+  auto second_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
+
+  DocHitInfoIteratorAnd and_iter(std::move(first_iter), std::move(second_iter));
+  std::vector<TermMatchInfo> matched_terms_stats;
+  and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+  EXPECT_THAT(matched_terms_stats, IsEmpty());
+  EXPECT_FALSE(and_iter.Advance().ok());
+}
+
 TEST(DocHitInfoIteratorAndNaryTest, Initialize) {
   std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
   iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>());
@@ -345,6 +466,90 @@
   EXPECT_THAT(and_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
 }
 
+TEST(DocHitInfoIteratorAndNaryTest, PopulateMatchedTermsStats) {
+  // Arbitrary section ids/term frequencies for the documents in the
+  // DocHitInfoIterators.
+  // For term "hi", document 10 and 8
+  SectionIdMask section_id_mask1_hi = 0b01000101;  // hits in sections 0, 2, 6
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies1_hi{
+      1, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0};
+  DocHitInfo doc_hit_info1_hi = DocHitInfo(10);
+  doc_hit_info1_hi.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  doc_hit_info1_hi.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+  doc_hit_info1_hi.UpdateSection(/*section_id=*/6, /*hit_term_frequency=*/4);
+
+  DocHitInfo doc_hit_info2_hi = DocHitInfo(8);
+  doc_hit_info2_hi.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
+  doc_hit_info2_hi.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/6);
+
+  // For term "hello", document 10 and 9
+  SectionIdMask section_id_mask1_hello = 0b00001001;  // hits in sections 0, 3
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies1_hello{
+      2, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  DocHitInfo doc_hit_info1_hello = DocHitInfo(10);
+  doc_hit_info1_hello.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/2);
+  doc_hit_info1_hello.UpdateSection(/*section_id=*/3, /*hit_term_frequency=*/3);
+
+  DocHitInfo doc_hit_info2_hello = DocHitInfo(9);
+  doc_hit_info2_hello.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/3);
+  doc_hit_info2_hello.UpdateSection(/*section_id=*/3, /*hit_term_frequency=*/2);
+
+  // For term "ciao", document 10 and 9
+  SectionIdMask section_id_mask1_ciao = 0b00000011;  // hits in sections 0, 1
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies1_ciao{
+      2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  DocHitInfo doc_hit_info1_ciao = DocHitInfo(10);
+  doc_hit_info1_ciao.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/2);
+  doc_hit_info1_ciao.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/3);
+
+  DocHitInfo doc_hit_info2_ciao = DocHitInfo(9);
+  doc_hit_info2_ciao.UpdateSection(/*section_id=*/3, /*hit_term_frequency=*/3);
+  doc_hit_info2_ciao.UpdateSection(/*section_id=*/4, /*hit_term_frequency=*/2);
+
+  std::vector<DocHitInfo> first_vector = {doc_hit_info1_hi, doc_hit_info2_hi};
+  std::vector<DocHitInfo> second_vector = {doc_hit_info1_hello,
+                                           doc_hit_info2_hello};
+  std::vector<DocHitInfo> third_vector = {doc_hit_info1_ciao,
+                                          doc_hit_info2_ciao};
+
+  auto first_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+  auto second_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
+  auto third_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(third_vector, "ciao");
+
+  std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+  iterators.push_back(std::move(first_iter));
+  iterators.push_back(std::move(second_iter));
+  iterators.push_back(std::move(third_iter));
+
+  DocHitInfoIteratorAndNary and_iter(std::move(iterators));
+  std::vector<TermMatchInfo> matched_terms_stats;
+  and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+  EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+  ICING_EXPECT_OK(and_iter.Advance());
+  EXPECT_THAT(and_iter.doc_hit_info().document_id(), Eq(10));
+
+  and_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(3));  // 3 terms
+  EXPECT_EQ(matched_terms_stats.at(0).term, "hi");
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies1_hi));
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask1_hi);
+  EXPECT_EQ(matched_terms_stats.at(1).term, "hello");
+  EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+              ElementsAreArray(term_frequencies1_hello));
+  EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask1_hello);
+  EXPECT_EQ(matched_terms_stats.at(2).term, "ciao");
+  EXPECT_THAT(matched_terms_stats.at(2).term_frequencies,
+              ElementsAreArray(term_frequencies1_ciao));
+  EXPECT_EQ(matched_terms_stats.at(2).section_ids_mask, section_id_mask1_ciao);
+
+  EXPECT_FALSE(and_iter.Advance().ok());
+}
+
 }  // namespace
 
 }  // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.h b/icing/index/iterator/doc-hit-info-iterator-filter.h
index 9119610..fb60e38 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.h
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.h
@@ -67,6 +67,11 @@
 
   std::string ToString() const override;
 
+  void PopulateMatchedTermsStats(
+      std::vector<TermMatchInfo>* matched_terms_stats) const override {
+    delegate_->PopulateMatchedTermsStats(matched_terms_stats);
+  }
+
  private:
   std::unique_ptr<DocHitInfoIterator> delegate_;
   const DocumentStore& document_store_;
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.cc b/icing/index/iterator/doc-hit-info-iterator-or.cc
index 8f00f88..b4234e0 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-or.cc
@@ -108,6 +108,7 @@
   } else {
     chosen = left_.get();
   }
+  current_ = chosen;
 
   doc_hit_info_ = chosen->doc_hit_info();
   hit_intersect_section_ids_mask_ = chosen->hit_intersect_section_ids_mask();
@@ -139,6 +140,7 @@
     : iterators_(std::move(iterators)) {}
 
 libtextclassifier3::Status DocHitInfoIteratorOrNary::Advance() {
+  current_iterators_.clear();
   if (iterators_.size() < 2) {
     return absl_ports::InvalidArgumentError(
         "Not enough iterators to OR together");
@@ -187,6 +189,7 @@
   hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
   for (const auto& iterator : iterators_) {
     if (iterator->doc_hit_info().document_id() == next_document_id) {
+      current_iterators_.push_back(iterator.get());
       if (doc_hit_info_.document_id() == kInvalidDocumentId) {
         doc_hit_info_ = iterator->doc_hit_info();
         hit_intersect_section_ids_mask_ =
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.h b/icing/index/iterator/doc-hit-info-iterator-or.h
index 4128e0f..2f49430 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.h
+++ b/icing/index/iterator/doc-hit-info-iterator-or.h
@@ -42,9 +42,26 @@
 
   std::string ToString() const override;
 
+  void PopulateMatchedTermsStats(
+      std::vector<TermMatchInfo> *matched_terms_stats) const override {
+    if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+      // Current hit isn't valid, return.
+      return;
+    }
+    current_->PopulateMatchedTermsStats(matched_terms_stats);
+    // If equal, then current_ == left_. Combine with results from right_.
+    if (left_document_id_ == right_document_id_) {
+      right_->PopulateMatchedTermsStats(matched_terms_stats);
+    }
+  }
+
  private:
   std::unique_ptr<DocHitInfoIterator> left_;
   std::unique_ptr<DocHitInfoIterator> right_;
+  // Pointer to the child iterator that produced the current doc_hit_info_.
+  // If both left_ and right_ point to the same document id, then
+  // current_ == left_. current_ does not own the iterator it points to.
+  DocHitInfoIterator *current_;
   DocumentId left_document_id_ = kMaxDocumentId;
   DocumentId right_document_id_ = kMaxDocumentId;
 };
@@ -65,8 +82,22 @@
 
   std::string ToString() const override;
 
+  void PopulateMatchedTermsStats(
+      std::vector<TermMatchInfo> *matched_terms_stats) const override {
+    if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+      // Current hit isn't valid, return.
+      return;
+    }
+    for (size_t i = 0; i < current_iterators_.size(); i++) {
+      current_iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats);
+    }
+  }
+
  private:
   std::vector<std::unique_ptr<DocHitInfoIterator>> iterators_;
+  // Pointers to the iterators that point to the current doc_hit_info_.
+  // current_iterators_ does not own the iterators it points to.
+  std::vector<DocHitInfoIterator *> current_iterators_;
 };
 
 }  // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-or_test.cc b/icing/index/iterator/doc-hit-info-iterator-or_test.cc
index 3faa5ab..3f00a39 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-or_test.cc
@@ -32,7 +32,10 @@
 namespace {
 
 using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
 using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
 
 TEST(CreateAndIteratorTest, Or) {
   // Basic test that we can create a working Or iterator. Further testing of
@@ -175,6 +178,159 @@
   EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
 }
 
+TEST(DocHitInfoIteratorOrTest, PopulateMatchedTermsStats) {
+  {
+    // Arbitrary section ids for the documents in the DocHitInfoIterators.
+    // Created to test correct section_id_mask behavior.
+    SectionIdMask section_id_mask1 = 0b01010101;  // hits in sections 0, 2, 4, 6
+    std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies1{
+        1, 0, 2, 0, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0};
+    SectionIdMask section_id_mask2 = 0b00000110;  // hits in sections 1, 2
+    std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies2{
+        0, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+    DocHitInfo doc_hit_info1 = DocHitInfo(4);
+    doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+    doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+    doc_hit_info1.UpdateSection(/*section_id=*/4, /*hit_term_frequency=*/3);
+    doc_hit_info1.UpdateSection(/*section_id=*/6, /*hit_term_frequency=*/4);
+
+    DocHitInfo doc_hit_info2 = DocHitInfo(4);
+    doc_hit_info2.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
+    doc_hit_info2.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/6);
+
+    std::vector<DocHitInfo> first_vector = {doc_hit_info1};
+    std::vector<DocHitInfo> second_vector = {doc_hit_info2};
+
+    auto first_iter =
+        std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+    first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+
+    auto second_iter =
+        std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
+    second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+
+    DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
+    std::vector<TermMatchInfo> matched_terms_stats;
+    or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+    EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+    ICING_EXPECT_OK(or_iter.Advance());
+    EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(4));
+
+    or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+    ASSERT_THAT(matched_terms_stats, SizeIs(2));  // 2 terms
+    EXPECT_EQ(matched_terms_stats.at(0).term, "hi");
+    EXPECT_EQ(matched_terms_stats.at(1).term, "hello");
+    EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+                ElementsAreArray(term_frequencies1));
+    EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask1);
+    EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+                ElementsAreArray(term_frequencies2));
+    EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask2);
+
+    EXPECT_FALSE(or_iter.Advance().ok());
+  }
+  {
+    // Arbitrary section ids for the documents in the DocHitInfoIterators.
+    // Created to test correct section_id_mask behavior.
+    SectionIdMask section_id_mask1 = 0b00000101;  // hits in sections 0, 2
+    std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies1{
+        1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+    DocHitInfo doc_hit_info1 = DocHitInfo(4);
+    doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+    doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+
+    std::vector<DocHitInfo> first_vector = {doc_hit_info1};
+    std::vector<DocHitInfo> second_vector = {doc_hit_info1};
+
+    auto first_iter =
+        std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+    first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+
+    auto second_iter =
+        std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hi");
+    second_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+
+    DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
+    std::vector<TermMatchInfo> matched_terms_stats;
+    or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+    EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+    ICING_EXPECT_OK(or_iter.Advance());
+    EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(4));
+
+    or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+    ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+    EXPECT_EQ(matched_terms_stats.at(0).term, "hi");
+    EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+                ElementsAreArray(term_frequencies1));
+    EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask1);
+
+    EXPECT_FALSE(or_iter.Advance().ok());
+  }
+  {
+    // Arbitrary section ids for the documents in the DocHitInfoIterators.
+    // Created to test correct section_id_mask behavior.
+    SectionIdMask section_id_mask1 = 0b01010101;  // hits in sections 0, 2, 4, 6
+    std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies1{
+        1, 0, 2, 0, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0};
+    SectionIdMask section_id_mask2 = 0b00000110;  // hits in sections 1, 2
+    std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies2{
+        0, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+    DocHitInfo doc_hit_info1 = DocHitInfo(4);
+    doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+    doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+    doc_hit_info1.UpdateSection(/*section_id=*/4, /*hit_term_frequency=*/3);
+    doc_hit_info1.UpdateSection(/*section_id=*/6, /*hit_term_frequency=*/4);
+
+    DocHitInfo doc_hit_info2 = DocHitInfo(5);
+    doc_hit_info2.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
+    doc_hit_info2.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/6);
+
+    std::vector<DocHitInfo> first_vector = {doc_hit_info1};
+    std::vector<DocHitInfo> second_vector = {doc_hit_info2};
+
+    auto first_iter =
+        std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+    first_iter->set_hit_intersect_section_ids_mask(section_id_mask1);
+
+    auto second_iter =
+        std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
+    second_iter->set_hit_intersect_section_ids_mask(section_id_mask2);
+
+    DocHitInfoIteratorOr or_iter(std::move(first_iter), std::move(second_iter));
+    std::vector<TermMatchInfo> matched_terms_stats;
+    or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+    EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+    ICING_EXPECT_OK(or_iter.Advance());
+    EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(5));
+
+    or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+    ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+    EXPECT_EQ(matched_terms_stats.at(0).term, "hello");
+    EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+                ElementsAreArray(term_frequencies2));
+    EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask2);
+
+    ICING_EXPECT_OK(or_iter.Advance());
+    EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(4));
+
+    matched_terms_stats.clear();
+    or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+    ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+    EXPECT_EQ(matched_terms_stats.at(0).term, "hi");
+    EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+                ElementsAreArray(term_frequencies1));
+    EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask1);
+
+    EXPECT_FALSE(or_iter.Advance().ok());
+  }
+}
+
 TEST(DocHitInfoIteratorOrNaryTest, Initialize) {
   std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
   iterators.push_back(std::make_unique<DocHitInfoIteratorDummy>());
@@ -316,6 +472,125 @@
   EXPECT_THAT(or_iter.hit_intersect_section_ids_mask(), Eq(mask_anded_result));
 }
 
+TEST(DocHitInfoIteratorOrNaryTest, PopulateMatchedTermsStats) {
+  // Arbitrary section ids/term frequencies for the documents in the
+  // DocHitInfoIterators.
+  // For term "hi", document 10 and 8
+  SectionIdMask section_id_mask1_hi = 0b01000101;  // hits in sections 0, 2, 6
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies1_hi{
+      1, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0};
+  DocHitInfo doc_hit_info1_hi = DocHitInfo(10);
+  doc_hit_info1_hi.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  doc_hit_info1_hi.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+  doc_hit_info1_hi.UpdateSection(/*section_id=*/6, /*hit_term_frequency=*/4);
+
+  SectionIdMask section_id_mask2_hi = 0b00000110;  // hits in sections 1, 2
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies2_hi{
+      0, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  DocHitInfo doc_hit_info2_hi = DocHitInfo(8);
+  doc_hit_info2_hi.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
+  doc_hit_info2_hi.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/6);
+
+  // For term "hello", document 10 and 9
+  SectionIdMask section_id_mask1_hello = 0b00001001;  // hits in sections 0, 3
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies1_hello{
+      2, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  DocHitInfo doc_hit_info1_hello = DocHitInfo(10);
+  doc_hit_info1_hello.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/2);
+  doc_hit_info1_hello.UpdateSection(/*section_id=*/3, /*hit_term_frequency=*/3);
+
+  SectionIdMask section_id_mask2_hello = 0b00001100;  // hits in sections 2, 3
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies2_hello{
+      0, 0, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  DocHitInfo doc_hit_info2_hello = DocHitInfo(9);
+  doc_hit_info2_hello.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/3);
+  doc_hit_info2_hello.UpdateSection(/*section_id=*/3, /*hit_term_frequency=*/2);
+
+  // For term "ciao", document 9 and 8
+  SectionIdMask section_id_mask1_ciao = 0b00000011;  // hits in sections 0, 1
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies1_ciao{
+      2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  DocHitInfo doc_hit_info1_ciao = DocHitInfo(9);
+  doc_hit_info1_ciao.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/2);
+  doc_hit_info1_ciao.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/3);
+
+  SectionIdMask section_id_mask2_ciao = 0b00011000;  // hits in sections 3, 4
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies2_ciao{
+      0, 0, 0, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+  DocHitInfo doc_hit_info2_ciao = DocHitInfo(8);
+  doc_hit_info2_ciao.UpdateSection(/*section_id=*/3, /*hit_term_frequency=*/3);
+  doc_hit_info2_ciao.UpdateSection(/*section_id=*/4, /*hit_term_frequency=*/2);
+
+  std::vector<DocHitInfo> first_vector = {doc_hit_info1_hi, doc_hit_info2_hi};
+  std::vector<DocHitInfo> second_vector = {doc_hit_info1_hello,
+                                           doc_hit_info2_hello};
+  std::vector<DocHitInfo> third_vector = {doc_hit_info1_ciao,
+                                          doc_hit_info2_ciao};
+
+  auto first_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(first_vector, "hi");
+  auto second_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(second_vector, "hello");
+  auto third_iter =
+      std::make_unique<DocHitInfoIteratorDummy>(third_vector, "ciao");
+
+  std::vector<std::unique_ptr<DocHitInfoIterator>> iterators;
+  iterators.push_back(std::move(first_iter));
+  iterators.push_back(std::move(second_iter));
+  iterators.push_back(std::move(third_iter));
+
+  DocHitInfoIteratorOrNary or_iter(std::move(iterators));
+  std::vector<TermMatchInfo> matched_terms_stats;
+  or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+  EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+  ICING_EXPECT_OK(or_iter.Advance());
+  EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(10));
+
+  or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(2));  // 2 terms
+  EXPECT_EQ(matched_terms_stats.at(0).term, "hi");
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies1_hi));
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask1_hi);
+  EXPECT_EQ(matched_terms_stats.at(1).term, "hello");
+  EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+              ElementsAreArray(term_frequencies1_hello));
+  EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask1_hello);
+
+  ICING_EXPECT_OK(or_iter.Advance());
+  EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(9));
+
+  matched_terms_stats.clear();
+  or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(2));  // 2 terms
+  EXPECT_EQ(matched_terms_stats.at(0).term, "hello");
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies2_hello));
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask2_hello);
+  EXPECT_EQ(matched_terms_stats.at(1).term, "ciao");
+  EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+              ElementsAreArray(term_frequencies1_ciao));
+  EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask1_ciao);
+
+  ICING_EXPECT_OK(or_iter.Advance());
+  EXPECT_THAT(or_iter.doc_hit_info().document_id(), Eq(8));
+
+  matched_terms_stats.clear();
+  or_iter.PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(2));  // 2 terms
+  EXPECT_EQ(matched_terms_stats.at(0).term, "hi");
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies2_hi));
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask2_hi);
+  EXPECT_EQ(matched_terms_stats.at(1).term, "ciao");
+  EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+              ElementsAreArray(term_frequencies2_ciao));
+  EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask2_ciao);
+
+  EXPECT_FALSE(or_iter.Advance().ok());
+}
+
 }  // namespace
 
 }  // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
index ae5a896..ba74384 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
@@ -52,6 +52,15 @@
 
   std::string ToString() const override;
 
+  // NOTE: currently, a section restrict does decide which documents to
+  // return, but it doesn't impact the relevance score of a document.
+  // TODO(b/173156803): decide whether we want to filter the matched_terms_stats
+  // for the restricted sections.
+  void PopulateMatchedTermsStats(
+      std::vector<TermMatchInfo>* matched_terms_stats) const override {
+    delegate_->PopulateMatchedTermsStats(matched_terms_stats);
+  }
+
  private:
   std::unique_ptr<DocHitInfoIterator> delegate_;
   const DocumentStore& document_store_;
diff --git a/icing/index/iterator/doc-hit-info-iterator-test-util.h b/icing/index/iterator/doc-hit-info-iterator-test-util.h
index c4d7aa7..913696a 100644
--- a/icing/index/iterator/doc-hit-info-iterator-test-util.h
+++ b/icing/index/iterator/doc-hit-info-iterator-test-util.h
@@ -15,7 +15,6 @@
 #ifndef ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TEST_UTIL_H_
 #define ICING_INDEX_ITERATOR_DOC_HIT_INFO_ITERATOR_TEST_UTIL_H_
 
-#include <cstdint>
 #include <string>
 #include <utility>
 #include <vector>
@@ -40,8 +39,9 @@
 class DocHitInfoIteratorDummy : public DocHitInfoIterator {
  public:
   DocHitInfoIteratorDummy() = default;
-  explicit DocHitInfoIteratorDummy(std::vector<DocHitInfo> doc_hit_infos)
-      : doc_hit_infos_(std::move(doc_hit_infos)) {}
+  explicit DocHitInfoIteratorDummy(std::vector<DocHitInfo> doc_hit_infos,
+                                   std::string term = "")
+      : doc_hit_infos_(std::move(doc_hit_infos)), term_(std::move(term)) {}
 
   libtextclassifier3::Status Advance() override {
     if (index_ < doc_hit_infos_.size()) {
@@ -54,6 +54,36 @@
         "No more DocHitInfos in iterator");
   }
 
+  // Imitates behavior of DocHitInfoIteratorTermMain/DocHitInfoIteratorTermLite
+  void PopulateMatchedTermsStats(
+      std::vector<TermMatchInfo>* matched_terms_stats) const override {
+    if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+      // Current hit isn't valid, return.
+      return;
+    }
+    SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask();
+    std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = {
+        Hit::kNoTermFrequency};
+
+    while (section_mask) {
+      SectionId section_id = __builtin_ctz(section_mask);
+      section_term_frequencies.at(section_id) =
+          doc_hit_info_.hit_term_frequency(section_id);
+      section_mask &= ~(1u << section_id);
+    }
+    TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(),
+                             section_term_frequencies);
+
+    for (auto& cur_term_stats : *matched_terms_stats) {
+      if (cur_term_stats.term == term_stats.term) {
+        // Same docId and same term: we don't need to add the term again, and
+        // the term frequency should always be the same.
+        return;
+      }
+    }
+    matched_terms_stats->push_back(term_stats);
+  }
+
   void set_hit_intersect_section_ids_mask(
       SectionIdMask hit_intersect_section_ids_mask) {
     hit_intersect_section_ids_mask_ = hit_intersect_section_ids_mask;
@@ -91,6 +121,7 @@
   int32_t num_blocks_inspected_ = 0;
   int32_t num_leaf_advance_calls_ = 0;
   std::vector<DocHitInfo> doc_hit_infos_;
+  std::string term_;
 };
 
 inline std::vector<DocumentId> GetDocumentIds(DocHitInfoIterator* iterator) {
diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h
index bcc2b6e..c4d9901 100644
--- a/icing/index/iterator/doc-hit-info-iterator.h
+++ b/icing/index/iterator/doc-hit-info-iterator.h
@@ -17,6 +17,7 @@
 
 #include <cstdint>
 #include <string>
+#include <string_view>
 
 #include "icing/text_classifier/lib3/utils/base/status.h"
 #include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -28,6 +29,26 @@
 namespace icing {
 namespace lib {
 
+// Data structure that maps a single matched query term to its section mask
+// and the list of term frequencies.
+// TODO(b/158603837): add stat on whether the matched terms are prefix matched
+// or not. This information will be used to boost exact match.
+struct TermMatchInfo {
+  std::string_view term;
+  // SectionIdMask associated to the term.
+  SectionIdMask section_ids_mask;
+  // Array with fixed size kMaxSectionId. For every section id, i.e.
+  // vector index, it stores the term frequency of the term.
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies;
+
+  explicit TermMatchInfo(
+      std::string_view term, SectionIdMask section_ids_mask,
+      std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies)
+      : term(term),
+        section_ids_mask(section_ids_mask),
+        term_frequencies(std::move(term_frequencies)) {}
+};
+
 // Iterator over DocHitInfos (collapsed Hits) in REVERSE document_id order.
 //
 // NOTE: You must call Advance() before calling hit_info() or
@@ -70,6 +91,14 @@
   // A string representing the iterator.
   virtual std::string ToString() const = 0;
 
+  // For the current hit doc id, retrieves all the matched query terms and
+  // other stats; see TermMatchInfo.
+  // The vector is left unpopulated if Advance() wasn't called after
+  // construction, if the last Advance() didn't succeed, or if the concrete
+  // iterator didn't override this method.
+  virtual void PopulateMatchedTermsStats(
+      std::vector<TermMatchInfo>* matched_terms_stats) const {}
+
  protected:
   DocHitInfo doc_hit_info_;
   SectionIdMask hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
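
PopulateMatchedTermsStats and TermMatchInfo exist so that callers can score the current hit from per-term, per-section frequencies. A hedged sketch of one possible consumer, assuming only the interfaces added above (the summing formula is illustrative, not the scorer this change ships):

    // Illustrative only: sums term frequencies across all sections for every
    // matched term of the current hit. Call after a successful Advance().
    int NaiveFrequencyScore(const DocHitInfoIterator& iterator) {
      std::vector<TermMatchInfo> matched_terms_stats;
      iterator.PopulateMatchedTermsStats(&matched_terms_stats);
      int score = 0;
      for (const TermMatchInfo& stats : matched_terms_stats) {
        for (Hit::TermFrequency tf : stats.term_frequencies) {
          score += tf;
        }
      }
      return score;
    }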
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h
index bd2de6d..ac5e97f 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.h
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h
@@ -49,6 +49,34 @@
   }
   int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
 
+  void PopulateMatchedTermsStats(
+      std::vector<TermMatchInfo>* matched_terms_stats) const override {
+    if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+      // Current hit isn't valid, return.
+      return;
+    }
+    SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask();
+    std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = {
+        Hit::kNoTermFrequency};
+    while (section_mask) {
+      SectionId section_id = __builtin_ctz(section_mask);
+      section_term_frequencies.at(section_id) =
+          doc_hit_info_.hit_term_frequency(section_id);
+      section_mask &= ~(1u << section_id);
+    }
+    TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(),
+                             std::move(section_term_frequencies));
+
+    for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) {
+      if (cur_term_stats.term == term_stats.term) {
+        // Same docId and same term: we don't need to add the term again, and
+        // the term frequency should always be the same.
+        return;
+      }
+    }
+    matched_terms_stats->push_back(std::move(term_stats));
+  }
+
  protected:
   // Add DocHitInfos corresponding to term_ to cached_hits_.
   virtual libtextclassifier3::Status RetrieveMoreHits() = 0;
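
The while loop above walks every set bit of the section mask: __builtin_ctz returns the index of the lowest set bit, and clearing that bit advances the walk. A standalone illustration of the idiom (the mask value is arbitrary):

    // Visits section ids 1, 2, and 6, lowest bit first.
    uint32_t mask = 0b01000110;
    while (mask) {
      int section_id = __builtin_ctz(mask);  // index of the lowest set bit
      // ... read doc_hit_info_.hit_term_frequency(section_id) here ...
      mask &= ~(1u << section_id);  // clear the bit and continue
    }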
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.h b/icing/index/main/doc-hit-info-iterator-term-main.h
index 1f77226..d626d7a 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.h
+++ b/icing/index/main/doc-hit-info-iterator-term-main.h
@@ -49,6 +49,34 @@
   }
   int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
 
+  void PopulateMatchedTermsStats(
+      std::vector<TermMatchInfo>* matched_terms_stats) const override {
+    if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+      // Current hit isn't valid, return.
+      return;
+    }
+    SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask();
+    std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = {
+        Hit::kNoTermFrequency};
+    while (section_mask) {
+      SectionId section_id = __builtin_ctz(section_mask);
+      section_term_frequencies.at(section_id) =
+          doc_hit_info_.hit_term_frequency(section_id);
+      section_mask &= ~(1u << section_id);
+    }
+    TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(),
+                             std::move(section_term_frequencies));
+
+    for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) {
+      if (cur_term_stats.term == term_stats.term) {
+        // Same docId and same term: we don't need to add the term again, and
+        // the term frequency should always be the same.
+        return;
+      }
+    }
+    matched_terms_stats->push_back(std::move(term_stats));
+  }
+
  protected:
   // Add DocHitInfos corresponding to term_ to cached_doc_hit_infos_.
   virtual libtextclassifier3::Status RetrieveMoreHits() = 0;
diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc
index a18a183..bf709cd 100644
--- a/icing/jni/icing-search-engine-jni.cc
+++ b/icing/jni/icing-search-engine-jni.cc
@@ -189,16 +189,21 @@
 
 JNIEXPORT jbyteArray JNICALL
 Java_com_google_android_icing_IcingSearchEngine_nativeGet(
-    JNIEnv* env, jclass clazz, jobject object, jstring name_space,
-    jstring uri) {
+    JNIEnv* env, jclass clazz, jobject object, jstring name_space, jstring uri,
+    jbyteArray result_spec_bytes) {
   icing::lib::IcingSearchEngine* icing =
       GetIcingSearchEnginePointer(env, object);
 
   const char* native_name_space =
       env->GetStringUTFChars(name_space, /*isCopy=*/nullptr);
   const char* native_uri = env->GetStringUTFChars(uri, /*isCopy=*/nullptr);
+  icing::lib::GetResultSpecProto get_result_spec;
+  if (!ParseProtoFromJniByteArray(env, result_spec_bytes, &get_result_spec)) {
+    ICING_LOG(ERROR) << "Failed to parse GetResultSpecProto in nativeGet";
+    return nullptr;
+  }
   icing::lib::GetResultProto get_result_proto =
-      icing->Get(native_name_space, native_uri);
+      icing->Get(native_name_space, native_uri, get_result_spec);
 
   return SerializeProtoToJniByteArray(env, get_result_proto);
 }
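
With the extra parameter, Java callers serialize a GetResultSpecProto describing which properties to project before crossing the JNI boundary. A hedged sketch of what such a spec looks like on the C++ side (the paths field on TypePropertyMask is an assumption based on the projection support this change adds):

    // Sketch only: fetch the document but project it down to one property.
    GetResultSpecProto result_spec;
    TypePropertyMask* mask = result_spec.add_type_property_masks();
    mask->set_schema_type("Email");
    mask->add_paths("subject");  // assumed field name, for illustration
    GetResultProto result = icing->Get("namespace", "uri1", result_spec);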
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
index 4d714f8..0732ed0 100644
--- a/icing/query/query-processor.cc
+++ b/icing/query/query-processor.cc
@@ -131,10 +131,8 @@
       schema_store_(*schema_store),
       clock_(*clock) {}
 
-libtextclassifier3::StatusOr<QueryProcessor::QueryResults>
-QueryProcessor::ParseSearch(const SearchSpecProto& search_spec) {
-  ICING_ASSIGN_OR_RETURN(QueryResults results, ParseRawQuery(search_spec));
-
+DocHitInfoIteratorFilter::Options QueryProcessor::getFilterOptions(
+    const SearchSpecProto& search_spec) {
   DocHitInfoIteratorFilter::Options options;
 
   if (search_spec.namespace_filters_size() > 0) {
@@ -148,7 +146,14 @@
         std::vector<std::string_view>(search_spec.schema_type_filters().begin(),
                                       search_spec.schema_type_filters().end());
   }
+  return options;
+}
 
+libtextclassifier3::StatusOr<QueryProcessor::QueryResults>
+QueryProcessor::ParseSearch(const SearchSpecProto& search_spec) {
+  ICING_ASSIGN_OR_RETURN(QueryResults results, ParseRawQuery(search_spec));
+
+  DocHitInfoIteratorFilter::Options options = getFilterOptions(search_spec);
   results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>(
       std::move(results.root_iterator), &document_store_, &schema_store_,
       &clock_, options);
@@ -158,6 +163,8 @@
 // TODO(cassiewang): Collect query stats to populate the SearchResultsProto
 libtextclassifier3::StatusOr<QueryProcessor::QueryResults>
 QueryProcessor::ParseRawQuery(const SearchSpecProto& search_spec) {
+  DocHitInfoIteratorFilter::Options options = getFilterOptions(search_spec);
+
   // Tokenize the incoming raw query
   //
   // TODO(cassiewang): Consider caching/creating a tokenizer factory that will
@@ -258,12 +265,22 @@
             index_.GetIterator(normalized_text, kSectionIdMaskAll,
                                search_spec.term_match_type()));
 
-        // Add terms to match if this is not a negation term.
+        // Add term iterator and terms to match if this is not a negation term.
         // WARNING: setting query terms at this point is not compatible with
         // group-level excludes, group-level sections restricts or excluded
         // section restricts. Those are not currently supported. If they became
         // supported, this handling for query terms would need to be altered.
         if (!frames.top().saw_exclude) {
+          ICING_ASSIGN_OR_RETURN(
+              std::unique_ptr<DocHitInfoIterator> term_iterator,
+              index_.GetIterator(normalized_text, kSectionIdMaskAll,
+                                 search_spec.term_match_type()));
+
+          results.query_term_iterators[normalized_text] =
+              std::make_unique<DocHitInfoIteratorFilter>(
+                  std::move(term_iterator), &document_store_, &schema_store_,
+                  &clock_, options);
+
           results.query_terms[frames.top().section_restrict].insert(
               std::move(normalized_text));
         }
diff --git a/icing/query/query-processor.h b/icing/query/query-processor.h
index fa98627..0932ec5 100644
--- a/icing/query/query-processor.h
+++ b/icing/query/query-processor.h
@@ -19,6 +19,7 @@
 
 #include "icing/text_classifier/lib3/utils/base/statusor.h"
 #include "icing/index/index.h"
+#include "icing/index/iterator/doc-hit-info-iterator-filter.h"
 #include "icing/index/iterator/doc-hit-info-iterator.h"
 #include "icing/proto/search.pb.h"
 #include "icing/query/query-terms.h"
@@ -53,6 +54,11 @@
     // A map from section names to sets of terms restricted to those sections.
     // Query terms that are not restricted are found at the entry with key "".
     SectionRestrictQueryTermsMap query_terms;
+    // Hit iterators for the text terms in the query. These
+    // query_term_iterators are completely separate from the iterators that
+    // make up the iterator tree rooted at root_iterator.
+    std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+        query_term_iterators;
   };
   // Parse the search configurations (including the query, any additional
   // filters, etc.) in the SearchSpecProto into one DocHitInfoIterator.
@@ -85,6 +91,11 @@
   libtextclassifier3::StatusOr<QueryResults> ParseRawQuery(
       const SearchSpecProto& search_spec);
 
+  // Return the options for the DocHitInfoIteratorFilter based on the
+  // search_spec.
+  DocHitInfoIteratorFilter::Options getFilterOptions(
+      const SearchSpecProto& search_spec);
+
   // Not const because we could modify/sort the hit buffer in the lite index at
   // query time.
   Index& index_;
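
The query_term_iterators map gives scorers independent access to each term's hits without disturbing result retrieval. A hedged sketch of a consumer, assuming a QueryResults named results (the per-term document counting is illustrative; this change only exposes the iterators):

    // Illustrative only: count how many live documents each query term hits.
    // These iterators are separate from results.root_iterator, so exhausting
    // them here leaves result retrieval untouched.
    std::unordered_map<std::string, int> per_term_doc_counts;
    for (auto& [term, iterator] : results.query_term_iterators) {
      int count = 0;
      while (iterator->Advance().ok()) {
        ++count;
      }
      per_term_doc_counts[term] = count;
    }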
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index 7546ae4..6ec0a2a 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -54,6 +54,7 @@
 namespace {
 
 using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
 using ::testing::IsEmpty;
 using ::testing::SizeIs;
 using ::testing::Test;
@@ -232,6 +233,7 @@
   EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
               ElementsAre(document_id2, document_id1));
   EXPECT_THAT(results.query_terms, IsEmpty());
+  EXPECT_THAT(results.query_term_iterators, IsEmpty());
 }
 
 TEST_F(QueryProcessorTest, EmptyQueryMatchAllDocuments) {
@@ -281,6 +283,7 @@
   EXPECT_THAT(GetDocumentIds(results.root_iterator.get()),
               ElementsAre(document_id2, document_id1));
   EXPECT_THAT(results.query_terms, IsEmpty());
+  EXPECT_THAT(results.query_term_iterators, IsEmpty());
 }
 
 TEST_F(QueryProcessorTest, QueryTermNormalized) {
@@ -312,6 +315,8 @@
   SectionId section_id = 0;
   SectionIdMask section_id_mask = 1U << section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{
+      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
   EXPECT_THAT(
       AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
@@ -334,11 +339,26 @@
   ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
                              query_processor->ParseSearch(search_spec));
 
-  // Descending order of valid DocumentIds
-  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id, section_id_mask)));
+  std::vector<TermMatchInfo> matched_terms_stats;
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(2));  // 2 terms
+  EXPECT_EQ(matched_terms_stats.at(0).term, "hello");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+  EXPECT_EQ(matched_terms_stats.at(1).term, "world");
+  EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
+
+  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
 }
 
 TEST_F(QueryProcessorTest, OneTermPrefixMatch) {
@@ -370,6 +390,8 @@
   SectionId section_id = 0;
   SectionIdMask section_id_mask = 1U << section_id;
   TermMatchType::Code term_match_type = TermMatchType::PREFIX;
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{
+      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
   EXPECT_THAT(
       AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
@@ -389,11 +411,21 @@
   ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
                              query_processor->ParseSearch(search_spec));
 
-  // Descending order of valid DocumentIds
-  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id, section_id_mask)));
+  std::vector<TermMatchInfo> matched_terms_stats;
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+  EXPECT_EQ(matched_terms_stats.at(0).term, "he");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
 }
 
 TEST_F(QueryProcessorTest, OneTermExactMatch) {
@@ -425,6 +457,8 @@
   SectionId section_id = 0;
   SectionIdMask section_id_mask = 1U << section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{
+      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
   EXPECT_THAT(
       AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
@@ -444,11 +478,90 @@
   ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
                              query_processor->ParseSearch(search_spec));
 
-  // Descending order of valid DocumentIds
-  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id, section_id_mask)));
+  std::vector<TermMatchInfo> matched_terms_stats;
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+  EXPECT_EQ(matched_terms_stats.at(0).term, "hello");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
+}
+
+TEST_F(QueryProcessorTest, AndSameTermExactMatch) {
+  // Create the schema and document store
+  SchemaProto schema;
+  AddSchemaType(&schema, "email");
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      schema_store_,
+      SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+  ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentStore::CreateResult create_result,
+      DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
+                            schema_store_.get()));
+  document_store_ = std::move(create_result.document_store);
+
+  // This document doesn't actually match the tokens in the index. We're just
+  // inserting it so that the DocHitInfoIterators will see that the document
+  // exists and won't filter out its DocumentId as deleted.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  // Populate the index
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+  TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{
+      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+  EXPECT_THAT(
+      AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
+      IsOk());
+
+  // Perform query
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QueryProcessor> query_processor,
+      QueryProcessor::Create(index_.get(), language_segmenter_.get(),
+                             normalizer_.get(), document_store_.get(),
+                             schema_store_.get(), &fake_clock_));
+
+  SearchSpecProto search_spec;
+  search_spec.set_query("hello hello");
+  search_spec.set_term_match_type(term_match_type);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor->ParseSearch(search_spec));
+
+  std::vector<TermMatchInfo> matched_terms_stats;
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+  EXPECT_EQ(matched_terms_stats.at(0).term, "hello");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
+  ASSERT_FALSE(results.root_iterator->Advance().ok());
+
+  EXPECT_THAT(results.query_terms, SizeIs(1));
+  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
 }
 
 TEST_F(QueryProcessorTest, AndTwoTermExactMatch) {
@@ -480,6 +593,8 @@
   SectionId section_id = 0;
   SectionIdMask section_id_mask = 1U << section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{
+      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
   EXPECT_THAT(
       AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
@@ -502,11 +617,94 @@
   ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
                              query_processor->ParseSearch(search_spec));
 
-  // Descending order of valid DocumentIds
-  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id, section_id_mask)));
+  std::vector<TermMatchInfo> matched_terms_stats;
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(2));  // 2 terms
+  EXPECT_EQ(matched_terms_stats.at(0).term, "hello");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+  EXPECT_EQ(matched_terms_stats.at(1).term, "world");
+  EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
+}
+
+TEST_F(QueryProcessorTest, AndSameTermPrefixMatch) {
+  // Create the schema and document store
+  SchemaProto schema;
+  AddSchemaType(&schema, "email");
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      schema_store_,
+      SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+  ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentStore::CreateResult create_result,
+      DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
+                            schema_store_.get()));
+  document_store_ = std::move(create_result.document_store);
+
+  // This document doesn't actually match the tokens in the index. We're just
+  // inserting it so that the DocHitInfoIterators will see that the document
+  // exists and won't filter out its DocumentId as deleted.
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+                             document_store_->Put(DocumentBuilder()
+                                                      .SetKey("namespace", "1")
+                                                      .SetSchema("email")
+                                                      .Build()));
+
+  // Populate the index
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+  TermMatchType::Code term_match_type = TermMatchType::PREFIX;
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{
+      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+  EXPECT_THAT(
+      AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
+      IsOk());
+
+  // Perform query
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<QueryProcessor> query_processor,
+      QueryProcessor::Create(index_.get(), language_segmenter_.get(),
+                             normalizer_.get(), document_store_.get(),
+                             schema_store_.get(), &fake_clock_));
+
+  SearchSpecProto search_spec;
+  search_spec.set_query("he he");
+  search_spec.set_term_match_type(term_match_type);
+
+  ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
+                             query_processor->ParseSearch(search_spec));
+
+  std::vector<TermMatchInfo> matched_terms_stats;
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+  EXPECT_EQ(matched_terms_stats.at(0).term, "he");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
+  ASSERT_FALSE(results.root_iterator->Advance().ok());
+
+  EXPECT_THAT(results.query_terms, SizeIs(1));
+  EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
 }
 
 TEST_F(QueryProcessorTest, AndTwoTermPrefixMatch) {
@@ -537,6 +735,8 @@
   // Populate the index
   SectionId section_id = 0;
   SectionIdMask section_id_mask = 1U << section_id;
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{
+      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
   TermMatchType::Code term_match_type = TermMatchType::PREFIX;
 
   EXPECT_THAT(
@@ -561,10 +761,25 @@
                              query_processor->ParseSearch(search_spec));
 
   // Descending order of valid DocumentIds
-  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id, section_id_mask)));
+  std::vector<TermMatchInfo> matched_terms_stats;
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(2));  // 2 terms
+  EXPECT_EQ(matched_terms_stats.at(0).term, "he");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+  EXPECT_EQ(matched_terms_stats.at(1).term, "wo");
+  EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he", "wo"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
 }
 
 TEST_F(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) {
@@ -595,6 +810,8 @@
   // Populate the index
   SectionId section_id = 0;
   SectionIdMask section_id_mask = 1U << section_id;
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{
+      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
   TermMatchType::Code term_match_type = TermMatchType::PREFIX;
 
   EXPECT_THAT(AddTokenToIndex(document_id, section_id,
@@ -619,10 +836,25 @@
                              query_processor->ParseSearch(search_spec));
 
   // Descending order of valid DocumentIds
-  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id, section_id_mask)));
+  std::vector<TermMatchInfo> matched_terms_stats;
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(2));  // 2 terms
+  EXPECT_EQ(matched_terms_stats.at(0).term, "hello");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+  EXPECT_EQ(matched_terms_stats.at(1).term, "wo");
+  EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "wo"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
 }
 
 TEST_F(QueryProcessorTest, OrTwoTermExactMatch) {
@@ -658,6 +890,8 @@
   // Populate the index
   SectionId section_id = 0;
   SectionIdMask section_id_mask = 1U << section_id;
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{
+      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   EXPECT_THAT(
@@ -682,11 +916,33 @@
                              query_processor->ParseSearch(search_spec));
 
   // Descending order of valid DocumentIds
-  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id2, section_id_mask),
-                          DocHitInfo(document_id1, section_id_mask)));
+  std::vector<TermMatchInfo> matched_terms_stats;
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id2);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+  EXPECT_EQ(matched_terms_stats.at(0).term, "world");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
+  matched_terms_stats.clear();
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+  EXPECT_EQ(matched_terms_stats.at(0).term, "hello");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "world"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
 }
 
 TEST_F(QueryProcessorTest, OrTwoTermPrefixMatch) {
@@ -722,6 +978,8 @@
   // Populate the index
   SectionId section_id = 0;
   SectionIdMask section_id_mask = 1U << section_id;
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{
+      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
   TermMatchType::Code term_match_type = TermMatchType::PREFIX;
 
   EXPECT_THAT(
@@ -746,11 +1004,32 @@
                              query_processor->ParseSearch(search_spec));
 
   // Descending order of valid DocumentIds
-  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id2, section_id_mask),
-                          DocHitInfo(document_id1, section_id_mask)));
+  std::vector<TermMatchInfo> matched_terms_stats;
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id2);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+  EXPECT_EQ(matched_terms_stats.at(0).term, "wo");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
+  matched_terms_stats.clear();
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+  EXPECT_EQ(matched_terms_stats.at(0).term, "he");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("he", "wo"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
 }
 
 TEST_F(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) {
@@ -786,6 +1065,8 @@
   // Populate the index
   SectionId section_id = 0;
   SectionIdMask section_id_mask = 1U << section_id;
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{
+      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
   EXPECT_THAT(AddTokenToIndex(document_id1, section_id,
                               TermMatchType::EXACT_ONLY, "hello"),
@@ -809,11 +1090,32 @@
                              query_processor->ParseSearch(search_spec));
 
   // Descending order of valid DocumentIds
-  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id2, section_id_mask),
-                          DocHitInfo(document_id1, section_id_mask)));
+  std::vector<TermMatchInfo> matched_terms_stats;
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id2);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+  EXPECT_EQ(matched_terms_stats.at(0).term, "wo");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
+  matched_terms_stats.clear();
+  ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+  EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(), document_id1);
+  EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+            section_id_mask);
+  results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+  ASSERT_THAT(matched_terms_stats, SizeIs(1));  // 1 term
+  EXPECT_EQ(matched_terms_stats.at(0).term, "hello");
+  EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+  EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+              ElementsAreArray(term_frequencies));
+
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("hello", "wo"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
 }
 
 TEST_F(QueryProcessorTest, CombinedAndOrTerms) {
@@ -848,6 +1150,8 @@
   // Populate the index
   SectionId section_id = 0;
   SectionIdMask section_id_mask = 1U << section_id;
+  std::array<Hit::TermFrequency, kMaxSectionId> term_frequencies{
+      1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Document 1 has content "animal puppy dog"
@@ -888,11 +1192,27 @@
                                query_processor->ParseSearch(search_spec));
 
     // Only Document 1 matches since it has puppy AND dog
-    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-                ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+    std::vector<TermMatchInfo> matched_terms_stats;
+    ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+    EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(),
+              document_id1);
+    EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+              section_id_mask);
+    results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+    ASSERT_THAT(matched_terms_stats, SizeIs(2));  // 2 terms
+    EXPECT_EQ(matched_terms_stats.at(0).term, "puppy");
+    EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+    EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+                ElementsAreArray(term_frequencies));
+    EXPECT_EQ(matched_terms_stats.at(1).term, "dog");
+    EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask);
+    EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+                ElementsAreArray(term_frequencies));
+
     EXPECT_THAT(results.query_terms, SizeIs(1));
     EXPECT_THAT(results.query_terms[""],
                 UnorderedElementsAre("puppy", "kitten", "dog"));
+    EXPECT_THAT(results.query_term_iterators, SizeIs(3));
   }
 
   {
@@ -905,15 +1225,47 @@
     ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
                                query_processor->ParseSearch(search_spec));
 
-    // Both Document 1 and 2 match since Document 1 has puppy AND dog, and
-    // Document 2 has kitten
+    // Both Document 1 and 2 match since Document 1 has animal AND puppy, and
+    // Document 2 has animal AND kitten
     // Descending order of valid DocumentIds
-    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-                ElementsAre(DocHitInfo(document_id2, section_id_mask),
-                            DocHitInfo(document_id1, section_id_mask)));
+    std::vector<TermMatchInfo> matched_terms_stats;
+    ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+    EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(),
+              document_id2);
+    EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+              section_id_mask);
+    results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+    ASSERT_THAT(matched_terms_stats, SizeIs(2));  // 2 terms
+    EXPECT_EQ(matched_terms_stats.at(0).term, "animal");
+    EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+    EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+                ElementsAreArray(term_frequencies));
+    EXPECT_EQ(matched_terms_stats.at(1).term, "kitten");
+    EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask);
+    EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+                ElementsAreArray(term_frequencies));
+
+    matched_terms_stats.clear();
+    ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+    EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(),
+              document_id1);
+    EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+              section_id_mask);
+    results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+    ASSERT_THAT(matched_terms_stats, SizeIs(2));  // 2 terms
+    EXPECT_EQ(matched_terms_stats.at(0).term, "animal");
+    EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+    EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+                ElementsAreArray(term_frequencies));
+    EXPECT_EQ(matched_terms_stats.at(1).term, "puppy");
+    EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask);
+    EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+                ElementsAreArray(term_frequencies));
+
     EXPECT_THAT(results.query_terms, SizeIs(1));
     EXPECT_THAT(results.query_terms[""],
                 UnorderedElementsAre("animal", "puppy", "kitten"));
+    EXPECT_THAT(results.query_term_iterators, SizeIs(3));
   }
 
   {
@@ -927,11 +1279,27 @@
                                query_processor->ParseSearch(search_spec));
 
     // Only Document 2 matches since it has both kitten and cat
-    EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-                ElementsAre(DocHitInfo(document_id2, section_id_mask)));
+    std::vector<TermMatchInfo> matched_terms_stats;
+    ASSERT_THAT(results.root_iterator->Advance(), IsOk());
+    EXPECT_EQ(results.root_iterator->doc_hit_info().document_id(),
+              document_id2);
+    EXPECT_EQ(results.root_iterator->doc_hit_info().hit_section_ids_mask(),
+              section_id_mask);
+    results.root_iterator->PopulateMatchedTermsStats(&matched_terms_stats);
+    ASSERT_THAT(matched_terms_stats, SizeIs(2));  // 2 terms
+    EXPECT_EQ(matched_terms_stats.at(0).term, "kitten");
+    EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask, section_id_mask);
+    EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+                ElementsAreArray(term_frequencies));
+    EXPECT_EQ(matched_terms_stats.at(1).term, "cat");
+    EXPECT_EQ(matched_terms_stats.at(1).section_ids_mask, section_id_mask);
+    EXPECT_THAT(matched_terms_stats.at(1).term_frequencies,
+                ElementsAreArray(term_frequencies));
+
     EXPECT_THAT(results.query_terms, SizeIs(1));
     EXPECT_THAT(results.query_terms[""],
                 UnorderedElementsAre("kitten", "foo", "bar", "cat"));
+    EXPECT_THAT(results.query_term_iterators, SizeIs(4));
   }
 }
 
@@ -967,7 +1335,6 @@
 
   // Populate the index
   SectionId section_id = 0;
-  SectionIdMask section_id_mask = 1U << section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Document 1 has content "puppy dog"
@@ -1001,11 +1368,14 @@
                              query_processor->ParseSearch(search_spec));
 
   // Descending order of valid DocumentIds
+  DocHitInfo expectedDocHitInfo(document_id1);
+  expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
   EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+              ElementsAre(expectedDocHitInfo));
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""],
               UnorderedElementsAre("puppy", "kitten", "foo"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(3));
 }
 
 TEST_F(QueryProcessorTest, TwoGroups) {
@@ -1040,7 +1410,6 @@
 
   // Populate the index
   SectionId section_id = 0;
-  SectionIdMask section_id_mask = 1U << section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Document 1 has content "puppy dog"
@@ -1074,12 +1443,16 @@
                              query_processor->ParseSearch(search_spec));
 
   // Descending order of valid DocumentIds
+  DocHitInfo expectedDocHitInfo1(document_id1);
+  expectedDocHitInfo1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  DocHitInfo expectedDocHitInfo2(document_id2);
+  expectedDocHitInfo2.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
   EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id2, section_id_mask),
-                          DocHitInfo(document_id1, section_id_mask)));
+              ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1));
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""],
               UnorderedElementsAre("puppy", "dog", "kitten", "cat"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(4));
 }
 
 TEST_F(QueryProcessorTest, ManyLevelNestedGrouping) {
@@ -1114,7 +1487,6 @@
 
   // Populate the index
   SectionId section_id = 0;
-  SectionIdMask section_id_mask = 1U << section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Document 1 has content "puppy dog"
@@ -1148,11 +1520,14 @@
                              query_processor->ParseSearch(search_spec));
 
   // Descending order of valid DocumentIds
+  DocHitInfo expectedDocHitInfo(document_id1);
+  expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
   EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+              ElementsAre(expectedDocHitInfo));
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""],
               UnorderedElementsAre("puppy", "kitten", "foo"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(3));
 }
 
 TEST_F(QueryProcessorTest, OneLevelNestedGrouping) {
@@ -1187,7 +1562,6 @@
 
   // Populate the index
   SectionId section_id = 0;
-  SectionIdMask section_id_mask = 1U << section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Document 1 has content "puppy dog"
@@ -1220,12 +1594,16 @@
                              query_processor->ParseSearch(search_spec));
 
   // Descending order of valid DocumentIds
+  DocHitInfo expectedDocHitInfo1(document_id1);
+  expectedDocHitInfo1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  DocHitInfo expectedDocHitInfo2(document_id2);
+  expectedDocHitInfo2.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
   EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id2, section_id_mask),
-                          DocHitInfo(document_id1, section_id_mask)));
+              ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1));
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""],
               UnorderedElementsAre("puppy", "kitten", "cat"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(3));
 }
 
 TEST_F(QueryProcessorTest, ExcludeTerm) {
@@ -1289,6 +1667,7 @@
   EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
               ElementsAre(DocHitInfo(document_id2, kSectionIdMaskNone)));
   EXPECT_THAT(results.query_terms, IsEmpty());
+  EXPECT_THAT(results.query_term_iterators, IsEmpty());
 }
 
 TEST_F(QueryProcessorTest, ExcludeNonexistentTerm) {
@@ -1350,6 +1729,7 @@
               ElementsAre(DocHitInfo(document_id2, kSectionIdMaskNone),
                           DocHitInfo(document_id1, kSectionIdMaskNone)));
   EXPECT_THAT(results.query_terms, IsEmpty());
+  EXPECT_THAT(results.query_term_iterators, IsEmpty());
 }
 
 TEST_F(QueryProcessorTest, ExcludeAnd) {
@@ -1420,6 +1800,7 @@
     // animal, there are no results.
     EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
     EXPECT_THAT(results.query_terms, IsEmpty());
+    EXPECT_THAT(results.query_term_iterators, IsEmpty());
   }
 
   {
@@ -1436,6 +1817,7 @@
     EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
     EXPECT_THAT(results.query_terms, SizeIs(1));
     EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("cat"));
+    EXPECT_THAT(results.query_term_iterators, SizeIs(1));
   }
 }
 
@@ -1471,7 +1853,6 @@
 
   // Populate the index
   SectionId section_id = 0;
-  SectionIdMask section_id_mask = 1U << section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Document 1 has content "animal dog"
@@ -1509,6 +1890,7 @@
     EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
                 ElementsAre(DocHitInfo(document_id1, kSectionIdMaskNone)));
     EXPECT_THAT(results.query_terms, IsEmpty());
+    EXPECT_THAT(results.query_term_iterators, IsEmpty());
   }
 
   {
@@ -1520,11 +1902,17 @@
                                query_processor->ParseSearch(search_spec));
 
     // Descending order of valid DocumentIds
+    DocHitInfo expectedDocHitInfo1(document_id1);
+    expectedDocHitInfo1.UpdateSection(/*section_id=*/0,
+                                      /*hit_term_frequency=*/1);
+    DocHitInfo expectedDocHitInfo2(document_id2);
+    expectedDocHitInfo2.UpdateSection(/*section_id=*/0,
+                                      /*hit_term_frequency=*/1);
     EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-                ElementsAre(DocHitInfo(document_id2, section_id_mask),
-                            DocHitInfo(document_id1, section_id_mask)));
+                ElementsAre(expectedDocHitInfo2, expectedDocHitInfo1));
     EXPECT_THAT(results.query_terms, SizeIs(1));
     EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
+    EXPECT_THAT(results.query_term_iterators, SizeIs(1));
   }
 }
 
@@ -1561,7 +1949,6 @@
 
   // Populate the index
   SectionId section_id = 0;
-  SectionIdMask section_id_mask = 1U << section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Document 1 has content "animal dog"
@@ -1593,10 +1980,13 @@
                              query_processor->ParseSearch(search_spec));
 
   // Descending order of valid DocumentIds
+  DocHitInfo expectedDocHitInfo(document_id2);
+  expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
   EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id2, section_id_mask)));
+              ElementsAre(expectedDocHitInfo));
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
 }
 
 TEST_F(QueryProcessorTest, NamespaceFilter) {
@@ -1631,7 +2021,6 @@
 
   // Populate the index
   SectionId section_id = 0;
-  SectionIdMask section_id_mask = 1U << section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Document 1 has content "animal dog"
@@ -1664,10 +2053,13 @@
                              query_processor->ParseSearch(search_spec));
 
   // Descending order of valid DocumentIds
+  DocHitInfo expectedDocHitInfo(document_id1);
+  expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
   EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+              ElementsAre(expectedDocHitInfo));
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
 }
 
 TEST_F(QueryProcessorTest, SchemaTypeFilter) {
@@ -1703,7 +2095,6 @@
 
   // Populate the index
   SectionId section_id = 0;
-  SectionIdMask section_id_mask = 1U << section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Document 1 has content "animal dog"
@@ -1732,10 +2123,13 @@
                              query_processor->ParseSearch(search_spec));
 
   // Descending order of valid DocumentIds
+  DocHitInfo expectedDocHitInfo(document_id1);
+  expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
   EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id1, section_id_mask)));
+              ElementsAre(expectedDocHitInfo));
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("animal"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
 }
 
 TEST_F(QueryProcessorTest, SectionFilterForOneDocument) {
@@ -1768,7 +2162,6 @@
                                                       .Build()));
 
   // Populate the index
-  SectionIdMask section_id_mask = 1U << subject_section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Document has content "animal"
@@ -1792,10 +2185,13 @@
                              query_processor->ParseSearch(search_spec));
 
   // Descending order of valid DocumentIds
+  DocHitInfo expectedDocHitInfo(document_id);
+  expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
   EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id, section_id_mask)));
+              ElementsAre(expectedDocHitInfo));
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms["subject"], UnorderedElementsAre("animal"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
 }
 
 TEST_F(QueryProcessorTest, SectionFilterAcrossSchemaTypes) {
@@ -1840,8 +2236,6 @@
                                                       .Build()));
 
   // Populate the index
-  SectionIdMask email_section_id_mask = 1U << email_foo_section_id;
-  SectionIdMask message_section_id_mask = 1U << message_foo_section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Email document has content "animal"
@@ -1871,12 +2265,15 @@
 
   // Ordered by descending DocumentId, so message comes first since it was
   // inserted last
-  EXPECT_THAT(
-      GetDocHitInfos(results.root_iterator.get()),
-      ElementsAre(DocHitInfo(message_document_id, message_section_id_mask),
-                  DocHitInfo(email_document_id, email_section_id_mask)));
+  DocHitInfo expectedDocHitInfo1(message_document_id);
+  expectedDocHitInfo1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  DocHitInfo expectedDocHitInfo2(email_document_id);
+  expectedDocHitInfo2.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/1);
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(expectedDocHitInfo1, expectedDocHitInfo2));
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
 }
 
 TEST_F(QueryProcessorTest, SectionFilterWithinSchemaType) {
@@ -1920,7 +2317,6 @@
                                                       .Build()));
 
   // Populate the index
-  SectionIdMask email_section_id_mask = 1U << email_foo_section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Email document has content "animal"
@@ -1952,11 +2348,13 @@
 
   // Shouldn't include the message document since we're only looking at email
   // types
-  EXPECT_THAT(
-      GetDocHitInfos(results.root_iterator.get()),
-      ElementsAre(DocHitInfo(email_document_id, email_section_id_mask)));
+  DocHitInfo expectedDocHitInfo(email_document_id);
+  expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(expectedDocHitInfo));
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
 }
 
 TEST_F(QueryProcessorTest, SectionFilterRespectsDifferentSectionIds) {
@@ -2000,7 +2398,6 @@
                                                       .Build()));
 
   // Populate the index
-  SectionIdMask email_section_id_mask = 1U << email_foo_section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Email document has content "animal"
@@ -2033,11 +2430,13 @@
 
   // Even though the section id is the same, we should be able to tell that it
   // doesn't match to the name of the section filter
-  EXPECT_THAT(
-      GetDocHitInfos(results.root_iterator.get()),
-      ElementsAre(DocHitInfo(email_document_id, email_section_id_mask)));
+  DocHitInfo expectedDocHitInfo(email_document_id);
+  expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(expectedDocHitInfo));
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
 }
 
 TEST_F(QueryProcessorTest, NonexistentSectionFilterReturnsEmptyResults) {
@@ -2095,6 +2494,7 @@
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms["nonexistent"],
               UnorderedElementsAre("animal"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
 }
 
 TEST_F(QueryProcessorTest, UnindexedSectionFilterReturnsEmptyResults) {
@@ -2152,6 +2552,7 @@
   EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
   EXPECT_THAT(results.query_terms, SizeIs(1));
   EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(1));
 }
 
 TEST_F(QueryProcessorTest, SectionFilterTermAndUnrestrictedTerm) {
@@ -2195,8 +2596,6 @@
                                                       .Build()));
 
   // Populate the index
-  SectionIdMask email_section_id_mask = 1U << email_foo_section_id;
-  SectionIdMask message_section_id_mask = 1U << message_foo_section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   // Email document has content "animal"
@@ -2228,13 +2627,16 @@
 
   // Ordered by descending DocumentId, so message comes first since it was
   // inserted last
-  EXPECT_THAT(
-      GetDocHitInfos(results.root_iterator.get()),
-      ElementsAre(DocHitInfo(message_document_id, message_section_id_mask),
-                  DocHitInfo(email_document_id, email_section_id_mask)));
+  DocHitInfo expectedDocHitInfo1(message_document_id);
+  expectedDocHitInfo1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  DocHitInfo expectedDocHitInfo2(email_document_id);
+  expectedDocHitInfo2.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
+              ElementsAre(expectedDocHitInfo1, expectedDocHitInfo2));
   EXPECT_THAT(results.query_terms, SizeIs(2));
   EXPECT_THAT(results.query_terms[""], UnorderedElementsAre("cat"));
   EXPECT_THAT(results.query_terms["foo"], UnorderedElementsAre("animal"));
+  EXPECT_THAT(results.query_term_iterators, SizeIs(2));
 }
 
 TEST_F(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
@@ -2263,7 +2665,6 @@
 
   // Populate the index
   int section_id = 0;
-  SectionIdMask section_id_mask = 1U << section_id;
   TermMatchType::Code term_match_type = TermMatchType::EXACT_ONLY;
 
   EXPECT_THAT(
@@ -2289,8 +2690,10 @@
   ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
                              query_processor->ParseSearch(search_spec));
 
+  DocHitInfo expectedDocHitInfo(document_id);
+  expectedDocHitInfo.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
   EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
-              ElementsAre(DocHitInfo(document_id, section_id_mask)));
+              ElementsAre(expectedDocHitInfo));
 }
 
 TEST_F(QueryProcessorTest, DocumentPastTtlFilteredOut) {
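
Note: every updated test above repeats the same verification sequence — Advance() the root iterator, check doc_hit_info(), then PopulateMatchedTermsStats(). A minimal helper along these lines (hypothetical, not part of this patch; it assumes only the DocHitInfoIterator API already exercised above) would cut that boilerplate:

    // Hypothetical test helper: advances `iterator` once and returns the
    // matched-term stats for the document it lands on, or an empty vector
    // if there are no more matching documents.
    std::vector<TermMatchInfo> AdvanceAndGetMatchedTermsStats(
        DocHitInfoIterator* iterator) {
      std::vector<TermMatchInfo> matched_terms_stats;
      if (!iterator->Advance().ok()) {
        return matched_terms_stats;
      }
      iterator->PopulateMatchedTermsStats(&matched_terms_stats);
      return matched_terms_stats;
    }
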
diff --git a/icing/result/projection-tree.cc b/icing/result/projection-tree.cc
index 382fcb4..67617a3 100644
--- a/icing/result/projection-tree.cc
+++ b/icing/result/projection-tree.cc
@@ -22,8 +22,7 @@
 namespace icing {
 namespace lib {
 
-ProjectionTree::ProjectionTree(
-    const ResultSpecProto::TypePropertyMask& type_field_mask) {
+ProjectionTree::ProjectionTree(const TypePropertyMask& type_field_mask) {
   for (const std::string& field_mask : type_field_mask.paths()) {
     Node* current_node = &root_;
     for (std::string_view sub_field_mask :
diff --git a/icing/result/projection-tree.h b/icing/result/projection-tree.h
index a87a8fc..b2e5ffc 100644
--- a/icing/result/projection-tree.h
+++ b/icing/result/projection-tree.h
@@ -35,8 +35,7 @@
     std::vector<Node> children;
   };
 
-  explicit ProjectionTree(
-      const ResultSpecProto::TypePropertyMask& type_field_mask);
+  explicit ProjectionTree(const TypePropertyMask& type_field_mask);
 
   const Node& root() const { return root_; }
 
diff --git a/icing/result/projection-tree_test.cc b/icing/result/projection-tree_test.cc
index 77d1d21..2b0f966 100644
--- a/icing/result/projection-tree_test.cc
+++ b/icing/result/projection-tree_test.cc
@@ -28,14 +28,14 @@
 using ::testing::SizeIs;
 
 TEST(ProjectionTreeTest, CreateEmptyFieldMasks) {
-  ResultSpecProto::TypePropertyMask type_field_mask;
+  TypePropertyMask type_field_mask;
   ProjectionTree tree(type_field_mask);
   EXPECT_THAT(tree.root().name, IsEmpty());
   EXPECT_THAT(tree.root().children, IsEmpty());
 }
 
 TEST(ProjectionTreeTest, CreateTreeTopLevel) {
-  ResultSpecProto::TypePropertyMask type_field_mask;
+  TypePropertyMask type_field_mask;
   type_field_mask.add_paths("subject");
 
   ProjectionTree tree(type_field_mask);
@@ -46,7 +46,7 @@
 }
 
 TEST(ProjectionTreeTest, CreateTreeMultipleTopLevel) {
-  ResultSpecProto::TypePropertyMask type_field_mask;
+  TypePropertyMask type_field_mask;
   type_field_mask.add_paths("subject");
   type_field_mask.add_paths("body");
 
@@ -60,7 +60,7 @@
 }
 
 TEST(ProjectionTreeTest, CreateTreeNested) {
-  ResultSpecProto::TypePropertyMask type_field_mask;
+  TypePropertyMask type_field_mask;
   type_field_mask.add_paths("subject.body");
   type_field_mask.add_paths("body");
 
@@ -76,7 +76,7 @@
 }
 
 TEST(ProjectionTreeTest, CreateTreeNestedSharedNode) {
-  ResultSpecProto::TypePropertyMask type_field_mask;
+  TypePropertyMask type_field_mask;
   type_field_mask.add_paths("sender.name.first");
   type_field_mask.add_paths("sender.emailAddress");
 
diff --git a/icing/result/projector.cc b/icing/result/projector.cc
new file mode 100644
index 0000000..8acdc8a
--- /dev/null
+++ b/icing/result/projector.cc
@@ -0,0 +1,60 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/projector.h"
+
+#include <algorithm>
+
+namespace icing {
+namespace lib {
+
+namespace projector {
+
+void Project(const std::vector<ProjectionTree::Node>& projection_tree,
+             DocumentProto* document) {
+  int num_kept = 0;
+  for (int cur_pos = 0; cur_pos < document->properties_size(); ++cur_pos) {
+    PropertyProto* prop = document->mutable_properties(cur_pos);
+    auto itr = std::find_if(projection_tree.begin(), projection_tree.end(),
+                            [&prop](const ProjectionTree::Node& node) {
+                              return node.name == prop->name();
+                            });
+    if (itr == projection_tree.end()) {
+      // Property is not present in the projection tree. Just skip it.
+      continue;
+    }
+    // This property should be kept.
+    document->mutable_properties()->SwapElements(num_kept, cur_pos);
+    ++num_kept;
+    if (itr->children.empty()) {
+      // A field mask does refer to this property, but it has no children. So
+      // we should take the entire property, with all of its
+      // subproperties/values.
+      continue;
+    }
+    // The field mask refers to children of this property. Recurse through the
+    // document values that this property holds and project the children
+    // requested by this field mask.
+    for (DocumentProto& subproperty : *(prop->mutable_document_values())) {
+      Project(itr->children, &subproperty);
+    }
+  }
+  document->mutable_properties()->DeleteSubrange(
+      num_kept, document->properties_size() - num_kept);
+}
+
+}  // namespace projector
+
+}  // namespace lib
+}  // namespace icing
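
Note: a minimal usage sketch for the extracted projector, assuming only the APIs this patch introduces (TypePropertyMask, ProjectionTree, projector::Project); the wrapper function name and the "Email"/"sender.name" values are illustrative:

    // Illustrative sketch: strip an Email document down to "sender.name".
    void ProjectSenderNameOnly(DocumentProto* document) {
      TypePropertyMask mask;
      mask.set_schema_type("Email");
      mask.add_paths("sender.name");
      ProjectionTree tree(mask);
      // Keeps only the "sender" property; within each of its document
      // values, keeps only the "name" subproperty.
      projector::Project(tree.root().children, document);
    }
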
diff --git a/icing/store/enable-bm25f.h b/icing/result/projector.h
similarity index 61%
rename from icing/store/enable-bm25f.h
rename to icing/result/projector.h
index cee94d1..43d9052 100644
--- a/icing/store/enable-bm25f.h
+++ b/icing/result/projector.h
@@ -1,4 +1,4 @@
-// Copyright (C) 2020 Google LLC
+// Copyright (C) 2019 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -12,20 +12,25 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#ifndef ICING_STORE_ENABLE_BM25F_H_
-#define ICING_STORE_ENABLE_BM25F_H_
+#ifndef ICING_RESULT_PROJECTOR_H_
+#define ICING_RESULT_PROJECTOR_H_
+
+#include <vector>
+
+#include "icing/proto/document.pb.h"
+#include "icing/result/projection-tree.h"
 
 namespace icing {
 namespace lib {
 
-inline bool enable_bm25f_ = false;
+namespace projector {
 
-inline bool enableBm25f() { return enable_bm25f_; }
+void Project(const std::vector<ProjectionTree::Node>& projection_tree,
+             DocumentProto* document);
 
-// Setter for testing purposes. It should never be called in production code.
-inline void setEnableBm25f(bool enable_bm25f) { enable_bm25f_ = enable_bm25f; }
+}  // namespace projector
 
 }  // namespace lib
 }  // namespace icing
 
-#endif  // ICING_STORE_ENABLE_BM25F_H_
+#endif  // ICING_RESULT_PROJECTOR_H_
diff --git a/icing/result/result-retriever.cc b/icing/result/result-retriever.cc
index 0b8ad4a..85e78a8 100644
--- a/icing/result/result-retriever.cc
+++ b/icing/result/result-retriever.cc
@@ -22,48 +22,13 @@
 #include "icing/proto/term.pb.h"
 #include "icing/result/page-result-state.h"
 #include "icing/result/projection-tree.h"
+#include "icing/result/projector.h"
 #include "icing/result/snippet-context.h"
 #include "icing/util/status-macros.h"
 
 namespace icing {
 namespace lib {
 
-namespace {
-
-void Project(const std::vector<ProjectionTree::Node>& projection_tree,
-             google::protobuf::RepeatedPtrField<PropertyProto>* properties) {
-  int num_kept = 0;
-  for (int cur_pos = 0; cur_pos < properties->size(); ++cur_pos) {
-    PropertyProto* prop = properties->Mutable(cur_pos);
-    auto itr = std::find_if(projection_tree.begin(), projection_tree.end(),
-                            [&prop](const ProjectionTree::Node& node) {
-                              return node.name == prop->name();
-                            });
-    if (itr == projection_tree.end()) {
-      // Property is not present in the projection tree. Just skip it.
-      continue;
-    }
-    // This property should be kept.
-    properties->SwapElements(num_kept, cur_pos);
-    ++num_kept;
-    if (itr->children.empty()) {
-      // A field mask does refer to this property, but it has no children. So
-      // we should take the entire property, with all of its
-      // subproperties/values
-      continue;
-    }
-    // The field mask refers to children of this property. Recurse through the
-    // document values that this property holds and project the children
-    // requested by this field mask.
-    for (DocumentProto& subproperty : *(prop->mutable_document_values())) {
-      Project(itr->children, subproperty.mutable_properties());
-    }
-  }
-  properties->DeleteSubrange(num_kept, properties->size() - num_kept);
-}
-
-}  // namespace
-
 libtextclassifier3::StatusOr<std::unique_ptr<ResultRetriever>>
 ResultRetriever::Create(const DocumentStore* doc_store,
                         const SchemaStore* schema_store,
@@ -118,17 +83,15 @@
       }
     }
 
+    DocumentProto document = std::move(document_or).ValueOrDie();
     // Apply projection
-    auto itr = page_result_state.projection_tree_map.find(
-        document_or.ValueOrDie().schema());
-
+    auto itr = page_result_state.projection_tree_map.find(document.schema());
     if (itr != page_result_state.projection_tree_map.end()) {
-      Project(itr->second.root().children,
-              document_or.ValueOrDie().mutable_properties());
+      projector::Project(itr->second.root().children, &document);
     } else if (wildcard_projection_tree_itr !=
                page_result_state.projection_tree_map.end()) {
-      Project(wildcard_projection_tree_itr->second.root().children,
-              document_or.ValueOrDie().mutable_properties());
+      projector::Project(wildcard_projection_tree_itr->second.root().children,
+                         &document);
     }
 
     SearchResultProto::ResultProto result;
@@ -137,13 +100,13 @@
         remaining_num_to_snippet > search_results.size()) {
       SnippetProto snippet_proto = snippet_retriever_->RetrieveSnippet(
           snippet_context.query_terms, snippet_context.match_type,
-          snippet_context.snippet_spec, document_or.ValueOrDie(),
+          snippet_context.snippet_spec, document,
           scored_document_hit.hit_section_id_mask());
       *result.mutable_snippet() = std::move(snippet_proto);
     }
 
     // Add the document, itself.
-    *result.mutable_document() = std::move(document_or).ValueOrDie();
+    *result.mutable_document() = std::move(document);
     search_results.push_back(std::move(result));
   }
   return search_results;
diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc
index 98cc75a..7cb2d62 100644
--- a/icing/result/result-retriever_test.cc
+++ b/icing/result/result-retriever_test.cc
@@ -779,7 +779,7 @@
       {document_id1, hit_section_id_mask, /*score=*/0},
       {document_id2, hit_section_id_mask, /*score=*/0}};
 
-  ResultSpecProto::TypePropertyMask type_property_mask;
+  TypePropertyMask type_property_mask;
   type_property_mask.set_schema_type("Email");
   type_property_mask.add_paths("name");
   std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
@@ -881,7 +881,7 @@
       {document_id1, hit_section_id_mask, /*score=*/0},
       {document_id2, hit_section_id_mask, /*score=*/0}};
 
-  ResultSpecProto::TypePropertyMask type_property_mask;
+  TypePropertyMask type_property_mask;
   type_property_mask.set_schema_type("Email");
   type_property_mask.add_paths("sender.name");
   std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
@@ -994,7 +994,7 @@
       {document_id1, hit_section_id_mask, /*score=*/0},
       {document_id2, hit_section_id_mask, /*score=*/0}};
 
-  ResultSpecProto::TypePropertyMask type_property_mask;
+  TypePropertyMask type_property_mask;
   type_property_mask.set_schema_type("Email");
   type_property_mask.add_paths("sender");
   std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
@@ -1111,7 +1111,7 @@
       {document_id1, hit_section_id_mask, /*score=*/0},
       {document_id2, hit_section_id_mask, /*score=*/0}};
 
-  ResultSpecProto::TypePropertyMask type_property_mask;
+  TypePropertyMask type_property_mask;
   type_property_mask.set_schema_type("Email");
   type_property_mask.add_paths("sender.name");
   type_property_mask.add_paths("sender.emailAddress");
@@ -1214,7 +1214,7 @@
       {document_id1, hit_section_id_mask, /*score=*/0},
       {document_id2, hit_section_id_mask, /*score=*/0}};
 
-  ResultSpecProto::TypePropertyMask type_property_mask;
+  TypePropertyMask type_property_mask;
   type_property_mask.set_schema_type("Email");
   std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
   type_projection_tree_map.insert(
@@ -1297,7 +1297,7 @@
       {document_id1, hit_section_id_mask, /*score=*/0},
       {document_id2, hit_section_id_mask, /*score=*/0}};
 
-  ResultSpecProto::TypePropertyMask type_property_mask;
+  TypePropertyMask type_property_mask;
   type_property_mask.set_schema_type("Email");
   type_property_mask.add_paths("nonExistentProperty");
   std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
@@ -1381,7 +1381,7 @@
       {document_id1, hit_section_id_mask, /*score=*/0},
       {document_id2, hit_section_id_mask, /*score=*/0}};
 
-  ResultSpecProto::TypePropertyMask type_property_mask;
+  TypePropertyMask type_property_mask;
   type_property_mask.set_schema_type("Email");
   type_property_mask.add_paths("name");
   type_property_mask.add_paths("nonExistentProperty");
@@ -1469,7 +1469,7 @@
       {document_id1, hit_section_id_mask, /*score=*/0},
       {document_id2, hit_section_id_mask, /*score=*/0}};
 
-  ResultSpecProto::TypePropertyMask type_property_mask;
+  TypePropertyMask type_property_mask;
   type_property_mask.set_schema_type("Email");
   type_property_mask.add_paths("name");
   std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
@@ -1558,7 +1558,7 @@
       {document_id1, hit_section_id_mask, /*score=*/0},
       {document_id2, hit_section_id_mask, /*score=*/0}};
 
-  ResultSpecProto::TypePropertyMask wildcard_type_property_mask;
+  TypePropertyMask wildcard_type_property_mask;
   wildcard_type_property_mask.set_schema_type(
       std::string(ProjectionTree::kSchemaTypeWildcard));
   wildcard_type_property_mask.add_paths("name");
@@ -1648,10 +1648,10 @@
       {document_id1, hit_section_id_mask, /*score=*/0},
       {document_id2, hit_section_id_mask, /*score=*/0}};
 
-  ResultSpecProto::TypePropertyMask email_type_property_mask;
+  TypePropertyMask email_type_property_mask;
   email_type_property_mask.set_schema_type("Email");
   email_type_property_mask.add_paths("body");
-  ResultSpecProto::TypePropertyMask wildcard_type_property_mask;
+  TypePropertyMask wildcard_type_property_mask;
   wildcard_type_property_mask.set_schema_type(
       std::string(ProjectionTree::kSchemaTypeWildcard));
   wildcard_type_property_mask.add_paths("name");
@@ -1752,10 +1752,10 @@
       {document_id1, hit_section_id_mask, /*score=*/0},
       {document_id2, hit_section_id_mask, /*score=*/0}};
 
-  ResultSpecProto::TypePropertyMask email_type_property_mask;
+  TypePropertyMask email_type_property_mask;
   email_type_property_mask.set_schema_type("Email");
   email_type_property_mask.add_paths("sender.name");
-  ResultSpecProto::TypePropertyMask wildcard_type_property_mask;
+  TypePropertyMask wildcard_type_property_mask;
   wildcard_type_property_mask.set_schema_type(
       std::string(ProjectionTree::kSchemaTypeWildcard));
   wildcard_type_property_mask.add_paths("name");
@@ -1861,10 +1861,10 @@
       {document_id1, hit_section_id_mask, /*score=*/0},
       {document_id2, hit_section_id_mask, /*score=*/0}};
 
-  ResultSpecProto::TypePropertyMask email_type_property_mask;
+  TypePropertyMask email_type_property_mask;
   email_type_property_mask.set_schema_type("Email");
   email_type_property_mask.add_paths("sender.name");
-  ResultSpecProto::TypePropertyMask wildcard_type_property_mask;
+  TypePropertyMask wildcard_type_property_mask;
   wildcard_type_property_mask.set_schema_type(
       std::string(ProjectionTree::kSchemaTypeWildcard));
   wildcard_type_property_mask.add_paths("sender");
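For reference, the map these tests assemble by hand mirrors what result-state.cc below builds from the ResultSpecProto. A minimal sketch, assuming only the TypePropertyMask and ProjectionTree types from this change (BuildProjectionTreeMap itself is a hypothetical helper, not an Icing API):

std::unordered_map<std::string, ProjectionTree> BuildProjectionTreeMap(
    const ResultSpecProto& result_spec) {
  std::unordered_map<std::string, ProjectionTree> map;
  // One projection tree per schema type named in the result spec; the
  // wildcard entry, if any, is keyed by ProjectionTree::kSchemaTypeWildcard.
  for (const TypePropertyMask& mask : result_spec.type_property_masks()) {
    map.insert({mask.schema_type(), ProjectionTree(mask)});
  }
  return map;
}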
diff --git a/icing/result/result-state.cc b/icing/result/result-state.cc
index f1479b9..82738a9 100644
--- a/icing/result/result-state.cc
+++ b/icing/result/result-state.cc
@@ -47,7 +47,7 @@
       num_returned_(0),
       scored_document_hit_comparator_(scoring_spec.order_by() ==
                                       ScoringSpecProto::Order::DESC) {
-  for (const ResultSpecProto::TypePropertyMask& type_field_mask :
+  for (const TypePropertyMask& type_field_mask :
        result_spec.type_property_masks()) {
     projection_tree_map_.insert(
         {type_field_mask.schema_type(), ProjectionTree(type_field_mask)});
diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc
index 09d0f7a..d4a5f79 100644
--- a/icing/result/snippet-retriever.cc
+++ b/icing/result/snippet-retriever.cc
@@ -334,7 +334,8 @@
         snippet_spec.num_matches_per_property();
 
     // Retrieve values and snippet them.
-    auto values_or = schema_store_.GetSectionContent(document, metadata->path);
+    auto values_or =
+        schema_store_.GetStringSectionContent(document, metadata->path);
     if (!values_or.ok()) {
       continue;
     }
@@ -344,7 +345,7 @@
       // If we couldn't create the tokenizer properly, just skip this section.
       continue;
     }
-    std::vector<std::string> values = values_or.ValueOrDie();
+    std::vector<std::string_view> values = values_or.ValueOrDie();
     for (int value_index = 0; value_index < values.size(); ++value_index) {
       if (match_options.max_matches_remaining <= 0) {
         break;
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index e54cc0c..b43d2a4 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -421,16 +421,16 @@
   return schema_type_mapper_->Get(schema_type);
 }
 
-libtextclassifier3::StatusOr<std::vector<std::string>>
-SchemaStore::GetSectionContent(const DocumentProto& document,
-                               std::string_view section_path) const {
-  return section_manager_->GetSectionContent(document, section_path);
+libtextclassifier3::StatusOr<std::vector<std::string_view>>
+SchemaStore::GetStringSectionContent(const DocumentProto& document,
+                                     std::string_view section_path) const {
+  return section_manager_->GetStringSectionContent(document, section_path);
 }
 
-libtextclassifier3::StatusOr<std::vector<std::string>>
-SchemaStore::GetSectionContent(const DocumentProto& document,
-                               SectionId section_id) const {
-  return section_manager_->GetSectionContent(document, section_id);
+libtextclassifier3::StatusOr<std::vector<std::string_view>>
+SchemaStore::GetStringSectionContent(const DocumentProto& document,
+                                     SectionId section_id) const {
+  return section_manager_->GetStringSectionContent(document, section_id);
 }
 
 libtextclassifier3::StatusOr<const SectionMetadata*>
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index cff7abd..3854704 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -180,8 +180,9 @@
   //     1. Property is optional and not found in the document
   //     2. section_path is invalid
   //     3. Content is empty
-  libtextclassifier3::StatusOr<std::vector<std::string>> GetSectionContent(
-      const DocumentProto& document, std::string_view section_path) const;
+  libtextclassifier3::StatusOr<std::vector<std::string_view>>
+  GetStringSectionContent(const DocumentProto& document,
+                          std::string_view section_path) const;
 
   // Finds content of a section by id
   //
@@ -189,8 +190,9 @@
  //   A list of content strings on success
   //   INVALID_ARGUMENT if section id is invalid
   //   NOT_FOUND if type config name of document not found
-  libtextclassifier3::StatusOr<std::vector<std::string>> GetSectionContent(
-      const DocumentProto& document, SectionId section_id) const;
+  libtextclassifier3::StatusOr<std::vector<std::string_view>>
+  GetStringSectionContent(const DocumentProto& document,
+                          SectionId section_id) const;
 
   // Returns the SectionMetadata associated with the SectionId that's in the
   // SchemaTypeId.
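Worth noting at the call sites: with the move to std::string_view, the returned values alias storage inside the passed DocumentProto, so the document must outlive any use of the result. A minimal sketch of the safe pattern, assuming the GetStringSectionContent declaration above (the printing is illustrative only):

void PrintSubject(const SchemaStore& schema_store,
                  const DocumentProto& document) {
  auto content_or = schema_store.GetStringSectionContent(document, "subject");
  if (!content_or.ok()) {
    return;  // Optional property missing, invalid path, or empty content.
  }
  // These views point into `document`; they must not outlive it.
  for (std::string_view value : content_or.ValueOrDie()) {
    std::cout << value << std::endl;
  }
}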
diff --git a/icing/schema/section-manager.cc b/icing/schema/section-manager.cc
index 0285cef..a10e9b9 100644
--- a/icing/schema/section-manager.cc
+++ b/icing/schema/section-manager.cc
@@ -155,8 +155,9 @@
 
 // Helper function to get string content from a property. Each repeated
 // string value becomes a separate entry in the returned vector. We only
 // care about the STRING data type.
-std::vector<std::string> GetPropertyContent(const PropertyProto& property) {
-  std::vector<std::string> values;
+std::vector<std::string_view> GetStringPropertyContent(
+    const PropertyProto& property) {
+  std::vector<std::string_view> values;
   if (!property.string_values().empty()) {
     std::copy(property.string_values().begin(), property.string_values().end(),
               std::back_inserter(values));
@@ -194,9 +195,9 @@
       schema_type_mapper, std::move(section_metadata_cache)));
 }
 
-libtextclassifier3::StatusOr<std::vector<std::string>>
-SectionManager::GetSectionContent(const DocumentProto& document,
-                                  std::string_view section_path) const {
+libtextclassifier3::StatusOr<std::vector<std::string_view>>
+SectionManager::GetStringSectionContent(const DocumentProto& document,
+                                        std::string_view section_path) const {
   // Finds the first property name in section_path
   size_t separator_position = section_path.find(kPropertySeparator);
   std::string_view current_property_name =
@@ -221,7 +222,8 @@
 
   if (separator_position == std::string::npos) {
     // Current property name is the last one in section path
-    std::vector<std::string> content = GetPropertyContent(*property_iterator);
+    std::vector<std::string_view> content =
+        GetStringPropertyContent(*property_iterator);
     if (content.empty()) {
       // The content of property is explicitly set to empty, we'll treat it as
       // NOT_FOUND because the index doesn't care about empty strings.
@@ -234,11 +236,13 @@
   // Gets section content recursively
   std::string_view sub_section_path =
       section_path.substr(separator_position + 1);
-  std::vector<std::string> nested_document_content;
+  std::vector<std::string_view> nested_document_content;
   for (const auto& nested_document : property_iterator->document_values()) {
-    auto content_or = GetSectionContent(nested_document, sub_section_path);
+    auto content_or =
+        GetStringSectionContent(nested_document, sub_section_path);
     if (content_or.ok()) {
-      std::vector<std::string> content = std::move(content_or).ValueOrDie();
+      std::vector<std::string_view> content =
+          std::move(content_or).ValueOrDie();
       std::move(content.begin(), content.end(),
                 std::back_inserter(nested_document_content));
     }
@@ -251,9 +255,9 @@
   return nested_document_content;
 }
 
-libtextclassifier3::StatusOr<std::vector<std::string>>
-SectionManager::GetSectionContent(const DocumentProto& document,
-                                  SectionId section_id) const {
+libtextclassifier3::StatusOr<std::vector<std::string_view>>
+SectionManager::GetStringSectionContent(const DocumentProto& document,
+                                        SectionId section_id) const {
   if (!IsSectionIdValid(section_id)) {
     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
         "Section id %d is greater than the max value %d", section_id,
@@ -270,7 +274,7 @@
   }
   // The index of metadata list is the same as the section id, so we can use
   // section id as the index.
-  return GetSectionContent(document, metadata_list[section_id].path);
+  return GetStringSectionContent(document, metadata_list[section_id].path);
 }
 
 libtextclassifier3::StatusOr<const SectionMetadata*>
@@ -303,7 +307,7 @@
   std::vector<Section> sections;
   for (const auto& section_metadata : metadata_list) {
     auto section_content_or =
-        GetSectionContent(document, section_metadata.path);
+        GetStringSectionContent(document, section_metadata.path);
     // Adds to result vector if section is found in document
     if (section_content_or.ok()) {
       sections.emplace_back(SectionMetadata(section_metadata),
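Concretely, with the conversation fixture used in the tests further below, the recursive branch walks each nested document under `emails` and concatenates the per-document results in order:

// Both nested emails carry subject "the subject", so:
auto content_or = section_manager->GetStringSectionContent(
    conversation_document_, "emails.subject");
// content_or.ValueOrDie() == {"the subject", "the subject"}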
diff --git a/icing/schema/section-manager.h b/icing/schema/section-manager.h
index 475fa6a..191a169 100644
--- a/icing/schema/section-manager.h
+++ b/icing/schema/section-manager.h
@@ -61,8 +61,9 @@
   //     1. Property is optional and not found in the document
   //     2. section_path is invalid
   //     3. Content is empty
-  libtextclassifier3::StatusOr<std::vector<std::string>> GetSectionContent(
-      const DocumentProto& document, std::string_view section_path) const;
+  libtextclassifier3::StatusOr<std::vector<std::string_view>>
+  GetStringSectionContent(const DocumentProto& document,
+                          std::string_view section_path) const;
 
   // Finds content of a section by id
   //
@@ -70,8 +71,9 @@
  //   A list of content strings on success
   //   INVALID_ARGUMENT if section id is invalid
   //   NOT_FOUND if type config name of document not found
-  libtextclassifier3::StatusOr<std::vector<std::string>> GetSectionContent(
-      const DocumentProto& document, SectionId section_id) const;
+  libtextclassifier3::StatusOr<std::vector<std::string_view>>
+  GetStringSectionContent(const DocumentProto& document,
+                          SectionId section_id) const;
 
   // Returns the SectionMetadata associated with the SectionId that's in the
   // SchemaTypeId.
diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc
index 2d995df..15d9a19 100644
--- a/icing/schema/section-manager_test.cc
+++ b/icing/schema/section-manager_test.cc
@@ -186,60 +186,64 @@
                HasSubstr("Too many properties")));
 }
 
-TEST_F(SectionManagerTest, GetSectionContent) {
+TEST_F(SectionManagerTest, GetStringSectionContent) {
   ICING_ASSERT_OK_AND_ASSIGN(
       auto section_manager,
       SectionManager::Create(type_config_map_, schema_type_mapper_.get()));
 
   // Test simple section paths
-  EXPECT_THAT(section_manager->GetSectionContent(email_document_,
-                                                 /*section_path*/ "subject"),
-              IsOkAndHolds(ElementsAre("the subject")));
-  EXPECT_THAT(section_manager->GetSectionContent(email_document_,
-                                                 /*section_path*/ "text"),
+  EXPECT_THAT(
+      section_manager->GetStringSectionContent(email_document_,
+                                               /*section_path*/ "subject"),
+      IsOkAndHolds(ElementsAre("the subject")));
+  EXPECT_THAT(section_manager->GetStringSectionContent(email_document_,
+                                                       /*section_path*/ "text"),
               IsOkAndHolds(ElementsAre("the text")));
 
  // Test repeated values; each value is returned as a separate entry
-  ICING_ASSERT_OK_AND_ASSIGN(auto content, section_manager->GetSectionContent(
-                                               email_document_,
+  ICING_ASSERT_OK_AND_ASSIGN(
+      auto content,
+      section_manager->GetStringSectionContent(email_document_,
                                                /*section_path*/ "recipients"));
   EXPECT_THAT(content, ElementsAre("recipient1", "recipient2", "recipient3"));
 
   // Test concatenated section paths: "property1.property2"
-  ICING_ASSERT_OK_AND_ASSIGN(content, section_manager->GetSectionContent(
+  ICING_ASSERT_OK_AND_ASSIGN(content, section_manager->GetStringSectionContent(
                                           conversation_document_,
                                           /*section_path*/ "emails.subject"));
   EXPECT_THAT(content, ElementsAre("the subject", "the subject"));
 
-  ICING_ASSERT_OK_AND_ASSIGN(content, section_manager->GetSectionContent(
+  ICING_ASSERT_OK_AND_ASSIGN(content, section_manager->GetStringSectionContent(
                                           conversation_document_,
                                           /*section_path*/ "emails.text"));
   EXPECT_THAT(content, ElementsAre("the text", "the text"));
 
-  ICING_ASSERT_OK_AND_ASSIGN(
-      content,
-      section_manager->GetSectionContent(conversation_document_,
-                                         /*section_path*/ "emails.recipients"));
+  ICING_ASSERT_OK_AND_ASSIGN(content,
+                             section_manager->GetStringSectionContent(
+                                 conversation_document_,
+                                 /*section_path*/ "emails.recipients"));
   EXPECT_THAT(content, ElementsAre("recipient1", "recipient2", "recipient3",
                                    "recipient1", "recipient2", "recipient3"));
 
   // Test non-existing paths
-  EXPECT_THAT(section_manager->GetSectionContent(email_document_,
-                                                 /*section_path*/ "name"),
-              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-  EXPECT_THAT(section_manager->GetSectionContent(email_document_,
-                                                 /*section_path*/ "invalid"),
+  EXPECT_THAT(section_manager->GetStringSectionContent(email_document_,
+                                                       /*section_path*/ "name"),
               StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
   EXPECT_THAT(
-      section_manager->GetSectionContent(conversation_document_,
-                                         /*section_path*/ "emails.invalid"),
+      section_manager->GetStringSectionContent(email_document_,
+                                               /*section_path*/ "invalid"),
       StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  EXPECT_THAT(section_manager->GetStringSectionContent(
+                  conversation_document_,
+                  /*section_path*/ "emails.invalid"),
+              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
 
   // Test other data types
   // BYTES type can't be indexed, so content won't be returned
-  EXPECT_THAT(section_manager->GetSectionContent(email_document_,
-                                                 /*section_path*/ "attachment"),
-              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  EXPECT_THAT(
+      section_manager->GetStringSectionContent(email_document_,
+                                               /*section_path*/ "attachment"),
+      StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
 
   // The following tests are similar to the ones above but use section ids
   // instead of section paths
@@ -249,16 +253,16 @@
   SectionId subject_section_id = 1;
   SectionId invalid_email_section_id = 2;
   ICING_ASSERT_OK_AND_ASSIGN(
-      content, section_manager->GetSectionContent(email_document_,
-                                                  recipients_section_id));
+      content, section_manager->GetStringSectionContent(email_document_,
+                                                        recipients_section_id));
   EXPECT_THAT(content, ElementsAre("recipient1", "recipient2", "recipient3"));
 
-  EXPECT_THAT(
-      section_manager->GetSectionContent(email_document_, subject_section_id),
-      IsOkAndHolds(ElementsAre("the subject")));
+  EXPECT_THAT(section_manager->GetStringSectionContent(email_document_,
+                                                       subject_section_id),
+              IsOkAndHolds(ElementsAre("the subject")));
 
-  EXPECT_THAT(section_manager->GetSectionContent(email_document_,
-                                                 invalid_email_section_id),
+  EXPECT_THAT(section_manager->GetStringSectionContent(
+                  email_document_, invalid_email_section_id),
               StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
 
   // Conversation (section id -> section path):
@@ -270,21 +274,21 @@
   SectionId name_section_id = 2;
   SectionId invalid_conversation_section_id = 3;
   ICING_ASSERT_OK_AND_ASSIGN(
-      content, section_manager->GetSectionContent(
+      content, section_manager->GetStringSectionContent(
                    conversation_document_, emails_recipients_section_id));
   EXPECT_THAT(content, ElementsAre("recipient1", "recipient2", "recipient3",
                                    "recipient1", "recipient2", "recipient3"));
 
   ICING_ASSERT_OK_AND_ASSIGN(
-      content, section_manager->GetSectionContent(conversation_document_,
-                                                  emails_subject_section_id));
+      content, section_manager->GetStringSectionContent(
+                   conversation_document_, emails_subject_section_id));
   EXPECT_THAT(content, ElementsAre("the subject", "the subject"));
 
-  EXPECT_THAT(section_manager->GetSectionContent(conversation_document_,
-                                                 name_section_id),
+  EXPECT_THAT(section_manager->GetStringSectionContent(conversation_document_,
+                                                       name_section_id),
               StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
 
-  EXPECT_THAT(section_manager->GetSectionContent(
+  EXPECT_THAT(section_manager->GetStringSectionContent(
                   conversation_document_, invalid_conversation_section_id),
               StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
 }
diff --git a/icing/schema/section.h b/icing/schema/section.h
index 058f261..40e623a 100644
--- a/icing/schema/section.h
+++ b/icing/schema/section.h
@@ -17,6 +17,7 @@
 
 #include <cstdint>
 #include <string>
+#include <string_view>
 #include <utility>
 #include <vector>
 
@@ -83,9 +84,10 @@
 // values of a property.
 struct Section {
   SectionMetadata metadata;
-  std::vector<std::string> content;
+  std::vector<std::string_view> content;
 
-  Section(SectionMetadata&& metadata_in, std::vector<std::string>&& content_in)
+  Section(SectionMetadata&& metadata_in,
+          std::vector<std::string_view>&& content_in)
       : metadata(std::move(metadata_in)), content(std::move(content_in)) {}
 };
 
diff --git a/icing/scoring/bm25f-calculator.cc b/icing/scoring/bm25f-calculator.cc
new file mode 100644
index 0000000..7495e98
--- /dev/null
+++ b/icing/scoring/bm25f-calculator.cc
@@ -0,0 +1,223 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/scoring/bm25f-calculator.h"
+
+#include <cmath>
+#include <cstdint>
+#include <cstdlib>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/absl_ports/str_cat.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/store/corpus-associated-scoring-data.h"
+#include "icing/store/corpus-id.h"
+#include "icing/store/document-associated-score-data.h"
+#include "icing/store/document-id.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+// Smoothing parameter, determines the relevance of higher term frequency
+// documents. The higher k1, the higher their relevance. 1.2 is the default
+// value in the BM25F literature and works well in most corpora.
+constexpr float k1_ = 1.2f;
+// Smoothing parameter, determines the weight of the document length on the
+// final score. The higher b, the higher the influence of the document length.
+// 0.7 is the default value in the BM25F literature and works well in most
+// corpora.
+constexpr float b_ = 0.7f;
+
+// TODO(b/158603900): add tests for Bm25fCalculator
+Bm25fCalculator::Bm25fCalculator(const DocumentStore *document_store)
+    : document_store_(document_store) {}
+
+// During initialization, Bm25fCalculator iterates through
+// hit-iterators for each query term to pre-compute n(q_i) for each corpus under
+// consideration.
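+// For example, if the term "foo" hits two documents in corpus 0 and one
+// document in corpus 1, corpus_nqi_map_ will afterwards hold
+//   {<corpus 0, "foo">: 2, <corpus 1, "foo">: 1}.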
+void Bm25fCalculator::PrepareToScore(
+    std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+        *query_term_iterators) {
+  Clear();
+  TermId term_id = 0;
+  for (auto &iter : *query_term_iterators) {
+    const std::string &term = iter.first;
+    if (term_id_map_.find(term) != term_id_map_.end()) {
+      continue;
+    }
+    term_id_map_[term] = ++term_id;
+    DocHitInfoIterator *term_it = iter.second.get();
+    while (term_it->Advance().ok()) {
+      auto status_or = document_store_->GetDocumentAssociatedScoreData(
+          term_it->doc_hit_info().document_id());
+      if (!status_or.ok()) {
+        ICING_LOG(ERROR) << "No document score data";
+        continue;
+      }
+      DocumentAssociatedScoreData data = status_or.ValueOrDie();
+      CorpusId corpus_id = data.corpus_id();
+      CorpusTermInfo corpus_term_info(corpus_id, term_id);
+      corpus_nqi_map_[corpus_term_info.value]++;
+    }
+  }
+}
+
+void Bm25fCalculator::Clear() {
+  term_id_map_.clear();
+  corpus_avgdl_map_.clear();
+  corpus_nqi_map_.clear();
+  corpus_idf_map_.clear();
+}
+
+// Computes BM25F relevance score for query terms matched in document D.
+//
+// BM25F = \sum_i IDF(q_i) * tf(q_i, D)
+//
+// where IDF(q_i) is the Inverse Document Frequency (IDF) weight of the query
+// term q_i in the corpus with document D, and tf(q_i, D) is the weighted and
+// normalized term frequency of query term q_i in the document D.
+float Bm25fCalculator::ComputeScore(const DocHitInfoIterator *query_it,
+                                    const DocHitInfo &hit_info,
+                                    double default_score) {
+  auto status_or =
+      document_store_->GetDocumentAssociatedScoreData(hit_info.document_id());
+  if (!status_or.ok()) {
+    ICING_LOG(ERROR) << "No document score data";
+    return default_score;
+  }
+  DocumentAssociatedScoreData data = status_or.ValueOrDie();
+  std::vector<TermMatchInfo> matched_terms_stats;
+  query_it->PopulateMatchedTermsStats(&matched_terms_stats);
+
+  float score = 0;
+  for (const TermMatchInfo &term_match_info : matched_terms_stats) {
+    float idf_weight =
+        GetCorpusIdfWeightForTerm(term_match_info.term, data.corpus_id());
+    float normalized_tf =
+        ComputedNormalizedTermFrequency(term_match_info, hit_info, data);
+    score += idf_weight * normalized_tf;
+  }
+
+  ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+      "BM25F: corpus_id:%d docid:%d score:%f\n", data.corpus_id(),
+      hit_info.document_id(), score);
+  return score;
+}
+
+// Compute inverse document frequency (IDF) weight for query term in the given
+// corpus, and cache it in the map.
+//
+//                     N - n(q_i) + 0.5
+// IDF(q_i) = log(1 + ------------------)
+//                       n(q_i) + 0.5
+//
+// where N is the number of documents in the corpus, and n(q_i) is the number
+// of documents in the corpus containing the query term q_i.
+float Bm25fCalculator::GetCorpusIdfWeightForTerm(std::string_view term,
+                                                 CorpusId corpus_id) {
+  TermId term_id = term_id_map_[term];
+
+  CorpusTermInfo corpus_term_info(corpus_id, term_id);
+  auto iter = corpus_idf_map_.find(corpus_term_info.value);
+  if (iter != corpus_idf_map_.end()) {
+    return iter->second;
+  }
+
+  // First, figure out corpus scoring data.
+  auto status_or = document_store_->GetCorpusAssociatedScoreData(corpus_id);
+  if (!status_or.ok()) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "No scoring data for corpus [%d]", corpus_id);
+    return 0;
+  }
+  CorpusAssociatedScoreData csdata = status_or.ValueOrDie();
+
+  uint32_t num_docs = csdata.num_docs();
+  uint32_t nqi = corpus_nqi_map_[corpus_term_info.value];
+  float idf =
+      nqi != 0 ? log(1.0f + (num_docs - nqi + 0.5f) / (nqi + 0.5f)) : 0.0f;
+  corpus_idf_map_.insert({corpus_term_info.value, idf});
+  ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+      "corpus_id:%d term:%s N:%d nqi:%d idf:%f", corpus_id,
+      std::string(term).c_str(), num_docs, nqi, idf);
+  return idf;
+}
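+// Numeric check of the formula above (illustrative values, not from a test):
+// with N = 100 documents in the corpus and n(q_i) = 10 of them containing
+// the term,
+//
+//   IDF(q_i) = log(1 + (100 - 10 + 0.5) / (10 + 0.5))
+//            = log(1 + 90.5 / 10.5) ~= 2.264   (natural log, as above)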
+
+// Get per corpus average document length and cache the result in the map.
+float Bm25fCalculator::GetCorpusAvgDocLength(CorpusId corpus_id) {
+  auto iter = corpus_avgdl_map_.find(corpus_id);
+  if (iter != corpus_avgdl_map_.end()) {
+    return iter->second;
+  }
+
+  // First, figure out corpus scoring data.
+  auto status_or = document_store_->GetCorpusAssociatedScoreData(corpus_id);
+  if (!status_or.ok()) {
+    ICING_LOG(ERROR) << IcingStringUtil::StringPrintf(
+        "No scoring data for corpus [%d]", corpus_id);
+    return 0;
+  }
+  CorpusAssociatedScoreData csdata = status_or.ValueOrDie();
+
+  corpus_avgdl_map_[corpus_id] = csdata.average_doc_length_in_tokens();
+  return csdata.average_doc_length_in_tokens();
+}
+
+// Computes normalized term frequency for query term q_i in document D.
+//
+//                            f(q_i, D) * (k1 + 1)
+// Normalized TF = --------------------------------------------
+//                 f(q_i, D) + k1 * (1 - b + b * |D| / avgdl)
+//
+// where f(q_i, D) is the frequency of query term q_i in document D,
+// |D| is the #tokens in D, avgdl is the average document length in the corpus,
+// k1 and b are smoothing parameters.
+float Bm25fCalculator::ComputedNormalizedTermFrequency(
+    const TermMatchInfo &term_match_info, const DocHitInfo &hit_info,
+    const DocumentAssociatedScoreData &data) {
+  uint32_t dl = data.length_in_tokens();
+  float avgdl = GetCorpusAvgDocLength(data.corpus_id());
+  float f_q =
+      ComputeTermFrequencyForMatchedSections(data.corpus_id(), term_match_info);
+  float normalized_tf =
+      f_q * (k1_ + 1) / (f_q + k1_ * (1 - b_ + b_ * dl / avgdl));
+
+  ICING_VLOG(1) << IcingStringUtil::StringPrintf(
+      "corpus_id:%d docid:%d dl:%d avgdl:%f f_q:%f norm_tf:%f\n",
+      data.corpus_id(), hit_info.document_id(), dl, avgdl, f_q, normalized_tf);
+  return normalized_tf;
+}
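+// Numeric check (illustrative values): with f(q_i, D) = 3, |D| = 100 tokens,
+// avgdl = 50, and the k1 = 1.2, b = 0.7 defaults above,
+//
+//   Normalized TF = 3 * (1.2 + 1) / (3 + 1.2 * (1 - 0.7 + 0.7 * 100 / 50))
+//                 = 6.6 / 5.04 ~= 1.310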
+
+// Note: once we support section weights, we should update this function to
+// compute the weighted term frequency.
+float Bm25fCalculator::ComputeTermFrequencyForMatchedSections(
+    CorpusId corpus_id, const TermMatchInfo &term_match_info) const {
+  float sum = 0.0f;
+  SectionIdMask sections = term_match_info.section_ids_mask;
+  while (sections != 0) {
+    SectionId section_id = __builtin_ctz(sections);
+    sections &= ~(1u << section_id);
+
+    Hit::TermFrequency tf = term_match_info.term_frequencies[section_id];
+    if (tf != Hit::kNoTermFrequency) {
+      sum += tf;
+    }
+  }
+  return sum;
+}
+
+}  // namespace lib
+}  // namespace icing
diff --git a/icing/scoring/bm25f-calculator.h b/icing/scoring/bm25f-calculator.h
new file mode 100644
index 0000000..91b4f24
--- /dev/null
+++ b/icing/scoring/bm25f-calculator.h
@@ -0,0 +1,148 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCORING_BM25F_CALCULATOR_H_
+#define ICING_SCORING_BM25F_CALCULATOR_H_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+
+#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/legacy/index/icing-bit-util.h"
+#include "icing/store/corpus-id.h"
+#include "icing/store/document-store.h"
+
+namespace icing {
+namespace lib {
+
+// Bm25fCalculator encapsulates the logic to compute BM25F term-weight based
+// ranking function.
+//
+// The formula to compute BM25F is as follows:
+//
+// BM25F = \sum_i IDF(q_i) * tf(q_i, D)
+//
+// where IDF(q_i) is the Inverse Document Frequency (IDF) weight of the query
+// term q_i in the corpus with document D, and tf(q_i, D) is the weighted and
+// normalized term frequency of query term q_i in the document D.
+//
+// IDF(q_i) is computed as follows:
+//
+//                     N - n(q_i) + 0.5
+// IDF(q_i) = log(1 + ------------------)
+//                       n(q_i) + 0.5
+//
+// where N is the number of documents in the corpus, and n(q_i) is the number
+// of documents in the corpus containing the query term q_i.
+//
+// Lastly, tf(q_i, D) is computed as follows:
+//
+//                            f(q_i, D) * (k1 + 1)
+// Normalized TF = --------------------------------------------
+//                 f(q_i, D) + k1 * (1 - b + b * |D| / avgdl)
+//
+// where f(q_i, D) is the frequency of query term q_i in document D,
+// |D| is the #tokens in D, avgdl is the average document length in the corpus,
+// k1 and b are smoothing parameters.
+//
+// see: go/icing-bm25f
+// see: glossary/bm25
+class Bm25fCalculator {
+ public:
+  explicit Bm25fCalculator(const DocumentStore *document_store);
+
+  // Precompute and cache statistics relevant to BM25F.
+  // Populates term_id_map_ and corpus_nqi_map_ for use while scoring other
+  // results.
+  // The query_term_iterators map is used to build the
+  // std::unordered_map<std::string_view, TermId> term_id_map_. It must
+  // outlive this Bm25fCalculator; otherwise the string_view keys in
+  // term_id_map_, used later to compute document scores, will dangle.
+  void PrepareToScore(
+      std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+          *query_term_iterators);
+
+  // Compute the BM25F relevance score for the given hit, represented by
+  // DocHitInfo.
+  // The default score will be returned only when the scorer fails to find or
+  // calculate a score for the document.
+  float ComputeScore(const DocHitInfoIterator *query_it,
+                     const DocHitInfo &hit_info, double default_score);
+
+ private:
+  // Compact ID for each query term.
+  using TermId = uint16_t;
+
+  // Compact representation of <CorpusId, TermId> for use as a key in a
+  // hash_map.
+  struct CorpusTermInfo {
+    // Layout bits: 16 bit CorpusId + 16 bit TermId
+    using Value = uint32_t;
+
+    Value value;
+
+    // Bit widths for the packed layout documented above. (Note: plain
+    // sizeof() would give bytes, not bits.)
+    static constexpr int kCorpusIdBits = 16;
+    static constexpr int kTermIdBits = 16;
+
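+    // For example (assuming BITFIELD_OR ORs the low `len` bits of `val`
+    // into `value` at bit `offset`), corpus_id = 3 and term_id = 7 pack as
+    //   value = (3 << 16) | 7 = 0x00030007.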
+    explicit CorpusTermInfo(CorpusId corpus_id, TermId term_id) : value(0) {
+      BITFIELD_OR(value, kTermIdBits, kCorpusIdBits,
+                  static_cast<uint64_t>(corpus_id));
+      BITFIELD_OR(value, 0, kTermIdBits, term_id);
+    }
+
+    bool operator==(const CorpusTermInfo &other) const {
+      return value == other.value;
+    }
+  };
+
+  float GetCorpusIdfWeightForTerm(std::string_view term, CorpusId corpus_id);
+  float GetCorpusAvgDocLength(CorpusId corpus_id);
+  float ComputedNormalizedTermFrequency(
+      const TermMatchInfo &term_match_info, const DocHitInfo &hit_info,
+      const DocumentAssociatedScoreData &data);
+  float ComputeTermFrequencyForMatchedSections(
+      CorpusId corpus_id, const TermMatchInfo &term_match_info) const;
+
+  void Clear();
+
+  const DocumentStore *document_store_;  // Does not own.
+
+  // Map from query term to compact term ID.
+  // Necessary as a key to the other maps.
+  // The use of string_view as the key here means that the query_term_iterators
+  // map passed to PrepareToScore() must outlive this Bm25fCalculator.
+  std::unordered_map<std::string_view, TermId> term_id_map_;
+
+  // Map from corpus ID to average document length (avgdl).
+  // Necessary to calculate the normalized term frequency.
+  // This information is cached in the DocumentStore::CorpusScoreCache
+  std::unordered_map<CorpusId, float> corpus_avgdl_map_;
+
+  // Map from <corpus ID, term ID> to number of documents containing term q_i,
+  // called n(q_i).
+  // Necessary to calculate IDF(q_i) (inverse document frequency).
+  // This information must be calculated by iterating through the hits for these
+  // terms.
+  std::unordered_map<CorpusTermInfo::Value, uint32_t> corpus_nqi_map_;
+
+  // Map from <corpus ID, term ID> to IDF(q_i) (inverse document frequency).
+  std::unordered_map<CorpusTermInfo::Value, float> corpus_idf_map_;
+};
+
+}  // namespace lib
+}  // namespace icing
+
+#endif  // ICING_SCORING_BM25F_CALCULATOR_H_
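A minimal sketch of the intended call sequence, assuming the caller already owns a `query_term_iterators` map and a root query iterator (both placeholder names, not part of this change):

// Sketch only. query_term_iterators must outlive `calculator`, since
// PrepareToScore stores string_views into its keys.
Bm25fCalculator calculator(document_store.get());
calculator.PrepareToScore(&query_term_iterators);
while (root_iterator->Advance().ok()) {
  float score = calculator.ComputeScore(
      root_iterator.get(), root_iterator->doc_hit_info(),
      /*default_score=*/0.0);
  // Rank the document from root_iterator->doc_hit_info() by `score`.
}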
diff --git a/icing/scoring/scorer.cc b/icing/scoring/scorer.cc
index 0739532..b29d8b6 100644
--- a/icing/scoring/scorer.cc
+++ b/icing/scoring/scorer.cc
@@ -18,8 +18,10 @@
 
 #include "icing/text_classifier/lib3/utils/base/statusor.h"
 #include "icing/absl_ports/canonical_errors.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
 #include "icing/proto/scoring.pb.h"
-#include "icing/store/document-associated-score-data.h"
+#include "icing/scoring/bm25f-calculator.h"
 #include "icing/store/document-id.h"
 #include "icing/store/document-store.h"
 #include "icing/util/status-macros.h"
@@ -33,10 +35,11 @@
                                double default_score)
       : document_store_(*document_store), default_score_(default_score) {}
 
-  double GetScore(DocumentId document_id) override {
+  double GetScore(const DocHitInfo& hit_info,
+                  const DocHitInfoIterator*) override {
     ICING_ASSIGN_OR_RETURN(
         DocumentAssociatedScoreData score_data,
-        document_store_.GetDocumentAssociatedScoreData(document_id),
+        document_store_.GetDocumentAssociatedScoreData(hit_info.document_id()),
         default_score_);
 
     return static_cast<double>(score_data.document_score());
@@ -53,10 +56,11 @@
                                            double default_score)
       : document_store_(*document_store), default_score_(default_score) {}
 
-  double GetScore(DocumentId document_id) override {
+  double GetScore(const DocHitInfo& hit_info,
+                  const DocHitInfoIterator*) override {
     ICING_ASSIGN_OR_RETURN(
         DocumentAssociatedScoreData score_data,
-        document_store_.GetDocumentAssociatedScoreData(document_id),
+        document_store_.GetDocumentAssociatedScoreData(hit_info.document_id()),
         default_score_);
 
     return static_cast<double>(score_data.creation_timestamp_ms());
@@ -67,6 +71,33 @@
   double default_score_;
 };
 
+class RelevanceScoreScorer : public Scorer {
+ public:
+  explicit RelevanceScoreScorer(
+      std::unique_ptr<Bm25fCalculator> bm25f_calculator, double default_score)
+      : bm25f_calculator_(std::move(bm25f_calculator)),
+        default_score_(default_score) {}
+
+  void PrepareToScore(
+      std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>*
+          query_term_iterators) {
+    bm25f_calculator_->PrepareToScore(query_term_iterators);
+  }
+
+  double GetScore(const DocHitInfo& hit_info,
+                  const DocHitInfoIterator* query_it) override {
+    if (!query_it) {
+      return default_score_;
+    }
+    return static_cast<double>(
+        bm25f_calculator_->ComputeScore(query_it, hit_info, default_score_));
+  }
+
+ private:
+  std::unique_ptr<Bm25fCalculator> bm25f_calculator_;
+  double default_score_;
+};
+
 // A scorer which assigns scores to documents based on usage reports.
 class UsageScorer : public Scorer {
  public:
@@ -77,10 +108,11 @@
         ranking_strategy_(ranking_strategy),
         default_score_(default_score) {}
 
-  double GetScore(DocumentId document_id) override {
-    ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores,
-                           document_store_.GetUsageScores(document_id),
-                           default_score_);
+  double GetScore(const DocHitInfo& hit_info,
+                  const DocHitInfoIterator*) override {
+    ICING_ASSIGN_OR_RETURN(
+        UsageStore::UsageScores usage_scores,
+        document_store_.GetUsageScores(hit_info.document_id()), default_score_);
 
     switch (ranking_strategy_) {
       case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT:
@@ -113,7 +145,10 @@
  public:
   explicit NoScorer(double default_score) : default_score_(default_score) {}
 
-  double GetScore(DocumentId document_id) override { return default_score_; }
+  double GetScore(const DocHitInfo& hit_info,
+                  const DocHitInfoIterator*) override {
+    return default_score_;
+  }
 
  private:
   double default_score_;
@@ -131,6 +166,11 @@
     case ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP:
       return std::make_unique<DocumentCreationTimestampScorer>(document_store,
                                                                default_score);
+    case ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE: {
+      auto bm25f_calculator = std::make_unique<Bm25fCalculator>(document_store);
+      return std::make_unique<RelevanceScoreScorer>(std::move(bm25f_calculator),
+                                                    default_score);
+    }
     case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT:
       [[fallthrough]];
     case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT:
@@ -144,9 +184,6 @@
     case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP:
       return std::make_unique<UsageScorer>(document_store, rank_by,
                                            default_score);
-    case ScoringSpecProto::RankingStrategy::
-        RELEVANCE_SCORE_NONFUNCTIONAL_PLACEHOLDER:
-      [[fallthrough]];
     case ScoringSpecProto::RankingStrategy::NONE:
       return std::make_unique<NoScorer>(default_score);
   }
diff --git a/icing/scoring/scorer.h b/icing/scoring/scorer.h
index 55c6b5c..a22db0f 100644
--- a/icing/scoring/scorer.h
+++ b/icing/scoring/scorer.h
@@ -18,6 +18,8 @@
 #include <memory>
+#include <string>
+#include <unordered_map>
 
 #include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/index/hit/doc-hit-info.h"
+#include "icing/index/iterator/doc-hit-info-iterator.h"
 #include "icing/proto/scoring.pb.h"
 #include "icing/store/document-id.h"
 #include "icing/store/document-store.h"
@@ -46,18 +48,28 @@
 
   // Returns a non-negative score of a document. The score can be a
   // document-associated score which comes from the DocumentProto directly, an
-  // accumulated score, or even an inferred score. If it fails to find or
-  // calculate a score, the user-provided default score will be returned.
+  // accumulated score, a relevance score, or even an inferred score. If it
+  // fails to find or calculate a score, the user-provided default score will be
+  // returned.
   //
   // Some examples of possible scores:
   // 1. Document-associated scores: document score, creation timestamp score.
   // 2. Accumulated scores: usage count score.
   // 3. Inferred scores: a score calculated by a machine learning model.
+  // 4. Relevance score: computed as BM25F score.
   //
   // NOTE: This method is performance-sensitive as it's called for every
   // potential result document. We're trying to avoid returning StatusOr<double>
   // to save a little more time and memory.
-  virtual double GetScore(DocumentId document_id) = 0;
+  virtual double GetScore(const DocHitInfo& hit_info,
+                          const DocHitInfoIterator* query_it = nullptr) = 0;
+
+  // Currently only overridden by the RelevanceScoreScorer.
+  // NOTE: the query_term_iterators map must outlive the scorer; see
+  // bm25f-calculator.h for more details.
+  virtual void PrepareToScore(
+      std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>*
+          query_term_iterators) {}
 };
 
 }  // namespace lib
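Under the new signature, a scorer that ignores term statistics would look like the following hypothetical subclass (a sketch, not part of this change):

class ConstantScorer : public Scorer {
 public:
  explicit ConstantScorer(double score) : score_(score) {}

  // Neither the hit info nor the query iterator is needed here; scorers
  // that don't use term stats can simply ignore them.
  double GetScore(const DocHitInfo& hit_info,
                  const DocHitInfoIterator* query_it) override {
    return score_;
  }

 private:
  double score_;
};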
diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc
index b669eb1..b114515 100644
--- a/icing/scoring/scorer_test.cc
+++ b/icing/scoring/scorer_test.cc
@@ -21,6 +21,7 @@
 #include "gtest/gtest.h"
 #include "icing/document-builder.h"
 #include "icing/file/filesystem.h"
+#include "icing/index/hit/doc-hit-info.h"
 #include "icing/proto/document.pb.h"
 #include "icing/proto/schema.pb.h"
 #include "icing/proto/scoring.pb.h"
@@ -120,9 +121,10 @@
       Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
                      /*default_score=*/10, document_store()));
 
-  DocumentId non_existing_document_id = 1;
+  // Non-existent document id
+  DocHitInfo docHitInfo = DocHitInfo(/*document_id_in=*/1);
   // The caller-provided default score is returned
-  EXPECT_THAT(scorer->GetScore(non_existing_document_id), Eq(10));
+  EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10));
 }
 
 TEST_F(ScorerTest, ShouldGetDefaultDocumentScore) {
@@ -142,7 +144,8 @@
       Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
                      /*default_score=*/10, document_store()));
 
-  EXPECT_THAT(scorer->GetScore(document_id), Eq(0));
+  DocHitInfo docHitInfo = DocHitInfo(document_id);
+  EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(0));
 }
 
 TEST_F(ScorerTest, ShouldGetCorrectDocumentScore) {
@@ -163,7 +166,32 @@
       Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
                      /*default_score=*/0, document_store()));
 
-  EXPECT_THAT(scorer->GetScore(document_id), Eq(5));
+  DocHitInfo docHitInfo = DocHitInfo(document_id);
+  EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(5));
+}
+
+// See scoring-processor_test.cc and icing-search-engine_test.cc for more
+// thorough BM25F scoring tests.
+TEST_F(ScorerTest, QueryIteratorNullRelevanceScoreShouldReturnDefaultScore) {
+  // Creates a test document with document score 5
+  DocumentProto test_document =
+      DocumentBuilder()
+          .SetScore(5)
+          .SetKey("icing", "email/1")
+          .SetSchema("email")
+          .AddStringProperty("subject", "subject foo")
+          .SetCreationTimestampMs(fake_clock2().GetSystemTimeMilliseconds())
+          .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+                             document_store()->Put(test_document));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<Scorer> scorer,
+      Scorer::Create(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE,
+                     /*default_score=*/10, document_store()));
+
+  DocHitInfo docHitInfo = DocHitInfo(document_id);
+  EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10));
 }
 
 TEST_F(ScorerTest, ShouldGetCorrectCreationTimestampScore) {
@@ -193,9 +221,11 @@
       Scorer::Create(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP,
                      /*default_score=*/0, document_store()));
 
-  EXPECT_THAT(scorer->GetScore(document_id1),
+  DocHitInfo docHitInfo1 = DocHitInfo(document_id1);
+  DocHitInfo docHitInfo2 = DocHitInfo(document_id2);
+  EXPECT_THAT(scorer->GetScore(docHitInfo1),
               Eq(fake_clock1().GetSystemTimeMilliseconds()));
-  EXPECT_THAT(scorer->GetScore(document_id2),
+  EXPECT_THAT(scorer->GetScore(docHitInfo2),
               Eq(fake_clock2().GetSystemTimeMilliseconds()));
 }
 
@@ -224,9 +254,10 @@
       std::unique_ptr<Scorer> scorer3,
       Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT,
                      /*default_score=*/0, document_store()));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  DocHitInfo docHitInfo = DocHitInfo(document_id);
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 
   // Report a type1 usage.
   UsageReport usage_report_type1 = CreateUsageReport(
@@ -234,9 +265,9 @@
       UsageReport::USAGE_TYPE1);
   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1));
 
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(1));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(1));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 }
 
 TEST_F(ScorerTest, ShouldGetCorrectUsageCountScoreForType2) {
@@ -264,9 +295,10 @@
       std::unique_ptr<Scorer> scorer3,
       Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT,
                      /*default_score=*/0, document_store()));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  DocHitInfo docHitInfo = DocHitInfo(document_id);
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 
   // Report a type2 usage.
   UsageReport usage_report_type2 = CreateUsageReport(
@@ -274,9 +306,9 @@
       UsageReport::USAGE_TYPE2);
   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2));
 
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(1));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(1));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 }
 
 TEST_F(ScorerTest, ShouldGetCorrectUsageCountScoreForType3) {
@@ -304,9 +336,10 @@
       std::unique_ptr<Scorer> scorer3,
       Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT,
                      /*default_score=*/0, document_store()));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  DocHitInfo docHitInfo = DocHitInfo(document_id);
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 
   // Report a type1 usage.
   UsageReport usage_report_type3 = CreateUsageReport(
@@ -314,9 +347,9 @@
       UsageReport::USAGE_TYPE3);
   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3));
 
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(1));
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(1));
 }
 
 TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType1) {
@@ -347,35 +380,36 @@
       Scorer::Create(
           ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP,
           /*default_score=*/0, document_store()));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  DocHitInfo docHitInfo = DocHitInfo(document_id);
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 
   UsageReport usage_report_type1_time1 = CreateUsageReport(
       /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
       UsageReport::USAGE_TYPE1);
   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time1));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(1));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(1));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 
   // Report usage with timestamp = 5000ms, score should be updated.
   UsageReport usage_report_type1_time5 = CreateUsageReport(
       /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
       UsageReport::USAGE_TYPE1);
   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time5));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(5));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 
   // Report usage with timestamp = 3000ms, score should not be updated.
   UsageReport usage_report_type1_time3 = CreateUsageReport(
       /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
       UsageReport::USAGE_TYPE1);
   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time3));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(5));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 }
 
 TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType2) {
@@ -406,35 +440,36 @@
       Scorer::Create(
           ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP,
           /*default_score=*/0, document_store()));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  DocHitInfo docHitInfo = DocHitInfo(document_id);
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 
   UsageReport usage_report_type2_time1 = CreateUsageReport(
       /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
       UsageReport::USAGE_TYPE2);
   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time1));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(1));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(1));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 
   // Report usage with timestamp = 5000ms, score should be updated.
   UsageReport usage_report_type2_time5 = CreateUsageReport(
       /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
       UsageReport::USAGE_TYPE2);
   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time5));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(5));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 
   // Report usage with timestamp = 3000ms, score should not be updated.
   UsageReport usage_report_type2_time3 = CreateUsageReport(
       /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
       UsageReport::USAGE_TYPE2);
   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time3));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(5));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 }
 
 TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType3) {
@@ -465,35 +500,36 @@
       Scorer::Create(
           ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP,
           /*default_score=*/0, document_store()));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+  DocHitInfo docHitInfo = DocHitInfo(document_id);
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
 
   UsageReport usage_report_type3_time1 = CreateUsageReport(
       /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
       UsageReport::USAGE_TYPE3);
   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time1));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(1));
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(1));
 
   // Report usage with timestamp = 5000ms, score should be updated.
   UsageReport usage_report_type3_time5 = CreateUsageReport(
       /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
       UsageReport::USAGE_TYPE3);
   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time5));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(5));
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5));
 
   // Report usage with timestamp = 3000ms, score should not be updated.
   UsageReport usage_report_type3_time3 = CreateUsageReport(
       /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
       UsageReport::USAGE_TYPE3);
   ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time3));
-  EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
-  EXPECT_THAT(scorer3->GetScore(document_id), Eq(5));
+  EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
+  EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5));
 }
 
 TEST_F(ScorerTest, NoScorerShouldAlwaysReturnDefaultScore) {
@@ -502,17 +538,23 @@
       Scorer::Create(ScoringSpecProto::RankingStrategy::NONE,
                      /*default_score=*/3, document_store()));
 
-  EXPECT_THAT(scorer->GetScore(/*document_id=*/0), Eq(3));
-  EXPECT_THAT(scorer->GetScore(/*document_id=*/1), Eq(3));
-  EXPECT_THAT(scorer->GetScore(/*document_id=*/2), Eq(3));
+  DocHitInfo docHitInfo1 = DocHitInfo(/*document_id_in=*/0);
+  DocHitInfo docHitInfo2 = DocHitInfo(/*document_id_in=*/1);
+  DocHitInfo docHitInfo3 = DocHitInfo(/*document_id_in=*/2);
+  EXPECT_THAT(scorer->GetScore(docHitInfo1), Eq(3));
+  EXPECT_THAT(scorer->GetScore(docHitInfo2), Eq(3));
+  EXPECT_THAT(scorer->GetScore(docHitInfo3), Eq(3));
 
   ICING_ASSERT_OK_AND_ASSIGN(
       scorer, Scorer::Create(ScoringSpecProto::RankingStrategy::NONE,
                              /*default_score=*/111, document_store()));
 
-  EXPECT_THAT(scorer->GetScore(/*document_id=*/4), Eq(111));
-  EXPECT_THAT(scorer->GetScore(/*document_id=*/5), Eq(111));
-  EXPECT_THAT(scorer->GetScore(/*document_id=*/6), Eq(111));
+  docHitInfo1 = DocHitInfo(/*document_id_in=*/4);
+  docHitInfo2 = DocHitInfo(/*document_id_in=*/5);
+  docHitInfo3 = DocHitInfo(/*document_id_in=*/6);
+  EXPECT_THAT(scorer->GetScore(docHitInfo1), Eq(111));
+  EXPECT_THAT(scorer->GetScore(docHitInfo2), Eq(111));
+  EXPECT_THAT(scorer->GetScore(docHitInfo3), Eq(111));
 }
 
 }  // namespace
diff --git a/icing/scoring/scoring-processor.cc b/icing/scoring/scoring-processor.cc
index 0933094..24480ef 100644
--- a/icing/scoring/scoring-processor.cc
+++ b/icing/scoring/scoring-processor.cc
@@ -58,9 +58,11 @@
 }
 
 std::vector<ScoredDocumentHit> ScoringProcessor::Score(
-    std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator,
-    int num_to_score) {
+    std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator, int num_to_score,
+    std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>*
+        query_term_iterators) {
   std::vector<ScoredDocumentHit> scored_document_hits;
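+  // Per-query setup hook: a relevance (BM25F) scorer reads the query-term
+  // iterators here; scorers that don't need them can safely be handed nullptr.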
+  scorer_->PrepareToScore(query_term_iterators);
 
   while (doc_hit_info_iterator->Advance().ok() && num_to_score-- > 0) {
     const DocHitInfo& doc_hit_info = doc_hit_info_iterator->doc_hit_info();
@@ -69,7 +71,8 @@
     // The final score of the doc_hit_info = score of doc * demotion factor of
     // hit.
     double score =
-        scorer_->GetScore(doc_hit_info.document_id()) * hit_demotion_factor;
+        scorer_->GetScore(doc_hit_info, doc_hit_info_iterator.get()) *
+        hit_demotion_factor;
     scored_document_hits.emplace_back(
         doc_hit_info.document_id(), doc_hit_info.hit_section_ids_mask(), score);
   }
diff --git a/icing/scoring/scoring-processor.h b/icing/scoring/scoring-processor.h
index 60c3b32..2289605 100644
--- a/icing/scoring/scoring-processor.h
+++ b/icing/scoring/scoring-processor.h
@@ -48,11 +48,14 @@
   // num_to_score. The order of results is the same as DocHitInfos from
   // DocHitInfoIterator.
   //
-  // NOTE: if the scoring spec doesn't require a scoring strategy, all
+  // If necessary, query_term_iterators is used to compute the BM25F relevance
+  // score. NOTE: if the scoring spec doesn't require a scoring strategy, all
   // ScoredDocumentHits will be assigned a default score 0.
   std::vector<ScoredDocumentHit> Score(
       std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator,
-      int num_to_score);
+      int num_to_score,
+      std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>*
+          query_term_iterators = nullptr);
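+  // query_term_iterators may be nullptr (the default) whenever relevance
+  // scoring is not requested.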
 
  private:
   explicit ScoringProcessor(std::unique_ptr<Scorer> scorer)
diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc
index 14b2a20..65eecd1 100644
--- a/icing/scoring/scoring-processor_test.cc
+++ b/icing/scoring/scoring-processor_test.cc
@@ -253,6 +253,216 @@
                           EqualsScoredDocumentHit(scored_document_hits.at(2))));
 }
 
+TEST_F(ScoringProcessorTest,
+       ShouldScoreByRelevanceScore_DocumentsWithDifferentLength) {
+  DocumentProto document1 =
+      CreateDocument("icing", "email/1", kDefaultScore,
+                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+  DocumentProto document2 =
+      CreateDocument("icing", "email/2", kDefaultScore,
+                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+  DocumentProto document3 =
+      CreateDocument("icing", "email/3", kDefaultScore,
+                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id1,
+      document_store()->Put(document1, /*num_tokens=*/10));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id2,
+      document_store()->Put(document2, /*num_tokens=*/100));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id3,
+      document_store()->Put(document3, /*num_tokens=*/50));
+
+  DocHitInfo doc_hit_info1(document_id1);
+  doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  DocHitInfo doc_hit_info2(document_id2);
+  doc_hit_info2.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  DocHitInfo doc_hit_info3(document_id3);
+  doc_hit_info3.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+
+  // Creates input doc_hit_infos and expected output scored_document_hits
+  std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
+                                           doc_hit_info3};
+
+  // Creates a dummy DocHitInfoIterator with 3 results for the query "foo"
+  std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+  ScoringSpecProto spec_proto;
+  spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+
+  // Creates a ScoringProcessor
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<ScoringProcessor> scoring_processor,
+      ScoringProcessor::Create(spec_proto, document_store()));
+
+  std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+      query_term_iterators;
+  query_term_iterators["foo"] =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+  // Since the three documents all contain the query term "foo" exactly once,
+  // the documents' lengths determine the final scores. Documents shorter than
+  // the average corpus length are slightly boosted.
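+  // Roughly (assuming the standard Okapi BM25 form; the exact parameter
+  // values are an implementation detail), each matching term contributes
+  //   IDF(term) * tf * (k1 + 1) / (tf + k1 * (1 - b + b * |doc| / avgdl)),
+  // so with tf fixed at 1 here, a longer document yields a smaller score.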
+  ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask,
+                                             /*score=*/0.255482);
+  ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask,
+                                             /*score=*/0.115927);
+  ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask,
+                                             /*score=*/0.166435);
+  EXPECT_THAT(
+      scoring_processor->Score(std::move(doc_hit_info_iterator),
+                               /*num_to_score=*/3, &query_term_iterators),
+      ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
+                  EqualsScoredDocumentHit(expected_scored_doc_hit2),
+                  EqualsScoredDocumentHit(expected_scored_doc_hit3)));
+}
+
+TEST_F(ScoringProcessorTest,
+       ShouldScoreByRelevanceScore_DocumentsWithSameLength) {
+  DocumentProto document1 =
+      CreateDocument("icing", "email/1", kDefaultScore,
+                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+  DocumentProto document2 =
+      CreateDocument("icing", "email/2", kDefaultScore,
+                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+  DocumentProto document3 =
+      CreateDocument("icing", "email/3", kDefaultScore,
+                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id1,
+      document_store()->Put(document1, /*num_tokens=*/10));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id2,
+      document_store()->Put(document2, /*num_tokens=*/10));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id3,
+      document_store()->Put(document3, /*num_tokens=*/10));
+
+  DocHitInfo doc_hit_info1(document_id1);
+  doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  DocHitInfo doc_hit_info2(document_id2);
+  doc_hit_info2.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  DocHitInfo doc_hit_info3(document_id3);
+  doc_hit_info3.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+
+  SectionId section_id = 0;
+  SectionIdMask section_id_mask = 1U << section_id;
+
+  // Creates input doc_hit_infos and expected output scored_document_hits
+  std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
+                                           doc_hit_info3};
+
+  // Creates a dummy DocHitInfoIterator with 3 results for the query "foo"
+  std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+  ScoringSpecProto spec_proto;
+  spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+
+  // Creates a ScoringProcessor
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<ScoringProcessor> scoring_processor,
+      ScoringProcessor::Create(spec_proto, document_store()));
+
+  std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+      query_term_iterators;
+  query_term_iterators["foo"] =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+  // Since the three documents all contain the query term "foo" exactly once
+  // and they have the same length, they will have the same BM25F score.
+  ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask,
+                                             /*score=*/0.16173716);
+  ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask,
+                                             /*score=*/0.16173716);
+  ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask,
+                                             /*score=*/0.16173716);
+  EXPECT_THAT(
+      scoring_processor->Score(std::move(doc_hit_info_iterator),
+                               /*num_to_score=*/3, &query_term_iterators),
+      ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
+                  EqualsScoredDocumentHit(expected_scored_doc_hit2),
+                  EqualsScoredDocumentHit(expected_scored_doc_hit3)));
+}
+
+TEST_F(ScoringProcessorTest,
+       ShouldScoreByRelevanceScore_DocumentsWithDifferentQueryFrequency) {
+  DocumentProto document1 =
+      CreateDocument("icing", "email/1", kDefaultScore,
+                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+  DocumentProto document2 =
+      CreateDocument("icing", "email/2", kDefaultScore,
+                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+  DocumentProto document3 =
+      CreateDocument("icing", "email/3", kDefaultScore,
+                     /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id1,
+      document_store()->Put(document1, /*num_tokens=*/10));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id2,
+      document_store()->Put(document2, /*num_tokens=*/10));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id3,
+      document_store()->Put(document3, /*num_tokens=*/10));
+
+  DocHitInfo doc_hit_info1(document_id1);
+  // Document 1 contains the query term "foo" 5 times.
+  doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/5);
+  DocHitInfo doc_hit_info2(document_id2);
+  // Document 2 contains the query term "foo" 1 time.
+  doc_hit_info2.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  DocHitInfo doc_hit_info3(document_id3);
+  // Document 3 contains the query term "foo" 3 times, across two sections.
+  doc_hit_info3.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+  doc_hit_info3.UpdateSection(/*section_id=*/1, /*hit_term_frequency=*/2);
+
+  SectionIdMask section_id_mask1 = 0b00000001;
+  SectionIdMask section_id_mask2 = 0b00000001;
+  SectionIdMask section_id_mask3 = 0b00000011;
+
+  // Creates input doc_hit_infos and expected output scored_document_hits
+  std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
+                                           doc_hit_info3};
+
+  // Creates a dummy DocHitInfoIterator with 3 results for the query "foo"
+  std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+
+  ScoringSpecProto spec_proto;
+  spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
+
+  // Creates a ScoringProcessor
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<ScoringProcessor> scoring_processor,
+      ScoringProcessor::Create(spec_proto, document_store()));
+
+  std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
+      query_term_iterators;
+  query_term_iterators["foo"] =
+      std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
+  // Since the three documents all have the same length, the scores are
+  // determined by the frequency of the query term "foo".
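+  // Note that term-frequency gains saturate under BM25: document 1 has five
+  // times as many hits as document 2, yet scores roughly twice as high, not
+  // five times.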
+  ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask1,
+                                             /*score=*/0.309497);
+  ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask2,
+                                             /*score=*/0.16173716);
+  ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask3,
+                                             /*score=*/0.268599);
+  EXPECT_THAT(
+      scoring_processor->Score(std::move(doc_hit_info_iterator),
+                               /*num_to_score=*/3, &query_term_iterators),
+      ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
+                  EqualsScoredDocumentHit(expected_scored_doc_hit2),
+                  EqualsScoredDocumentHit(expected_scored_doc_hit3)));
+}
+
 TEST_F(ScoringProcessorTest, ShouldScoreByCreationTimestamp) {
   DocumentProto document1 =
       CreateDocument("icing", "email/1", kDefaultScore,
diff --git a/icing/store/corpus-associated-scoring-data.h b/icing/store/corpus-associated-scoring-data.h
new file mode 100644
index 0000000..52be5cd
--- /dev/null
+++ b/icing/store/corpus-associated-scoring-data.h
@@ -0,0 +1,79 @@
+// Copyright (C) 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_CORPUS_ASSOCIATED_SCORING_DATA_H_
+#define ICING_STORE_CORPUS_ASSOCIATED_SCORING_DATA_H_
+
+#include <cstdint>
+#include <limits>
+#include <type_traits>
+
+#include "icing/legacy/core/icing-packed-pod.h"
+
+namespace icing {
+namespace lib {
+
+// This is the cache entity of corpus-associated scores. The ground-truth data
+// is stored somewhere else. The cache includes:
+// 1. The number of documents contained in the corpus.
+// 2. The sum of the documents' lengths, in number of tokens.
+class CorpusAssociatedScoreData {
+ public:
+  explicit CorpusAssociatedScoreData(int num_docs = 0,
+                                     int64_t sum_length_in_tokens = 0)
+      : sum_length_in_tokens_(sum_length_in_tokens), num_docs_(num_docs) {}
+
+  bool operator==(const CorpusAssociatedScoreData& other) const {
+    return num_docs_ == other.num_docs() &&
+           sum_length_in_tokens_ == other.sum_length_in_tokens();
+  }
+
+  int num_docs() const { return num_docs_; }
+  void set_num_docs(int val) { num_docs_ = val; }
+
+  int64_t sum_length_in_tokens() const { return sum_length_in_tokens_; }
+  void set_sum_length_in_tokens(int64_t val) { sum_length_in_tokens_ = val; }
+
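+  // Note: dividing by (1 + num_docs_) keeps this well-defined for an empty
+  // corpus, at the cost of slightly underestimating the true average.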
+  float average_doc_length_in_tokens() const {
+    return sum_length_in_tokens_ / (1.0f + num_docs_);
+  }
+
+  // Adds a new document.
+  // Adds the document's length to the total length of the corpus,
+  // sum_length_in_tokens_.
+  void AddDocument(int32_t doc_length_in_tokens) {
+    ++num_docs_;
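+    // Saturate the sum at INT_MAX instead of letting it overflow.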
+    sum_length_in_tokens_ =
+        (std::numeric_limits<int>::max() - doc_length_in_tokens <
+         sum_length_in_tokens_)
+            ? std::numeric_limits<int>::max()
+            : sum_length_in_tokens_ + doc_length_in_tokens;
+  }
+
+ private:
+  // The sum total of the length of all documents in the corpus.
+  int sum_length_in_tokens_;
+  int num_docs_;
+} __attribute__((packed));
+
+static_assert(sizeof(CorpusAssociatedScoreData) == 8,
+              "Size of CorpusAssociatedScoreData should be 8");
+static_assert(icing_is_packed_pod<CorpusAssociatedScoreData>::value,
+              "go/icing-ubsan");
+
+}  // namespace lib
+}  // namespace icing
+
+#endif  // ICING_STORE_CORPUS_ASSOCIATED_SCORING_DATA_H_
diff --git a/icing/store/corpus-id.h b/icing/store/corpus-id.h
index a8f21ba..01135b9 100644
--- a/icing/store/corpus-id.h
+++ b/icing/store/corpus-id.h
@@ -24,6 +24,8 @@
 // DocumentProto. Generated in DocumentStore.
 using CorpusId = int32_t;
 
+inline constexpr CorpusId kInvalidCorpusId = -1;
+
 }  // namespace lib
 }  // namespace icing
 
diff --git a/icing/store/document-associated-score-data.h b/icing/store/document-associated-score-data.h
index b9039c5..9a711c8 100644
--- a/icing/store/document-associated-score-data.h
+++ b/icing/store/document-associated-score-data.h
@@ -19,6 +19,7 @@
 #include <type_traits>
 
 #include "icing/legacy/core/icing-packed-pod.h"
+#include "icing/store/corpus-id.h"
 
 namespace icing {
 namespace lib {
@@ -26,33 +27,46 @@
 // This is the cache entity of document-associated scores. It contains scores
 // that are related to the document itself. The ground-truth data is stored
 // somewhere else. The cache includes:
-// 1. Document score. It's defined in and passed from DocumentProto.score.
+// 1. Corpus Id.
+// 2. Document score. It's defined in and passed from DocumentProto.score.
 //    Positive values are required.
-// 2. Document creation timestamp. Unix timestamp of when the document is
+// 3. Document creation timestamp. Unix timestamp of when the document is
 //    created and inserted into Icing.
+// 4. Document length in number of tokens.
 class DocumentAssociatedScoreData {
  public:
-  explicit DocumentAssociatedScoreData(int document_score,
-                                       int64_t creation_timestamp_ms)
-      : document_score_(document_score),
-        creation_timestamp_ms_(creation_timestamp_ms) {}
+  explicit DocumentAssociatedScoreData(CorpusId corpus_id, int document_score,
+                                       int64_t creation_timestamp_ms,
+                                       int length_in_tokens = 0)
+      : creation_timestamp_ms_(creation_timestamp_ms),
+        corpus_id_(corpus_id),
+        document_score_(document_score),
+        length_in_tokens_(length_in_tokens) {}
 
   bool operator==(const DocumentAssociatedScoreData& other) const {
     return document_score_ == other.document_score() &&
-           creation_timestamp_ms_ == other.creation_timestamp_ms();
+           creation_timestamp_ms_ == other.creation_timestamp_ms() &&
+           length_in_tokens_ == other.length_in_tokens() &&
+           corpus_id_ == other.corpus_id();
   }
 
+  CorpusId corpus_id() const { return corpus_id_; }
+
   int document_score() const { return document_score_; }
 
   int64_t creation_timestamp_ms() const { return creation_timestamp_ms_; }
 
+  int length_in_tokens() const { return length_in_tokens_; }
+
  private:
-  int document_score_;
   int64_t creation_timestamp_ms_;
+  CorpusId corpus_id_;
+  int document_score_;
+  int length_in_tokens_;
 } __attribute__((packed));
 
-static_assert(sizeof(DocumentAssociatedScoreData) == 12,
-              "Size of DocumentAssociatedScoreData should be 12");
+static_assert(sizeof(DocumentAssociatedScoreData) == 20,
+              "Size of DocumentAssociatedScoreData should be 20");
 static_assert(icing_is_packed_pod<DocumentAssociatedScoreData>::value,
               "go/icing-ubsan");
 
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 6a664a3..72bf736 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -37,18 +37,20 @@
 #include "icing/proto/document_wrapper.pb.h"
 #include "icing/proto/logging.pb.h"
 #include "icing/schema/schema-store.h"
+#include "icing/store/corpus-associated-scoring-data.h"
 #include "icing/store/corpus-id.h"
 #include "icing/store/document-associated-score-data.h"
 #include "icing/store/document-filter-data.h"
 #include "icing/store/document-id.h"
-#include "icing/store/enable-bm25f.h"
 #include "icing/store/key-mapper.h"
 #include "icing/store/namespace-id.h"
+#include "icing/tokenization/language-segmenter.h"
 #include "icing/util/clock.h"
 #include "icing/util/crc32.h"
 #include "icing/util/data-loss.h"
 #include "icing/util/logging.h"
 #include "icing/util/status-macros.h"
+#include "icing/util/tokenized-document.h"
 
 namespace icing {
 namespace lib {
@@ -61,6 +63,7 @@
 constexpr char kDocumentIdMapperFilename[] = "document_id_mapper";
 constexpr char kDocumentStoreHeaderFilename[] = "document_store_header";
 constexpr char kScoreCacheFilename[] = "score_cache";
+constexpr char kCorpusScoreCache[] = "corpus_score_cache";
 constexpr char kFilterCacheFilename[] = "filter_cache";
 constexpr char kNamespaceMapperFilename[] = "namespace_mapper";
 constexpr char kUsageStoreDirectoryName[] = "usage_store";
@@ -122,6 +125,10 @@
   return absl_ports::StrCat(base_dir, "/", kScoreCacheFilename);
 }
 
+std::string MakeCorpusScoreCache(const std::string& base_dir) {
+  return absl_ports::StrCat(base_dir, "/", kCorpusScoreCache);
+}
+
 std::string MakeFilterCacheFilename(const std::string& base_dir) {
   return absl_ports::StrCat(base_dir, "/", kFilterCacheFilename);
 }
@@ -195,8 +202,16 @@
       document_validator_(schema_store) {}
 
 libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
-    const DocumentProto& document, NativePutDocumentStats* put_document_stats) {
-  return Put(DocumentProto(document), put_document_stats);
+    const DocumentProto& document, int32_t num_tokens,
+    NativePutDocumentStats* put_document_stats) {
+  return Put(DocumentProto(document), num_tokens, put_document_stats);
+}
+
+libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
+    DocumentProto&& document, int32_t num_tokens,
+    NativePutDocumentStats* put_document_stats) {
+  document.mutable_internal_fields()->set_length_in_tokens(num_tokens);
+  return InternalPut(document, put_document_stats);
 }
 
 DocumentStore::~DocumentStore() {
@@ -366,12 +381,15 @@
       usage_store_,
       UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_)));
 
-  if (enableBm25f()) {
-    ICING_ASSIGN_OR_RETURN(
-        corpus_mapper_, KeyMapper<CorpusId>::Create(
-                            *filesystem_, MakeCorpusMapperFilename(base_dir_),
-                            kCorpusMapperMaxSize));
-  }
+  ICING_ASSIGN_OR_RETURN(corpus_mapper_,
+                         KeyMapper<CorpusId>::Create(
+                             *filesystem_, MakeCorpusMapperFilename(base_dir_),
+                             kCorpusMapperMaxSize));
+
+  ICING_ASSIGN_OR_RETURN(corpus_score_cache_,
+                         FileBackedVector<CorpusAssociatedScoreData>::Create(
+                             *filesystem_, MakeCorpusScoreCache(base_dir_),
+                             MemoryMappedFile::READ_WRITE_AUTO_SYNC));
 
   // Ensure the usage store is the correct size.
   ICING_RETURN_IF_ERROR(
@@ -392,9 +410,8 @@
   ICING_RETURN_IF_ERROR(ResetDocumentAssociatedScoreCache());
   ICING_RETURN_IF_ERROR(ResetFilterCache());
   ICING_RETURN_IF_ERROR(ResetNamespaceMapper());
-  if (enableBm25f()) {
-    ICING_RETURN_IF_ERROR(ResetCorpusMapper());
-  }
+  ICING_RETURN_IF_ERROR(ResetCorpusMapper());
+  ICING_RETURN_IF_ERROR(ResetCorpusAssociatedScoreCache());
 
   // Creates a new UsageStore instance. Note that we don't reset the data in
   // usage store here because we're not able to regenerate the usage scores.
@@ -506,12 +523,6 @@
       ICING_RETURN_IF_ERROR(
           document_id_mapper_->Set(new_document_id, iterator.GetOffset()));
 
-      ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
-          new_document_id,
-          DocumentAssociatedScoreData(
-              document_wrapper.document().score(),
-              document_wrapper.document().creation_timestamp_ms())));
-
       SchemaTypeId schema_type_id;
       auto schema_type_id_or =
           schema_store_->GetSchemaTypeId(document_wrapper.document().schema());
@@ -536,13 +547,30 @@
           namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(),
                                       namespace_mapper_->num_keys()));
 
-      if (enableBm25f()) {
-        // Update corpus maps
-        std::string corpus =
-            MakeFingerprint(document_wrapper.document().namespace_(),
-                            document_wrapper.document().schema());
-        corpus_mapper_->GetOrPut(corpus, corpus_mapper_->num_keys());
-      }
+      // Update corpus maps
+      std::string corpus =
+          MakeFingerprint(document_wrapper.document().namespace_(),
+                          document_wrapper.document().schema());
+      ICING_ASSIGN_OR_RETURN(
+          CorpusId corpus_id,
+          corpus_mapper_->GetOrPut(corpus, corpus_mapper_->num_keys()));
+
+      ICING_ASSIGN_OR_RETURN(CorpusAssociatedScoreData scoring_data,
+                             GetCorpusAssociatedScoreDataToUpdate(corpus_id));
+      scoring_data.AddDocument(
+          document_wrapper.document().internal_fields().length_in_tokens());
+
+      ICING_RETURN_IF_ERROR(
+          UpdateCorpusAssociatedScoreCache(corpus_id, scoring_data));
+
+      ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
+          new_document_id,
+          DocumentAssociatedScoreData(
+              corpus_id, document_wrapper.document().score(),
+              document_wrapper.document().creation_timestamp_ms(),
+              document_wrapper.document()
+                  .internal_fields()
+                  .length_in_tokens())));
 
       int64_t expiration_timestamp_ms = CalculateExpirationTimestampMs(
           document_wrapper.document().creation_timestamp_ms(),
@@ -638,6 +666,18 @@
   return libtextclassifier3::Status::OK;
 }
 
+libtextclassifier3::Status DocumentStore::ResetCorpusAssociatedScoreCache() {
+  // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
+  corpus_score_cache_.reset();
+  ICING_RETURN_IF_ERROR(FileBackedVector<CorpusAssociatedScoreData>::Delete(
+      *filesystem_, MakeCorpusScoreCache(base_dir_)));
+  ICING_ASSIGN_OR_RETURN(corpus_score_cache_,
+                         FileBackedVector<CorpusAssociatedScoreData>::Create(
+                             *filesystem_, MakeCorpusScoreCache(base_dir_),
+                             MemoryMappedFile::READ_WRITE_AUTO_SYNC));
+  return libtextclassifier3::Status::OK;
+}
+
 libtextclassifier3::Status DocumentStore::ResetFilterCache() {
   // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
   filter_cache_.reset();
@@ -671,23 +711,21 @@
 }
 
 libtextclassifier3::Status DocumentStore::ResetCorpusMapper() {
-  if (enableBm25f()) {
-    // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
-    corpus_mapper_.reset();
-    // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
-    // that can support error logging.
-    libtextclassifier3::Status status = KeyMapper<CorpusId>::Delete(
-        *filesystem_, MakeCorpusMapperFilename(base_dir_));
-    if (!status.ok()) {
-      ICING_LOG(ERROR) << status.error_message()
-                       << "Failed to delete old corpus_id mapper";
-      return status;
-    }
-    ICING_ASSIGN_OR_RETURN(
-        corpus_mapper_, KeyMapper<CorpusId>::Create(
-                            *filesystem_, MakeCorpusMapperFilename(base_dir_),
-                            kCorpusMapperMaxSize));
+  // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
+  corpus_mapper_.reset();
+  // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+  // that can support error logging.
+  libtextclassifier3::Status status = KeyMapper<CorpusId>::Delete(
+      *filesystem_, MakeCorpusMapperFilename(base_dir_));
+  if (!status.ok()) {
+    ICING_LOG(ERROR) << status.error_message()
+                     << "Failed to delete old corpus_id mapper";
+    return status;
   }
+  ICING_ASSIGN_OR_RETURN(corpus_mapper_,
+                         KeyMapper<CorpusId>::Create(
+                             *filesystem_, MakeCorpusMapperFilename(base_dir_),
+                             kCorpusMapperMaxSize));
   return libtextclassifier3::Status::OK;
 }
 
@@ -738,16 +776,26 @@
 
   Crc32 namespace_mapper_checksum = namespace_mapper_->ComputeChecksum();
 
+  Crc32 corpus_mapper_checksum = corpus_mapper_->ComputeChecksum();
+
+  // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+  // that can support error logging.
+  checksum_or = corpus_score_cache_->ComputeChecksum();
+  if (!checksum_or.ok()) {
+    ICING_LOG(WARNING) << checksum_or.status().error_message()
+                       << "Failed to compute checksum of score cache";
+    return checksum_or.status();
+  }
+  Crc32 corpus_score_cache_checksum = std::move(checksum_or).ValueOrDie();
+
   total_checksum.Append(std::to_string(document_log_checksum.Get()));
   total_checksum.Append(std::to_string(document_key_mapper_checksum.Get()));
   total_checksum.Append(std::to_string(document_id_mapper_checksum.Get()));
   total_checksum.Append(std::to_string(score_cache_checksum.Get()));
   total_checksum.Append(std::to_string(filter_cache_checksum.Get()));
   total_checksum.Append(std::to_string(namespace_mapper_checksum.Get()));
-  if (enableBm25f()) {
-    Crc32 corpus_mapper_checksum = corpus_mapper_->ComputeChecksum();
-    total_checksum.Append(std::to_string(corpus_mapper_checksum.Get()));
-  }
+  total_checksum.Append(std::to_string(corpus_mapper_checksum.Get()));
+  total_checksum.Append(std::to_string(corpus_score_cache_checksum.Get()));
 
   return total_checksum;
 }
@@ -779,8 +827,8 @@
   return libtextclassifier3::Status::OK;
 }
 
-libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
-    DocumentProto&& document, NativePutDocumentStats* put_document_stats) {
+libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut(
+    DocumentProto& document, NativePutDocumentStats* put_document_stats) {
   std::unique_ptr<Timer> put_timer = clock_.GetNewTimer();
   ICING_RETURN_IF_ERROR(document_validator_.Validate(document));
 
@@ -793,6 +841,7 @@
   std::string uri = document.uri();
   std::string schema = document.schema();
   int document_score = document.score();
+  int32_t length_in_tokens = document.internal_fields().length_in_tokens();
   int64_t creation_timestamp_ms = document.creation_timestamp_ms();
 
   // Sets the creation timestamp if caller hasn't specified.
@@ -829,20 +878,28 @@
       MakeFingerprint(name_space, uri), new_document_id));
   ICING_RETURN_IF_ERROR(document_id_mapper_->Set(new_document_id, file_offset));
 
-  ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
-      new_document_id,
-      DocumentAssociatedScoreData(document_score, creation_timestamp_ms)));
-
   // Update namespace maps
   ICING_ASSIGN_OR_RETURN(
       NamespaceId namespace_id,
       namespace_mapper_->GetOrPut(name_space, namespace_mapper_->num_keys()));
 
-  if (enableBm25f()) {
-    // Update corpus maps
-    ICING_RETURN_IF_ERROR(corpus_mapper_->GetOrPut(
-        MakeFingerprint(name_space, schema), corpus_mapper_->num_keys()));
-  }
+  // Update corpus maps
+  ICING_ASSIGN_OR_RETURN(
+      CorpusId corpus_id,
+      corpus_mapper_->GetOrPut(MakeFingerprint(name_space, schema),
+                               corpus_mapper_->num_keys()));
+
+  ICING_ASSIGN_OR_RETURN(CorpusAssociatedScoreData scoring_data,
+                         GetCorpusAssociatedScoreDataToUpdate(corpus_id));
+  scoring_data.AddDocument(length_in_tokens);
+
+  ICING_RETURN_IF_ERROR(
+      UpdateCorpusAssociatedScoreCache(corpus_id, scoring_data));
+
+  ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
+      new_document_id,
+      DocumentAssociatedScoreData(corpus_id, document_score,
+                                  creation_timestamp_ms, length_in_tokens)));
 
   ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
                          schema_store_->GetSchemaTypeId(schema));
@@ -876,7 +933,8 @@
 }
 
 libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
-    const std::string_view name_space, const std::string_view uri) const {
+    const std::string_view name_space, const std::string_view uri,
+    bool clear_internal_fields) const {
   // TODO(b/147231617): Make a better way to replace the error message in an
   // existing Status.
   auto document_id_or = GetDocumentId(name_space, uri);
@@ -903,7 +961,7 @@
 }
 
 libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
-    DocumentId document_id) const {
+    DocumentId document_id, bool clear_internal_fields) const {
   ICING_ASSIGN_OR_RETURN(int64_t document_log_offset,
                          DoesDocumentExistAndGetFileOffset(document_id));
 
@@ -917,6 +975,9 @@
   }
   DocumentWrapper document_wrapper =
       std::move(document_wrapper_or).ValueOrDie();
+  if (clear_internal_fields) {
+    document_wrapper.mutable_document()->clear_internal_fields();
+  }
 
   return std::move(*document_wrapper.mutable_document());
 }
@@ -1088,10 +1149,7 @@
 
 libtextclassifier3::StatusOr<CorpusId> DocumentStore::GetCorpusId(
     const std::string_view name_space, const std::string_view schema) const {
-  if (enableBm25f()) {
-    return corpus_mapper_->Get(MakeFingerprint(name_space, schema));
-  }
-  return absl_ports::NotFoundError("corpus_mapper disabled");
+  return corpus_mapper_->Get(MakeFingerprint(name_space, schema));
 }
 
 libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
@@ -1112,6 +1170,34 @@
   return document_associated_score_data;
 }
 
+libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
+DocumentStore::GetCorpusAssociatedScoreData(CorpusId corpus_id) const {
+  auto score_data_or = corpus_score_cache_->Get(corpus_id);
+  if (!score_data_or.ok()) {
+    return score_data_or.status();
+  }
+
+  CorpusAssociatedScoreData corpus_associated_score_data =
+      *std::move(score_data_or).ValueOrDie();
+  return corpus_associated_score_data;
+}
+
+libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
+DocumentStore::GetCorpusAssociatedScoreDataToUpdate(CorpusId corpus_id) const {
+  auto corpus_scoring_data_or = GetCorpusAssociatedScoreData(corpus_id);
+  if (corpus_scoring_data_or.ok()) {
+    return std::move(corpus_scoring_data_or).ValueOrDie();
+  }
+  CorpusAssociatedScoreData scoring_data;
+  // OUT_OF_RANGE means the corpus id has never been written to
+  // corpus_score_cache_, i.e. this is the corpus's first document, so return
+  // default-initialized data.
+  if (corpus_scoring_data_or.status().CanonicalCode() ==
+      libtextclassifier3::StatusCode::OUT_OF_RANGE) {
+    return scoring_data;
+  }
+  return corpus_scoring_data_or.status();
+}
+
 libtextclassifier3::StatusOr<DocumentFilterData>
 DocumentStore::GetDocumentFilterData(DocumentId document_id) const {
   auto filter_data_or = filter_cache_->Get(document_id);
@@ -1308,10 +1394,8 @@
   ICING_RETURN_IF_ERROR(filter_cache_->PersistToDisk());
   ICING_RETURN_IF_ERROR(namespace_mapper_->PersistToDisk());
   ICING_RETURN_IF_ERROR(usage_store_->PersistToDisk());
-
-  if (enableBm25f()) {
-    ICING_RETURN_IF_ERROR(corpus_mapper_->PersistToDisk());
-  }
+  ICING_RETURN_IF_ERROR(corpus_mapper_->PersistToDisk());
+  ICING_RETURN_IF_ERROR(corpus_score_cache_->PersistToDisk());
 
   // Update the combined checksum and write to header file.
   ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
@@ -1333,16 +1417,16 @@
                          filter_cache_->GetDiskUsage());
   ICING_ASSIGN_OR_RETURN(const int64_t namespace_mapper_disk_usage,
                          namespace_mapper_->GetDiskUsage());
+  ICING_ASSIGN_OR_RETURN(const int64_t corpus_mapper_disk_usage,
+                         corpus_mapper_->GetDiskUsage());
+  ICING_ASSIGN_OR_RETURN(const int64_t corpus_score_cache_disk_usage,
+                         corpus_score_cache_->GetDiskUsage());
 
   int64_t disk_usage = document_log_disk_usage +
                        document_key_mapper_disk_usage +
                        document_id_mapper_disk_usage + score_cache_disk_usage +
-                       filter_cache_disk_usage + namespace_mapper_disk_usage;
-  if (enableBm25f()) {
-    ICING_ASSIGN_OR_RETURN(const int64_t corpus_mapper_disk_usage,
-                           corpus_mapper_->GetDiskUsage());
-    disk_usage += corpus_mapper_disk_usage;
-  }
+                       filter_cache_disk_usage + namespace_mapper_disk_usage +
+                       corpus_mapper_disk_usage + corpus_score_cache_disk_usage;
   return disk_usage;
 }
 
@@ -1493,7 +1577,7 @@
 }
 
 libtextclassifier3::Status DocumentStore::OptimizeInto(
-    const std::string& new_directory) {
+    const std::string& new_directory, const LanguageSegmenter* lang_segmenter) {
   // Validates directory
   if (new_directory == base_dir_) {
     return absl_ports::InvalidArgumentError(
@@ -1509,7 +1593,7 @@
   // Writes all valid docs into new document store (new directory)
   int size = document_id_mapper_->num_elements();
   for (DocumentId document_id = 0; document_id < size; document_id++) {
-    auto document_or = Get(document_id);
+    auto document_or = Get(document_id, /*clear_internal_fields=*/false);
     if (absl_ports::IsNotFound(document_or.status())) {
       // Skip nonexistent documents
       continue;
@@ -1523,9 +1607,26 @@
 
     // Guaranteed to have a document now.
     DocumentProto document_to_keep = document_or.ValueOrDie();
-    // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
-    // that can support error logging.
-    auto new_document_id_or = new_doc_store->Put(std::move(document_to_keep));
+
+    libtextclassifier3::StatusOr<DocumentId> new_document_id_or;
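+    // A stored length of 0 means the document predates token counting (or
+    // genuinely has no tokens), so re-tokenize it here to backfill the count.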
+    if (document_to_keep.internal_fields().length_in_tokens() == 0) {
+      auto tokenized_document_or = TokenizedDocument::Create(
+          schema_store_, lang_segmenter, document_to_keep);
+      if (!tokenized_document_or.ok()) {
+        return absl_ports::Annotate(
+            tokenized_document_or.status(),
+            IcingStringUtil::StringPrintf(
+                "Failed to tokenize Document for DocumentId %d", document_id));
+      }
+      TokenizedDocument tokenized_document(
+          std::move(tokenized_document_or).ValueOrDie());
+      new_document_id_or =
+          new_doc_store->Put(document_to_keep, tokenized_document.num_tokens());
+    } else {
+      // TODO(b/144458732): Implement a more robust version of
+      // TC_ASSIGN_OR_RETURN that can support error logging.
+      new_document_id_or = new_doc_store->InternalPut(document_to_keep);
+    }
     if (!new_document_id_or.ok()) {
       ICING_LOG(ERROR) << new_document_id_or.status().error_message()
                        << "Failed to write into new document store";
@@ -1577,26 +1678,39 @@
                          score_cache_->GetElementsFileSize());
   ICING_ASSIGN_OR_RETURN(const int64_t filter_cache_file_size,
                          filter_cache_->GetElementsFileSize());
+  ICING_ASSIGN_OR_RETURN(const int64_t corpus_score_cache_file_size,
+                         corpus_score_cache_->GetElementsFileSize());
+
+  // Usage store might be sparse, but we'll still use file size for more
+  // accurate counting.
+  ICING_ASSIGN_OR_RETURN(const int64_t usage_store_file_size,
+                         usage_store_->GetElementsFileSize());
 
   // We use a combined disk usage and file size for the KeyMapper because it's
   // backed by a trie, which has some sparse property bitmaps.
   ICING_ASSIGN_OR_RETURN(const int64_t document_key_mapper_size,
                          document_key_mapper_->GetElementsSize());
 
-  // We don't include the namespace mapper because it's not clear if we could
-  // recover any space even if Optimize were called. Deleting 100s of documents
-  // could still leave a few documents of a namespace, and then there would be
-  // no change.
+  // We don't include the namespace_mapper or the corpus_mapper because it's
+  // not clear whether we could recover any space even if Optimize were called.
+  // Deleting 100s of documents could still leave a few documents in a
+  // namespace, and then there would be no change.
 
   int64_t total_size = document_log_file_size + document_key_mapper_size +
                        document_id_mapper_file_size + score_cache_file_size +
-                       filter_cache_file_size;
+                       filter_cache_file_size + corpus_score_cache_file_size +
+                       usage_store_file_size;
 
   optimize_info.estimated_optimizable_bytes =
       total_size * optimize_info.optimizable_docs / optimize_info.total_docs;
   return optimize_info;
 }
 
+libtextclassifier3::Status DocumentStore::UpdateCorpusAssociatedScoreCache(
+    CorpusId corpus_id, const CorpusAssociatedScoreData& score_data) {
+  return corpus_score_cache_->Set(corpus_id, score_data);
+}
+
 libtextclassifier3::Status DocumentStore::UpdateDocumentAssociatedScoreCache(
     DocumentId document_id, const DocumentAssociatedScoreData& score_data) {
   return score_cache_->Set(document_id, score_data);
@@ -1617,8 +1731,10 @@
 
   // Resets the score cache entry
   ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
-      document_id, DocumentAssociatedScoreData(/*document_score=*/-1,
-                                               /*creation_timestamp_ms=*/-1)));
+      document_id, DocumentAssociatedScoreData(kInvalidCorpusId,
+                                               /*document_score=*/-1,
+                                               /*creation_timestamp_ms=*/-1,
+                                               /*length_in_tokens=*/0)));
 
   // Resets the filter cache entry
   ICING_RETURN_IF_ERROR(UpdateFilterCache(
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 78590a5..b2908f0 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -30,6 +30,7 @@
 #include "icing/proto/document_wrapper.pb.h"
 #include "icing/proto/logging.pb.h"
 #include "icing/schema/schema-store.h"
+#include "icing/store/corpus-associated-scoring-data.h"
 #include "icing/store/corpus-id.h"
 #include "icing/store/document-associated-score-data.h"
 #include "icing/store/document-filter-data.h"
@@ -37,6 +38,7 @@
 #include "icing/store/key-mapper.h"
 #include "icing/store/namespace-id.h"
 #include "icing/store/usage-store.h"
+#include "icing/tokenization/language-segmenter.h"
 #include "icing/util/clock.h"
 #include "icing/util/crc32.h"
 #include "icing/util/data-loss.h"
@@ -149,23 +151,27 @@
   //     exist in schema
   //   INTERNAL_ERROR on IO error
   libtextclassifier3::StatusOr<DocumentId> Put(
-      const DocumentProto& document,
+      const DocumentProto& document, int32_t num_tokens = 0,
       NativePutDocumentStats* put_document_stats = nullptr);
   libtextclassifier3::StatusOr<DocumentId> Put(
-      DocumentProto&& document,
+      DocumentProto&& document, int32_t num_tokens = 0,
       NativePutDocumentStats* put_document_stats = nullptr);
 
   // Finds and returns the document identified by the given key (namespace +
-  // uri)
+  // uri). If 'clear_internal_fields' is true, document-level data that is
+  // generated internally by the DocumentStore is cleared.
   //
   // Returns:
   //   The document found on success
   //   NOT_FOUND if the key doesn't exist or document has been deleted
   //   INTERNAL_ERROR on IO error
-  libtextclassifier3::StatusOr<DocumentProto> Get(std::string_view name_space,
-                                                  std::string_view uri) const;
+  libtextclassifier3::StatusOr<DocumentProto> Get(
+      std::string_view name_space, std::string_view uri,
+      bool clear_internal_fields = true) const;
 
-  // Finds and returns the document identified by the given document id
+  // Finds and returns the document identified by the given document id. If
+  // 'clear_internal_fields' is true, document-level data that is generated
+  // internally by the DocumentStore is cleared.
   //
   // Returns:
   //   The document found on success
@@ -173,7 +179,8 @@
   //                    maximum value
   //   NOT_FOUND if the document doesn't exist or has been deleted
   //   INTERNAL_ERROR on IO error
-  libtextclassifier3::StatusOr<DocumentProto> Get(DocumentId document_id) const;
+  libtextclassifier3::StatusOr<DocumentProto> Get(
+      DocumentId document_id, bool clear_internal_fields = true) const;
 
   // Returns all namespaces which have at least 1 active document (not deleted
   // or expired). Order of namespaces is undefined.
@@ -256,6 +263,20 @@
   libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
   GetDocumentAssociatedScoreData(DocumentId document_id) const;
 
+  // Returns the CorpusAssociatedScoreData of the corpus specified by the
+  // corpus_id.
+  //
+  // NOTE: This does not check if the corpus exists and will return the
+  // CorpusAssociatedScoreData of the corpus even if all documents belonging to
+  // that corpus have been deleted.
+  //
+  // Returns:
+  //   CorpusAssociatedScoreData on success
+  //   OUT_OF_RANGE if corpus_id is negative or exceeds previously seen
+  //                CorpusIds
+  libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
+  GetCorpusAssociatedScoreData(CorpusId corpus_id) const;
+
   // Returns the DocumentFilterData of the document specified by the DocumentId.
   //
   // NOTE: This does not check if the document exists and will return the
@@ -394,7 +415,9 @@
   //   OK on success
   //   INVALID_ARGUMENT if new_directory is same as current base directory
   //   INTERNAL_ERROR on IO error
-  libtextclassifier3::Status OptimizeInto(const std::string& new_directory);
+  libtextclassifier3::Status OptimizeInto(
+      const std::string& new_directory,
+      const LanguageSegmenter* lang_segmenter);
 
   // Calculates status for a potential Optimize call. Includes how many docs
   // there are vs how many would be optimized away. And also includes an
@@ -441,8 +464,10 @@
 
   // A cache of document associated scores. The ground truth of the scores is
   // DocumentProto stored in document_log_. This cache contains:
+  //   - CorpusId
   //   - Document score
   //   - Document creation timestamp in milliseconds
+  //   - Document length in number of tokens
   std::unique_ptr<FileBackedVector<DocumentAssociatedScoreData>> score_cache_;
 
   // A cache of data, indexed by DocumentId, used to filter documents. Currently
@@ -452,6 +477,13 @@
   //   - Expiration timestamp in milliseconds
   std::unique_ptr<FileBackedVector<DocumentFilterData>> filter_cache_;
 
+  // A cache of corpus-associated scores. The ground truth of the scores is
+  // the DocumentProtos stored in document_log_. This cache contains:
+  //   - The number of documents belonging to the corpus
+  //   - The sum of the documents' lengths, in number of tokens.
+  std::unique_ptr<FileBackedVector<CorpusAssociatedScoreData>>
+      corpus_score_cache_;
+
   // Maps namespaces to a densely-assigned unique id. Namespaces are assigned an
   // id when the first document belonging to that namespace is added to the
   // DocumentStore. Namespaces may be removed from the mapper during compaction.
@@ -516,6 +548,12 @@
   // Returns OK or any IO errors.
   libtextclassifier3::Status ResetDocumentAssociatedScoreCache();
 
+  // Resets the unique_ptr to the corpus_score_cache, deletes the underlying
+  // file, and re-creates a new instance of the corpus_score_cache.
+  //
+  // Returns OK or any IO errors.
+  libtextclassifier3::Status ResetCorpusAssociatedScoreCache();
+
   // Resets the unique_ptr to the filter_cache, deletes the underlying file, and
   // re-creates a new instance of the filter_cache.
   //
@@ -546,6 +584,10 @@
   //   INTERNAL on I/O error
   libtextclassifier3::Status UpdateHeader(const Crc32& checksum);
 
+  libtextclassifier3::StatusOr<DocumentId> InternalPut(
+      DocumentProto& document,
+      NativePutDocumentStats* put_document_stats = nullptr);
+
   // Helper function to do batch deletes. Documents with the given
   // "namespace_id" and "schema_type_id" will be deleted. If callers don't need
   // to specify the namespace or schema type, pass in kInvalidNamespaceId or
@@ -597,6 +639,21 @@
   libtextclassifier3::StatusOr<DocumentId> GetDocumentId(
       std::string_view name_space, std::string_view uri) const;
 
+  // Returns the CorpusAssociatedScoreData of the corpus specified by the
+  // corpus_id.
+  //
+  // If the corpus_id has never been seen before, it returns a
+  // CorpusAssociatedScoreData with properties set to default values.
+  //
+  // NOTE: This does not check if the corpus exists and will return the
+  // CorpusAssociatedScoreData of the corpus even if all documents belonging to
+  // that corpus have been deleted.
+  //
+  // Returns:
+  //   CorpusAssociatedScoreData on success
+  libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
+  GetCorpusAssociatedScoreDataToUpdate(CorpusId corpus_id) const;
+
   // Helper method to validate the document id and return the file offset of the
   // associated document in document_log_.
   //
@@ -617,6 +674,10 @@
   libtextclassifier3::Status UpdateDocumentAssociatedScoreCache(
       DocumentId document_id, const DocumentAssociatedScoreData& score_data);
 
+  // Updates the entry in the corpus score cache for corpus_id.
+  libtextclassifier3::Status UpdateCorpusAssociatedScoreCache(
+      CorpusId corpus_id, const CorpusAssociatedScoreData& score_data);
+
   // Updates the entry in the filter cache for document_id.
   libtextclassifier3::Status UpdateFilterCache(
       DocumentId document_id, const DocumentFilterData& filter_data);
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index 29bf8bb..7754373 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -27,20 +27,25 @@
 #include "icing/file/filesystem.h"
 #include "icing/file/memory-mapped-file.h"
 #include "icing/file/mock-filesystem.h"
+#include "icing/helpers/icu/icu-data-file-helper.h"
 #include "icing/portable/equals-proto.h"
 #include "icing/proto/document.pb.h"
 #include "icing/proto/schema.pb.h"
 #include "icing/schema/schema-store.h"
+#include "icing/store/corpus-associated-scoring-data.h"
+#include "icing/store/corpus-id.h"
 #include "icing/store/document-filter-data.h"
 #include "icing/store/document-id.h"
-#include "icing/store/enable-bm25f.h"
 #include "icing/store/namespace-id.h"
 #include "icing/testing/common-matchers.h"
 #include "icing/testing/fake-clock.h"
 #include "icing/testing/platform.h"
 #include "icing/testing/test-data.h"
 #include "icing/testing/tmp-directory.h"
+#include "icing/tokenization/language-segmenter-factory.h"
+#include "icing/tokenization/language-segmenter.h"
 #include "icing/util/crc32.h"
+#include "unicode/uloc.h"
 
 namespace icing {
 namespace lib {
@@ -101,7 +106,19 @@
   }
 
   void SetUp() override {
-    setEnableBm25f(true);
+    if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+      // If we've specified using the reverse-JNI method for segmentation (i.e.
+      // not ICU), then we won't have the ICU data file included to set up.
+      // Technically, we could choose to use reverse-JNI for segmentation AND
+      // include an ICU data file, but that seems unlikely and our current BUILD
+      // setup doesn't do this.
+      // File generated via icu_data_file rule in //icing/BUILD.
+      std::string icu_data_file_path =
+          GetTestFilePath("icing/icu.dat");
+      ICING_ASSERT_OK(
+          icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+    }
+
     filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
     filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
     filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
@@ -133,6 +150,11 @@
         schema_store_,
         SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
     ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+
+    language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
+    ICING_ASSERT_OK_AND_ASSIGN(
+        lang_segmenter_,
+        language_segmenter_factory::Create(std::move(segmenter_options)));
   }
 
   void TearDown() override {
@@ -147,6 +169,7 @@
   DocumentProto test_document1_;
   DocumentProto test_document2_;
   std::unique_ptr<SchemaStore> schema_store_;
+  std::unique_ptr<LanguageSegmenter> lang_segmenter_;
 
   // Document1 values
   const int document1_score_ = 1;
@@ -1184,9 +1207,10 @@
       filesystem_.GetFileSize(original_document_log.c_str());
 
   // Optimizing into the same directory is not allowed
-  EXPECT_THAT(doc_store->OptimizeInto(document_store_dir_),
-              StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
-                       HasSubstr("directory is the same")));
+  EXPECT_THAT(
+      doc_store->OptimizeInto(document_store_dir_, lang_segmenter_.get()),
+      StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+               HasSubstr("directory is the same")));
 
   std::string optimized_dir = document_store_dir_ + "_optimize";
   std::string optimized_document_log = optimized_dir + "/document_log";
@@ -1195,7 +1219,8 @@
   // deleted
   ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
   ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
-  ICING_ASSERT_OK(doc_store->OptimizeInto(optimized_dir));
+  ICING_ASSERT_OK(
+      doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
   int64_t optimized_size1 =
       filesystem_.GetFileSize(optimized_document_log.c_str());
   EXPECT_EQ(original_size, optimized_size1);
@@ -1205,7 +1230,8 @@
   ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
   ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
   ICING_ASSERT_OK(doc_store->Delete("namespace", "uri1"));
-  ICING_ASSERT_OK(doc_store->OptimizeInto(optimized_dir));
+  ICING_ASSERT_OK(
+      doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
   int64_t optimized_size2 =
       filesystem_.GetFileSize(optimized_document_log.c_str());
   EXPECT_THAT(original_size, Gt(optimized_size2));
@@ -1218,7 +1244,8 @@
   // expired
   ASSERT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
   ASSERT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
-  ICING_ASSERT_OK(doc_store->OptimizeInto(optimized_dir));
+  ICING_ASSERT_OK(
+      doc_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
   int64_t optimized_size3 =
       filesystem_.GetFileSize(optimized_document_log.c_str());
   EXPECT_THAT(optimized_size2, Gt(optimized_size3));
@@ -1235,14 +1262,32 @@
     std::unique_ptr<DocumentStore> doc_store =
         std::move(create_result.document_store);
 
-    ICING_ASSERT_OK_AND_ASSIGN(document_id1,
-                               doc_store->Put(DocumentProto(test_document1_)));
-    ICING_ASSERT_OK_AND_ASSIGN(document_id2,
-                               doc_store->Put(DocumentProto(test_document2_)));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        document_id1,
+        doc_store->Put(DocumentProto(test_document1_), /*num_tokens=*/4));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        document_id2,
+        doc_store->Put(DocumentProto(test_document2_), /*num_tokens=*/4));
     EXPECT_THAT(doc_store->Get(document_id1),
                 IsOkAndHolds(EqualsProto(test_document1_)));
     EXPECT_THAT(doc_store->Get(document_id2),
                 IsOkAndHolds(EqualsProto(test_document2_)));
+    // Checks derived score cache
+    EXPECT_THAT(
+        doc_store->GetDocumentAssociatedScoreData(document_id1),
+        IsOkAndHolds(DocumentAssociatedScoreData(
+            /*corpus_id=*/0, document1_score_, document1_creation_timestamp_,
+            /*length_in_tokens=*/4)));
+    EXPECT_THAT(
+        doc_store->GetDocumentAssociatedScoreData(document_id2),
+        IsOkAndHolds(DocumentAssociatedScoreData(
+            /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+            /*length_in_tokens=*/4)));
+    EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+                IsOkAndHolds(CorpusAssociatedScoreData(
+                    /*num_docs=*/2, /*sum_length_in_tokens=*/8)));
+
+    // Delete document 1
     EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
     EXPECT_THAT(doc_store->Get(document_id1),
                 StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -1281,9 +1326,14 @@
                   /*namespace_id=*/0,
                   /*schema_type_id=*/0, document2_expiration_timestamp_)));
   // Checks derived score cache
-  EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id2),
-              IsOkAndHolds(DocumentAssociatedScoreData(
-                  document2_score_, document2_creation_timestamp_)));
+  EXPECT_THAT(
+      doc_store->GetDocumentAssociatedScoreData(document_id2),
+      IsOkAndHolds(DocumentAssociatedScoreData(
+          /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+          /*length_in_tokens=*/4)));
+  EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+              IsOkAndHolds(CorpusAssociatedScoreData(
+                  /*num_docs=*/1, /*sum_length_in_tokens=*/4)));
 }
 
 TEST_F(DocumentStoreTest, ShouldRecoverFromCorruptDerivedFile) {
@@ -1297,14 +1347,31 @@
     std::unique_ptr<DocumentStore> doc_store =
         std::move(create_result.document_store);
 
-    ICING_ASSERT_OK_AND_ASSIGN(document_id1,
-                               doc_store->Put(DocumentProto(test_document1_)));
-    ICING_ASSERT_OK_AND_ASSIGN(document_id2,
-                               doc_store->Put(DocumentProto(test_document2_)));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        document_id1,
+        doc_store->Put(DocumentProto(test_document1_), /*num_tokens=*/4));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        document_id2,
+        doc_store->Put(DocumentProto(test_document2_), /*num_tokens=*/4));
     EXPECT_THAT(doc_store->Get(document_id1),
                 IsOkAndHolds(EqualsProto(test_document1_)));
     EXPECT_THAT(doc_store->Get(document_id2),
                 IsOkAndHolds(EqualsProto(test_document2_)));
+    // Checks derived score cache
+    EXPECT_THAT(
+        doc_store->GetDocumentAssociatedScoreData(document_id1),
+        IsOkAndHolds(DocumentAssociatedScoreData(
+            /*corpus_id=*/0, document1_score_, document1_creation_timestamp_,
+            /*length_in_tokens=*/4)));
+    EXPECT_THAT(
+        doc_store->GetDocumentAssociatedScoreData(document_id2),
+        IsOkAndHolds(DocumentAssociatedScoreData(
+            /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+            /*length_in_tokens=*/4)));
+    EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+                IsOkAndHolds(CorpusAssociatedScoreData(
+                    /*num_docs=*/2, /*sum_length_in_tokens=*/8)));
+    // Delete document 1
     EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
     EXPECT_THAT(doc_store->Get(document_id1),
                 StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -1328,6 +1395,7 @@
               IsOk());
 
   // Successfully recover from a corrupt derived file issue.
+  // NOTE: this doesn't trigger RegenerateDerivedFiles.
   ICING_ASSERT_OK_AND_ASSIGN(
       DocumentStore::CreateResult create_result,
       DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1345,10 +1413,16 @@
               IsOkAndHolds(DocumentFilterData(
                   /*namespace_id=*/0,
                   /*schema_type_id=*/0, document2_expiration_timestamp_)));
-  // Checks derived score cache
-  EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id2),
-              IsOkAndHolds(DocumentAssociatedScoreData(
-                  document2_score_, document2_creation_timestamp_)));
+  // Checks derived score cache - note that the derived files aren't
+  // regenerated from scratch.
+  EXPECT_THAT(
+      doc_store->GetDocumentAssociatedScoreData(document_id2),
+      IsOkAndHolds(DocumentAssociatedScoreData(
+          /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+          /*length_in_tokens=*/4)));
+  EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+              IsOkAndHolds(CorpusAssociatedScoreData(
+                  /*num_docs=*/2, /*sum_length_in_tokens=*/8)));
 }
 
 TEST_F(DocumentStoreTest, ShouldRecoverFromBadChecksum) {
@@ -1362,14 +1436,30 @@
     std::unique_ptr<DocumentStore> doc_store =
         std::move(create_result.document_store);
 
-    ICING_ASSERT_OK_AND_ASSIGN(document_id1,
-                               doc_store->Put(DocumentProto(test_document1_)));
-    ICING_ASSERT_OK_AND_ASSIGN(document_id2,
-                               doc_store->Put(DocumentProto(test_document2_)));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        document_id1,
+        doc_store->Put(DocumentProto(test_document1_), /*num_tokens=*/4));
+    ICING_ASSERT_OK_AND_ASSIGN(
+        document_id2,
+        doc_store->Put(DocumentProto(test_document2_), /*num_tokens=*/4));
     EXPECT_THAT(doc_store->Get(document_id1),
                 IsOkAndHolds(EqualsProto(test_document1_)));
     EXPECT_THAT(doc_store->Get(document_id2),
                 IsOkAndHolds(EqualsProto(test_document2_)));
+    // Checks derived score cache
+    EXPECT_THAT(
+        doc_store->GetDocumentAssociatedScoreData(document_id1),
+        IsOkAndHolds(DocumentAssociatedScoreData(
+            /*corpus_id=*/0, document1_score_, document1_creation_timestamp_,
+            /*length_in_tokens=*/4)));
+    EXPECT_THAT(
+        doc_store->GetDocumentAssociatedScoreData(document_id2),
+        IsOkAndHolds(DocumentAssociatedScoreData(
+            /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+            /*length_in_tokens=*/4)));
+    EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+                IsOkAndHolds(CorpusAssociatedScoreData(
+                    /*num_docs=*/2, /*sum_length_in_tokens=*/8)));
     EXPECT_THAT(doc_store->Delete("icing", "email/1"), IsOk());
     EXPECT_THAT(doc_store->Get(document_id1),
                 StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -1407,9 +1497,14 @@
                   /*namespace_id=*/0,
                   /*schema_type_id=*/0, document2_expiration_timestamp_)));
   // Checks derived score cache
-  EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id2),
-              IsOkAndHolds(DocumentAssociatedScoreData(
-                  document2_score_, document2_creation_timestamp_)));
+  EXPECT_THAT(
+      doc_store->GetDocumentAssociatedScoreData(document_id2),
+      IsOkAndHolds(DocumentAssociatedScoreData(
+          /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+          /*length_in_tokens=*/4)));
+  EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+              IsOkAndHolds(CorpusAssociatedScoreData(
+                  /*num_docs=*/1, /*sum_length_in_tokens=*/4)));
 }
 
 TEST_F(DocumentStoreTest, GetDiskUsage) {
@@ -1544,28 +1639,6 @@
               StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
 }
 
-TEST_F(DocumentStoreTest, GetCorpusIdReturnsNotFoundWhenFeatureIsDisabled) {
-  setEnableBm25f(false);
-  ICING_ASSERT_OK_AND_ASSIGN(
-      DocumentStore::CreateResult create_result,
-      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
-                            schema_store_.get()));
-  std::unique_ptr<DocumentStore> doc_store =
-      std::move(create_result.document_store);
-
-  DocumentProto document1 =
-      DocumentBuilder().SetKey("namespace", "1").SetSchema("email").Build();
-  DocumentProto document2 =
-      DocumentBuilder().SetKey("namespace", "2").SetSchema("email").Build();
-
-  ICING_ASSERT_OK(doc_store->Put(document1));
-  ICING_ASSERT_OK(doc_store->Put(document2));
-
-  EXPECT_THAT(doc_store->GetCorpusId("namespace", "email"),
-              StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
-                       HasSubstr("corpus_mapper disabled")));
-}
-
 TEST_F(DocumentStoreTest, GetCorpusDuplicateCorpusId) {
   ICING_ASSERT_OK_AND_ASSIGN(
       DocumentStore::CreateResult create_result,
@@ -1582,7 +1655,7 @@
   ICING_ASSERT_OK(doc_store->Put(document1));
   ICING_ASSERT_OK(doc_store->Put(document2));
 
-  // NamespaceId of 0 since it was the first namespace seen by the DocumentStore
+  // CorpusId of 0 since it was the first corpus seen by the DocumentStore
   EXPECT_THAT(doc_store->GetCorpusId("namespace", "email"),
               IsOkAndHolds(Eq(0)));
 }
@@ -1642,6 +1715,183 @@
               StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
   EXPECT_THAT(doc_store->GetCorpusId("namespace1", "nonexistent_schema"),
               StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+  EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/1),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreDataSameCorpus) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentStore::CreateResult create_result,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+  std::unique_ptr<DocumentStore> doc_store =
+      std::move(create_result.document_store);
+
+  DocumentProto document1 =
+      DocumentBuilder().SetKey("namespace", "1").SetSchema("email").Build();
+  DocumentProto document2 =
+      DocumentBuilder().SetKey("namespace", "2").SetSchema("email").Build();
+
+  ICING_ASSERT_OK(doc_store->Put(document1, /*num_tokens=*/5));
+  ICING_ASSERT_OK(doc_store->Put(document2, /*num_tokens=*/7));
+
+  // CorpusId of 0 since it was the first corpus seen by the DocumentStore
+  EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+              IsOkAndHolds(CorpusAssociatedScoreData(
+                  /*num_docs=*/2, /*sum_length_in_tokens=*/12)));
+  // Only one corpus exists
+  EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/1),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreData) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentStore::CreateResult create_result,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+  std::unique_ptr<DocumentStore> doc_store =
+      std::move(create_result.document_store);
+
+  DocumentProto document_corpus1 =
+      DocumentBuilder().SetKey("namespace1", "1").SetSchema("email").Build();
+  DocumentProto document_corpus2 =
+      DocumentBuilder().SetKey("namespace2", "2").SetSchema("email").Build();
+
+  ICING_ASSERT_OK(
+      doc_store->Put(DocumentProto(document_corpus1), /*num_tokens=*/5));
+  ICING_ASSERT_OK(
+      doc_store->Put(DocumentProto(document_corpus2), /*num_tokens=*/7));
+
+  // CorpusId of 0 since it was the first corpus seen by the DocumentStore
+  EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+              IsOkAndHolds(CorpusAssociatedScoreData(
+                  /*num_docs=*/1, /*sum_length_in_tokens=*/5)));
+
+  // CorpusId of 1 since it was the second corpus seen by the
+  // DocumentStore
+  EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/1),
+              IsOkAndHolds(CorpusAssociatedScoreData(
+                  /*num_docs=*/1, /*sum_length_in_tokens=*/7)));
+
+  // DELETE namespace1 - document_corpus1 is deleted.
+  ICING_EXPECT_OK(doc_store->DeleteByNamespace("namespace1").status);
+
+  // Corpus score cache doesn't care if the document has been deleted
+  EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+              IsOkAndHolds(CorpusAssociatedScoreData(
+                  /*num_docs=*/1, /*sum_length_in_tokens=*/5)));
+}
+
+TEST_F(DocumentStoreTest, NonexistentCorpusAssociatedScoreDataOutOfRange) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentStore::CreateResult create_result,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+  std::unique_ptr<DocumentStore> doc_store =
+      std::move(create_result.document_store);
+
+  EXPECT_THAT(doc_store->GetCorpusAssociatedScoreData(/*corpus_id=*/0),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(DocumentStoreTest, GetDocumentAssociatedScoreDataSameCorpus) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentStore::CreateResult create_result,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+  std::unique_ptr<DocumentStore> doc_store =
+      std::move(create_result.document_store);
+
+  DocumentProto document1 =
+      DocumentBuilder()
+          .SetKey("namespace", "1")
+          .SetSchema("email")
+          .SetScore(document1_score_)
+          .SetCreationTimestampMs(
+              document1_creation_timestamp_)  // A random timestamp
+          .Build();
+  DocumentProto document2 =
+      DocumentBuilder()
+          .SetKey("namespace", "2")
+          .SetSchema("email")
+          .SetScore(document2_score_)
+          .SetCreationTimestampMs(
+              document2_creation_timestamp_)  // A random timestamp
+          .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id1,
+      doc_store->Put(DocumentProto(document1), /*num_tokens=*/5));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id2,
+      doc_store->Put(DocumentProto(document2), /*num_tokens=*/7));
+
+  EXPECT_THAT(
+      doc_store->GetDocumentAssociatedScoreData(document_id1),
+      IsOkAndHolds(DocumentAssociatedScoreData(
+          /*corpus_id=*/0, document1_score_, document1_creation_timestamp_,
+          /*length_in_tokens=*/5)));
+  EXPECT_THAT(
+      doc_store->GetDocumentAssociatedScoreData(document_id2),
+      IsOkAndHolds(DocumentAssociatedScoreData(
+          /*corpus_id=*/0, document2_score_, document2_creation_timestamp_,
+          /*length_in_tokens=*/7)));
+}
+
+TEST_F(DocumentStoreTest, GetCorpusAssociatedScoreDataDifferentCorpus) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentStore::CreateResult create_result,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+  std::unique_ptr<DocumentStore> doc_store =
+      std::move(create_result.document_store);
+
+  DocumentProto document1 =
+      DocumentBuilder()
+          .SetKey("namespace1", "1")
+          .SetSchema("email")
+          .SetScore(document1_score_)
+          .SetCreationTimestampMs(
+              document1_creation_timestamp_)  // A random timestamp
+          .Build();
+  DocumentProto document2 =
+      DocumentBuilder()
+          .SetKey("namespace2", "2")
+          .SetSchema("email")
+          .SetScore(document2_score_)
+          .SetCreationTimestampMs(
+              document2_creation_timestamp_)  // A random timestamp
+          .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id1,
+      doc_store->Put(DocumentProto(document1), /*num_tokens=*/5));
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentId document_id2,
+      doc_store->Put(DocumentProto(document2), /*num_tokens=*/7));
+
+  EXPECT_THAT(
+      doc_store->GetDocumentAssociatedScoreData(document_id1),
+      IsOkAndHolds(DocumentAssociatedScoreData(
+          /*corpus_id=*/0, document1_score_, document1_creation_timestamp_,
+          /*length_in_tokens=*/5)));
+  EXPECT_THAT(
+      doc_store->GetDocumentAssociatedScoreData(document_id2),
+      IsOkAndHolds(DocumentAssociatedScoreData(
+          /*corpus_id=*/1, document2_score_, document2_creation_timestamp_,
+          /*length_in_tokens=*/7)));
+}
+
+TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataOutOfRange) {
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentStore::CreateResult create_result,
+      DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+                            schema_store_.get()));
+  std::unique_ptr<DocumentStore> doc_store =
+      std::move(create_result.document_store);
+
+  EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(/*document_id=*/0),
+              StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
 }
 
 TEST_F(DocumentStoreTest, SoftDeletionDoesNotClearFilterCache) {
@@ -1700,12 +1950,13 @@
       std::move(create_result.document_store);
 
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
-                             doc_store->Put(test_document1_));
+                             doc_store->Put(test_document1_, /*num_tokens=*/4));
 
   EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id),
               IsOkAndHolds(DocumentAssociatedScoreData(
-                  /*document_score=*/document1_score_,
-                  /*creation_timestamp_ms=*/document1_creation_timestamp_)));
+                  /*corpus_id=*/0, /*document_score=*/document1_score_,
+                  /*creation_timestamp_ms=*/document1_creation_timestamp_,
+                  /*length_in_tokens=*/4)));
 
   ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/true));
   // Associated entry of the deleted document is removed.
@@ -1722,12 +1973,14 @@
       std::move(create_result.document_store);
 
   ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
-                             doc_store->Put(test_document1_));
+                             doc_store->Put(test_document1_, /*num_tokens=*/4));
 
   EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id),
               IsOkAndHolds(DocumentAssociatedScoreData(
+                  /*corpus_id=*/0,
                   /*document_score=*/document1_score_,
-                  /*creation_timestamp_ms=*/document1_creation_timestamp_)));
+                  /*creation_timestamp_ms=*/document1_creation_timestamp_,
+                  /*length_in_tokens=*/4)));
 
   ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/false));
   // Associated entry of the deleted document is removed.
@@ -1931,11 +2184,15 @@
 
   EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id1),
               IsOkAndHolds(DocumentAssociatedScoreData(
-                  /*document_score=*/0, /*creation_timestamp_ms=*/0)));
+                  /*corpus_id=*/0,
+                  /*document_score=*/0, /*creation_timestamp_ms=*/0,
+                  /*length_in_tokens=*/0)));
 
   EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id2),
               IsOkAndHolds(DocumentAssociatedScoreData(
-                  /*document_score=*/5, /*creation_timestamp_ms=*/0)));
+                  /*corpus_id=*/0,
+                  /*document_score=*/5, /*creation_timestamp_ms=*/0,
+                  /*length_in_tokens=*/0)));
 }
 
 TEST_F(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
@@ -2636,7 +2893,8 @@
   std::string optimized_dir = document_store_dir_ + "_optimize";
   EXPECT_TRUE(filesystem_.DeleteDirectoryRecursively(optimized_dir.c_str()));
   EXPECT_TRUE(filesystem_.CreateDirectoryRecursively(optimized_dir.c_str()));
-  ICING_ASSERT_OK(document_store->OptimizeInto(optimized_dir));
+  ICING_ASSERT_OK(
+      document_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
   document_store.reset();
   ICING_ASSERT_OK_AND_ASSIGN(
       create_result, DocumentStore::Create(&filesystem_, optimized_dir,
@@ -3046,7 +3304,8 @@
   // Run optimize
   std::string optimized_dir = document_store_dir_ + "/optimize_test";
   filesystem_.CreateDirectoryRecursively(optimized_dir.c_str());
-  ICING_ASSERT_OK(document_store->OptimizeInto(optimized_dir));
+  ICING_ASSERT_OK(
+      document_store->OptimizeInto(optimized_dir, lang_segmenter_.get()));
 
   // Get optimized document store
   ICING_ASSERT_OK_AND_ASSIGN(
@@ -3149,9 +3408,9 @@
   // the current code is compatible with the format of the v0 scoring_cache,
   // then an empty document store should be initialized, but the non-empty
   // scoring_cache should be retained.
-  // Since the current document-asscoiated-score-data is compatible with the
-  // score_cache in testdata/v0/document_store, the document store should be
-  // initialized without having to re-generate the derived files.
+  // The current document-associated-score-data has a new field with respect
+  // to the ones stored in testdata/v0, hence the document store's
+  // initialization requires regenerating its derived files.
 
   // Create dst directory
   ASSERT_THAT(filesystem_.CreateDirectory(document_store_dir_.c_str()), true);
@@ -3186,9 +3445,10 @@
                             schema_store_.get(), &initializeStats));
   std::unique_ptr<DocumentStore> doc_store =
       std::move(create_result.document_store);
-  // Regeneration never happens.
-  EXPECT_EQ(initializeStats.document_store_recovery_cause(),
-            NativeInitializeStats::NONE);
+  // The score_cache triggers regeneration because its element size is
+  // inconsistent: expected 20 (current new size), actual 12 (as per the v0
+  // score_cache).
+  EXPECT_TRUE(initializeStats.has_document_store_recovery_cause());
 }
 
 }  // namespace
diff --git a/icing/store/usage-store.cc b/icing/store/usage-store.cc
index 7a0af9c..54896dc 100644
--- a/icing/store/usage-store.cc
+++ b/icing/store/usage-store.cc
@@ -214,6 +214,10 @@
   return usage_score_cache_->ComputeChecksum();
 }
 
+libtextclassifier3::StatusOr<int64_t> UsageStore::GetElementsFileSize() const {
+  return usage_score_cache_->GetElementsFileSize();
+}
+
 libtextclassifier3::Status UsageStore::TruncateTo(DocumentId num_documents) {
   if (num_documents >= usage_score_cache_->num_elements()) {
     // No need to truncate
diff --git a/icing/store/usage-store.h b/icing/store/usage-store.h
index 0a622a0..b7de970 100644
--- a/icing/store/usage-store.h
+++ b/icing/store/usage-store.h
@@ -148,6 +148,15 @@
   //   INTERNAL_ERROR if the internal state is inconsistent
   libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
 
+  // Returns the file size, in bytes, of all the elements held in the
+  // UsageStore. This excludes the size of any internal metadata, e.g. any
+  // internal headers.
+  //
+  // Returns:
+  //   File size on success
+  //   INTERNAL_ERROR on IO error
+  libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const;
+
   // Resizes the storage so that only the usage scores of and before
   // last_document_id are stored.
   //
diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc
index f7fa778..220c226 100644
--- a/icing/store/usage-store_test.cc
+++ b/icing/store/usage-store_test.cc
@@ -24,6 +24,7 @@
 
 namespace {
 using ::testing::Eq;
+using ::testing::Gt;
 using ::testing::Not;
 
 class UsageStoreTest : public testing::Test {
@@ -560,6 +561,22 @@
               IsOkAndHolds(UsageStore::UsageScores()));
 }
 
+TEST_F(UsageStoreTest, GetElementsFileSize) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+                             UsageStore::Create(&filesystem_, test_dir_));
+
+  ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_file_size,
+                             usage_store->GetElementsFileSize());
+  EXPECT_THAT(empty_file_size, Eq(0));
+
+  UsageReport usage_report = CreateUsageReport(
+      "namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
+  ICING_ASSERT_OK(
+      usage_store->AddUsageReport(usage_report, /*document_id=*/1));
+
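+  // One usage report was added, so the elements file must have grown beyond
+  // its empty size.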
+  EXPECT_THAT(usage_store->GetElementsFileSize(),
+              IsOkAndHolds(Gt(empty_file_size)));
+}
+
 }  // namespace
 
 }  // namespace lib
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index a15e64e..b7f54ba 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -15,6 +15,8 @@
 #ifndef ICING_TESTING_COMMON_MATCHERS_H_
 #define ICING_TESTING_COMMON_MATCHERS_H_
 
+#include <cmath>
+
 #include "icing/text_classifier/lib3/utils/base/status.h"
 #include "icing/text_classifier/lib3/utils/base/status_macros.h"
 #include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -103,7 +105,7 @@
   if (arg.document_id() != expected_scored_document_hit.document_id() ||
       arg.hit_section_id_mask() !=
           expected_scored_document_hit.hit_section_id_mask() ||
-      arg.score() != expected_scored_document_hit.score()) {
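+      // Scores are floating-point (e.g. the BM25F relevance score), so
+      // compare with an absolute tolerance instead of exact equality.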
+      std::fabs(arg.score() - expected_scored_document_hit.score()) > 1e-6) {
     *result_listener << IcingStringUtil::StringPrintf(
         "Expected: document_id=%d, hit_section_id_mask=%d, score=%.2f. Actual: "
         "document_id=%d, hit_section_id_mask=%d, score=%.2f",
diff --git a/icing/util/document-validator.cc b/icing/util/document-validator.cc
index fb1fc4b..8d6d51a 100644
--- a/icing/util/document-validator.cc
+++ b/icing/util/document-validator.cc
@@ -32,12 +32,13 @@
     : schema_store_(schema_store) {}
 
 libtextclassifier3::Status DocumentValidator::Validate(
-    const DocumentProto& document) {
+    const DocumentProto& document, int depth) {
   if (document.namespace_().empty()) {
     return absl_ports::InvalidArgumentError("Field 'namespace' is empty.");
   }
 
-  if (document.uri().empty()) {
+  // Only require a non-empty uri on top-level documents.
+  if (depth == 0 && document.uri().empty()) {
     return absl_ports::InvalidArgumentError("Field 'uri' is empty.");
   }
 
@@ -160,7 +161,7 @@
               nested_document.schema(), "' for key: (", document.namespace_(),
               ", ", document.uri(), ")."));
         }
-        ICING_RETURN_IF_ERROR(Validate(nested_document));
+        ICING_RETURN_IF_ERROR(Validate(nested_document, depth + 1));
       }
     }
   }
diff --git a/icing/util/document-validator.h b/icing/util/document-validator.h
index 036d1fa..8542283 100644
--- a/icing/util/document-validator.h
+++ b/icing/util/document-validator.h
@@ -32,7 +32,8 @@
 
   // This function validates:
   //  1. DocumentProto.namespace is not empty
-  //  2. DocumentProto.uri is not empty
+  //  2. DocumentProto.uri is not empty in top-level documents. Nested documents
+  //     may have empty uris.
   //  3. DocumentProto.schema is not empty
   //  4. DocumentProto.schema matches one of SchemaTypeConfigProto.schema_type
   //     in the given SchemaProto in constructor
@@ -56,6 +57,9 @@
   // In addition, all nested DocumentProto will also be validated towards the
   // requirements above.
   //
+  // 'depth' indicates the nesting level of the document. A top-level document
+  // has a nesting depth of 0.
+  //
   // Returns:
   //   OK on success
   //   FAILED_PRECONDITION if no schema is set yet
@@ -63,7 +67,8 @@
   //   NOT_FOUND if case 4 or 7 fails
   //   ALREADY_EXISTS if case 6 fails
   //   INTERNAL on any I/O error
-  libtextclassifier3::Status Validate(const DocumentProto& document);
+  libtextclassifier3::Status Validate(const DocumentProto& document,
+                                      int depth = 0);
 
   void UpdateSchemaStore(const SchemaStore* schema_store) {
     schema_store_ = schema_store;
diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc
index ad5a93e..f05e8a6 100644
--- a/icing/util/document-validator_test.cc
+++ b/icing/util/document-validator_test.cc
@@ -141,13 +141,27 @@
                        HasSubstr("'namespace' is empty")));
 }
 
-TEST_F(DocumentValidatorTest, ValidateEmptyUriInvalid) {
+TEST_F(DocumentValidatorTest, ValidateTopLevelEmptyUriInvalid) {
   DocumentProto email = SimpleEmailBuilder().SetUri("").Build();
   EXPECT_THAT(document_validator_->Validate(email),
               StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
                        HasSubstr("'uri' is empty")));
 }
 
+TEST_F(DocumentValidatorTest, ValidateNestedEmptyUriValid) {
+  DocumentProto conversation =
+      SimpleConversationBuilder()
+          .ClearProperties()
+          .AddStringProperty(kPropertyName, kDefaultString)
+          .AddDocumentProperty(kPropertyEmails,
+                               SimpleEmailBuilder()
+                                   .SetUri("")  // Empty nested uri
+                                   .Build())
+          .Build();
+
+  EXPECT_THAT(document_validator_->Validate(conversation), IsOk());
+}
+
 TEST_F(DocumentValidatorTest, ValidateEmptySchemaInvalid) {
   DocumentProto email = SimpleEmailBuilder().SetSchema("").Build();
   EXPECT_THAT(document_validator_->Validate(email),
diff --git a/icing/util/tokenized-document.cc b/icing/util/tokenized-document.cc
new file mode 100644
index 0000000..02ee459
--- /dev/null
+++ b/icing/util/tokenized-document.cc
@@ -0,0 +1,74 @@
+// Copyright (C) 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/tokenized-document.h"
+
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/proto/document.proto.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/tokenization/language-segmenter.h"
+#include "icing/tokenization/tokenizer-factory.h"
+#include "icing/tokenization/tokenizer.h"
+#include "icing/util/document-validator.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+libtextclassifier3::StatusOr<TokenizedDocument> TokenizedDocument::Create(
+    const SchemaStore* schema_store,
+    const LanguageSegmenter* language_segmenter, DocumentProto document) {
+  TokenizedDocument tokenized_document(std::move(document));
+  ICING_RETURN_IF_ERROR(
+      tokenized_document.Tokenize(schema_store, language_segmenter));
+  return tokenized_document;
+}
+
+TokenizedDocument::TokenizedDocument(DocumentProto document)
+    : document_(std::move(document)) {}
+
+libtextclassifier3::Status TokenizedDocument::Tokenize(
+    const SchemaStore* schema_store,
+    const LanguageSegmenter* language_segmenter) {
+  DocumentValidator validator(schema_store);
+  ICING_RETURN_IF_ERROR(validator.Validate(document_));
+
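+  // Extract the indexable string sections declared by the schema and tokenize
+  // each one with the tokenizer configured for its section metadata.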
+  ICING_ASSIGN_OR_RETURN(std::vector<Section> sections,
+                         schema_store->ExtractSections(document_));
+  for (const Section& section : sections) {
+    ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer> tokenizer,
+                           tokenizer_factory::CreateIndexingTokenizer(
+                               section.metadata.tokenizer, language_segmenter));
+    std::vector<std::string_view> token_sequence;
+    for (std::string_view subcontent : section.content) {
+      ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> itr,
+                             tokenizer->Tokenize(subcontent));
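+      // Each token's text is a string_view into document_'s content, so no
+      // string copies are made here.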
+      while (itr->Advance()) {
+        token_sequence.push_back(itr->GetToken().text);
+      }
+    }
+    tokenized_sections_.emplace_back(SectionMetadata(section.metadata),
+                                     std::move(token_sequence));
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+}  // namespace lib
+}  // namespace icing
diff --git a/icing/util/tokenized-document.h b/icing/util/tokenized-document.h
new file mode 100644
index 0000000..5283195
--- /dev/null
+++ b/icing/util/tokenized-document.h
@@ -0,0 +1,76 @@
+// Copyright (C) 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_TOKENIZED_DOCUMENT_H_
+#define ICING_STORE_TOKENIZED_DOCUMENT_H_
+
+#include <cstdint>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/document.proto.h"
+#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
+#include "icing/tokenization/language-segmenter.h"
+
+namespace icing {
+namespace lib {
+
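+// A document section together with the token sequence extracted from its
+// content. The token text is backed by string_views into the owning
+// TokenizedDocument, so a TokenizedSection must not outlive it.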
+struct TokenizedSection {
+  SectionMetadata metadata;
+  std::vector<std::string_view> token_sequence;
+
+  TokenizedSection(SectionMetadata&& metadata_in,
+                   std::vector<std::string_view>&& token_sequence_in)
+      : metadata(std::move(metadata_in)),
+        token_sequence(std::move(token_sequence_in)) {}
+};
+
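+// Holds a DocumentProto together with the tokens extracted from each of its
+// indexable sections.
+//
+// Example usage:
+//   ICING_ASSIGN_OR_RETURN(
+//       TokenizedDocument tokenized_document,
+//       TokenizedDocument::Create(schema_store, language_segmenter,
+//                                 std::move(document)));
+//   int32_t num_tokens = tokenized_document.num_tokens();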
+class TokenizedDocument {
+ public:
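+  // Creates a TokenizedDocument. 'document' is first validated against
+  // 'schema_store' and then all of its indexable sections are tokenized.
+  //
+  // Returns:
+  //   A TokenizedDocument on success
+  //   Any error returned by validation, section extraction or tokenization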
+  static libtextclassifier3::StatusOr<TokenizedDocument> Create(
+      const SchemaStore* schema_store,
+      const LanguageSegmenter* language_segmenter, DocumentProto document);
+
+  const DocumentProto& document() const { return document_; }
+
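+  // Returns the total number of tokens across all tokenized sections,
+  // recomputed on every call.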
+  int32_t num_tokens() const {
+    int32_t num_tokens = 0;
+    for (const TokenizedSection& section : tokenized_sections_) {
+      num_tokens += section.token_sequence.size();
+    }
+    return num_tokens;
+  }
+
+  const std::vector<TokenizedSection>& sections() const {
+    return tokenized_sections_;
+  }
+
+ private:
+  // Use TokenizedDocument::Create() to instantiate.
+  explicit TokenizedDocument(DocumentProto document);
+
+  DocumentProto document_;
+  std::vector<TokenizedSection> tokenized_sections_;
+
+  libtextclassifier3::Status Tokenize(
+      const SchemaStore* schema_store,
+      const LanguageSegmenter* language_segmenter);
+};
+
+}  // namespace lib
+}  // namespace icing
+
+#endif  // ICING_STORE_TOKENIZED_DOCUMENT_H_
diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java
index 22c607c..88d0578 100644
--- a/java/src/com/google/android/icing/IcingSearchEngine.java
+++ b/java/src/com/google/android/icing/IcingSearchEngine.java
@@ -24,6 +24,7 @@
 import com.google.android.icing.proto.GetAllNamespacesResultProto;
 import com.google.android.icing.proto.GetOptimizeInfoResultProto;
 import com.google.android.icing.proto.GetResultProto;
+import com.google.android.icing.proto.GetResultSpecProto;
 import com.google.android.icing.proto.GetSchemaResultProto;
 import com.google.android.icing.proto.GetSchemaTypeResultProto;
 import com.google.android.icing.proto.IcingSearchEngineOptions;
@@ -41,8 +42,8 @@
 import com.google.android.icing.proto.SetSchemaResultProto;
 import com.google.android.icing.proto.StatusProto;
 import com.google.android.icing.proto.UsageReport;
-import com.google.android.icing.protobuf.ExtensionRegistryLite;
-import com.google.android.icing.protobuf.InvalidProtocolBufferException;
+import com.google.protobuf.ExtensionRegistryLite;
+import com.google.protobuf.InvalidProtocolBufferException;
 import java.io.Closeable;
 
 /**
@@ -84,7 +85,9 @@
 
   @Override
   public void close() {
-    throwIfClosed();
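+    // Make close() idempotent: repeated calls, including the call from
+    // finalize(), are no-ops.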
+    if (closed) {
+      return;
+    }
 
     if (nativePointer != 0) {
       nativeDestroy(this);
@@ -95,8 +98,8 @@
 
   @Override
   protected void finalize() throws Throwable {
-    super.finalize();
     close();
+    super.finalize();
   }
 
   @NonNull
@@ -217,10 +220,11 @@
   }
 
   @NonNull
-  public GetResultProto get(@NonNull String namespace, @NonNull String uri) {
+  public GetResultProto get(
+      @NonNull String namespace, @NonNull String uri, @NonNull GetResultSpecProto getResultSpec) {
     throwIfClosed();
 
-    byte[] getResultBytes = nativeGet(this, namespace, uri);
+    byte[] getResultBytes = nativeGet(this, namespace, uri, getResultSpec.toByteArray());
     if (getResultBytes == null) {
       Log.e(TAG, "Received null GetResultProto from native.");
       return GetResultProto.newBuilder()
@@ -533,7 +537,8 @@
 
   private static native byte[] nativePut(IcingSearchEngine instance, byte[] documentBytes);
 
-  private static native byte[] nativeGet(IcingSearchEngine instance, String namespace, String uri);
+  private static native byte[] nativeGet(
+      IcingSearchEngine instance, String namespace, String uri, byte[] getResultSpecBytes);
 
   private static native byte[] nativeReportUsage(
       IcingSearchEngine instance, byte[] usageReportBytes);
diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
index 6f07e1a..56edaf1 100644
--- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
+++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
@@ -25,6 +25,7 @@
 import com.google.android.icing.proto.GetAllNamespacesResultProto;
 import com.google.android.icing.proto.GetOptimizeInfoResultProto;
 import com.google.android.icing.proto.GetResultProto;
+import com.google.android.icing.proto.GetResultSpecProto;
 import com.google.android.icing.proto.GetSchemaResultProto;
 import com.google.android.icing.proto.GetSchemaTypeResultProto;
 import com.google.android.icing.proto.IcingSearchEngineOptions;
@@ -163,7 +164,8 @@
     PutResultProto putResultProto = icingSearchEngine.put(emailDocument);
     assertStatusOk(putResultProto.getStatus());
 
-    GetResultProto getResultProto = icingSearchEngine.get("namespace", "uri");
+    GetResultProto getResultProto =
+        icingSearchEngine.get("namespace", "uri", GetResultSpecProto.getDefaultInstance());
     assertStatusOk(getResultProto.getStatus());
     assertThat(getResultProto.getDocument()).isEqualTo(emailDocument);
   }
@@ -281,7 +283,8 @@
     DeleteResultProto deleteResultProto = icingSearchEngine.delete("namespace", "uri");
     assertStatusOk(deleteResultProto.getStatus());
 
-    GetResultProto getResultProto = icingSearchEngine.get("namespace", "uri");
+    GetResultProto getResultProto =
+        icingSearchEngine.get("namespace", "uri", GetResultSpecProto.getDefaultInstance());
     assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
   }
 
@@ -305,7 +308,8 @@
         icingSearchEngine.deleteByNamespace("namespace");
     assertStatusOk(deleteByNamespaceResultProto.getStatus());
 
-    GetResultProto getResultProto = icingSearchEngine.get("namespace", "uri");
+    GetResultProto getResultProto =
+        icingSearchEngine.get("namespace", "uri", GetResultSpecProto.getDefaultInstance());
     assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
   }
 
@@ -329,7 +333,8 @@
         icingSearchEngine.deleteBySchemaType(EMAIL_TYPE);
     assertStatusOk(deleteBySchemaTypeResultProto.getStatus());
 
-    GetResultProto getResultProto = icingSearchEngine.get("namespace", "uri");
+    GetResultProto getResultProto =
+        icingSearchEngine.get("namespace", "uri", GetResultSpecProto.getDefaultInstance());
     assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
   }
 
@@ -377,9 +382,11 @@
     DeleteByQueryResultProto deleteResultProto = icingSearchEngine.deleteByQuery(searchSpec);
     assertStatusOk(deleteResultProto.getStatus());
 
-    GetResultProto getResultProto = icingSearchEngine.get("namespace", "uri1");
+    GetResultProto getResultProto =
+        icingSearchEngine.get("namespace", "uri1", GetResultSpecProto.getDefaultInstance());
     assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
-    getResultProto = icingSearchEngine.get("namespace", "uri2");
+    getResultProto =
+        icingSearchEngine.get("namespace", "uri2", GetResultSpecProto.getDefaultInstance());
     assertStatusOk(getResultProto.getStatus());
   }
 
diff --git a/proto/icing/proto/document.proto b/proto/icing/proto/document.proto
index ae73917..d55b7e2 100644
--- a/proto/icing/proto/document.proto
+++ b/proto/icing/proto/document.proto
@@ -24,7 +24,7 @@
 option objc_class_prefix = "ICNG";
 
 // Defines a unit of data understood by the IcingSearchEngine.
-// Next tag: 9
+// Next tag: 10
 message DocumentProto {
   // REQUIRED: Namespace that this Document resides in.
   // Namespaces can affect read/write permissions.
@@ -65,6 +65,15 @@
   // in terms of space/time efficiency. Both for ttl_ms and timestamp fields
   optional int64 ttl_ms = 8 [default = 0];
 
+  // Defines document level data that's generated internally by Icing.
+  message InternalFields {
+    // The length of the document as a count of tokens (or terms) in all
+    // indexed text properties. This field is used in the computation of the
+    // BM25F relevance score.
+    optional int32 length_in_tokens = 1;
+  }
+  optional InternalFields internal_fields = 9;
+
   reserved 6;
 }
 
diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto
index 0298f65..4188a8c 100644
--- a/proto/icing/proto/schema.proto
+++ b/proto/icing/proto/schema.proto
@@ -34,7 +34,7 @@
 // TODO(cassiewang) Define a sample proto file that can be used by tests and for
 // documentation.
 //
-// Next tag: 5
+// Next tag: 6
 message SchemaTypeConfigProto {
   // REQUIRED: Named type that uniquely identifies the structured, logical
   // schema being defined.
@@ -51,6 +51,15 @@
   // easier.
   repeated PropertyConfigProto properties = 4;
 
+  // Version is an arbitrary number that the client may use to keep track of
+  // different incarnations of the schema. The Icing library imposes no
+  // requirements on this field and will not validate it in any way. If a
+  // client calls SetSchema with a schema that contains one or more new version
+  // numbers, then those version numbers will be updated so long as the
+  // SetSchema call succeeds. Clients are free to leave the version number
+  // unset, in which case it will default to 0.
+  optional int32 version = 5;
+
   reserved 2, 3;
 }
 
diff --git a/proto/icing/proto/scoring.proto b/proto/icing/proto/scoring.proto
index bfa7aec..6186fde 100644
--- a/proto/icing/proto/scoring.proto
+++ b/proto/icing/proto/scoring.proto
@@ -64,11 +64,8 @@
       // compared in seconds.
       USAGE_TYPE3_LAST_USED_TIMESTAMP = 8;
 
-      // Placeholder for ranking by relevance score, currently computed as BM25F
-      // score.
-      // TODO(b/173156803): one the implementation is ready, rename to
-      // RELEVANCE_SCORE.
-      RELEVANCE_SCORE_NONFUNCTIONAL_PLACEHOLDER = 9;
+      // Ranked by relevance score, currently computed as BM25F score.
+      RELEVANCE_SCORE = 9;
     }
   }
   optional RankingStrategy.Code rank_by = 1;
diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto
index f63acfa..6c4e3c9 100644
--- a/proto/icing/proto/search.proto
+++ b/proto/icing/proto/search.proto
@@ -101,20 +101,6 @@
   // How to specify a subset of properties to retrieve. If no type property mask
   // has been specified for a schema type, then *all* properties of that schema
   // type will be retrieved.
-  // Next tag: 3
-  message TypePropertyMask {
-    // The schema type to which these property masks should apply.
-    // If the schema type is the wildcard ("*"), then the type property masks
-    // will apply to all results of types that don't have their own, specific
-    // type property mask entry.
-    optional string schema_type = 1;
-
-    // The property masks specifying the property to be retrieved. Property
-    // masks must be composed only of property names, property separators (the
-    // '.' character). For example, "subject", "recipients.name". Specifying no
-    // property masks will result in *no* properties being retrieved.
-    repeated string paths = 2;
-  }
   repeated TypePropertyMask type_property_masks = 4;
 }
 
@@ -214,3 +200,26 @@
   // Stats for query execution performance.
   optional NativeQueryStats query_stats = 5;
 }
+
+// Next tag: 3
+message TypePropertyMask {
+  // The schema type to which these property masks should apply.
+  // If the schema type is the wildcard ("*"), then the type property masks
+  // will apply to all results of types that don't have their own, specific
+  // type property mask entry.
+  optional string schema_type = 1;
+
+  // The property masks specifying the properties to be retrieved. Property
+  // masks must be composed only of property names and property separators
+  // (the '.' character). For example, "subject", "recipients.name". Specifying
+  // no property masks will result in *no* properties being retrieved.
+  repeated string paths = 2;
+}
+
+// Next tag: 2
+message GetResultSpecProto {
+  // How to specify a subset of properties to retrieve. If no type property mask
+  // has been specified for a schema type, then *all* properties of that schema
+  // type will be retrieved.
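+  //
+  // For example, to retrieve only the "subject" property of "Email" results:
+  //   type_property_masks { schema_type: "Email" paths: "subject" }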
+  repeated TypePropertyMask type_property_masks = 1;
+}
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index be9e98c..af8248d 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=349594076)
+set(synced_AOSP_CL_number=351841227)