Merge remote-tracking branch 'goog/androidx-platform-dev' into sc-dev am: 46325e158d

Original change: https://googleplex-android-review.googlesource.com/c/platform/external/icing/+/14106922

Change-Id: I3fdd37ea1e7d1750b0fa6f67a44b51108c542f90
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index 3ecef54..2443cb2 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -175,7 +175,27 @@
   // synced by the system and the checksum will be updated.
   ~FileBackedVector();
 
-  // Accesses the element at idx.
+  // Gets a copy of the element at idx.
+  //
+  // This is useful if you think the FileBackedVector may grow before you need
+  // to access this return value. When the FileBackedVector grows, the
+  // underlying mmap will be unmapped and remapped, which will invalidate any
+  // pointers to the previously mapped region. Getting a copy will avoid
+  // referencing the now-invalidated region.
+  //
+  // Returns:
+  //   OUT_OF_RANGE_ERROR if idx < 0 or > num_elements()
+  libtextclassifier3::StatusOr<T> GetCopy(int32_t idx) const;
+
+  // Gets a pointer to the element at idx.
+  //
+  // WARNING: Subsequent calls to Set may invalidate the pointer returned by
+  // Get.
+  //
+  // This is useful if you do not think the FileBackedVector will grow before
+  // you need to reference this value, and you want to avoid a copy. When the
+  // FileBackedVector grows, the underlying mmap will be unmapped and remapped,
+  // which will invalidate this pointer to the previously mapped region.
   //
   // Returns:
   //   OUT_OF_RANGE_ERROR if idx < 0 or > num_elements()
@@ -183,6 +203,10 @@
 
   // Writes the value at idx.
   //
+  // May grow the underlying file and mmapped region as needed to fit the new
+  // value. If it does grow, then any pointers to previous values returned
+  // from Get() may be invalidated.
+  //
   // Returns:
   //   OUT_OF_RANGE_ERROR if idx < 0 or file cannot be grown idx size
   libtextclassifier3::Status Set(int32_t idx, const T& value);
@@ -468,6 +492,13 @@
 }
 
 template <typename T>
+libtextclassifier3::StatusOr<T> FileBackedVector<T>::GetCopy(
+    int32_t idx) const {
+  ICING_ASSIGN_OR_RETURN(const T* value, Get(idx));
+  return *value;
+}
+
+template <typename T>
 libtextclassifier3::StatusOr<const T*> FileBackedVector<T>::Get(
     int32_t idx) const {
   if (idx < 0) {
@@ -492,8 +523,6 @@
         IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx));
   }
 
-  int32_t start_byte = idx * sizeof(T);
-
   ICING_RETURN_IF_ERROR(GrowIfNecessary(idx + 1));
 
   if (idx + 1 > header_->num_elements) {
@@ -518,6 +547,8 @@
       changes_end_ = 0;
       header_->vector_checksum = 0;
     } else {
+      int32_t start_byte = idx * sizeof(T);
+
       changes_.push_back(idx);
       saved_original_buffer_.append(
           reinterpret_cast<char*>(const_cast<T*>(array())) + start_byte,
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index af7efa0..b437724 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -527,6 +527,56 @@
 BENCHMARK(BM_SearchNoStackOverflow)
     ->Range(/*start=*/1 << 10, /*limit=*/1 << 18);
 
+// Added for b/184373205. Ensure that we can repeatedly put documents even if
+// the underlying mmapped areas grow past a few page sizes.
+void BM_RepeatedPut(benchmark::State& state) {
+  // Initialize the filesystem
+  std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+  Filesystem filesystem;
+  DestructibleDirectory ddir(filesystem, test_dir);
+
+  // Create the schema.
+  SchemaProto schema =
+      SchemaBuilder()
+          .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+              PropertyConfigBuilder()
+                  .SetName("body")
+                  .SetDataTypeString(TermMatchType::PREFIX,
+                                     StringIndexingConfig::TokenizerType::PLAIN)
+                  .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)))
+          .Build();
+
+  // Create the index.
+  IcingSearchEngineOptions options;
+  options.set_base_dir(test_dir);
+  options.set_index_merge_size(kIcingFullIndexSize);
+  std::unique_ptr<IcingSearchEngine> icing =
+      std::make_unique<IcingSearchEngine>(options);
+
+  ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+  ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+  // Create a document that has the term "foo"
+  DocumentProto base_document = DocumentBuilder()
+                                    .SetSchema("Message")
+                                    .SetNamespace("namespace")
+                                    .AddStringProperty("body", "foo")
+                                    .Build();
+
+  // Insert a lot of documents with the term "foo"
+  int64_t num_docs = state.range(0);
+  for (auto s : state) {
+    for (int64_t i = 0; i < num_docs; ++i) {
+      DocumentProto document =
+          DocumentBuilder(base_document).SetUri("uri").Build();
+      ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+    }
+  }
+}
+// For other reasons, we hit a limit when inserting the ~350,000th document. So
+// cap the limit to 1 << 18.
+BENCHMARK(BM_RepeatedPut)->Range(/*start=*/100, /*limit=*/1 << 18);
+
 }  // namespace
 
 }  // namespace lib
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index ca90b9d..9f8b7fa 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -32,8 +32,8 @@
 #include "icing/portable/equals-proto.h"
 #include "icing/proto/document.pb.h"
 #include "icing/proto/initialize.pb.h"
-#include "icing/proto/persist.pb.h"
 #include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
 #include "icing/proto/schema.pb.h"
 #include "icing/proto/scoring.pb.h"
 #include "icing/proto/search.pb.h"
@@ -5002,8 +5002,8 @@
   *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
       document2;
 
-  EXPECT_THAT(search_result_proto,
-              EqualsSearchResultIgnoreStatsAndScores(expected_search_result_proto));
+  EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+                                       expected_search_result_proto));
 }
 
 TEST_F(IcingSearchEngineTest,
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index ac2dc9a..2436571 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -1165,7 +1165,7 @@
 
 libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
 DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const {
-  auto score_data_or = score_cache_->Get(document_id);
+  auto score_data_or = score_cache_->GetCopy(document_id);
   if (!score_data_or.ok()) {
     ICING_LOG(ERROR) << " while trying to access DocumentId " << document_id
                      << " from score_cache_";
@@ -1173,7 +1173,7 @@
   }
 
   DocumentAssociatedScoreData document_associated_score_data =
-      *std::move(score_data_or).ValueOrDie();
+      std::move(score_data_or).ValueOrDie();
   if (document_associated_score_data.document_score() < 0) {
     // An negative / invalid score means that the score data has been deleted.
     return absl_ports::NotFoundError("Document score data not found.");
@@ -1183,13 +1183,13 @@
 
 libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
 DocumentStore::GetCorpusAssociatedScoreData(CorpusId corpus_id) const {
-  auto score_data_or = corpus_score_cache_->Get(corpus_id);
+  auto score_data_or = corpus_score_cache_->GetCopy(corpus_id);
   if (!score_data_or.ok()) {
     return score_data_or.status();
   }
 
   CorpusAssociatedScoreData corpus_associated_score_data =
-      *std::move(score_data_or).ValueOrDie();
+      std::move(score_data_or).ValueOrDie();
   return corpus_associated_score_data;
 }
 
@@ -1211,14 +1211,14 @@
 
 libtextclassifier3::StatusOr<DocumentFilterData>
 DocumentStore::GetDocumentFilterData(DocumentId document_id) const {
-  auto filter_data_or = filter_cache_->Get(document_id);
+  auto filter_data_or = filter_cache_->GetCopy(document_id);
   if (!filter_data_or.ok()) {
     ICING_LOG(ERROR) << " while trying to access DocumentId " << document_id
                      << " from filter_cache_";
     return filter_data_or.status();
   }
   DocumentFilterData document_filter_data =
-      *std::move(filter_data_or).ValueOrDie();
+      std::move(filter_data_or).ValueOrDie();
   if (document_filter_data.namespace_id() == kInvalidNamespaceId) {
     // An invalid namespace id means that the filter data has been deleted.
     return absl_ports::NotFoundError("Document filter data not found.");
diff --git a/icing/store/usage-store.cc b/icing/store/usage-store.cc
index 7e5cebf..546067d 100644
--- a/icing/store/usage-store.cc
+++ b/icing/store/usage-store.cc
@@ -74,6 +74,9 @@
         "Document id %d is invalid.", document_id));
   }
 
+  // We don't need a copy here because we'll set the value at the same index.
+  // This won't unintentionally grow the underlying file since we already have
+  // enough space for the current index.
   auto usage_scores_or = usage_score_cache_->Get(document_id);
 
   // OutOfRange means that the mapper hasn't seen this document id before, it's
@@ -159,7 +162,7 @@
         "Document id %d is invalid.", document_id));
   }
 
-  auto usage_scores_or = usage_score_cache_->Get(document_id);
+  auto usage_scores_or = usage_score_cache_->GetCopy(document_id);
   if (absl_ports::IsOutOfRange(usage_scores_or.status())) {
     // No usage scores found. Return the default scores.
     return UsageScores();
@@ -168,7 +171,7 @@
     return usage_scores_or.status();
   }
 
-  return *std::move(usage_scores_or).ValueOrDie();
+  return std::move(usage_scores_or).ValueOrDie();
 }
 
 libtextclassifier3::Status UsageStore::SetUsageScores(
@@ -193,10 +196,10 @@
         "to_document_id %d is invalid.", to_document_id));
   }
 
-  auto usage_scores_or = usage_score_cache_->Get(from_document_id);
+  auto usage_scores_or = usage_score_cache_->GetCopy(from_document_id);
   if (usage_scores_or.ok()) {
     return usage_score_cache_->Set(to_document_id,
-                                   *std::move(usage_scores_or).ValueOrDie());
+                                   std::move(usage_scores_or).ValueOrDie());
   } else if (absl_ports::IsOutOfRange(usage_scores_or.status())) {
     // No usage scores found. Set default scores to to_document_id.
     return usage_score_cache_->Set(to_document_id, UsageScores());
diff --git a/icing/util/math-util.h b/icing/util/math-util.h
index fc11a09..3f2a69d 100644
--- a/icing/util/math-util.h
+++ b/icing/util/math-util.h
@@ -37,7 +37,7 @@
 template <typename IntType>
 static IntType RoundDownTo(IntType input_value, IntType rounding_value) {
   static_assert(std::numeric_limits<IntType>::is_integer,
-                "RoundUpTo() operation type is not integer");
+                "RoundDownTo() operation type is not integer");
 
   if (input_value <= 0) {
     return 0;
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index 56fb657..4421ac5 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=366069684)
+set(synced_AOSP_CL_number=367066667)