Merge remote-tracking branch 'goog/upstream-master' into androidx-platform-dev
* goog/upstream-master:
Sync from upstream.
Change-Id: I6553b44dff05c97febf7ffd4ce567db93380e491
diff --git a/icing/file/destructible-file.h b/icing/file/destructible-file.h
new file mode 100644
index 0000000..006dcb4
--- /dev/null
+++ b/icing/file/destructible-file.h
@@ -0,0 +1,72 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_DESTRUCTIBLE_FILE_H_
+#define ICING_FILE_DESTRUCTIBLE_FILE_H_
+
+#include <unistd.h>
+
+#include <string>
+
+#include "icing/file/filesystem.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+// An RAII wrapper that opens `filepath` for write on construction and, on
+// destruction, closes the descriptor and deletes the underlying file.
+//
+// is_valid() reports whether a non-negative descriptor is currently held; the
+// destructor does nothing otherwise. The class is move-only. The Filesystem
+// pointer is never deleted by this class, and it is dereferenced by the
+// destructor of any instance for which is_valid() is true.
+class DestructibleFile {
+ public:
+  // Opens `filepath` for write through `filesystem`. Callers should check
+  // is_valid() afterwards before using get_fd().
+  explicit DestructibleFile(const std::string& filepath,
+                            const Filesystem* filesystem)
+      : filesystem_(filesystem),
+        filepath_(filepath),
+        // Members are initialized in declaration order, so filesystem_ and
+        // filepath_ are already set when fd_ is initialized.
+        fd_(filesystem_->OpenForWrite(filepath_.c_str())) {}
+
+  DestructibleFile(const DestructibleFile&) = delete;
+  // Starts out holding nothing (fd_ == -1) and then swaps state with `other`,
+  // which is left invalid and so deletes nothing when it is destroyed.
+  DestructibleFile(DestructibleFile&& other) noexcept
+      : filesystem_(nullptr), fd_(-1) {
+    *this = std::move(other);
+  }
+
+  DestructibleFile& operator=(const DestructibleFile&) = delete;
+  // Swaps state with `other`. A file previously held by this object is NOT
+  // deleted here: it now belongs to `other` and is deleted when `other` is
+  // destroyed.
+  DestructibleFile& operator=(DestructibleFile&& other) noexcept {
+    std::swap(fd_, other.fd_);
+    std::swap(filesystem_, other.filesystem_);
+    std::swap(filepath_, other.filepath_);
+    return *this;
+  }
+
+  // Closes the descriptor and deletes the file, but only if a valid
+  // descriptor is held. A failed delete is logged and otherwise ignored.
+  ~DestructibleFile() {
+    if (is_valid()) {
+      close(fd_);
+      if (!filesystem_->DeleteFile(filepath_.c_str())) {
+        ICING_VLOG(1) << "Failed to delete file " << filepath_;
+      }
+    }
+  }
+
+  // True iff this object currently holds a non-negative file descriptor.
+  bool is_valid() const { return fd_ >= 0; }
+  // The raw descriptor. It is closed by this object's destructor, so callers
+  // must not close it themselves. Negative when !is_valid().
+  int get_fd() const { return fd_; }
+
+ private:
+  const Filesystem* filesystem_;  // Not deleted by this class.
+  std::string filepath_;
+  int fd_;  // Negative when no file is held.
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_DESTRUCTIBLE_FILE_H_
diff --git a/icing/file/destructible-file_test.cc b/icing/file/destructible-file_test.cc
new file mode 100644
index 0000000..61316d1
--- /dev/null
+++ b/icing/file/destructible-file_test.cc
@@ -0,0 +1,117 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/destructible-file.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Checks that a file already present on disk is gone once the
+// DestructibleFile that opened it has been destroyed.
+TEST(DestructibleFileTest, DeletesFileProperly) {
+  Filesystem filesystem;
+  const std::string path = GetTestTempDir() + "/file1";
+
+  // Step 1: put a small file on disk through a ScopedFd confined to this
+  // block.
+  {
+    ScopedFd writer(filesystem.OpenForWrite(path.c_str()));
+    ASSERT_TRUE(writer.is_valid());
+    int payload = 127;
+    ASSERT_TRUE(filesystem.Write(writer.get(), &payload, sizeof(payload)));
+  }
+
+  // Step 2: reopen the same path with a DestructibleFile and let it go out of
+  // scope straight away.
+  {
+    DestructibleFile scoped_file(path, &filesystem);
+    ASSERT_TRUE(scoped_file.is_valid());
+  }
+
+  // Step 3: the destructor above must have removed the file.
+  EXPECT_FALSE(filesystem.FileExists(path.c_str()));
+}
+
+TEST(DestructibleFileTest, MoveAssignDeletesFileProperly) {
+ Filesystem filesystem;
+ std::string filepath1 = GetTestTempDir() + "/file1";
+ std::string filepath2 = GetTestTempDir() + "/file2";
+
+ // 1. Create file1, held by destructible1 for the rest of the test.
+ DestructibleFile destructible1(filepath1, &filesystem);
+ ASSERT_TRUE(destructible1.is_valid());
+ int i = 127;
+ ASSERT_TRUE(filesystem.Write(destructible1.get_fd(), &i, sizeof(i)));
+
+ {
+ // 2. Create file2, held by destructible2 inside this scope.
+ DestructibleFile destructible2(filepath2, &filesystem);
+ ASSERT_TRUE(destructible2.is_valid());
+ i = 458;
+ ASSERT_TRUE(filesystem.Write(destructible2.get_fd(), &i, sizeof(i)));
+
+ // Move assign destructible2 into destructible1 (the two swap their state).
+ destructible1 = std::move(destructible2);
+ }
+
+ // 3. file1 shouldn't exist: the move assignment swapped it into
+ // destructible2, whose destructor deleted it at the end of the scope above.
+ EXPECT_FALSE(filesystem.FileExists(filepath1.c_str()));
+
+ // 4. file2 should still exist because it moved into destructible1 from
+ // destructible2, and destructible1 has not been destroyed yet.
+ EXPECT_TRUE(filesystem.FileExists(filepath2.c_str()));
+}
+
+// Checks that move construction hands the file over: the moved-from object
+// must not delete it, and the last object to receive it must.
+TEST(DestructibleFileTest, MoveConstructionDeletesFileProperly) {
+  Filesystem filesystem;
+  const std::string path = GetTestTempDir() + "/file1";
+
+  // Step 1: an empty holder that outlives the scope in which the file is
+  // first opened.
+  std::unique_ptr<DestructibleFile> heap_owner;
+  {
+    // Step 2: open file1 and write a few bytes to it.
+    DestructibleFile first_owner(path, &filesystem);
+    ASSERT_TRUE(first_owner.is_valid());
+    int payload = 458;
+    ASSERT_TRUE(
+        filesystem.Write(first_owner.get_fd(), &payload, sizeof(payload)));
+
+    // Move construct a heap-allocated DestructibleFile from first_owner.
+    heap_owner = std::make_unique<DestructibleFile>(std::move(first_owner));
+  }
+
+  // Step 3: first_owner has been destroyed, yet file1 must survive because it
+  // was moved into heap_owner.
+  ASSERT_TRUE(heap_owner->is_valid());
+  EXPECT_TRUE(filesystem.FileExists(path.c_str()));
+
+  {
+    // Step 4: move construct a scoped object from heap_owner.
+    DestructibleFile last_owner(std::move(*heap_owner));
+    ASSERT_TRUE(last_owner.is_valid());
+  }
+
+  // Step 5: last_owner's destructor must have deleted file1.
+  EXPECT_FALSE(filesystem.FileExists(path.c_str()));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index 1d5b689..9ccd81b 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -70,6 +70,7 @@
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
#include "icing/legacy/core/icing-string-util.h"
+#include "icing/portable/platform.h"
#include "icing/portable/zlib.h"
#include "icing/util/crc32.h"
#include "icing/util/data-loss.h"
@@ -422,7 +423,8 @@
static constexpr int kDeflateCompressionLevel = 3;
// Chunks of the file to mmap at a time, so we don't mmap the entire file.
- static constexpr int kMmapChunkSize = 4 * 1024;
+ // Only used on 32-bit devices
+ static constexpr int kMmapChunkSize = 4 * 1024 * 1024; // 4MiB
ScopedFd fd_;
const Filesystem* const filesystem_;
@@ -631,6 +633,14 @@
file_path.c_str(), static_cast<long long>(start)));
}
+ if (end < start) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Ending checksum offset of file '%s' must be greater than start "
+ "'%lld', was '%lld'",
+ file_path.c_str(), static_cast<long long>(start),
+ static_cast<long long>(end)));
+ }
+
int64_t file_size = filesystem->GetFileSize(file_path.c_str());
if (end > file_size) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
@@ -640,17 +650,41 @@
static_cast<long long>(end)));
}
- for (int i = start; i < end; i += kMmapChunkSize) {
- // Don't read past the file size.
- int next_chunk_size = kMmapChunkSize;
- if ((i + kMmapChunkSize) >= end) {
- next_chunk_size = end - i;
+ Architecture architecture = GetArchitecture();
+ switch (architecture) {
+ case Architecture::BIT_64: {
+ // Don't mmap in chunks here since mmapping can be harmful on 64-bit
+ // devices where mmap/munmap calls need the mmap write semaphore, which
+ // blocks mmap/munmap/mprotect and all page faults from executing while
+ // they run. On 64-bit devices, this doesn't actually load into memory, it
+ // just makes the file faultable. So the whole file should be ok.
+ // b/185822878.
+ ICING_RETURN_IF_ERROR(mmapped_file.Remap(start, end - start));
+ auto mmap_str = std::string_view(mmapped_file.region(), end - start);
+ new_crc.Append(mmap_str);
+ break;
}
+ case Architecture::BIT_32:
+ [[fallthrough]];
+ case Architecture::UNKNOWN: {
+ // 32-bit devices only have 4GB of RAM. Mmap in chunks to not use up too
+ // much memory at once. If we're unknown, then also chunk it because we're
+ // not sure what the device can handle.
+ for (int i = start; i < end; i += kMmapChunkSize) {
+ // Don't read past the file size.
+ int next_chunk_size = kMmapChunkSize;
+ if ((i + kMmapChunkSize) >= end) {
+ next_chunk_size = end - i;
+ }
- ICING_RETURN_IF_ERROR(mmapped_file.Remap(i, next_chunk_size));
+ ICING_RETURN_IF_ERROR(mmapped_file.Remap(i, next_chunk_size));
- auto mmap_str = std::string_view(mmapped_file.region(), next_chunk_size);
- new_crc.Append(mmap_str);
+ auto mmap_str =
+ std::string_view(mmapped_file.region(), next_chunk_size);
+ new_crc.Append(mmap_str);
+ }
+ break;
+ }
}
return new_crc;
@@ -670,7 +704,8 @@
static_cast<long long>(proto_size), header_->max_proto_size));
}
- // At this point, we've guaranteed that proto_size is under kMaxProtoSize (see
+ // At this point, we've guaranteed that proto_size is under kMaxProtoSize
+ // (see
// ::Create), so we can safely store it in an int.
int final_size = 0;
@@ -735,8 +770,8 @@
MemoryMappedFile mmapped_file(*filesystem_, file_path_,
MemoryMappedFile::Strategy::READ_ONLY);
if (file_offset >= file_size) {
- // file_size points to the next byte to write at, so subtract one to get the
- // inclusive, actual size of file.
+ // file_size points to the next byte to write at, so subtract one to get
+ // the inclusive, actual size of file.
return absl_ports::OutOfRangeError(
IcingStringUtil::StringPrintf("Trying to read from a location, %lld, "
"out of range of the file size, %lld",
@@ -778,8 +813,8 @@
int64_t file_offset) {
int64_t file_size = filesystem_->GetFileSize(fd_.get());
if (file_offset >= file_size) {
- // file_size points to the next byte to write at, so subtract one to get the
- // inclusive, actual size of file.
+ // file_size points to the next byte to write at, so subtract one to get
+ // the inclusive, actual size of file.
return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
"Trying to erase data at a location, %lld, "
"out of range of the file size, %lld",
@@ -798,12 +833,12 @@
ICING_RETURN_IF_ERROR(mmapped_file.Remap(file_offset + sizeof(metadata),
GetProtoSize(metadata)));
- // We need to update the crc checksum if the erased area is before the rewind
- // position.
+ // We need to update the crc checksum if the erased area is before the
+ // rewind position.
if (file_offset + sizeof(metadata) < header_->rewind_offset) {
// We need to calculate [original string xor 0s].
- // The xored string is the same as the original string because 0 xor 0 = 0,
- // 1 xor 0 = 1.
+ // The xored string is the same as the original string because 0 xor 0 =
+ // 0, 1 xor 0 = 1.
const std::string_view xored_str(mmapped_file.region(),
mmapped_file.region_size());
@@ -896,7 +931,8 @@
template <typename ProtoT>
typename FileBackedProtoLog<ProtoT>::Iterator
FileBackedProtoLog<ProtoT>::GetIterator() {
- return Iterator(*filesystem_, file_path_, /*initial_offset=*/sizeof(Header));
+ return Iterator(*filesystem_, file_path_,
+ /*initial_offset=*/sizeof(Header));
}
template <typename ProtoT>
diff --git a/icing/file/file-backed-proto-log_benchmark.cc b/icing/file/file-backed-proto-log_benchmark.cc
index 26e0fb0..766cc64 100644
--- a/icing/file/file-backed-proto-log_benchmark.cc
+++ b/icing/file/file-backed-proto-log_benchmark.cc
@@ -164,6 +164,48 @@
// 16MiB, and we need some extra space for the
// rest of the document properties
+// Benchmarks FileBackedProtoLog::ComputeChecksum() on a proto log that has
+// been pre-filled with state.range(0) copies of one document. The document
+// carries a single string property of 1024 characters drawn from
+// kAlNumAlphabet.
+static void BM_ComputeChecksum(benchmark::State& state) {
+  const Filesystem filesystem;
+  const std::string log_path = GetTestTempDir() + "/proto.log";
+
+  // Remove any log left over from a previous run.
+  filesystem.DeleteFile(log_path.c_str());
+
+  bool use_compression = true;
+  int max_proto_bytes = (1 << 24) - 1;  // 16 MiB
+  auto proto_log = FileBackedProtoLog<DocumentProto>::Create(
+                       &filesystem, log_path,
+                       FileBackedProtoLog<DocumentProto>::Options(
+                           use_compression, max_proto_bytes))
+                       .ValueOrDie()
+                       .proto_log;
+
+  // Generate the 1KiB payload that makes up the bulk of every document.
+  int payload_length = 1024;
+  std::default_random_engine rng;
+  const std::string payload =
+      RandomString(kAlNumAlphabet, payload_length, &rng);
+
+  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+  auto property = document.add_properties();
+  property->set_name("string property");
+  property->add_string_values(payload);
+
+  // Populate the log outside of the timed loop.
+  int doc_count = state.range(0);
+  for (int written = 0; written < doc_count; ++written) {
+    ICING_ASSERT_OK(proto_log->WriteProto(document));
+  }
+
+  // Timed section: only the checksum computation is measured.
+  for (auto _ : state) {
+    testing::DoNotOptimize(proto_log->ComputeChecksum());
+  }
+
+  // Leave the temp directory clean for the next benchmark.
+  filesystem.DeleteFile(log_path.c_str());
+}
+BENCHMARK(BM_ComputeChecksum)->Range(1024, 1 << 20);
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 44241bc..e9865e4 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -27,6 +27,7 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/mutex.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-file.h"
#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
@@ -75,8 +76,7 @@
constexpr std::string_view kDocumentSubfolderName = "document_dir";
constexpr std::string_view kIndexSubfolderName = "index_dir";
constexpr std::string_view kSchemaSubfolderName = "schema_dir";
-constexpr std::string_view kIcingSearchEngineHeaderFilename =
- "icing_search_engine_header";
+constexpr std::string_view kSetSchemaMarkerFilename = "set_schema_marker";
constexpr std::string_view kOptimizeStatusFilename = "optimize_status";
libtextclassifier3::Status ValidateOptions(
@@ -137,10 +137,6 @@
return index_processor_options;
}
-std::string MakeHeaderFilename(const std::string& base_dir) {
- return absl_ports::StrCat(base_dir, "/", kIcingSearchEngineHeaderFilename);
-}
-
// Document store files are in a standalone subfolder for easier file
// management. We can delete and recreate the subfolder and not touch/affect
// anything else.
@@ -168,6 +164,9 @@
std::string MakeSchemaDirectoryPath(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kSchemaSubfolderName);
}
+// Builds "<base_dir>/" followed by kSetSchemaMarkerFilename, i.e. the path of
+// the marker file used to detect an interrupted schema change.
+std::string MakeSetSchemaMarkerFilePath(const std::string& base_dir) {
+  std::string marker_path =
+      absl_ports::StrCat(base_dir, "/", kSetSchemaMarkerFilename);
+  return marker_path;
+}
void TransformStatus(const libtextclassifier3::Status& internal_status,
StatusProto* status_proto) {
@@ -298,69 +297,6 @@
}
libtextclassifier3::Status status = InitializeMembers(initialize_stats);
- if (!status.ok()) {
- TransformStatus(status, result_status);
- initialize_stats->set_latency_ms(
- initialize_timer->GetElapsedMilliseconds());
- return result_proto;
- }
-
- // Even if each subcomponent initialized fine independently, we need to
- // check if they're consistent with each other.
- if (!CheckConsistency().ok()) {
- // The total checksum doesn't match the stored value, it could be one of the
- // following cases:
- // 1. Icing is initialized the first time in this directory.
- // 2. Non-checksumed changes have been made to some files.
- if (index_->last_added_document_id() == kInvalidDocumentId &&
- document_store_->last_added_document_id() == kInvalidDocumentId &&
- absl_ports::IsNotFound(schema_store_->GetSchema().status())) {
- // First time initialize. Not recovering but creating all the files.
- // We need to explicitly clear the recovery-related fields because some
- // sub-components may not be able to tell if the storage is being
- // initialized the first time or has lost some files. Sub-components may
- // already have set these fields in earlier steps.
- *initialize_stats = InitializeStatsProto();
- status = RegenerateDerivedFiles();
- } else {
- ICING_VLOG(1)
- << "IcingSearchEngine in inconsistent state, regenerating all "
- "derived data";
- // Total checksum mismatch may not be the root cause of document store
- // recovery. Preserve the root cause that was set by the document store.
- bool should_log_document_store_recovery_cause =
- initialize_stats->document_store_recovery_cause() ==
- InitializeStatsProto::NONE;
- if (should_log_document_store_recovery_cause) {
- initialize_stats->set_document_store_recovery_cause(
- InitializeStatsProto::TOTAL_CHECKSUM_MISMATCH);
- }
- initialize_stats->set_index_restoration_cause(
- InitializeStatsProto::TOTAL_CHECKSUM_MISMATCH);
- status = RegenerateDerivedFiles(initialize_stats,
- should_log_document_store_recovery_cause);
- }
- } else {
- DocumentId last_stored_document_id =
- document_store_->last_added_document_id();
- DocumentId last_indexed_document_id = index_->last_added_document_id();
- if (last_stored_document_id != last_indexed_document_id) {
- if (last_stored_document_id == kInvalidDocumentId) {
- // Document store is empty but index is not. Reset the index.
- status = index_->Reset();
- } else {
- // Index is inconsistent with the document store, we need to restore the
- // index.
- initialize_stats->set_index_restoration_cause(
- InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH);
- std::unique_ptr<Timer> index_restore_timer = clock_->GetNewTimer();
- status = RestoreIndexIfNeeded();
- initialize_stats->set_index_restoration_latency_ms(
- index_restore_timer->GetElapsedMilliseconds());
- }
- }
- }
-
if (status.ok() || absl_ports::IsDataLoss(status)) {
initialized_ = true;
}
@@ -374,10 +310,6 @@
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
ICING_RETURN_IF_ERROR(InitializeOptions());
ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats));
- ICING_RETURN_IF_ERROR(InitializeDocumentStore(initialize_stats));
-
- result_state_manager_ = std::make_unique<ResultStateManager>(
- performance_configuration_.max_num_total_hits, *document_store_);
// TODO(b/156383798) : Resolve how to specify the locale.
language_segmenter_factory::SegmenterOptions segmenter_options(
@@ -388,9 +320,75 @@
TC3_ASSIGN_OR_RETURN(normalizer_,
normalizer_factory::Create(options_.max_token_length()));
- ICING_RETURN_IF_ERROR(InitializeIndex(initialize_stats));
+ std::string marker_filepath =
+ MakeSetSchemaMarkerFilePath(options_.base_dir());
+ libtextclassifier3::Status status;
+ if (absl_ports::IsNotFound(schema_store_->GetSchema().status())) {
+ // The schema was either lost or never set before. Wipe out the doc store
+ // and index directories and initialize them from scratch.
+ const std::string doc_store_dir =
+ MakeDocumentDirectoryPath(options_.base_dir());
+ const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
+ if (!filesystem_->DeleteDirectoryRecursively(doc_store_dir.c_str()) ||
+ !filesystem_->DeleteDirectoryRecursively(index_dir.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Could not delete directories: ", index_dir, " and ", doc_store_dir));
+ }
+ ICING_RETURN_IF_ERROR(InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/false, initialize_stats));
+ status = InitializeIndex(initialize_stats);
+ } else if (filesystem_->FileExists(marker_filepath.c_str())) {
+ // If the marker file is still around then something wonky happened when we
+ // last tried to set the schema.
+ ICING_RETURN_IF_ERROR(InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/true, initialize_stats));
+ initialize_stats->set_document_store_recovery_cause(
+ InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
- return libtextclassifier3::Status::OK;
+ // We're going to need to build the index from scratch. So just delete its
+ // files now.
+ const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
+ Index::Options index_options(index_dir, options_.index_merge_size());
+ if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
+ !filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Could not recreate directory: ", index_dir));
+ }
+ ICING_ASSIGN_OR_RETURN(index_,
+ Index::Create(index_options, filesystem_.get(),
+ icing_filesystem_.get()));
+
+ std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
+ IndexRestorationResult restore_result = RestoreIndexIfNeeded();
+ status = std::move(restore_result.status);
+ // DATA_LOSS means that we have successfully initialized and re-added
+ // content to the index. Some indexed content was lost, but otherwise the
+ // index is in a valid state and can be queried.
+ if (!status.ok() && !absl_ports::IsDataLoss(status)) {
+ return status;
+ }
+
+ // Delete the marker file to indicate that everything is now in sync with
+ // whatever changes were made to the schema.
+ filesystem_->DeleteFile(marker_filepath.c_str());
+
+ initialize_stats->set_index_restoration_latency_ms(
+ restore_timer->GetElapsedMilliseconds());
+ initialize_stats->set_index_restoration_cause(
+ InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
+ } else {
+ ICING_RETURN_IF_ERROR(InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/false, initialize_stats));
+ status = InitializeIndex(initialize_stats);
+ if (!status.ok() && !absl_ports::IsDataLoss(status)) {
+ return status;
+ }
+ }
+
+ result_state_manager_ = std::make_unique<ResultStateManager>(
+ performance_configuration_.max_num_total_hits, *document_store_);
+
+ return status;
}
libtextclassifier3::Status IcingSearchEngine::InitializeOptions() {
@@ -424,6 +422,7 @@
}
libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
+ bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
@@ -436,8 +435,9 @@
}
ICING_ASSIGN_OR_RETURN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(filesystem_.get(), document_dir, clock_.get(),
- schema_store_.get(), initialize_stats));
+ DocumentStore::Create(
+ filesystem_.get(), document_dir, clock_.get(), schema_store_.get(),
+ force_recovery_and_revalidate_documents, initialize_stats));
document_store_ = std::move(create_result.document_store);
return libtextclassifier3::Status::OK;
@@ -455,6 +455,7 @@
}
Index::Options index_options(index_dir, options_.index_merge_size());
+ InitializeStatsProto::RecoveryCause recovery_cause;
auto index_or =
Index::Create(index_options, filesystem_.get(), icing_filesystem_.get());
if (!index_or.ok()) {
@@ -464,88 +465,28 @@
absl_ports::StrCat("Could not recreate directory: ", index_dir));
}
- initialize_stats->set_index_restoration_cause(
- InitializeStatsProto::IO_ERROR);
+ recovery_cause = InitializeStatsProto::IO_ERROR;
// Try recreating it from scratch and re-indexing everything.
ICING_ASSIGN_OR_RETURN(index_,
Index::Create(index_options, filesystem_.get(),
icing_filesystem_.get()));
-
- std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
- ICING_RETURN_IF_ERROR(RestoreIndexIfNeeded());
- initialize_stats->set_index_restoration_latency_ms(
- restore_timer->GetElapsedMilliseconds());
} else {
// Index was created fine.
index_ = std::move(index_or).ValueOrDie();
+ // If a recover does have to happen, then it must be because the index is
+ // out of sync with the document store.
+ recovery_cause = InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
}
- return libtextclassifier3::Status::OK;
-}
-
-libtextclassifier3::Status IcingSearchEngine::CheckConsistency() {
- if (!HeaderExists()) {
- // Without a header file, we have no checksum and can't even detect
- // inconsistencies
- return absl_ports::NotFoundError("No header file found.");
- }
-
- // Header does exist, verify that the header looks fine.
- IcingSearchEngine::Header header;
- if (!filesystem_->Read(MakeHeaderFilename(options_.base_dir()).c_str(),
- &header, sizeof(header))) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Couldn't read: ", MakeHeaderFilename(options_.base_dir())));
- }
-
- if (header.magic != IcingSearchEngine::Header::kMagic) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Invalid header kMagic for file: ",
- MakeHeaderFilename(options_.base_dir())));
- }
-
- ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- if (checksum.Get() != header.checksum) {
- return absl_ports::InternalError(
- "IcingSearchEngine checksum doesn't match");
- }
-
- return libtextclassifier3::Status::OK;
-}
-
-libtextclassifier3::Status IcingSearchEngine::RegenerateDerivedFiles(
- InitializeStatsProto* initialize_stats, bool log_document_store_stats) {
- // Measure the latency of the data recovery. The cause of the recovery should
- // be logged by the caller.
- std::unique_ptr<Timer> timer = clock_->GetNewTimer();
- ICING_RETURN_IF_ERROR(
- document_store_->UpdateSchemaStore(schema_store_.get()));
- if (initialize_stats != nullptr && log_document_store_stats) {
- initialize_stats->set_document_store_recovery_latency_ms(
- timer->GetElapsedMilliseconds());
- }
- // Restart timer.
- timer = clock_->GetNewTimer();
- ICING_RETURN_IF_ERROR(index_->Reset());
- ICING_RETURN_IF_ERROR(RestoreIndexIfNeeded());
- if (initialize_stats != nullptr) {
+ std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
+ IndexRestorationResult restore_result = RestoreIndexIfNeeded();
+ if (restore_result.needed_restoration) {
initialize_stats->set_index_restoration_latency_ms(
- timer->GetElapsedMilliseconds());
+ restore_timer->GetElapsedMilliseconds());
+ initialize_stats->set_index_restoration_cause(recovery_cause);
}
-
- const std::string header_file =
- MakeHeaderFilename(options_.base_dir().c_str());
- if (HeaderExists()) {
- if (!filesystem_->DeleteFile(header_file.c_str())) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Unable to delete file: ", header_file));
- }
- }
- ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
-
- return libtextclassifier3::Status::OK;
+ return restore_result.status;
}
SetSchemaResultProto IcingSearchEngine::SetSchema(
@@ -580,6 +521,15 @@
}
bool lost_previous_schema = lost_previous_schema_or.ValueOrDie();
+ std::string marker_filepath =
+ MakeSetSchemaMarkerFilePath(options_.base_dir());
+ // Create the marker file indicating that we are going to apply a schema
+ // change. No need to write anything to the marker file - its existence is the
+ // only thing that matters. The marker file is used to indicate if we
+ // encountered a crash or a power loss while updating the schema and other
+ // files. So set it up to be deleted as long as we return from this function.
+ DestructibleFile marker_file(marker_filepath, filesystem_.get());
+
auto set_schema_result_or = schema_store_->SetSchema(
std::move(new_schema), ignore_errors_and_delete_documents);
if (!set_schema_result_or.ok()) {
@@ -627,8 +577,12 @@
return result_proto;
}
- status = RestoreIndexIfNeeded();
- if (!status.ok()) {
+ IndexRestorationResult restore_result = RestoreIndexIfNeeded();
+ // DATA_LOSS means that we have successfully re-added content to the
+ // index. Some indexed content was lost, but otherwise the index is in a
+ // valid state and can be queried.
+ if (!restore_result.status.ok() &&
+ !absl_ports::IsDataLoss(restore_result.status)) {
+ // Report the restoration failure itself. `status` is no longer assigned
+ // from RestoreIndexIfNeeded(), so it does not describe this error.
- TransformStatus(status, result_status);
+ TransformStatus(restore_result.status, result_status);
 return result_proto;
 }
@@ -639,6 +593,7 @@
result_status->set_code(StatusProto::FAILED_PRECONDITION);
result_status->set_message("Schema is incompatible.");
}
+
return result_proto;
}
@@ -1095,14 +1050,18 @@
return result_proto;
}
- libtextclassifier3::Status index_restoration_status = RestoreIndexIfNeeded();
+ IndexRestorationResult index_restoration_status = RestoreIndexIfNeeded();
optimize_stats->set_index_restoration_latency_ms(
optimize_index_timer->GetElapsedMilliseconds());
- if (!index_restoration_status.ok()) {
+ // DATA_LOSS means that we have successfully re-added content to the index.
+ // Some indexed content was lost, but otherwise the index is in a valid state
+ // and can be queried.
+ if (!index_restoration_status.status.ok() &&
+ !absl_ports::IsDataLoss(index_restoration_status.status)) {
status = absl_ports::Annotate(
absl_ports::InternalError(
"Failed to reindex documents after optimization."),
- index_restoration_status.error_message());
+ index_restoration_status.status.error_message());
TransformStatus(status, result_status);
return result_proto;
@@ -1240,72 +1199,6 @@
ICING_RETURN_IF_ERROR(document_store_->PersistToDisk(PersistType::FULL));
ICING_RETURN_IF_ERROR(index_->PersistToDisk());
- // Update the combined checksum and write to header file.
- ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
-
- return libtextclassifier3::Status::OK;
-}
-
-libtextclassifier3::StatusOr<Crc32> IcingSearchEngine::ComputeChecksum() {
- Crc32 total_checksum;
- // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
- // that can support error logging.
- auto checksum_or = schema_store_->ComputeChecksum();
- if (!checksum_or.ok()) {
- ICING_LOG(ERROR) << checksum_or.status().error_message()
- << "Failed to compute checksum of SchemaStore";
- return checksum_or.status();
- }
-
- Crc32 schema_store_checksum = std::move(checksum_or).ValueOrDie();
-
- // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
- // that can support error logging.
- checksum_or = document_store_->ComputeChecksum();
- if (!checksum_or.ok()) {
- ICING_LOG(ERROR) << checksum_or.status().error_message()
- << "Failed to compute checksum of DocumentStore";
- return checksum_or.status();
- }
- Crc32 document_store_checksum = std::move(checksum_or).ValueOrDie();
-
- total_checksum.Append(std::to_string(document_store_checksum.Get()));
- total_checksum.Append(std::to_string(schema_store_checksum.Get()));
-
- return total_checksum;
-}
-
-bool IcingSearchEngine::HeaderExists() {
- if (!filesystem_->FileExists(
- MakeHeaderFilename(options_.base_dir()).c_str())) {
- return false;
- }
-
- int64_t file_size =
- filesystem_->GetFileSize(MakeHeaderFilename(options_.base_dir()).c_str());
-
- // If it's been truncated to size 0 before, we consider it to be a new file
- return file_size != 0 && file_size != Filesystem::kBadFileSize;
-}
-
-libtextclassifier3::Status IcingSearchEngine::UpdateHeader(
- const Crc32& checksum) {
- // Write the header
- IcingSearchEngine::Header header;
- header.magic = IcingSearchEngine::Header::kMagic;
- header.checksum = checksum.Get();
-
- // This should overwrite the header.
- ScopedFd sfd(filesystem_->OpenForWrite(
- MakeHeaderFilename(options_.base_dir()).c_str()));
- if (!sfd.is_valid() ||
- !filesystem_->Write(sfd.get(), &header, sizeof(header)) ||
- !filesystem_->DataSync(sfd.get())) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Failed to write IcingSearchEngine header: ",
- MakeHeaderFilename(options_.base_dir())));
- }
return libtextclassifier3::Status::OK;
}
@@ -1323,6 +1216,7 @@
}
QueryStatsProto* query_stats = result_proto.mutable_query_stats();
+ query_stats->set_query_length(search_spec.query().length());
std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
libtextclassifier3::Status status = ValidateResultSpec(result_spec);
@@ -1650,19 +1544,20 @@
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status IcingSearchEngine::RestoreIndexIfNeeded() {
+IcingSearchEngine::IndexRestorationResult
+IcingSearchEngine::RestoreIndexIfNeeded() {
DocumentId last_stored_document_id =
document_store_->last_added_document_id();
DocumentId last_indexed_document_id = index_->last_added_document_id();
if (last_stored_document_id == last_indexed_document_id) {
// No need to recover.
- return libtextclassifier3::Status::OK;
+ return {libtextclassifier3::Status::OK, false};
}
if (last_stored_document_id == kInvalidDocumentId) {
// Document store is empty but index is not. Reset the index.
- return index_->Reset();
+ return {index_->Reset(), false};
}
// TruncateTo ensures that the index does not hold any data that is not
@@ -1671,17 +1566,29 @@
// lost documents. If the index does not contain any hits for documents with
// document id greater than last_stored_document_id, then TruncateTo will have
// no effect.
- ICING_RETURN_IF_ERROR(index_->TruncateTo(last_stored_document_id));
+ auto status = index_->TruncateTo(last_stored_document_id);
+ if (!status.ok()) {
+ return {status, false};
+ }
+ // Last indexed document id may have changed thanks to TruncateTo.
+ last_indexed_document_id = index_->last_added_document_id();
DocumentId first_document_to_reindex =
(last_indexed_document_id != kInvalidDocumentId)
? index_->last_added_document_id() + 1
: kMinDocumentId;
+ if (first_document_to_reindex > last_stored_document_id) {
+ // Nothing to restore. Just return.
+ return {libtextclassifier3::Status::OK, false};
+ }
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<IndexProcessor> index_processor,
- IndexProcessor::Create(normalizer_.get(), index_.get(),
- CreateIndexProcessorOptions(options_),
- clock_.get()));
+ auto index_processor_or = IndexProcessor::Create(
+ normalizer_.get(), index_.get(), CreateIndexProcessorOptions(options_),
+ clock_.get());
+ if (!index_processor_or.ok()) {
+ return {index_processor_or.status(), true};
+ }
+ std::unique_ptr<IndexProcessor> index_processor =
+ std::move(index_processor_or).ValueOrDie();
ICING_VLOG(1) << "Restoring index by replaying documents from document id "
<< first_document_to_reindex << " to document id "
@@ -1699,7 +1606,7 @@
continue;
} else {
// Returns other errors
- return document_or.status();
+ return {document_or.status(), true};
}
}
DocumentProto document(std::move(document_or).ValueOrDie());
@@ -1709,7 +1616,7 @@
language_segmenter_.get(),
std::move(document));
if (!tokenized_document_or.ok()) {
- return tokenized_document_or.status();
+ return {tokenized_document_or.status(), true};
}
TokenizedDocument tokenized_document(
std::move(tokenized_document_or).ValueOrDie());
@@ -1719,7 +1626,7 @@
if (!status.ok()) {
if (!absl_ports::IsDataLoss(status)) {
// Real error. Stop recovering and pass it up.
- return status;
+ return {status, true};
}
// Just a data loss. Keep trying to add the remaining docs, but report the
// data loss when we're done.
@@ -1727,7 +1634,7 @@
}
}
- return overall_status;
+ return {overall_status, true};
}
libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() {
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index fa1e0c8..3dc7e29 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -53,16 +53,6 @@
// TODO(cassiewang) Top-level comments and links to design-doc.
class IcingSearchEngine {
public:
- struct Header {
- static constexpr int32_t kMagic = 0x6e650d0a;
-
- // Holds the magic as a quick sanity check against file corruption.
- int32_t magic;
-
- // Checksum of the IcingSearchEngine's sub-component's checksums.
- uint32_t checksum;
- };
-
// Note: It is only required to provide a pointer to a valid instance of
// JniCache if this instance needs to perform reverse-jni calls. Users on
// Linux and iOS should always provide a nullptr.
@@ -508,11 +498,15 @@
// Do any initialization/recovery necessary to create a DocumentStore
// instance.
//
+ // See comments on DocumentStore::Create for explanation of
+ // force_recovery_and_revalidate_documents.
+ //
// Returns:
// OK on success
// FAILED_PRECONDITION if initialize_stats is null
// INTERNAL on I/O error
libtextclassifier3::Status InitializeDocumentStore(
+ bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
@@ -577,29 +571,19 @@
// call Index::Reset first.
//
// Returns:
- // OK on success
+ // On success, OK and a bool indicating whether or not restoration was
+ // needed.
+ // DATA_LOSS, if an error during index merging caused us to lose indexed
+ // data in the main index. Despite the data loss, this is still considered
+ // a successful run and needed_restoration will be set to true.
// RESOURCE_EXHAUSTED if the index fills up before finishing indexing
// NOT_FOUND if some Document's schema type is not in the SchemaStore
// INTERNAL_ERROR on any IO errors
- libtextclassifier3::Status RestoreIndexIfNeeded()
- ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- // Computes the combined checksum of the IcingSearchEngine - includes all its
- // subcomponents
- //
- // Returns:
- // Combined checksum on success
- // INTERNAL_ERROR on compute error
- libtextclassifier3::StatusOr<Crc32> ComputeChecksum()
- ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- // Checks if the header exists already. This does not create the header file
- // if it doesn't exist.
- bool HeaderExists() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- // Update, replace and persist the header file. Creates the header file if it
- // doesn't exist.
- libtextclassifier3::Status UpdateHeader(const Crc32& checksum)
+ struct IndexRestorationResult {
+ libtextclassifier3::Status status;
+ bool needed_restoration;
+ };
+ IndexRestorationResult RestoreIndexIfNeeded()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// If we lost the schema during a previous failure, it may "look" the same as
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index fa23262..3258d64 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -30,6 +30,7 @@
#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
#include "icing/proto/optimize.pb.h"
@@ -44,7 +45,6 @@
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/jni-test-helpers.h"
-#include "icing/testing/platform.h"
#include "icing/testing/random-string.h"
#include "icing/testing/snippet-helpers.h"
#include "icing/testing/test-data.h"
@@ -3386,125 +3386,6 @@
ProtoIsOk());
}
-TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderMagic) {
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- // Change the header's magic value
- int32_t invalid_magic = 1; // Anything that's not the actual kMagic value.
- filesystem()->PWrite(GetHeaderFilename().c_str(),
- offsetof(IcingSearchEngine::Header, magic),
- &invalid_magic, sizeof(invalid_magic));
-
- // We should be able to recover from this and access all our previous data
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Checks that DocumentLog is still ok
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Checks that the index is still ok so we can search over it
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Checks that Schema is still since it'll be needed to validate the document
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderChecksum) {
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- // Change the header's checksum value
- uint32_t invalid_checksum =
- 1; // Anything that's not the actual checksum value
- filesystem()->PWrite(GetHeaderFilename().c_str(),
- offsetof(IcingSearchEngine::Header, checksum),
- &invalid_checksum, sizeof(invalid_checksum));
-
- // We should be able to recover from this and access all our previous data
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Checks that DocumentLog is still ok
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Checks that the index is still ok so we can search over it
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
- expected_search_result_proto));
-
- // Checks that Schema is still since it'll be needed to validate the document
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-}
-
TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptSchema) {
{
// Basic initialization/setup
@@ -3576,9 +3457,10 @@
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
{
// Initializes folder and schema
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ IcingSearchEngine icing(options, GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
SchemaProto schema;
@@ -3652,6 +3534,13 @@
property->mutable_string_indexing_config()->set_tokenizer_type(
StringIndexingConfig::TokenizerType::PLAIN);
+ // Write the marker file
+ std::string marker_filepath =
+ absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
+ ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+
+ // Write the new schema
FakeClock fake_clock;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
@@ -5219,7 +5108,7 @@
EXPECT_THAT(icing.SetSchema(empty_schema).status(), ProtoIsOk());
}
-TEST_F(IcingSearchEngineTest, ResetAbortedError) {
+TEST_F(IcingSearchEngineTest, ResetDeleteFailureCausesAbortedError) {
auto mock_filesystem = std::make_unique<MockFilesystem>();
// This fails IcingSearchEngine::Reset(). But since we didn't actually delete
@@ -5253,22 +5142,27 @@
ProtoIsOk());
}
-TEST_F(IcingSearchEngineTest, ResetInternalError) {
+TEST_F(IcingSearchEngineTest, ResetCreateFailureCausesInternalError) {
auto mock_filesystem = std::make_unique<MockFilesystem>();
- // Let all other calls succeed.
- EXPECT_CALL(*mock_filesystem, Write(Matcher<const char*>(_), _, _))
+ // Let all other delete directory calls succeed.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(Matcher<const char*>(_)))
.WillRepeatedly(Return(true));
- // This prevents IcingSearchEngine from creating a DocumentStore instance on
- // reinitialization
- const std::string document_log_path =
- GetTestBaseDir() + "/document_dir/document_log";
+ // This prevents IcingSearchEngine from deleting our base dir when resetting
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(Matcher<const char*>(
+ StrEq(GetTestBaseDir().c_str()))))
+ .WillOnce(Return(false));
+
+ // The first call will show our base directory had 100 bytes, but after we
+ // failed to delete, we lost those 100 bytes. So this will be reported as an
+ // INTERNAL error since data was lost.
EXPECT_CALL(
*mock_filesystem,
- Write(Matcher<const char*>(StrEq(document_log_path.c_str())), _, _))
- .WillOnce(Return(true))
- .WillOnce(Return(false));
+ GetDiskUsage(Matcher<const char*>(StrEq(GetTestBaseDir().c_str()))))
+ .WillOnce(Return(100))
+ .WillOnce(Return(0));
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
@@ -6113,14 +6007,14 @@
EXPECT_THAT(
initialize_result_proto.initialize_stats().document_store_data_status(),
Eq(InitializeStatsProto::COMPLETE_LOSS));
- // The complete rewind of ground truth causes the mismatch of total
- // checksum, so index should be restored.
+ // The complete rewind of ground truth causes us to clear the index, but
+ // that's not considered a restoration.
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::TOTAL_CHECKSUM_MISMATCH));
+ Eq(InitializeStatsProto::NONE));
EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
- Eq(10));
+ Eq(0));
EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_cause(),
Eq(InitializeStatsProto::NONE));
@@ -6187,26 +6081,51 @@
}
TEST_F(IcingSearchEngineTest,
- InitializeShouldLogRecoveryCauseTotalChecksumMismatch) {
+ InitializeShouldLogRecoveryCauseSchemaChangesOutofSync) {
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
.AddStringProperty("body", "message body")
.Build();
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
{
// Initialize and put one document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ IcingSearchEngine icing(options, GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
}
{
- // Change the header's checksum value to a random value.
- uint32_t invalid_checksum = 1;
- filesystem()->PWrite(GetHeaderFilename().c_str(),
- offsetof(IcingSearchEngine::Header, checksum),
- &invalid_checksum, sizeof(invalid_checksum));
+ // Simulate a schema change where power is lost after the schema is written.
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // Write the marker file
+ std::string marker_filepath =
+ absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
+ ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+
+ // Write the new schema
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+ ICING_EXPECT_OK(schema_store->SetSchema(new_schema));
}
{
@@ -6221,13 +6140,13 @@
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
EXPECT_THAT(
initialize_result_proto.initialize_stats().index_restoration_cause(),
- Eq(InitializeStatsProto::TOTAL_CHECKSUM_MISMATCH));
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(10));
EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(InitializeStatsProto::TOTAL_CHECKSUM_MISMATCH));
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(10));
@@ -6241,6 +6160,39 @@
.schema_store_recovery_latency_ms(),
Eq(0));
}
+
+ {
+ // No recovery should be needed.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
}
TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCauseIndexIOError) {
@@ -6843,6 +6795,7 @@
// Check the stats
QueryStatsProto exp_stats;
+ exp_stats.set_query_length(7);
exp_stats.set_num_terms(1);
exp_stats.set_num_namespaces_filtered(1);
exp_stats.set_num_schema_types_filtered(1);
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index b7ec09e..fc14800 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -36,6 +36,7 @@
#include "icing/index/term-property-id.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
@@ -47,7 +48,6 @@
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
-#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
diff --git a/icing/testing/platform.h b/icing/portable/platform.h
similarity index 76%
rename from icing/testing/platform.h
rename to icing/portable/platform.h
index ad612d5..0cccd57 100644
--- a/icing/testing/platform.h
+++ b/icing/portable/platform.h
@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_TESTING_PLATFORM_H_
-#define ICING_TESTING_PLATFORM_H_
+#ifndef ICING_PORTABLE_PLATFORM_H_
+#define ICING_PORTABLE_PLATFORM_H_
// This file is meant to hold util functions for tests that help the test
// determine which platform-specific configuration it may be running in.
@@ -52,7 +52,27 @@
return false;
}
+enum Architecture {
+ UNKNOWN,
+ BIT_32,
+ BIT_64,
+};
+
+// Returns which architecture we're running on.
+//
+// Architecture macros pulled from
+// https://developer.android.com/ndk/guides/cpu-features
+inline Architecture GetArchitecture() {
+#if defined(__arm__) || defined(__i386__)
+ return BIT_32;
+#elif defined(__aarch64__) || defined(__x86_64__)
+ return BIT_64;
+#else
+ return UNKNOWN;
+#endif
+}
+
} // namespace lib
} // namespace icing
-#endif // ICING_TESTING_PLATFORM_H_
+#endif // ICING_PORTABLE_PLATFORM_H_
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index 0f49f4d..daeb479 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -29,6 +29,7 @@
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/portable/platform.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
@@ -40,7 +41,6 @@
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/jni-test-helpers.h"
-#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc
index 8d61dd9..1c9684d 100644
--- a/icing/result/result-retriever_test.cc
+++ b/icing/result/result-retriever_test.cc
@@ -24,6 +24,7 @@
#include "icing/file/mock-filesystem.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
@@ -35,7 +36,6 @@
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
-#include "icing/testing/platform.h"
#include "icing/testing/snippet-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc
index e569307..31a2e5f 100644
--- a/icing/result/snippet-retriever.cc
+++ b/icing/result/snippet-retriever.cc
@@ -155,20 +155,17 @@
}
}
-// Returns true if token matches any of the terms in query terms according to
-// the provided match type.
+// Finds the start position of a valid token that is after
+// window_start_min_exclusive
//
// Returns:
// the position of the window start if successful
// INTERNAL_ERROR - if a tokenizer error is encountered
libtextclassifier3::StatusOr<int> DetermineWindowStart(
const ResultSpecProto::SnippetSpecProto& snippet_spec,
- std::string_view value, int match_mid, Tokenizer::Iterator* iterator) {
- int window_start_min = (match_mid - snippet_spec.max_window_bytes() / 2) - 1;
- if (window_start_min < 0) {
- return 0;
- }
- if (!iterator->ResetToTokenAfter(window_start_min)) {
+ std::string_view value, int window_start_min_exclusive,
+ Tokenizer::Iterator* iterator) {
+ if (!iterator->ResetToTokenAfter(window_start_min_exclusive)) {
return absl_ports::InternalError(
"Couldn't reset tokenizer to determine snippet window!");
}
@@ -196,17 +193,16 @@
return window_end_exclusive;
}
+// Finds the end position of a valid token that is before the
+// window_end_max_exclusive.
+//
// Returns:
// the position of the window end if successful
// INTERNAL_ERROR - if a tokenizer error is encountered
libtextclassifier3::StatusOr<int> DetermineWindowEnd(
const ResultSpecProto::SnippetSpecProto& snippet_spec,
- std::string_view value, int match_mid, Tokenizer::Iterator* iterator) {
- int window_end_max_exclusive =
- match_mid + snippet_spec.max_window_bytes() / 2;
- if (window_end_max_exclusive >= value.length()) {
- return value.length();
- }
+ std::string_view value, int window_end_max_exclusive,
+ Tokenizer::Iterator* iterator) {
if (!iterator->ResetToTokenBefore(window_end_max_exclusive)) {
return absl_ports::InternalError(
"Couldn't reset tokenizer to determine snippet window!");
@@ -228,24 +224,68 @@
SnippetMatchProto snippet_match;
Token match = iterator->GetToken();
int match_pos = match.text.data() - value.section_subcontent.data();
+
+ // When finding boundaries, we have a few cases:
+ //
+ // Case 1:
+ // If we have an odd length match with an odd length window, the window
+ // surrounds the match perfectly.
+ // match = "bar" in "foo bar baz"
+ // window = |---|
+ //
+ // Case 2:
+ // If we have an even length match with an even length window, the window
+ // surrounds the match perfectly.
+ // match = "baar" in "foo baar baz"
+ // window = |----|
+ //
+ // Case 3:
+ // If we have an odd length match with an even length window, we allocate
+ // that extra window byte to the beginning.
+ // match = "bar" in "foo bar baz"
+ // window = |----|
+ //
+ // Case 4:
+ // If we have an even length match with an odd length window, we allocate
+ // that extra window byte to the end.
+ // match = "baar" in "foo baar baz"
+ // window = |-----|
+ //
+ // We have to do +1/-1 below to get the math to match up.
int match_mid = match_pos + match.text.length() / 2;
+ int window_start_min_exclusive =
+ (match_mid - snippet_spec.max_window_bytes() / 2) - 1;
+ int window_end_max_exclusive =
+ match_mid + (snippet_spec.max_window_bytes() + 1) / 2;
snippet_match.set_exact_match_position(match_pos);
snippet_match.set_exact_match_bytes(match.text.length());
- if (snippet_spec.max_window_bytes() > match.text.length()) {
+ // Only include windows if it'll at least include the matched text. Otherwise,
+ // it'll just be an empty string anyways.
+ if (snippet_spec.max_window_bytes() >= match.text.length()) {
// Find the beginning of the window.
- ICING_ASSIGN_OR_RETURN(
- int window_start,
- DetermineWindowStart(snippet_spec, value.section_subcontent, match_mid,
- iterator));
+ int window_start;
+ if (window_start_min_exclusive < 0) {
+ window_start = 0;
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ window_start,
+ DetermineWindowStart(snippet_spec, value.section_subcontent,
+ window_start_min_exclusive, iterator));
+ }
snippet_match.set_window_position(window_start);
// Find the end of the window.
- ICING_ASSIGN_OR_RETURN(
- int window_end_exclusive,
- DetermineWindowEnd(snippet_spec, value.section_subcontent, match_mid,
- iterator));
+ int window_end_exclusive;
+ if (window_end_max_exclusive >= value.section_subcontent.length()) {
+ window_end_exclusive = value.section_subcontent.length();
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ window_end_exclusive,
+ DetermineWindowEnd(snippet_spec, value.section_subcontent,
+ window_end_max_exclusive, iterator));
+ }
snippet_match.set_window_bytes(window_end_exclusive - window_start);
// DetermineWindowStart/End may change the position of the iterator. So,
diff --git a/icing/result/snippet-retriever_test.cc b/icing/result/snippet-retriever_test.cc
index 1cf4e5a..ff38372 100644
--- a/icing/result/snippet-retriever_test.cc
+++ b/icing/result/snippet-retriever_test.cc
@@ -24,6 +24,7 @@
#include "icing/file/mock-filesystem.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
@@ -36,7 +37,6 @@
#include "icing/store/key-mapper.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
-#include "icing/testing/platform.h"
#include "icing/testing/snippet-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
@@ -184,6 +184,58 @@
EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre(""));
}
+TEST_F(SnippetRetrieverTest,
+ SnippetingWindowMaxWindowSizeEqualToMatch_OddLengthMatch) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
+
+ // Window starts at the beginning of "three" and ends at the exact end of
+ // "three". len=5, orig_window= "three"
+ snippet_spec_.set_max_window_bytes(5);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("three"));
+}
+
+TEST_F(SnippetRetrieverTest,
+ SnippetingWindowMaxWindowSizeEqualToMatch_EvenLengthMatch) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"four"}}};
+
+ // Window starts at the beginning of "four" and ends at the exact end of
+ // "four". len=4, orig_window= "four"
+ snippet_spec_.set_max_window_bytes(4);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("four"));
+}
+
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsInWhitespace) {
DocumentProto document =
DocumentBuilder()
diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc
index 15d9a19..3dcc5a9 100644
--- a/icing/schema/section-manager_test.cc
+++ b/icing/schema/section-manager_test.cc
@@ -20,7 +20,6 @@
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
-#include "icing/proto/schema.proto.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/schema-util.h"
diff --git a/icing/scoring/scorer.cc b/icing/scoring/scorer.cc
index b7e1b92..fe89f47 100644
--- a/icing/scoring/scorer.cc
+++ b/icing/scoring/scorer.cc
@@ -122,11 +122,11 @@
case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT:
return usage_scores.usage_type3_count;
case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP:
- return usage_scores.usage_type1_last_used_timestamp_s;
+ return usage_scores.usage_type1_last_used_timestamp_s * 1000.0;
case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP:
- return usage_scores.usage_type2_last_used_timestamp_s;
+ return usage_scores.usage_type2_last_used_timestamp_s * 1000.0;
case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP:
- return usage_scores.usage_type3_last_used_timestamp_s;
+ return usage_scores.usage_type3_last_used_timestamp_s * 1000.0;
default:
// This shouldn't happen if this scorer is used correctly.
return default_score_;
diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc
index 31bdd15..22d548a 100644
--- a/icing/scoring/scorer_test.cc
+++ b/icing/scoring/scorer_test.cc
@@ -397,7 +397,7 @@
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
UsageReport::USAGE_TYPE1);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time1));
- EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(1));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(1000));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
@@ -406,7 +406,7 @@
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
UsageReport::USAGE_TYPE1);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time5));
- EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5000));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
@@ -415,7 +415,7 @@
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
UsageReport::USAGE_TYPE1);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time3));
- EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5000));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
}
@@ -458,7 +458,7 @@
UsageReport::USAGE_TYPE2);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time1));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
- EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(1));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(1000));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
// Report usage with timestamp = 5000ms, score should be updated.
@@ -467,7 +467,7 @@
UsageReport::USAGE_TYPE2);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time5));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
- EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5000));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
// Report usage with timestamp = 3000ms, score should not be updated.
@@ -476,7 +476,7 @@
UsageReport::USAGE_TYPE2);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time3));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
- EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5000));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
}
@@ -519,7 +519,7 @@
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time1));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
- EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(1));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(1000));
// Report usage with timestamp = 5000ms, score should be updated.
UsageReport usage_report_type3_time5 = CreateUsageReport(
@@ -528,7 +528,7 @@
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time5));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
- EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5000));
// Report usage with timestamp = 3000ms, score should not be updated.
UsageReport usage_report_type3_time3 = CreateUsageReport(
@@ -537,7 +537,7 @@
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time3));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
- EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5000));
}
TEST_F(ScorerTest, NoScorerShouldAlwaysReturnDefaultScore) {
@@ -565,6 +565,37 @@
EXPECT_THAT(scorer->GetScore(docHitInfo3), Eq(111));
}
+TEST_F(ScorerTest, ShouldScaleUsageTimestampScoreForMaxTimestamp) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer1,
+ Scorer::Create(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP,
+ /*default_score=*/0, document_store()));
+ DocHitInfo docHitInfo = DocHitInfo(document_id);
+
+ // Create usage report for the maximum allowable timestamp.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1",
+ /*timestamp_ms=*/std::numeric_limits<uint32_t>::max() * 1000.0,
+ UsageReport::USAGE_TYPE1);
+
+ double max_int_usage_timestamp_score =
+ std::numeric_limits<uint32_t>::max() * 1000.0;
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(max_int_usage_timestamp_score));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc
index 5e251eb..125e2a7 100644
--- a/icing/scoring/scoring-processor_test.cc
+++ b/icing/scoring/scoring-processor_test.cc
@@ -610,9 +610,9 @@
DocHitInfo doc_hit_info2(document_id2);
DocHitInfo doc_hit_info3(document_id3);
ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
- /*score=*/1);
+ /*score=*/1000);
ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
- /*score=*/5);
+ /*score=*/5000);
ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
/*score=*/0);
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 2436571..9631e29 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -85,33 +85,6 @@
return document_wrapper;
}
-DocumentWrapper CreateDocumentTombstone(std::string_view document_namespace,
- std::string_view document_uri) {
- DocumentWrapper document_wrapper;
- document_wrapper.set_deleted(true);
- DocumentProto* document = document_wrapper.mutable_document();
- document->set_namespace_(std::string(document_namespace));
- document->set_uri(std::string(document_uri));
- return document_wrapper;
-}
-
-DocumentWrapper CreateNamespaceTombstone(std::string_view document_namespace) {
- DocumentWrapper document_wrapper;
- document_wrapper.set_deleted(true);
- DocumentProto* document = document_wrapper.mutable_document();
- document->set_namespace_(std::string(document_namespace));
- return document_wrapper;
-}
-
-DocumentWrapper CreateSchemaTypeTombstone(
- std::string_view document_schema_type) {
- DocumentWrapper document_wrapper;
- document_wrapper.set_deleted(true);
- DocumentProto* document = document_wrapper.mutable_document();
- document->set_schema(std::string(document_schema_type));
- return document_wrapper;
-}
-
std::string MakeHeaderFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kDocumentStoreHeaderFilename);
}
@@ -229,6 +202,7 @@
libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
+ bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
@@ -236,8 +210,10 @@
auto document_store = std::unique_ptr<DocumentStore>(
new DocumentStore(filesystem, base_dir, clock, schema_store));
- ICING_ASSIGN_OR_RETURN(DataLoss data_loss,
- document_store->Initialize(initialize_stats));
+ ICING_ASSIGN_OR_RETURN(
+ DataLoss data_loss,
+ document_store->Initialize(force_recovery_and_revalidate_documents,
+ initialize_stats));
CreateResult create_result;
create_result.document_store = std::move(document_store);
@@ -246,6 +222,7 @@
}
libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
+ bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats) {
auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
filesystem_, MakeDocumentLogFilename(base_dir_),
@@ -262,10 +239,11 @@
std::move(create_result_or).ValueOrDie();
document_log_ = std::move(create_result.proto_log);
- if (create_result.has_data_loss()) {
- ICING_LOG(WARNING)
- << "Data loss in document log, regenerating derived files.";
- if (initialize_stats != nullptr) {
+ if (force_recovery_and_revalidate_documents ||
+ create_result.has_data_loss()) {
+ if (create_result.has_data_loss() && initialize_stats != nullptr) {
+ ICING_LOG(WARNING)
+ << "Data loss in document log, regenerating derived files.";
initialize_stats->set_document_store_recovery_cause(
InitializeStatsProto::DATA_LOSS);
@@ -280,7 +258,8 @@
}
}
std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
- libtextclassifier3::Status status = RegenerateDerivedFiles();
+ libtextclassifier3::Status status =
+ RegenerateDerivedFiles(force_recovery_and_revalidate_documents);
if (initialize_stats != nullptr) {
initialize_stats->set_document_store_recovery_latency_ms(
document_recovery_timer->GetElapsedMilliseconds());
@@ -295,13 +274,12 @@
ICING_VLOG(1)
<< "Couldn't find derived files or failed to initialize them, "
"regenerating derived files for DocumentStore.";
- if (initialize_stats != nullptr) {
+ std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
+ libtextclassifier3::Status status = RegenerateDerivedFiles(
+ /*revalidate_documents=*/false);
+ if (initialize_stats != nullptr && num_documents() > 0) {
initialize_stats->set_document_store_recovery_cause(
InitializeStatsProto::IO_ERROR);
- }
- std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
- libtextclassifier3::Status status = RegenerateDerivedFiles();
- if (initialize_stats != nullptr) {
initialize_stats->set_document_store_recovery_latency_ms(
document_recovery_timer->GetElapsedMilliseconds());
}
@@ -407,7 +385,8 @@
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() {
+libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles(
+ bool revalidate_documents) {
ICING_RETURN_IF_ERROR(ResetDocumentKeyMapper());
ICING_RETURN_IF_ERROR(ResetDocumentIdMapper());
ICING_RETURN_IF_ERROR(ResetDocumentAssociatedScoreCache());
@@ -441,109 +420,44 @@
DocumentWrapper document_wrapper =
std::move(document_wrapper_or).ValueOrDie();
- if (document_wrapper.deleted()) {
- if (!document_wrapper.document().uri().empty()) {
- // Individual document deletion.
- auto document_id_or =
- GetDocumentId(document_wrapper.document().namespace_(),
- document_wrapper.document().uri());
- // Updates document_id mapper with deletion
- if (document_id_or.ok()) {
- ICING_RETURN_IF_ERROR(document_id_mapper_->Set(
- document_id_or.ValueOrDie(), kDocDeletedFlag));
- } else if (!absl_ports::IsNotFound(document_id_or.status())) {
- // Real error
- return absl_ports::Annotate(
- document_id_or.status(),
- absl_ports::StrCat("Failed to find document id. namespace: ",
- document_wrapper.document().namespace_(),
- ", uri: ", document_wrapper.document().uri()));
- }
- } else if (!document_wrapper.document().namespace_().empty()) {
- // Namespace deletion.
- ICING_ASSIGN_OR_RETURN(
- NamespaceId namespace_id,
- namespace_mapper_->Get(document_wrapper.document().namespace_()));
- // Tombstone indicates it's a soft delete.
- ICING_RETURN_IF_ERROR(BatchDelete(namespace_id, kInvalidSchemaTypeId,
- /*soft_delete=*/true));
- } else if (!document_wrapper.document().schema().empty()) {
- // SchemaType deletion.
- auto schema_type_id_or = schema_store_->GetSchemaTypeId(
- document_wrapper.document().schema());
-
- if (schema_type_id_or.ok()) {
- // Tombstone indicates it's a soft delete.
- ICING_RETURN_IF_ERROR(BatchDelete(kInvalidNamespaceId,
- schema_type_id_or.ValueOrDie(),
- /*soft_delete=*/true));
- } else {
- // The deleted schema type doesn't have a SchemaTypeId we can refer
- // to in the FilterCache.
- //
- // TODO(cassiewang): We could avoid reading out all the documents.
- // When we see a schema type doesn't have a SchemaTypeId, assign the
- // unknown schema type a unique, temporary SchemaTypeId and store
- // that in the FilterCache. Then, when we see the schema type
- // tombstone here, we can look up its temporary SchemaTypeId and
- // just iterate through the FilterCache to mark those documents as
- // deleted.
- int size = document_id_mapper_->num_elements();
- for (DocumentId document_id = 0; document_id < size; document_id++) {
- auto document_or = Get(document_id);
- if (absl_ports::IsNotFound(document_or.status())) {
- // Skip nonexistent documents
- continue;
- } else if (!document_or.ok()) {
- // Real error, pass up
- return absl_ports::Annotate(
- document_or.status(),
- IcingStringUtil::StringPrintf(
- "Failed to retrieve Document for DocumentId %d",
- document_id));
- }
-
- // Guaranteed to have a document now.
- DocumentProto document = document_or.ValueOrDie();
-
- if (document.schema() == document_wrapper.document().schema()) {
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(document_id, kDocDeletedFlag));
- }
- }
- }
- } else {
- return absl_ports::InternalError(
- "Encountered an invalid tombstone during recovery!");
+ // Revalidate that this document is still compatible if requested.
+ if (revalidate_documents) {
+ if (!document_validator_.Validate(document_wrapper.document()).ok()) {
+ // Document is no longer valid with the current schema. Mark as
+ // deleted
+ DocumentId new_document_id = document_id_mapper_->num_elements();
+ ICING_RETURN_IF_ERROR(document_log_->EraseProto(iterator.GetOffset()));
+ ICING_RETURN_IF_ERROR(ClearDerivedData(new_document_id));
+ continue;
}
+ }
+ // Updates key mapper and document_id mapper with the new document
+ DocumentId new_document_id = document_id_mapper_->num_elements();
+ ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
+ MakeFingerprint(document_wrapper.document().namespace_(),
+ document_wrapper.document().uri()),
+ new_document_id));
+ ICING_RETURN_IF_ERROR(
+ document_id_mapper_->Set(new_document_id, iterator.GetOffset()));
+
+ SchemaTypeId schema_type_id;
+ auto schema_type_id_or =
+ schema_store_->GetSchemaTypeId(document_wrapper.document().schema());
+ if (absl_ports::IsNotFound(schema_type_id_or.status())) {
+ // Didn't find a SchemaTypeId. This means that the DocumentStore and
+ // the SchemaStore are out of sync. But DocumentStore can't do
+ // anything about it so just ignore this for now. This should be
+ // detected/handled by the owner of DocumentStore. Set it to some
+ // arbitrary invalid value for now, it'll get updated to the correct
+ // ID later.
+ schema_type_id = -1;
+ } else if (!schema_type_id_or.ok()) {
+ // Real error. Pass it up
+ return schema_type_id_or.status();
} else {
- // Updates key mapper and document_id mapper with the new document
- DocumentId new_document_id = document_id_mapper_->num_elements();
- ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
- MakeFingerprint(document_wrapper.document().namespace_(),
- document_wrapper.document().uri()),
- new_document_id));
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(new_document_id, iterator.GetOffset()));
-
- SchemaTypeId schema_type_id;
- auto schema_type_id_or =
- schema_store_->GetSchemaTypeId(document_wrapper.document().schema());
- if (absl_ports::IsNotFound(schema_type_id_or.status())) {
- // Didn't find a SchemaTypeId. This means that the DocumentStore and
- // the SchemaStore are out of sync. But DocumentStore can't do
- // anything about it so just ignore this for now. This should be
- // detected/handled by the owner of DocumentStore. Set it to some
- // arbitrary invalid value for now, it'll get updated to the correct
- // ID later.
- schema_type_id = -1;
- } else if (!schema_type_id_or.ok()) {
- // Real error. Pass it up
- return schema_type_id_or.status();
- } else {
- // We're guaranteed that SchemaTypeId is valid now
- schema_type_id = schema_type_id_or.ValueOrDie();
- }
+ // We're guaranteed that SchemaTypeId is valid now
+ schema_type_id = schema_type_id_or.ValueOrDie();
+ }
ICING_ASSIGN_OR_RETURN(
NamespaceId namespace_id,
@@ -582,8 +496,7 @@
ICING_RETURN_IF_ERROR(UpdateFilterCache(
new_document_id, DocumentFilterData(namespace_id, schema_type_id,
expiration_timestamp_ms)));
- }
- iterator_status = iterator.Advance();
+ iterator_status = iterator.Advance();
}
if (!absl_ports::IsOutOfRange(iterator_status)) {
@@ -929,9 +842,9 @@
usage_store_->CloneUsageScores(/*from_document_id=*/old_document_id,
/*to_document_id=*/new_document_id));
- // Hard delete the old document.
- ICING_RETURN_IF_ERROR(
- HardDelete(old_document_id, offset_or.ValueOrDie()));
+ // Delete the old document.
+ ICING_RETURN_IF_ERROR(document_log_->EraseProto(offset_or.ValueOrDie()));
+ ICING_RETURN_IF_ERROR(ClearDerivedData(old_document_id));
}
}
@@ -1075,8 +988,7 @@
}
libtextclassifier3::Status DocumentStore::Delete(
- const std::string_view name_space, const std::string_view uri,
- bool soft_delete) {
+ const std::string_view name_space, const std::string_view uri) {
// Try to get the DocumentId first
auto document_id_or = GetDocumentId(name_space, uri);
if (!document_id_or.ok()) {
@@ -1085,69 +997,13 @@
absl_ports::StrCat("Failed to delete Document. namespace: ", name_space,
", uri: ", uri));
}
-
- // Check if the DocumentId's Document still exists.
- DocumentId document_id = document_id_or.ValueOrDie();
- auto file_offset_or = DoesDocumentExistAndGetFileOffset(document_id);
- if (!file_offset_or.ok()) {
- return absl_ports::Annotate(
- file_offset_or.status(),
- absl_ports::StrCat("Failed to delete Document. namespace: ", name_space,
- ", uri: ", uri));
- }
-
- if (soft_delete) {
- return SoftDelete(name_space, uri, document_id);
- } else {
- return HardDelete(document_id, file_offset_or.ValueOrDie());
- }
+ return Delete(document_id_or.ValueOrDie());
}
-libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id,
- bool soft_delete) {
+libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id) {
// Copy out the document to get namespace and uri.
ICING_ASSIGN_OR_RETURN(int64_t document_log_offset,
DoesDocumentExistAndGetFileOffset(document_id));
-
- if (soft_delete) {
- auto document_wrapper_or = document_log_->ReadProto(document_log_offset);
- if (!document_wrapper_or.ok()) {
- ICING_LOG(ERROR) << document_wrapper_or.status().error_message()
- << "Failed to read from document log";
- return document_wrapper_or.status();
- }
- DocumentWrapper document_wrapper =
- std::move(document_wrapper_or).ValueOrDie();
-
- return SoftDelete(document_wrapper.document().namespace_(),
- document_wrapper.document().uri(), document_id);
- } else {
- return HardDelete(document_id, document_log_offset);
- }
-}
-
-// TODO(b/169969469): Consider removing SoftDelete().
-libtextclassifier3::Status DocumentStore::SoftDelete(
- std::string_view name_space, std::string_view uri, DocumentId document_id) {
- // Update ground truth first.
- // Mark the document as deleted by appending a tombstone of it and actually
- // remove it from file later in Optimize()
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- document_log_->WriteProto(CreateDocumentTombstone(name_space, uri))
- .status();
- if (!status.ok()) {
- return absl_ports::Annotate(
- status, absl_ports::StrCat("Failed to delete Document. namespace:",
- name_space, ", uri: ", uri));
- }
-
- return document_id_mapper_->Set(document_id, kDocDeletedFlag);
-}
-
-libtextclassifier3::Status DocumentStore::HardDelete(
- DocumentId document_id, int64_t document_log_offset) {
// Erases document proto.
ICING_RETURN_IF_ERROR(document_log_->EraseProto(document_log_offset));
return ClearDerivedData(document_id);
@@ -1240,7 +1096,7 @@
}
DocumentStore::DeleteByGroupResult DocumentStore::DeleteByNamespace(
- std::string_view name_space, bool soft_delete) {
+ std::string_view name_space) {
DeleteByGroupResult result;
auto namespace_id_or = namespace_mapper_->Get(name_space);
if (!namespace_id_or.ok()) {
@@ -1250,26 +1106,7 @@
return result;
}
NamespaceId namespace_id = namespace_id_or.ValueOrDie();
-
- if (soft_delete) {
- // To delete an entire namespace, we append a tombstone that only contains
- // the deleted bit and the name of the deleted namespace.
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- document_log_->WriteProto(CreateNamespaceTombstone(name_space))
- .status();
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
- << "Failed to delete namespace. namespace = "
- << name_space;
- result.status = std::move(status);
- return result;
- }
- }
-
- auto num_deleted_or =
- BatchDelete(namespace_id, kInvalidSchemaTypeId, soft_delete);
+ auto num_deleted_or = BatchDelete(namespace_id, kInvalidSchemaTypeId);
if (!num_deleted_or.ok()) {
result.status = std::move(num_deleted_or).status();
return result;
@@ -1288,7 +1125,7 @@
}
DocumentStore::DeleteByGroupResult DocumentStore::DeleteBySchemaType(
- std::string_view schema_type, bool soft_delete) {
+ std::string_view schema_type) {
DeleteByGroupResult result;
auto schema_type_id_or = schema_store_->GetSchemaTypeId(schema_type);
if (!schema_type_id_or.ok()) {
@@ -1299,26 +1136,7 @@
return result;
}
SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie();
-
- if (soft_delete) {
- // To soft-delete an entire schema type, we append a tombstone that only
- // contains the deleted bit and the name of the deleted schema type.
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- document_log_->WriteProto(CreateSchemaTypeTombstone(schema_type))
- .status();
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
- << "Failed to delete schema_type. schema_type = "
- << schema_type;
- result.status = std::move(status);
- return result;
- }
- }
-
- auto num_deleted_or =
- BatchDelete(kInvalidNamespaceId, schema_type_id, soft_delete);
+ auto num_deleted_or = BatchDelete(kInvalidNamespaceId, schema_type_id);
if (!num_deleted_or.ok()) {
result.status = std::move(num_deleted_or).status();
return result;
@@ -1335,7 +1153,7 @@
}
libtextclassifier3::StatusOr<int> DocumentStore::BatchDelete(
- NamespaceId namespace_id, SchemaTypeId schema_type_id, bool soft_delete) {
+ NamespaceId namespace_id, SchemaTypeId schema_type_id) {
// Tracks if there were any existing documents with this namespace that we
// will mark as deleted.
int num_updated_documents = 0;
@@ -1368,30 +1186,15 @@
}
// The document has the desired namespace and schema type, it either exists
- // or has been soft-deleted / expired.
- if (soft_delete) {
- if (DoesDocumentExist(document_id)) {
- ++num_updated_documents;
- }
-
- // docid_mapper_->Set can only fail if document_id is < 0
- // or >= docid_mapper_->num_elements. So the only possible way to get an
- // error here would be if filter_cache_->num_elements >
- // docid_mapper_->num_elements, which SHOULD NEVER HAPPEN.
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(document_id, kDocDeletedFlag));
- } else {
- // Hard delete.
- libtextclassifier3::Status delete_status =
- Delete(document_id, /*soft_delete=*/false);
- if (absl_ports::IsNotFound(delete_status)) {
- continue;
- } else if (!delete_status.ok()) {
- // Real error, pass up.
- return delete_status;
- }
- ++num_updated_documents;
+ // or has expired.
+ libtextclassifier3::Status delete_status = Delete(document_id);
+ if (absl_ports::IsNotFound(delete_status)) {
+ continue;
+ } else if (!delete_status.ok()) {
+ // Real error, pass up.
+ return delete_status;
}
+ ++num_updated_documents;
}
return num_updated_documents;
@@ -1617,24 +1420,6 @@
schema_store_ = schema_store;
document_validator_.UpdateSchemaStore(schema_store);
- // Append a tombstone for each deleted schema type. This way, we don't have
- // to read out each document, check if the schema type has been deleted, and
- // append a tombstone per-document.
- for (const auto& schema_type :
- set_schema_result.schema_types_deleted_by_name) {
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- document_log_->WriteProto(CreateSchemaTypeTombstone(schema_type))
- .status();
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
- << "Failed to delete schema_type. schema_type = "
- << schema_type;
- return status;
- }
- }
-
int size = document_id_mapper_->num_elements();
for (DocumentId document_id = 0; document_id < size; document_id++) {
auto exists_or = DoesDocumentExistAndGetFileOffset(document_id);
@@ -1653,14 +1438,8 @@
ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
filter_cache_->Get(document_id));
- if (set_schema_result.schema_types_deleted_by_id.count(
- filter_data->schema_type_id()) != 0) {
- // We already created a tombstone for this deleted type. Just update the
- // derived files now.
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(document_id, kDocDeletedFlag));
- continue;
- }
+ bool delete_document = set_schema_result.schema_types_deleted_by_id.count(
+ filter_data->schema_type_id()) != 0;
// Check if we need to update the FilterCache entry for this document. It
// may have been assigned a different SchemaTypeId in the new SchemaStore.
@@ -1684,17 +1463,17 @@
filter_cache_->mutable_array()[document_id].set_schema_type_id(
schema_type_id);
}
-
if (revalidate_document) {
- if (!document_validator_.Validate(document).ok()) {
- // Document is no longer valid with the new SchemaStore. Mark as
- // deleted
- auto delete_status = Delete(document.namespace_(), document.uri());
- if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
- // Real error, pass up
- return delete_status;
- }
- }
+ delete_document = !document_validator_.Validate(document).ok();
+ }
+ }
+
+ if (delete_document) {
+ // Document's schema type was deleted or it failed revalidation. Erase it.
+ auto delete_status = Delete(document_id);
+ if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
+ // Real error, pass up
+ return delete_status;
}
}
}
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 533b240..832c470 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -109,6 +109,11 @@
// previously initialized with this directory, it will reload the files saved
// by the last instance.
//
+ // force_recovery_and_revalidate_documents=true will pre-emptively throw out
+ // the derived files and validate each document while recreating them. This
+ // can be used to indicate that the schema (and type ids) may have changed and
+ // those changes might not have been applied to the document store.
+ //
// If initialize_stats is present, the fields related to DocumentStore will be
// populated.
//
@@ -125,6 +130,7 @@
static libtextclassifier3::StatusOr<DocumentStore::CreateResult> Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
+ bool force_recovery_and_revalidate_documents = false,
InitializeStatsProto* initialize_stats = nullptr);
// Returns the maximum DocumentId that the DocumentStore has assigned. If
@@ -197,13 +203,10 @@
bool DoesDocumentExist(DocumentId document_id) const;
// Deletes the document identified by the given namespace and uri. The
- // document proto will be marked as deleted if 'soft_delete' is true,
- // otherwise the document proto will be erased immediately.
+ // document proto will be erased immediately.
//
// NOTE:
- // 1. The soft deletion uses less CPU power, it can be applied on
- // non-sensitive data.
- // 2. Space is not reclaimed for deleted documents until Optimize() is
+ // Space is not reclaimed for deleted documents until Optimize() is
// called.
//
// Returns:
@@ -211,26 +214,20 @@
// NOT_FOUND if no document exists with namespace, uri
// INTERNAL_ERROR on IO error
libtextclassifier3::Status Delete(std::string_view name_space,
- std::string_view uri,
- bool soft_delete = false);
+ std::string_view uri);
- // Deletes the document identified by the given document_id. The
- // document proto will be marked as deleted if 'soft_delete' is true,
- // otherwise the document proto will be erased immediately.
+ // Deletes the document identified by the given document_id. The document
+ // proto will be erased immediately.
//
// NOTE:
- // 1. If possible, please use the other method Delete(name_space, uri,
- // soft_delete) for soft deletes because we need namespace and uri to
- // perform soft deletes.
- // 2. Space is not reclaimed for deleted documents until Optimize() is
+ // Space is not reclaimed for deleted documents until Optimize() is
// called.
//
// Returns:
// OK on success
// INTERNAL_ERROR on IO error
// INVALID_ARGUMENT if document_id is invalid.
- libtextclassifier3::Status Delete(DocumentId document_id,
- bool soft_delete = false);
+ libtextclassifier3::Status Delete(DocumentId document_id);
// Returns the NamespaceId of the string namespace
//
@@ -314,38 +311,30 @@
libtextclassifier3::Status ReportUsage(const UsageReport& usage_report);
// Deletes all documents belonging to the given namespace. The documents will
- // be marked as deleted if 'soft_delete' is true, otherwise they will be
- // erased immediately.
+ // be erased immediately.
//
// NOTE:
- // 1. The soft deletion uses less CPU power, it can be applied on
- // non-sensitive data.
- // 2. Space is not reclaimed for deleted documents until Optimize() is
+ // Space is not reclaimed for deleted documents until Optimize() is
// called.
//
// Returns:
// OK on success
// NOT_FOUND if namespace doesn't exist
// INTERNAL_ERROR on IO error
- DeleteByGroupResult DeleteByNamespace(std::string_view name_space,
- bool soft_delete = false);
+ DeleteByGroupResult DeleteByNamespace(std::string_view name_space);
// Deletes all documents belonging to the given schema type. The documents
- // will be marked as deleted if 'soft_delete' is true, otherwise they will be
- // erased immediately.
+ // will be erased immediately.
//
// NOTE:
- // 1. The soft deletion uses less CPU power, it can be applied on
- // non-sensitive data.
- // 2. Space is not reclaimed for deleted documents until Optimize() is
+ // Space is not reclaimed for deleted documents until Optimize() is
// called.
//
// Returns:
// OK on success
// NOT_FOUND if schema_type doesn't exist
// INTERNAL_ERROR on IO error
- DeleteByGroupResult DeleteBySchemaType(std::string_view schema_type,
- bool soft_delete = false);
+ DeleteByGroupResult DeleteBySchemaType(std::string_view schema_type);
// Syncs all the data and metadata changes to disk.
//
@@ -508,6 +497,7 @@
bool initialized_ = false;
libtextclassifier3::StatusOr<DataLoss> Initialize(
+ bool force_recovery_and_revalidate_documents,
InitializeStatsProto* initialize_stats);
// Creates sub-components and verifies the integrity of each sub-component.
@@ -518,6 +508,9 @@
// Re-generates all files derived from the ground truth: the document log.
//
+ // revalidate_documents=true will also cause each document to be revalidated
+ // against the schema as it is read out of the document log.
+ //
// NOTE: if this function fails, the only thing we can do is to retry it until
// it succeeds or prevent the initialization of a DocumentStore. The
// DocumentStore object wouldn't work reliably if this fails.
@@ -528,7 +521,7 @@
// document_id
// mapper.
// 3. Create header and store the updated combined checksum
- libtextclassifier3::Status RegenerateDerivedFiles();
+ libtextclassifier3::Status RegenerateDerivedFiles(bool revalidate_documents);
// Resets the unique_ptr to the document_key_mapper, deletes the underlying
// file, and re-creates a new instance of the document_key_mapper .
@@ -591,9 +584,8 @@
// Helper function to do batch deletes. Documents with the given
// "namespace_id" and "schema_type_id" will be deleted. If callers don't need
// to specify the namespace or schema type, pass in kInvalidNamespaceId or
- // kInvalidSchemaTypeId. The document protos will be marked as deleted if
- // 'soft_delete' is true, otherwise the document protos with their derived
- // data will be erased / cleared immediately.
+ // kInvalidSchemaTypeId. The document protos with their derived data will be
+ // erased / cleared immediately.
//
// NOTE: Space is not reclaimed in the derived files until Optimize() is
// called.
@@ -602,28 +594,7 @@
// Number of documents that were actually updated to be deleted
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<int> BatchDelete(NamespaceId namespace_id,
- SchemaTypeId schema_type_id,
- bool soft_delete);
-
- // Marks the document identified by the given name_space, uri and document_id
- // as deleted, to be removed later during Optimize().
- //
- // Returns:
- // OK on success
- // INTERNAL_ERROR on IO error
- libtextclassifier3::Status SoftDelete(std::string_view name_space,
- std::string_view uri,
- DocumentId document_id);
-
- // Erases the document at the given document_log_offset from the document_log
- // and clears the derived data identified by the given document_id. The space
- // will be reclaimed later during Optimize().
- //
- // Returns:
- // OK on success
- // INTERNAL_ERROR on IO error
- libtextclassifier3::Status HardDelete(DocumentId document_id,
- int64_t document_log_offset);
+ SchemaTypeId schema_type_id);
// Helper method to find a DocumentId that is associated with the given
// namespace and uri.
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index 7b04a76..42aabde 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -19,6 +19,7 @@
#include <memory>
#include <string>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/str_cat.h"
@@ -29,6 +30,7 @@
#include "icing/file/mock-filesystem.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/storage.pb.h"
@@ -41,7 +43,6 @@
#include "icing/store/namespace-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
-#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
@@ -90,6 +91,9 @@
constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
+constexpr PropertyConfigProto_DataType_Code TYPE_INT =
+ PropertyConfigProto_DataType_Code_INT64;
+
UsageReport CreateUsageReport(std::string name_space, std::string uri,
int64 timestamp_ms,
UsageReport::UsageType usage_type) {
@@ -181,6 +185,19 @@
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
+ void CorruptDocStoreHeaderChecksumFile() {
+ // Change the DocStore's header combined checksum so that it won't match the
+ // recalculated checksum on initialization. This will force a regeneration
+ // of derived files from ground truth.
+ const std::string header_file =
+ absl_ports::StrCat(document_store_dir_, "/document_store_header");
+ DocumentStore::Header header;
+ header.magic = DocumentStore::Header::kMagic;
+ header.checksum = 10; // Arbitrary garbage checksum
+ filesystem_.DeleteFile(header_file.c_str());
+ filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+ }
+
const Filesystem filesystem_;
const std::string test_dir_;
FakeClock fake_clock_;
@@ -342,7 +359,7 @@
IsFalse());
}
-TEST_F(DocumentStoreTest, GetSoftDeletedDocumentNotFound) {
+TEST_F(DocumentStoreTest, GetDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -356,29 +373,7 @@
IsOkAndHolds(EqualsProto(test_document1_)));
ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri(),
- /*soft_delete=*/true));
- EXPECT_THAT(
- document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(DocumentStoreTest, GetHardDeletedDocumentNotFound) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- ICING_EXPECT_OK(document_store->Put(DocumentProto(test_document1_)));
- EXPECT_THAT(
- document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
- IsOkAndHolds(EqualsProto(test_document1_)));
-
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri(),
- /*soft_delete=*/false));
+ test_document1_.uri()));
EXPECT_THAT(
document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -488,7 +483,7 @@
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, SoftDeleteByNamespaceOk) {
+TEST_F(DocumentStoreTest, DeleteByNamespaceOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -519,7 +514,7 @@
// DELETE namespace.1. document1 and document 4 should be deleted. document2
// and document3 should still be retrievable.
DocumentStore::DeleteByGroupResult group_result =
- doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/true);
+ doc_store->DeleteByNamespace("namespace.1");
EXPECT_THAT(group_result.status, IsOk());
EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
@@ -532,51 +527,7 @@
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, HardDeleteByNamespaceOk) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- DocumentProto document1 = test_document1_;
- document1.set_namespace_("namespace.1");
- document1.set_uri("uri1");
- ICING_ASSERT_OK(doc_store->Put(document1));
-
- DocumentProto document2 = test_document1_;
- document2.set_namespace_("namespace.2");
- document2.set_uri("uri1");
- ICING_ASSERT_OK(doc_store->Put(document2));
-
- DocumentProto document3 = test_document1_;
- document3.set_namespace_("namespace.3");
- document3.set_uri("uri1");
- ICING_ASSERT_OK(doc_store->Put(document3));
-
- DocumentProto document4 = test_document1_;
- document4.set_namespace_("namespace.1");
- document4.set_uri("uri2");
- ICING_ASSERT_OK(doc_store->Put(document4));
-
- // DELETE namespace.1. document1 and document 4 should be deleted. document2
- // and document3 should still be retrievable.
- DocumentStore::DeleteByGroupResult group_result =
- doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/false);
- EXPECT_THAT(group_result.status, IsOk());
- EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
- EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(doc_store->Get(document2.namespace_(), document2.uri()),
- IsOkAndHolds(EqualsProto(document2)));
- EXPECT_THAT(doc_store->Get(document3.namespace_(), document3.uri()),
- IsOkAndHolds(EqualsProto(document3)));
- EXPECT_THAT(doc_store->Get(document4.namespace_(), document4.uri()),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNonexistentNamespaceNotFound) {
+TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -589,10 +540,7 @@
int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(doc_store
- ->DeleteByNamespace("nonexistent_namespace",
- /*soft_delete=*/true)
- .status,
+ EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace").status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t document_log_size_after = filesystem_.GetFileSize(
@@ -600,31 +548,7 @@
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, HardDeleteByNamespaceNonexistentNamespaceNotFound) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // Validates that deleting something non-existing won't append anything to
- // ground truth
- int64_t document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
-
- EXPECT_THAT(doc_store
- ->DeleteByNamespace("nonexistent_namespace",
- /*soft_delete=*/false)
- .status,
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-
- int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
-}
-
-TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNoExistingDocumentsNotFound) {
+TEST_F(DocumentStoreTest, DeleteByNamespaceNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -639,33 +563,9 @@
// At this point, there are no existing documents with the namespace, even
// though Icing's derived files know about this namespace. We should still
// return NOT_FOUND since nothing existing has this namespace.
- EXPECT_THAT(document_store
- ->DeleteByNamespace(test_document1_.namespace_(),
- /*soft_delete=*/true)
- .status,
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(DocumentStoreTest, HardDeleteByNamespaceNoExistingDocumentsNotFound) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- ICING_EXPECT_OK(document_store->Put(test_document1_));
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
-
- // At this point, there are no existing documents with the namespace, even
- // though Icing's derived files know about this namespace. We should still
- // return NOT_FOUND since nothing existing has this namespace.
- EXPECT_THAT(document_store
- ->DeleteByNamespace(test_document1_.namespace_(),
- /*soft_delete=*/false)
- .status,
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(
+ document_store->DeleteByNamespace(test_document1_.namespace_()).status,
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
@@ -710,17 +610,7 @@
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
} // Destructors should update checksum and persist all data to file.
- // Change the DocStore's header combined checksum so that it won't match the
- // recalculated checksum on initialization. This will force a regeneration of
- // derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
-
+ CorruptDocStoreHeaderChecksumFile();
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -744,92 +634,7 @@
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeOk) {
- SchemaProto schema =
- SchemaBuilder()
- .AddType(SchemaTypeConfigBuilder().SetType("email"))
- .AddType(SchemaTypeConfigBuilder().SetType("message"))
- .AddType(SchemaTypeConfigBuilder().SetType("person"))
- .Build();
- std::string schema_store_dir = schema_store_dir_ + "_custom";
- filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
- filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
-
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
-
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- DocumentProto email_document_1 = DocumentBuilder()
- .SetKey("namespace1", "1")
- .SetSchema("email")
- .SetCreationTimestampMs(1)
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_1_document_id,
- document_store->Put(email_document_1));
-
- DocumentProto email_document_2 = DocumentBuilder()
- .SetKey("namespace2", "2")
- .SetSchema("email")
- .SetCreationTimestampMs(1)
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_2_document_id,
- document_store->Put(email_document_2));
-
- DocumentProto message_document = DocumentBuilder()
- .SetKey("namespace", "3")
- .SetSchema("message")
- .SetCreationTimestampMs(1)
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
- document_store->Put(message_document));
-
- DocumentProto person_document = DocumentBuilder()
- .SetKey("namespace", "4")
- .SetSchema("person")
- .SetCreationTimestampMs(1)
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id,
- document_store->Put(person_document));
-
- // Delete the "email" type and ensure that it works across both
- // email_document's namespaces. And that other documents aren't affected.
- DocumentStore::DeleteByGroupResult group_result =
- document_store->DeleteBySchemaType("email", /*soft_delete=*/true);
- EXPECT_THAT(group_result.status, IsOk());
- EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
- EXPECT_THAT(document_store->Get(email_1_document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(document_store->Get(email_2_document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(document_store->Get(message_document_id),
- IsOkAndHolds(EqualsProto(message_document)));
- EXPECT_THAT(document_store->Get(person_document_id),
- IsOkAndHolds(EqualsProto(person_document)));
-
- // Delete the "message" type and check that other documents aren't affected
- group_result =
- document_store->DeleteBySchemaType("message", /*soft_delete=*/true);
- EXPECT_THAT(group_result.status, IsOk());
- EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
- EXPECT_THAT(document_store->Get(email_1_document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(document_store->Get(email_2_document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(document_store->Get(message_document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(document_store->Get(person_document_id),
- IsOkAndHolds(EqualsProto(person_document)));
-}
-
-TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeOk) {
+TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
SchemaProto schema =
SchemaBuilder()
.AddType(SchemaTypeConfigBuilder().SetType("email"))
@@ -888,7 +693,7 @@
// Delete the "email" type and ensure that it works across both
// email_document's namespaces. And that other documents aren't affected.
DocumentStore::DeleteByGroupResult group_result =
- document_store->DeleteBySchemaType("email", /*soft_delete=*/true);
+ document_store->DeleteBySchemaType("email");
EXPECT_THAT(group_result.status, IsOk());
EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(document_store->Get(email_1_document_id),
@@ -901,8 +706,7 @@
IsOkAndHolds(EqualsProto(person_document)));
// Delete the "message" type and check that other documents aren't affected
- group_result =
- document_store->DeleteBySchemaType("message", /*soft_delete=*/true);
+ group_result = document_store->DeleteBySchemaType("message");
EXPECT_THAT(group_result.status, IsOk());
EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
EXPECT_THAT(document_store->Get(email_1_document_id),
@@ -915,7 +719,7 @@
IsOkAndHolds(EqualsProto(person_document)));
}
-TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
+TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -928,10 +732,7 @@
int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(document_store
- ->DeleteBySchemaType("nonexistent_type",
- /*soft_delete=*/true)
- .status,
+ EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type").status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t document_log_size_after = filesystem_.GetFileSize(
@@ -940,32 +741,7 @@
EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- // Validates that deleting something non-existing won't append anything to
- // ground truth
- int64_t document_log_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
-
- EXPECT_THAT(document_store
- ->DeleteBySchemaType("nonexistent_type",
- /*soft_delete=*/false)
- .status,
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-
- int64_t document_log_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
-
- EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
-}
-
-TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNoExistingDocumentsNotFound) {
+TEST_F(DocumentStoreTest, DeleteBySchemaTypeNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -977,30 +753,9 @@
ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
test_document1_.uri()));
- EXPECT_THAT(document_store
- ->DeleteBySchemaType(test_document1_.schema(),
- /*soft_delete=*/true)
- .status,
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNoExistingDocumentsNotFound) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- ICING_EXPECT_OK(document_store->Put(test_document1_));
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
-
- EXPECT_THAT(document_store
- ->DeleteBySchemaType(test_document1_.schema(),
- /*soft_delete=*/false)
- .status,
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(
+ document_store->DeleteBySchemaType(test_document1_.schema()).status,
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
@@ -1057,17 +812,7 @@
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
} // Destructors should update checksum and persist all data to file.
- // Change the DocumentStore's header combined checksum so that it won't match
- // the recalculated checksum on initialization. This will force a regeneration
- // of derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
-
+ CorruptDocStoreHeaderChecksumFile();
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -1146,16 +891,7 @@
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
} // Destructors should update checksum and persist all data to file.
- // Change the DocumentStore's header combined checksum so that it won't match
- // the recalculated checksum on initialization. This will force a regeneration
- // of derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+ CorruptDocStoreHeaderChecksumFile();
SchemaProto new_schema =
SchemaBuilder()
@@ -1484,17 +1220,7 @@
IsOkAndHolds(EqualsProto(test_document2_)));
}
- // Change the DocStore's header combined checksum so that it won't match the
- // recalculated checksum on initialization. This will force a regeneration of
- // derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
-
+ CorruptDocStoreHeaderChecksumFile();
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -1912,7 +1638,7 @@
StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
}
-TEST_F(DocumentStoreTest, SoftDeletionDoesNotClearFilterCache) {
+TEST_F(DocumentStoreTest, DeleteClearsFilterCache) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1930,59 +1656,13 @@
/*schema_type_id=*/0,
/*expiration_timestamp_ms=*/document1_expiration_timestamp_)));
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/true));
- // Associated entry of the deleted document is removed.
- EXPECT_THAT(doc_store->GetDocumentFilterData(document_id).status(), IsOk());
-}
-
-TEST_F(DocumentStoreTest, HardDeleteClearsFilterCache) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- doc_store->Put(test_document1_));
-
- EXPECT_THAT(
- doc_store->GetDocumentFilterData(document_id),
- IsOkAndHolds(DocumentFilterData(
- /*namespace_id=*/0,
- /*schema_type_id=*/0,
- /*expiration_timestamp_ms=*/document1_expiration_timestamp_)));
-
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/false));
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
// Associated entry of the deleted document is removed.
EXPECT_THAT(doc_store->GetDocumentFilterData(document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, SoftDeletionDoesNotClearScoreCache) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- doc_store->Put(test_document1_, /*num_tokens=*/4));
-
- EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id),
- IsOkAndHolds(DocumentAssociatedScoreData(
- /*corpus_id=*/0, /*document_score=*/document1_score_,
- /*creation_timestamp_ms=*/document1_creation_timestamp_,
- /*length_in_tokens=*/4)));
-
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/true));
- // Associated entry of the deleted document is removed.
- EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id).status(),
- IsOk());
-}
-
-TEST_F(DocumentStoreTest, HardDeleteClearsScoreCache) {
+TEST_F(DocumentStoreTest, DeleteClearsScoreCache) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2000,13 +1680,13 @@
/*creation_timestamp_ms=*/document1_creation_timestamp_,
/*length_in_tokens=*/4)));
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/false));
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
// Associated entry of the deleted document is removed.
EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, SoftDeleteDoesNotClearUsageScores) {
+TEST_F(DocumentStoreTest, DeleteShouldClearUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2028,38 +1708,8 @@
ASSERT_THAT(doc_store->GetUsageScores(document_id),
IsOkAndHolds(expected_scores));
- // Soft delete the document.
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/true));
-
- // The scores should be the same.
- ASSERT_THAT(doc_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
-}
-
-TEST_F(DocumentStoreTest, HardDeleteShouldClearUsageScores) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- doc_store->Put(test_document1_));
-
- // Report usage with type 1.
- UsageReport usage_report_type1 = CreateUsageReport(
- /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
- UsageReport::USAGE_TYPE1);
- ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1));
-
- UsageStore::UsageScores expected_scores;
- expected_scores.usage_type1_count = 1;
- ASSERT_THAT(doc_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
-
- // Hard delete the document.
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/false));
+ // Delete the document.
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
// The scores should be cleared.
expected_scores.usage_type1_count = 0;
@@ -2356,16 +2006,7 @@
message_expiration_timestamp = message_data.expiration_timestamp_ms();
} // Everything destructs and commits changes to file
- // Change the DocumentStore's header combined checksum so that it won't match
- // the recalculated checksum on initialization. This will force a regeneration
- // of derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+ CorruptDocStoreHeaderChecksumFile();
// Change the schema so that we don't know of the Document's type anymore.
// Since we can't set backwards incompatible changes, we do some file-level
@@ -3155,17 +2796,7 @@
IsOkAndHolds(expected_scores));
}
- // Change the DocStore's header combined checksum so that it won't match the
- // recalculated checksum on initialization. This will force a regeneration of
- // derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
-
+ CorruptDocStoreHeaderChecksumFile();
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -3264,45 +2895,6 @@
IsOkAndHolds(expected_scores));
}
-TEST_F(DocumentStoreTest,
- UsageScoresShouldNotBeCopiedOverFromOldSoftDeletedDocs) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentId document_id,
- document_store->Put(DocumentProto(test_document1_)));
-
- // Report usage with type 1.
- UsageReport usage_report_type1 = CreateUsageReport(
- /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
- UsageReport::USAGE_TYPE1);
- ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
-
- UsageStore::UsageScores expected_scores;
- ++expected_scores.usage_type1_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
-
- // Soft delete the doc.
- ICING_ASSERT_OK(document_store->Delete(document_id, /*soft_delete=*/true));
-
- // Put the same document.
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentId updated_document_id,
- document_store->Put(DocumentProto(test_document1_)));
- // We should get a different document id.
- ASSERT_THAT(updated_document_id, Not(Eq(document_id)));
-
- // Usage scores should be cleared.
- EXPECT_THAT(document_store->GetUsageScores(updated_document_id),
- IsOkAndHolds(UsageStore::UsageScores()));
-}
-
TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -3431,6 +3023,15 @@
ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
}
+// TODO(b/185845269) Re-enable this test by copying over a full valid set of
+// document store files. Right now this test only includes the score_cache and
+// the document store header.
+//
+// This causes a problem now because this cl changes behavior to not consider an
+// InitializeDerivedFiles failure to be a recovery if there is nothing to
+// recover because the document store is empty.
+#define DISABLE_BACKWARDS_COMPAT_TEST
+#ifndef DISABLE_BACKWARDS_COMPAT_TEST
TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
// The directory testdata/v0/document_store contains only the scoring_cache
// and the document_store_header (holding the crc for the scoring_cache). If
@@ -3471,7 +3072,9 @@
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get(), &initialize_stats));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ &initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
// The store_cache trigger regeneration because its element size is
@@ -3479,6 +3082,7 @@
// score_cache).
EXPECT_TRUE(initialize_stats.has_document_store_recovery_cause());
}
+#endif // DISABLE_BACKWARDS_COMPAT_TEST
TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
@@ -3589,6 +3193,399 @@
Eq(0));
}
+TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
+ // Start fresh and set the schema with one type.
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ // The typeid for "email" should be 0.
+ ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+
+ DocumentId docid = kInvalidDocumentId;
+ {
+ // Create the document store the first time and add an email document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto doc =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data,
+ doc_store->GetDocumentFilterData(docid));
+
+ ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
+ }
+
+ // Add another type to the schema before the email type.
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("alarm")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("time")
+ .SetDataType(TYPE_INT)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_type_config)
+ .Build();
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ // Adding a new type should cause ids to be reassigned. Ids are assigned in
+ // order of appearance so 'alarm' should be 0 and 'email' should be 1.
+ ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0));
+ ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(1));
+
+ {
+ // Create the document store the second time and force recovery
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/true));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // Ensure that the type id of the email document has been correctly updated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data,
+ doc_store->GetDocumentFilterData(docid));
+ ASSERT_THAT(filter_data.schema_type_id(), Eq(1));
+ }
+}
+
+TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
+ // Start fresh and set the schema with one type.
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ // The typeid for "email" should be 0.
+ ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+
+ DocumentId docid = kInvalidDocumentId;
+ {
+ // Create the document store the first time and add an email document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto doc =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data,
+ doc_store->GetDocumentFilterData(docid));
+
+ ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
+ }
+
+ // Add another type to the schema.
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("alarm")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("time")
+ .SetDataType(TYPE_INT)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_type_config)
+ .Build();
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ // Adding a new type should cause ids to be reassigned. Ids are assigned in
+ // order of appearance so 'alarm' should be 0 and 'email' should be 1.
+ ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0));
+ ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(1));
+
+ {
+ // Create the document store the second time. Don't force recovery.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // Check that the type id of the email document has not been updated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data,
+ doc_store->GetDocumentFilterData(docid));
+ ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
+ }
+}
+
+TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
+ // Start fresh and set the schema with one type.
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+
+ DocumentId docid = kInvalidDocumentId;
+ DocumentProto docWithBody =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+ DocumentProto docWithoutBody =
+ DocumentBuilder()
+ .SetKey("icing", "email/2")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+
+ {
+ // Create the document store the first time and add two email documents: one
+ // that has the 'body' section and one that doesn't.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody));
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody));
+
+ ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithBody)));
+ ASSERT_THAT(
+ doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithoutBody)));
+ }
+
+ // Delete the 'body' property from the 'email' type, making all pre-existing
+ // documents with the 'body' property invalid.
+ email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ schema = SchemaBuilder().AddType(email_type_config).Build();
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true),
+ IsOk());
+
+ {
+ // Corrupt the header checksum, then recreate the store, forcing recovery.
+ CorruptDocStoreHeaderChecksumFile();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/true));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ ASSERT_THAT(
+ doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithoutBody)));
+ }
+}
+
+TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
+ // Start fresh and set the schema with one type.
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+
+ DocumentId docid = kInvalidDocumentId;
+ DocumentProto docWithBody =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+ DocumentProto docWithoutBody =
+ DocumentBuilder()
+ .SetKey("icing", "email/2")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+
+ {
+ // Create the document store the first time and add two email documents: one
+ // that has the 'body' section and one that doesn't.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody));
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody));
+
+ ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithBody)));
+ ASSERT_THAT(
+ doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithoutBody)));
+ }
+
+ // Delete the 'body' property from the 'email' type, making all pre-existing
+ // documents with the 'body' property invalid.
+ email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ schema = SchemaBuilder().AddType(email_type_config).Build();
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true),
+ IsOk());
+
+ {
+ // Corrupt the document store header checksum so that we will perform
+ // recovery, but without revalidation.
+ CorruptDocStoreHeaderChecksumFile();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithBody)));
+ ASSERT_THAT(
+ doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithoutBody)));
+ }
+}
+
} // namespace
} // namespace lib
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index bbc8084..8d8bdf2 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -25,7 +25,6 @@
#include "icing/absl_ports/str_join.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/legacy/core/icing-string-util.h"
-#include "icing/proto/search.proto.h"
#include "icing/proto/search.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
diff --git a/icing/testing/schema-generator.h b/icing/testing/schema-generator.h
index 78430cc..12133f5 100644
--- a/icing/testing/schema-generator.h
+++ b/icing/testing/schema-generator.h
@@ -18,7 +18,6 @@
#include <random>
#include <string>
-#include "icing/proto/schema.proto.h"
#include "icing/proto/schema.pb.h"
namespace icing {
diff --git a/icing/tokenization/language-segmenter-iterator_test.cc b/icing/tokenization/language-segmenter-iterator_test.cc
index 2b1911e..317da04 100644
--- a/icing/tokenization/language-segmenter-iterator_test.cc
+++ b/icing/tokenization/language-segmenter-iterator_test.cc
@@ -16,8 +16,8 @@
#include "gtest/gtest.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/portable/platform.h"
#include "icing/testing/common-matchers.h"
-#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
@@ -143,8 +143,7 @@
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(LanguageSegmenterIteratorTest,
- ResetToTermEndingBeforeWithZeroNotFound) {
+TEST_F(LanguageSegmenterIteratorTest, ResetToTermEndingBeforeWithZeroNotFound) {
language_segmenter_factory::SegmenterOptions options(ULOC_US);
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
diff --git a/icing/tokenization/plain-tokenizer_test.cc b/icing/tokenization/plain-tokenizer_test.cc
index f578567..2fb9750 100644
--- a/icing/tokenization/plain-tokenizer_test.cc
+++ b/icing/tokenization/plain-tokenizer_test.cc
@@ -19,9 +19,9 @@
#include "gmock/gmock.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/portable/platform.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/icu-i18n-test-utils.h"
-#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/tokenizer-factory.h"
diff --git a/icing/tokenization/raw-query-tokenizer_test.cc b/icing/tokenization/raw-query-tokenizer_test.cc
index e1a666b..500efa0 100644
--- a/icing/tokenization/raw-query-tokenizer_test.cc
+++ b/icing/tokenization/raw-query-tokenizer_test.cc
@@ -17,8 +17,8 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/portable/platform.h"
#include "icing/testing/common-matchers.h"
-#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/tokenizer-factory.h"
diff --git a/proto/icing/proto/document_wrapper.proto b/proto/icing/proto/document_wrapper.proto
index e8eb992..929ee33 100644
--- a/proto/icing/proto/document_wrapper.proto
+++ b/proto/icing/proto/document_wrapper.proto
@@ -20,7 +20,6 @@
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
-
option objc_class_prefix = "ICNG";
// DocumentWrapper as a wrapper of the user-facing DocumentProto is meant to
@@ -30,6 +29,5 @@
message DocumentWrapper {
optional DocumentProto document = 1;
- // Indicates if the document is marked as deleted
- optional bool deleted = 2;
+ reserved 2;
}
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
index a9780b5..29f7f80 100644
--- a/proto/icing/proto/logging.proto
+++ b/proto/icing/proto/logging.proto
@@ -40,8 +40,9 @@
// Data in index is inconsistent with ground truth.
INCONSISTENT_WITH_GROUND_TRUTH = 2;
- // Total checksum of all the components does not match.
- TOTAL_CHECKSUM_MISMATCH = 3;
+ // Changes were made to the schema, but possibly not fully applied to the
+ // document store and the index - requiring a recovery.
+ SCHEMA_CHANGES_OUT_OF_SYNC = 3;
// Random I/O errors.
IO_ERROR = 4;
@@ -49,13 +50,13 @@
// Possible recovery causes for document store:
// - DATA_LOSS
- // - TOTAL_CHECKSUM_MISMATCH
+ // - SCHEMA_CHANGES_OUT_OF_SYNC
// - IO_ERROR
optional RecoveryCause document_store_recovery_cause = 2;
// Possible recovery causes for index:
// - INCONSISTENT_WITH_GROUND_TRUTH
- // - TOTAL_CHECKSUM_MISMATCH
+ // - SCHEMA_CHANGES_OUT_OF_SYNC
// - IO_ERROR
optional RecoveryCause index_restoration_cause = 3;
@@ -125,8 +126,11 @@
// Stats of the top-level function IcingSearchEngine::Search() and
// IcingSearchEngine::GetNextPage().
-// Next tag: 16
+// Next tag: 17
message QueryStatsProto {
+ // The UTF-8 length of the query string
+ optional int32 query_length = 16;
+
// Number of terms in the query string.
optional int32 num_terms = 1;
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index a7d657e..85538b5 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=368957181)
+set(synced_AOSP_CL_number=370944273)