Pull upstream changes.
Change-Id: I73ea5f80ccf16a02519f6f7ccfc993e9b0f39f86
diff --git a/icing/absl_ports/str_cat.cc b/icing/absl_ports/str_cat.cc
index 2cf020d..8695824 100644
--- a/icing/absl_ports/str_cat.cc
+++ b/icing/absl_ports/str_cat.cc
@@ -175,9 +175,8 @@
for (std::string_view s : pieces) {
result_size += s.length();
}
- // Create result with enough room to fit all operands.
- std::string result;
- result.__resize_default_init(result_size);
+ // Resize dest with enough room to fit all operands.
+ dest->__resize_default_init(result_size);
char* out = &(*dest)[old_size];
for (std::string_view s : pieces) {
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index eb89db8..3ecef54 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -149,6 +149,12 @@
// within a directory that already exists.
// mmap_strategy : Strategy/optimizations to access the content in the vector,
// see MemoryMappedFile::Strategy for more details
+ //
+ // Return:
+ // FAILED_PRECONDITION_ERROR if the file checksum doesn't match the stored
+ // checksum.
+ // INTERNAL_ERROR on I/O errors.
+ // UNIMPLEMENTED_ERROR if created with strategy READ_WRITE_MANUAL_SYNC.
static libtextclassifier3::StatusOr<std::unique_ptr<FileBackedVector<T>>>
Create(const Filesystem& filesystem, const std::string& file_path,
MemoryMappedFile::Strategy mmap_strategy);
@@ -402,7 +408,7 @@
// Check header
if (header->header_checksum != header->CalculateHeaderChecksum()) {
- return absl_ports::InternalError(
+ return absl_ports::FailedPreconditionError(
absl_ports::StrCat("Invalid header crc for ", file_path));
}
@@ -420,7 +426,7 @@
vector_checksum.Append(vector_contents);
if (vector_checksum.Get() != header->vector_checksum) {
- return absl_ports::InternalError(
+ return absl_ports::FailedPreconditionError(
absl_ports::StrCat("Invalid vector contents for ", file_path));
}
diff --git a/icing/file/file-backed-vector_test.cc b/icing/file/file-backed-vector_test.cc
index 6c3b931..bc2fef6 100644
--- a/icing/file/file-backed-vector_test.cc
+++ b/icing/file/file-backed-vector_test.cc
@@ -132,7 +132,7 @@
ASSERT_THAT(FileBackedVector<char>::Create(
filesystem_, file_path_,
MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
// Get it back into an ok state
filesystem_.PWrite(file_path_.data(),
diff --git a/icing/icing-search-engine-test-jni-layer.cc b/icing/icing-search-engine-test-jni-layer.cc
new file mode 100644
index 0000000..6acc99b
--- /dev/null
+++ b/icing/icing-search-engine-test-jni-layer.cc
@@ -0,0 +1,36 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <jni.h>
+
+#include "gtest/gtest.h"
+#include "icing/testing/logging-event-listener.h"
+
+// Global variable used so that the test implementation can access the JNIEnv.
+JNIEnv* g_jenv = nullptr;
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_icing_jni_IcingSearchEngineJniTest_testsMain(JNIEnv* env, jclass ignored) {
+ g_jenv = env;
+
+ std::vector<char*> my_argv;
+ char arg[] = "jni-test-lib";
+ my_argv.push_back(arg);
+ int argc = 1;
+ char** argv = &(my_argv[0]);
+ testing::InitGoogleTest(&argc, argv);
+ testing::UnitTest::GetInstance()->listeners().Append(
+ new icing::lib::LoggingEventListener());
+ return RUN_ALL_TESTS() == 0;
+}
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 5e0a46e..fdec473 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -218,15 +218,17 @@
IcingSearchEngine::IcingSearchEngine(const IcingSearchEngineOptions& options,
std::unique_ptr<const JniCache> jni_cache)
: IcingSearchEngine(options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
std::make_unique<Clock>(), std::move(jni_cache)) {}
IcingSearchEngine::IcingSearchEngine(
IcingSearchEngineOptions options,
- std::unique_ptr<const Filesystem> filesystem, std::unique_ptr<Clock> clock,
- std::unique_ptr<const JniCache> jni_cache)
+ std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<Clock> clock, std::unique_ptr<const JniCache> jni_cache)
: options_(std::move(options)),
filesystem_(std::move(filesystem)),
- icing_filesystem_(std::make_unique<IcingFilesystem>()),
+ icing_filesystem_(std::move(icing_filesystem)),
clock_(std::move(clock)),
result_state_manager_(performance_configuration_.max_num_hits_per_query,
performance_configuration_.max_num_cache_results),
@@ -279,14 +281,14 @@
<< "IcingSearchEngine in inconsistent state, regenerating all "
"derived data";
status = RegenerateDerivedFiles();
- if (!status.ok()) {
- TransformStatus(status, result_status);
- return result_proto;
- }
+ } else {
+ status = RestoreIndex();
}
- initialized_ = true;
- result_status->set_code(StatusProto::OK);
+ if (status.ok() || absl_ports::IsDataLoss(status)) {
+ initialized_ = true;
+ }
+ TransformStatus(status, result_status);
return result_proto;
}
@@ -360,7 +362,8 @@
}
Index::Options index_options(index_dir, options_.index_merge_size());
- auto index_or = Index::Create(index_options, icing_filesystem_.get());
+ auto index_or =
+ Index::Create(index_options, filesystem_.get(), icing_filesystem_.get());
if (!index_or.ok()) {
if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
!filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
@@ -369,8 +372,9 @@
}
// Try recreating it from scratch and re-indexing everything.
- ICING_ASSIGN_OR_RETURN(
- index_, Index::Create(index_options, icing_filesystem_.get()));
+ ICING_ASSIGN_OR_RETURN(index_,
+ Index::Create(index_options, filesystem_.get(),
+ icing_filesystem_.get()));
ICING_RETURN_IF_ERROR(RestoreIndex());
} else {
// Index was created fine.
@@ -378,7 +382,7 @@
}
return libtextclassifier3::Status::OK;
-} // namespace lib
+}
libtextclassifier3::Status IcingSearchEngine::CheckConsistency() {
if (!HeaderExists()) {
@@ -607,12 +611,7 @@
std::move(index_processor_or).ValueOrDie();
auto status = index_processor->IndexDocument(document, document_id);
- if (!status.ok()) {
- TransformStatus(status, result_status);
- return result_proto;
- }
-
- result_status->set_code(StatusProto::OK);
+ TransformStatus(status, result_status);
return result_proto;
}
@@ -639,6 +638,19 @@
return result_proto;
}
+ReportUsageResultProto IcingSearchEngine::ReportUsage(
+ const UsageReport& usage_report) {
+ ReportUsageResultProto result_proto;
+ StatusProto* result_status = result_proto.mutable_status();
+
+ absl_ports::unique_lock l(&mutex_);
+
+ libtextclassifier3::Status status =
+ document_store_->ReportUsage(usage_report);
+ TransformStatus(status, result_status);
+ return result_proto;
+}
+
GetAllNamespacesResultProto IcingSearchEngine::GetAllNamespaces() {
GetAllNamespacesResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
@@ -975,11 +987,8 @@
}
Crc32 document_store_checksum = std::move(checksum_or).ValueOrDie();
- Crc32 index_checksum = index_->ComputeChecksum();
-
total_checksum.Append(std::to_string(document_store_checksum.Get()));
total_checksum.Append(std::to_string(schema_store_checksum.Get()));
- total_checksum.Append(std::to_string(index_checksum.Get()));
return total_checksum;
}
@@ -1182,8 +1191,8 @@
}
result_status->set_code(StatusProto::OK);
- if (result_proto.results_size() > 0) {
- result_proto.set_next_page_token(next_page_token);
+ if (page_result_state.next_page_token != kInvalidNextPageToken) {
+ result_proto.set_next_page_token(page_result_state.next_page_token);
}
return result_proto;
}
@@ -1294,20 +1303,40 @@
libtextclassifier3::Status IcingSearchEngine::RestoreIndex() {
DocumentId last_stored_document_id =
document_store_->last_added_document_id();
+ DocumentId last_indexed_document_id = index_->last_added_document_id();
if (last_stored_document_id == kInvalidDocumentId) {
- // Nothing to index
+ // Nothing to index. Make sure the index is also empty.
+ if (last_indexed_document_id != kInvalidDocumentId) {
+ ICING_RETURN_IF_ERROR(index_->Reset());
+ }
return libtextclassifier3::Status::OK;
}
+ // TruncateTo ensures that the index does not hold any data that is not
+ // present in the ground truth. If the document store lost some documents,
+ // TruncateTo will ensure that the index does not contain any hits from those
+ // lost documents. If the index does not contain any hits for documents with
+ // document id greater than last_stored_document_id, then TruncateTo will have
+ // no effect.
+ ICING_RETURN_IF_ERROR(index_->TruncateTo(last_stored_document_id));
+ DocumentId first_document_to_reindex =
+ (last_indexed_document_id != kInvalidDocumentId)
+ ? index_->last_added_document_id() + 1
+ : 0;
+
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<IndexProcessor> index_processor,
IndexProcessor::Create(schema_store_.get(), language_segmenter_.get(),
normalizer_.get(), index_.get(),
CreateIndexProcessorOptions(options_)));
- for (DocumentId document_id = kMinDocumentId;
- document_id <= last_stored_document_id; document_id++) {
+ ICING_VLOG(1) << "Restoring index by replaying documents from document id "
+ << first_document_to_reindex << " to document id "
+ << last_stored_document_id;
+ libtextclassifier3::Status overall_status;
+ for (DocumentId document_id = first_document_to_reindex;
+ document_id <= last_stored_document_id; ++document_id) {
libtextclassifier3::StatusOr<DocumentProto> document_or =
document_store_->Get(document_id);
@@ -1322,11 +1351,20 @@
}
}
- ICING_RETURN_IF_ERROR(
- index_processor->IndexDocument(document_or.ValueOrDie(), document_id));
+ libtextclassifier3::Status status =
+ index_processor->IndexDocument(document_or.ValueOrDie(), document_id);
+ if (!status.ok()) {
+ if (!absl_ports::IsDataLoss(status)) {
+ // Real error. Stop recovering and pass it up.
+ return status;
+ }
+ // Just a data loss. Keep trying to add the remaining docs, but report the
+ // data loss when we're done.
+ overall_status = status;
+ }
}
- return libtextclassifier3::Status::OK;
+ return overall_status;
}
libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() {
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index 55d6b2f..58b8df2 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -37,6 +37,7 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/proto/usage.pb.h"
#include "icing/result/result-state-manager.h"
#include "icing/schema/schema-store.h"
#include "icing/store/document-store.h"
@@ -137,6 +138,9 @@
// INTERNAL_ERROR if Icing failed to store the new schema or upgrade
// existing data based on the new schema. Using Icing beyond this error is
// undefined and may cause crashes.
+ // DATA_LOSS_ERROR if 'new_schema' requires the index to be rebuilt and an
+ // IO error leads to some documents being excluded from the index. These
+ // documents will still be retrievable via Get, but won't match queries.
//
// TODO(cassiewang) Figure out, document (and maybe even enforce) the best
// way ordering of calls between Initialize() and SetSchema(), both when
@@ -187,6 +191,9 @@
// has not been initialized yet.
// NOT_FOUND if there is no SchemaTypeConfig in the SchemaProto that matches
// the document's schema
+  //   DATA_LOSS if an IO error occurs while merging a document into the index
+ // the index is lost. These documents will still be retrievable via Get,
+ // but won't match queries.
// INTERNAL_ERROR on IO error
PutResultProto Put(DocumentProto&& document) ICING_LOCKS_EXCLUDED(mutex_);
@@ -208,6 +215,15 @@
// INTERNAL_ERROR on IO error
GetResultProto Get(std::string_view name_space, std::string_view uri);
+ // Reports usage. The corresponding usage scores of the specified document in
+ // the report will be updated.
+ //
+ // Returns:
+ // OK on success
+  //   NOT_FOUND if the [namespace + uri] key in the report doesn't exist
+ // INTERNAL_ERROR on I/O errors.
+ ReportUsageResultProto ReportUsage(const UsageReport& usage_report);
+
// Returns all the namespaces that have at least one valid document in it.
//
// Returns:
@@ -276,8 +292,8 @@
// Retrieves, scores, ranks, and returns the results according to the specs.
// Results can be empty. If there're multiple pages of results,
- // SearchResultProto.next_page_token will be populated and that can be used to
- // fetch more pages via GetNextPage() method. Clients should call
+ // SearchResultProto.next_page_token will be set to a non-zero token and can
+ // be used to fetch more pages via GetNextPage() method. Clients should call
// InvalidateNextPageToken() after they get the pages they need to release
// result cache in memory. Please refer to each proto file for spec
// definitions.
@@ -294,7 +310,12 @@
ICING_LOCKS_EXCLUDED(mutex_);
// Fetches the next page of results of a previously executed query. Results
- // can be empty if next-page token is invalid or all pages have been returned.
+ // can be empty if next-page token is invalid. Invalid next page tokens are
+ // tokens that are either zero or were previously passed to
+ // InvalidateNextPageToken. If there are pages of results remaining after the
+ // one retrieved by this call, SearchResultProto.next_page_token will be
+ // set to a non-zero token and can be used to fetch more pages via
+ // GetNextPage() method.
//
// Returns a SearchResultProto with status:
// OK with results on success
@@ -372,6 +393,7 @@
protected:
IcingSearchEngine(IcingSearchEngineOptions options,
std::unique_ptr<const Filesystem> filesystem,
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
std::unique_ptr<Clock> clock,
std::unique_ptr<const JniCache> jni_cache = nullptr);
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index 5a8bb80..06e89f2 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -20,6 +20,7 @@
#include <string>
#include <utility>
+#include "icing/jni/jni-cache.h"
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
@@ -27,6 +28,7 @@
#include "icing/file/filesystem.h"
#include "icing/file/mock-filesystem.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/legacy/index/icing-mock-filesystem.h"
#include "icing/portable/equals-proto.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
@@ -38,6 +40,7 @@
#include "icing/schema/section.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
+#include "icing/testing/jni-test-helpers.h"
#include "icing/testing/snippet-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
@@ -61,13 +64,33 @@
using ::testing::StrEq;
using ::testing::UnorderedElementsAre;
+constexpr std::string_view kIpsumText =
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis "
+ "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida "
+ "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam "
+ "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo "
+ "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, "
+ "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula "
+ "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et "
+ "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, "
+ "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis "
+ "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. "
+ "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. "
+ "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur "
+ "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
+ "placerat semper.";
+
// For mocking purpose, we allow tests to provide a custom Filesystem.
class TestIcingSearchEngine : public IcingSearchEngine {
public:
TestIcingSearchEngine(const IcingSearchEngineOptions& options,
std::unique_ptr<const Filesystem> filesystem,
- std::unique_ptr<FakeClock> clock)
- : IcingSearchEngine(options, std::move(filesystem), std::move(clock)) {}
+ std::unique_ptr<const IcingFilesystem> icing_filesystem,
+ std::unique_ptr<FakeClock> clock,
+ std::unique_ptr<JniCache> jni_cache)
+ : IcingSearchEngine(options, std::move(filesystem),
+ std::move(icing_filesystem), std::move(clock),
+ std::move(jni_cache)) {}
};
std::string GetTestBaseDir() { return GetTestTempDir() + "/icing"; }
@@ -75,10 +98,17 @@
class IcingSearchEngineTest : public testing::Test {
protected:
void SetUp() override {
+#ifndef ICING_REVERSE_JNI_SEGMENTATION
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
// File generated via icu_data_file rule in //icing/BUILD.
std::string icu_data_file_path =
GetTestFilePath("icing/icu.dat");
ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+#endif // ICING_REVERSE_JNI_SEGMENTATION
filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
}
@@ -167,26 +197,34 @@
return scoring_spec;
}
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+ int64 timestamp_ms,
+ UsageReport::UsageType usage_type) {
+ UsageReport usage_report;
+ usage_report.set_document_namespace(name_space);
+ usage_report.set_document_uri(uri);
+ usage_report.set_usage_timestamp_ms(timestamp_ms);
+ usage_report.set_usage_type(usage_type);
+ return usage_report;
+}
+
TEST_F(IcingSearchEngineTest, SimpleInitialization) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(DocumentProto(document)).status().code(),
- Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(DocumentProto(document)).status(), ProtoIsOk());
}
TEST_F(IcingSearchEngineTest, InitializingAgainSavesNonPersistedData) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -195,7 +233,7 @@
ASSERT_THAT(icing.Get("namespace", "uri"),
EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri"),
EqualsProto(expected_get_result_proto));
}
@@ -203,25 +241,25 @@
TEST_F(IcingSearchEngineTest, MaxIndexMergeSizeReturnsInvalidArgument) {
IcingSearchEngineOptions options = GetDefaultIcingOptions();
options.set_index_merge_size(std::numeric_limits<int32_t>::max());
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
TEST_F(IcingSearchEngineTest, NegativeMergeSizeReturnsInvalidArgument) {
IcingSearchEngineOptions options = GetDefaultIcingOptions();
options.set_index_merge_size(-1);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
TEST_F(IcingSearchEngineTest, ZeroMergeSizeReturnsInvalidArgument) {
IcingSearchEngineOptions options = GetDefaultIcingOptions();
options.set_index_merge_size(0);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
TEST_F(IcingSearchEngineTest, GoodIndexMergeSizeReturnsOk) {
@@ -229,25 +267,25 @@
// One is fine, if a bit weird. It just means that the lite index will be
// smaller and will request a merge any time content is added to it.
options.set_index_merge_size(1);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
}
TEST_F(IcingSearchEngineTest,
NegativeMaxTokensPerDocSizeReturnsInvalidArgument) {
IcingSearchEngineOptions options = GetDefaultIcingOptions();
options.set_max_tokens_per_doc(-1);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
TEST_F(IcingSearchEngineTest, ZeroMaxTokensPerDocSizeReturnsInvalidArgument) {
IcingSearchEngineOptions options = GetDefaultIcingOptions();
options.set_max_tokens_per_doc(0);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
TEST_F(IcingSearchEngineTest, GoodMaxTokensPerDocSizeReturnsOk) {
@@ -259,24 +297,24 @@
// for exceeding max_document_size, but there's no reason to explicitly
// bar it.
options.set_max_tokens_per_doc(std::numeric_limits<int32_t>::max());
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
}
TEST_F(IcingSearchEngineTest, NegativeMaxTokenLenReturnsInvalidArgument) {
IcingSearchEngineOptions options = GetDefaultIcingOptions();
options.set_max_token_length(-1);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
TEST_F(IcingSearchEngineTest, ZeroMaxTokenLenReturnsInvalidArgument) {
IcingSearchEngineOptions options = GetDefaultIcingOptions();
options.set_max_token_length(0);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
TEST_F(IcingSearchEngineTest, MaxTokenLenReturnsOkAndTruncatesTokens) {
@@ -284,13 +322,12 @@
// A length of 1 is allowed - even though it would be strange to want
// this.
options.set_max_token_length(1);
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
DocumentProto document = CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
// "message" should have been truncated to "m"
SearchSpecProto search_spec;
@@ -328,10 +365,9 @@
// truncate tokens). However, this does mean that tokens that exceed the
// size of the lexicon will cause indexing to fail.
options.set_max_token_length(std::numeric_limits<int32_t>::max());
- IcingSearchEngine icing(options);
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// Add a document that just barely fits under the max document limit.
// This will still fail to index because we won't actually have enough
@@ -343,8 +379,8 @@
.SetSchema("Message")
.AddStringProperty("body", std::move(enormous_string))
.Build();
- EXPECT_THAT(icing.Put(document).status().code(),
- Eq(StatusProto::OUT_OF_SPACE));
+ EXPECT_THAT(icing.Put(document).status(),
+ ProtoStatusIs(StatusProto::OUT_OF_SPACE));
SearchSpecProto search_spec;
search_spec.set_query("p");
@@ -365,11 +401,12 @@
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
- std::make_unique<FakeClock>());
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status().code(),
- Eq(StatusProto::INTERNAL));
+ EXPECT_THAT(initialize_result_proto.status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
EXPECT_THAT(initialize_result_proto.status().message(),
HasSubstr("Could not create directory"));
}
@@ -404,20 +441,20 @@
body->mutable_indexing_config()->set_tokenizer_type(
IndexingConfig::TokenizerType::PLAIN);
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(schema).status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
}
TEST_F(IcingSearchEngineTest, PutWithoutSchemaFailedPrecondition) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
DocumentProto document = CreateMessageDocument("namespace", "uri");
PutResultProto put_result_proto = icing.Put(document);
- EXPECT_THAT(put_result_proto.status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(put_result_proto.status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
EXPECT_THAT(put_result_proto.status().message(), HasSubstr("Schema not set"));
}
@@ -426,10 +463,9 @@
{
// Successfully initialize and set a schema
- IcingSearchEngine icing(icing_options);
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
}
auto mock_filesystem = std::make_unique<MockFilesystem>();
@@ -441,11 +477,13 @@
.WillByDefault(Return(-1));
TestIcingSearchEngine test_icing(icing_options, std::move(mock_filesystem),
- std::make_unique<FakeClock>());
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
InitializeResultProto initialize_result_proto = test_icing.Initialize();
- EXPECT_THAT(initialize_result_proto.status().code(),
- Eq(StatusProto::INTERNAL));
+ EXPECT_THAT(initialize_result_proto.status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
EXPECT_THAT(initialize_result_proto.status().message(),
HasSubstr("Unable to open file for read"));
}
@@ -460,21 +498,23 @@
.WillByDefault(Return(-1));
TestIcingSearchEngine icing(icing_options, std::move(mock_filesystem),
- std::make_unique<FakeClock>());
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+
SetSchemaResultProto set_schema_result_proto =
icing.SetSchema(CreateMessageSchema());
- EXPECT_THAT(set_schema_result_proto.status().code(),
- Eq(StatusProto::INTERNAL));
+ EXPECT_THAT(set_schema_result_proto.status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
EXPECT_THAT(set_schema_result_proto.status().message(),
HasSubstr("Unable to open file for write"));
}
TEST_F(IcingSearchEngineTest, SetSchemaDelete2) {
{
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
// 1. Create a schema with an Email type with properties { "title", "body"}
SchemaProto schema;
@@ -489,7 +529,7 @@
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
// 2. Add an email document
DocumentProto doc = DocumentBuilder()
@@ -498,12 +538,12 @@
.AddStringProperty("title", "Hello world.")
.AddStringProperty("body", "Goodnight Moon.")
.Build();
- EXPECT_THAT(icing.Put(std::move(doc)).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk());
}
{
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
// 3. Set a schema that deletes email. This should fail.
SchemaProto schema;
@@ -514,19 +554,18 @@
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- EXPECT_THAT(icing.SetSchema(schema, false).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.SetSchema(schema, false).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
// 4. Try to delete by email type.
- EXPECT_THAT(icing.DeleteBySchemaType("Email").status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.DeleteBySchemaType("Email").status(), ProtoIsOk());
}
}
TEST_F(IcingSearchEngineTest, SetSchemaDelete) {
{
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
// 1. Create a schema with an Email type with properties { "title", "body"}
SchemaProto schema;
@@ -541,7 +580,7 @@
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
// 2. Add an email document
DocumentProto doc = DocumentBuilder()
@@ -550,12 +589,12 @@
.AddStringProperty("title", "Hello world.")
.AddStringProperty("body", "Goodnight Moon.")
.Build();
- EXPECT_THAT(icing.Put(std::move(doc)).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(std::move(doc)).status(), ProtoIsOk());
}
{
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
// 3. Set a schema that deletes email. This should fail.
SchemaProto schema;
@@ -566,18 +605,17 @@
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- EXPECT_THAT(icing.SetSchema(schema, true).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());
// 4. Try to delete by email type.
- EXPECT_THAT(icing.DeleteBySchemaType("Email").status().code(),
- Eq(StatusProto::NOT_FOUND));
+ EXPECT_THAT(icing.DeleteBySchemaType("Email").status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
}
}
TEST_F(IcingSearchEngineTest, SetSchemaDuplicateTypesReturnsAlreadyExists) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
// Create a schema with types { "Email", "Message" and "Email" }
SchemaProto schema;
@@ -597,14 +635,14 @@
*schema.add_types() = schema.types(0);
- EXPECT_THAT(icing.SetSchema(schema).status().code(),
- Eq(StatusProto::ALREADY_EXISTS));
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::ALREADY_EXISTS));
}
TEST_F(IcingSearchEngineTest,
SetSchemaDuplicatePropertiesReturnsAlreadyExists) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
// Create a schema with an Email type with properties { "title", "body" and
// "title" }
@@ -624,13 +662,13 @@
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- EXPECT_THAT(icing.SetSchema(schema).status().code(),
- Eq(StatusProto::ALREADY_EXISTS));
+ EXPECT_THAT(icing.SetSchema(schema).status(),
+ ProtoStatusIs(StatusProto::ALREADY_EXISTS));
}
TEST_F(IcingSearchEngineTest, SetSchema) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
auto message_document = CreateMessageDocument("namespace", "uri");
@@ -658,18 +696,17 @@
empty_type->set_schema_type("");
// Make sure we can't set invalid schemas
- EXPECT_THAT(icing.SetSchema(invalid_schema).status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(icing.SetSchema(invalid_schema).status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
// Can add an document of a set schema
- EXPECT_THAT(icing.SetSchema(schema_with_message).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(message_document).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(schema_with_message).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
// Schema with Email doesn't have Message, so would result incompatible
// data
- EXPECT_THAT(icing.SetSchema(schema_with_email).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.SetSchema(schema_with_email).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
// Can expand the set of schema types and add an document of a new
// schema type
@@ -677,7 +714,7 @@
.status()
.code(),
Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(message_document).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
// Can't add an document whose schema isn't set
auto photo_document = DocumentBuilder()
@@ -686,26 +723,25 @@
.AddStringProperty("creator", "icing")
.Build();
PutResultProto put_result_proto = icing.Put(photo_document);
- EXPECT_THAT(put_result_proto.status().code(), Eq(StatusProto::NOT_FOUND));
+ EXPECT_THAT(put_result_proto.status(), ProtoStatusIs(StatusProto::NOT_FOUND));
EXPECT_THAT(put_result_proto.status().message(),
HasSubstr("'Photo' not found"));
}
TEST_F(IcingSearchEngineTest, SetSchemaTriggersIndexRestorationAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
SchemaProto schema_with_no_indexed_property = CreateMessageSchema();
schema_with_no_indexed_property.mutable_types(0)
->mutable_properties(0)
->clear_indexing_config();
- EXPECT_THAT(icing.SetSchema(schema_with_no_indexed_property).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(schema_with_no_indexed_property).status(),
+ ProtoIsOk());
// Nothing will be index and Search() won't return anything.
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_query("message");
@@ -721,8 +757,8 @@
SchemaProto schema_with_indexed_property = CreateMessageSchema();
// Index restoration should be triggered here because new schema requires more
// properties to be indexed.
- EXPECT_THAT(icing.SetSchema(schema_with_indexed_property).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(schema_with_indexed_property).status(),
+ ProtoIsOk());
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -734,8 +770,8 @@
}
TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
SchemaProto schema_with_optional_subject;
auto type = schema_with_optional_subject.add_types();
@@ -747,8 +783,8 @@
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- EXPECT_THAT(icing.SetSchema(schema_with_optional_subject).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(schema_with_optional_subject).status(),
+ ProtoIsOk());
DocumentProto email_document_without_subject =
DocumentBuilder()
@@ -764,10 +800,8 @@
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- EXPECT_THAT(icing.Put(email_document_without_subject).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(email_document_with_subject).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(email_document_without_subject).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(email_document_with_subject).status(), ProtoIsOk());
SchemaProto schema_with_required_subject;
type = schema_with_required_subject.add_types();
@@ -816,8 +850,8 @@
}
TEST_F(IcingSearchEngineTest, SetSchemaDeletesDocumentsAndReturnsOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
SchemaProto schema;
auto type = schema.add_types();
@@ -825,7 +859,7 @@
type = schema.add_types();
type->set_schema_type("message");
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
DocumentProto email_document =
DocumentBuilder()
@@ -840,8 +874,8 @@
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- EXPECT_THAT(icing.Put(email_document).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(message_document).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(message_document).status(), ProtoIsOk());
// Clear the schema and only add the "email" type, essentially deleting the
// "message" type
@@ -883,18 +917,18 @@
}
TEST_F(IcingSearchEngineTest, GetSchemaNotFound) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.GetSchema().status().code(), Eq(StatusProto::NOT_FOUND));
+ EXPECT_THAT(icing.GetSchema().status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
}
TEST_F(IcingSearchEngineTest, GetSchemaOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
GetSchemaResultProto expected_get_schema_result_proto;
expected_get_schema_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -903,23 +937,22 @@
}
TEST_F(IcingSearchEngineTest, GetSchemaTypeFailedPrecondition) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
GetSchemaTypeResultProto get_schema_type_result_proto =
icing.GetSchemaType("nonexistent_schema");
- EXPECT_THAT(get_schema_type_result_proto.status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(get_schema_type_result_proto.status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
EXPECT_THAT(get_schema_type_result_proto.status().message(),
HasSubstr("Schema not set"));
}
TEST_F(IcingSearchEngineTest, GetSchemaTypeOk) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
GetSchemaTypeResultProto expected_get_schema_type_result_proto;
expected_get_schema_type_result_proto.mutable_status()->set_code(
@@ -931,15 +964,13 @@
}
TEST_F(IcingSearchEngineTest, GetDocument) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// Simple put and get
- ASSERT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -950,8 +981,8 @@
// Put an invalid document
PutResultProto put_result_proto = icing.Put(DocumentProto());
- EXPECT_THAT(put_result_proto.status().code(),
- Eq(StatusProto::INVALID_ARGUMENT));
+ EXPECT_THAT(put_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
EXPECT_THAT(put_result_proto.status().message(),
HasSubstr("'namespace' is empty"));
@@ -965,16 +996,15 @@
}
TEST_F(IcingSearchEngineTest, SearchReturnsValidResults) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
- ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document_two).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
@@ -987,7 +1017,7 @@
SearchResultProto results =
icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(results.status(), ProtoIsOk());
EXPECT_THAT(results.results(), SizeIs(2));
EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
EXPECT_THAT(GetMatch(results.results(0).document(),
@@ -1018,16 +1048,15 @@
}
TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
- ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document_two).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
@@ -1043,7 +1072,7 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(search_result_proto.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
// The token is a random number so we don't verify it.
expected_search_result_proto.set_next_page_token(
search_result_proto.next_page_token());
@@ -1051,8 +1080,8 @@
}
TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
@@ -1068,8 +1097,8 @@
}
TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
@@ -1092,22 +1121,20 @@
{
// Set the schema up beforehand.
- IcingSearchEngine icing(icing_options);
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// Schema will be persisted to disk when icing goes out of scope.
}
{
// Ensure that icing initializes the schema and section_manager
// properly from the pre-existing file.
- IcingSearchEngine icing(icing_options);
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
// The index and document store will be persisted to disk when icing goes
// out of scope.
}
@@ -1115,8 +1142,8 @@
{
// Ensure that the index is brought back up without problems and we
// can query for the content that we expect.
- IcingSearchEngine icing(icing_options);
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
@@ -1142,10 +1169,9 @@
}
TEST_F(IcingSearchEngineTest, SearchShouldReturnEmpty) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
@@ -1163,10 +1189,9 @@
}
TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// Creates and inserts 5 documents
DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
@@ -1174,11 +1199,11 @@
DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document4).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document5).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
@@ -1215,21 +1240,22 @@
expected_search_result_proto.clear_results();
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document1;
+ // Because there are no more results, we should not return the next page
+ // token.
+ expected_search_result_proto.clear_next_page_token();
EXPECT_THAT(icing.GetNextPage(next_page_token),
EqualsProto(expected_search_result_proto));
// No more results
expected_search_result_proto.clear_results();
- expected_search_result_proto.clear_next_page_token();
EXPECT_THAT(icing.GetNextPage(next_page_token),
EqualsProto(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// Creates and inserts 5 documents
DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
@@ -1237,11 +1263,11 @@
DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document4).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document5).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
@@ -1281,21 +1307,22 @@
expected_search_result_proto.clear_results();
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document1;
+ // Because there are no more results, we should not return the next page
+ // token.
+ expected_search_result_proto.clear_next_page_token();
EXPECT_THAT(icing.GetNextPage(next_page_token),
EqualsProto(expected_search_result_proto));
// No more results
expected_search_result_proto.clear_results();
- expected_search_result_proto.clear_next_page_token();
EXPECT_THAT(icing.GetNextPage(next_page_token),
EqualsProto(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// Creates and inserts 5 documents
DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
@@ -1303,11 +1330,11 @@
DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document4).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document5).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
@@ -1322,7 +1349,7 @@
// Searches and gets the first page, 2 results with 2 snippets
SearchResultProto search_result =
icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- ASSERT_THAT(search_result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
ASSERT_THAT(search_result.results(), SizeIs(2));
ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
@@ -1347,7 +1374,7 @@
// Second page, 2 result with 1 snippet
search_result = icing.GetNextPage(search_result.next_page_token());
- ASSERT_THAT(search_result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
ASSERT_THAT(search_result.results(), SizeIs(2));
ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
@@ -1365,24 +1392,23 @@
// Third page, 1 result with 0 snippets
search_result = icing.GetNextPage(search_result.next_page_token());
- ASSERT_THAT(search_result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
ASSERT_THAT(search_result.results(), SizeIs(1));
- ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
EXPECT_THAT(search_result.results(0).document(), EqualsProto(document1));
EXPECT_THAT(search_result.results(0).snippet().entries_size(), Eq(0));
}
TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
@@ -1417,15 +1443,14 @@
TEST_F(IcingSearchEngineTest,
AllPageTokensShouldBeInvalidatedAfterOptimization) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
@@ -1471,20 +1496,18 @@
expected_get_result_proto.mutable_status()->set_message(
"Document (namespace, uri1) not found.");
{
- IcingSearchEngine icing(icing_options);
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
// Deletes document1
- ASSERT_THAT(icing.Delete("namespace", "uri1").status().code(),
- Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
const std::string document_log_path =
icing_options.base_dir() + "/document_dir/document_log";
int64_t document_log_size_before =
filesystem()->GetFileSize(document_log_path.c_str());
- ASSERT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
int64_t document_log_size_after =
filesystem()->GetFileSize(document_log_path.c_str());
@@ -1495,18 +1518,17 @@
EXPECT_THAT(document_log_size_after, Lt(document_log_size_before));
} // Destroys IcingSearchEngine to make sure nothing is cached.
- IcingSearchEngine icing(icing_options);
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri1"),
EqualsProto(expected_get_result_proto));
}
TEST_F(IcingSearchEngineTest, OptimizationShouldDeleteTemporaryDirectory) {
IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
- IcingSearchEngine icing(icing_options);
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(icing_options, GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// Create a tmp dir that will be used in Optimize() to swap files,
// this validates that any tmp dirs will be deleted before using.
@@ -1520,7 +1542,7 @@
ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
fd.reset();
- EXPECT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Optimize().status(), ProtoIsOk());
EXPECT_FALSE(filesystem()->DirectoryExists(tmp_dir.c_str()));
EXPECT_FALSE(filesystem()->FileExists(tmp_file.c_str()));
@@ -1541,31 +1563,30 @@
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::make_unique<Filesystem>(),
- std::move(fake_clock));
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
// Just initialized, nothing is optimizable yet.
GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
// Only have active documents, nothing is optimizable yet.
optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
// Deletes document1
- ASSERT_THAT(icing.Delete("namespace", "uri1").status().code(),
- Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1));
EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0));
int64_t first_estimated_optimizable_bytes =
@@ -1574,20 +1595,20 @@
// Add a second document, but it'll be expired since the time (1000) is
// greater than the document's creation timestamp (100) + the document's ttl
// (500)
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2));
EXPECT_THAT(optimize_info.estimated_optimizable_bytes(),
Gt(first_estimated_optimizable_bytes));
// Optimize
- ASSERT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
// Nothing is optimizable now that everything has been optimized away.
optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
}
@@ -1602,22 +1623,21 @@
*expected_get_result_proto.mutable_document() = document1;
{
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
// Validates that Get() and Put() are good right after Optimize()
EXPECT_THAT(icing.Get("namespace", "uri1"),
EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
} // Destroys IcingSearchEngine to make sure nothing is cached.
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri1"),
EqualsProto(expected_get_result_proto));
@@ -1625,24 +1645,22 @@
EXPECT_THAT(icing.Get("namespace", "uri2"),
EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(document3).status(), ProtoIsOk());
}
TEST_F(IcingSearchEngineTest, DeleteShouldWorkAfterOptimization) {
DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
{
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
// Validates that Delete() works right after Optimize()
- EXPECT_THAT(icing.Delete("namespace", "uri1").status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(
@@ -1659,10 +1677,9 @@
EqualsProto(expected_get_result_proto));
} // Destroys IcingSearchEngine to make sure nothing is cached.
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Delete("namespace", "uri2").status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.Delete("namespace", "uri2").status(), ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
@@ -1699,12 +1716,13 @@
ON_CALL(*mock_filesystem, SwapFiles).WillByDefault(swap_lambda);
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
- std::make_unique<FakeClock>());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
// The mocks should cause an unrecoverable error during Optimize - returning
// INTERNAL.
- ASSERT_THAT(icing.Optimize().status().code(), Eq(StatusProto::INTERNAL));
+ ASSERT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::INTERNAL));
// Ordinary operations should fail safely.
SchemaProto simple_schema;
@@ -1729,28 +1747,23 @@
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
- EXPECT_THAT(icing.SetSchema(simple_schema).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Put(simple_doc).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(
- icing.Get(simple_doc.namespace_(), simple_doc.uri()).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(
- icing.Search(search_spec, scoring_spec, result_spec).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.SetSchema(simple_schema).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Put(simple_doc).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Get(simple_doc.namespace_(), simple_doc.uri()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
// Reset should get icing back to a safe (empty) and working state.
- EXPECT_THAT(icing.Reset().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(simple_schema).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(simple_doc).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Get(simple_doc.namespace_(), simple_doc.uri()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Search(search_spec, scoring_spec, result_spec).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Reset().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(simple_schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(simple_doc).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Get(simple_doc.namespace_(), simple_doc.uri()).status(),
+ ProtoIsOk());
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+ ProtoIsOk());
}
TEST_F(IcingSearchEngineTest, DeleteBySchemaType) {
@@ -1791,11 +1804,11 @@
.AddStringProperty("subject", "message body2")
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -1809,8 +1822,7 @@
// Delete the first type. The first doc should be irretrievable. The
// second should still be present.
- EXPECT_THAT(icing.DeleteBySchemaType("message").status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.DeleteBySchemaType("message").status(), ProtoIsOk());
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
expected_get_result_proto.mutable_status()->set_message(
@@ -1859,11 +1871,11 @@
.AddStringProperty("body", "message body2")
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -1879,8 +1891,7 @@
// second should still be present.
SearchSpecProto search_spec;
search_spec.add_schema_type_filters(schema.types(0).schema_type());
- EXPECT_THAT(icing.DeleteByQuery(search_spec).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
expected_get_result_proto.mutable_status()->set_message(
@@ -1931,13 +1942,12 @@
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -1955,8 +1965,7 @@
// Delete namespace1. Document1 and document2 should be irretrievable.
// Document3 should still be present.
- EXPECT_THAT(icing.DeleteByNamespace("namespace1").status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.DeleteByNamespace("namespace1").status(), ProtoIsOk());
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
expected_get_result_proto.mutable_status()->set_message(
@@ -2007,12 +2016,11 @@
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -2028,8 +2036,7 @@
// second should still be present.
SearchSpecProto search_spec;
search_spec.add_namespace_filters("namespace1");
- EXPECT_THAT(icing.DeleteByQuery(search_spec).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
expected_get_result_proto.mutable_status()->set_message(
@@ -2073,12 +2080,11 @@
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -2095,8 +2101,7 @@
SearchSpecProto search_spec;
search_spec.set_query("body1");
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- EXPECT_THAT(icing.DeleteByQuery(search_spec).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
expected_get_result_proto.mutable_status()->set_message(
@@ -2140,12 +2145,11 @@
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -2162,8 +2166,8 @@
SearchSpecProto search_spec;
search_spec.set_query("foo");
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- EXPECT_THAT(icing.DeleteByQuery(search_spec).status().code(),
- Eq(StatusProto::NOT_FOUND));
+ EXPECT_THAT(icing.DeleteByQuery(search_spec).status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
expected_get_result_proto.mutable_status()->clear_message();
@@ -2217,18 +2221,18 @@
IndexingConfig::TokenizerType::PLAIN);
{
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(schema1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
// Validates that SetSchema() works right after Optimize()
- EXPECT_THAT(icing.SetSchema(schema2).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(schema2).status(), ProtoIsOk());
} // Destroys IcingSearchEngine to make sure nothing is cached.
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(schema3).status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema3).status(), ProtoIsOk());
}
TEST_F(IcingSearchEngineTest, SearchShouldWorkAfterOptimization) {
@@ -2242,12 +2246,11 @@
document;
{
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Optimize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
// Validates that Search() works right after Optimize()
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
@@ -2255,8 +2258,8 @@
EqualsProto(expected_search_result_proto));
} // Destroys IcingSearchEngine to make sure nothing is cached.
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance()),
EqualsProto(expected_search_result_proto));
@@ -2266,11 +2269,10 @@
DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
{
// Initializes a normal icing to create files needed
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
}
// Creates a mock filesystem in which DeleteDirectoryRecursively() always
@@ -2282,9 +2284,10 @@
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
- std::make_unique<FakeClock>());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Optimize().status().code(), Eq(StatusProto::ABORTED));
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.Optimize().status(), ProtoStatusIs(StatusProto::ABORTED));
// Now optimization is aborted, we verify that document-related functions
// still work as expected.
@@ -2297,7 +2300,7 @@
DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
- EXPECT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_query("m");
@@ -2329,18 +2332,17 @@
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
- std::make_unique<FakeClock>());
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
// Optimize() fails due to filesystem error
- EXPECT_THAT(icing.Optimize().status().code(),
- Eq(StatusProto::WARNING_DATA_LOSS));
+ EXPECT_THAT(icing.Optimize().status(),
+ ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
// Document is not found because original file directory is missing
GetResultProto expected_get_result_proto;
@@ -2358,7 +2360,7 @@
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- EXPECT_THAT(icing.Put(new_document).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_query("m");
@@ -2399,18 +2401,17 @@
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
- std::make_unique<FakeClock>());
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
// Optimize() fails due to filesystem error
- EXPECT_THAT(icing.Optimize().status().code(),
- Eq(StatusProto::WARNING_DATA_LOSS));
+ EXPECT_THAT(icing.Optimize().status(),
+ ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
// Document is not found because original files are missing
GetResultProto expected_get_result_proto;
@@ -2428,7 +2429,7 @@
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- EXPECT_THAT(icing.Put(new_document).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(new_document).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_query("m");
@@ -2490,11 +2491,12 @@
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::make_unique<Filesystem>(),
- std::move(fake_clock));
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
// Check that the document is returned as part of search results
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
@@ -2537,11 +2539,12 @@
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::make_unique<Filesystem>(),
- std::move(fake_clock));
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
// Check that the document is not returned as part of search results
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
@@ -2567,10 +2570,10 @@
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(message_document).status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(message_document).status(), ProtoIsOk());
// Make sure we can search for message document
SearchSpecProto search_spec;
@@ -2615,7 +2618,7 @@
property->mutable_indexing_config()->set_tokenizer_type(
IndexingConfig::TokenizerType::PLAIN);
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
search_spec.Clear();
search_spec.set_query("foo");
@@ -2645,13 +2648,11 @@
{
// Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri"),
EqualsProto(expected_get_result_proto));
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
@@ -2662,8 +2663,8 @@
EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str()));
// We should be able to recover from this and access all our previous data
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
// Checks that DocumentLog is still ok
EXPECT_THAT(icing.Get("namespace", "uri"),
@@ -2675,9 +2676,8 @@
EqualsProto(expected_search_result_proto));
// Checks that the schema is still intact since it'll be needed to validate the document
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
}
TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderMagic) {
@@ -2697,13 +2697,11 @@
{
// Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri"),
EqualsProto(expected_get_result_proto));
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
@@ -2718,8 +2716,8 @@
&invalid_magic, sizeof(invalid_magic));
// We should be able to recover from this and access all our previous data
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
// Checks that DocumentLog is still ok
EXPECT_THAT(icing.Get("namespace", "uri"),
@@ -2731,9 +2729,8 @@
EqualsProto(expected_search_result_proto));
// Checks that the schema is still intact since it'll be needed to validate the document
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
}
TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderChecksum) {
@@ -2753,13 +2750,11 @@
{
// Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri"),
EqualsProto(expected_get_result_proto));
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
@@ -2775,8 +2770,8 @@
&invalid_checksum, sizeof(invalid_checksum));
// We should be able to recover from this and access all our previous data
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
// Checks that DocumentLog is still ok
EXPECT_THAT(icing.Get("namespace", "uri"),
@@ -2788,21 +2783,18 @@
EqualsProto(expected_search_result_proto));
// Checks that the schema is still intact since it'll be needed to validate the document
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
}
TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptSchema) {
{
// Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -2819,20 +2811,19 @@
EXPECT_TRUE(filesystem()->Write(schema_file.c_str(), corrupt_data.data(),
corrupt_data.size()));
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::INTERNAL));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
}
TEST_F(IcingSearchEngineTest, UnableToRecoverFromCorruptDocumentLog) {
{
// Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -2849,8 +2840,9 @@
EXPECT_TRUE(filesystem()->Write(document_log_file.c_str(),
corrupt_data.data(), corrupt_data.size()));
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::INTERNAL));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::INTERNAL));
}
TEST_F(IcingSearchEngineTest, RecoverFromInconsistentSchemaStore) {
@@ -2866,8 +2858,8 @@
{
// Initializes folder and schema
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
SchemaProto schema;
auto type = schema.add_types();
@@ -2887,10 +2879,10 @@
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document2_with_additional_property).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document2_with_additional_property).status(),
+ ProtoIsOk());
// Won't get us anything because "additional" isn't marked as an indexed
// property in the schema
@@ -2944,8 +2936,8 @@
ICING_EXPECT_OK(schema_store->SetSchema(new_schema));
} // Will persist new schema
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
// We can insert a Email document since we kept the new schema
DocumentProto email_document =
@@ -2954,7 +2946,7 @@
.SetSchema("Email")
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- EXPECT_THAT(icing.Put(email_document).status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(email_document).status(), ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -2990,11 +2982,10 @@
{
// Initializes folder and schema, index one document
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
} // This should shut down IcingSearchEngine and persist anything it needs to
{
@@ -3012,10 +3003,10 @@
ICING_EXPECT_OK(document_store->Put(document2));
}
- IcingSearchEngine icing(GetDefaultIcingOptions());
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
// Index Restoration should be triggered here and document2 should be
// indexed.
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
GetResultProto expected_get_result_proto;
expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
@@ -3059,13 +3050,11 @@
{
// Initializes folder and schema, index one document
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance()),
EqualsProto(expected_search_result_proto));
@@ -3075,8 +3064,8 @@
EXPECT_TRUE(filesystem()->DeleteDirectoryRecursively(
absl_ports::StrCat(GetIndexDir(), "/idx/lite.").c_str()));
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
// Check that our index is ok by searching over the restored index
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
@@ -3096,13 +3085,11 @@
{
// Initializes folder and schema, index one document
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance()),
EqualsProto(expected_search_result_proto));
@@ -3114,8 +3101,8 @@
ASSERT_TRUE(fd.is_valid());
ASSERT_TRUE(filesystem()->Write(fd.get(), "1234", 4));
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
// Check that our index is ok by searching over the restored index
EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
@@ -3124,10 +3111,9 @@
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// Creates 3 documents and ensures the relationship in terms of document
// score is: document1 < document2 < document3
@@ -3158,9 +3144,9 @@
// Intentionally inserts the documents in the order that is different than
// their score order
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
// "m" will match all 3 documents
SearchSpecProto search_spec;
@@ -3185,10 +3171,9 @@
}
TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// Creates 3 documents and ensures the relationship of them is:
// document1 < document2 < document3
@@ -3216,9 +3201,9 @@
// Intentionally inserts the documents in the order that is different than
// their score order
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
// "m" will match all 3 documents
SearchSpecProto search_spec;
@@ -3244,10 +3229,9 @@
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// Creates 3 documents and ensures the relationship in terms of creation
// timestamp score is: document1 < document2 < document3
@@ -3272,9 +3256,9 @@
// Intentionally inserts the documents in the order that is different than
// their score order
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
// "m" will match all 3 documents
SearchSpecProto search_spec;
@@ -3299,11 +3283,331 @@
EqualsProto(expected_search_result_proto));
}
+TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageCount) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Intentionally inserts the documents in a different order to eliminate the
+ // possibility that the following results are sorted in the default reverse
+ // insertion order.
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Report usage for doc3 twice and doc2 once. The order will be doc3 > doc2 >
+ // doc1 when ranked by USAGE_TYPE1_COUNT.
+ UsageReport usage_report_doc3 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc2 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+
+ // Result should be in descending USAGE_TYPE1_COUNT order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance()),
+ EqualsProto(expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineTest,
+ SearchResultShouldHaveDefaultOrderWithoutUsageCounts) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+
+ // None of the documents have usage reports. Result should be in the default
+ // reverse insertion order.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance()),
+ EqualsProto(expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageTimestamp) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ // Intentionally inserts the documents in a different order to eliminate the
+ // possibility that the following results are sorted in the default reverse
+ // insertion order.
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Report usage for doc2 and doc3. The order will be doc3 > doc2 > doc1 when
+ // ranked by USAGE_TYPE1_LAST_USED_TIMESTAMP.
+ UsageReport usage_report_doc2 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc3 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/3", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE1);
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc3).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+
+ // Result should be in descending USAGE_TYPE1_LAST_USED_TIMESTAMP order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance()),
+ EqualsProto(expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineTest,
+ SearchResultShouldHaveDefaultOrderWithoutUsageTimestamp) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // "m" will match all 3 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+
+ // None of the documents have usage reports. Result should be in the default
+ // reverse insertion order.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document3;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance()),
+ EqualsProto(expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 3 test documents
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // Report usage for doc1 and doc2. The older timestamp 5000 shouldn't be
+ // overridden by 1000. The order will be doc1 > doc2 when ranked by
+ // USAGE_TYPE1_LAST_USED_TIMESTAMP.
+ UsageReport usage_report_doc1_time1 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc1_time5 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc2_time3 = CreateUsageReport(
+ /*name_space=*/"namespace", /*uri=*/"uri/2", /*timestamp_ms=*/3000,
+ UsageReport::USAGE_TYPE1);
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc1_time5).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc2_time3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.ReportUsage(usage_report_doc1_time1).status(), ProtoIsOk());
+
+ // "m" will match both documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+
+ // Result should be in descending USAGE_TYPE1_LAST_USED_TIMESTAMP order
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document1;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance()),
+ EqualsProto(expected_search_result_proto));
+}
+
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// Creates 3 documents and ensures the relationship in terms of document
// score is: document1 < document2 < document3
@@ -3334,9 +3638,9 @@
// Intentionally inserts the documents in the order that is different than
// their score order
- ASSERT_THAT(icing.Put(document2).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document3).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(document1).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
// "m" will match all 3 documents
SearchSpecProto search_spec;
@@ -3378,19 +3682,18 @@
PropertyConfigProto::Cardinality::REQUIRED);
{
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
} // This should shut down IcingSearchEngine and persist anything it needs to
ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
// Since we don't have any documents yet, we can't detect this edge-case. But
// it should be fine since there aren't any documents to be invalidated.
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(incompatible_schema).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk());
}
TEST_F(IcingSearchEngineTest, SetSchemaCanDetectPreviousSchemaWasLost) {
@@ -3416,12 +3719,12 @@
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
{
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
// Can retrieve by namespace/uri
GetResultProto expected_get_result_proto;
@@ -3444,10 +3747,9 @@
ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
// Setting the new, different schema will remove incompatible documents
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(incompatible_schema).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(incompatible_schema).status(), ProtoIsOk());
// Can't retrieve by namespace/uri
GetResultProto expected_get_result_proto;
@@ -3473,23 +3775,21 @@
CreateMessageDocument("namespace", "uri");
{
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
// Persisting shouldn't affect anything
- EXPECT_THAT(icing.PersistToDisk().status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.PersistToDisk().status(), ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri"),
EqualsProto(expected_get_result_proto));
} // Destructing persists as well
- IcingSearchEngine icing(GetDefaultIcingOptions());
- EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri"),
EqualsProto(expected_get_result_proto));
}
@@ -3499,22 +3799,21 @@
SchemaProto empty_schema = SchemaProto(message_schema);
empty_schema.clear_types();
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(message_schema).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(message_schema).status(), ProtoIsOk());
int64_t empty_state_size =
filesystem()->GetFileDiskUsage(GetTestBaseDir().c_str());
DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
// Check that things have been added
EXPECT_THAT(filesystem()->GetDiskUsage(GetTestBaseDir().c_str()),
Gt(empty_state_size));
- EXPECT_THAT(icing.Reset().status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Reset().status(), ProtoIsOk());
// Check that we're back to an empty state
EXPECT_EQ(filesystem()->GetFileDiskUsage(GetTestBaseDir().c_str()),
@@ -3523,8 +3822,7 @@
// Sanity check that we can still call other APIs. If things aren't cleared,
// then this should raise an error since the empty schema is incompatible with
// the old message_schema.
- EXPECT_THAT(icing.SetSchema(empty_schema).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.SetSchema(empty_schema).status(), ProtoIsOk());
}
TEST_F(IcingSearchEngineTest, ResetAbortedError) {
@@ -3538,14 +3836,14 @@
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
- std::make_unique<FakeClock>());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
DocumentProto document = CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Put(document).status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.Reset().status().code(), Eq(StatusProto::ABORTED));
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Reset().status(), ProtoStatusIs(StatusProto::ABORTED));
// Everything is still intact.
// Can get old data.
@@ -3556,9 +3854,8 @@
EqualsProto(expected_get_result_proto));
// Can add new data.
- EXPECT_THAT(
- icing.Put(CreateMessageDocument("namespace", "uri")).status().code(),
- Eq(StatusProto::OK));
+ EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
+ ProtoIsOk());
}
TEST_F(IcingSearchEngineTest, ResetInternalError) {
@@ -3580,18 +3877,17 @@
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
- std::make_unique<FakeClock>());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
- EXPECT_THAT(icing.Reset().status().code(), Eq(StatusProto::INTERNAL));
+ std::make_unique<IcingFilesystem>(),
+ std::make_unique<FakeClock>(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ EXPECT_THAT(icing.Reset().status(), ProtoStatusIs(StatusProto::INTERNAL));
}
TEST_F(IcingSearchEngineTest, SnippetNormalization) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
DocumentProto document_one =
DocumentBuilder()
@@ -3600,7 +3896,7 @@
.AddStringProperty("body", "MDI zurich Team Meeting")
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
DocumentProto document_two =
DocumentBuilder()
@@ -3609,7 +3905,7 @@
.AddStringProperty("body", "mdi Zürich Team Meeting")
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- ASSERT_THAT(icing.Put(document_two).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
@@ -3622,7 +3918,7 @@
SearchResultProto results =
icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(results.status(), ProtoIsOk());
ASSERT_THAT(results.results(), SizeIs(2));
const DocumentProto& result_document_1 = results.results(0).document();
const SnippetProto& result_snippet_1 = results.results(0).snippet();
@@ -3658,10 +3954,9 @@
}
TEST_F(IcingSearchEngineTest, SnippetNormalizationPrefix) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
DocumentProto document_one =
DocumentBuilder()
@@ -3670,7 +3965,7 @@
.AddStringProperty("body", "MDI zurich Team Meeting")
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
DocumentProto document_two =
DocumentBuilder()
@@ -3679,7 +3974,7 @@
.AddStringProperty("body", "mdi Zürich Team Meeting")
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- ASSERT_THAT(icing.Put(document_two).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
@@ -3692,7 +3987,7 @@
SearchResultProto results =
icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(results.status(), ProtoIsOk());
ASSERT_THAT(results.results(), SizeIs(2));
const DocumentProto& result_document_1 = results.results(0).document();
const SnippetProto& result_snippet_1 = results.results(0).snippet();
@@ -3728,10 +4023,9 @@
}
TEST_F(IcingSearchEngineTest, SnippetSectionRestrict) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status().code(),
- Eq(StatusProto::OK));
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateEmailSchema()).status(), ProtoIsOk());
DocumentProto document_one =
DocumentBuilder()
@@ -3741,7 +4035,7 @@
.AddStringProperty("body", "MDI zurich Team Meeting")
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::PREFIX);
@@ -3754,7 +4048,7 @@
SearchResultProto results =
icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(results.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(results.status(), ProtoIsOk());
ASSERT_THAT(results.results(), SizeIs(1));
const DocumentProto& result_document = results.results(0).document();
const SnippetProto& result_snippet = results.results(0).snippet();
@@ -3774,26 +4068,25 @@
}
TEST_F(IcingSearchEngineTest, UninitializedInstanceFailsSafely) {
- IcingSearchEngine icing(GetDefaultIcingOptions());
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
SchemaProto email_schema = CreateMessageSchema();
- EXPECT_THAT(icing.SetSchema(email_schema).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.GetSchema().status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(
- icing.GetSchemaType(email_schema.types(0).schema_type()).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.SetSchema(email_schema).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.GetSchema().status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.GetSchemaType(email_schema.types(0).schema_type()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
DocumentProto doc = CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Put(doc).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Get(doc.namespace_(), doc.uri()).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Put(doc).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Get(doc.namespace_(), doc.uri()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Delete(doc.namespace_(), doc.uri()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.DeleteByNamespace(doc.namespace_()).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
EXPECT_THAT(icing.DeleteBySchemaType(email_schema.types(0).schema_type())
.status()
.code(),
@@ -3802,18 +4095,17 @@
SearchSpecProto search_spec = SearchSpecProto::default_instance();
ScoringSpecProto scoring_spec = ScoringSpecProto::default_instance();
ResultSpecProto result_spec = ResultSpecProto::default_instance();
- EXPECT_THAT(
- icing.Search(search_spec, scoring_spec, result_spec).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
constexpr int kSomePageToken = 12;
- EXPECT_THAT(icing.GetNextPage(kSomePageToken).status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.GetNextPage(kSomePageToken).status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
icing.InvalidateNextPageToken(kSomePageToken); // Verify this doesn't crash.
- EXPECT_THAT(icing.PersistToDisk().status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
- EXPECT_THAT(icing.Optimize().status().code(),
- Eq(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.PersistToDisk().status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(icing.Optimize().status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
}
TEST_F(IcingSearchEngineTest, GetAllNamespaces) {
@@ -3854,47 +4146,43 @@
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::make_unique<Filesystem>(),
- std::move(fake_clock));
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
// No namespaces exist yet
GetAllNamespacesResultProto result = icing.GetAllNamespaces();
- EXPECT_THAT(result.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(result.status(), ProtoIsOk());
EXPECT_THAT(result.namespaces(), IsEmpty());
- ASSERT_THAT(icing.Put(namespace1).status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(namespace2_uri1).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(namespace2_uri2).status().code(),
- Eq(StatusProto::OK));
- ASSERT_THAT(icing.Put(namespace3).status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Put(namespace1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(namespace2_uri1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(namespace2_uri2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(namespace3).status(), ProtoIsOk());
// All namespaces should exist now
result = icing.GetAllNamespaces();
- EXPECT_THAT(result.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(result.status(), ProtoIsOk());
EXPECT_THAT(result.namespaces(),
UnorderedElementsAre("namespace1", "namespace2", "namespace3"));
// After deleting namespace2_uri1 document, we still have namespace2_uri2 in
// "namespace2" so it should still show up
- ASSERT_THAT(icing.Delete("namespace2", "uri1").status().code(),
- Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Delete("namespace2", "uri1").status(), ProtoIsOk());
result = icing.GetAllNamespaces();
- EXPECT_THAT(result.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(result.status(), ProtoIsOk());
EXPECT_THAT(result.namespaces(),
UnorderedElementsAre("namespace1", "namespace2", "namespace3"));
// After deleting namespace2_uri2 document, we no longer have any documents
// in "namespace2"
- ASSERT_THAT(icing.Delete("namespace2", "uri2").status().code(),
- Eq(StatusProto::OK));
+ ASSERT_THAT(icing.Delete("namespace2", "uri2").status(), ProtoIsOk());
result = icing.GetAllNamespaces();
- EXPECT_THAT(result.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(result.status(), ProtoIsOk());
EXPECT_THAT(result.namespaces(),
UnorderedElementsAre("namespace1", "namespace3"));
}
@@ -3908,16 +4196,332 @@
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::make_unique<Filesystem>(),
- std::move(fake_clock));
- ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
// Only valid document left is the one in "namespace1"
GetAllNamespacesResultProto result = icing.GetAllNamespaces();
- EXPECT_THAT(result.status().code(), Eq(StatusProto::OK));
+ EXPECT_THAT(result.status(), ProtoIsOk());
EXPECT_THAT(result.namespaces(), UnorderedElementsAre("namespace1"));
}
}
+TEST_F(IcingSearchEngineTest, Hyphens) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SchemaProto schema;
+ SchemaTypeConfigProto* type = schema.add_types();
+ type->set_schema_type("MyType");
+ PropertyConfigProto* prop = type->add_properties();
+ prop->set_property_name("foo");
+ prop->set_data_type(PropertyConfigProto::DataType::STRING);
+ prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ prop->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ prop->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("MyType")
+ .AddStringProperty("foo", "foo bar-baz bat")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("MyType")
+ .AddStringProperty("foo", "bar for baz bat-man")
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("foo:bar-baz");
+
+ ResultSpecProto result_spec;
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ ASSERT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+}
+
+TEST_F(IcingSearchEngineTest, RestoreIndex) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+ // 1. Create an index with a LiteIndex that will only allow one document
+ // before needing a merge.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ IcingSearchEngine icing(options, GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Add two documents. These should get merged into the main index.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+  // Add one document. This one should remain in the lite index.
+ document = DocumentBuilder(document).SetUri("fake_type/2").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ // 2. Delete the index file to trigger RestoreIndex.
+ std::string idx_subdir = GetIndexDir() + "/idx";
+ filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
+
+ // 3. Create the index again. This should trigger index restoration.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ IcingSearchEngine icing(options, GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("consectetur");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.next_page_token(), Eq(0));
+ // All documents should be retrievable.
+ ASSERT_THAT(results.results(), SizeIs(3));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2"));
+ EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results.results(2).document().uri(), Eq("fake_type/0"));
+ }
+}
+
+TEST_F(IcingSearchEngineTest, RestoreIndexLoseLiteIndex) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+ // 1. Create an index with a LiteIndex that will only allow one document
+ // before needing a merge.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ IcingSearchEngine icing(options, GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Add two documents. These should get merged into the main index.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+  // Add one document. This one should remain in the lite index.
+ document = DocumentBuilder(document).SetUri("fake_type/2").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ // 2. Delete the last document from the document log
+ {
+ const std::string document_log_file =
+ absl_ports::StrCat(GetDocumentDir(), "/document_log");
+ filesystem()->DeleteFile(document_log_file.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+ FileBackedProtoLog<DocumentWrapper>::Create(
+ filesystem(), document_log_file.c_str(),
+ FileBackedProtoLog<DocumentWrapper>::Options(
+ /*compress_in=*/true)));
+ std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log =
+ std::move(create_result.proto_log);
+
+ document = DocumentBuilder(document).SetUri("fake_type/0").Build();
+ DocumentWrapper wrapper;
+ *wrapper.mutable_document() = document;
+ ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
+
+ document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+ *wrapper.mutable_document() = document;
+ ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
+ }
+
+ // 3. Create the index again. This should throw out the lite index and trigger
+ // index restoration which will only restore the two documents in the main
+ // index.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ IcingSearchEngine icing(options, GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("consectetur");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.next_page_token(), Eq(0));
+ // Only the documents that were in the main index should be retrievable.
+ ASSERT_THAT(results.results(), SizeIs(2));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/1"));
+ EXPECT_THAT(results.results(1).document().uri(), Eq("fake_type/0"));
+ }
+}
+
+TEST_F(IcingSearchEngineTest, RestoreIndexLoseIndex) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+ // 1. Create an index with a LiteIndex that will only allow one document
+ // before needing a merge.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ IcingSearchEngine icing(options, GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Add two documents. These should get merged into the main index.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+  // Add one document. This one should remain in the lite index.
+ document = DocumentBuilder(document).SetUri("fake_type/2").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ // 2. Delete the last two documents from the document log.
+ {
+ const std::string document_log_file =
+ absl_ports::StrCat(GetDocumentDir(), "/document_log");
+ filesystem()->DeleteFile(document_log_file.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+ FileBackedProtoLog<DocumentWrapper>::Create(
+ filesystem(), document_log_file.c_str(),
+ FileBackedProtoLog<DocumentWrapper>::Options(
+ /*compress_in=*/true)));
+ std::unique_ptr<FileBackedProtoLog<DocumentWrapper>> document_log =
+ std::move(create_result.proto_log);
+
+ document = DocumentBuilder(document).SetUri("fake_type/0").Build();
+ DocumentWrapper wrapper;
+ *wrapper.mutable_document() = document;
+ ASSERT_THAT(document_log->WriteProto(wrapper), IsOk());
+ }
+
+ // 3. Create the index again. This should throw out the lite and main index
+ // and trigger index restoration.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ IcingSearchEngine icing(options, GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("consectetur");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.next_page_token(), Eq(0));
+ // Only the first document should be retrievable.
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/0"));
+ }
+}
+
+TEST_F(IcingSearchEngineTest, IndexingDocMergeFailureResets) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", kIpsumText)
+ .Build();
+ // 1. Create an index with a LiteIndex that will only allow one document
+ // before needing a merge.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ IcingSearchEngine icing(options, GetTestJniCache());
+
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Add two documents. These should get merged into the main index.
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ document = DocumentBuilder(document).SetUri("fake_type/1").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+  // Add one document. This one should remain in the lite index.
+ document = DocumentBuilder(document).SetUri("fake_type/2").Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ // 2. Delete the index file to trigger RestoreIndex.
+ std::string idx_subdir = GetIndexDir() + "/idx";
+ filesystem()->DeleteDirectoryRecursively(idx_subdir.c_str());
+
+ // 3. Setup a mock filesystem to fail to grow the main index once.
+ bool has_failed_already = false;
+ auto open_write_lambda = [this, &has_failed_already](const char* filename) {
+ std::string main_lexicon_suffix = "/main-lexicon.prop.2";
+ std::string filename_string(filename);
+ if (!has_failed_already &&
+ filename_string.length() >= main_lexicon_suffix.length() &&
+ filename_string.substr(
+ filename_string.length() - main_lexicon_suffix.length(),
+ main_lexicon_suffix.length()) == main_lexicon_suffix) {
+ has_failed_already = true;
+ return -1;
+ }
+ return this->filesystem()->OpenForWrite(filename);
+ };
+ auto mock_icing_filesystem = std::make_unique<IcingMockFilesystem>();
+ ON_CALL(*mock_icing_filesystem, OpenForWrite)
+ .WillByDefault(open_write_lambda);
+
+ // 4. Create the index again. This should trigger index restoration.
+ {
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
+ options.set_index_merge_size(document.ByteSizeLong());
+ TestIcingSearchEngine icing(options, std::make_unique<Filesystem>(),
+ std::move(mock_icing_filesystem),
+ std::make_unique<FakeClock>(),
+ GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(),
+ ProtoStatusIs(StatusProto::WARNING_DATA_LOSS));
+
+ SearchSpecProto search_spec;
+ search_spec.set_query("consectetur");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ SearchResultProto results =
+ icing.Search(search_spec, ScoringSpecProto::default_instance(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.next_page_token(), Eq(0));
+ // Only the last document that was added should still be retrievable.
+ ASSERT_THAT(results.results(), SizeIs(1));
+ EXPECT_THAT(results.results(0).document().uri(), Eq("fake_type/2"));
+ }
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
index 7076257..47111ad 100644
--- a/icing/index/index-processor.cc
+++ b/icing/index/index-processor.cc
@@ -105,6 +105,26 @@
}
}
}
+
+ // Merge if necessary.
+ if (overall_status.ok() && index_->WantsMerge()) {
+ ICING_VLOG(1) << "Merging the index at docid " << document_id << ".";
+ libtextclassifier3::Status merge_status = index_->Merge();
+ if (!merge_status.ok()) {
+ ICING_LOG(ERROR) << "Index merging failed. Clearing index.";
+ if (!index_->Reset().ok()) {
+ return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+ "Unable to reset to clear index after merge failure. Merge "
+ "failure=%d:%s",
+ merge_status.error_code(), merge_status.error_message().c_str()));
+ } else {
+ return absl_ports::DataLossError(IcingStringUtil::StringPrintf(
+ "Forced to reset index after merge failure. Merge failure=%d:%s",
+ merge_status.error_code(), merge_status.error_message().c_str()));
+ }
+ }
+ }
+
return overall_status;
}
diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h
index c3ccac3..083efea 100644
--- a/icing/index/index-processor.h
+++ b/icing/index/index-processor.h
@@ -66,11 +66,15 @@
// max_tokens_per_document will be added to the index. All tokens of length
// exceeding max_token_length will be shortened to max_token_length.
//
+ // Indexing a document *may* trigger an index merge. If a merge fails, then
+ // all content in the index will be lost.
+ //
// Returns:
// INVALID_ARGUMENT if document_id is less than the document_id of a
- // previously indexed
- // document or tokenization fails.
+ // previously indexed document or tokenization fails.
// RESOURCE_EXHAUSTED if the index is full and can't add anymore content.
+ // DATA_LOSS if an attempt to merge the index fails and both indices are
+ // cleared as a result.
// NOT_FOUND if there is no definition for the document's schema type.
// INTERNAL_ERROR if any other errors occur
libtextclassifier3::Status IndexDocument(const DocumentProto& document,
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index eb01731..a9b298e 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -133,10 +133,11 @@
.Build();
}
-std::unique_ptr<Index> CreateIndex(const IcingFilesystem& filesystem,
+std::unique_ptr<Index> CreateIndex(const IcingFilesystem& icing_filesystem,
+ const Filesystem& filesystem,
const std::string& index_dir) {
Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10);
- return Index::Create(options, &filesystem).ValueOrDie();
+ return Index::Create(options, &filesystem, &icing_filesystem).ValueOrDie();
}
std::unique_ptr<Normalizer> CreateNormalizer() {
@@ -162,7 +163,7 @@
return schema_store;
}
-void CleanUp(const IcingFilesystem& filesystem, const std::string& index_dir) {
+void CleanUp(const Filesystem& filesystem, const std::string& index_dir) {
filesystem.DeleteDirectoryRecursively(index_dir.c_str());
}
@@ -187,12 +188,14 @@
GetTestFilePath("icing/icu.dat")));
}
- IcingFilesystem filesystem;
+ IcingFilesystem icing_filesystem;
+ Filesystem filesystem;
std::string index_dir = GetTestTempDir() + "/index_test/";
CleanUp(filesystem, index_dir);
- std::unique_ptr<Index> index = CreateIndex(filesystem, index_dir);
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
@@ -235,12 +238,14 @@
GetTestFilePath("icing/icu.dat")));
}
- IcingFilesystem filesystem;
+ IcingFilesystem icing_filesystem;
+ Filesystem filesystem;
std::string index_dir = GetTestTempDir() + "/index_test/";
CleanUp(filesystem, index_dir);
- std::unique_ptr<Index> index = CreateIndex(filesystem, index_dir);
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
@@ -284,12 +289,14 @@
GetTestFilePath("icing/icu.dat")));
}
- IcingFilesystem filesystem;
+ IcingFilesystem icing_filesystem;
+ Filesystem filesystem;
std::string index_dir = GetTestTempDir() + "/index_test/";
CleanUp(filesystem, index_dir);
- std::unique_ptr<Index> index = CreateIndex(filesystem, index_dir);
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
@@ -333,12 +340,14 @@
GetTestFilePath("icing/icu.dat")));
}
- IcingFilesystem filesystem;
+ IcingFilesystem icing_filesystem;
+ Filesystem filesystem;
std::string index_dir = GetTestTempDir() + "/index_test/";
CleanUp(filesystem, index_dir);
- std::unique_ptr<Index> index = CreateIndex(filesystem, index_dir);
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index 824c440..84c822b 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -22,6 +22,7 @@
#include <utility>
#include <vector>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/str_cat.h"
@@ -31,7 +32,9 @@
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/index.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
+#include "icing/index/term-property-id.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/legacy/index/icing-mock-filesystem.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
@@ -54,15 +57,32 @@
namespace {
+constexpr std::string_view kIpsumText =
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla convallis "
+ "scelerisque orci quis hendrerit. Sed augue turpis, sodales eu gravida "
+ "nec, scelerisque nec leo. Maecenas accumsan interdum commodo. Aliquam "
+ "mattis sapien est, sit amet interdum risus dapibus sed. Maecenas leo "
+ "erat, fringilla in nisl a, venenatis gravida metus. Phasellus venenatis, "
+ "orci in aliquet mattis, lectus sapien volutpat arcu, sed hendrerit ligula "
+ "arcu nec mauris. Integer dolor mi, rhoncus eget gravida et, pulvinar et "
+ "nunc. Aliquam ac sollicitudin nisi. Vivamus sit amet urna vestibulum, "
+ "tincidunt eros sed, efficitur nisl. Fusce non neque accumsan, sagittis "
+ "nisi eget, sagittis turpis. Ut pulvinar nibh eu purus feugiat faucibus. "
+ "Donec tellus nulla, tincidunt vel lacus id, bibendum fermentum turpis. "
+ "Nullam ultrices sed nibh vitae aliquet. Ut risus neque, consectetur "
+ "vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
+ "placerat semper.";
+
// type and property names of FakeType
constexpr std::string_view kFakeType = "FakeType";
constexpr std::string_view kExactProperty = "exact";
constexpr std::string_view kPrefixedProperty = "prefixed";
constexpr std::string_view kUnindexedProperty1 = "unindexed1";
constexpr std::string_view kUnindexedProperty2 = "unindexed2";
-constexpr std::string_view kSubProperty = "submessage";
-constexpr std::string_view kNestedProperty = "nested";
constexpr std::string_view kRepeatedProperty = "repeated";
+constexpr std::string_view kSubProperty = "submessage";
+constexpr std::string_view kNestedType = "NestedType";
+constexpr std::string_view kNestedProperty = "nested";
constexpr DocumentId kDocumentId0 = 0;
constexpr DocumentId kDocumentId1 = 1;
@@ -87,10 +107,10 @@
icu_data_file_helper::SetUpICUDataFile(
GetTestFilePath("icing/icu.dat")));
- index_dir_ = GetTestTempDir() + "/index_test/";
+ index_dir_ = GetTestTempDir() + "/index_test";
Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- ICING_ASSERT_OK_AND_ASSIGN(index_,
- Index::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
language_segmenter_factory::SegmenterOptions segmenter_options(ULOC_US);
ICING_ASSERT_OK_AND_ASSIGN(
@@ -105,8 +125,7 @@
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_, SchemaStore::Create(&filesystem_, GetTestTempDir()));
- SchemaProto schema;
- CreateFakeTypeConfig(schema.add_types());
+ SchemaProto schema = CreateFakeSchema();
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
IndexProcessor::Options processor_options;
@@ -119,23 +138,30 @@
IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
normalizer_.get(), index_.get(),
processor_options));
+ mock_icing_filesystem_ = std::make_unique<IcingMockFilesystem>();
}
void TearDown() override {
filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
}
- std::unique_ptr<IndexProcessor> index_processor_;
+ std::unique_ptr<IcingMockFilesystem> mock_icing_filesystem_;
+
+ Filesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ std::string index_dir_;
+
std::unique_ptr<LanguageSegmenter> lang_segmenter_;
std::unique_ptr<Normalizer> normalizer_;
std::unique_ptr<Index> index_;
std::unique_ptr<SchemaStore> schema_store_;
+ std::unique_ptr<IndexProcessor> index_processor_;
private:
- static void AddProperty(std::string_view name, DataType::Code type,
- Cardinality::Code cardinality,
- TermMatchType::Code term_match_type,
- SchemaTypeConfigProto* type_config) {
+ static void AddStringProperty(std::string_view name, DataType::Code type,
+ Cardinality::Code cardinality,
+ TermMatchType::Code term_match_type,
+ SchemaTypeConfigProto* type_config) {
auto* prop = type_config->add_properties();
prop->set_property_name(std::string(name));
prop->set_data_type(type);
@@ -145,39 +171,55 @@
IndexingConfig::TokenizerType::PLAIN);
}
- static void CreateFakeTypeConfig(SchemaTypeConfigProto* type_config) {
- type_config->set_schema_type(std::string(kFakeType));
-
- AddProperty(std::string(kExactProperty), DataType::STRING,
- Cardinality::REQUIRED, TermMatchType::EXACT_ONLY, type_config);
-
- AddProperty(std::string(kPrefixedProperty), DataType::STRING,
- Cardinality::OPTIONAL, TermMatchType::PREFIX, type_config);
-
- // Don't set IndexingConfig
+ static void AddNonIndexedProperty(std::string_view name, DataType::Code type,
+ Cardinality::Code cardinality,
+ SchemaTypeConfigProto* type_config) {
auto* prop = type_config->add_properties();
- prop->set_property_name(std::string(kUnindexedProperty1));
- prop->set_data_type(DataType::STRING);
- prop->set_cardinality(Cardinality::OPTIONAL);
-
- AddProperty(std::string(kUnindexedProperty2), DataType::BYTES,
- Cardinality::OPTIONAL, TermMatchType::UNKNOWN, type_config);
-
- AddProperty(std::string(kRepeatedProperty), DataType::STRING,
- Cardinality::REPEATED, TermMatchType::PREFIX, type_config);
-
- AddProperty(kSubProperty, DataType::DOCUMENT, Cardinality::OPTIONAL,
- TermMatchType::UNKNOWN, type_config);
-
- std::string recipients_name =
- absl_ports::StrCat(kSubProperty, kPropertySeparator, kNestedProperty);
- AddProperty(recipients_name, DataType::STRING, Cardinality::OPTIONAL,
- TermMatchType::PREFIX, type_config);
+ prop->set_property_name(std::string(name));
+ prop->set_data_type(type);
+ prop->set_cardinality(cardinality);
}
- Filesystem filesystem_;
- IcingFilesystem icing_filesystem_;
- std::string index_dir_;
+ static SchemaProto CreateFakeSchema() {
+ SchemaProto schema;
+
+ // Add top-level type
+ auto* type_config = schema.add_types();
+ type_config->set_schema_type(std::string(kFakeType));
+
+ AddStringProperty(std::string(kExactProperty), DataType::STRING,
+ Cardinality::REQUIRED, TermMatchType::EXACT_ONLY,
+ type_config);
+
+ AddStringProperty(std::string(kPrefixedProperty), DataType::STRING,
+ Cardinality::OPTIONAL, TermMatchType::PREFIX,
+ type_config);
+
+ AddNonIndexedProperty(std::string(kUnindexedProperty1), DataType::STRING,
+ Cardinality::OPTIONAL, type_config);
+
+ AddNonIndexedProperty(std::string(kUnindexedProperty2), DataType::BYTES,
+ Cardinality::OPTIONAL, type_config);
+
+ AddStringProperty(std::string(kRepeatedProperty), DataType::STRING,
+ Cardinality::REPEATED, TermMatchType::PREFIX,
+ type_config);
+
+ auto* prop = type_config->add_properties();
+ prop->set_property_name(std::string(kSubProperty));
+ prop->set_data_type(DataType::DOCUMENT);
+ prop->set_cardinality(Cardinality::OPTIONAL);
+ prop->set_schema_type(std::string(kNestedType));
+
+ // Add nested type
+ type_config = schema.add_types();
+ type_config->set_schema_type(std::string(kNestedType));
+
+ AddStringProperty(kNestedProperty, DataType::STRING, Cardinality::OPTIONAL,
+ TermMatchType::PREFIX, type_config);
+
+ return schema;
+ }
};
std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
@@ -610,6 +652,109 @@
kDocumentId0, std::vector<SectionId>{kPrefixedSectionId})));
}
+TEST_F(IndexProcessorTest, IndexingDocAutomaticMerge) {
+ // Create the index with a smaller index_merge_size - merging every time we
+ // add 101 documents. This will result in a small LiteIndex, which will be
+ // easier to fill up. The LiteIndex itself will have a size larger than the
+ // index_merge_size because it adds extra buffer to ensure that it always has
+ // room to fit whatever document will trigger the merge.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), kIpsumText)
+ .Build();
+ Index::Options options(index_dir_,
+ /*index_merge_size=*/document.ByteSizeLong() * 100);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ IndexProcessor::Options processor_options;
+ processor_options.max_tokens_per_document = 1000;
+ processor_options.token_limit_behavior =
+ IndexProcessor::Options::TokenLimitBehavior::kReturnError;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_processor_,
+ IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
+ normalizer_.get(), index_.get(),
+ processor_options));
+ DocumentId doc_id = 0;
+ // Have determined experimentally that indexing 3373 documents with this text
+ // will cause the LiteIndex to fill up. Further indexing will fail unless the
+ // index processor properly merges the LiteIndex into the MainIndex and
+ // empties the LiteIndex.
+ constexpr int kNumDocsLiteIndexExhaustion = 3373;
+ for (; doc_id < kNumDocsLiteIndexExhaustion; ++doc_id) {
+ EXPECT_THAT(index_processor_->IndexDocument(document, doc_id), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id));
+ }
+ EXPECT_THAT(index_processor_->IndexDocument(document, doc_id), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id));
+}
+
+TEST_F(IndexProcessorTest, IndexingDocMergeFailureResets) {
+ // 1. Setup a mock filesystem to fail to grow the main index.
+ auto open_write_lambda = [this](const char* filename) {
+ std::string main_lexicon_suffix =
+ "/main-lexicon.prop." +
+ std::to_string(GetHasHitsInPrefixSectionPropertyId());
+ std::string filename_string(filename);
+ if (filename_string.length() >= main_lexicon_suffix.length() &&
+ filename_string.substr(
+ filename_string.length() - main_lexicon_suffix.length(),
+ main_lexicon_suffix.length()) == main_lexicon_suffix) {
+ return -1;
+ }
+ return this->filesystem_.OpenForWrite(filename);
+ };
+ ON_CALL(*mock_icing_filesystem_, OpenForWrite)
+ .WillByDefault(open_write_lambda);
+
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kPrefixedProperty), kIpsumText)
+ .Build();
+
+ // 2. Recreate the index with the mock filesystem and a merge size that will
+ // only allow one document to be added before requiring a merge.
+ Index::Options options(index_dir_,
+ /*index_merge_size=*/document.ByteSizeLong());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_,
+ Index::Create(options, &filesystem_, mock_icing_filesystem_.get()));
+
+ IndexProcessor::Options processor_options;
+ processor_options.max_tokens_per_document = 1000;
+ processor_options.token_limit_behavior =
+ IndexProcessor::Options::TokenLimitBehavior::kReturnError;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_processor_,
+ IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
+ normalizer_.get(), index_.get(),
+ processor_options));
+
+ // 3. Index one document. This should fit in the LiteIndex without requiring a
+ // merge.
+ DocumentId doc_id = 0;
+ EXPECT_THAT(index_processor_->IndexDocument(document, doc_id), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id));
+
+ // 4. Add one more document to trigger a merge, which should fail and result
+ // in a Reset.
+ ++doc_id;
+ EXPECT_THAT(index_processor_->IndexDocument(document, doc_id),
+ StatusIs(libtextclassifier3::StatusCode::DATA_LOSS));
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
+
+ // 5. Indexing a new document should succeed.
+ EXPECT_THAT(index_processor_->IndexDocument(document, doc_id), IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(doc_id));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/index.cc b/icing/index/index.cc
index 0b014d9..1fb0dc0 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -24,9 +24,11 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/index/hit/hit.h"
+#include "icing/index/iterator/doc-hit-info-iterator-or.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/lite/doc-hit-info-iterator-term-lite.h"
#include "icing/index/lite/lite-index.h"
+#include "icing/index/main/doc-hit-info-iterator-term-main.h"
#include "icing/index/term-id-codec.h"
#include "icing/index/term-property-id.h"
#include "icing/legacy/core/icing-string-util.h"
@@ -34,6 +36,7 @@
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
+#include "icing/store/document-id.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -57,8 +60,13 @@
options.index_merge_size);
}
-// TODO(tjbarron) implement for real when the main index is added.
+std::string MakeMainIndexFilepath(const std::string& base_dir) {
+ return base_dir + "/idx/main";
+}
+
IcingDynamicTrie::Options GetMainLexiconOptions() {
+ // The default values for IcingDynamicTrie::Options is fine for the main
+ // lexicon.
return IcingDynamicTrie::Options();
}
@@ -67,6 +75,9 @@
bool IsTermInNamespaces(
const IcingDynamicTrie::PropertyReadersAll& property_reader,
uint32_t value_index, const std::vector<NamespaceId>& namespace_ids) {
+ if (namespace_ids.empty()) {
+ return true;
+ }
for (NamespaceId namespace_id : namespace_ids) {
if (property_reader.HasProperty(GetNamespacePropertyId(namespace_id),
value_index)) {
@@ -77,11 +88,64 @@
return false;
}
+enum class MergeAction { kTakeLiteTerm, kTakeMainTerm, kMergeTerms };
+std::vector<TermMetadata> MergeTermMetadatas(
+ std::vector<TermMetadata> lite_term_metadata_list,
+ std::vector<TermMetadata> main_term_metadata_list, int num_to_return) {
+ std::vector<TermMetadata> merged_term_metadata_list;
+ merged_term_metadata_list.reserve(
+ std::min(lite_term_metadata_list.size() + main_term_metadata_list.size(),
+ static_cast<size_t>(num_to_return)));
+
+ auto lite_term_itr = lite_term_metadata_list.begin();
+ auto main_term_itr = main_term_metadata_list.begin();
+ MergeAction merge_action;
+ while (merged_term_metadata_list.size() < num_to_return &&
+ (lite_term_itr != lite_term_metadata_list.end() ||
+ main_term_itr != main_term_metadata_list.end())) {
+ // Look at the next metadata in each list, if available, and
+ // determine how to merge.
+ if (main_term_itr == main_term_metadata_list.end()) {
+ merge_action = MergeAction::kTakeLiteTerm;
+ } else if (lite_term_itr == lite_term_metadata_list.end()) {
+ merge_action = MergeAction::kTakeMainTerm;
+ } else if (lite_term_itr->content < main_term_itr->content) {
+ merge_action = MergeAction::kTakeLiteTerm;
+ } else if (main_term_itr->content < lite_term_itr->content) {
+ merge_action = MergeAction::kTakeMainTerm;
+ } else {
+ // The next metadatas refer to the same term. Combine them.
+ merge_action = MergeAction::kMergeTerms;
+ }
+ switch (merge_action) {
+ case MergeAction::kTakeLiteTerm:
+ merged_term_metadata_list.push_back(std::move(*lite_term_itr));
+ ++lite_term_itr;
+ break;
+ case MergeAction::kTakeMainTerm:
+ merged_term_metadata_list.push_back(std::move(*main_term_itr));
+ ++main_term_itr;
+ break;
+ case MergeAction::kMergeTerms:
+ int total_est_hit_count =
+ lite_term_itr->hit_count + main_term_itr->hit_count;
+ merged_term_metadata_list.emplace_back(
+ std::move(lite_term_itr->content), total_est_hit_count);
+ ++lite_term_itr;
+ ++main_term_itr;
+ break;
+ }
+ }
+ return merged_term_metadata_list;
+}
+
} // namespace
libtextclassifier3::StatusOr<std::unique_ptr<Index>> Index::Create(
- const Options& options, const IcingFilesystem* filesystem) {
+ const Options& options, const Filesystem* filesystem,
+ const IcingFilesystem* icing_filesystem) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
+ ICING_RETURN_ERROR_IF_NULL(icing_filesystem);
ICING_ASSIGN_OR_RETURN(LiteIndex::Options lite_index_options,
CreateLiteIndexOptions(options));
@@ -91,38 +155,67 @@
IcingDynamicTrie::max_value_index(GetMainLexiconOptions()),
IcingDynamicTrie::max_value_index(
lite_index_options.lexicon_options)));
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<LiteIndex> lite_index,
- LiteIndex::Create(lite_index_options, filesystem));
- return std::unique_ptr<Index>(
- new Index(options, std::move(term_id_codec), std::move(lite_index)));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<LiteIndex> lite_index,
+ LiteIndex::Create(lite_index_options, icing_filesystem));
+ ICING_ASSIGN_OR_RETURN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(MakeMainIndexFilepath(options.base_dir), filesystem,
+ icing_filesystem));
+ return std::unique_ptr<Index>(new Index(options, std::move(term_id_codec),
+ std::move(lite_index),
+ std::move(main_index)));
+}
+
+libtextclassifier3::Status Index::TruncateTo(DocumentId document_id) {
+ if (lite_index_->last_added_document_id() != kInvalidDocumentId &&
+ lite_index_->last_added_document_id() > document_id) {
+ ICING_VLOG(1) << "Clipping to " << document_id
+ << ". Throwing out lite index which is at "
+ << lite_index_->last_added_document_id();
+ ICING_RETURN_IF_ERROR(lite_index_->Reset());
+ }
+ if (main_index_->last_added_document_id() != kInvalidDocumentId &&
+ main_index_->last_added_document_id() > document_id) {
+ ICING_VLOG(1) << "Clipping to " << document_id
+ << ". Throwing out main index which is at "
+ << main_index_->last_added_document_id();
+ ICING_RETURN_IF_ERROR(main_index_->Reset());
+ }
+ return libtextclassifier3::Status::OK;
}
libtextclassifier3::StatusOr<std::unique_ptr<DocHitInfoIterator>>
Index::GetIterator(const std::string& term, SectionIdMask section_id_mask,
TermMatchType::Code term_match_type) {
+ std::unique_ptr<DocHitInfoIterator> lite_itr;
+ std::unique_ptr<DocHitInfoIterator> main_itr;
switch (term_match_type) {
case TermMatchType::EXACT_ONLY:
- return std::make_unique<DocHitInfoIteratorTermLiteExact>(
+ lite_itr = std::make_unique<DocHitInfoIteratorTermLiteExact>(
term_id_codec_.get(), lite_index_.get(), term, section_id_mask);
+ main_itr = std::make_unique<DocHitInfoIteratorTermMainExact>(
+ main_index_.get(), term, section_id_mask);
+ break;
case TermMatchType::PREFIX:
- return std::make_unique<DocHitInfoIteratorTermLitePrefix>(
+ lite_itr = std::make_unique<DocHitInfoIteratorTermLitePrefix>(
term_id_codec_.get(), lite_index_.get(), term, section_id_mask);
+ main_itr = std::make_unique<DocHitInfoIteratorTermMainPrefix>(
+ main_index_.get(), term, section_id_mask);
+ break;
default:
return absl_ports::InvalidArgumentError(
absl_ports::StrCat("Invalid TermMatchType: ",
TermMatchType::Code_Name(term_match_type)));
}
+ return std::make_unique<DocHitInfoIteratorOr>(std::move(lite_itr),
+ std::move(main_itr));
}
libtextclassifier3::StatusOr<std::vector<TermMetadata>>
-Index::FindTermsByPrefix(const std::string& prefix,
- const std::vector<NamespaceId>& namespace_ids,
- int num_to_return) {
- std::vector<TermMetadata> term_metadata_list;
- if (num_to_return <= 0) {
- return term_metadata_list;
- }
-
+Index::FindLiteTermsByPrefix(const std::string& prefix,
+ const std::vector<NamespaceId>& namespace_ids,
+ int num_to_return) {
// Finds all the terms that start with the given prefix in the lexicon.
IcingDynamicTrie::Iterator term_iterator(lite_index_->lexicon(),
prefix.c_str());
@@ -130,13 +223,13 @@
// A property reader to help check if a term has some property.
IcingDynamicTrie::PropertyReadersAll property_reader(lite_index_->lexicon());
+ std::vector<TermMetadata> term_metadata_list;
while (term_iterator.IsValid() && term_metadata_list.size() < num_to_return) {
uint32_t term_value_index = term_iterator.GetValueIndex();
// Skips the terms that don't exist in the given namespaces. We won't skip
// any terms if namespace_ids is empty.
- if (!namespace_ids.empty() &&
- !IsTermInNamespaces(property_reader, term_value_index, namespace_ids)) {
+ if (!IsTermInNamespaces(property_reader, term_value_index, namespace_ids)) {
term_iterator.Advance();
continue;
}
@@ -151,10 +244,39 @@
term_iterator.Advance();
}
-
+ if (term_iterator.IsValid()) {
+ // We exited the loop above because we hit the num_to_return limit.
+ ICING_LOG(WARNING) << "Ran into limit of " << num_to_return
+ << " retrieving suggestions for " << prefix
+ << ". Some suggestions may not be returned and others "
+ "may be misranked.";
+ }
return term_metadata_list;
}
+libtextclassifier3::StatusOr<std::vector<TermMetadata>>
+Index::FindTermsByPrefix(const std::string& prefix,
+ const std::vector<NamespaceId>& namespace_ids,
+ int num_to_return) {
+ std::vector<TermMetadata> term_metadata_list;
+ if (num_to_return <= 0) {
+ return term_metadata_list;
+ }
+
+ // Get results from the LiteIndex.
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<TermMetadata> lite_term_metadata_list,
+ FindLiteTermsByPrefix(prefix, namespace_ids, num_to_return));
+
+ // Append results from the MainIndex.
+ ICING_ASSIGN_OR_RETURN(
+ std::vector<TermMetadata> main_term_metadata_list,
+ main_index_->FindTermsByPrefix(prefix, namespace_ids, num_to_return));
+
+ return MergeTermMetadatas(std::move(lite_term_metadata_list),
+ std::move(main_term_metadata_list), num_to_return);
+}
+
libtextclassifier3::Status Index::Editor::AddHit(const char* term,
Hit::Score score) {
// Step 1: See if this term is already in the lexicon
diff --git a/icing/index/index.h b/icing/index/index.h
index d8e409c..1305b2c 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -23,9 +23,12 @@
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/file/filesystem.h"
#include "icing/index/hit/hit.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/index/lite/lite-index.h"
+#include "icing/index/main/main-index-merger.h"
+#include "icing/index/main/main-index.h"
#include "icing/index/term-id-codec.h"
#include "icing/index/term-metadata.h"
#include "icing/legacy/index/icing-filesystem.h"
@@ -76,15 +79,22 @@
// INVALID_ARGUMENT if options have invalid values
// INTERNAL on I/O error
static libtextclassifier3::StatusOr<std::unique_ptr<Index>> Create(
- const Options& options, const IcingFilesystem* filesystem);
+ const Options& options, const Filesystem* filesystem,
+ const IcingFilesystem* icing_filesystem);
// Clears all files created by the index. Returns OK if all files were
// cleared.
- libtextclassifier3::Status Reset() { return lite_index_->Reset(); }
+ libtextclassifier3::Status Reset() {
+ ICING_RETURN_IF_ERROR(lite_index_->Reset());
+ return main_index_->Reset();
+ }
// Brings components of the index into memory in anticipation of a query in
// order to reduce latency.
- void Warm() { lite_index_->Warm(); }
+ void Warm() {
+ lite_index_->Warm();
+ main_index_->Warm();
+ }
// Syncs all the data and metadata changes to disk.
//
@@ -92,16 +102,28 @@
// OK on success
// INTERNAL on I/O errors
libtextclassifier3::Status PersistToDisk() {
- return lite_index_->PersistToDisk();
+ ICING_RETURN_IF_ERROR(lite_index_->PersistToDisk());
+ return main_index_->PersistToDisk();
}
- // Compute the checksum over the entire Index's subcomponents.
- Crc32 ComputeChecksum() { return lite_index_->ComputeChecksum(); }
+ // Discard parts of the index if they contain data for document ids greater
+ // than document_id.
+ //
+ // NOTE: This means that TruncateTo(kInvalidDocumentId) will have no effect.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL on I/O errors
+ libtextclassifier3::Status TruncateTo(DocumentId document_id);
// DocumentIds are always inserted in increasing order. Returns the largest
// document_id added to the index.
DocumentId last_added_document_id() const {
- return lite_index_->last_added_document_id();
+ DocumentId lite_document_id = lite_index_->last_added_document_id();
+ if (lite_document_id != kInvalidDocumentId) {
+ return lite_document_id;
+ }
+ return main_index_->last_added_document_id();
}
// Returns debug information for the index in out.
@@ -111,6 +133,7 @@
// lists.
void GetDebugInfo(int verbosity, std::string* out) const {
lite_index_->GetDebugInfo(verbosity, out);
+ main_index_->GetDebugInfo(verbosity, out);
}
// Returns the byte size of the all the elements held in the index. This
@@ -121,7 +144,11 @@
// Byte size on success
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<int64_t> GetElementsSize() const {
- return lite_index_->GetElementsSize();
+ ICING_ASSIGN_OR_RETURN(int64_t lite_index_size,
+ lite_index_->GetElementsSize());
+ ICING_ASSIGN_OR_RETURN(int64_t main_index_size,
+ main_index_->GetElementsSize());
+ return lite_index_size + main_index_size;
}
// Create an iterator to iterate through all doc hit infos in the index that
@@ -191,14 +218,40 @@
section_id, term_match_type, namespace_id);
}
+ bool WantsMerge() const { return lite_index_->WantsMerge(); }
+
+ // Merges newly-added hits in the LiteIndex into the MainIndex.
+ //
+ // RETURNS:
+ // - INTERNAL on IO error while writing to the MainIndex.
+ // - RESOURCE_EXHAUSTED error if unable to grow the index.
+ libtextclassifier3::Status Merge() {
+ ICING_ASSIGN_OR_RETURN(MainIndex::LexiconMergeOutputs outputs,
+ main_index_->MergeLexicon(lite_index_->lexicon()));
+ ICING_ASSIGN_OR_RETURN(std::vector<TermIdHitPair> term_id_hit_pairs,
+ MainIndexMerger::TranslateAndExpandLiteHits(
+ *lite_index_, *term_id_codec_, outputs));
+ ICING_RETURN_IF_ERROR(main_index_->AddHits(
+ *term_id_codec_, std::move(outputs.backfill_map),
+ std::move(term_id_hit_pairs), lite_index_->last_added_document_id()));
+ return lite_index_->Reset();
+ }
+
private:
Index(const Options& options, std::unique_ptr<TermIdCodec> term_id_codec,
- std::unique_ptr<LiteIndex>&& lite_index)
+ std::unique_ptr<LiteIndex> lite_index,
+ std::unique_ptr<MainIndex> main_index)
: lite_index_(std::move(lite_index)),
+ main_index_(std::move(main_index)),
options_(options),
term_id_codec_(std::move(term_id_codec)) {}
+ libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindLiteTermsByPrefix(
+ const std::string& prefix, const std::vector<NamespaceId>& namespace_ids,
+ int num_to_return);
+
std::unique_ptr<LiteIndex> lite_index_;
+ std::unique_ptr<MainIndex> main_index_;
const Options options_;
std::unique_ptr<TermIdCodec> term_id_codec_;
};
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index f7ca285..1d12274 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -52,33 +52,52 @@
using ::testing::Ne;
using ::testing::NiceMock;
using ::testing::Not;
+using ::testing::Return;
using ::testing::SizeIs;
using ::testing::Test;
using ::testing::UnorderedElementsAre;
+int GetBlockSize() { return getpagesize(); }
+
class IndexTest : public Test {
protected:
void SetUp() override {
index_dir_ = GetTestTempDir() + "/index_test/";
Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- ICING_ASSERT_OK_AND_ASSIGN(index_, Index::Create(options, &filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
}
void TearDown() override {
- filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
+ icing_filesystem_.DeleteDirectoryRecursively(index_dir_.c_str());
}
std::unique_ptr<Index> index_;
std::string index_dir_;
- IcingFilesystem filesystem_;
+ IcingFilesystem icing_filesystem_;
+ Filesystem filesystem_;
};
constexpr DocumentId kDocumentId0 = 0;
constexpr DocumentId kDocumentId1 = 1;
constexpr DocumentId kDocumentId2 = 2;
+constexpr DocumentId kDocumentId3 = 3;
+constexpr DocumentId kDocumentId4 = 4;
+constexpr DocumentId kDocumentId5 = 5;
+constexpr DocumentId kDocumentId6 = 6;
+constexpr DocumentId kDocumentId7 = 7;
constexpr SectionId kSectionId2 = 2;
constexpr SectionId kSectionId3 = 3;
+// The value returned by IndexBlock::ApproximateFullPostingListHitsForBlock(
+// GetBlockSize(),
+// GetPostingListIndexBits(posting_list_utils::min_posting_list_size()));
+constexpr int kMinSizePlApproxHits = 3;
+// The value returned by IndexBlock::ApproximateFullPostingListHitsForBlock(
+// GetBlockSize(),
+// GetPostingListIndexBits(2 * posting_list_utils::min_posting_list_size()));
+constexpr int kSecondSmallestPlApproxHits = 7;
+
std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
std::vector<DocHitInfo> infos;
while (iterator->Advance().ok()) {
@@ -112,29 +131,50 @@
TEST_F(IndexTest, CreationWithNullPointerShouldFail) {
Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- EXPECT_THAT(Index::Create(options, /*filesystem=*/nullptr),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ Index::Create(options, &filesystem_, /*icing_filesystem=*/nullptr),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ Index::Create(options, /*filesystem=*/nullptr, &icing_filesystem_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_F(IndexTest, EmptyIndex) {
- // Assert
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
ICING_ASSERT_OK_AND_ASSIGN(
itr,
index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
+}
+
+TEST_F(IndexTest, EmptyIndexAfterMerge) {
+ // Merging an empty index should succeed, but have no effects.
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
}
TEST_F(IndexTest, AdvancePastEnd) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
EXPECT_THAT(edit.AddHit("foo"), IsOk());
@@ -143,7 +183,32 @@
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("bar", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(itr->doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr,
+ index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(itr->doc_hit_info(),
+ EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
+}
+
+TEST_F(IndexTest, AdvancePastEndAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("bar", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(itr->doc_hit_info(),
EqualsDocHitInfo(kInvalidDocumentId, std::vector<SectionId>()));
@@ -158,12 +223,27 @@
}
TEST_F(IndexTest, SingleHitSingleTermIndex) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
EXPECT_THAT(edit.AddHit("foo"), IsOk());
- // Assert
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+TEST_F(IndexTest, SingleHitSingleTermIndexAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
@@ -175,13 +255,29 @@
}
TEST_F(IndexTest, SingleHitMultiTermIndex) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
EXPECT_THAT(edit.AddHit("foo"), IsOk());
EXPECT_THAT(edit.AddHit("bar"), IsOk());
- // Assert
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+TEST_F(IndexTest, SingleHitMultiTermIndexAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.AddHit("bar"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
@@ -193,23 +289,36 @@
}
TEST_F(IndexTest, NoHitMultiTermIndex) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
EXPECT_THAT(edit.AddHit("foo"), IsOk());
EXPECT_THAT(edit.AddHit("bar"), IsOk());
- // Assert
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+TEST_F(IndexTest, NoHitMultiTermIndexAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.AddHit("bar"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(itr->Advance(),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, MultiHitMultiTermIndex) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
EXPECT_THAT(edit.AddHit("foo"), IsOk());
@@ -222,7 +331,32 @@
/*namespace_id=*/0);
EXPECT_THAT(edit.AddHit("foo"), IsOk());
- // Assert
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));
+}
+
+TEST_F(IndexTest, MultiHitMultiTermIndexAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("bar"), IsOk());
+
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
@@ -235,7 +369,6 @@
}
TEST_F(IndexTest, MultiHitSectionRestrict) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
EXPECT_THAT(edit.AddHit("foo"), IsOk());
@@ -244,7 +377,28 @@
/*namespace_id=*/0);
EXPECT_THAT(edit.AddHit("foo"), IsOk());
- // Assert
+ SectionIdMask desired_section = 1U << kSectionId2;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", desired_section, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+}
+
+TEST_F(IndexTest, MultiHitSectionRestrictAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
SectionIdMask desired_section = 1U << kSectionId2;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -257,18 +411,17 @@
}
TEST_F(IndexTest, SingleHitDedupeIndex) {
- Crc32 empty_crc = index_->ComputeChecksum();
- // Act
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t size, index_->GetElementsSize());
+ EXPECT_THAT(size, Eq(0));
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
EXPECT_THAT(edit.AddHit("foo"), IsOk());
- Crc32 first_hit_crc = index_->ComputeChecksum();
- EXPECT_THAT(first_hit_crc.Get(), Ne(empty_crc.Get()));
+ ICING_ASSERT_OK_AND_ASSIGN(size, index_->GetElementsSize());
+ EXPECT_THAT(size, Gt(0));
EXPECT_THAT(edit.AddHit("foo"), IsOk());
- Crc32 second_hit_crc = index_->ComputeChecksum();
- EXPECT_THAT(second_hit_crc.Get(), Eq(first_hit_crc.Get()));
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t new_size, index_->GetElementsSize());
+ EXPECT_THAT(new_size, Eq(size));
- // Assert
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
@@ -280,12 +433,27 @@
}
TEST_F(IndexTest, PrefixHit) {
- // Act
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
ASSERT_THAT(edit.AddHit("fool"), IsOk());
- // Assert
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+TEST_F(IndexTest, PrefixHitAfterMerge) {
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("fool"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
@@ -297,7 +465,6 @@
}
TEST_F(IndexTest, MultiPrefixHit) {
- // Act
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
ASSERT_THAT(edit.AddHit("fool"), IsOk());
@@ -306,7 +473,29 @@
/*namespace_id=*/0);
ASSERT_THAT(edit.AddHit("foo"), IsOk());
- // Assert
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+}
+
+TEST_F(IndexTest, MultiPrefixHitAfterMerge) {
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("fool"), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foo"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
@@ -320,7 +509,6 @@
}
TEST_F(IndexTest, NoExactHitInPrefixQuery) {
- // Act
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
ASSERT_THAT(edit.AddHit("fool"), IsOk());
@@ -329,7 +517,26 @@
/*namespace_id=*/0);
ASSERT_THAT(edit.AddHit("foo"), IsOk());
- // Assert
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId1, std::vector<SectionId>{kSectionId3})));
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+}
+
+TEST_F(IndexTest, NoExactHitInPrefixQueryAfterMerge) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("fool"), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foo"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
@@ -340,13 +547,28 @@
}
TEST_F(IndexTest, PrefixHitDedupe) {
- // Act
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
ASSERT_THAT(edit.AddHit("foo"), IsOk());
ASSERT_THAT(edit.AddHit("fool"), IsOk());
- // Assert
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+TEST_F(IndexTest, PrefixHitDedupeAfterMerge) {
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.AddHit("fool"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
@@ -361,15 +583,18 @@
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("foo", id_mask, TermMatchType::PREFIX));
- EXPECT_THAT(itr->ToString(), Eq("0000000000001100:foo*"));
+ EXPECT_THAT(itr->ToString(),
+ Eq("(0000000000001100:foo* OR 0000000000001100:foo*)"));
ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("foo", kSectionIdMaskAll,
TermMatchType::PREFIX));
- EXPECT_THAT(itr->ToString(), Eq("1111111111111111:foo*"));
+ EXPECT_THAT(itr->ToString(),
+ Eq("(1111111111111111:foo* OR 1111111111111111:foo*)"));
ICING_ASSERT_OK_AND_ASSIGN(itr, index_->GetIterator("foo", kSectionIdMaskNone,
TermMatchType::PREFIX));
- EXPECT_THAT(itr->ToString(), Eq("0000000000000000:foo*"));
+ EXPECT_THAT(itr->ToString(),
+ Eq("(0000000000000000:foo* OR 0000000000000000:foo*)"));
}
TEST_F(IndexTest, ExactToString) {
@@ -377,17 +602,20 @@
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("foo", id_mask, TermMatchType::EXACT_ONLY));
- EXPECT_THAT(itr->ToString(), Eq("0000000000001100:foo"));
+ EXPECT_THAT(itr->ToString(),
+ Eq("(0000000000001100:foo OR 0000000000001100:foo)"));
ICING_ASSERT_OK_AND_ASSIGN(
itr,
index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
- EXPECT_THAT(itr->ToString(), Eq("1111111111111111:foo"));
+ EXPECT_THAT(itr->ToString(),
+ Eq("(1111111111111111:foo OR 1111111111111111:foo)"));
ICING_ASSERT_OK_AND_ASSIGN(itr,
index_->GetIterator("foo", kSectionIdMaskNone,
TermMatchType::EXACT_ONLY));
- EXPECT_THAT(itr->ToString(), Eq("0000000000000000:foo"));
+ EXPECT_THAT(itr->ToString(),
+ Eq("(0000000000000000:foo OR 0000000000000000:foo)"));
}
TEST_F(IndexTest, NonAsciiTerms) {
@@ -411,26 +639,52 @@
kDocumentId0, std::vector<SectionId>{kSectionId2})));
}
+TEST_F(IndexTest, NonAsciiTermsAfterMerge) {
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("こんにちは"), IsOk());
+ ASSERT_THAT(edit.AddHit("あなた"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("こんに", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ ICING_ASSERT_OK_AND_ASSIGN(itr,
+ index_->GetIterator("あなた", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+}
+
TEST_F(IndexTest, FullIndex) {
// Make a smaller index so that it's easier to fill up.
Index::Options options(index_dir_, /*index_merge_size=*/1024);
- ICING_ASSERT_OK_AND_ASSIGN(index_, Index::Create(options, &filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
std::default_random_engine random;
- libtextclassifier3::Status status = libtextclassifier3::Status::OK;
- constexpr int kTokenSize = 5;
- DocumentId document_id = 0;
std::vector<std::string> query_terms;
+ for (int i = 0; i < 2600; ++i) {
+ constexpr int kTokenSize = 5;
+ query_terms.push_back(RandomString(kAlNumAlphabet, kTokenSize, &random));
+ }
+
+ DocumentId document_id = 0;
+ libtextclassifier3::Status status = libtextclassifier3::Status::OK;
+ std::uniform_int_distribution<size_t> uniform(0u, query_terms.size() - 1);
while (status.ok()) {
for (int i = 0; i < 100; ++i) {
Index::Editor edit =
index_->Edit(document_id, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- std::string term = RandomString(kAlNumAlphabet, kTokenSize, &random);
- status = edit.AddHit(term.c_str());
- if (i % 50 == 0) {
- // Remember one out of every fifty terms to query for later.
- query_terms.push_back(std::move(term));
- }
+ size_t idx = uniform(random);
+ status = edit.AddHit(query_terms.at(idx).c_str());
if (!status.ok()) {
break;
}
@@ -438,7 +692,6 @@
++document_id;
}
- // Assert
// Adding more hits should fail.
Index::Editor edit =
index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY,
@@ -450,10 +703,10 @@
EXPECT_THAT(edit.AddHit("baz"),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- for (const std::string& term : query_terms) {
+ for (int i = 0; i < query_terms.size(); i += 25) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator(term.c_str(), kSectionIdMaskAll,
+ index_->GetIterator(query_terms.at(i).c_str(), kSectionIdMaskAll,
TermMatchType::EXACT_ONLY));
// Each query term should contain at least one hit - there may have been
// other hits for this term that were added.
@@ -462,12 +715,74 @@
EXPECT_THAT(index_->last_added_document_id(), Eq(document_id - 1));
}
+TEST_F(IndexTest, FullIndexMerge) {
+ // Make a smaller index so that it's easier to fill up.
+ Index::Options options(index_dir_, /*index_merge_size=*/1024);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ std::default_random_engine random;
+ std::vector<std::string> query_terms;
+ for (int i = 0; i < 2600; ++i) {
+ constexpr int kTokenSize = 5;
+ query_terms.push_back(RandomString(kAlNumAlphabet, kTokenSize, &random));
+ }
+
+ DocumentId document_id = 0;
+ libtextclassifier3::Status status = libtextclassifier3::Status::OK;
+ std::uniform_int_distribution<size_t> uniform(0u, query_terms.size() - 1);
+ while (status.ok()) {
+ for (int i = 0; i < 100; ++i) {
+ Index::Editor edit =
+ index_->Edit(document_id, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ size_t idx = uniform(random);
+ status = edit.AddHit(query_terms.at(idx).c_str());
+ if (!status.ok()) {
+ break;
+ }
+ }
+ ++document_id;
+ }
+ EXPECT_THAT(status,
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+
+ // Adding more hits should fail.
+ Index::Editor edit =
+ index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(edit.AddHit("bar"),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(edit.AddHit("baz"),
+ StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
+ EXPECT_THAT(index_->last_added_document_id(), Eq(document_id - 1));
+
+ // After merging with the main index, adding more hits should succeed now.
+ ICING_ASSERT_OK(index_->Merge());
+ edit =
+ index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY, 0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.AddHit("baz"), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("bar", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ // We know that "bar" should have at least one hit because we just added it!
+ EXPECT_THAT(itr->Advance(), IsOk());
+ EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(document_id + 1));
+ EXPECT_THAT(index_->last_added_document_id(), Eq(document_id + 1));
+}
+
TEST_F(IndexTest, IndexCreateIOFailure) {
// Create the index with mock filesystem. By default, Mock will return false,
// so the first attempted file operation will fail.
- NiceMock<IcingMockFilesystem> mock_filesystem;
+ NiceMock<IcingMockFilesystem> mock_icing_filesystem;
+ ON_CALL(mock_icing_filesystem, CreateDirectoryRecursively)
+ .WillByDefault(Return(false));
Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- EXPECT_THAT(Index::Create(options, &mock_filesystem),
+ EXPECT_THAT(Index::Create(options, &filesystem_, &mock_icing_filesystem),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
@@ -483,20 +798,21 @@
// Corrrupt the index file.
std::string hit_buffer_filename = index_dir_ + "/idx/lite.hb";
- ScopedFd sfd(filesystem_.OpenForWrite(hit_buffer_filename.c_str()));
+ ScopedFd sfd(icing_filesystem_.OpenForWrite(hit_buffer_filename.c_str()));
ASSERT_THAT(sfd.is_valid(), IsTrue());
constexpr std::string_view kCorruptBytes = "ffffffffffffffffffffff";
// The first page of the hit_buffer is taken up by the header. Overwrite the
// first page of content.
- constexpr int kHitBufferStartOffset = 4096;
- ASSERT_THAT(filesystem_.PWrite(sfd.get(), kHitBufferStartOffset,
- kCorruptBytes.data(), kCorruptBytes.length()),
- IsTrue());
+ int hit_buffer_start_offset = GetBlockSize();
+ ASSERT_THAT(
+ icing_filesystem_.PWrite(sfd.get(), hit_buffer_start_offset,
+ kCorruptBytes.data(), kCorruptBytes.length()),
+ IsTrue());
// Recreate the index.
Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- EXPECT_THAT(Index::Create(options, &filesystem_),
+ EXPECT_THAT(Index::Create(options, &filesystem_, &icing_filesystem_),
StatusIs(libtextclassifier3::StatusCode::DATA_LOSS));
}
@@ -513,7 +829,36 @@
// Recreate the index.
Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- ICING_ASSERT_OK_AND_ASSIGN(index_, Index::Create(options, &filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
+
+ // Check that the hits are present.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+TEST_F(IndexTest, IndexPersistenceAfterMerge) {
+ // Add some content to the index
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.AddHit("bar"), IsOk());
+ ICING_ASSERT_OK(index_->Merge());
+ EXPECT_THAT(index_->PersistToDisk(), IsOk());
+
+ // Close the index.
+ index_.reset();
+
+ // Recreate the index.
+ Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
// Check that the hits are present.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -529,51 +874,10 @@
TEST_F(IndexTest, InvalidHitBufferSize) {
Index::Options options(
index_dir_, /*index_merge_size=*/std::numeric_limits<uint32_t>::max());
- EXPECT_THAT(Index::Create(options, &filesystem_),
+ EXPECT_THAT(Index::Create(options, &filesystem_, &icing_filesystem_),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(IndexTest, ComputeChecksumSameBetweenCalls) {
- // Add some content to the index.
- Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
- TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
-
- Crc32 checksum = index_->ComputeChecksum();
- // Calling it again shouldn't change the checksum
- EXPECT_THAT(index_->ComputeChecksum(), Eq(checksum));
-}
-
-TEST_F(IndexTest, ComputeChecksumSameAcrossInstances) {
- // Add some content to the index.
- Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
- TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
-
- Crc32 checksum = index_->ComputeChecksum();
-
- // Recreate the index, checksum should still be the same across instances
- index_.reset();
- Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
- ICING_ASSERT_OK_AND_ASSIGN(index_, Index::Create(options, &filesystem_));
-
- EXPECT_THAT(index_->ComputeChecksum(), Eq(checksum));
-}
-
-TEST_F(IndexTest, ComputeChecksumChangesOnModification) {
- // Add some content to the index.
- Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
- TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
-
- Crc32 checksum = index_->ComputeChecksum();
-
- // Modifying the index changes the checksum;
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
-
- EXPECT_THAT(index_->ComputeChecksum(), Not(Eq(checksum)));
-}
-
TEST_F(IndexTest, FindTermByPrefixShouldReturnEmpty) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
@@ -582,7 +886,15 @@
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0},
/*num_to_return=*/0),
IsOkAndHolds(IsEmpty()));
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0},
+ /*num_to_return=*/-1),
+ IsOkAndHolds(IsEmpty()));
+ ICING_ASSERT_OK(index_->Merge());
+
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0},
+ /*num_to_return=*/0),
+ IsOkAndHolds(IsEmpty()));
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0},
/*num_to_return=*/-1),
IsOkAndHolds(IsEmpty()));
@@ -598,6 +910,14 @@
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"b", /*namespace_ids=*/{0},
/*num_to_return=*/10),
IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("bar", 1))));
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ // "b" should only match "bar" but not "foo".
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"b", /*namespace_ids=*/{0},
+ /*num_to_return=*/10),
+ IsOkAndHolds(UnorderedElementsAre(
+ EqualsTermMetadata("bar", kMinSizePlApproxHits))));
}
TEST_F(IndexTest, FindTermByPrefixShouldRespectNumToReturn) {
@@ -611,6 +931,13 @@
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
/*num_to_return=*/2),
IsOkAndHolds(SizeIs(2)));
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ // We have 3 results but only 2 should be returned.
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
+ /*num_to_return=*/2),
+ IsOkAndHolds(SizeIs(2)));
}
TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInOneNamespace) {
@@ -630,12 +957,25 @@
/*num_to_return=*/10),
IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fo", 1),
EqualsTermMetadata("foo", 1))));
-
// namespace with id 1 has 1 result.
EXPECT_THAT(
index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{1},
/*num_to_return=*/10),
IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("fool", 1))));
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ // namespace with id 0 has 2 results.
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
+ /*num_to_return=*/10),
+ IsOkAndHolds(UnorderedElementsAre(
+ EqualsTermMetadata("fo", kMinSizePlApproxHits),
+ EqualsTermMetadata("foo", kMinSizePlApproxHits))));
+ // namespace with id 1 has 1 result.
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{1},
+ /*num_to_return=*/10),
+ IsOkAndHolds(UnorderedElementsAre(
+ EqualsTermMetadata("fool", kMinSizePlApproxHits))));
}
TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInMultipleNamespaces) {
@@ -660,6 +1000,15 @@
/*num_to_return=*/10),
IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1),
EqualsTermMetadata("fool", 1))));
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{1, 2},
+ /*num_to_return=*/10),
+ IsOkAndHolds(UnorderedElementsAre(
+ EqualsTermMetadata("foo", kMinSizePlApproxHits),
+ EqualsTermMetadata("fool", kMinSizePlApproxHits))));
}
TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsInAllNamespaces) {
@@ -684,6 +1033,16 @@
IsOkAndHolds(UnorderedElementsAre(
EqualsTermMetadata("fo", 1), EqualsTermMetadata("foo", 1),
EqualsTermMetadata("fool", 1))));
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ // Should return "fo", "foo" and "fool" across all namespaces.
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{},
+ /*num_to_return=*/10),
+ IsOkAndHolds(UnorderedElementsAre(
+ EqualsTermMetadata("fo", kMinSizePlApproxHits),
+ EqualsTermMetadata("foo", kMinSizePlApproxHits),
+ EqualsTermMetadata("fool", kMinSizePlApproxHits))));
}
TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectHitCount) {
@@ -704,17 +1063,477 @@
/*num_to_return=*/10),
IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1),
EqualsTermMetadata("fool", 2))));
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ // foo's one hit should fit on a min-sized pl, fool's two hits should also fit
+ // on a min-sized pl.
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
+ /*num_to_return=*/10),
+ IsOkAndHolds(UnorderedElementsAre(
+ EqualsTermMetadata("foo", kMinSizePlApproxHits),
+ EqualsTermMetadata("fool", kMinSizePlApproxHits))));
+}
+
+TEST_F(IndexTest, FindTermByPrefixShouldReturnApproximateHitCountForMain) {
+ Index::Editor edit =
+ index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.AddHit("fool"), IsOk());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ edit = index_->Edit(kDocumentId4, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ edit = index_->Edit(kDocumentId5, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ edit = index_->Edit(kDocumentId6, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ edit = index_->Edit(kDocumentId7, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("fool"), IsOk());
+
+ // 'foo' has 1 hit, 'fool' has 8 hits.
+ EXPECT_THAT(
+ index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
+ /*num_to_return=*/10),
+ IsOkAndHolds(UnorderedElementsAre(EqualsTermMetadata("foo", 1),
+ EqualsTermMetadata("fool", 8))));
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ // foo's hits should fit on a single pl. fool's hits will need two pls.
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
+ /*num_to_return=*/10),
+ IsOkAndHolds(UnorderedElementsAre(
+ EqualsTermMetadata("foo", kMinSizePlApproxHits),
+ EqualsTermMetadata("fool", kSecondSmallestPlApproxHits))));
+}
+
+TEST_F(IndexTest, FindTermByPrefixShouldReturnCombinedHitCount) {
+ Index::Editor edit =
+ index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.AddHit("fool"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("fool"), IsOk());
+
+ // 'foo' has 1 hit in the main index, 'fool' has 1 hit in the main index and
+ // 1 hit in the lite index.
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
+ /*num_to_return=*/10),
+ IsOkAndHolds(UnorderedElementsAre(
+ EqualsTermMetadata("foo", kMinSizePlApproxHits),
+ EqualsTermMetadata("fool", kMinSizePlApproxHits + 1))));
+}
+
+TEST_F(IndexTest, FindTermByPrefixShouldReturnTermsFromBothIndices) {
+ Index::Editor edit =
+ index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("fool"), IsOk());
+
+ // 'foo' has 1 hit in the main index, 'fool' has 1 hit in the lite index.
+ EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
+ /*num_to_return=*/10),
+ IsOkAndHolds(UnorderedElementsAre(
+ EqualsTermMetadata("foo", kMinSizePlApproxHits),
+ EqualsTermMetadata("fool", 1))));
}
TEST_F(IndexTest, GetElementsSize) {
// Check empty index.
- EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(Eq(0)));
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t size, index_->GetElementsSize());
+ EXPECT_THAT(size, Eq(0));
// Add an element.
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(Gt(0)));
+ ICING_ASSERT_OK_AND_ASSIGN(size, index_->GetElementsSize());
+ EXPECT_THAT(size, Gt(0));
+
+ ASSERT_THAT(index_->Merge(), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(size, index_->GetElementsSize());
+ EXPECT_THAT(size, Gt(0));
+}
+
+TEST_F(IndexTest, ExactResultsFromLiteAndMain) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foot"), IsOk());
+ ICING_ASSERT_OK(index_->Merge());
+
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("footer"), IsOk());
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));
+}
+
+TEST_F(IndexTest, PrefixResultsFromLiteAndMain) {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foot"), IsOk());
+ ICING_ASSERT_OK(index_->Merge());
+
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("footer"), IsOk());
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ EXPECT_THAT(edit.AddHit("foo"), IsOk());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));
+}
+
+TEST_F(IndexTest, GetDebugInfo) {
+ // Add two documents to the lite index, merge them into the main index and
+ // then add another doc to the lite index.
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ICING_ASSERT_OK(index_->Merge());
+
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("footer"), IsOk());
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foo"), IsOk());
+
+ std::string out0;
+ index_->GetDebugInfo(/*verbosity=*/0, &out0);
+ EXPECT_THAT(out0, Not(IsEmpty()));
+
+ std::string out1;
+ index_->GetDebugInfo(/*verbosity=*/1, &out1);
+ EXPECT_THAT(out1, SizeIs(Gt(out0.size())));
+
+ // Add one more doc to the lite index. Debug strings should change.
+ edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("far"), IsOk());
+
+ std::string out2;
+ index_->GetDebugInfo(/*verbosity=*/0, &out2);
+ EXPECT_THAT(out2, Ne(out0));
+
+ std::string out3;
+ index_->GetDebugInfo(/*verbosity=*/1, &out3);
+ EXPECT_THAT(out3, Ne(out1));
+
+  // Merge into the main index. Debug strings should change again.
+ ICING_ASSERT_OK(index_->Merge());
+
+ std::string out4;
+ index_->GetDebugInfo(/*verbosity=*/0, &out4);
+ EXPECT_THAT(out4, Ne(out0));
+ EXPECT_THAT(out4, Ne(out2));
+
+ std::string out5;
+ index_->GetDebugInfo(/*verbosity=*/1, &out5);
+ EXPECT_THAT(out5, Ne(out1));
+ EXPECT_THAT(out5, Ne(out3));
+}
+
+TEST_F(IndexTest, BackfillingMultipleTermsSucceeds) {
+ // Add two documents to the lite index, merge them into the main index and
+ // then add another doc to the lite index.
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ edit = index_->Edit(kDocumentId0, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foot"), IsOk());
+
+ // After this merge the index should have posting lists for
+ // "fool" {(doc0,sec3)},
+ // "foot" {(doc1,sec3)},
+ // "foo" {(doc1,sec3),(doc0,sec3),(doc0,sec2)}
+ ICING_ASSERT_OK(index_->Merge());
+
+ // Add one more doc to the lite index.
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("far"), IsOk());
+
+ // After this merge the index should add a posting list for "far" and a
+ // backfill branch point for "f". In addition to the posting lists described
+ // above, which are unaffected, the new posting lists should be
+ // "far" {(doc2,sec2)},
+ // "f" {(doc1,sec3),(doc0,sec3)}
+ // Multiple pre-existing hits should be added to the new backfill branch
+ // point.
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId3})));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));
+}
+
+TEST_F(IndexTest, BackfillingNewTermsSucceeds) {
+ // Add two documents to the lite index, merge them into the main index and
+ // then add another doc to the lite index.
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ // After this merge the index should have posting lists for
+ // "fool" {(doc0,sec2)},
+ // "foot" {(doc1,sec3)},
+ // "foo" {(doc1,sec3),(doc0,sec2)}
+ ICING_ASSERT_OK(index_->Merge());
+
+ edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("footer"), IsOk());
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foo"), IsOk());
+  // Add one more doc to the lite index.
+ edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("far"), IsOk());
+
+ // After this merge the index should add posting lists for "far" and "footer"
+ // and a backfill branch point for "f". The new posting lists should be
+ // "fool" {(doc0,sec2)},
+ // "foot" {(doc1,sec3)},
+ // "foo" {(doc2,sec3),(doc1,sec3),(doc0,sec2)}
+ // "footer" {(doc2,sec2)},
+ // "far" {(doc3,sec2)},
+ // "f" {(doc2,sec3),(doc1,sec3)}
+ // Multiple pre-existing hits should be added to the new backfill branch
+ // point.
+ ICING_ASSERT_OK(index_->Merge());
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3})));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId3));
+}
+
+TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
+ ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+
+ // Add one document to the lite index
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ // Clipping to invalid should have no effect.
+ ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ // Clipping to invalid should still have no effect even if hits are in main.
+ ICING_ASSERT_OK(index_->Merge());
+ ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foot"), IsOk());
+
+ // Clipping to invalid should still have no effect even if both indices have
+ // hits.
+ ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+}
+
+TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
+ ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
+ EXPECT_THAT(index_->GetElementsSize(), IsOkAndHolds(0));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+
+ // Add one document to the lite index
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
+  // Clipping to last_added_document_id should have no effect.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+  // Clipping to last_added_document_id should have no effect with hits in main.
+ ICING_ASSERT_OK(index_->Merge());
+ ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foot"), IsOk());
+
+  // Clipping to last_added_document_id should still have no effect even if
+  // both indices have hits.
+ ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ itr, index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(
+ GetHits(std::move(itr)),
+ ElementsAre(
+ EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
+ EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
+}
+
+TEST_F(IndexTest, TruncateToThrowsOutLiteIndex) {
+ // Add one document to the lite index and merge it into main.
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foo"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ // Add another document to the lite index.
+ edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foot"), IsOk());
+
+ EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk());
+
+ // Clipping to document 0 should toss out the lite index, but keep the main.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)),
+ ElementsAre(EqualsDocHitInfo(
+ kDocumentId0, std::vector<SectionId>{kSectionId2})));
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+TEST_F(IndexTest, TruncateToThrowsOutBothIndices) {
+ // Add two documents to the lite index and merge them into main.
+ Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
+ TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foul"), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+
+ // Add another document to the lite index.
+ edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.AddHit("foot"), IsOk());
+
+ EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk());
+
+ // Clipping to document 0 should toss out both indices.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> itr,
+ index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
+ EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
}
} // namespace
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.cc b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
index 1f1c296..d535d7f 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.cc
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.cc
@@ -21,6 +21,7 @@
#include "icing/absl_ports/str_cat.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/schema/section.h"
+#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -42,7 +43,13 @@
libtextclassifier3::Status DocHitInfoIteratorTermLite::Advance() {
if (cached_hits_idx_ == -1) {
- ICING_RETURN_IF_ERROR(RetrieveMoreHits());
+ libtextclassifier3::Status status = RetrieveMoreHits();
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << "Failed to retrieve more hits "
+ << status.error_message();
+ return absl_ports::ResourceExhaustedError(
+ "No more DocHitInfos in iterator");
+ }
} else {
++cached_hits_idx_;
}
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
index 27ccf33..7b51aa4 100644
--- a/icing/index/lite/lite-index.h
+++ b/icing/index/lite/lite-index.h
@@ -154,6 +154,11 @@
uint32_t size() const { return header_->cur_size(); }
+ bool WantsMerge() const {
+ return size() >= (options_.hit_buffer_want_merge_bytes /
+ sizeof(TermIdHitPair::Value));
+ }
+
class const_iterator {
friend class LiteIndex;
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc
index 0640135..b3696b7 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.cc
+++ b/icing/index/main/doc-hit-info-iterator-term-main.cc
@@ -52,7 +52,13 @@
// the last cached hit, then go get some more!
// We hold back the last cached hit because it could have more hits on the
// next posting list in the chain.
- ICING_RETURN_IF_ERROR(RetrieveMoreHits());
+ libtextclassifier3::Status status = RetrieveMoreHits();
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << "Failed to retrieve more hits "
+ << status.error_message();
+ return absl_ports::ResourceExhaustedError(
+ "No more DocHitInfos in iterator");
+ }
} else {
++cached_doc_hit_infos_idx_;
}
diff --git a/icing/index/main/flash-index-storage.cc b/icing/index/main/flash-index-storage.cc
index b88d7fe..f125b6d 100644
--- a/icing/index/main/flash-index-storage.cc
+++ b/icing/index/main/flash-index-storage.cc
@@ -24,6 +24,7 @@
#include <unordered_set>
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
#include "icing/file/memory-mapped-file.h"
#include "icing/index/main/index-block.h"
#include "icing/index/main/posting-list-free.h"
@@ -237,6 +238,27 @@
return filesystem_->DataSync(block_fd_.get());
}
+libtextclassifier3::Status FlashIndexStorage::Reset() {
+ // Reset in-memory members to default values.
+ num_blocks_ = 0;
+ header_block_.reset();
+ block_fd_.reset();
+ in_memory_freelists_.clear();
+
+ // Delete the underlying file.
+ if (!filesystem_->DeleteFile(index_filename_.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Unable to delete file: ", index_filename_));
+ }
+
+ // Re-initialize.
+ if (!Init()) {
+ return absl_ports::InternalError(
+ "Unable to successfully read header block!");
+ }
+ return libtextclassifier3::Status::OK;
+}
+
libtextclassifier3::StatusOr<PostingListHolder>
FlashIndexStorage::GetPostingList(PostingListIdentifier id) const {
ICING_ASSIGN_OR_RETURN(IndexBlock block, GetIndexBlock(id.block_index()));
@@ -481,6 +503,51 @@
}
}
+void FlashIndexStorage::GetDebugInfo(int verbosity, std::string* out) const {
+ // Dump and check integrity of the index block free lists.
+ out->append("Free lists:\n");
+ for (size_t i = 0; i < header_block_->header()->num_index_block_infos; ++i) {
+ // TODO(tjbarron) Port over StringAppendFormat to migrate off of this legacy
+ // util.
+ IcingStringUtil::SStringAppendF(
+ out, 100, "Posting list bytes %u: ",
+ header_block_->header()->index_block_infos[i].posting_list_bytes);
+ uint32_t block_index =
+ header_block_->header()->index_block_infos[i].free_list_block_index;
+ int count = 0;
+ while (block_index != kInvalidBlockIndex) {
+ auto block_or = GetIndexBlock(block_index);
+ IcingStringUtil::SStringAppendF(out, 100, "%u ", block_index);
+ ++count;
+
+ if (block_or.ok()) {
+ block_index = block_or.ValueOrDie().next_block_index();
+ } else {
+ block_index = kInvalidBlockIndex;
+ }
+ }
+ IcingStringUtil::SStringAppendF(out, 100, "(count=%d)\n", count);
+ }
+
+ out->append("In memory free lists:\n");
+ if (in_memory_freelists_.size() ==
+ header_block_->header()->num_index_block_infos) {
+ for (size_t i = 0; i < in_memory_freelists_.size(); ++i) {
+ IcingStringUtil::SStringAppendF(
+ out, 100, "Posting list bytes %u %s\n",
+ header_block_->header()->index_block_infos[i].posting_list_bytes,
+ in_memory_freelists_.at(i).DebugString().c_str());
+ }
+ } else {
+ IcingStringUtil::SStringAppendF(
+ out, 100,
+ "In memory free list size %zu doesn't match index block infos size "
+ "%d\n",
+ in_memory_freelists_.size(),
+ header_block_->header()->num_index_block_infos);
+ }
+}
+
// FreeList.
void FlashIndexStorage::FreeList::Push(PostingListIdentifier id) {
if (free_list_.size() >= kMaxSize) {
@@ -490,10 +557,13 @@
<< ") has reached max size. Dropping freed posting list [block_index:"
<< id.block_index()
<< ", posting_list_index:" << id.posting_list_index() << "]";
+ ++num_dropped_free_list_entries_;
return;
}
free_list_.push_back(id);
+ free_list_size_high_watermark_ = std::max(
+ free_list_size_high_watermark_, static_cast<int>(free_list_.size()));
}
libtextclassifier3::StatusOr<PostingListIdentifier>
@@ -507,5 +577,11 @@
return id;
}
+std::string FlashIndexStorage::FreeList::DebugString() const {
+ return IcingStringUtil::StringPrintf(
+ "size %zu max %d dropped %d", free_list_.size(),
+ free_list_size_high_watermark_, num_dropped_free_list_entries_);
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/index/main/flash-index-storage.h b/icing/index/main/flash-index-storage.h
index 958f131..8d5b50b 100644
--- a/icing/index/main/flash-index-storage.h
+++ b/icing/index/main/flash-index-storage.h
@@ -136,6 +136,12 @@
return filesystem_->GetDiskUsage(block_fd_.get());
}
+  // Returns the size of the index file used to contain hits.
+ uint64_t GetElementsSize() const {
+ // Element size is the same as disk size excluding the header block.
+ return GetDiskUsage() - block_size();
+ }
+
int num_blocks() const { return num_blocks_; }
// Info about the index based on the block size.
@@ -151,6 +157,10 @@
return 1.0 - static_cast<double>(num_blocks_) / kMaxBlockIndex;
}
+ libtextclassifier3::Status Reset();
+
+ void GetDebugInfo(int verbosity, std::string* out) const;
+
private:
FlashIndexStorage(const std::string& index_filename,
const Filesystem* filesystem, bool has_in_memory_freelists);
@@ -259,8 +269,12 @@
// - NOT_FOUND if there are no free posting lists on this free list.
libtextclassifier3::StatusOr<PostingListIdentifier> TryPop();
+ std::string DebugString() const;
+
private:
std::vector<PostingListIdentifier> free_list_;
+ int free_list_size_high_watermark_ = 0;
+ int num_dropped_free_list_entries_ = 0;
};
std::vector<FreeList> in_memory_freelists_;
diff --git a/icing/index/main/main-index-merger.cc b/icing/index/main/main-index-merger.cc
index 724cf43..8142b79 100644
--- a/icing/index/main/main-index-merger.cc
+++ b/icing/index/main/main-index-merger.cc
@@ -14,11 +14,14 @@
#include "icing/index/main/main-index-merger.h"
+#include <cstdint>
#include <cstring>
#include <memory>
+#include <unordered_map>
#include "icing/absl_ports/canonical_errors.h"
#include "icing/index/lite/term-id-hit-pair.h"
+#include "icing/index/main/index-block.h"
#include "icing/index/term-id-codec.h"
#include "icing/legacy/core/icing-string-util.h"
#include "icing/util/status-macros.h"
@@ -103,6 +106,52 @@
TermIdHitPair prev_;
};
+class HitComparator {
+ public:
+ explicit HitComparator(
+ const TermIdCodec& term_id_codec,
+ const std::unordered_map<uint32_t, int>& main_tvi_to_block_index)
+ : term_id_codec_(&term_id_codec),
+ main_tvi_to_block_index_(&main_tvi_to_block_index) {}
+
+ bool operator()(const TermIdHitPair& lhs, const TermIdHitPair& rhs) const {
+    // Primary sort by index block. This achieves two things:
+ // 1. It reduces the number of flash writes by grouping together new hits
+ // for terms whose posting lists might share the same index block.
+ // 2. More importantly, this ensures that newly added backfill branch points
+ // will be populated first (because all newly added terms have an invalid
+ // block index of 0) before any new hits are added to the postings lists
+ // that they backfill from.
+ int lhs_index_block = GetIndexBlock(lhs.term_id());
+ int rhs_index_block = GetIndexBlock(rhs.term_id());
+ if (lhs_index_block == rhs_index_block) {
+ // Secondary sort by term_id and hit.
+ return lhs.value() < rhs.value();
+ }
+ return lhs_index_block < rhs_index_block;
+ }
+
+ private:
+ int GetIndexBlock(uint32_t term_id) const {
+ auto term_info_or = term_id_codec_->DecodeTermInfo(term_id);
+ if (!term_info_or.ok()) {
+ ICING_LOG(WARNING)
+ << "Unable to decode term-info during merge. This shouldn't happen.";
+ return kInvalidBlockIndex;
+ }
+ TermIdCodec::DecodedTermInfo term_info =
+ std::move(term_info_or).ValueOrDie();
+ auto itr = main_tvi_to_block_index_->find(term_info.tvi);
+ if (itr == main_tvi_to_block_index_->end()) {
+ return kInvalidBlockIndex;
+ }
+ return itr->second;
+ }
+
+ const TermIdCodec* term_id_codec_;
+ const std::unordered_map<uint32_t, int>* main_tvi_to_block_index_;
+};
+
// A helper function to dedupe hits stored in hits. Suppose that the lite index
// contained a single document with two hits in a single prefix section: "foot"
// and "fool". When expanded, there would be four hits:
@@ -121,13 +170,13 @@
// score. If there is both an exact and prefix hit for the same term, we prefer
// the exact hit, unless they have different scores, in which case we keep both
// them.
-void DedupeHits(std::vector<TermIdHitPair>* hits) {
+void DedupeHits(
+ std::vector<TermIdHitPair>* hits, const TermIdCodec& term_id_codec,
+ const std::unordered_map<uint32_t, int>& main_tvi_to_block_index) {
// Now all terms are grouped together and all hits for a term are sorted.
// Merge equivalent hits into one.
std::sort(hits->begin(), hits->end(),
- [](const TermIdHitPair& lhs, const TermIdHitPair& rhs) {
- return lhs.value() < rhs.value();
- });
+ HitComparator(term_id_codec, main_tvi_to_block_index));
size_t current_offset = 0;
HitSelector hit_selector;
for (const TermIdHitPair& term_id_hit_pair : *hits) {
@@ -193,7 +242,8 @@
cur_decoded_term.tvi);
if (itr_prefixes ==
lexicon_merge_outputs.other_tvi_to_prefix_main_tvis.end()) {
- ICING_VLOG(1) << "No necessary prefix expansion for " << cur_decoded_term.tvi;
+ ICING_VLOG(1) << "No necessary prefix expansion for "
+ << cur_decoded_term.tvi;
continue;
}
// The tvis of all prefixes of this hit's term that appear in the main
@@ -201,9 +251,10 @@
// prefix_tvis_buf[offset+len]).
size_t offset = itr_prefixes->second.first;
size_t len = itr_prefixes->second.second;
+ size_t offset_end_exclusive = offset + len;
Hit prefix_hit(hit.section_id(), hit.document_id(), hit.score(),
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
- for (; offset < len; ++offset) {
+ for (; offset < offset_end_exclusive; ++offset) {
// Take the tvi (in the main lexicon) of each prefix term.
uint32_t prefix_main_tvi =
lexicon_merge_outputs.prefix_tvis_buf[offset];
@@ -217,7 +268,8 @@
}
}
// 3. Remove any duplicate hits.
- DedupeHits(&hits);
+ DedupeHits(&hits, term_id_codec,
+ lexicon_merge_outputs.main_tvi_to_block_index);
return hits;
}
diff --git a/icing/index/main/main-index-merger_test.cc b/icing/index/main/main-index-merger_test.cc
index 42b3266..59d3e82 100644
--- a/icing/index/main/main-index-merger_test.cc
+++ b/icing/index/main/main-index-merger_test.cc
@@ -162,13 +162,14 @@
// a. Translate lite term ids to main term ids based on the map
// b. Expand 'fool' to have a hit for 'foo'
ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<TermIdHitPair> expanded_elts,
+ std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
lexicon_outputs));
- EXPECT_THAT(expanded_elts, UnorderedElementsAre(
- TermIdHitPair(foot_main_term_id, doc0_hit),
- TermIdHitPair(fool_main_term_id, doc1_hit),
- TermIdHitPair(foo_term_id, doc1_prefix_hit)));
+ EXPECT_THAT(
+ expanded_term_id_hit_pairs,
+ UnorderedElementsAre(TermIdHitPair(foot_main_term_id, doc0_hit),
+ TermIdHitPair(fool_main_term_id, doc1_hit),
+ TermIdHitPair(foo_term_id, doc1_prefix_hit)));
}
TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) {
@@ -223,11 +224,11 @@
// c. Keep both the exact hit for 'foo' and the prefix hit for 'foot'
// because they have different scores.
ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<TermIdHitPair> expanded_elts,
+ std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
lexicon_outputs));
EXPECT_THAT(
- expanded_elts,
+ expanded_term_id_hit_pairs,
UnorderedElementsAre(TermIdHitPair(foot_main_term_id, foot_doc0_hit),
TermIdHitPair(foo_main_term_id, foo_doc0_hit),
TermIdHitPair(foo_main_term_id, doc0_prefix_hit)));
@@ -281,11 +282,11 @@
// c. Keep only the exact hit for 'foo' since they both have the same hit
// score.
ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<TermIdHitPair> expanded_elts,
+ std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
lexicon_outputs));
EXPECT_THAT(
- expanded_elts,
+ expanded_term_id_hit_pairs,
UnorderedElementsAre(TermIdHitPair(foot_main_term_id, foot_doc0_hit),
TermIdHitPair(foo_main_term_id, foo_doc0_hit)));
}
@@ -351,11 +352,11 @@
// c. Merge the prefix hits from 'foot' and 'fool', taking the best hit
// score.
ICING_ASSERT_OK_AND_ASSIGN(
- std::vector<TermIdHitPair> expanded_elts,
+ std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
lexicon_outputs));
EXPECT_THAT(
- expanded_elts,
+ expanded_term_id_hit_pairs,
UnorderedElementsAre(TermIdHitPair(foot_main_term_id, foot_doc0_hit),
TermIdHitPair(fool_main_term_id, fool_doc0_hit),
TermIdHitPair(foo_term_id, doc0_prefix_hit)));
diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc
index 6e45760..2a5ba83 100644
--- a/icing/index/main/main-index.cc
+++ b/icing/index/main/main-index.cc
@@ -13,11 +13,13 @@
// limitations under the License.
#include "icing/index/main/main-index.h"
+#include <cstdint>
#include <cstring>
#include <memory>
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/index/main/index-block.h"
#include "icing/index/term-id-codec.h"
#include "icing/index/term-property-id.h"
#include "icing/legacy/index/icing-dynamic-trie.h"
@@ -81,29 +83,32 @@
} // namespace
-libtextclassifier3::StatusOr<MainIndex> MainIndex::Create(
- const std::string& index_filename, const Filesystem* filesystem,
+libtextclassifier3::StatusOr<std::unique_ptr<MainIndex>> MainIndex::Create(
+ const std::string& index_directory, const Filesystem* filesystem,
const IcingFilesystem* icing_filesystem) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(icing_filesystem);
- MainIndex main_index;
+ auto main_index = std::make_unique<MainIndex>();
ICING_RETURN_IF_ERROR(
- main_index.Init(index_filename, filesystem, icing_filesystem));
+ main_index->Init(index_directory, filesystem, icing_filesystem));
return main_index;
}
// TODO(b/139087650) : Migrate off of IcingFilesystem.
libtextclassifier3::Status MainIndex::Init(
- const std::string& index_filename, const Filesystem* filesystem,
+ const std::string& index_directory, const Filesystem* filesystem,
const IcingFilesystem* icing_filesystem) {
- std::string flash_index_file = index_filename + "-main-index";
+ if (!filesystem->CreateDirectoryRecursively(index_directory.c_str())) {
+ return absl_ports::InternalError("Unable to create main index directory.");
+ }
+ std::string flash_index_file = index_directory + "/main_index";
ICING_ASSIGN_OR_RETURN(
FlashIndexStorage flash_index,
FlashIndexStorage::Create(flash_index_file, filesystem));
flash_index_storage_ =
std::make_unique<FlashIndexStorage>(std::move(flash_index));
- std::string lexicon_file = index_filename + "-main-lexicon";
+ std::string lexicon_file = index_directory + "/main-lexicon";
IcingDynamicTrie::RuntimeOptions runtime_options;
main_lexicon_ = std::make_unique<IcingDynamicTrie>(
lexicon_file, runtime_options, icing_filesystem);
@@ -115,6 +120,17 @@
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::StatusOr<int64_t> MainIndex::GetElementsSize() const {
+ int64_t lexicon_elt_size = main_lexicon_->GetElementsSize();
+ int64_t index_elt_size = flash_index_storage_->GetElementsSize();
+ if (lexicon_elt_size == IcingFilesystem::kBadFileSize ||
+ index_elt_size == IcingFilesystem::kBadFileSize) {
+ return absl_ports::InternalError(
+ "Failed to get element size of LiteIndex's lexicon");
+ }
+ return lexicon_elt_size + index_elt_size;
+}
+
libtextclassifier3::StatusOr<std::unique_ptr<PostingListAccessor>>
MainIndex::GetAccessorForExactTerm(const std::string& term) {
PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid;
@@ -161,6 +177,68 @@
return result;
}
+// TODO(samzheng): Implement a method PropertyReadersAll.HasAnyProperty().
+bool IsTermInNamespaces(
+ const IcingDynamicTrie::PropertyReadersAll& property_reader,
+ uint32_t value_index, const std::vector<NamespaceId>& namespace_ids) {
+ if (namespace_ids.empty()) {
+ return true;
+ }
+ for (NamespaceId namespace_id : namespace_ids) {
+ if (property_reader.HasProperty(GetNamespacePropertyId(namespace_id),
+ value_index)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+libtextclassifier3::StatusOr<std::vector<TermMetadata>>
+MainIndex::FindTermsByPrefix(const std::string& prefix,
+ const std::vector<NamespaceId>& namespace_ids,
+ int num_to_return) {
+ // Finds all the terms that start with the given prefix in the lexicon.
+ IcingDynamicTrie::Iterator term_iterator(*main_lexicon_, prefix.c_str());
+
+ // A property reader to help check if a term has some property.
+ IcingDynamicTrie::PropertyReadersAll property_reader(*main_lexicon_);
+
+ std::vector<TermMetadata> term_metadata_list;
+ while (term_iterator.IsValid() && term_metadata_list.size() < num_to_return) {
+ uint32_t term_value_index = term_iterator.GetValueIndex();
+
+ // Skips the terms that don't exist in the given namespaces. We won't skip
+ // any terms if namespace_ids is empty.
+ if (!IsTermInNamespaces(property_reader, term_value_index, namespace_ids)) {
+ term_iterator.Advance();
+ continue;
+ }
+ PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid;
+ memcpy(&posting_list_id, term_iterator.GetValue(), sizeof(posting_list_id));
+ // Getting the actual hit count would require reading the entire posting
+ // list chain. We take an approximation to avoid all of those IO ops.
+ // Because we are not reading the posting lists, it is impossible to
+ // differentiate between single max-size posting lists and chains of
+ // max-size posting lists. We assume that the impact on scoring is not
+ // significant.
+ int approx_hit_count = IndexBlock::ApproximateFullPostingListHitsForBlock(
+ flash_index_storage_->block_size(),
+ posting_list_id.posting_list_index_bits());
+ term_metadata_list.emplace_back(term_iterator.GetKey(), approx_hit_count);
+
+ term_iterator.Advance();
+ }
+ if (term_iterator.IsValid()) {
+ // We exited the loop above because we hit the num_to_return limit.
+ ICING_LOG(WARNING) << "Ran into limit of " << num_to_return
+ << " retrieving suggestions for " << prefix
+ << ". Some suggestions may not be returned and others "
+ "may be misranked.";
+ }
+ return term_metadata_list;
+}
+
libtextclassifier3::StatusOr<MainIndex::LexiconMergeOutputs>
MainIndex::AddBackfillBranchPoints(const IcingDynamicTrie& other_lexicon) {
// Maps new branching points in main lexicon to the term such that
@@ -199,7 +277,8 @@
bool new_key;
PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid;
if (!main_lexicon_->Insert(prefix.c_str(), &posting_list_id,
- &branching_prefix_tvi, false, &new_key)) {
+ &branching_prefix_tvi, /*replace=*/false,
+ &new_key)) {
return absl_ports::InternalError("Could not insert branching prefix");
}
@@ -242,8 +321,15 @@
// Add other to main mapping.
outputs.other_tvi_to_main_tvi.emplace(other_tvi, new_main_tvi);
+
+ memcpy(&posting_list_id, main_lexicon_->GetValueAtIndex(new_main_tvi),
+ sizeof(posting_list_id));
+ if (posting_list_id.block_index() != kInvalidBlockIndex) {
+ outputs.main_tvi_to_block_index[new_main_tvi] =
+ posting_list_id.block_index();
+ }
}
- return outputs;
+ return std::move(outputs);
}
libtextclassifier3::StatusOr<MainIndex::LexiconMergeOutputs>
@@ -252,7 +338,7 @@
IcingDynamicTrie::PropertyReader has_prefix_prop_reader(
other_lexicon, GetHasHitsInPrefixSectionPropertyId());
if (!has_prefix_prop_reader.Exists()) {
- return outputs;
+ return std::move(outputs);
}
std::string prefix;
for (IcingDynamicTrie::Iterator other_term_itr(other_lexicon, /*prefix=*/"");
@@ -277,10 +363,9 @@
prefix.assign(other_term_itr.GetKey(), prefix_length);
uint32_t prefix_tvi;
bool new_key;
- PostingListIdentifier posting_list_identifier =
- PostingListIdentifier::kInvalid;
- if (!main_lexicon_->Insert(prefix.c_str(), &posting_list_identifier,
- &prefix_tvi, /*replace=*/false, &new_key)) {
+ PostingListIdentifier posting_list_id = PostingListIdentifier::kInvalid;
+ if (!main_lexicon_->Insert(prefix.c_str(), &posting_list_id, &prefix_tvi,
+ /*replace=*/false, &new_key)) {
return absl_ports::InternalError(
absl_ports::StrCat("Could not insert prefix: ", prefix));
}
@@ -300,6 +385,13 @@
}
outputs.prefix_tvis_buf.push_back(prefix_tvi);
+
+ memcpy(&posting_list_id, main_lexicon_->GetValueAtIndex(prefix_tvi),
+ sizeof(posting_list_id));
+ if (posting_list_id.block_index() != kInvalidBlockIndex) {
+ outputs.main_tvi_to_block_index[prefix_tvi] =
+ posting_list_id.block_index();
+ }
}
// Any prefixes added? Then add to map.
@@ -308,7 +400,7 @@
buf_start, outputs.prefix_tvis_buf.size() - buf_start};
}
}
- return outputs;
+ return std::move(outputs);
}
bool MainIndex::CopyProperties(
@@ -344,8 +436,9 @@
libtextclassifier3::Status MainIndex::AddHits(
const TermIdCodec& term_id_codec,
std::unordered_map<uint32_t, uint32_t>&& backfill_map,
- std::vector<TermIdHitPair>&& hits) {
+ std::vector<TermIdHitPair>&& hits, DocumentId last_added_document_id) {
if (hits.empty()) {
+ flash_index_storage_->set_last_indexed_docid(last_added_document_id);
return libtextclassifier3::Status::OK;
}
uint32_t cur_term_id = hits[0].term_id();
@@ -381,8 +474,8 @@
}
// Now copy remaining backfills.
- ICING_VLOG(2) << IcingStringUtil::StringPrintf("Remaining backfills %zu",
- backfill_map.size());
+ ICING_VLOG(1) << IcingStringUtil::StringPrintf("Remaining backfills %zu",
+ backfill_map.size());
for (auto other_tvi_main_tvi_pair : backfill_map) {
PostingListIdentifier backfill_posting_list_id =
PostingListIdentifier::kInvalid;
@@ -400,6 +493,7 @@
main_lexicon_->SetValueAtIndex(other_tvi_main_tvi_pair.first, &result.id);
}
}
+ flash_index_storage_->set_last_indexed_docid(last_added_document_id);
return libtextclassifier3::Status::OK;
}
@@ -473,7 +567,11 @@
}
Hit last_added_hit;
- for (const Hit& hit : backfill_hits) {
+ // The hits in backfill_hits are in the reverse order of how they were added.
+ // Iterate in reverse to add them to this new posting list in the correct
+ // order.
+ for (auto itr = backfill_hits.rbegin(); itr != backfill_hits.rend(); ++itr) {
+ const Hit& hit = *itr;
// Skip hits from non-prefix-enabled sections.
if (!hit.is_in_prefix_section()) {
continue;
@@ -493,5 +591,17 @@
return libtextclassifier3::Status::OK;
}
+void MainIndex::GetDebugInfo(int verbosity, std::string* out) const {
+ // Lexicon.
+ out->append("Main Lexicon stats:\n");
+ main_lexicon_->GetDebugInfo(verbosity, out);
+
+ if (verbosity <= 0) {
+ return;
+ }
+
+ flash_index_storage_->GetDebugInfo(verbosity, out);
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h
index 79378ea..7403b8c 100644
--- a/icing/index/main/main-index.h
+++ b/icing/index/main/main-index.h
@@ -24,8 +24,10 @@
#include "icing/index/main/flash-index-storage.h"
#include "icing/index/main/posting-list-accessor.h"
#include "icing/index/term-id-codec.h"
+#include "icing/index/term-metadata.h"
#include "icing/legacy/index/icing-dynamic-trie.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/store/namespace-id.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -36,8 +38,8 @@
// RETURNS:
// - valid instance of MainIndex, on success.
// - INTERNAL error if unable to create the lexicon or flash storage.
- static libtextclassifier3::StatusOr<MainIndex> Create(
- const std::string& index_filename, const Filesystem* filesystem,
+ static libtextclassifier3::StatusOr<std::unique_ptr<MainIndex>> Create(
+ const std::string& index_directory, const Filesystem* filesystem,
const IcingFilesystem* icing_filesystem);
// Get a PostingListAccessor that holds the posting list chain for 'term'.
@@ -65,6 +67,22 @@
libtextclassifier3::StatusOr<GetPrefixAccessorResult>
GetAccessorForPrefixTerm(const std::string& prefix);
+ // Finds terms with the given prefix in the given namespaces. If
+ // 'namespace_ids' is empty, returns results from all the namespaces. The
+ // input prefix must be normalized, otherwise inaccurate results may be
+ // returned. Results are not sorted specifically and are in lexicographical
+ // order. The number of results is no more than 'num_to_return'.
+ //
+ // The hit count returned with each TermMetadata is an approximation based on
+ // posting list size.
+ //
+ // Returns:
+ // A list of TermMetadata on success
+ // INTERNAL_ERROR if failed to access term data.
+ libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindTermsByPrefix(
+ const std::string& prefix, const std::vector<NamespaceId>& namespace_ids,
+ int num_to_return);
+
struct LexiconMergeOutputs {
// Maps from main_lexicon tvi for new branching point to the main_lexicon
// tvi for posting list whose hits must be backfilled.
@@ -73,6 +91,10 @@
// Maps from lexicon tvis to main_lexicon tvis.
std::unordered_map<uint32_t, uint32_t> other_tvi_to_main_tvi;
+ // Maps from main lexicon tvi to the block index. Tvis with no entry do not
+ // have an allocated posting list.
+ std::unordered_map<uint32_t, int> main_tvi_to_block_index;
+
// Maps from the lexicon tvi to the beginning position in
// prefix_tvis_buf and the length.
std::unordered_map<uint32_t, std::pair<int, int>>
@@ -124,10 +146,40 @@
libtextclassifier3::Status AddHits(
const TermIdCodec& term_id_codec,
std::unordered_map<uint32_t, uint32_t>&& backfill_map,
- std::vector<TermIdHitPair>&& hits);
+ std::vector<TermIdHitPair>&& hits, DocumentId last_added_document_id);
+
+ libtextclassifier3::Status PersistToDisk() {
+ if (main_lexicon_->Sync() && flash_index_storage_->PersistToDisk()) {
+ return libtextclassifier3::Status::OK;
+ }
+ return absl_ports::InternalError("Unable to sync lite index components.");
+ }
+
+ DocumentId last_added_document_id() const {
+ return flash_index_storage_->get_last_indexed_docid();
+ }
+
+ libtextclassifier3::Status Reset() {
+ ICING_RETURN_IF_ERROR(flash_index_storage_->Reset());
+ main_lexicon_->Clear();
+ return libtextclassifier3::Status::OK;
+ }
+
+ void Warm() { main_lexicon_->Warm(); }
+
+ // Returns:
+ // - elements size of lexicon and index, on success
+ // - INTERNAL on IO error
+ libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+
+ // Returns debug information for the main index in out.
+ // verbosity <= 0, simplest debug information - just the lexicon
+ // verbosity > 0, more detailed debug information including raw postings
+ // lists.
+ void GetDebugInfo(int verbosity, std::string* out) const;
private:
- libtextclassifier3::Status Init(const std::string& index_filename,
+ libtextclassifier3::Status Init(const std::string& index_directory,
const Filesystem* filesystem,
const IcingFilesystem* icing_filesystem);
diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc
index 019b588..abe7181 100644
--- a/icing/index/main/main-index_test.cc
+++ b/icing/index/main/main-index_test.cc
@@ -41,6 +41,7 @@
using ::testing::ElementsAre;
using ::testing::IsEmpty;
using ::testing::NiceMock;
+using ::testing::Return;
using ::testing::SizeIs;
std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
@@ -72,11 +73,12 @@
MainIndex* main_index) {
ICING_ASSIGN_OR_RETURN(MainIndex::LexiconMergeOutputs outputs,
main_index->MergeLexicon(lite_index.lexicon()));
- ICING_ASSIGN_OR_RETURN(std::vector<TermIdHitPair> elts,
+ ICING_ASSIGN_OR_RETURN(std::vector<TermIdHitPair> term_id_hit_pairs,
MainIndexMerger::TranslateAndExpandLiteHits(
lite_index, term_id_codec, outputs));
return main_index->AddHits(term_id_codec, std::move(outputs.backfill_map),
- std::move(elts));
+ std::move(term_id_hit_pairs),
+ lite_index.last_added_document_id());
}
class MainIndexTest : public testing::Test {
@@ -114,21 +116,23 @@
TEST_F(MainIndexTest, MainIndexCreateIOFailure) {
// Create the index with mock filesystem. By default, Mock will return false,
// so the first attempted file operation will fail.
- NiceMock<IcingMockFilesystem> mock_filesystem;
+ NiceMock<IcingMockFilesystem> mock_icing_filesystem;
+ ON_CALL(mock_icing_filesystem, CreateDirectoryRecursively)
+ .WillByDefault(Return(false));
std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
- EXPECT_THAT(
- MainIndex::Create(main_index_file_name, &filesystem_, &mock_filesystem),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ EXPECT_THAT(MainIndex::Create(main_index_file_name, &filesystem_,
+ &mock_icing_filesystem),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
TEST_F(MainIndexTest, MainIndexGetAccessorForPrefixTermNotFound) {
// Create the main index. It should have no entries in its lexicon.
std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
ICING_ASSERT_OK_AND_ASSIGN(
- MainIndex main_index,
+ std::unique_ptr<MainIndex> main_index,
MainIndex::Create(main_index_file_name, &filesystem_,
&icing_filesystem_));
- EXPECT_THAT(main_index.GetAccessorForPrefixTerm("foo"),
+ EXPECT_THAT(main_index->GetAccessorForPrefixTerm("foo"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -148,24 +152,24 @@
// 2. Create the main index. It should have no entries in its lexicon.
std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
ICING_ASSERT_OK_AND_ASSIGN(
- MainIndex main_index,
+ std::unique_ptr<MainIndex> main_index,
MainIndex::Create(main_index_file_name, &filesystem_,
&icing_filesystem_));
// 3. Merge the index. The main index should contain "foo".
- ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, &main_index));
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// GetAccessorForPrefixTerm should return a valid accessor for "foo".
- EXPECT_THAT(main_index.GetAccessorForPrefixTerm("foo"), IsOk());
+ EXPECT_THAT(main_index->GetAccessorForPrefixTerm("foo"), IsOk());
}
TEST_F(MainIndexTest, MainIndexGetAccessorForExactTermNotFound) {
// Create the main index. It should have no entries in its lexicon.
std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
ICING_ASSERT_OK_AND_ASSIGN(
- MainIndex main_index,
+ std::unique_ptr<MainIndex> main_index,
MainIndex::Create(main_index_file_name, &filesystem_,
&icing_filesystem_));
- EXPECT_THAT(main_index.GetAccessorForExactTerm("foo"),
+ EXPECT_THAT(main_index->GetAccessorForExactTerm("foo"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -185,14 +189,14 @@
// 2. Create the main index. It should have no entries in its lexicon.
std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
ICING_ASSERT_OK_AND_ASSIGN(
- MainIndex main_index,
+ std::unique_ptr<MainIndex> main_index,
MainIndex::Create(main_index_file_name, &filesystem_,
&icing_filesystem_));
// 3. Merge the index. The main index should contain "foo".
- ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, &main_index));
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// GetAccessorForPrefixTerm should return a valid accessor for "foo".
- EXPECT_THAT(main_index.GetAccessorForExactTerm("foo"), IsOk());
+ EXPECT_THAT(main_index->GetAccessorForExactTerm("foo"), IsOk());
}
TEST_F(MainIndexTest, MergeIndexToEmpty) {
@@ -234,21 +238,21 @@
// 2. Create the main index. It should have no entries in its lexicon.
std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
ICING_ASSERT_OK_AND_ASSIGN(
- MainIndex main_index,
+ std::unique_ptr<MainIndex> main_index,
MainIndex::Create(main_index_file_name, &filesystem_,
&icing_filesystem_));
- std::vector<DocHitInfo> hits = GetExactHits(&main_index, "foot");
+ std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foot");
EXPECT_THAT(hits, IsEmpty());
- hits = GetPrefixHits(&main_index, "fo");
+ hits = GetPrefixHits(main_index.get(), "fo");
EXPECT_THAT(hits, IsEmpty());
// 3. Merge the index. The main index should contain "fool", "foot"
// and "far" as well as a branch points for "foo" and "f". "fa" and "fo"
// should not be present because it is not a branch point.
- ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, &main_index));
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// Get hits from an exact posting list.
- hits = GetExactHits(&main_index, "foot");
+ hits = GetExactHits(main_index.get(), "foot");
// We should get hits for "foot" in doc1 and doc0
EXPECT_THAT(
hits,
@@ -259,7 +263,7 @@
std::vector<SectionId>{doc0_hit.section_id()})));
// Get hits from a branching point posting list. "fo" should redirect to "foo"
- hits = GetPrefixHits(&main_index, "fo");
+ hits = GetPrefixHits(main_index.get(), "fo");
// We should get hits for "foot" in doc1 and "fool" in doc1. We shouldn't get
// the hits for "foot" in doc0 and "fool" in doc0 and doc2 because they
// weren't hits in prefix sections.
@@ -307,14 +311,14 @@
// 2. Create the main index. It should have no entries in its lexicon.
std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
ICING_ASSERT_OK_AND_ASSIGN(
- MainIndex main_index,
+ std::unique_ptr<MainIndex> main_index,
MainIndex::Create(main_index_file_name, &filesystem_,
&icing_filesystem_));
// 3. Merge the index. The main index should contain "fool", "foot"
// and "far" as well as a branch points for "foo" and "f". "fa" and "fo"
// should not be present because it is not a branch point.
- ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, &main_index));
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// 4. Index two docs in a new Lite Index:
// - Doc3 {"foot", "four", "foul", "fall" is_in_prefix_section=false}
@@ -355,9 +359,9 @@
// 3. Merge the index. The main index should now contain "foul", "four"
// and "fall", a branch points for "fou" and backfill points for "fo".
- ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, &main_index));
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// Get hits from an exact posting list the existed before the merge.
- std::vector<DocHitInfo> hits = GetExactHits(&main_index, "foot");
+ std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foot");
// We should get hits for "foot" in doc3, doc1 and doc0
EXPECT_THAT(
@@ -370,7 +374,7 @@
EqualsDocHitInfo(doc0_hit.document_id(),
std::vector<SectionId>{doc0_hit.section_id()})));
// Get hits from backfill posting list.
- hits = GetPrefixHits(&main_index, "fo");
+ hits = GetPrefixHits(main_index.get(), "fo");
// We should get hits for "four" and "foul" in doc4 and hits for "foot" and
// "fool" in doc1. We shouldn't get the hits for "foot" in doc0 and doc3,
// "fool" in doc0 and doc2 or the hits for "four" and "foul" in doc4 because
@@ -415,14 +419,14 @@
// 2. Create the main index. It should have no entries in its lexicon.
std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
ICING_ASSERT_OK_AND_ASSIGN(
- MainIndex main_index,
+ std::unique_ptr<MainIndex> main_index,
MainIndex::Create(main_index_file_name, &filesystem_,
&icing_filesystem_));
// 3. Merge the lite lexicon. The main lexicon should contain "foot" and
// "foo".
- ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, &main_index));
- std::vector<DocHitInfo> hits = GetPrefixHits(&main_index, "foo");
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+ std::vector<DocHitInfo> hits = GetPrefixHits(main_index.get(), "foo");
// We should get hits for "foo" in doc1 and doc0, but not in doc2 because it
// is not a prefix hit.
EXPECT_THAT(
@@ -464,14 +468,14 @@
// 2. Create the main index. It should have no entries in its lexicon.
std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
ICING_ASSERT_OK_AND_ASSIGN(
- MainIndex main_index,
+ std::unique_ptr<MainIndex> main_index,
MainIndex::Create(main_index_file_name, &filesystem_,
&icing_filesystem_));
// 3. Merge the lite lexicon. The main lexicon should contain "foot" and
// "foo".
- ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, &main_index));
- std::vector<DocHitInfo> hits = GetExactHits(&main_index, "foo");
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+ std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foo");
// We should get hits for "foo" in doc2 and doc1, but not in doc0 because it
// is not an exact hit.
@@ -514,14 +518,14 @@
// 2. Create the main index. It should have no entries in its lexicon.
std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
ICING_ASSERT_OK_AND_ASSIGN(
- MainIndex main_index,
+ std::unique_ptr<MainIndex> main_index,
MainIndex::Create(main_index_file_name, &filesystem_,
&icing_filesystem_));
// 3. Merge the lite index.
- ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, &main_index));
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
// Get hits for all documents containing "foot" - which should be all of them.
- std::vector<DocHitInfo> hits = GetExactHits(&main_index, "foot");
+ std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foot");
EXPECT_THAT(hits, SizeIs(2048));
EXPECT_THAT(hits.front(),
@@ -530,6 +534,61 @@
EqualsDocHitInfo(0, std::vector<SectionId>{0, 1, 2}));
}
+TEST_F(MainIndexTest, MergeIndexBackfilling) {
+ // 1. Index one doc in the Lite Index:
+ // - Doc0 {"fool" is_in_prefix_section=true}
+ ICING_ASSERT_OK_AND_ASSIGN(
+ uint32_t tvi,
+ lite_index_->InsertTerm("fool", TermMatchType::PREFIX, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t fool_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ /*is_in_prefix_section=*/true);
+ ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
+
+ // 2. Create the main index. It should have no entries in its lexicon.
+ std::string main_index_file_name = index_dir_ + "/test_file.idx.index";
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<MainIndex> main_index,
+ MainIndex::Create(main_index_file_name, &filesystem_,
+ &icing_filesystem_));
+
+ // 3. Merge the index. The main index should contain "fool".
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+
+ // 4. Index two docs in a new Lite Index:
+ // - Doc1 {"foot" is_in_prefix_section=false}
+ std::string lite_index_file_name2 = index_dir_ + "/test_file.lite-idx.index2";
+ LiteIndex::Options options(lite_index_file_name2,
+ /*hit_buffer_want_merge_bytes=*/1024 * 1024);
+ ICING_ASSERT_OK_AND_ASSIGN(lite_index_,
+ LiteIndex::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ tvi,
+ lite_index_->InsertTerm("foot", TermMatchType::EXACT_ONLY, kNamespace0));
+ ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
+ term_id_codec_->EncodeTvi(tvi, TviType::LITE));
+
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ /*is_in_prefix_section=*/false);
+ ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
+
+ // 5. Merge the index. The main index should now contain "fool", "foot"
+ // and a backfill point for "foo".
+ ICING_ASSERT_OK(Merge(*lite_index_, *term_id_codec_, main_index.get()));
+ // Get hits from an exact posting list that existed before the merge.
+ std::vector<DocHitInfo> hits = GetExactHits(main_index.get(), "foo");
+ EXPECT_THAT(hits, IsEmpty());
+
+ // Get hits from backfill posting list.
+ hits = GetPrefixHits(main_index.get(), "foo");
+ // We should get a hit for "fool" in doc0.
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfo(
+ doc0_hit.document_id(),
+ std::vector<SectionId>{doc0_hit.section_id()})));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/legacy/index/icing-mock-filesystem.h b/icing/legacy/index/icing-mock-filesystem.h
index 5a064ea..75ac62f 100644
--- a/icing/legacy/index/icing-mock-filesystem.h
+++ b/icing/legacy/index/icing-mock-filesystem.h
@@ -28,9 +28,143 @@
namespace icing {
namespace lib {
+using ::testing::_;
+using ::testing::A;
class IcingMockFilesystem : public IcingFilesystem {
public:
+ IcingMockFilesystem() {
+ ON_CALL(*this, DeleteFile).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.DeleteFile(file_name);
+ });
+
+ ON_CALL(*this, DeleteDirectory).WillByDefault([this](const char *dir_name) {
+ return real_icing_filesystem_.DeleteDirectory(dir_name);
+ });
+
+ ON_CALL(*this, DeleteDirectoryRecursively)
+ .WillByDefault([this](const char *dir_name) {
+ return real_icing_filesystem_.DeleteDirectoryRecursively(dir_name);
+ });
+
+ ON_CALL(*this, FileExists).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.FileExists(file_name);
+ });
+
+ ON_CALL(*this, DirectoryExists).WillByDefault([this](const char *dir_name) {
+ return real_icing_filesystem_.DirectoryExists(dir_name);
+ });
+
+ ON_CALL(*this, GetBasenameIndex)
+ .WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.GetBasenameIndex(file_name);
+ });
+
+ ON_CALL(*this, GetBasename).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.GetBasename(file_name);
+ });
+
+ ON_CALL(*this, GetDirname).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.GetDirname(file_name);
+ });
+
+ ON_CALL(*this, ListDirectory)
+ .WillByDefault(
+ [this](const char *dir_name, std::vector<std::string> *entries) {
+ return real_icing_filesystem_.ListDirectory(dir_name, entries);
+ });
+
+ ON_CALL(*this, GetMatchingFiles)
+ .WillByDefault(
+ [this](const char *glob, std::vector<std::string> *matches) {
+ return real_icing_filesystem_.GetMatchingFiles(glob, matches);
+ });
+
+ ON_CALL(*this, OpenForWrite).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.OpenForWrite(file_name);
+ });
+
+ ON_CALL(*this, OpenForAppend).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.OpenForAppend(file_name);
+ });
+
+ ON_CALL(*this, OpenForRead).WillByDefault([this](const char *file_name) {
+ return real_icing_filesystem_.OpenForRead(file_name);
+ });
+
+ ON_CALL(*this, GetFileSize(A<int>())).WillByDefault([this](int fd) {
+ return real_icing_filesystem_.GetFileSize(fd);
+ });
+
+ ON_CALL(*this, GetFileSize(A<const char *>()))
+ .WillByDefault([this](const char *filename) {
+ return real_icing_filesystem_.GetFileSize(filename);
+ });
+
+ ON_CALL(*this, Truncate(A<int>(), _))
+ .WillByDefault([this](int fd, uint64_t new_size) {
+ return real_icing_filesystem_.Truncate(fd, new_size);
+ });
+
+ ON_CALL(*this, Truncate(A<const char *>(), _))
+ .WillByDefault([this](const char *filename, uint64_t new_size) {
+ return real_icing_filesystem_.Truncate(filename, new_size);
+ });
+
+ ON_CALL(*this, Grow).WillByDefault([this](int fd, uint64_t new_size) {
+ return real_icing_filesystem_.Grow(fd, new_size);
+ });
+
+ ON_CALL(*this, Write)
+ .WillByDefault([this](int fd, const void *data, size_t data_size) {
+ return real_icing_filesystem_.Write(fd, data, data_size);
+ });
+ ON_CALL(*this, PWrite)
+ .WillByDefault(
+ [this](int fd, off_t offset, const void *data, size_t data_size) {
+ return real_icing_filesystem_.PWrite(fd, offset, data, data_size);
+ });
+
+ ON_CALL(*this, DataSync).WillByDefault([this](int fd) {
+ return real_icing_filesystem_.DataSync(fd);
+ });
+
+ ON_CALL(*this, RenameFile)
+ .WillByDefault([this](const char *old_name, const char *new_name) {
+ return real_icing_filesystem_.RenameFile(old_name, new_name);
+ });
+
+ ON_CALL(*this, SwapFiles)
+ .WillByDefault([this](const char *one, const char *two) {
+ return real_icing_filesystem_.SwapFiles(one, two);
+ });
+
+ ON_CALL(*this, CreateDirectory).WillByDefault([this](const char *dir_name) {
+ return real_icing_filesystem_.CreateDirectory(dir_name);
+ });
+
+ ON_CALL(*this, CreateDirectoryRecursively)
+ .WillByDefault([this](const char *dir_name) {
+ return real_icing_filesystem_.CreateDirectoryRecursively(dir_name);
+ });
+
+ ON_CALL(*this, CopyFile)
+ .WillByDefault([this](const char *src, const char *dst) {
+ return real_icing_filesystem_.CopyFile(src, dst);
+ });
+
+ ON_CALL(*this, ComputeChecksum)
+ .WillByDefault([this](int fd, uint32_t *checksum, uint64_t offset,
+ uint64_t length) {
+ return real_icing_filesystem_.ComputeChecksum(fd, checksum, offset,
+ length);
+ });
+
+ ON_CALL(*this, GetDiskUsage).WillByDefault([this](const char *path) {
+ return real_icing_filesystem_.GetDiskUsage(path);
+ });
+ }
+
MOCK_METHOD(bool, DeleteFile, (const char *file_name), (const, override));
MOCK_METHOD(bool, DeleteDirectory, (const char *dir_name), (const, override));
@@ -103,6 +237,9 @@
(const, override));
MOCK_METHOD(uint64_t, GetDiskUsage, (const char *path), (const, override));
+
+ private:
+ IcingFilesystem real_icing_filesystem_;
};
} // namespace lib
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
index 29404d9..900cce5 100644
--- a/icing/query/query-processor_benchmark.cc
+++ b/icing/query/query-processor_benchmark.cc
@@ -73,10 +73,11 @@
ICING_ASSERT_OK(editor.AddHit(token.c_str()));
}
-std::unique_ptr<Index> CreateIndex(const IcingFilesystem& filesystem,
+std::unique_ptr<Index> CreateIndex(const IcingFilesystem& icing_filesystem,
+ const Filesystem& filesystem,
const std::string& index_dir) {
Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10);
- return Index::Create(options, &filesystem).ValueOrDie();
+ return Index::Create(options, &filesystem, &icing_filesystem).ValueOrDie();
}
std::unique_ptr<Normalizer> CreateNormalizer() {
@@ -107,7 +108,8 @@
ICING_LOG(ERROR) << "Failed to create test directories";
}
- std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
@@ -220,7 +222,8 @@
ICING_LOG(ERROR) << "Failed to create test directories";
}
- std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
@@ -351,7 +354,8 @@
ICING_LOG(ERROR) << "Failed to create test directories";
}
- std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
@@ -467,7 +471,8 @@
ICING_LOG(ERROR) << "Failed to create test directories";
}
- std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
+ std::unique_ptr<Index> index =
+ CreateIndex(icing_filesystem, filesystem, index_dir);
language_segmenter_factory::SegmenterOptions options(ULOC_US);
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index dc94a72..2edc624 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -109,8 +109,8 @@
Index::Options options(index_dir_,
/*index_merge_size=*/1024 * 1024);
- ICING_ASSERT_OK_AND_ASSIGN(index_,
- Index::Create(options, &icing_filesystem_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_, Index::Create(options, &filesystem_, &icing_filesystem_));
language_segmenter_factory::SegmenterOptions segmenter_options(
ULOC_US, jni_cache_.get());
diff --git a/icing/result/result-state-manager.cc b/icing/result/result-state-manager.cc
index e9ae0ab..ff5dbf0 100644
--- a/icing/result/result-state-manager.cc
+++ b/icing/result/result-state-manager.cc
@@ -99,6 +99,7 @@
if (!state_iterator->second.HasMoreResults()) {
InternalInvalidateResultState(next_page_token);
+ next_page_token = kInvalidNextPageToken;
}
return PageResultState(result_of_page, next_page_token,
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
index 7413d73..12f7c4c 100644
--- a/icing/schema/schema-util.cc
+++ b/icing/schema/schema-util.cc
@@ -37,6 +37,10 @@
namespace {
+// Data types that can be indexed. This follows rule 11 of SchemaUtil::Validate
+static std::unordered_set<PropertyConfigProto::DataType::Code>
+ kIndexableDataTypes = {PropertyConfigProto::DataType::STRING};
+
bool IsCardinalityCompatible(const PropertyConfigProto& old_property,
const PropertyConfigProto& new_property) {
if (old_property.cardinality() < new_property.cardinality()) {
@@ -146,7 +150,7 @@
validated_status,
absl_ports::StrCat("Field 'schema_type' is required for DOCUMENT "
"data_types in schema property '",
- schema_type, " ", property_name, "'"));
+ schema_type, ".", property_name, "'"));
}
// Need to make sure we eventually see/validate this schema_type
@@ -159,7 +163,8 @@
schema_type, property_name));
ICING_RETURN_IF_ERROR(
- ValidateIndexingConfig(property_config.indexing_config(), data_type));
+ ValidateIndexingConfig(property_config.indexing_config(), data_type,
+ schema_type, property_name));
}
}
@@ -214,7 +219,7 @@
if (data_type == PropertyConfigProto::DataType::UNKNOWN) {
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
"Field 'data_type' cannot be UNKNOWN for schema property '",
- schema_type, " ", property_name, "'"));
+ schema_type, ".", property_name, "'"));
}
return libtextclassifier3::Status::OK;
@@ -228,23 +233,39 @@
if (cardinality == PropertyConfigProto::Cardinality::UNKNOWN) {
return absl_ports::InvalidArgumentError(absl_ports::StrCat(
"Field 'cardinality' cannot be UNKNOWN for schema property '",
- schema_type, " ", property_name, "'"));
+ schema_type, ".", property_name, "'"));
}
return libtextclassifier3::Status::OK;
}
libtextclassifier3::Status SchemaUtil::ValidateIndexingConfig(
- const IndexingConfig& config,
- PropertyConfigProto::DataType::Code data_type) {
- if (data_type == PropertyConfigProto::DataType::DOCUMENT) {
- return libtextclassifier3::Status::OK;
+ const IndexingConfig& config, PropertyConfigProto::DataType::Code data_type,
+ std::string_view schema_type, std::string_view property_name) {
+ if (config.term_match_type() == TermMatchType::UNKNOWN &&
+ config.tokenizer_type() != IndexingConfig::TokenizerType::NONE) {
+ // They set a tokenizer type, but no term match type.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Indexed property '", schema_type, ".", property_name,
+ "' cannot have a term match type UNKNOWN"));
}
+
if (config.term_match_type() != TermMatchType::UNKNOWN &&
config.tokenizer_type() == IndexingConfig::TokenizerType::NONE) {
+ // They set a term match type, but no tokenizer type
return absl_ports::InvalidArgumentError(
- "TermMatchType properties cannot have a tokenizer type of NONE");
+ absl_ports::StrCat("Indexed property '", property_name,
+ "' cannot have a tokenizer type of NONE"));
}
+
+ if (config.term_match_type() != TermMatchType::UNKNOWN &&
+ kIndexableDataTypes.find(data_type) == kIndexableDataTypes.end()) {
+ // They want this section indexed, but it's not an indexable data type.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Cannot index non-string data type for schema property '", schema_type,
+ ".", property_name, "'"));
+ }
+
return libtextclassifier3::Status::OK;
}
@@ -297,9 +318,9 @@
if (new_schema_type_and_config == new_type_config_map.end()) {
// Didn't find the old schema type in the new schema, all the old
// documents of this schema type are invalid without the schema
- ICING_VLOG(1) << absl_ports::StrCat("Previously defined schema type ",
+ ICING_VLOG(1) << absl_ports::StrCat("Previously defined schema type '",
old_type_config.schema_type(),
- " was not defined in new schema");
+ "' was not defined in new schema");
schema_delta.schema_types_deleted.insert(old_type_config.schema_type());
continue;
}
@@ -320,10 +341,10 @@
if (new_property_name_and_config ==
new_parsed_property_configs.property_config_map.end()) {
// Didn't find the old property
- ICING_VLOG(1) << absl_ports::StrCat("Previously defined property type ",
- old_type_config.schema_type(), ".",
- old_property_config.property_name(),
- " was not defined in new schema");
+ ICING_VLOG(1) << absl_ports::StrCat(
+ "Previously defined property type '", old_type_config.schema_type(),
+ ".", old_property_config.property_name(),
+ "' was not defined in new schema");
schema_delta.schema_types_incompatible.insert(
old_type_config.schema_type());
continue;
@@ -334,8 +355,8 @@
if (!IsPropertyCompatible(old_property_config, *new_property_config)) {
ICING_VLOG(1) << absl_ports::StrCat(
- "Property ", old_type_config.schema_type(), ".",
- old_property_config.property_name(), " is incompatible.");
+ "Property '", old_type_config.schema_type(), ".",
+ old_property_config.property_name(), "' is incompatible.");
schema_delta.schema_types_incompatible.insert(
old_type_config.schema_type());
}
@@ -367,8 +388,8 @@
if (new_parsed_property_configs.num_required_properties >
old_required_properties) {
ICING_VLOG(1) << absl_ports::StrCat(
- "New schema ", old_type_config.schema_type(),
- " has REQUIRED properties that are not "
+ "New schema '", old_type_config.schema_type(),
+ "' has REQUIRED properties that are not "
"present in the previously defined schema");
schema_delta.schema_types_incompatible.insert(
old_type_config.schema_type());
diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h
index d65dd10..dfa3aa2 100644
--- a/icing/schema/schema-util.h
+++ b/icing/schema/schema-util.h
@@ -80,10 +80,11 @@
// 9. PropertyConfigProtos.schema_type's must correspond to a
// SchemaTypeConfigProto.schema_type
// 10. Property names can only be alphanumeric.
+ // 11. Only STRING data types are indexed
//
// Returns:
// ALREADY_EXISTS for case 1 and 2
- // INVALID_ARGUMENT for 3-10
+ // INVALID_ARGUMENT for 3-11
// OK otherwise
static libtextclassifier3::Status Validate(const SchemaProto& schema);
@@ -137,22 +138,53 @@
const SchemaProto& old_schema, const SchemaProto& new_schema);
private:
+ // Validates the 'schema_type' field
+ //
+ // Returns:
+ // INVALID_ARGUMENT if 'schema_type' is an empty string.
+ // OK on success
static libtextclassifier3::Status ValidateSchemaType(
std::string_view schema_type);
+
+ // Validates the 'property_name' field.
+ // 1. Can't be an empty string
+ // 2. Can only contain alphanumeric characters
+ //
+ // Returns:
+ // INVALID_ARGUMENT if any of the rules are not followed
+ // OK on success
static libtextclassifier3::Status ValidatePropertyName(
std::string_view property_name, std::string_view schema_type);
+
+ // Validates the 'data_type' field.
+ //
+ // Returns:
+ // INVALID_ARGUMENT if it's UNKNOWN
+ // OK on success
static libtextclassifier3::Status ValidateDataType(
PropertyConfigProto::DataType::Code data_type,
std::string_view schema_type, std::string_view property_name);
- static libtextclassifier3::Status ValidatePropertySchemaType(
- std::string_view property_schema_type, std::string_view schema_type,
- std::string_view property_name);
+
+ // Validates the 'cardinality' field.
+ //
+ // Returns:
+ // INVALID_ARGUMENT if it's UNKNOWN
+ // OK on success
static libtextclassifier3::Status ValidateCardinality(
PropertyConfigProto::Cardinality::Code cardinality,
std::string_view schema_type, std::string_view property_name);
+
+ // Checks that the 'indexing_config' satisfies the following rules:
+ // 1. Only STRING data types can be indexed
+ // 2. An indexed property must have a valid tokenizer type
+ //
+ // Returns:
+ // INVALID_ARGUMENT if any of the rules are not followed
+ // OK on success
static libtextclassifier3::Status ValidateIndexingConfig(
const IndexingConfig& config,
- PropertyConfigProto::DataType::Code data_type);
+ PropertyConfigProto::DataType::Code data_type,
+ std::string_view schema_type, std::string_view property_name);
};
} // namespace lib
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
index a3ab96f..6012989 100644
--- a/icing/schema/schema-util_test.cc
+++ b/icing/schema/schema-util_test.cc
@@ -574,7 +574,29 @@
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, ValidateNoTokenizer) {
+TEST_F(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
+ SchemaProto schema;
+ auto* type = schema.add_types();
+ type->set_schema_type("MyType");
+
+ auto* prop = type->add_properties();
+ prop->set_property_name("Foo");
+ prop->set_data_type(PropertyConfigProto::DataType::STRING);
+ prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ prop->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+
+ // Error if we don't set a term match type
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Passes once we set a term match type
+ prop->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+}
+
+TEST_F(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
SchemaProto schema;
auto* type = schema.add_types();
type->set_schema_type("MyType");
@@ -585,15 +607,158 @@
prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
prop->mutable_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
+
+ // Error if we don't set a tokenizer type
EXPECT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+ // Passes once we set a tokenizer type
prop->mutable_indexing_config()->set_tokenizer_type(
IndexingConfig::TokenizerType::PLAIN);
EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
}
-TEST_F(SchemaUtilTest, ValidateDocumentNoTokenizer) {
+TEST_F(SchemaUtilTest, ValidateIntPropertyShouldntHaveIndexingConfig) {
+ SchemaProto schema;
+ auto* type = schema.add_types();
+ type->set_schema_type("MyType");
+
+ auto* prop = type->add_properties();
+ prop->set_property_name("IntProperty");
+ prop->set_data_type(PropertyConfigProto::DataType::INT64);
+ prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // Passes if it doesn't have indexing config
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+
+ // Fails if we try to set an indexing_config.term_match_type
+ prop->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Fails if we try to set an indexing_config.tokenizer_type
+ prop->mutable_indexing_config()->clear_term_match_type();
+ prop->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Fails if we try to set an indexing config
+ prop->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ prop->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, ValidateDoublePropertyShouldntHaveIndexingConfig) {
+ SchemaProto schema;
+ auto* type = schema.add_types();
+ type->set_schema_type("MyType");
+
+ auto* prop = type->add_properties();
+ prop->set_property_name("DoubleProperty");
+ prop->set_data_type(PropertyConfigProto::DataType::DOUBLE);
+ prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // Passes if it doesn't have indexing config
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+
+ // Fails if we try to set an indexing_config.term_match_type
+ prop->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Fails if we try to set an indexing_config.tokenizer_type
+ prop->mutable_indexing_config()->clear_term_match_type();
+ prop->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Fails if we try to set an indexing config
+ prop->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ prop->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, ValidateBooleanPropertyShouldntHaveIndexingConfig) {
+ SchemaProto schema;
+ auto* type = schema.add_types();
+ type->set_schema_type("MyType");
+
+ auto* prop = type->add_properties();
+ prop->set_property_name("BooleanProperty");
+ prop->set_data_type(PropertyConfigProto::DataType::BOOLEAN);
+ prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // Passes if it doesn't have indexing config
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+
+ // Fails if we try to set an indexing_config.term_match_type
+ prop->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Fails if we try to set an indexing_config.tokenizer_type
+ prop->mutable_indexing_config()->clear_term_match_type();
+ prop->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Fails if we try to set an indexing config
+ prop->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ prop->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, ValidateBytesPropertyShouldntHaveIndexingConfig) {
+ SchemaProto schema;
+ auto* type = schema.add_types();
+ type->set_schema_type("MyType");
+
+ auto* prop = type->add_properties();
+ prop->set_property_name("BytesProperty");
+ prop->set_data_type(PropertyConfigProto::DataType::BYTES);
+ prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // Passes if it doesn't have indexing config
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+
+ // Fails if we try to set an indexing_config.term_match_type
+ prop->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Fails if we try to set an indexing_config.tokenizer_type
+ prop->mutable_indexing_config()->clear_term_match_type();
+ prop->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Fails if we try to set an indexing config
+ prop->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ prop->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(SchemaUtilTest, ValidateDocumentPropertyShouldntHaveIndexingConfig) {
SchemaProto schema;
auto* type = schema.add_types();
type->set_schema_type("OtherType");
@@ -606,12 +771,30 @@
prop->set_schema_type("OtherType");
prop->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+
+ // Passes if it doesn't have indexing config
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+
+ // Fails if we try to set an indexing_config.term_match_type
+ prop->mutable_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Fails if we try to set an indexing_config.tokenizer_type
+ prop->mutable_indexing_config()->clear_term_match_type();
+ prop->mutable_indexing_config()->set_tokenizer_type(
+ IndexingConfig::TokenizerType::PLAIN);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ // Fails if we try to set an indexing config
prop->mutable_indexing_config()->set_term_match_type(
TermMatchType::EXACT_ONLY);
prop->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::NONE);
-
- EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+ IndexingConfig::TokenizerType::PLAIN);
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
} // namespace
diff --git a/icing/schema/section-manager.cc b/icing/schema/section-manager.cc
index 6a10c9a..0eed2fe 100644
--- a/icing/schema/section-manager.cc
+++ b/icing/schema/section-manager.cc
@@ -117,35 +117,8 @@
return p1->property_name() < p2->property_name();
});
for (const auto& property_config : sorted_properties) {
- if (property_config.indexing_config().term_match_type() ==
- TermMatchType::UNKNOWN) {
- // No need to create section for current property
- continue;
- }
-
- // Creates section metadata according to data type
- if (property_config.data_type() == PropertyConfigProto::DataType::STRING ||
- property_config.data_type() == PropertyConfigProto::DataType::INT64 ||
- property_config.data_type() == PropertyConfigProto::DataType::DOUBLE) {
- // Validates next section id, makes sure that section id is the same as
- // the list index so that we could find any section metadata by id in O(1)
- // later.
- auto new_section_id = static_cast<SectionId>(metadata_list->size());
- if (!IsSectionIdValid(new_section_id)) {
- // Max number of sections reached
- return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
- "Too many properties to be indexed, max number of properties "
- "allowed: %d",
- kMaxSectionId - kMinSectionId + 1));
- }
- // Creates section metadata from property config
- metadata_list->emplace_back(
- new_section_id, property_config.indexing_config().term_match_type(),
- property_config.indexing_config().tokenizer_type(),
- ConcatenatePath(current_section_path,
- property_config.property_name()));
- } else if (property_config.data_type() ==
- PropertyConfigProto::DataType::DOCUMENT) {
+ if (property_config.data_type() ==
+ PropertyConfigProto::DataType::DOCUMENT) {
// Tries to find sections recursively
auto nested_type_config_iter =
type_config_map.find(property_config.schema_type());
@@ -161,7 +134,30 @@
property_config.property_name()),
type_config_map, visited_states, metadata_list));
}
- // NOTE: we don't create sections for BOOLEAN and BYTES data types.
+
+ if (property_config.indexing_config().term_match_type() ==
+ TermMatchType::UNKNOWN) {
+ // No need to create section for current property
+ continue;
+ }
+
+ // Creates section metadata according to data type
+ // Validates next section id, makes sure that section id is the same as
+ // the list index so that we could find any section metadata by id in O(1)
+ // later.
+ auto new_section_id = static_cast<SectionId>(metadata_list->size());
+ if (!IsSectionIdValid(new_section_id)) {
+ // Max number of sections reached
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Too many properties to be indexed, max number of properties "
+ "allowed: %d",
+ kMaxSectionId - kMinSectionId + 1));
+ }
+ // Creates section metadata from property config
+ metadata_list->emplace_back(
+ new_section_id, property_config.indexing_config().term_match_type(),
+ property_config.indexing_config().tokenizer_type(),
+ ConcatenatePath(current_section_path, property_config.property_name()));
}
return libtextclassifier3::Status::OK;
}
@@ -197,8 +193,7 @@
}
// Helper function to get string content from a property. Repeated values are
-// joined into one string. We only care about STRING, INT64, and DOUBLE data
-// types.
+// joined into one string. We only care about the STRING data type.
std::vector<std::string> GetPropertyContent(const PropertyProto& property) {
std::vector<std::string> values;
if (!property.string_values().empty()) {
diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc
index 9e73465..ad9d07d 100644
--- a/icing/schema/section-manager_test.cc
+++ b/icing/schema/section-manager_test.cc
@@ -37,7 +37,6 @@
constexpr char kTypeEmail[] = "EmailMessage";
constexpr char kPropertySubject[] = "subject";
constexpr char kPropertyText[] = "text";
-constexpr char kPropertyTimestamp[] = "timestamp";
constexpr char kPropertyAttachment[] = "attachment";
constexpr char kPropertyRecipients[] = "recipients";
// type and property names of Conversation
@@ -60,7 +59,6 @@
.SetSchema(kTypeEmail)
.AddStringProperty(kPropertySubject, "the subject")
.AddStringProperty(kPropertyText, "the text")
- .AddInt64Property(kPropertyTimestamp, 1234567890)
.AddBytesProperty(kPropertyAttachment, "attachment bytes")
.AddStringProperty(kPropertyRecipients, "recipient1", "recipient2",
"recipient3")
@@ -107,23 +105,10 @@
text->mutable_indexing_config()->set_term_match_type(
TermMatchType::UNKNOWN);
- auto timestamp = type.add_properties();
- timestamp->set_property_name(kPropertyTimestamp);
- timestamp->set_data_type(PropertyConfigProto::DataType::INT64);
- timestamp->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- timestamp->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- timestamp->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
-
auto attachment = type.add_properties();
attachment->set_property_name(kPropertyAttachment);
attachment->set_data_type(PropertyConfigProto::DataType::BYTES);
attachment->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- attachment->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- attachment->mutable_indexing_config()->set_tokenizer_type(
- IndexingConfig::TokenizerType::PLAIN);
auto recipients = type.add_properties();
recipients->set_property_name(kPropertyRecipients);
@@ -153,8 +138,6 @@
emails->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
emails->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
emails->set_schema_type(kTypeEmail);
- emails->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
return type;
}
@@ -183,8 +166,6 @@
property1->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
property1->set_schema_type("type2"); // Here we reference type2
property1->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property1->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
SchemaTypeConfigProto type_config2;
type_config2.set_schema_type("type2");
@@ -194,8 +175,6 @@
// Here we reference type1, which references type2 causing the infinite loop
property2->set_schema_type("type1");
property2->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property2->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
SchemaUtil::TypeConfigMap type_config_map;
type_config_map.emplace("type1", type_config1);
@@ -223,8 +202,6 @@
// Here we're referencing our own type, causing an infinite loop
property2->set_schema_type("type");
property2->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property2->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
SchemaUtil::TypeConfigMap type_config_map;
type_config_map.emplace("type", type_config);
@@ -266,8 +243,6 @@
property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
property->set_schema_type("unknown_name");
property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property->mutable_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
SchemaUtil::TypeConfigMap type_config_map;
type_config_map.emplace("type", type_config);
@@ -328,10 +303,6 @@
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
// Test other data types
- // INT64
- EXPECT_THAT(section_manager->GetSectionContent(email_document_,
- /*section_path*/ "timestamp"),
- IsOkAndHolds(ElementsAre("1234567890")));
// BYTES type can't be indexed, so content won't be returned
EXPECT_THAT(section_manager->GetSectionContent(email_document_,
/*section_path*/ "attachment"),
@@ -343,8 +314,7 @@
// EmailMessage (section id -> section path):
SectionId recipients_section_id = 0;
SectionId subject_section_id = 1;
- SectionId timestamp_section_id = 2;
- SectionId invalid_email_section_id = 3;
+ SectionId invalid_email_section_id = 2;
ICING_ASSERT_OK_AND_ASSIGN(
content, section_manager->GetSectionContent(email_document_,
recipients_section_id));
@@ -353,9 +323,6 @@
EXPECT_THAT(
section_manager->GetSectionContent(email_document_, subject_section_id),
IsOkAndHolds(ElementsAre("the subject")));
- EXPECT_THAT(
- section_manager->GetSectionContent(email_document_, timestamp_section_id),
- IsOkAndHolds(ElementsAre("1234567890")));
EXPECT_THAT(section_manager->GetSectionContent(email_document_,
invalid_email_section_id),
@@ -364,13 +331,11 @@
// Conversation (section id -> section path):
// 0 -> emails.recipients
// 1 -> emails.subject
- // 2 -> emails.timestamp
- // 3 -> name
+ // 2 -> name
SectionId emails_recipients_section_id = 0;
SectionId emails_subject_section_id = 1;
- SectionId emails_timestamp_section_id = 2;
- SectionId name_section_id = 3;
- SectionId invalid_conversation_section_id = 4;
+ SectionId name_section_id = 2;
+ SectionId invalid_conversation_section_id = 3;
ICING_ASSERT_OK_AND_ASSIGN(
content, section_manager->GetSectionContent(
conversation_document_, emails_recipients_section_id));
@@ -382,11 +347,6 @@
emails_subject_section_id));
EXPECT_THAT(content, ElementsAre("the subject", "the subject"));
- ICING_ASSERT_OK_AND_ASSIGN(
- content, section_manager->GetSectionContent(conversation_document_,
- emails_timestamp_section_id));
- EXPECT_THAT(content, ElementsAre("1234567890", "1234567890"));
-
EXPECT_THAT(section_manager->GetSectionContent(conversation_document_,
name_section_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -404,7 +364,7 @@
// Extracts all sections from 'EmailMessage' document
ICING_ASSERT_OK_AND_ASSIGN(auto sections,
section_manager->ExtractSections(email_document_));
- EXPECT_THAT(sections.size(), Eq(3));
+ EXPECT_THAT(sections.size(), Eq(2));
EXPECT_THAT(sections[0].metadata.id, Eq(0));
EXPECT_THAT(sections[0].metadata.path, Eq("recipients"));
@@ -415,14 +375,10 @@
EXPECT_THAT(sections[1].metadata.path, Eq("subject"));
EXPECT_THAT(sections[1].content, ElementsAre("the subject"));
- EXPECT_THAT(sections[2].metadata.id, Eq(2));
- EXPECT_THAT(sections[2].metadata.path, Eq("timestamp"));
- EXPECT_THAT(sections[2].content, ElementsAre("1234567890"));
-
// Extracts all sections from 'Conversation' document
ICING_ASSERT_OK_AND_ASSIGN(
sections, section_manager->ExtractSections(conversation_document_));
- EXPECT_THAT(sections.size(), Eq(3));
+ EXPECT_THAT(sections.size(), Eq(2));
// Section id 3 (name) not found in document, so the first section id found
// is 1 below.
@@ -435,10 +391,6 @@
EXPECT_THAT(sections[1].metadata.id, Eq(1));
EXPECT_THAT(sections[1].metadata.path, Eq("emails.subject"));
EXPECT_THAT(sections[1].content, ElementsAre("the subject", "the subject"));
-
- EXPECT_THAT(sections[2].metadata.id, Eq(2));
- EXPECT_THAT(sections[2].metadata.path, Eq("emails.timestamp"));
- EXPECT_THAT(sections[2].content, ElementsAre("1234567890", "1234567890"));
}
} // namespace lib
diff --git a/icing/scoring/scorer.cc b/icing/scoring/scorer.cc
index ab5308c..42ec09a 100644
--- a/icing/scoring/scorer.cc
+++ b/icing/scoring/scorer.cc
@@ -67,6 +67,46 @@
double default_score_;
};
+// A scorer which assigns scores to documents based on usage reports.
+class UsageScorer : public Scorer {
+ public:
+ UsageScorer(const DocumentStore* document_store,
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy,
+ double default_score)
+ : document_store_(*document_store),
+ ranking_strategy_(ranking_strategy),
+ default_score_(default_score) {}
+
+ double GetScore(DocumentId document_id) override {
+ ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores,
+ document_store_.GetUsageScores(document_id),
+ default_score_);
+
+ switch (ranking_strategy_) {
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT:
+ return usage_scores.usage_type1_count;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT:
+ return usage_scores.usage_type2_count;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT:
+ return usage_scores.usage_type3_count;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP:
+ return usage_scores.usage_type1_last_used_timestamp_s;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP:
+ return usage_scores.usage_type2_last_used_timestamp_s;
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP:
+ return usage_scores.usage_type3_last_used_timestamp_s;
+ default:
+ // This shouldn't happen if this scorer is used correctly.
+ return default_score_;
+ }
+ }
+
+ private:
+ const DocumentStore& document_store_;
+ ScoringSpecProto::RankingStrategy::Code ranking_strategy_;
+ double default_score_;
+};
+
// A special scorer which does nothing but assigns the default score to each
// document. This is used especially when no scoring is required in a query.
class NoScorer : public Scorer {
@@ -91,6 +131,19 @@
case ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP:
return std::make_unique<DocumentCreationTimestampScorer>(document_store,
default_score);
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT:
+ [[fallthrough]];
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT:
+ [[fallthrough]];
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT:
+ [[fallthrough]];
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP:
+ [[fallthrough]];
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP:
+ [[fallthrough]];
+ case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP:
+ return std::make_unique<UsageScorer>(document_store, rank_by,
+ default_score);
case ScoringSpecProto::RankingStrategy::NONE:
return std::make_unique<NoScorer>(default_score);
}
diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc
index 4dda603..06bf484 100644
--- a/icing/scoring/scorer_test.cc
+++ b/icing/scoring/scorer_test.cc
@@ -36,9 +36,8 @@
namespace {
using ::testing::Eq;
-using ::testing::Test;
-class ScorerTest : public Test {
+class ScorerTest : public testing::Test {
protected:
ScorerTest()
: test_dir_(GetTestTempDir() + "/icing"),
@@ -96,6 +95,17 @@
FakeClock fake_clock2_;
};
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+ int64 timestamp_ms,
+ UsageReport::UsageType usage_type) {
+ UsageReport usage_report;
+ usage_report.set_document_namespace(name_space);
+ usage_report.set_document_uri(uri);
+ usage_report.set_usage_timestamp_ms(timestamp_ms);
+ usage_report.set_usage_type(usage_type);
+ return usage_report;
+}
+
TEST_F(ScorerTest, CreationWithNullPointerShouldFail) {
EXPECT_THAT(Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
/*default_score=*/0, /*document_store=*/nullptr),
@@ -187,6 +197,303 @@
Eq(fake_clock2().GetSystemTimeMilliseconds()));
}
+TEST_F(ScorerTest, ShouldGetCorrectUsageCountScoreForType1) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ // Create 3 scorers for 3 different usage types.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer1,
+ Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT,
+ /*default_score=*/0, document_store()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer2,
+ Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT,
+ /*default_score=*/0, document_store()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer3,
+ Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT,
+ /*default_score=*/0, document_store()));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+
+ // Report a type1 usage.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1));
+
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(1));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+}
+
+TEST_F(ScorerTest, ShouldGetCorrectUsageCountScoreForType2) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ // Create 3 scorers for 3 different usage types.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer1,
+ Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT,
+ /*default_score=*/0, document_store()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer2,
+ Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT,
+ /*default_score=*/0, document_store()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer3,
+ Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT,
+ /*default_score=*/0, document_store()));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+
+ // Report a type2 usage.
+ UsageReport usage_report_type2 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2));
+
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(1));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+}
+
+TEST_F(ScorerTest, ShouldGetCorrectUsageCountScoreForType3) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ // Create 3 scorers for 3 different usage types.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer1,
+ Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT,
+ /*default_score=*/0, document_store()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer2,
+ Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE2_COUNT,
+ /*default_score=*/0, document_store()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer3,
+ Scorer::Create(ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT,
+ /*default_score=*/0, document_store()));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+
+  // Report a type3 usage.
+ UsageReport usage_report_type3 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3));
+
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(1));
+}
+
+TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType1) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ // Create 3 scorers for 3 different usage types.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer1,
+ Scorer::Create(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP,
+ /*default_score=*/0, document_store()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer2,
+ Scorer::Create(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP,
+ /*default_score=*/0, document_store()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer3,
+ Scorer::Create(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP,
+ /*default_score=*/0, document_store()));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+
+ UsageReport usage_report_type1_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time1));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(1));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+
+ // Report usage with timestamp = 5000ms, score should be updated.
+ UsageReport usage_report_type1_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time5));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(5));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+
+ // Report usage with timestamp = 3000ms, score should not be updated.
+ UsageReport usage_report_type1_time3 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time3));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(5));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+}
+
+TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType2) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ // Create 3 scorers for 3 different usage types.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer1,
+ Scorer::Create(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP,
+ /*default_score=*/0, document_store()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer2,
+ Scorer::Create(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP,
+ /*default_score=*/0, document_store()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer3,
+ Scorer::Create(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP,
+ /*default_score=*/0, document_store()));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+
+ UsageReport usage_report_type2_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time1));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(1));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+
+ // Report usage with timestamp = 5000ms, score should be updated.
+ UsageReport usage_report_type2_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time5));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(5));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+
+ // Report usage with timestamp = 3000ms, score should not be updated.
+ UsageReport usage_report_type2_time3 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time3));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(5));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+}
+
+TEST_F(ScorerTest, ShouldGetCorrectUsageTimestampScoreForType3) {
+ DocumentProto test_document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store()->Put(test_document));
+
+ // Create 3 scorers for 3 different usage types.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer1,
+ Scorer::Create(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP,
+ /*default_score=*/0, document_store()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer2,
+ Scorer::Create(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP,
+ /*default_score=*/0, document_store()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<Scorer> scorer3,
+ Scorer::Create(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP,
+ /*default_score=*/0, document_store()));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(0));
+
+ UsageReport usage_report_type3_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time1));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(1));
+
+ // Report usage with timestamp = 5000ms, score should be updated.
+ UsageReport usage_report_type3_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time5));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(5));
+
+ // Report usage with timestamp = 3000ms, score should not be updated.
+ UsageReport usage_report_type3_time3 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time3));
+ EXPECT_THAT(scorer1->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer2->GetScore(document_id), Eq(0));
+ EXPECT_THAT(scorer3->GetScore(document_id), Eq(5));
+}
+
TEST_F(ScorerTest, NoScorerShouldAlwaysReturnDefaultScore) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer,
diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc
index b93bf1a..0da25f6 100644
--- a/icing/scoring/scoring-processor_test.cc
+++ b/icing/scoring/scoring-processor_test.cc
@@ -35,9 +35,8 @@
using ::testing::ElementsAre;
using ::testing::IsEmpty;
using ::testing::SizeIs;
-using ::testing::Test;
-class ScoringProcessorTest : public Test {
+class ScoringProcessorTest : public testing::Test {
protected:
ScoringProcessorTest()
: test_dir_(GetTestTempDir() + "/icing"),
@@ -120,6 +119,17 @@
return std::pair(doc_hit_infos, scored_document_hits);
}
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+ int64 timestamp_ms,
+ UsageReport::UsageType usage_type) {
+ UsageReport usage_report;
+ usage_report.set_document_namespace(name_space);
+ usage_report.set_document_uri(uri);
+ usage_report.set_usage_timestamp_ms(timestamp_ms);
+ usage_report.set_usage_type(usage_type);
+ return usage_report;
+}
+
TEST_F(ScoringProcessorTest, CreationWithNullPointerShouldFail) {
ScoringSpecProto spec_proto;
EXPECT_THAT(ScoringProcessor::Create(spec_proto, /*document_store=*/nullptr),
@@ -289,6 +299,126 @@
EqualsScoredDocumentHit(scored_document_hit1)));
}
+TEST_F(ScoringProcessorTest, ShouldScoreByUsageCount) {
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document2 =
+ CreateDocument("icing", "email/2", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document3 =
+ CreateDocument("icing", "email/3", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store()->Put(document1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store()->Put(document2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store()->Put(document3));
+
+ // Report usage for doc1 once and doc2 twice.
+ UsageReport usage_report_doc1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc2 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc1));
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc2));
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc2));
+
+ DocHitInfo doc_hit_info1(document_id1);
+ DocHitInfo doc_hit_info2(document_id2);
+ DocHitInfo doc_hit_info3(document_id3);
+ ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
+ /*score=*/2);
+ ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
+ /*score=*/0);
+
+ // Creates a dummy DocHitInfoIterator with 3 results
+ std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
+ doc_hit_info3};
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
+
+ // Creates a ScoringProcessor which ranks in descending order
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store()));
+
+ EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
+ /*num_to_score=*/3),
+ ElementsAre(EqualsScoredDocumentHit(scored_document_hit1),
+ EqualsScoredDocumentHit(scored_document_hit2),
+ EqualsScoredDocumentHit(scored_document_hit3)));
+}
+
+TEST_F(ScoringProcessorTest, ShouldScoreByUsageTimestamp) {
+ DocumentProto document1 =
+ CreateDocument("icing", "email/1", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document2 =
+ CreateDocument("icing", "email/2", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+ DocumentProto document3 =
+ CreateDocument("icing", "email/3", kDefaultScore,
+ /*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store()->Put(document1));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store()->Put(document2));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store()->Put(document3));
+
+ // Report usage for doc1 and doc2.
+ UsageReport usage_report_doc1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ UsageReport usage_report_doc2 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc1));
+ ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc2));
+
+ DocHitInfo doc_hit_info1(document_id1);
+ DocHitInfo doc_hit_info2(document_id2);
+ DocHitInfo doc_hit_info3(document_id3);
+ ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
+ /*score=*/1);
+ ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
+ /*score=*/5);
+ ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
+ /*score=*/0);
+
+ // Creates a dummy DocHitInfoIterator with 3 results
+ std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
+ doc_hit_info3};
+ std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
+
+ ScoringSpecProto spec_proto;
+ spec_proto.set_rank_by(
+ ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
+
+ // Creates a ScoringProcessor which ranks in descending order
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ScoringProcessor> scoring_processor,
+ ScoringProcessor::Create(spec_proto, document_store()));
+
+ EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
+ /*num_to_score=*/3),
+ ElementsAre(EqualsScoredDocumentHit(scored_document_hit1),
+ EqualsScoredDocumentHit(scored_document_hit2),
+ EqualsScoredDocumentHit(scored_document_hit3)));
+}
+
TEST_F(ScoringProcessorTest, ShouldHandleNoScores) {
// Creates input doc_hit_infos and corresponding scored_document_hits
ICING_ASSERT_OK_AND_ASSIGN(
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 7577f6b..1e47d59 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -59,6 +59,7 @@
constexpr char kScoreCacheFilename[] = "score_cache";
constexpr char kFilterCacheFilename[] = "filter_cache";
constexpr char kNamespaceMapperFilename[] = "namespace_mapper";
+constexpr char kUsageStoreDirectoryName[] = "usage_store";
constexpr int32_t kUriMapperMaxSize = 12 * 1024 * 1024; // 12 MiB
@@ -123,6 +124,10 @@
return absl_ports::StrCat(base_dir, "/", kNamespaceMapperFilename);
}
+std::string MakeUsageStoreDirectoryName(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kUsageStoreDirectoryName);
+}
+
// TODO(adorokhine): This class internally uses an 8-byte fingerprint of the
// Key and stores the key/value in a file-backed-trie that adds an ~80 byte
// overhead per key. As we know that these fingerprints are always 8-bytes in
@@ -309,6 +314,14 @@
MakeNamespaceMapperFilename(base_dir_),
kNamespaceMapperMaxSize));
+ ICING_ASSIGN_OR_RETURN(
+ usage_store_,
+ UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_)));
+
+ // Ensure the usage store is the correct size.
+ ICING_RETURN_IF_ERROR(
+ usage_store_->TruncateTo(document_id_mapper_->num_elements()));
+
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
if (checksum.Get() != header.checksum) {
return absl_ports::InternalError(
@@ -325,6 +338,12 @@
ICING_RETURN_IF_ERROR(ResetFilterCache());
ICING_RETURN_IF_ERROR(ResetNamespaceMapper());
+ // Creates a new UsageStore instance. Note that we don't reset the data in
+ // usage store here because we're not able to regenerate the usage scores.
+ ICING_ASSIGN_OR_RETURN(
+ usage_store_,
+ UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_)));
+
// Iterates through document log
auto iterator = document_log_->GetIterator();
auto iterator_status = iterator.Advance();
@@ -478,6 +497,10 @@
"Failed to iterate through proto log.");
}
+ // Shrink usage_store_ to the correct size.
+ ICING_RETURN_IF_ERROR(
+ usage_store_->TruncateTo(document_id_mapper_->num_elements()));
+
// Write the header
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
@@ -727,9 +750,19 @@
expiration_timestamp_ms)));
if (old_document_id_or.ok()) {
- // Mark the old document id as deleted.
- ICING_RETURN_IF_ERROR(document_id_mapper_->Set(
- old_document_id_or.ValueOrDie(), kDocDeletedFlag));
+ DocumentId old_document_id = old_document_id_or.ValueOrDie();
+ auto offset_or = DoesDocumentExistAndGetFileOffset(old_document_id);
+
+ if (offset_or.ok()) {
+ // The old document exists, copy over the usage scores.
+ ICING_RETURN_IF_ERROR(
+ usage_store_->CloneUsageScores(/*from_document_id=*/old_document_id,
+ /*to_document_id=*/new_document_id));
+
+ // Hard delete the old document.
+ ICING_RETURN_IF_ERROR(
+ HardDelete(old_document_id, offset_or.ValueOrDie()));
+ }
}
return new_document_id;
@@ -887,8 +920,7 @@
if (soft_delete) {
return SoftDelete(name_space, uri, document_id);
} else {
- uint64_t document_log_offset = file_offset_or.ValueOrDie();
- return HardDelete(document_id, document_log_offset);
+ return HardDelete(document_id, file_offset_or.ValueOrDie());
}
}
@@ -915,6 +947,7 @@
}
}
+// TODO(b/169969469): Consider removing SoftDelete().
libtextclassifier3::Status DocumentStore::SoftDelete(
std::string_view name_space, std::string_view uri, DocumentId document_id) {
// Update ground truth first.
@@ -935,7 +968,7 @@
}
libtextclassifier3::Status DocumentStore::HardDelete(
- DocumentId document_id, uint64_t document_log_offset) {
+ DocumentId document_id, int64_t document_log_offset) {
// Erases document proto.
ICING_RETURN_IF_ERROR(document_log_->EraseProto(document_log_offset));
return ClearDerivedData(document_id);
@@ -981,6 +1014,19 @@
return document_filter_data;
}
+libtextclassifier3::StatusOr<UsageStore::UsageScores>
+DocumentStore::GetUsageScores(DocumentId document_id) const {
+ return usage_store_->GetUsageScores(document_id);
+}
+
+libtextclassifier3::Status DocumentStore::ReportUsage(
+ const UsageReport& usage_report) {
+ ICING_ASSIGN_OR_RETURN(DocumentId document_id,
+ GetDocumentId(usage_report.document_namespace(),
+ usage_report.document_uri()));
+ return usage_store_->AddUsageReport(usage_report, document_id);
+}
+
libtextclassifier3::Status DocumentStore::DeleteByNamespace(
std::string_view name_space, bool soft_delete) {
auto namespace_id_or = namespace_mapper_->Get(name_space);
@@ -1132,6 +1178,7 @@
ICING_RETURN_IF_ERROR(score_cache_->PersistToDisk());
ICING_RETURN_IF_ERROR(filter_cache_->PersistToDisk());
ICING_RETURN_IF_ERROR(namespace_mapper_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(usage_store_->PersistToDisk());
// Update the combined checksum and write to header file.
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
@@ -1334,15 +1381,21 @@
// Guaranteed to have a document now.
DocumentProto document_to_keep = document_or.ValueOrDie();
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- new_doc_store->Put(std::move(document_to_keep)).status();
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
+ // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
+ // that can support error logging.
+ auto new_document_id_or = new_doc_store->Put(std::move(document_to_keep));
+ if (!new_document_id_or.ok()) {
+ ICING_LOG(ERROR) << new_document_id_or.status().error_message()
<< "Failed to write into new document store";
- return status;
+ return new_document_id_or.status();
}
+
+ // Copy over usage scores.
+ ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores,
+ usage_store_->GetUsageScores(document_id));
+ DocumentId new_document_id = new_document_id_or.ValueOrDie();
+ ICING_RETURN_IF_ERROR(
+ new_doc_store->SetUsageScores(new_document_id, usage_scores));
}
ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk());
@@ -1430,7 +1483,13 @@
document_id, DocumentFilterData(kInvalidNamespaceId, kInvalidSchemaTypeId,
/*expiration_timestamp_ms=*/-1)));
- return libtextclassifier3::Status::OK;
+ // Clears the usage scores.
+ return usage_store_->DeleteUsageScores(document_id);
+}
+
+libtextclassifier3::Status DocumentStore::SetUsageScores(
+ DocumentId document_id, const UsageStore::UsageScores& usage_scores) {
+ return usage_store_->SetUsageScores(document_id, usage_scores);
}
} // namespace lib
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index 2ac1c71..5c1b902 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -34,6 +34,7 @@
#include "icing/store/document-id.h"
#include "icing/store/key-mapper.h"
#include "icing/store/namespace-id.h"
+#include "icing/store/usage-store.h"
#include "icing/util/clock.h"
#include "icing/util/crc32.h"
#include "icing/util/document-validator.h"
@@ -223,6 +224,24 @@
libtextclassifier3::StatusOr<DocumentFilterData> GetDocumentFilterData(
DocumentId document_id) const;
+ // Gets the usage scores of a document.
+ //
+ // Returns:
+ // UsageScores on success
+ // INVALID_ARGUMENT if document_id is invalid
+ // INTERNAL_ERROR on I/O errors
+ libtextclassifier3::StatusOr<UsageStore::UsageScores> GetUsageScores(
+ DocumentId document_id) const;
+
+ // Reports usage. The corresponding usage scores of the specified document in
+ // the report will be updated.
+ //
+ // Returns:
+ // OK on success
+  //   NOT_FOUND if the [namespace + uri] key in the report doesn't exist
+ // INTERNAL_ERROR on I/O errors.
+ libtextclassifier3::Status ReportUsage(const UsageReport& usage_report);
+
// Deletes all documents belonging to the given namespace. The documents will
// be marked as deleted if 'soft_delete' is true, otherwise they will be
// erased immediately.
@@ -391,6 +410,11 @@
// DocumentStore. Namespaces may be removed from the mapper during compaction.
std::unique_ptr<KeyMapper<NamespaceId>> namespace_mapper_;
+ // A storage class that caches all usage scores. Usage scores are not
+ // considered as ground truth. Usage scores are associated with document ids
+ // so they need to be updated when document ids change.
+ std::unique_ptr<UsageStore> usage_store_;
+
// Used internally to indicate whether the class has been initialized. This is
// to guard against cases where the object has been created, but Initialize
// fails in the constructor. If we have successfully exited the constructor,
@@ -497,7 +521,7 @@
// OK on success
// INTERNAL_ERROR on IO error
libtextclassifier3::Status HardDelete(DocumentId document_id,
- uint64_t document_log_offset);
+ int64_t document_log_offset);
// Helper method to find a DocumentId that is associated with the given
// namespace and uri.
@@ -539,6 +563,10 @@
// Helper method to clear the derived data of a document
libtextclassifier3::Status ClearDerivedData(DocumentId document_id);
+
+ // Sets usage scores for the given document.
+ libtextclassifier3::Status SetUsageScores(
+ DocumentId document_id, const UsageStore::UsageScores& usage_scores);
};
} // namespace lib
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index f857481..301dbdd 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -42,6 +42,8 @@
namespace icing {
namespace lib {
+namespace {
+
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::_;
using ::testing::Eq;
@@ -54,6 +56,17 @@
using ::testing::Return;
using ::testing::UnorderedElementsAre;
+UsageReport CreateUsageReport(std::string name_space, std::string uri,
+ int64 timestamp_ms,
+ UsageReport::UsageType usage_type) {
+ UsageReport usage_report;
+ usage_report.set_document_namespace(name_space);
+ usage_report.set_document_uri(uri);
+ usage_report.set_usage_timestamp_ms(timestamp_ms);
+ usage_report.set_usage_type(usage_type);
+ return usage_report;
+}
+
class DocumentStoreTest : public ::testing::Test {
protected:
DocumentStoreTest()
@@ -1297,7 +1310,7 @@
// Bad file system
MockFilesystem mock_filesystem;
- ON_CALL(mock_filesystem, GetDiskUsage(A<const char *>()))
+ ON_CALL(mock_filesystem, GetDiskUsage(A<const char*>()))
.WillByDefault(Return(Filesystem::kBadFileSize));
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem,
@@ -1465,6 +1478,63 @@
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
+TEST_F(DocumentStoreTest, SoftDeleteDoesNotClearUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(test_document1_));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_count = 1;
+ ASSERT_THAT(doc_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Soft delete the document.
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/true));
+
+ // The scores should be the same.
+ ASSERT_THAT(doc_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(DocumentStoreTest, HardDeleteShouldClearUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(test_document1_));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_count = 1;
+ ASSERT_THAT(doc_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Hard delete the document.
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/false));
+
+ // The scores should be cleared.
+ expected_scores.usage_type1_count = 0;
+ ASSERT_THAT(doc_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+}
+
TEST_F(DocumentStoreTest,
ExpirationTimestampIsSumOfNonZeroTtlAndCreationTimestamp) {
DocumentProto document = DocumentBuilder()
@@ -1572,7 +1642,7 @@
// With default doc score 0
.Build();
DocumentProto document2 = DocumentBuilder()
- .SetKey("icing", "email/1")
+ .SetKey("icing", "email/2")
.SetSchema("email")
.AddStringProperty("subject", "subject foo")
.SetScore(5)
@@ -2346,5 +2416,321 @@
UnorderedElementsAre("namespace1"));
}
+TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store->Put(test_document1_));
+
+ // Report usage with type 1 and time 1.
+ UsageReport usage_report_type1_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1_time1));
+
+ UsageStore::UsageScores expected_scores;
+ expected_scores.usage_type1_last_used_timestamp_s = 1;
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 1 and time 5, time should be updated.
+ UsageReport usage_report_type1_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1_time5));
+
+ expected_scores.usage_type1_last_used_timestamp_s = 5;
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 2 and time 1.
+ UsageReport usage_report_type2_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type2_time1));
+
+ expected_scores.usage_type2_last_used_timestamp_s = 1;
+ ++expected_scores.usage_type2_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 2 and time 5.
+ UsageReport usage_report_type2_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type2_time5));
+
+ expected_scores.usage_type2_last_used_timestamp_s = 5;
+ ++expected_scores.usage_type2_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 3 and time 1.
+ UsageReport usage_report_type3_time1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type3_time1));
+
+ expected_scores.usage_type3_last_used_timestamp_s = 1;
+ ++expected_scores.usage_type3_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 3 and time 5.
+ UsageReport usage_report_type3_time5 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type3_time5));
+
+ expected_scores.usage_type3_last_used_timestamp_s = 5;
+ ++expected_scores.usage_type3_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store->Put(test_document1_));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 2.
+ UsageReport usage_report_type2 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type2));
+
+ ++expected_scores.usage_type2_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Report usage with type 3.
+ UsageReport usage_report_type3 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type3));
+
+ ++expected_scores.usage_type3_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(DocumentStoreTest, UsageScoresShouldNotBeClearedOnChecksumMismatch) {
+ UsageStore::UsageScores expected_scores;
+ DocumentId document_id;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(document_id,
+ document_store->Put(test_document1_));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+ }
+
+ // Change the DocStore's header combined checksum so that it won't match the
+ // recalculated checksum on initialization. This will force a regeneration of
+ // derived files from ground truth.
+ const std::string header_file =
+ absl_ports::StrCat(document_store_dir_, "/document_store_header");
+ DocumentStore::Header header;
+ header.magic = DocumentStore::Header::kMagic;
+ header.checksum = 10; // Arbitrary garbage checksum
+ filesystem_.DeleteFile(header_file.c_str());
+ filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+
+ // Successfully recover from a corrupt derived file issue.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ // Usage scores should be the same.
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(DocumentStoreTest, UsageScoresShouldBeAvailableAfterDataLoss) {
+ UsageStore::UsageScores expected_scores;
+ DocumentId document_id;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ document_id, document_store->Put(DocumentProto(test_document1_)));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+ }
+
+ // "Corrupt" the content written in the log by adding non-checksummed data to
+ // it. This will mess up the checksum of the proto log, forcing it to rewind
+ // to the last saved point.
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+ const std::string serialized_document = document.SerializeAsString();
+
+ const std::string document_log_file =
+ absl_ports::StrCat(document_store_dir_, "/document_log");
+ int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
+ filesystem_.PWrite(document_log_file.c_str(), file_size,
+ serialized_document.data(), serialized_document.size());
+
+ // Successfully recover from a data loss issue.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+
+ // Usage scores should still be available.
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store->Put(DocumentProto(test_document1_)));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Update the document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId updated_document_id,
+ document_store->Put(DocumentProto(test_document1_)));
+ // We should get a different document id.
+ ASSERT_THAT(updated_document_id, Not(Eq(document_id)));
+
+ // Usage scores should be the same.
+ EXPECT_THAT(document_store->GetUsageScores(updated_document_id),
+ IsOkAndHolds(expected_scores));
+}
+
+TEST_F(DocumentStoreTest,
+ UsageScoresShouldNotBeCopiedOverFromOldSoftDeletedDocs) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store->Put(DocumentProto(test_document1_)));
+
+ // Report usage with type 1.
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
+
+ UsageStore::UsageScores expected_scores;
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id),
+ IsOkAndHolds(expected_scores));
+
+ // Soft delete the doc.
+ ICING_ASSERT_OK(document_store->Delete(document_id, /*soft_delete=*/true));
+
+ // Put the same document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId updated_document_id,
+ document_store->Put(DocumentProto(test_document1_)));
+ // We should get a different document id.
+ ASSERT_THAT(updated_document_id, Not(Eq(document_id)));
+
+ // Usage scores should be cleared.
+ EXPECT_THAT(document_store->GetUsageScores(updated_document_id),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id1,
+ document_store->Put(DocumentProto(test_document1_)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id2,
+ document_store->Put(DocumentProto(test_document2_)));
+ ICING_ASSERT_OK(document_store->Delete(document_id1));
+
+ // Report usage of document 2.
+ UsageReport usage_report = CreateUsageReport(
+ /*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/0,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(document_store->ReportUsage(usage_report));
+
+ UsageStore::UsageScores expected_scores;
+ ++expected_scores.usage_type1_count;
+ ASSERT_THAT(document_store->GetUsageScores(document_id2),
+ IsOkAndHolds(expected_scores));
+
+ // Run optimize
+ std::string optimized_dir = document_store_dir_ + "/optimize_test";
+ filesystem_.CreateDirectoryRecursively(optimized_dir.c_str());
+ ICING_ASSERT_OK(document_store->OptimizeInto(optimized_dir));
+
+ // Get optimized document store
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocumentStore> optimized_document_store,
+ DocumentStore::Create(&filesystem_, optimized_dir, &fake_clock_,
+ schema_store_.get()));
+
+ // Usage scores should be the same.
+ // The original document_id2 should have become document_id2 - 1.
+ ASSERT_THAT(optimized_document_store->GetUsageScores(document_id2 - 1),
+ IsOkAndHolds(expected_scores));
+}
+
+} // namespace
+
} // namespace lib
} // namespace icing
diff --git a/icing/store/usage-store.cc b/icing/store/usage-store.cc
index 911c45a..7a0af9c 100644
--- a/icing/store/usage-store.cc
+++ b/icing/store/usage-store.cc
@@ -31,10 +31,32 @@
const Filesystem* filesystem, const std::string& base_dir) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
+ if (!filesystem->CreateDirectoryRecursively(base_dir.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Failed to create UsageStore directory: ", base_dir));
+ }
+
+ const std::string score_cache_filename =
+ MakeUsageScoreCacheFilename(base_dir);
+
auto usage_score_cache_or = FileBackedVector<UsageScores>::Create(
- *filesystem, MakeUsageScoreCacheFilename(base_dir),
+ *filesystem, score_cache_filename,
MemoryMappedFile::READ_WRITE_AUTO_SYNC);
+ if (absl_ports::IsFailedPrecondition(usage_score_cache_or.status())) {
+ // File checksum doesn't match the stored checksum. Delete and recreate the
+ // file.
+ ICING_RETURN_IF_ERROR(
+ FileBackedVector<UsageScores>::Delete(*filesystem, score_cache_filename));
+
+ ICING_VLOG(1) << "The score cache file in UsageStore is corrupted, all "
+ "scores have been reset.";
+
+ usage_score_cache_or = FileBackedVector<UsageScores>::Create(
+ *filesystem, score_cache_filename,
+ MemoryMappedFile::READ_WRITE_AUTO_SYNC);
+ }
+
if (!usage_score_cache_or.ok()) {
ICING_LOG(ERROR) << usage_score_cache_or.status().error_message()
<< "Failed to initialize usage_score_cache";
@@ -111,9 +133,7 @@
}
// Write updated usage scores to file.
- ICING_RETURN_IF_ERROR(usage_score_cache_->Set(document_id, usage_scores));
-
- return libtextclassifier3::Status::OK;
+ return usage_score_cache_->Set(document_id, usage_scores);
}
libtextclassifier3::Status UsageStore::DeleteUsageScores(
@@ -123,10 +143,13 @@
"Document id %d is invalid.", document_id));
}
- // Clear all the scores of the document.
- ICING_RETURN_IF_ERROR(usage_score_cache_->Set(document_id, UsageScores()));
+ if (document_id >= usage_score_cache_->num_elements()) {
+ // Nothing to delete.
+ return libtextclassifier3::Status::OK;
+ }
- return libtextclassifier3::Status::OK;
+ // Clear all the scores of the document.
+ return usage_score_cache_->Set(document_id, UsageScores());
}
libtextclassifier3::StatusOr<UsageStore::UsageScores>
@@ -149,20 +172,55 @@
}
libtextclassifier3::Status UsageStore::SetUsageScores(
- DocumentId document_id, UsageScores usage_scores) {
+ DocumentId document_id, const UsageScores& usage_scores) {
if (!IsDocumentIdValid(document_id)) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Document id %d is invalid.", document_id));
}
- ICING_RETURN_IF_ERROR(usage_score_cache_->Set(document_id, usage_scores));
+ return usage_score_cache_->Set(document_id, usage_scores);
+}
- return libtextclassifier3::Status::OK;
+libtextclassifier3::Status UsageStore::CloneUsageScores(
+ DocumentId from_document_id, DocumentId to_document_id) {
+ if (!IsDocumentIdValid(from_document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "from_document_id %d is invalid.", from_document_id));
+ }
+
+ if (!IsDocumentIdValid(to_document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "to_document_id %d is invalid.", to_document_id));
+ }
+
+ auto usage_scores_or = usage_score_cache_->Get(from_document_id);
+ if (usage_scores_or.ok()) {
+ return usage_score_cache_->Set(to_document_id,
+ *std::move(usage_scores_or).ValueOrDie());
+ } else if (absl_ports::IsOutOfRange(usage_scores_or.status())) {
+ // No usage scores found. Set default scores to to_document_id.
+ return usage_score_cache_->Set(to_document_id, UsageScores());
+ }
+
+ // Real error
+ return usage_scores_or.status();
}
libtextclassifier3::Status UsageStore::PersistToDisk() {
- ICING_RETURN_IF_ERROR(usage_score_cache_->PersistToDisk());
- return libtextclassifier3::Status::OK;
+ return usage_score_cache_->PersistToDisk();
+}
+
+libtextclassifier3::StatusOr<Crc32> UsageStore::ComputeChecksum() {
+ return usage_score_cache_->ComputeChecksum();
+}
+
+libtextclassifier3::Status UsageStore::TruncateTo(DocumentId num_documents) {
+ if (num_documents >= usage_score_cache_->num_elements()) {
+ // No need to truncate
+ return libtextclassifier3::Status::OK;
+ }
+ // num_documents is the number of scores to keep; ids start at 0, so this
+ // drops the scores of all document ids >= num_documents.
+ return usage_score_cache_->TruncateTo(num_documents);
}
libtextclassifier3::Status UsageStore::Reset() {
@@ -186,7 +244,7 @@
}
usage_score_cache_ = std::move(usage_score_cache_or).ValueOrDie();
- return libtextclassifier3::Status::OK;
+ return PersistToDisk();
}
} // namespace lib
diff --git a/icing/store/usage-store.h b/icing/store/usage-store.h
index 9a8c286..0a622a0 100644
--- a/icing/store/usage-store.h
+++ b/icing/store/usage-store.h
@@ -104,7 +104,6 @@
// Returns:
// UsageScores on success
// INVALID_ARGUMENT if document_id is invalid
- // NOT_FOUND if no scores are found for the document
// INTERNAL_ERROR on I/O errors
//
// TODO(b/169433395): return a pointer instead of an object.
@@ -121,7 +120,19 @@
// INVALID_ARGUMENT if document_id is invalid
// INTERNAL_ERROR on I/O errors
libtextclassifier3::Status SetUsageScores(DocumentId document_id,
- UsageScores usage_scores);
+ const UsageScores& usage_scores);
+
+ // Clones the usage scores from one document to another.
+ //
+ // Returns:
+ // OK on success
+ // INVALID_ARGUMENT if any of the document ids is invalid
+ // INTERNAL_ERROR on I/O errors
+ //
+ // TODO(b/169433395): We can remove this method once GetUsageScores() returns
+ // a pointer.
+ libtextclassifier3::Status CloneUsageScores(DocumentId from_document_id,
+ DocumentId to_document_id);
// Syncs data to disk.
//
@@ -130,6 +141,21 @@
// INTERNAL on I/O error
libtextclassifier3::Status PersistToDisk();
+ // Updates checksum of the usage scores and returns it.
+ //
+ // Returns:
+ // A Crc32 on success
+ // INTERNAL_ERROR if the internal state is inconsistent
+ libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
+
+ // Resizes the storage so that only the usage scores of document ids less
+ // than num_documents are stored.
+ //
+ // Returns:
+ // OK on success
+ // OUT_OF_RANGE_ERROR if num_documents is negative
+ libtextclassifier3::Status TruncateTo(DocumentId num_documents);
+
// Deletes all usage data and re-initialize the storage.
//
// Returns:
diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc
index 39985f0..f7fa778 100644
--- a/icing/store/usage-store_test.cc
+++ b/icing/store/usage-store_test.cc
@@ -53,6 +53,22 @@
return usage_report;
}
+UsageStore::UsageScores CreateUsageScores(uint32_t type1_timestamp,
+ uint32_t type2_timestamp,
+ uint32_t type3_timestamp,
+ int type1_count, int type2_count,
+ int type3_count) {
+ UsageStore::UsageScores scores;
+ scores.usage_type1_last_used_timestamp_s = type1_timestamp;
+ scores.usage_type2_last_used_timestamp_s = type2_timestamp;
+ scores.usage_type3_last_used_timestamp_s = type3_timestamp;
+ scores.usage_type1_count = type1_count;
+ scores.usage_type2_count = type2_count;
+ scores.usage_type3_count = type3_count;
+
+ return scores;
+}
+
TEST_F(UsageStoreTest, CreationShouldSucceed) {
EXPECT_THAT(UsageStore::Create(&filesystem_, test_dir_), IsOk());
}
@@ -138,11 +154,9 @@
// Report a usage with timestamp 5.
usage_store->AddUsageReport(usage_report_time5, /*document_id=*/1);
- UsageStore::UsageScores expected_scores;
- expected_scores.usage_type1_last_used_timestamp_s = 5;
- expected_scores.usage_type1_count = 1;
- expected_scores.usage_type2_count = 0;
- expected_scores.usage_type3_count = 0;
+ UsageStore::UsageScores expected_scores = CreateUsageScores(
+ /*type1_timestamp=*/5, /*type2_timestamp=*/0, /*type3_timestamp=*/0,
+ /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0);
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
@@ -174,10 +188,10 @@
// Report a usage with type 1.
usage_store->AddUsageReport(usage_report_type1, /*document_id=*/1);
- UsageStore::UsageScores expected_scores;
- expected_scores.usage_type1_count = 1;
- expected_scores.usage_type2_count = 0;
- expected_scores.usage_type3_count = 0;
+ UsageStore::UsageScores expected_scores = CreateUsageScores(
+ /*type1_timestamp=*/0, /*type2_timestamp=*/0, /*type3_timestamp=*/0,
+ /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0);
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
// Report another usage with type 1.
@@ -222,13 +236,9 @@
UsageStore::Create(&filesystem_, test_dir_));
// Create usage scores with some random numbers.
- UsageStore::UsageScores scores;
- scores.usage_type1_last_used_timestamp_s = 7;
- scores.usage_type2_last_used_timestamp_s = 9;
- scores.usage_type3_last_used_timestamp_s = 11;
- scores.usage_type1_count = 3;
- scores.usage_type2_count = 4;
- scores.usage_type3_count = 9;
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/11,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
// Verify that set and get results are consistent.
ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
@@ -246,15 +256,8 @@
// Now the scores of document 1 have been implicitly initialized. The scores
// should all be 0.
- UsageStore::UsageScores expected_scores;
- expected_scores.usage_type1_last_used_timestamp_s = 0;
- expected_scores.usage_type2_last_used_timestamp_s = 0;
- expected_scores.usage_type3_last_used_timestamp_s = 0;
- expected_scores.usage_type1_count = 0;
- expected_scores.usage_type2_count = 0;
- expected_scores.usage_type3_count = 0;
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
- IsOkAndHolds(expected_scores));
+ IsOkAndHolds(UsageStore::UsageScores()));
}
TEST_F(UsageStoreTest, DeleteUsageScores) {
@@ -262,27 +265,59 @@
UsageStore::Create(&filesystem_, test_dir_));
// Create usage scores with some random numbers.
- UsageStore::UsageScores scores;
- scores.usage_type1_last_used_timestamp_s = 7;
- scores.usage_type2_last_used_timestamp_s = 9;
- scores.usage_type3_last_used_timestamp_s = 11;
- scores.usage_type1_count = 3;
- scores.usage_type2_count = 4;
- scores.usage_type3_count = 9;
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/11,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
// Delete the usage scores of document 1, all the scores of document 1 should
// be 0.
- UsageStore::UsageScores expected_scores;
- expected_scores.usage_type1_last_used_timestamp_s = 0;
- expected_scores.usage_type2_last_used_timestamp_s = 0;
- expected_scores.usage_type3_last_used_timestamp_s = 0;
- expected_scores.usage_type1_count = 0;
- expected_scores.usage_type2_count = 0;
- expected_scores.usage_type3_count = 0;
ICING_EXPECT_OK(usage_store->DeleteUsageScores(/*document_id=*/1));
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
- IsOkAndHolds(expected_scores));
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, CloneUsageScores) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers and assign them to document 1.
+ UsageStore::UsageScores scores_a = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/11,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores_a));
+
+ // Create another set of usage scores with some random numbers and assign them
+ // to document 2.
+ UsageStore::UsageScores scores_b = CreateUsageScores(
+ /*type1_timestamp=*/111, /*type2_timestamp=*/666, /*type3_timestamp=*/333,
+ /*type1_count=*/50, /*type2_count=*/30, /*type3_count=*/100);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores_b));
+
+ // Clone scores from document 1 to document 3.
+ EXPECT_THAT(usage_store->CloneUsageScores(/*from_document_id=*/1,
+ /*to_document_id=*/3),
+ IsOk());
+
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/3),
+ IsOkAndHolds(scores_a));
+
+ // Clone scores from document 2 to document 3.
+ EXPECT_THAT(usage_store->CloneUsageScores(/*from_document_id=*/2,
+ /*to_document_id=*/3),
+ IsOk());
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/3),
+ IsOkAndHolds(scores_b));
+
+ // Clone scores from document 4 to document 3, scores should be set to
+ // default.
+ EXPECT_THAT(usage_store->CloneUsageScores(/*from_document_id=*/4,
+ /*to_document_id=*/3),
+ IsOk());
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/3),
+ IsOkAndHolds(UsageStore::UsageScores()));
}
TEST_F(UsageStoreTest, PersistToDisk) {
@@ -290,49 +325,125 @@
UsageStore::Create(&filesystem_, test_dir_));
// Create usage scores with some random numbers.
- UsageStore::UsageScores scores;
- scores.usage_type1_last_used_timestamp_s = 7;
- scores.usage_type2_last_used_timestamp_s = 9;
- scores.usage_type3_last_used_timestamp_s = 11;
- scores.usage_type1_count = 3;
- scores.usage_type2_count = 4;
- scores.usage_type3_count = 9;
- ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/11,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
EXPECT_THAT(usage_store->PersistToDisk(), IsOk());
}
-TEST_F(UsageStoreTest, Reset) {
+TEST_F(UsageStoreTest, ComputeChecksum) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum1, usage_store->ComputeChecksum());
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/11,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum2, usage_store->ComputeChecksum());
+
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum3, usage_store->ComputeChecksum());
+
+ EXPECT_THAT(checksum1, Not(Eq(checksum2)));
+ EXPECT_THAT(checksum1, Not(Eq(checksum3)));
+ EXPECT_THAT(checksum2, Not(Eq(checksum3)));
+
+ // Without changing the store, checksum should be the same.
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum4, usage_store->ComputeChecksum());
+ EXPECT_THAT(checksum3, Eq(checksum4));
+}
+
+TEST_F(UsageStoreTest, TruncateTo) {
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
UsageStore::Create(&filesystem_, test_dir_));
- // Create usage scores with some random numbers.
- UsageStore::UsageScores scores;
- scores.usage_type1_last_used_timestamp_s = 7;
- scores.usage_type2_last_used_timestamp_s = 9;
- scores.usage_type3_last_used_timestamp_s = 11;
- scores.usage_type1_count = 3;
- scores.usage_type2_count = 4;
- scores.usage_type3_count = 9;
+ // Create usage scores with some random numbers and set scores for document 0,
+ // 1, 2.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/11,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores));
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores));
- // Set scores for document 1 and document 2.
- ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
- ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores));
-
- EXPECT_THAT(usage_store->Reset(), IsOk());
-
- // After resetting, all the scores are cleared.
- UsageStore::UsageScores expected_scores;
- expected_scores.usage_type1_last_used_timestamp_s = 0;
- expected_scores.usage_type2_last_used_timestamp_s = 0;
- expected_scores.usage_type3_last_used_timestamp_s = 0;
- expected_scores.usage_type1_count = 0;
- expected_scores.usage_type2_count = 0;
- expected_scores.usage_type3_count = 0;
+ // Truncate number of documents to 2, scores of document 2 should be gone.
+ EXPECT_THAT(usage_store->TruncateTo(/*num_documents=*/2), IsOk());
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
- IsOkAndHolds(expected_scores));
+ IsOkAndHolds(scores));
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/2),
- IsOkAndHolds(expected_scores));
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, TruncateToALargeNumberShouldDoNothing) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers and set scores for document
+ // 0, 1.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/11,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores));
+ ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(scores));
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/2),
+ IsOkAndHolds(UsageStore::UsageScores()));
+
+ // Truncate to a number that is greater than the number of documents. Scores
+ // should be the same.
+ EXPECT_THAT(usage_store->TruncateTo(1000), IsOk());
+
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(scores));
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/2),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, TruncateToNegativeNumberShouldReturnError) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ EXPECT_THAT(usage_store->TruncateTo(-1),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+}
+
+TEST_F(UsageStoreTest, Reset) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/11,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+
+ // Set scores for document 1 and document 2.
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/2, scores));
+
+ EXPECT_THAT(usage_store->Reset(), IsOk());
+
+ // After resetting, all the scores are cleared.
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
+ IsOkAndHolds(UsageStore::UsageScores()));
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/2),
+ IsOkAndHolds(UsageStore::UsageScores()));
+ }
+
+ // New instances should be created successfully after Reset().
+ EXPECT_THAT(UsageStore::Create(&filesystem_, test_dir_).status(), IsOk());
}
TEST_F(UsageStoreTest, TimestampInSecondsShouldNotOverflow) {
@@ -346,12 +457,10 @@
// The stored timestamp in seconds should be the max value of uint32.
usage_store->AddUsageReport(usage_report, /*document_id=*/1);
- UsageStore::UsageScores expected_scores;
- expected_scores.usage_type1_last_used_timestamp_s =
- std::numeric_limits<uint32_t>::max();
- expected_scores.usage_type1_count = 1;
- expected_scores.usage_type2_count = 0;
- expected_scores.usage_type3_count = 0;
+ UsageStore::UsageScores expected_scores = CreateUsageScores(
+ /*type1_timestamp=*/std::numeric_limits<uint32_t>::max(),
+ /*type2_timestamp=*/0, /*type3_timestamp=*/0,
+ /*type1_count=*/1, /*type2_count=*/0, /*type3_count=*/0);
EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
IsOkAndHolds(expected_scores));
}
@@ -361,13 +470,10 @@
UsageStore::Create(&filesystem_, test_dir_));
// Create usage scores with the max value of int.
- UsageStore::UsageScores scores;
- scores.usage_type1_last_used_timestamp_s = 0;
- scores.usage_type2_last_used_timestamp_s = 0;
- scores.usage_type3_last_used_timestamp_s = 0;
- scores.usage_type1_count = std::numeric_limits<int>::max();
- scores.usage_type2_count = 0;
- scores.usage_type3_count = 0;
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/0, /*type2_timestamp=*/0, /*type3_timestamp=*/0,
+ /*type1_count=*/std::numeric_limits<int>::max(), /*type2_count=*/0,
+ /*type3_count=*/0);
ICING_ASSERT_OK(usage_store->SetUsageScores(/*document_id=*/1, scores));
ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/1),
@@ -383,6 +489,77 @@
IsOkAndHolds(scores));
}
+TEST_F(UsageStoreTest, StoreShouldBeResetOnVectorChecksumMismatch) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores));
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
+ }
+
+ // Modify the header to trigger a vector checksum mismatch.
+ const std::string score_cache_file_path =
+ absl_ports::StrCat(test_dir_, "/usage-scores");
+ FileBackedVector<UsageStore::UsageScores>::Header header{};
+ filesystem_.PRead(
+ score_cache_file_path.c_str(), /*buf=*/&header,
+ /*buf_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header),
+ /*offset=*/0);
+ header.vector_checksum = 10; // Arbitrary garbage checksum
+ header.header_checksum = header.CalculateHeaderChecksum();
+ filesystem_.PWrite(
+ score_cache_file_path.c_str(), /*offset=*/0, /*data=*/&header,
+ /*data_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header));
+
+ // Recover from checksum mismatch.
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+ // Previous data should be cleared.
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
+TEST_F(UsageStoreTest, StoreShouldBeResetOnHeaderChecksumMismatch) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // Create usage scores with some random numbers.
+ UsageStore::UsageScores scores = CreateUsageScores(
+ /*type1_timestamp=*/7, /*type2_timestamp=*/9, /*type3_timestamp=*/1,
+ /*type1_count=*/3, /*type2_count=*/4, /*type3_count=*/9);
+ ICING_EXPECT_OK(usage_store->SetUsageScores(/*document_id=*/0, scores));
+ ASSERT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(scores));
+ }
+
+ // Modify the header to trigger a header checksum mismatch.
+ const std::string score_cache_file_path =
+ absl_ports::StrCat(test_dir_, "/usage-scores");
+ FileBackedVector<UsageStore::UsageScores>::Header header{};
+ filesystem_.PRead(
+ score_cache_file_path.c_str(), /*buf=*/&header,
+ /*buf_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header),
+ /*offset=*/0);
+ header.header_checksum = 10; // Arbitrary garbage checksum
+ filesystem_.PWrite(
+ score_cache_file_path.c_str(), /*offset=*/0, /*data=*/&header,
+ /*data_size=*/sizeof(FileBackedVector<UsageStore::UsageScores>::Header));
+
+ // Recover from checksum mismatch.
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+ // Previous data should be cleared.
+ EXPECT_THAT(usage_store->GetUsageScores(/*document_id=*/0),
+ IsOkAndHolds(UsageStore::UsageScores()));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index 25f6249..7e14d0a 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -224,6 +224,33 @@
}
}
+inline string ProtoStatusCodeToString(StatusProto::Code code) {
+ switch (code) {
+ case StatusProto::OK:
+ return "OK";
+ case StatusProto::UNKNOWN:
+ return "UNKNOWN";
+ case StatusProto::INVALID_ARGUMENT:
+ return "INVALID_ARGUMENT";
+ case StatusProto::NOT_FOUND:
+ return "NOT_FOUND";
+ case StatusProto::ALREADY_EXISTS:
+ return "ALREADY_EXISTS";
+ case StatusProto::OUT_OF_SPACE:
+ return "OUT_OF_SPACE";
+ case StatusProto::FAILED_PRECONDITION:
+ return "FAILED_PRECONDITION";
+ case StatusProto::ABORTED:
+ return "ABORTED";
+ case StatusProto::INTERNAL:
+ return "INTERNAL";
+ case StatusProto::WARNING_DATA_LOSS:
+ return "WARNING_DATA_LOSS";
+ default:
+ return "";
+ }
+}
+
MATCHER(IsOk, "") {
libtextclassifier3::StatusAdapter adapter(arg);
if (adapter.status().ok()) {
@@ -274,6 +301,38 @@
result_listener);
}
+MATCHER(ProtoIsOk, "") {
+ if (arg.code() == StatusProto::OK) {
+ return true;
+ }
+ *result_listener << IcingStringUtil::StringPrintf(
+ "Expected OK, actual was (%s:%s)",
+ ProtoStatusCodeToString(arg.code()).c_str(), arg.message().c_str());
+ return false;
+}
+
+MATCHER_P(ProtoStatusIs, status_code, "") {
+ if (arg.code() == status_code) {
+ return true;
+ }
+ *result_listener << IcingStringUtil::StringPrintf(
+ "Expected (%s:), actual was (%s:%s)",
+ ProtoStatusCodeToString(status_code).c_str(),
+ ProtoStatusCodeToString(arg.code()).c_str(), arg.message().c_str());
+ return false;
+}
+
+MATCHER_P2(ProtoStatusIs, status_code, error_matcher, "") {
+ if (arg.code() != status_code) {
+ *result_listener << IcingStringUtil::StringPrintf(
+ "Expected (%s:), actual was (%s:%s)",
+ ProtoStatusCodeToString(status_code).c_str(),
+ ProtoStatusCodeToString(arg.code()).c_str(), arg.message().c_str());
+ return false;
+ }
+ return ExplainMatchResult(error_matcher, arg.message(), result_listener);
+}
+
// TODO(tjbarron) Remove this once icing has switched to depend on TC3 Status
#define ICING_STATUS_MACROS_CONCAT_NAME(x, y) \
ICING_STATUS_MACROS_CONCAT_IMPL(x, y)
diff --git a/icing/testing/random-string.h b/icing/testing/random-string.h
index 1510e15..ac36924 100644
--- a/icing/testing/random-string.h
+++ b/icing/testing/random-string.h
@@ -27,7 +27,7 @@
template <typename Gen>
std::string RandomString(const std::string_view alphabet, size_t len,
Gen* gen) {
- std::uniform_int_distribution<size_t> uniform(0u, alphabet.size());
+ std::uniform_int_distribution<size_t> uniform(0u, alphabet.size() - 1);
std::string result(len, '\0');
std::generate(
std::begin(result), std::end(result),
diff --git a/icing/text_classifier/lib3/utils/java/jni-base.cc b/icing/text_classifier/lib3/utils/java/jni-base.cc
index 897628c..e97e8b9 100644
--- a/icing/text_classifier/lib3/utils/java/jni-base.cc
+++ b/icing/text_classifier/lib3/utils/java/jni-base.cc
@@ -22,11 +22,13 @@
return env->EnsureLocalCapacity(capacity) == JNI_OK;
}
-bool JniExceptionCheckAndClear(JNIEnv* env) {
+bool JniExceptionCheckAndClear(JNIEnv* env, bool print_exception_on_error) {
TC3_CHECK(env != nullptr);
const bool result = env->ExceptionCheck();
if (result) {
- env->ExceptionDescribe();
+ if (print_exception_on_error) {
+ env->ExceptionDescribe();
+ }
env->ExceptionClear();
}
return result;
diff --git a/icing/text_classifier/lib3/utils/java/jni-base.h b/icing/text_classifier/lib3/utils/java/jni-base.h
index 5876eba..65c64a5 100644
--- a/icing/text_classifier/lib3/utils/java/jni-base.h
+++ b/icing/text_classifier/lib3/utils/java/jni-base.h
@@ -63,7 +63,8 @@
bool EnsureLocalCapacity(JNIEnv* env, int capacity);
// Returns true if there was an exception. Also it clears the exception.
-bool JniExceptionCheckAndClear(JNIEnv* env);
+bool JniExceptionCheckAndClear(JNIEnv* env,
+ bool print_exception_on_error = true);
// A deleter to be used with std::unique_ptr to delete JNI global references.
class GlobalRefDeleter {
diff --git a/icing/text_classifier/lib3/utils/java/jni-helper.h b/icing/text_classifier/lib3/utils/java/jni-helper.h
index 907ad0d..4e548ec 100644
--- a/icing/text_classifier/lib3/utils/java/jni-helper.h
+++ b/icing/text_classifier/lib3/utils/java/jni-helper.h
@@ -150,8 +150,10 @@
jmethodID method_id, ...);
template <class T>
- static StatusOr<T> CallStaticIntMethod(JNIEnv* env, jclass clazz,
- jmethodID method_id, ...);
+ static StatusOr<T> CallStaticIntMethod(JNIEnv* env,
+ bool print_exception_on_error,
+ jclass clazz, jmethodID method_id,
+ ...);
};
template <typename T>
@@ -167,14 +169,19 @@
}
template <class T>
-StatusOr<T> JniHelper::CallStaticIntMethod(JNIEnv* env, jclass clazz,
- jmethodID method_id, ...) {
+StatusOr<T> JniHelper::CallStaticIntMethod(JNIEnv* env,
+ bool print_exception_on_error,
+ jclass clazz, jmethodID method_id,
+ ...) {
va_list args;
va_start(args, method_id);
jint result = env->CallStaticIntMethodV(clazz, method_id, args);
va_end(args);
- TC3_NO_EXCEPTION_OR_RETURN;
+ if (JniExceptionCheckAndClear(env, print_exception_on_error)) {
+ return {Status::UNKNOWN};
+ }
+
return result;
}
diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc
index d0b90d1..e60f6d5 100644
--- a/icing/tokenization/icu/icu-language-segmenter_test.cc
+++ b/icing/tokenization/icu/icu-language-segmenter_test.cc
@@ -213,6 +213,36 @@
EXPECT_THAT(language_segmenter->GetAllTerms("com.google.android:icing"),
IsOkAndHolds(ElementsAre("com.google.android:icing")));
+  // Connectors that don't have valid terms on both sides of them are not
+  // considered connectors.
+ EXPECT_THAT(language_segmenter->GetAllTerms(":bar:baz"),
+ IsOkAndHolds(ElementsAre(":", "bar:baz")));
+
+ EXPECT_THAT(language_segmenter->GetAllTerms("bar:baz:"),
+ IsOkAndHolds(ElementsAre("bar:baz", ":")));
+
+  // Whitespace does not count as a valid term, so connectors adjacent to
+  // whitespace are not considered connectors.
+ EXPECT_THAT(language_segmenter->GetAllTerms(" :bar:baz"),
+ IsOkAndHolds(ElementsAre(" ", ":", "bar:baz")));
+
+ EXPECT_THAT(language_segmenter->GetAllTerms("bar:baz: "),
+ IsOkAndHolds(ElementsAre("bar:baz", ":", " ")));
+
+ // Connectors don't connect if one side is an invalid term (?)
+ EXPECT_THAT(language_segmenter->GetAllTerms("bar:baz:?"),
+ IsOkAndHolds(ElementsAre("bar:baz", ":")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("?:bar:baz"),
+ IsOkAndHolds(ElementsAre(":", "bar:baz")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("3:14"),
+ IsOkAndHolds(ElementsAre("3", ":", "14")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("私:は"),
+ IsOkAndHolds(ElementsAre("私", ":", "は")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("我:每"),
+ IsOkAndHolds(ElementsAre("我", ":", "每")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("เดิน:ไป"),
+ IsOkAndHolds(ElementsAre("เดิน:ไป")));
+
// Any heading and trailing characters are not connecters
EXPECT_THAT(language_segmenter->GetAllTerms(".com.google.android."),
IsOkAndHolds(ElementsAre(".", "com.google.android", ".")));
@@ -409,6 +439,21 @@
EXPECT_THAT(word2_address, Eq(word2_result_address));
}
+TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToStartWordConnector) {
+ ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
+ language_segmenter_factory::Create(GetOptions()));
+ constexpr std::string_view kText = "com:google:android is package";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "com:google:android is package"
+ // ^ ^^ ^^
+ // Bytes: 0 18 19 21 22
+ auto position_or = itr->ResetToStart();
+ EXPECT_THAT(position_or, IsOk());
+ ASSERT_THAT(itr->GetTerm(), Eq("com:google:android"));
+}
+
TEST_P(IcuLanguageSegmenterAllLocalesTest, NewIteratorResetToStart) {
ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
language_segmenter_factory::Create(GetOptions()));
@@ -474,6 +519,27 @@
EXPECT_THAT(itr->GetTerm(), Eq("How"));
}
+TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterWordConnector) {
+ ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
+ language_segmenter_factory::Create(GetOptions()));
+ constexpr std::string_view kText = "package com:google:android name";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "package com:google:android name"
+ // ^ ^^ ^^
+ // Bytes: 0 7 8 26 27
+ auto position_or = itr->ResetToTermStartingAfter(8);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(26));
+ ASSERT_THAT(itr->GetTerm(), Eq(" "));
+
+ position_or = itr->ResetToTermStartingAfter(7);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(8));
+ ASSERT_THAT(itr->GetTerm(), Eq("com:google:android"));
+}
+
TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermAfterOutOfBounds) {
ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
language_segmenter_factory::Create(GetOptions()));
@@ -791,6 +857,27 @@
EXPECT_THAT(itr->GetTerm(), Eq("ทุก"));
}
+TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermBeforeWordConnector) {
+ ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
+ language_segmenter_factory::Create(GetOptions()));
+ constexpr std::string_view kText = "package name com:google:android!";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "package name com:google:android!"
+ // ^ ^^ ^^ ^
+ // Bytes: 0 7 8 12 13 31
+ auto position_or = itr->ResetToTermEndingBefore(31);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(13));
+ ASSERT_THAT(itr->GetTerm(), Eq("com:google:android"));
+
+ position_or = itr->ResetToTermEndingBefore(21);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(12));
+ ASSERT_THAT(itr->GetTerm(), Eq(" "));
+}
+
TEST_P(IcuLanguageSegmenterAllLocalesTest, ResetToTermBeforeOutOfBounds) {
ICING_ASSERT_OK_AND_ASSIGN(auto segmenter,
language_segmenter_factory::Create(GetOptions()));
diff --git a/icing/tokenization/language-segmenter.h b/icing/tokenization/language-segmenter.h
index fdb1846..7ca31d1 100644
--- a/icing/tokenization/language-segmenter.h
+++ b/icing/tokenization/language-segmenter.h
@@ -102,6 +102,23 @@
virtual libtextclassifier3::StatusOr<int32_t> ResetToTermEndingBefore(
int32_t offset) = 0;
+ // Resets the iterator to point to the first term.
+ // GetTerm will now return that term. For example:
+ //
+ // language_segmenter = language_segmenter_factory::Create(type);
+ // iterator = language_segmenter->Segment("foo bar baz");
+ // iterator.Advance();
+ // iterator.ResetToStart();
+ // iterator.GetTerm() // returns "foo";
+ //
+ // Return types of OK and NOT_FOUND indicate that the function call was
+ // valid and the state of the iterator has changed.
+ //
+ // Returns:
+ // On success, the starting position of the first term.
+ // NOT_FOUND if an error occurred or there are no valid terms in the text.
+ // ABORTED if an invalid unicode character is encountered while
+ // traversing the text.
virtual libtextclassifier3::StatusOr<int32_t> ResetToStart() = 0;
};
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc
index 4b50231..1cd6fa3 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc
@@ -225,6 +225,36 @@
EXPECT_THAT(language_segmenter->GetAllTerms("com.google.android:icing"),
IsOkAndHolds(ElementsAre("com.google.android:icing")));
+  // Connectors that don't have valid terms on both sides of them are not
+  // considered connectors.
+ EXPECT_THAT(language_segmenter->GetAllTerms(":bar:baz"),
+ IsOkAndHolds(ElementsAre(":", "bar:baz")));
+
+ EXPECT_THAT(language_segmenter->GetAllTerms("bar:baz:"),
+ IsOkAndHolds(ElementsAre("bar:baz", ":")));
+
+  // Whitespace does not count as a valid term, so connectors adjacent to
+  // whitespace are not considered connectors.
+ EXPECT_THAT(language_segmenter->GetAllTerms(" :bar:baz"),
+ IsOkAndHolds(ElementsAre(" ", ":", "bar:baz")));
+
+ EXPECT_THAT(language_segmenter->GetAllTerms("bar:baz: "),
+ IsOkAndHolds(ElementsAre("bar:baz", ":", " ")));
+
+ // Connectors don't connect if one side is an invalid term (?)
+ EXPECT_THAT(language_segmenter->GetAllTerms("bar:baz:?"),
+ IsOkAndHolds(ElementsAre("bar:baz", ":")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("?:bar:baz"),
+ IsOkAndHolds(ElementsAre(":", "bar:baz")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("3:14"),
+ IsOkAndHolds(ElementsAre("3", ":", "14")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("私:は"),
+ IsOkAndHolds(ElementsAre("私", ":", "は")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("我:每"),
+ IsOkAndHolds(ElementsAre("我", ":", "每")));
+ EXPECT_THAT(language_segmenter->GetAllTerms("เดิน:ไป"),
+ IsOkAndHolds(ElementsAre("เดิน:ไป")));
+
// Any heading and trailing characters are not connecters
EXPECT_THAT(language_segmenter->GetAllTerms(".com.google.android."),
IsOkAndHolds(ElementsAre(".", "com.google.android", ".")));
@@ -443,6 +473,22 @@
EXPECT_THAT(word2_address, Eq(word2_result_address));
}
+TEST_P(ReverseJniLanguageSegmenterTest, ResetToStartWordConnector) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "com:google:android is package";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "com:google:android is package"
+ // ^ ^^ ^^
+ // Bytes: 0 18 19 21 22
+ auto position_or = itr->ResetToStart();
+ EXPECT_THAT(position_or, IsOk());
+ ASSERT_THAT(itr->GetTerm(), Eq("com:google:android"));
+}
+
TEST_P(ReverseJniLanguageSegmenterTest, NewIteratorResetToStart) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
@@ -511,6 +557,28 @@
EXPECT_THAT(itr->GetTerm(), Eq("How"));
}
+TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterWordConnector) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "package com:google:android name";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "package com:google:android name"
+ // ^ ^^ ^^
+ // Bytes: 0 7 8 26 27
+ auto position_or = itr->ResetToTermStartingAfter(8);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(26));
+ ASSERT_THAT(itr->GetTerm(), Eq(" "));
+
+ position_or = itr->ResetToTermStartingAfter(7);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(8));
+ ASSERT_THAT(itr->GetTerm(), Eq("com:google:android"));
+}
+
TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermAfterOutOfBounds) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
@@ -846,6 +914,28 @@
EXPECT_THAT(itr->GetTerm(), Eq("ทุก"));
}
+TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeWordConnector) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ auto segmenter, language_segmenter_factory::Create(
+ GetSegmenterOptions(GetLocale(), jni_cache_.get())));
+ constexpr std::string_view kText = "package name com:google:android!";
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<LanguageSegmenter::Iterator> itr,
+ segmenter->Segment(kText));
+
+ // String: "package name com:google:android!"
+ // ^ ^^ ^^ ^
+ // Bytes: 0 7 8 12 13 31
+ auto position_or = itr->ResetToTermEndingBefore(31);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(13));
+ ASSERT_THAT(itr->GetTerm(), Eq("com:google:android"));
+
+ position_or = itr->ResetToTermEndingBefore(21);
+ EXPECT_THAT(position_or, IsOk());
+ EXPECT_THAT(position_or.ValueOrDie(), Eq(12));
+ ASSERT_THAT(itr->GetTerm(), Eq(" "));
+}
+
TEST_P(ReverseJniLanguageSegmenterTest, ResetToTermBeforeOutOfBounds) {
ICING_ASSERT_OK_AND_ASSIGN(
auto segmenter, language_segmenter_factory::Create(
diff --git a/icing/util/clock.cc b/icing/util/clock.cc
index 3593f13..7843bc4 100644
--- a/icing/util/clock.cc
+++ b/icing/util/clock.cc
@@ -26,11 +26,17 @@
.count();
}
-uint64_t GetSteadyTimeNanoseconds() {
+int64_t GetSteadyTimeNanoseconds() {
return std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::steady_clock::now().time_since_epoch())
.count();
}
+int64_t GetSteadyTimeMilliseconds() {
+ return std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now().time_since_epoch())
+ .count();
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/util/clock.h b/icing/util/clock.h
index 58628f3..a37fe58 100644
--- a/icing/util/clock.h
+++ b/icing/util/clock.h
@@ -34,7 +34,13 @@
// Returns the current steady time in nanoseconds. The steady clock is different
// from the system clock. It's monotonic and never returns a lower value than a
// previous call, while a system clock can be occasionally adjusted.
-uint64_t GetSteadyTimeNanoseconds();
+int64_t GetSteadyTimeNanoseconds();
+
+// Returns the current steady time in milliseconds. The steady clock is
+// different from the system clock. It's monotonic and never returns a lower
+// value than a previous call, while a system clock can be occasionally
+// adjusted.
+int64_t GetSteadyTimeMilliseconds();
} // namespace lib
} // namespace icing
diff --git a/icing/util/timer.h b/icing/util/timer.h
new file mode 100644
index 0000000..da872fe
--- /dev/null
+++ b/icing/util/timer.h
@@ -0,0 +1,43 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_TIMER_H_
+#define ICING_UTIL_TIMER_H_
+
+#include <cstdint>
+
+#include "icing/util/clock.h"
+
+namespace icing {
+namespace lib {
+
+// A util class to calculate the elapsed time.
+class Timer {
+ public:
+ // Timer starts.
+ Timer() : start_timestamp_milliseconds_(GetSteadyTimeMilliseconds()) {}
+
+ // Returns the elapsed time from when timer started.
+  int64_t GetElapsedMilliseconds() const {
+ return GetSteadyTimeMilliseconds() - start_timestamp_milliseconds_;
+ }
+
+ private:
+ int64_t start_timestamp_milliseconds_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_UTIL_TIMER_H_
diff --git a/proto/icing/proto/schema.proto b/proto/icing/proto/schema.proto
index 3a7ee5d..ce8e007 100644
--- a/proto/icing/proto/schema.proto
+++ b/proto/icing/proto/schema.proto
@@ -61,9 +61,7 @@
//
// TermMatchType.Code=UNKNOWN
// Content in this property will not be tokenized or indexed. Useful if the
- // data type is not made up of terms (e.g. DOCUMENT or BYTES type). All the
- // properties inside the nested property won't be indexed regardless of the
- // value of the term_match_type field for the nested properties.
+ // data type is not indexable. See schema-util for details.
//
// TermMatchType.Code=EXACT_ONLY
// Content in this property should only be returned for queries matching the
@@ -79,7 +77,7 @@
message TokenizerType {
enum Code {
// It is only valid for tokenizer_type to be 'NONE' if the data type is
- // DOCUMENT.
+ // not indexed.
NONE = 0;
// Tokenization for plain text.
@@ -106,9 +104,10 @@
// REQUIRED: Physical data-types of the contents of the property.
message DataType {
enum Code {
- // This should never purposely be set. This is used for backwards
+ // This value should never purposely be used. This is used for backwards
// compatibility reasons.
UNKNOWN = 0;
+
STRING = 1;
INT64 = 2;
DOUBLE = 3;
@@ -163,7 +162,9 @@
optional Cardinality.Code cardinality = 4;
// OPTIONAL: Properties that do not set the indexing config will not be
- // indexed.
+ // indexed. DOCUMENT data types shouldn't set an indexing config; Icing lib
+ // will recurse and check the nested schema type's properties' indexing
+ // configs themselves.
optional IndexingConfig indexing_config = 5;
}
diff --git a/proto/icing/proto/scoring.proto b/proto/icing/proto/scoring.proto
index 667ff4f..3a99b09 100644
--- a/proto/icing/proto/scoring.proto
+++ b/proto/icing/proto/scoring.proto
@@ -23,6 +23,7 @@
// Encapsulates the configurations on how Icing should score and rank the search
// results.
+// TODO(b/170347684): Change all timestamps to seconds.
// Next tag: 3
message ScoringSpecProto {
// OPTIONAL: Indicates how the search results will be ranked.
@@ -37,6 +38,32 @@
// Ranked by document creation timestamps.
CREATION_TIMESTAMP = 2;
+
+ // The following ranking strategies are based on usage reporting. Please
+ // see usage.proto for more information. If one of the usage ranking
+ // strategy is used but none of result documents have reported usage, the
+ // documents will be returned in the default reverse insertion order.
+
+ // Ranked by count of reports with usage type 1.
+ USAGE_TYPE1_COUNT = 3;
+
+ // Ranked by count of reports with usage type 2.
+ USAGE_TYPE2_COUNT = 4;
+
+ // Ranked by count of reports with usage type 3.
+ USAGE_TYPE3_COUNT = 5;
+
+ // Ranked by last used timestamp with usage type 1. The timestamps are
+ // compared in seconds.
+ USAGE_TYPE1_LAST_USED_TIMESTAMP = 6;
+
+ // Ranked by last used timestamp with usage type 2. The timestamps are
+ // compared in seconds.
+ USAGE_TYPE2_LAST_USED_TIMESTAMP = 7;
+
+ // Ranked by last used timestamp with usage type 3. The timestamps are
+ // compared in seconds.
+ USAGE_TYPE3_LAST_USED_TIMESTAMP = 8;
}
}
optional RankingStrategy.Code rank_by = 1;
diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto
index 8ea5036..abbfc32 100644
--- a/proto/icing/proto/search.proto
+++ b/proto/icing/proto/search.proto
@@ -191,8 +191,8 @@
// An opaque token used internally to keep track of information needed for
// pagination. A valid pagination token is required to fetch other pages of
- // results. The default value 0 means that there're no more pages.
+  // results. A value of 0 means that there are no more pages.
// LINT.IfChange(next_page_token)
- optional uint64 next_page_token = 4 [default = 0];
+ optional uint64 next_page_token = 4;
// LINT.ThenChange(//depot/google3/icing/result/result-state-manager.h:kInvalidNextPageToken)
}
diff --git a/proto/icing/proto/usage.proto b/proto/icing/proto/usage.proto
index 81243f0..7f31a2b 100644
--- a/proto/icing/proto/usage.proto
+++ b/proto/icing/proto/usage.proto
@@ -16,6 +16,8 @@
package icing.lib;
+import "icing/proto/status.proto";
+
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
@@ -51,3 +53,15 @@
optional UsageType usage_type = 4;
}
// LINT.ThenChange(//depot/google3/icing/store/usage-store.h:UsageScores)
+
+// Result of a call to IcingSearchEngine.ReportUsage
+// Next tag: 2
+message ReportUsageResultProto {
+ // Status code can be one of:
+ // OK
+ // NOT_FOUND
+ // INTERNAL
+ //
+ // See status.proto for more details.
+ optional StatusProto status = 1;
+}
\ No newline at end of file