Merge remote-tracking branch 'androidx-platform-dev' into master am: deed1c7dfc
Original change: https://googleplex-android-review.googlesource.com/c/platform/external/icing/+/13779665
MUST ONLY BE SUBMITTED BY AUTOMERGER
Change-Id: I0030dee1f549a4404e395ec1a17282810a6fe63a
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index 763c93b..1d5b689 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -959,7 +959,8 @@
header_->header_checksum = header_->CalculateHeaderChecksum();
if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(),
- sizeof(Header))) {
+ sizeof(Header)) ||
+ !filesystem_->DataSync(fd_.get())) {
return absl_ports::InternalError(
absl_ports::StrCat("Failed to update header to: ", file_path_));
}
diff --git a/icing/icing-search-engine-with-icu-file_test.cc b/icing/icing-search-engine-with-icu-file_test.cc
index 5a9327e..24523c9 100644
--- a/icing/icing-search-engine-with-icu-file_test.cc
+++ b/icing/icing-search-engine-with-icu-file_test.cc
@@ -27,6 +27,7 @@
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
@@ -36,6 +37,14 @@
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::Eq;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED =
+ PropertyConfigProto_Cardinality_Code_REQUIRED;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
+
std::string GetTestBaseDir() {
return GetTestTempDir() + "/icing_with_icu_files";
}
@@ -55,23 +64,6 @@
.Build();
}
-SchemaProto CreateMessageSchema() {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- return schema;
-}
-
ScoringSpecProto GetDefaultScoringSpec() {
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
@@ -81,15 +73,31 @@
TEST(IcingSearchEngineWithIcuFileTest, ShouldInitialize) {
IcingSearchEngine icing(GetDefaultIcingOptions());
EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
}
TEST(IcingSearchEngineWithIcuFileTest, ShouldIndexAndSearch) {
IcingSearchEngine icing(GetDefaultIcingOptions());
ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK));
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 791368a..b50e412 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -27,6 +27,7 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/mutex.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/index-processor.h"
@@ -35,6 +36,7 @@
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
+#include "icing/proto/internal/optimize.pb.h"
#include "icing/proto/logging.pb.h"
#include "icing/proto/optimize.pb.h"
#include "icing/proto/persist.pb.h"
@@ -75,6 +77,7 @@
constexpr std::string_view kSchemaSubfolderName = "schema_dir";
constexpr std::string_view kIcingSearchEngineHeaderFilename =
"icing_search_engine_header";
+constexpr std::string_view kOptimizeStatusFilename = "optimize_status";
libtextclassifier3::Status ValidateOptions(
const IcingSearchEngineOptions& options) {
@@ -238,8 +241,7 @@
filesystem_(std::move(filesystem)),
icing_filesystem_(std::move(icing_filesystem)),
clock_(std::move(clock)),
- result_state_manager_(performance_configuration_.max_num_hits_per_query,
- performance_configuration_.max_num_cache_results),
+ result_state_manager_(performance_configuration_.max_num_total_hits),
jni_cache_(std::move(jni_cache)) {
ICING_VLOG(1) << "Creating IcingSearchEngine in dir: " << options_.base_dir();
}
@@ -270,8 +272,8 @@
InitializeResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
- NativeInitializeStats* initialize_stats =
- result_proto.mutable_native_initialize_stats();
+ InitializeStatsProto* initialize_stats =
+ result_proto.mutable_initialize_stats();
if (initialized_) {
// Already initialized.
result_status->set_code(StatusProto::OK);
@@ -307,7 +309,7 @@
// sub-components may not be able to tell if the storage is being
// initialized the first time or has lost some files. Sub-components may
// already have set these fields in earlier steps.
- *initialize_stats = NativeInitializeStats();
+ *initialize_stats = InitializeStatsProto();
status = RegenerateDerivedFiles();
} else {
ICING_VLOG(1)
@@ -317,13 +319,13 @@
// recovery. Preserve the root cause that was set by the document store.
bool should_log_document_store_recovery_cause =
initialize_stats->document_store_recovery_cause() ==
- NativeInitializeStats::NONE;
+ InitializeStatsProto::NONE;
if (should_log_document_store_recovery_cause) {
initialize_stats->set_document_store_recovery_cause(
- NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH);
+ InitializeStatsProto::TOTAL_CHECKSUM_MISMATCH);
}
initialize_stats->set_index_restoration_cause(
- NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH);
+ InitializeStatsProto::TOTAL_CHECKSUM_MISMATCH);
status = RegenerateDerivedFiles(initialize_stats,
should_log_document_store_recovery_cause);
}
@@ -339,7 +341,7 @@
// Index is inconsistent with the document store, we need to restore the
// index.
initialize_stats->set_index_restoration_cause(
- NativeInitializeStats::INCONSISTENT_WITH_GROUND_TRUTH);
+ InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH);
std::unique_ptr<Timer> index_restore_timer = clock_->GetNewTimer();
status = RestoreIndexIfNeeded();
initialize_stats->set_index_restoration_latency_ms(
@@ -357,7 +359,7 @@
}
libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
- NativeInitializeStats* initialize_stats) {
+ InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
ICING_RETURN_IF_ERROR(InitializeOptions());
ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats));
@@ -390,7 +392,7 @@
}
libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore(
- NativeInitializeStats* initialize_stats) {
+ InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
const std::string schema_store_dir =
@@ -408,7 +410,7 @@
}
libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
- NativeInitializeStats* initialize_stats) {
+ InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
const std::string document_dir =
@@ -428,7 +430,7 @@
}
libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
- NativeInitializeStats* initialize_stats) {
+ InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
@@ -449,7 +451,7 @@
}
initialize_stats->set_index_restoration_cause(
- NativeInitializeStats::IO_ERROR);
+ InitializeStatsProto::IO_ERROR);
// Try recreating it from scratch and re-indexing everything.
ICING_ASSIGN_OR_RETURN(index_,
@@ -499,7 +501,7 @@
}
libtextclassifier3::Status IcingSearchEngine::RegenerateDerivedFiles(
- NativeInitializeStats* initialize_stats, bool log_document_store_stats) {
+ InitializeStatsProto* initialize_stats, bool log_document_store_stats) {
// Measure the latency of the data recovery. The cause of the recovery should
// be logged by the caller.
std::unique_ptr<Timer> timer = clock_->GetNewTimer();
@@ -682,8 +684,8 @@
PutResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
- NativePutDocumentStats* put_document_stats =
- result_proto.mutable_native_put_document_stats();
+ PutDocumentStatsProto* put_document_stats =
+ result_proto.mutable_put_document_stats();
// Lock must be acquired before validation because the DocumentStore uses
// the schema file to validate, and the schema could be changed in
@@ -833,8 +835,8 @@
return result_proto;
}
- NativeDeleteStats* delete_stats = result_proto.mutable_delete_stats();
- delete_stats->set_delete_type(NativeDeleteStats::DeleteType::SINGLE);
+ DeleteStatsProto* delete_stats = result_proto.mutable_delete_stats();
+ delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SINGLE);
std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
// TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
@@ -867,8 +869,8 @@
return delete_result;
}
- NativeDeleteStats* delete_stats = delete_result.mutable_delete_stats();
- delete_stats->set_delete_type(NativeDeleteStats::DeleteType::NAMESPACE);
+ DeleteStatsProto* delete_stats = delete_result.mutable_delete_stats();
+ delete_stats->set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE);
std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
// TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
@@ -901,8 +903,8 @@
return delete_result;
}
- NativeDeleteStats* delete_stats = delete_result.mutable_delete_stats();
- delete_stats->set_delete_type(NativeDeleteStats::DeleteType::SCHEMA_TYPE);
+ DeleteStatsProto* delete_stats = delete_result.mutable_delete_stats();
+ delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE);
std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
// TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
@@ -937,8 +939,8 @@
return result_proto;
}
- NativeDeleteStats* delete_stats = result_proto.mutable_delete_stats();
- delete_stats->set_delete_type(NativeDeleteStats::DeleteType::QUERY);
+ DeleteStatsProto* delete_stats = result_proto.mutable_delete_stats();
+ delete_stats->set_delete_type(DeleteStatsProto::DeleteType::QUERY);
std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
libtextclassifier3::Status status =
@@ -1029,6 +1031,16 @@
return result_proto;
}
+ std::unique_ptr<Timer> optimize_timer = clock_->GetNewTimer();
+ OptimizeStatsProto* optimize_stats = result_proto.mutable_optimize_stats();
+ int64_t before_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
+ if (before_size != Filesystem::kBadFileSize) {
+ optimize_stats->set_storage_size_before(before_size);
+ } else {
+ // Set -1 as a sentinel value when failures occur.
+ optimize_stats->set_storage_size_before(-1);
+ }
+
// Releases result / query cache if any
result_state_manager_.InvalidateAllResultStates();
@@ -1041,7 +1053,11 @@
// TODO(b/143646633): figure out if we need to optimize index and doc store
// at the same time.
- libtextclassifier3::Status optimization_status = OptimizeDocumentStore();
+ std::unique_ptr<Timer> optimize_doc_store_timer = clock_->GetNewTimer();
+ libtextclassifier3::Status optimization_status =
+ OptimizeDocumentStore(optimize_stats);
+ optimize_stats->set_document_store_optimize_latency_ms(
+ optimize_doc_store_timer->GetElapsedMilliseconds());
if (!optimization_status.ok() &&
!absl_ports::IsDataLoss(optimization_status)) {
@@ -1055,6 +1071,7 @@
// The status is either OK or DATA_LOSS. The optimized document store is
// guaranteed to work, so we update index according to the new document store.
+ std::unique_ptr<Timer> optimize_index_timer = clock_->GetNewTimer();
libtextclassifier3::Status index_reset_status = index_->Reset();
if (!index_reset_status.ok()) {
status = absl_ports::Annotate(
@@ -1065,6 +1082,8 @@
}
libtextclassifier3::Status index_restoration_status = RestoreIndexIfNeeded();
+ optimize_stats->set_index_restoration_latency_ms(
+ optimize_index_timer->GetElapsedMilliseconds());
if (!index_restoration_status.ok()) {
status = absl_ports::Annotate(
absl_ports::InternalError(
@@ -1075,6 +1094,35 @@
return result_proto;
}
+ // Read the optimize status to get the time that we last ran.
+ std::string optimize_status_filename =
+ absl_ports::StrCat(options_.base_dir(), "/", kOptimizeStatusFilename);
+ FileBackedProto<OptimizeStatusProto> optimize_status_file(
+ *filesystem_, optimize_status_filename);
+ auto optimize_status_or = optimize_status_file.Read();
+ int64_t current_time = clock_->GetSystemTimeMilliseconds();
+ if (optimize_status_or.ok()) {
+ // If we have trouble reading the status or this is the first time that
+ // we've ever run, don't set this field.
+ optimize_stats->set_time_since_last_optimize_ms(
+ current_time - optimize_status_or.ValueOrDie()
+ ->last_successful_optimize_run_time_ms());
+ }
+
+ // Update the status for this run and write it.
+ auto optimize_status = std::make_unique<OptimizeStatusProto>();
+ optimize_status->set_last_successful_optimize_run_time_ms(current_time);
+ optimize_status_file.Write(std::move(optimize_status));
+
+ int64_t after_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
+ if (after_size != Filesystem::kBadFileSize) {
+ optimize_stats->set_storage_size_after(after_size);
+ } else {
+ // Set -1 as a sentinel value when failures occur.
+ optimize_stats->set_storage_size_after(-1);
+ }
+ optimize_stats->set_latency_ms(optimize_timer->GetElapsedMilliseconds());
+
TransformStatus(optimization_status, result_status);
return result_proto;
}
@@ -1092,6 +1140,22 @@
return result_proto;
}
+ // Read the optimize status to get the time that we last ran.
+ std::string optimize_status_filename =
+ absl_ports::StrCat(options_.base_dir(), "/", kOptimizeStatusFilename);
+ FileBackedProto<OptimizeStatusProto> optimize_status_file(
+ *filesystem_, optimize_status_filename);
+ auto optimize_status_or = optimize_status_file.Read();
+ int64_t current_time = clock_->GetSystemTimeMilliseconds();
+
+ if (optimize_status_or.ok()) {
+ // If we have trouble reading the status or this is the first time that
+ // we've ever run, don't set this field.
+ result_proto.set_time_since_last_optimize_ms(
+ current_time - optimize_status_or.ValueOrDie()
+ ->last_successful_optimize_run_time_ms());
+ }
+
// Get stats from DocumentStore
auto doc_store_optimize_info_or = document_store_->GetOptimizeInfo();
if (!doc_store_optimize_info_or.ok()) {
@@ -1127,6 +1191,32 @@
return result_proto;
}
+StorageInfoResultProto IcingSearchEngine::GetStorageInfo() {
+ StorageInfoResultProto result;
+ absl_ports::shared_lock l(&mutex_);
+ if (!initialized_) {
+ result.mutable_status()->set_code(StatusProto::FAILED_PRECONDITION);
+ result.mutable_status()->set_message(
+ "IcingSearchEngine has not been initialized!");
+ return result;
+ }
+
+ int64_t index_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
+ if (index_size != Filesystem::kBadFileSize) {
+ result.mutable_storage_info()->set_total_storage_size(index_size);
+ } else {
+ result.mutable_storage_info()->set_total_storage_size(-1);
+ }
+ *result.mutable_storage_info()->mutable_document_storage_info() =
+ document_store_->GetStorageInfo();
+ *result.mutable_storage_info()->mutable_schema_store_storage_info() =
+ schema_store_->GetStorageInfo();
+ *result.mutable_storage_info()->mutable_index_storage_info() =
+ index_->GetStorageInfo();
+ result.mutable_status()->set_code(StatusProto::OK);
+ return result;
+}
+
libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk() {
ICING_RETURN_IF_ERROR(schema_store_->PersistToDisk());
ICING_RETURN_IF_ERROR(document_store_->PersistToDisk());
@@ -1189,8 +1279,11 @@
header.checksum = checksum.Get();
// This should overwrite the header.
- if (!filesystem_->Write(MakeHeaderFilename(options_.base_dir()).c_str(),
- &header, sizeof(header))) {
+ ScopedFd sfd(filesystem_->OpenForWrite(
+ MakeHeaderFilename(options_.base_dir()).c_str()));
+ if (!sfd.is_valid() ||
+ !filesystem_->Write(sfd.get(), &header, sizeof(header)) ||
+ !filesystem_->DataSync(sfd.get())) {
return absl_ports::InternalError(
absl_ports::StrCat("Failed to write IcingSearchEngine header: ",
MakeHeaderFilename(options_.base_dir())));
@@ -1211,7 +1304,7 @@
return result_proto;
}
- NativeQueryStats* query_stats = result_proto.mutable_query_stats();
+ QueryStatsProto* query_stats = result_proto.mutable_query_stats();
std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
libtextclassifier3::Status status = ValidateResultSpec(result_spec);
@@ -1359,7 +1452,7 @@
return result_proto;
}
- NativeQueryStats* query_stats = result_proto.mutable_query_stats();
+ QueryStatsProto* query_stats = result_proto.mutable_query_stats();
query_stats->set_is_first_page(false);
std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
@@ -1438,7 +1531,8 @@
result_state_manager_.InvalidateResultState(next_page_token);
}
-libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() {
+libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore(
+ OptimizeStatsProto* optimize_stats) {
// Gets the current directory path and an empty tmp directory path for
// document store optimization.
const std::string current_document_dir =
@@ -1455,7 +1549,7 @@
// Copies valid document data to tmp directory
auto optimize_status = document_store_->OptimizeInto(
- temporary_document_dir, language_segmenter_.get());
+ temporary_document_dir, language_segmenter_.get(), optimize_stats);
// Handles error if any
if (!optimize_status.ok()) {
@@ -1529,7 +1623,6 @@
ICING_LOG(ERROR) << "Document store has been optimized, but it failed to "
"delete temporary file directory";
}
-
return libtextclassifier3::Status::OK;
}
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index a899131..9adef7f 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -37,6 +37,7 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/usage.pb.h"
#include "icing/result/result-state-manager.h"
#include "icing/schema/schema-store.h"
@@ -328,7 +329,8 @@
// Invalidates the next-page token so that no more results of the related
// query can be returned.
- void InvalidateNextPageToken(uint64_t next_page_token);
+ void InvalidateNextPageToken(uint64_t next_page_token)
+ ICING_LOCKS_EXCLUDED(mutex_);
// Makes sure that every update/delete received till this point is flushed
// to disk. If the app crashes after a call to PersistToDisk(), Icing
@@ -378,6 +380,12 @@
// INTERNAL_ERROR on IO error
GetOptimizeInfoResultProto GetOptimizeInfo() ICING_LOCKS_EXCLUDED(mutex_);
+ // Calculates the StorageInfo for Icing.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ StorageInfoResultProto GetStorageInfo() ICING_LOCKS_EXCLUDED(mutex_);
+
// Clears all data from Icing and re-initializes. Clients DO NOT need to call
// Initialize again.
//
@@ -460,7 +468,7 @@
// NOT_FOUND if some Document's schema type is not in the SchemaStore
// INTERNAL on any I/O errors
libtextclassifier3::Status InitializeMembers(
- NativeInitializeStats* initialize_stats)
+ InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Do any validation/setup required for the given IcingSearchEngineOptions
@@ -479,7 +487,7 @@
// FAILED_PRECONDITION if initialize_stats is null
// INTERNAL on I/O error
libtextclassifier3::Status InitializeSchemaStore(
- NativeInitializeStats* initialize_stats)
+ InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Do any initialization/recovery necessary to create a DocumentStore
@@ -490,7 +498,7 @@
// FAILED_PRECONDITION if initialize_stats is null
// INTERNAL on I/O error
libtextclassifier3::Status InitializeDocumentStore(
- NativeInitializeStats* initialize_stats)
+ InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Do any initialization/recovery necessary to create a DocumentStore
@@ -503,7 +511,7 @@
// NOT_FOUND if some Document's schema type is not in the SchemaStore
// INTERNAL on I/O error
libtextclassifier3::Status InitializeIndex(
- NativeInitializeStats* initialize_stats)
+ InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Many of the internal components rely on other components' derived data.
@@ -527,7 +535,7 @@
// OK on success
// INTERNAL_ERROR on any IO errors
libtextclassifier3::Status RegenerateDerivedFiles(
- NativeInitializeStats* initialize_stats = nullptr,
+ InitializeStatsProto* initialize_stats = nullptr,
bool log_document_store_stats = false)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
@@ -545,7 +553,8 @@
// document store is still available
// INTERNAL_ERROR on any IO errors or other errors that we can't recover
// from
- libtextclassifier3::Status OptimizeDocumentStore()
+ libtextclassifier3::Status OptimizeDocumentStore(
+ OptimizeStatsProto* optimize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Helper method to restore missing document data in index_. All documents
@@ -573,8 +582,8 @@
// if it doesn't exist.
bool HeaderExists() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
- // Update and replace the header file. Creates the header file if it doesn't
- // exist.
+ // Update, replace and persist the header file. Creates the header file if it
+ // doesn't exist.
libtextclassifier3::Status UpdateHeader(const Crc32& checksum)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
diff --git a/icing/icing-search-engine_fuzz_test.cc b/icing/icing-search-engine_fuzz_test.cc
index 1f59c6e..2d07e37 100644
--- a/icing/icing-search-engine_fuzz_test.cc
+++ b/icing/icing-search-engine_fuzz_test.cc
@@ -23,6 +23,7 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
#include "icing/proto/scoring.pb.h"
+#include "icing/schema-builder.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
@@ -30,27 +31,20 @@
namespace lib {
namespace {
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED =
+ PropertyConfigProto_Cardinality_Code_REQUIRED;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
+
IcingSearchEngineOptions Setup() {
IcingSearchEngineOptions icing_options;
icing_options.set_base_dir(GetTestTempDir() + "/icing");
return icing_options;
}
-SchemaProto SetTypes() {
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Message");
- PropertyConfigProto* body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- return schema;
-}
-
DocumentProto MakeDocument(const uint8_t* data, size_t size) {
// TODO (sidchhabra): Added more optimized fuzzing techniques.
DocumentProto document;
@@ -83,7 +77,15 @@
// TODO (b/145758378): Deleting directory should not be required.
filesystem_.DeleteDirectoryRecursively(icing_options.base_dir().c_str());
icing.Initialize();
- SchemaProto schema_proto = SetTypes();
+
+ SchemaProto schema_proto =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
icing.SetSchema(schema_proto);
// Index
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index 8c64614..64c62d0 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -32,10 +32,12 @@
#include "icing/portable/equals-proto.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
+#include "icing/proto/optimize.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/testing/common-matchers.h"
@@ -85,6 +87,16 @@
"vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
"placerat semper.";
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED =
+ PropertyConfigProto_Cardinality_Code_REQUIRED;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
+
// For mocking purpose, we allow tests to provide a custom Filesystem.
class TestIcingSearchEngine : public IcingSearchEngine {
public:
@@ -172,95 +184,61 @@
}
SchemaProto CreateMessageSchema() {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- return schema;
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
}
SchemaProto CreateEmailSchema() {
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("Email");
-
- auto* body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* subj = type->add_properties();
- subj->set_property_name("subject");
- subj->set_data_type(PropertyConfigProto::DataType::STRING);
- subj->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- subj->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- subj->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- return schema;
+ return SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
}
SchemaProto CreatePersonAndEmailSchema() {
- SchemaProto schema;
-
- auto* person_type = schema.add_types();
- person_type->set_schema_type("Person");
- auto* name = person_type->add_properties();
- name->set_property_name("name");
- name->set_data_type(PropertyConfigProto::DataType::STRING);
- name->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- name->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- name->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* address = person_type->add_properties();
- address->set_property_name("emailAddress");
- address->set_data_type(PropertyConfigProto::DataType::STRING);
- address->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- address->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- address->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- auto* type = schema.add_types();
- type->set_schema_type("Email");
-
- auto* body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* subj = type->add_properties();
- subj->set_property_name("subject");
- subj->set_data_type(PropertyConfigProto::DataType::STRING);
- subj->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- subj->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- subj->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* sender = type->add_properties();
- sender->set_property_name("sender");
- sender->set_schema_type("Person");
- sender->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- sender->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- sender->mutable_document_indexing_config()->set_index_nested_properties(true);
-
- return schema;
+ return SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
}
ScoringSpecProto GetDefaultScoringSpec() {
@@ -1500,24 +1478,21 @@
icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(results.status(), ProtoIsOk());
EXPECT_THAT(results.results(), SizeIs(2));
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
- EXPECT_THAT(GetMatch(results.results(0).document(),
- results.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message"));
- EXPECT_THAT(
- GetWindow(results.results(0).document(), results.results(0).snippet(),
- "body", /*snippet_index=*/0),
- Eq("message body"));
+
+ const DocumentProto& document = results.results(0).document();
+ EXPECT_THAT(document, EqualsProto(document_two));
+
+ const SnippetProto& snippet = results.results(0).snippet();
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("message"));
+
EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
- EXPECT_THAT(
- GetMatch(results.results(1).document(), results.results(1).snippet(),
- "body", /*snippet_index=*/0),
- IsEmpty());
- EXPECT_THAT(
- GetWindow(results.results(1).document(), results.results(1).snippet(),
- "body", /*snippet_index=*/0),
- IsEmpty());
+ EXPECT_THAT(results.results(1).snippet().entries(), IsEmpty());
search_spec.set_query("foo");
@@ -1852,24 +1827,28 @@
ASSERT_THAT(search_result.results(), SizeIs(2));
ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
- EXPECT_THAT(search_result.results(0).document(), EqualsProto(document5));
- EXPECT_THAT(GetMatch(search_result.results(0).document(),
- search_result.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message"));
- EXPECT_THAT(GetWindow(search_result.results(0).document(),
- search_result.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message body"));
- EXPECT_THAT(search_result.results(1).document(), EqualsProto(document4));
- EXPECT_THAT(GetMatch(search_result.results(1).document(),
- search_result.results(1).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message"));
- EXPECT_THAT(GetWindow(search_result.results(1).document(),
- search_result.results(1).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message body"));
+ const DocumentProto& document_result_1 = search_result.results(0).document();
+ EXPECT_THAT(document_result_1, EqualsProto(document5));
+ const SnippetProto& snippet_result_1 = search_result.results(0).snippet();
+ EXPECT_THAT(snippet_result_1.entries(), SizeIs(1));
+ EXPECT_THAT(snippet_result_1.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &document_result_1, snippet_result_1.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_1.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_1.entries(0)),
+ ElementsAre("message"));
+
+ const DocumentProto& document_result_2 = search_result.results(1).document();
+ EXPECT_THAT(document_result_2, EqualsProto(document4));
+ const SnippetProto& snippet_result_2 = search_result.results(1).snippet();
+ EXPECT_THAT(snippet_result_2.entries(0).property_name(), Eq("body"));
+ content = GetString(&document_result_2,
+ snippet_result_2.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_2.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_2.entries(0)),
+ ElementsAre("message"));
// Second page, 2 result with 1 snippet
search_result = icing.GetNextPage(search_result.next_page_token());
@@ -1877,17 +1856,19 @@
ASSERT_THAT(search_result.results(), SizeIs(2));
ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
- EXPECT_THAT(search_result.results(0).document(), EqualsProto(document3));
- EXPECT_THAT(GetMatch(search_result.results(0).document(),
- search_result.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message"));
- EXPECT_THAT(GetWindow(search_result.results(0).document(),
- search_result.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message body"));
+ const DocumentProto& document_result_3 = search_result.results(0).document();
+ EXPECT_THAT(document_result_3, EqualsProto(document3));
+ const SnippetProto& snippet_result_3 = search_result.results(0).snippet();
+ EXPECT_THAT(snippet_result_3.entries(0).property_name(), Eq("body"));
+ content = GetString(&document_result_3,
+ snippet_result_3.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_3.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_3.entries(0)),
+ ElementsAre("message"));
+
EXPECT_THAT(search_result.results(1).document(), EqualsProto(document2));
- EXPECT_THAT(search_result.results(1).snippet().entries_size(), Eq(0));
+ EXPECT_THAT(search_result.results(1).snippet().entries(), IsEmpty());
// Third page, 1 result with 0 snippets
search_result = icing.GetNextPage(search_result.next_page_token());
@@ -1896,7 +1877,7 @@
ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
EXPECT_THAT(search_result.results(0).document(), EqualsProto(document1));
- EXPECT_THAT(search_result.results(0).snippet().entries_size(), Eq(0));
+ EXPECT_THAT(search_result.results(0).snippet().entries(), IsEmpty());
}
TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) {
@@ -1978,7 +1959,9 @@
OptimizeResultProto optimize_result_proto;
optimize_result_proto.mutable_status()->set_code(StatusProto::OK);
optimize_result_proto.mutable_status()->set_message("");
- ASSERT_THAT(icing.Optimize(), EqualsProto(optimize_result_proto));
+ OptimizeResultProto actual_result = icing.Optimize();
+ actual_result.clear_optimize_stats();
+ ASSERT_THAT(actual_result, EqualsProto(optimize_result_proto));
// Tries to fetch the second page, no results since all tokens have been
// invalidated during Optimize()
@@ -2063,59 +2046,78 @@
.SetTtlMs(500)
.Build();
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(1000);
+ {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(1000);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- // Just initialized, nothing is optimizable yet.
- GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ // Just initialized, nothing is optimizable yet.
+ GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- // Only have active documents, nothing is optimizable yet.
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ // Only have active documents, nothing is optimizable yet.
+ optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
- // Deletes document1
- ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
+ // Deletes document1
+ ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0));
- int64_t first_estimated_optimizable_bytes =
- optimize_info.estimated_optimizable_bytes();
+ optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
+ int64_t first_estimated_optimizable_bytes =
+ optimize_info.estimated_optimizable_bytes();
- // Add a second document, but it'll be expired since the time (1000) is
- // greater than the document's creation timestamp (100) + the document's ttl
- // (500)
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ // Add a second document, but it'll be expired since the time (1000) is
+ // greater than the document's creation timestamp (100) + the document's ttl
+ // (500)
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(),
- Gt(first_estimated_optimizable_bytes));
+ optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(),
+ Gt(first_estimated_optimizable_bytes));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
- // Optimize
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ // Optimize
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ }
- // Nothing is optimizable now that everything has been optimized away.
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ {
+ // Recreate with new time
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(5000);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Nothing is optimizable now that everything has been optimized away.
+ GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(4000));
+ }
}
TEST_F(IcingSearchEngineTest, GetAndPutShouldWorkAfterOptimization) {
@@ -2351,8 +2353,8 @@
DeleteBySchemaTypeResultProto result_proto =
icing.DeleteBySchemaType("message");
EXPECT_THAT(result_proto.status(), ProtoIsOk());
- NativeDeleteStats exp_stats;
- exp_stats.set_delete_type(NativeDeleteStats::DeleteType::SCHEMA_TYPE);
+ DeleteStatsProto exp_stats;
+ exp_stats.set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE);
exp_stats.set_latency_ms(7);
exp_stats.set_num_documents_deleted(1);
EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
@@ -2519,8 +2521,8 @@
DeleteByNamespaceResultProto result_proto =
icing.DeleteByNamespace("namespace1");
EXPECT_THAT(result_proto.status(), ProtoIsOk());
- NativeDeleteStats exp_stats;
- exp_stats.set_delete_type(NativeDeleteStats::DeleteType::NAMESPACE);
+ DeleteStatsProto exp_stats;
+ exp_stats.set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE);
exp_stats.set_latency_ms(7);
exp_stats.set_num_documents_deleted(2);
EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
@@ -2679,8 +2681,8 @@
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec);
EXPECT_THAT(result_proto.status(), ProtoIsOk());
- NativeDeleteStats exp_stats;
- exp_stats.set_delete_type(NativeDeleteStats::DeleteType::QUERY);
+ DeleteStatsProto exp_stats;
+ exp_stats.set_delete_type(DeleteStatsProto::DeleteType::QUERY);
exp_stats.set_latency_ms(7);
exp_stats.set_num_documents_deleted(1);
EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
@@ -4303,24 +4305,21 @@
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("body:coffee OR body:food");
+ search_spec.set_query("subject:coffee OR body:food");
ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
SearchResultProto search_result_proto = icing.Search(
search_spec, scoring_spec, ResultSpecProto::default_instance());
- // Result should be in descending score order, section restrict doesn't impact
- // the BM25F score.
+ // Result should be in descending score order
EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- // Both doc5 and doc7 have "coffee" in name and text sections.
- // However, doc5 has more matches.
+ // The term frequencies of "coffee" and "food" are calculated respectively
+ // from the subject section and the body section.
// Documents with "food" are ranked lower as the term "food" is commonly
// present in this corpus, and thus, has a lower IDF.
EXPECT_THAT(
GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace1/uri5", // 'coffee' 2 times in section subject,
- // 1 time in section body
- "namespace1/uri7", // 'coffee' 2 times in section body
+ ElementsAre("namespace1/uri5", // 'coffee' 2 times in section subject
"namespace1/uri1", // 'food' 2 times in section body
"namespace1/uri4", // 'food' 2 times in section body
"namespace1/uri2", // 'food' 1 time in section body
@@ -4985,34 +4984,28 @@
const DocumentProto& result_document_1 = results.results(0).document();
const SnippetProto& result_snippet_1 = results.results(0).snippet();
EXPECT_THAT(result_document_1, EqualsProto(document_two));
- EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/0),
- Eq("mdi"));
- EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/0),
- Eq("mdi Zürich Team Meeting"));
- EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/1),
- Eq("Zürich"));
- EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/1),
- Eq("mdi Zürich Team Meeting"));
+ EXPECT_THAT(result_snippet_1.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_1, result_snippet_1.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi", "Zürich"));
const DocumentProto& result_document_2 = results.results(1).document();
const SnippetProto& result_snippet_2 = results.results(1).snippet();
EXPECT_THAT(result_document_2, EqualsProto(document_one));
- EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/0),
- Eq("MDI"));
- EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/0),
- Eq("MDI zurich Team Meeting"));
- EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/1),
- Eq("zurich"));
- EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/1),
- Eq("MDI zurich Team Meeting"));
+ EXPECT_THAT(result_snippet_2.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_2,
+ result_snippet_2.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI", "zurich"));
}
TEST_F(IcingSearchEngineTest, SnippetNormalizationPrefix) {
@@ -5054,34 +5047,28 @@
const DocumentProto& result_document_1 = results.results(0).document();
const SnippetProto& result_snippet_1 = results.results(0).snippet();
EXPECT_THAT(result_document_1, EqualsProto(document_two));
- EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/0),
- Eq("mdi"));
- EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/0),
- Eq("mdi Zürich Team Meeting"));
- EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/1),
- Eq("Zürich"));
- EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/1),
- Eq("mdi Zürich Team Meeting"));
+ EXPECT_THAT(result_snippet_1.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_1, result_snippet_1.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi", "Zürich"));
const DocumentProto& result_document_2 = results.results(1).document();
const SnippetProto& result_snippet_2 = results.results(1).snippet();
EXPECT_THAT(result_document_2, EqualsProto(document_one));
- EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/0),
- Eq("MDI"));
- EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/0),
- Eq("MDI zurich Team Meeting"));
- EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/1),
- Eq("zurich"));
- EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/1),
- Eq("MDI zurich Team Meeting"));
+ EXPECT_THAT(result_snippet_2.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_2,
+ result_snippet_2.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI", "zurich"));
}
TEST_F(IcingSearchEngineTest, SnippetSectionRestrict) {
@@ -5112,21 +5099,18 @@
icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(results.status(), ProtoIsOk());
ASSERT_THAT(results.results(), SizeIs(1));
+
const DocumentProto& result_document = results.results(0).document();
const SnippetProto& result_snippet = results.results(0).snippet();
EXPECT_THAT(result_document, EqualsProto(document_one));
- EXPECT_THAT(
- GetMatch(result_document, result_snippet, "body", /*snippet_index=*/0),
- Eq("zurich"));
- EXPECT_THAT(
- GetWindow(result_document, result_snippet, "body", /*snippet_index=*/0),
- Eq("MDI zurich Team Meeting"));
- EXPECT_THAT(
- GetMatch(result_document, result_snippet, "subject", /*snippet_index=*/0),
- IsEmpty());
- EXPECT_THAT(GetWindow(result_document, result_snippet, "subject",
- /*snippet_index=*/0),
- IsEmpty());
+ EXPECT_THAT(result_snippet.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&result_document, result_snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet.entries(0)),
+ ElementsAre("MDI zurich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet.entries(0)),
+ ElementsAre("zurich"));
}
TEST_F(IcingSearchEngineTest, UninitializedInstanceFailsSafely) {
@@ -5596,8 +5580,7 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats().latency_ms(),
- Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().latency_ms(), Eq(10));
}
TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfDocuments) {
@@ -5617,9 +5600,8 @@
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.native_initialize_stats().num_documents(),
- Eq(0));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
+ Eq(0));
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
@@ -5629,9 +5611,8 @@
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.native_initialize_stats().num_documents(),
- Eq(1));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
+ Eq(1));
// Put another document.
ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
@@ -5641,9 +5622,8 @@
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.native_initialize_stats().num_documents(),
- Eq(2));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
+ Eq(2));
}
}
@@ -5659,25 +5639,25 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
@@ -5721,25 +5701,25 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::PARTIAL_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::PARTIAL_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
@@ -5790,27 +5770,27 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::COMPLETE_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::COMPLETE_LOSS));
// The complete rewind of ground truth causes the mismatch of total
// checksum, so index should be restored.
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::TOTAL_CHECKSUM_MISMATCH));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
@@ -5848,25 +5828,25 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::INCONSISTENT_WITH_GROUND_TRUTH));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
@@ -5905,25 +5885,25 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::TOTAL_CHECKSUM_MISMATCH));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::TOTAL_CHECKSUM_MISMATCH));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
@@ -5970,25 +5950,25 @@
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::IO_ERROR));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_latency_ms(),
- Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
@@ -6036,25 +6016,25 @@
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::IO_ERROR));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
@@ -6083,25 +6063,25 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_cause(),
- Eq(NativeInitializeStats::IO_ERROR));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(0));
}
@@ -6114,9 +6094,8 @@
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
// There should be 0 schema types.
- EXPECT_THAT(
- initialize_result_proto.native_initialize_stats().num_schema_types(),
- Eq(0));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
+ Eq(0));
// Set a schema with one type config.
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
@@ -6127,9 +6106,8 @@
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
// There should be 1 schema type.
- EXPECT_THAT(
- initialize_result_proto.native_initialize_stats().num_schema_types(),
- Eq(1));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
+ Eq(1));
// Create and set a schema with two type configs: Email and Message.
SchemaProto schema = CreateEmailSchema();
@@ -6152,9 +6130,8 @@
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.native_initialize_stats().num_schema_types(),
- Eq(2));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
+ Eq(2));
}
}
@@ -6176,8 +6153,7 @@
PutResultProto put_result_proto = icing.Put(document);
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(put_result_proto.native_put_document_stats().latency_ms(),
- Eq(10));
+ EXPECT_THAT(put_result_proto.put_document_stats().latency_ms(), Eq(10));
}
TEST_F(IcingSearchEngineTest, PutDocumentShouldLogDocumentStoreStats) {
@@ -6200,11 +6176,9 @@
PutResultProto put_result_proto = icing.Put(document);
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- put_result_proto.native_put_document_stats().document_store_latency_ms(),
- Eq(10));
- size_t document_size =
- put_result_proto.native_put_document_stats().document_size();
+ EXPECT_THAT(put_result_proto.put_document_stats().document_store_latency_ms(),
+ Eq(10));
+ size_t document_size = put_result_proto.put_document_stats().document_size();
EXPECT_THAT(document_size, Ge(document.ByteSizeLong()));
EXPECT_THAT(document_size, Le(document.ByteSizeLong() +
sizeof(DocumentProto::InternalFields)));
@@ -6228,18 +6202,16 @@
PutResultProto put_result_proto = icing.Put(document);
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(put_result_proto.native_put_document_stats().index_latency_ms(),
- Eq(10));
+ EXPECT_THAT(put_result_proto.put_document_stats().index_latency_ms(), Eq(10));
// No merge should happen.
- EXPECT_THAT(
- put_result_proto.native_put_document_stats().index_merge_latency_ms(),
- Eq(0));
+ EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
+ Eq(0));
// Number of tokens should not exceed.
- EXPECT_FALSE(put_result_proto.native_put_document_stats()
+ EXPECT_FALSE(put_result_proto.put_document_stats()
.tokenization_stats()
.exceeded_max_token_num());
// The input document has 2 tokens.
- EXPECT_THAT(put_result_proto.native_put_document_stats()
+ EXPECT_THAT(put_result_proto.put_document_stats()
.tokenization_stats()
.num_tokens_indexed(),
Eq(2));
@@ -6263,10 +6235,10 @@
PutResultProto put_result_proto = icing.Put(document);
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
// Number of tokens(2) exceeds the max allowed value(1).
- EXPECT_TRUE(put_result_proto.native_put_document_stats()
+ EXPECT_TRUE(put_result_proto.put_document_stats()
.tokenization_stats()
.exceeded_max_token_num());
- EXPECT_THAT(put_result_proto.native_put_document_stats()
+ EXPECT_THAT(put_result_proto.put_document_stats()
.tokenization_stats()
.num_tokens_indexed(),
Eq(1));
@@ -6300,9 +6272,8 @@
// Putting document2 should trigger an index merge.
PutResultProto put_result_proto = icing.Put(document2);
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- put_result_proto.native_put_document_stats().index_merge_latency_ms(),
- Eq(10));
+ EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
+ Eq(10));
}
TEST_F(IcingSearchEngineTest, SearchWithProjectionEmptyFieldPath) {
@@ -6491,7 +6462,7 @@
EqualsProto(projected_document_one));
}
-TEST_F(IcingSearchEngineTest, NativeQueryStatsTest) {
+TEST_F(IcingSearchEngineTest, QueryStatsProtoTest) {
auto fake_clock = std::make_unique<FakeClock>();
fake_clock->SetTimerElapsedMilliseconds(5);
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
@@ -6537,7 +6508,7 @@
ASSERT_THAT(search_result.next_page_token(), Ne(kInvalidNextPageToken));
// Check the stats
- NativeQueryStats exp_stats;
+ QueryStatsProto exp_stats;
exp_stats.set_num_terms(1);
exp_stats.set_num_namespaces_filtered(1);
exp_stats.set_num_schema_types_filtered(1);
@@ -6561,7 +6532,7 @@
ASSERT_THAT(search_result.results(), SizeIs(2));
ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
- exp_stats = NativeQueryStats();
+ exp_stats = QueryStatsProto();
exp_stats.set_is_first_page(false);
exp_stats.set_requested_page_size(2);
exp_stats.set_num_results_returned_current_page(2);
@@ -6576,7 +6547,7 @@
ASSERT_THAT(search_result.results(), SizeIs(1));
ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
- exp_stats = NativeQueryStats();
+ exp_stats = QueryStatsProto();
exp_stats.set_is_first_page(false);
exp_stats.set_requested_page_size(2);
exp_stats.set_num_results_returned_current_page(1);
@@ -6586,6 +6557,101 @@
EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
}
+TEST_F(IcingSearchEngineTest, OptimizeStatsProtoTest) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(10000);
+ auto icing = std::make_unique<TestIcingSearchEngine>(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Create three documents.
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ document2.set_creation_timestamp_ms(9000);
+ document2.set_ttl_ms(500);
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk());
+
+ // Delete the first document.
+ ASSERT_THAT(icing->Delete(document1.namespace_(), document1.uri()).status(),
+ ProtoIsOk());
+ ASSERT_THAT(icing->PersistToDisk().status(), ProtoIsOk());
+
+ OptimizeStatsProto expected;
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(3);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(1);
+
+ // Run Optimize
+ OptimizeResultProto result = icing->Optimize();
+ // Depending on how many blocks the documents end up spread across, it's
+ // possible that Optimize can remove documents without shrinking storage. The
+ // first Optimize call will also write the OptimizeStatusProto for the first
+ // time which will take up 1 block. So make sure that before_size is no less
+ // than after_size - 1 block.
+ uint32_t page_size = getpagesize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after() - page_size));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(20000);
+ icing = std::make_unique<TestIcingSearchEngine>(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);
+ expected.set_num_deleted_documents(0);
+ expected.set_num_expired_documents(0);
+ expected.set_time_since_last_optimize_ms(10000);
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Eq(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+}
+
+TEST_F(IcingSearchEngineTest, StorageInfoTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Create three documents.
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // Ensure that total_storage_size is set. All the other stats are covered by
+ // the classes that generate them.
+ StorageInfoResultProto result = icing.GetStorageInfo();
+ EXPECT_THAT(result.status(), ProtoIsOk());
+ EXPECT_THAT(result.storage_info().total_storage_size(), Ge(0));
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
index d2f9d41..09dda41 100644
--- a/icing/index/index-processor.cc
+++ b/icing/index/index-processor.cc
@@ -55,7 +55,7 @@
libtextclassifier3::Status IndexProcessor::IndexDocument(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- NativePutDocumentStats* put_document_stats) {
+ PutDocumentStatsProto* put_document_stats) {
std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
if (index_->last_added_document_id() != kInvalidDocumentId &&
diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h
index 9fc7c46..6b07c98 100644
--- a/icing/index/index-processor.h
+++ b/icing/index/index-processor.h
@@ -81,7 +81,7 @@
// INTERNAL_ERROR if any other errors occur
libtextclassifier3::Status IndexDocument(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- NativePutDocumentStats* put_document_stats = nullptr);
+ PutDocumentStatsProto* put_document_stats = nullptr);
private:
IndexProcessor(const Normalizer* normalizer, Index* index,
diff --git a/icing/index/index.cc b/icing/index/index.cc
index bd41b51..db59ad2 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -164,7 +164,7 @@
icing_filesystem));
return std::unique_ptr<Index>(new Index(options, std::move(term_id_codec),
std::move(lite_index),
- std::move(main_index)));
+ std::move(main_index), filesystem));
}
libtextclassifier3::Status Index::TruncateTo(DocumentId document_id) {
@@ -277,6 +277,18 @@
std::move(main_term_metadata_list), num_to_return);
}
+IndexStorageInfoProto Index::GetStorageInfo() const {
+ IndexStorageInfoProto storage_info;
+ int64_t directory_size = filesystem_->GetDiskUsage(options_.base_dir.c_str());
+ if (directory_size != Filesystem::kBadFileSize) {
+ storage_info.set_index_size(directory_size);
+ } else {
+ storage_info.set_index_size(-1);
+ }
+ storage_info = lite_index_->GetStorageInfo(std::move(storage_info));
+ return main_index_->GetStorageInfo(std::move(storage_info));
+}
+
libtextclassifier3::Status Index::Editor::BufferTerm(const char* term) {
// Step 1: See if this term is already in the lexicon
uint32_t tvi;
diff --git a/icing/index/index.h b/icing/index/index.h
index a4ea719..b7021ca 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -32,6 +32,7 @@
#include "icing/index/term-id-codec.h"
#include "icing/index/term-metadata.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -151,6 +152,12 @@
return lite_index_size + main_index_size;
}
+ // Calculates the StorageInfo for the Index.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ IndexStorageInfoProto GetStorageInfo() const;
+
// Create an iterator to iterate through all doc hit infos in the index that
// match the term. section_id_mask can be set to ignore hits from sections not
// listed in the mask. Eg. section_id_mask = 1U << 3; would only return hits
@@ -242,11 +249,12 @@
private:
Index(const Options& options, std::unique_ptr<TermIdCodec> term_id_codec,
std::unique_ptr<LiteIndex> lite_index,
- std::unique_ptr<MainIndex> main_index)
+ std::unique_ptr<MainIndex> main_index, const Filesystem* filesystem)
: lite_index_(std::move(lite_index)),
main_index_(std::move(main_index)),
options_(options),
- term_id_codec_(std::move(term_id_codec)) {}
+ term_id_codec_(std::move(term_id_codec)),
+ filesystem_(filesystem) {}
libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindLiteTermsByPrefix(
const std::string& prefix, const std::vector<NamespaceId>& namespace_ids,
@@ -256,6 +264,7 @@
std::unique_ptr<MainIndex> main_index_;
const Options options_;
std::unique_ptr<TermIdCodec> term_id_codec_;
+ const Filesystem* filesystem_;
};
} // namespace lib
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index 3479ab1..de4edf8 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -31,6 +31,7 @@
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -46,6 +47,7 @@
using ::testing::ElementsAre;
using ::testing::Eq;
+using ::testing::Ge;
using ::testing::Gt;
using ::testing::IsEmpty;
using ::testing::IsTrue;
@@ -1636,6 +1638,33 @@
EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
}
+TEST_F(IndexTest, IndexStorageInfoProto) {
+ // Add two documents to the lite index and merge them into main.
+ {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foul"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+ }
+
+ IndexStorageInfoProto storage_info = index_->GetStorageInfo();
+ EXPECT_THAT(storage_info.index_size(), Ge(0));
+ EXPECT_THAT(storage_info.lite_index_lexicon_size(), Ge(0));
+ EXPECT_THAT(storage_info.lite_index_hit_buffer_size(), Ge(0));
+ EXPECT_THAT(storage_info.main_index_lexicon_size(), Ge(0));
+ EXPECT_THAT(storage_info.main_index_storage_size(), Ge(0));
+ EXPECT_THAT(storage_info.main_index_block_size(), Ge(0));
+ // There should be 1 block for the header and 1 block for two posting lists.
+ EXPECT_THAT(storage_info.num_blocks(), Eq(2));
+ EXPECT_THAT(storage_info.min_free_fraction(), Ge(0));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.h b/icing/index/iterator/doc-hit-info-iterator-and.h
index faca785..8ceff44 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.h
+++ b/icing/index/iterator/doc-hit-info-iterator-and.h
@@ -47,13 +47,16 @@
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats) const override {
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- short_->PopulateMatchedTermsStats(matched_terms_stats);
- long_->PopulateMatchedTermsStats(matched_terms_stats);
+ short_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ long_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
}
private:
@@ -78,13 +81,15 @@
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats) const override {
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
for (size_t i = 0; i < iterators_.size(); ++i) {
- iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats);
+ iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
}
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.h b/icing/index/iterator/doc-hit-info-iterator-filter.h
index fb60e38..9cee74c 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.h
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.h
@@ -68,8 +68,10 @@
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
- delegate_->PopulateMatchedTermsStats(matched_terms_stats);
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ delegate_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
}
private:
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.h b/icing/index/iterator/doc-hit-info-iterator-or.h
index 2f49430..2dae68d 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.h
+++ b/icing/index/iterator/doc-hit-info-iterator-or.h
@@ -43,15 +43,18 @@
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats) const override {
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- current_->PopulateMatchedTermsStats(matched_terms_stats);
+ current_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
// If equal, then current_ == left_. Combine with results from right_.
if (left_document_id_ == right_document_id_) {
- right_->PopulateMatchedTermsStats(matched_terms_stats);
+ right_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
}
}
@@ -83,13 +86,15 @@
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats) const override {
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
for (size_t i = 0; i < current_iterators_.size(); i++) {
- current_iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats);
+ current_iterators_.at(i)->PopulateMatchedTermsStats(
+ matched_terms_stats, filtering_section_mask);
}
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
index 8acb91a..e6ee8e3 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -84,8 +84,7 @@
if (section_metadata->path == target_section_) {
// The hit was in the target section name, return OK/found
doc_hit_info_ = delegate_->doc_hit_info();
- hit_intersect_section_ids_mask_ =
- delegate_->hit_intersect_section_ids_mask();
+ hit_intersect_section_ids_mask_ = 1u << section_id;
return libtextclassifier3::Status::OK;
}
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
index ba74384..52b243a 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
@@ -52,13 +52,21 @@
std::string ToString() const override;
- // NOTE: currently, section restricts does decide which documents to
- // return, but doesn't impact the relevance score of a document.
- // TODO(b/173156803): decide whether we want to filter the matched_terms_stats
- // for the restricted sections.
+ // Note that the DocHitInfoIteratorSectionRestrict is the only iterator that
+ // should set filtering_section_mask, hence the received
+ // filtering_section_mask is ignored and the filtering_section_mask passed to
+ // the delegate will be set to hit_intersect_section_ids_mask_. This will
+ // allow to filter the matching sections in the delegate.
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
- delegate_->PopulateMatchedTermsStats(matched_terms_stats);
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ delegate_->PopulateMatchedTermsStats(
+ matched_terms_stats,
+ /*filtering_section_mask=*/hit_intersect_section_ids_mask_);
}
private:
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index 91e0cbe..21b3f8f 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -43,6 +43,7 @@
namespace {
using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::IsEmpty;
@@ -101,6 +102,57 @@
FakeClock fake_clock_;
};
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ PopulateMatchedTermsStats_IncludesHitWithMatchingSection) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document_));
+
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ SectionIdMask original_section_id_mask = 0b00000101; // hits in sections 0, 2
+
+ DocHitInfo doc_hit_info1 = DocHitInfo(document_id);
+ doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1};
+
+ auto original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "hi");
+ original_iterator->set_hit_intersect_section_ids_mask(
+ original_section_id_mask);
+
+ // Filtering for the indexed section name (which has a section id of 0) should
+ // get a result.
+ DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_section=*/indexed_property_);
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(section_restrict_iterator.Advance());
+ EXPECT_THAT(section_restrict_iterator.doc_hit_info().document_id(),
+ Eq(document_id));
+ SectionIdMask expected_section_id_mask = 0b00000001; // hits in sections 0
+ EXPECT_EQ(section_restrict_iterator.hit_intersect_section_ids_mask(),
+ expected_section_id_mask);
+
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_EQ(matched_terms_stats.at(0).term, "hi");
+ std::array<Hit::TermFrequency, kMaxSectionId> expected_term_frequencies{
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+ ElementsAreArray(expected_term_frequencies));
+ EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask,
+ expected_section_id_mask);
+
+ EXPECT_FALSE(section_restrict_iterator.Advance().ok());
+}
+
TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) {
std::unique_ptr<DocHitInfoIterator> original_iterator_empty =
std::make_unique<DocHitInfoIteratorDummy>();
@@ -110,6 +162,9 @@
schema_store_.get(), /*target_section=*/"");
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ filtered_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest, IncludesHitWithMatchingSection) {
@@ -148,6 +203,9 @@
/*target_section=*/"");
EXPECT_THAT(GetDocumentIds(§ion_restrict_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
@@ -171,6 +229,9 @@
"some_section_name");
EXPECT_THAT(GetDocumentIds(§ion_restrict_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
@@ -192,6 +253,9 @@
indexed_property_);
EXPECT_THAT(GetDocumentIds(§ion_restrict_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
@@ -216,6 +280,9 @@
indexed_property_);
EXPECT_THAT(GetDocumentIds(§ion_restrict_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumBlocksInspected) {
diff --git a/icing/index/iterator/doc-hit-info-iterator-test-util.h b/icing/index/iterator/doc-hit-info-iterator-test-util.h
index 913696a..45acc8f 100644
--- a/icing/index/iterator/doc-hit-info-iterator-test-util.h
+++ b/icing/index/iterator/doc-hit-info-iterator-test-util.h
@@ -56,23 +56,25 @@
// Imitates behavior of DocHitInfoIteratorTermMain/DocHitInfoIteratorTermLite
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask();
+ SectionIdMask section_mask =
+ doc_hit_info_.hit_section_ids_mask() & filtering_section_mask;
+ SectionIdMask section_mask_copy = section_mask;
std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = {
Hit::kNoTermFrequency};
-
- while (section_mask) {
- SectionId section_id = __builtin_ctz(section_mask);
+ while (section_mask_copy) {
+ SectionId section_id = __builtin_ctz(section_mask_copy);
section_term_frequencies.at(section_id) =
doc_hit_info_.hit_term_frequency(section_id);
- section_mask &= ~(1u << section_id);
+ section_mask_copy &= ~(1u << section_id);
}
- TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(),
- section_term_frequencies);
+ TermMatchInfo term_stats(term_, section_mask,
+ std::move(section_term_frequencies));
for (auto& cur_term_stats : *matched_terms_stats) {
if (cur_term_stats.term == term_stats.term) {
diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h
index 67bd74f..afb298b 100644
--- a/icing/index/iterator/doc-hit-info-iterator.h
+++ b/icing/index/iterator/doc-hit-info-iterator.h
@@ -94,11 +94,14 @@
// For the last hit docid, retrieves all the matched query terms and other
// stats, see TermMatchInfo.
+ // filtering_section_mask filters the matching sections and should be set only
+ // by DocHitInfoIteratorSectionRestrict.
// If Advance() wasn't called after construction, Advance() returned false or
// the concrete HitIterator didn't override this method, the vectors aren't
// populated.
virtual void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const {}
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const {}
protected:
DocHitInfo doc_hit_info_;
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h
index ac5e97f..8dbe043 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.h
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h
@@ -50,21 +50,24 @@
int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask();
+ SectionIdMask section_mask =
+ doc_hit_info_.hit_section_ids_mask() & filtering_section_mask;
+ SectionIdMask section_mask_copy = section_mask;
std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = {
Hit::kNoTermFrequency};
- while (section_mask) {
- SectionId section_id = __builtin_ctz(section_mask);
+ while (section_mask_copy) {
+ SectionId section_id = __builtin_ctz(section_mask_copy);
section_term_frequencies.at(section_id) =
doc_hit_info_.hit_term_frequency(section_id);
- section_mask &= ~(1u << section_id);
+ section_mask_copy &= ~(1u << section_id);
}
- TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(),
+ TermMatchInfo term_stats(term_, section_mask,
std::move(section_term_frequencies));
for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) {
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index e0379b8..69138e1 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -394,26 +394,36 @@
}
libtextclassifier3::StatusOr<int64_t> LiteIndex::GetElementsSize() const {
- int64_t header_and_hit_buffer_file_size =
- filesystem_->GetFileSize(hit_buffer_fd_.get());
-
- if (header_and_hit_buffer_file_size == Filesystem::kBadFileSize) {
- return absl_ports::InternalError(
- "Failed to get element size of the LiteIndex's header and hit buffer");
+ IndexStorageInfoProto storage_info = GetStorageInfo(IndexStorageInfoProto());
+ if (storage_info.lite_index_hit_buffer_size() == -1 ||
+ storage_info.lite_index_lexicon_size() == -1) {
+ return absl_ports::AbortedError(
+ "Failed to get size of LiteIndex's members.");
}
-
- int64_t lexicon_disk_usage = lexicon_.GetElementsSize();
- if (lexicon_disk_usage == IcingFilesystem::kBadFileSize) {
- return absl_ports::InternalError(
- "Failed to get element size of LiteIndex's lexicon");
- }
-
// On initialization, we grow the file to a padded size first. So this size
// won't count towards the size taken up by elements
size_t header_padded_size = IcingMMapper::page_aligned_size(header_size());
+ return storage_info.lite_index_hit_buffer_size() - header_padded_size +
+ storage_info.lite_index_lexicon_size();
+}
- return header_and_hit_buffer_file_size - header_padded_size +
- lexicon_disk_usage;
+IndexStorageInfoProto LiteIndex::GetStorageInfo(
+ IndexStorageInfoProto storage_info) const {
+ int64_t header_and_hit_buffer_file_size =
+ filesystem_->GetFileSize(hit_buffer_fd_.get());
+ if (header_and_hit_buffer_file_size != Filesystem::kBadFileSize) {
+ storage_info.set_lite_index_hit_buffer_size(
+ header_and_hit_buffer_file_size);
+ } else {
+ storage_info.set_lite_index_hit_buffer_size(-1);
+ }
+ int64_t lexicon_disk_usage = lexicon_.GetElementsSize();
+ if (lexicon_disk_usage != Filesystem::kBadFileSize) {
+ storage_info.set_lite_index_lexicon_size(lexicon_disk_usage);
+ } else {
+ storage_info.set_lite_index_lexicon_size(-1);
+ }
+ return storage_info;
}
uint32_t LiteIndex::Seek(uint32_t term_id) {
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
index 7b51aa4..90c6fbc 100644
--- a/icing/index/lite/lite-index.h
+++ b/icing/index/lite/lite-index.h
@@ -37,6 +37,7 @@
#include "icing/legacy/index/icing-lite-index-header.h"
#include "icing/legacy/index/icing-lite-index-options.h"
#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -240,6 +241,14 @@
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+ // Takes the provided storage_info, populates the fields related to the lite
+ // index and returns that storage_info.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ IndexStorageInfoProto GetStorageInfo(
+ IndexStorageInfoProto storage_info) const;
+
private:
static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions();
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.h b/icing/index/main/doc-hit-info-iterator-term-main.h
index d626d7a..f3cf701 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.h
+++ b/icing/index/main/doc-hit-info-iterator-term-main.h
@@ -50,21 +50,24 @@
int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask();
+ SectionIdMask section_mask =
+ doc_hit_info_.hit_section_ids_mask() & filtering_section_mask;
+ SectionIdMask section_mask_copy = section_mask;
std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = {
Hit::kNoTermFrequency};
- while (section_mask) {
- SectionId section_id = __builtin_ctz(section_mask);
+ while (section_mask_copy) {
+ SectionId section_id = __builtin_ctz(section_mask_copy);
section_term_frequencies.at(section_id) =
doc_hit_info_.hit_term_frequency(section_id);
- section_mask &= ~(1u << section_id);
+ section_mask_copy &= ~(1u << section_id);
}
- TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(),
+ TermMatchInfo term_stats(term_, section_mask,
std::move(section_term_frequencies));
for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) {
diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc
index 636f631..8ae6b27 100644
--- a/icing/index/main/main-index.cc
+++ b/icing/index/main/main-index.cc
@@ -121,14 +121,34 @@
}
libtextclassifier3::StatusOr<int64_t> MainIndex::GetElementsSize() const {
- int64_t lexicon_elt_size = main_lexicon_->GetElementsSize();
- int64_t index_elt_size = flash_index_storage_->GetElementsSize();
- if (lexicon_elt_size == IcingFilesystem::kBadFileSize ||
- index_elt_size == IcingFilesystem::kBadFileSize) {
- return absl_ports::InternalError(
- "Failed to get element size of LiteIndex's lexicon");
+ IndexStorageInfoProto storage_info = GetStorageInfo(IndexStorageInfoProto());
+ if (storage_info.main_index_storage_size() == -1 ||
+ storage_info.main_index_lexicon_size() == -1) {
+ return absl_ports::AbortedError(
+ "Failed to get size of MainIndex's members.");
}
- return lexicon_elt_size + index_elt_size;
+ return storage_info.main_index_storage_size() +
+ storage_info.main_index_lexicon_size();
+}
+
+IndexStorageInfoProto MainIndex::GetStorageInfo(
+ IndexStorageInfoProto storage_info) const {
+ int64_t lexicon_elt_size = main_lexicon_->GetElementsSize();
+ if (lexicon_elt_size != IcingFilesystem::kBadFileSize) {
+ storage_info.set_main_index_lexicon_size(lexicon_elt_size);
+ } else {
+ storage_info.set_main_index_lexicon_size(-1);
+ }
+ int64_t index_elt_size = flash_index_storage_->GetElementsSize();
+ if (index_elt_size != IcingFilesystem::kBadFileSize) {
+ storage_info.set_main_index_storage_size(index_elt_size);
+ } else {
+ storage_info.set_main_index_storage_size(-1);
+ }
+ storage_info.set_main_index_block_size(flash_index_storage_->block_size());
+ storage_info.set_num_blocks(flash_index_storage_->num_blocks());
+ storage_info.set_min_free_fraction(flash_index_storage_->min_free_fraction());
+ return storage_info;
}
libtextclassifier3::StatusOr<std::unique_ptr<PostingListAccessor>>
diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h
index 7403b8c..43635ca 100644
--- a/icing/index/main/main-index.h
+++ b/icing/index/main/main-index.h
@@ -27,6 +27,7 @@
#include "icing/index/term-metadata.h"
#include "icing/legacy/index/icing-dynamic-trie.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/storage.pb.h"
#include "icing/store/namespace-id.h"
#include "icing/util/status-macros.h"
@@ -172,6 +173,14 @@
// - INTERNAL on IO error
libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+ // Takes the provided storage_info, populates the fields related to the main
+ // index and returns that storage_info.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ IndexStorageInfoProto GetStorageInfo(
+ IndexStorageInfoProto storage_info) const;
+
// Returns debug information for the main index in out.
// verbosity <= 0, simplest debug information - just the lexicon
// verbosity > 0, more detailed debug information including raw postings
diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc
index bf709cd..a8fb0e2 100644
--- a/icing/jni/icing-search-engine-jni.cc
+++ b/icing/jni/icing-search-engine-jni.cc
@@ -27,6 +27,7 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/usage.pb.h"
#include "icing/util/status-macros.h"
@@ -390,6 +391,18 @@
}
JNIEXPORT jbyteArray JNICALL
+Java_com_google_android_icing_IcingSearchEngine_nativeGetStorageInfo(
+ JNIEnv* env, jclass clazz, jobject object) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
+
+ icing::lib::StorageInfoResultProto storage_info_result_proto =
+ icing->GetStorageInfo();
+
+ return SerializeProtoToJniByteArray(env, storage_info_result_proto);
+}
+
+JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeReset(
JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
diff --git a/icing/performance-configuration.cc b/icing/performance-configuration.cc
index aeaa449..45b03d3 100644
--- a/icing/performance-configuration.cc
+++ b/icing/performance-configuration.cc
@@ -15,6 +15,7 @@
#include "icing/performance-configuration.h"
#include "icing/result/result-state.h"
+#include "icing/scoring/scored-document-hit.h"
namespace icing {
namespace lib {
@@ -60,32 +61,14 @@
// value.
constexpr int kSafeMemoryUsage = 16 * 1024 * 1024; // 16MB
-// This number is not determined by benchmarks. We just assume that returning
-// the best 1000 scored document hits of a query is enough. To find the best
-// 1000 scored document hits from a heap, we need roughly 0.7 ms on a Pixel 3 XL
-// according to //icing/scoring:ranker_benchmark.
-constexpr int kMaxNumHitsPerQuery = 1000;
+// The maximum number of hits that can fit below the kSafeMemoryUsage threshold.
+constexpr int kMaxNumTotalHits = kSafeMemoryUsage / sizeof(ScoredDocumentHit);
-// A rough estimation of the size of ResultState if it stores the maximum number
-// of scored document hits.
-constexpr int kMaxMemoryPerResult =
- sizeof(ResultState) + kMaxNumHitsPerQuery * sizeof(ScoredDocumentHit);
-
-// To be safer, we assume that all the Results contain the maximum number of
-// hits and only use half of the memory allowed.
-constexpr int kDefaultNumResultsToCache =
- kSafeMemoryUsage / 2 / kMaxMemoryPerResult;
-
-static_assert(
- kDefaultNumResultsToCache > 500,
- "Default number of results to cache has changed, please update and make "
- "sure it still meets our requirements.");
} // namespace
PerformanceConfiguration::PerformanceConfiguration()
: PerformanceConfiguration(kMaxQueryLength, kDefaultNumToScore,
- kMaxNumHitsPerQuery, kDefaultNumResultsToCache) {
-}
+ kMaxNumTotalHits) {}
} // namespace lib
} // namespace icing
diff --git a/icing/performance-configuration.h b/icing/performance-configuration.h
index fa4050b..b9282ca 100644
--- a/icing/performance-configuration.h
+++ b/icing/performance-configuration.h
@@ -24,12 +24,10 @@
PerformanceConfiguration();
PerformanceConfiguration(int max_query_length_in, int num_to_score_in,
- int max_num_hits_per_query_in,
- int max_num_cache_results_in)
+ int max_num_total_hits)
: max_query_length(max_query_length_in),
num_to_score(num_to_score_in),
- max_num_hits_per_query(max_num_hits_per_query_in),
- max_num_cache_results(max_num_cache_results_in) {}
+ max_num_total_hits(max_num_total_hits) {}
// Search performance
@@ -41,11 +39,9 @@
// Memory
- // Maximum number of ScoredDocumentHits to return per query.
- int max_num_hits_per_query;
-
- // Maximum number of ResultStates to store in ResultStateManager.
- int max_num_cache_results;
+ // Maximum number of ScoredDocumentHits to cache in the ResultStateManager at
+ // one time.
+ int max_num_total_hits;
};
// TODO(b/149040810): Consider creating a class to manage performance
diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc
index 7cb2d62..46830ef 100644
--- a/icing/result/result-retriever_test.cc
+++ b/icing/result/result-retriever_test.cc
@@ -29,6 +29,7 @@
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/result/projection-tree.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -54,6 +55,15 @@
using ::testing::Return;
using ::testing::SizeIs;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
+constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
+
class ResultRetrieverTest : public testing::Test {
protected:
ResultRetrieverTest() : test_dir_(GetTestTempDir() + "/icing") {
@@ -78,65 +88,47 @@
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/10000));
- ASSERT_THAT(schema_store_->SetSchema(CreatePersonAndEmailSchema()), IsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
}
void TearDown() override {
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
- SchemaProto CreatePersonAndEmailSchema() {
- SchemaProto schema;
-
- auto* type = schema.add_types();
- type->set_schema_type("Email");
-
- auto* subj = type->add_properties();
- subj->set_property_name("name");
- subj->set_data_type(PropertyConfigProto::DataType::STRING);
- subj->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- subj->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- subj->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* sender = type->add_properties();
- sender->set_property_name("sender");
- sender->set_schema_type("Person");
- sender->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- sender->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- sender->mutable_document_indexing_config()->set_index_nested_properties(
- true);
-
- auto* person_type = schema.add_types();
- person_type->set_schema_type("Person");
- auto* name = person_type->add_properties();
- name->set_property_name("name");
- name->set_data_type(PropertyConfigProto::DataType::STRING);
- name->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- name->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- name->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* address = person_type->add_properties();
- address->set_property_name("emailAddress");
- address->set_data_type(PropertyConfigProto::DataType::STRING);
- address->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- address->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- address->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- return schema;
- }
-
SectionId GetSectionId(const std::string& type, const std::string& property) {
auto type_id_or = schema_store_->GetSchemaTypeId(type);
if (!type_id_or.ok()) {
@@ -495,35 +487,63 @@
std::vector<SearchResultProto::ResultProto> result,
result_retriever->RetrieveResults(page_result_state));
EXPECT_THAT(result, SizeIs(3));
- EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1)));
- EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "name", 0),
- Eq("subject foo 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "name", 0),
- Eq("foo"));
- EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0),
- Eq("body bar 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "body", 0),
- Eq("bar"));
- EXPECT_THAT(result[1].document(), EqualsProto(CreateDocument(/*id=*/2)));
- EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "name", 0),
- Eq("subject foo 2"));
- EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "name", 0),
- Eq("foo"));
- EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "body", 0),
- Eq("body bar 2"));
- EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "body", 0),
- Eq("bar"));
+ const DocumentProto& result_document_one = result.at(0).document();
+ const SnippetProto& result_snippet_one = result.at(0).snippet();
+ EXPECT_THAT(result_document_one, EqualsProto(CreateDocument(/*id=*/1)));
+ EXPECT_THAT(result_snippet_one.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_one, result_snippet_one.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)),
+ ElementsAre("body bar 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet_one.entries(1).property_name(), Eq("name"));
+ content = GetString(&result_document_one,
+ result_snippet_one.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_one.entries(1)),
+ ElementsAre("subject foo 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet_one.entries(1)),
+ ElementsAre("foo"));
- EXPECT_THAT(result[2].document(), EqualsProto(CreateDocument(/*id=*/3)));
- EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "name", 0),
- Eq("subject foo 3"));
- EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "name", 0),
- Eq("foo"));
- EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "body", 0),
- Eq("body bar 3"));
- EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "body", 0),
- Eq("bar"));
+ const DocumentProto& result_document_two = result.at(1).document();
+ const SnippetProto& result_snippet_two = result.at(1).snippet();
+ EXPECT_THAT(result_document_two, EqualsProto(CreateDocument(/*id=*/2)));
+ EXPECT_THAT(result_snippet_two.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_two,
+ result_snippet_two.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)),
+ ElementsAre("body bar 2"));
+ EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet_two.entries(1).property_name(), Eq("name"));
+ content = GetString(&result_document_two,
+ result_snippet_two.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_two.entries(1)),
+ ElementsAre("subject foo 2"));
+ EXPECT_THAT(GetMatches(content, result_snippet_two.entries(1)),
+ ElementsAre("foo"));
+
+ const DocumentProto& result_document_three = result.at(2).document();
+ const SnippetProto& result_snippet_three = result.at(2).snippet();
+ EXPECT_THAT(result_document_three, EqualsProto(CreateDocument(/*id=*/3)));
+ EXPECT_THAT(result_snippet_three.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet_three.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_three,
+ result_snippet_three.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_three.entries(0)),
+ ElementsAre("body bar 3"));
+ EXPECT_THAT(GetMatches(content, result_snippet_three.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet_three.entries(1).property_name(), Eq("name"));
+ content = GetString(&result_document_three,
+ result_snippet_three.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_three.entries(1)),
+ ElementsAre("subject foo 3"));
+ EXPECT_THAT(GetMatches(content, result_snippet_three.entries(1)),
+ ElementsAre("foo"));
}
TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) {
@@ -568,15 +588,25 @@
std::vector<SearchResultProto::ResultProto> result,
result_retriever->RetrieveResults(page_result_state));
EXPECT_THAT(result, SizeIs(3));
- EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1)));
- EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "name", 0),
- Eq("subject foo 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "name", 0),
- Eq("foo"));
- EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0),
- Eq("body bar 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "body", 0),
- Eq("bar"));
+
+ const DocumentProto& result_document = result.at(0).document();
+ const SnippetProto& result_snippet = result.at(0).snippet();
+ EXPECT_THAT(result_document, EqualsProto(CreateDocument(/*id=*/1)));
+ EXPECT_THAT(result_snippet.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&result_document, result_snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet.entries(0)),
+ ElementsAre("body bar 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet.entries(1).property_name(), Eq("name"));
+ content =
+ GetString(&result_document, result_snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet.entries(1)),
+ ElementsAre("subject foo 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet.entries(1)),
+ ElementsAre("foo"));
EXPECT_THAT(result[1].document(), EqualsProto(CreateDocument(/*id=*/2)));
EXPECT_THAT(result[1].snippet(),
diff --git a/icing/result/result-state-manager.cc b/icing/result/result-state-manager.cc
index 0f27d9e..19dabb8 100644
--- a/icing/result/result-state-manager.cc
+++ b/icing/result/result-state-manager.cc
@@ -16,15 +16,15 @@
#include "icing/proto/search.pb.h"
#include "icing/util/clock.h"
+#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
-ResultStateManager::ResultStateManager(int max_hits_per_query,
- int max_result_states)
- : max_hits_per_query_(max_hits_per_query),
- max_result_states_(max_result_states),
+ResultStateManager::ResultStateManager(int max_total_hits)
+ : max_total_hits_(max_total_hits),
+ num_total_hits_(0),
random_generator_(GetSteadyTimeNanoseconds()) {}
libtextclassifier3::StatusOr<PageResultState>
@@ -33,9 +33,6 @@
return absl_ports::InvalidArgumentError("ResultState has no results");
}
- // Truncates scored document hits so that they don't take up too much space.
- result_state.TruncateHitsTo(max_hits_per_query_);
-
// Gets the number before calling GetNextPage() because num_returned() may
// change after returning more results.
int num_previously_returned = result_state.num_returned();
@@ -68,10 +65,12 @@
}
uint64_t ResultStateManager::Add(ResultState result_state) {
- RemoveStatesIfNeeded();
+ RemoveStatesIfNeeded(result_state);
+ result_state.TruncateHitsTo(max_total_hits_);
uint64_t new_token = GetUniqueToken();
+ num_total_hits_ += result_state.num_remaining();
result_state_map_.emplace(new_token, std::move(result_state));
// Tracks the insertion order
token_queue_.push(new_token);
@@ -112,6 +111,7 @@
next_page_token = kInvalidNextPageToken;
}
+ num_total_hits_ -= result_of_page.size();
return PageResultState(
result_of_page, next_page_token, std::move(snippet_context_copy),
std::move(projection_tree_map_copy), num_returned, num_per_page);
@@ -129,10 +129,14 @@
void ResultStateManager::InvalidateAllResultStates() {
absl_ports::unique_lock l(&mutex_);
+ InternalInvalidateAllResultStates();
+}
+void ResultStateManager::InternalInvalidateAllResultStates() {
result_state_map_.clear();
invalidated_token_set_.clear();
- token_queue_ = {};
+ token_queue_ = std::queue<uint64_t>();
+ num_total_hits_ = 0;
}
uint64_t ResultStateManager::GetUniqueToken() {
@@ -148,12 +152,21 @@
return new_token;
}
-void ResultStateManager::RemoveStatesIfNeeded() {
+void ResultStateManager::RemoveStatesIfNeeded(const ResultState& result_state) {
if (result_state_map_.empty() || token_queue_.empty()) {
return;
}
- // Removes any tokens that were previously invalidated.
+ // 1. Check if this new result_state would take up the entire result state
+ // manager budget.
+ if (result_state.num_remaining() > max_total_hits_) {
+ // This single result state will exceed our budget. Drop everything else to
+ // accommodate it.
+ InternalInvalidateAllResultStates();
+ return;
+ }
+
+ // 2. Remove any tokens that were previously invalidated.
while (!token_queue_.empty() &&
invalidated_token_set_.find(token_queue_.front()) !=
invalidated_token_set_.end()) {
@@ -161,11 +174,13 @@
token_queue_.pop();
}
- // Removes the oldest state
- if (result_state_map_.size() >= max_result_states_ && !token_queue_.empty()) {
- result_state_map_.erase(token_queue_.front());
+ // 3. If we're over budget, remove states from oldest to newest until we fit
+ // into our budget.
+ while (result_state.num_remaining() + num_total_hits_ > max_total_hits_) {
+ InternalInvalidateResultState(token_queue_.front());
token_queue_.pop();
}
+ invalidated_token_set_.clear();
}
void ResultStateManager::InternalInvalidateResultState(uint64_t token) {
@@ -173,7 +188,10 @@
// invalidated_token_set_. The entry in token_queue_ can't be easily removed
// right now (may need O(n) time), so we leave it there and later completely
// remove the token in RemoveStatesIfNeeded().
- if (result_state_map_.erase(token) > 0) {
+ auto itr = result_state_map_.find(token);
+ if (itr != result_state_map_.end()) {
+ num_total_hits_ -= itr->second.num_remaining();
+ result_state_map_.erase(itr);
invalidated_token_set_.insert(token);
}
}
diff --git a/icing/result/result-state-manager.h b/icing/result/result-state-manager.h
index eaf9eb5..cf5d8c2 100644
--- a/icing/result/result-state-manager.h
+++ b/icing/result/result-state-manager.h
@@ -37,7 +37,7 @@
// Used to store and manage ResultState.
class ResultStateManager {
public:
- explicit ResultStateManager(int max_hits_per_query, int max_result_states);
+ explicit ResultStateManager(int max_total_hits);
ResultStateManager(const ResultStateManager&) = delete;
ResultStateManager& operator=(const ResultStateManager&) = delete;
@@ -77,13 +77,15 @@
private:
absl_ports::shared_mutex mutex_;
- // The maximum number of scored document hits to return for a query. When we
- // have more than the maximum number, extra hits will be truncated.
- const int max_hits_per_query_;
+ // The maximum number of scored document hits that all result states may
+ // have. When a new result state is added such that num_total_hits_ would
+ // exceed max_total_hits_, the oldest result states are evicted until
+ // num_total_hits_ is below max_total_hits_.
+ const int max_total_hits_;
- // The maximum number of result states. When we have more than the maximum
- // number, the oldest / firstly added result state will be removed.
- const int max_result_states_;
+ // The number of scored document hits that all result states currently held by
+ // the result state manager have.
+ int num_total_hits_;
// A hash map of (next-page token -> result state)
std::unordered_map<uint64_t, ResultState> result_state_map_
@@ -112,13 +114,21 @@
uint64_t GetUniqueToken() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Helper method to remove old states to make room for incoming states.
- void RemoveStatesIfNeeded() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ void RemoveStatesIfNeeded(const ResultState& result_state)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Helper method to remove a result state from result_state_map_, the token
// will then be temporarily kept in invalidated_token_set_ until it's finally
// removed from token_queue_.
void InternalInvalidateResultState(uint64_t token)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Internal method to invalidate all result states / tokens currently in
+ // ResultStateManager. We need this separate method so that other public
+ // methods don't need to call InvalidateAllResultStates(). Public methods
+ // calling each other may cause deadlock issues.
+ void InternalInvalidateAllResultStates()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
};
} // namespace lib
diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc
index 6defa6f..afddeb5 100644
--- a/icing/result/result-state-manager_test.cc
+++ b/icing/result/result-state-manager_test.cc
@@ -59,8 +59,7 @@
/*num_per_page=*/10);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state,
result_state_manager.RankAndPaginate(std::move(original_result_state)));
@@ -86,8 +85,7 @@
/*num_per_page=*/2);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max());
// First page, 2 results
ICING_ASSERT_OK_AND_ASSIGN(
@@ -126,8 +124,7 @@
ResultState empty_result_state = CreateResultState({}, /*num_per_page=*/1);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max());
EXPECT_THAT(
result_state_manager.RankAndPaginate(std::move(empty_result_state)),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
@@ -146,8 +143,7 @@
/*num_per_page=*/1);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -185,8 +181,7 @@
/*num_per_page=*/1);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -221,9 +216,7 @@
CreateScoredDocumentHit(/*document_id=*/6)},
/*num_per_page=*/1);
- ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/2);
+ ResultStateManager result_state_manager(/*max_total_hits=*/2);
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -255,7 +248,7 @@
}
TEST(ResultStateManagerTest,
- PreviouslyInvalidatedResultStateShouldNotBeCounted) {
+ InvalidatedResultStateShouldDecreaseCurrentHitsCount) {
ResultState result_state1 =
CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
CreateScoredDocumentHit(/*document_id=*/2)},
@@ -268,14 +261,12 @@
CreateResultState({CreateScoredDocumentHit(/*document_id=*/5),
CreateScoredDocumentHit(/*document_id=*/6)},
/*num_per_page=*/1);
- ResultState result_state4 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/7),
- CreateScoredDocumentHit(/*document_id=*/8)},
- /*num_per_page=*/1);
- ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/3);
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from RankAndPaginate).
+ // Each result state has a page size of 1 and a result set of 2 hits. So each
+ // result will take up one hit of our three hit budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3);
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -286,11 +277,18 @@
PageResultState page_result_state3,
result_state_manager.RankAndPaginate(std::move(result_state3)));
- // Invalidates state 2, so that the number of valid tokens becomes 2.
+ // Invalidates state 2, so that the number of hits currently cached should be
+ // decremented to 2.
result_state_manager.InvalidateResultState(
page_result_state2.next_page_token);
- // Adding state 4 shouldn't affect rest of the states
+ // If invalidating state 2 correctly decremented the current hit count to 2,
+ // then adding state 4 should still be within our budget and no other result
+ // states should be evicted.
+ ResultState result_state4 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/7),
+ CreateScoredDocumentHit(/*document_id=*/8)},
+ /*num_per_page=*/1);
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state4,
result_state_manager.RankAndPaginate(std::move(result_state4)));
@@ -321,6 +319,473 @@
/*document_id=*/7))));
}
+TEST(ResultStateManagerTest,
+ InvalidatedAllResultStatesShouldResetCurrentHitCount) {
+ ResultState result_state1 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
+ CreateScoredDocumentHit(/*document_id=*/2)},
+ /*num_per_page=*/1);
+ ResultState result_state2 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/3),
+ CreateScoredDocumentHit(/*document_id=*/4)},
+ /*num_per_page=*/1);
+ ResultState result_state3 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/5),
+ CreateScoredDocumentHit(/*document_id=*/6)},
+ /*num_per_page=*/1);
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from RankAndPaginate).
+ // Each result state has a page size of 1 and a result set of 2 hits. So each
+ // result will take up one hit of our three hit budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state3,
+ result_state_manager.RankAndPaginate(std::move(result_state3)));
+
+ // Invalidates all states so that the current hit count will be 0.
+ result_state_manager.InvalidateAllResultStates();
+
+ // If invalidating all states correctly reset the current hit count to 0,
+ // then the entirety of state 4 should still be within our budget and no other
+ // result states should be evicted.
+ ResultState result_state4 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/7),
+ CreateScoredDocumentHit(/*document_id=*/8)},
+ /*num_per_page=*/1);
+ ResultState result_state5 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/9),
+ CreateScoredDocumentHit(/*document_id=*/10)},
+ /*num_per_page=*/1);
+ ResultState result_state6 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/11),
+ CreateScoredDocumentHit(/*document_id=*/12)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state4,
+ result_state_manager.RankAndPaginate(std::move(result_state4)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state5,
+ result_state_manager.RankAndPaginate(std::move(result_state5)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state6,
+ result_state_manager.RankAndPaginate(std::move(result_state6)));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state2.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state3.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state4,
+ result_state_manager.GetNextPage(page_result_state4.next_page_token));
+ EXPECT_THAT(page_result_state4.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/7))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state5,
+ result_state_manager.GetNextPage(page_result_state5.next_page_token));
+ EXPECT_THAT(page_result_state5.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/9))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state6,
+ result_state_manager.GetNextPage(page_result_state6.next_page_token));
+ EXPECT_THAT(page_result_state6.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/11))));
+}
+
+TEST(ResultStateManagerTest,
+ InvalidatedResultStateShouldDecreaseCurrentHitsCountByExactStateHitCount) {
+ ResultState result_state1 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
+ CreateScoredDocumentHit(/*document_id=*/2)},
+ /*num_per_page=*/1);
+ ResultState result_state2 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/3),
+ CreateScoredDocumentHit(/*document_id=*/4)},
+ /*num_per_page=*/1);
+ ResultState result_state3 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/5),
+ CreateScoredDocumentHit(/*document_id=*/6)},
+ /*num_per_page=*/1);
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from RankAndPaginate).
+ // Each result state has a page size of 1 and a result set of 2 hits. So each
+ // result will take up one hit of our three hit budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state3,
+ result_state_manager.RankAndPaginate(std::move(result_state3)));
+
+ // Invalidates state 2, so that the number of hits currently cached should be
+ // decremented to 2.
+ result_state_manager.InvalidateResultState(
+ page_result_state2.next_page_token);
+
+ // If invalidating state 2 correctly decremented the current hit count to 2,
+ // then adding state 4 should still be within our budget and no other result
+ // states should be evicted.
+ ResultState result_state4 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/7),
+ CreateScoredDocumentHit(/*document_id=*/8)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state4,
+ result_state_manager.RankAndPaginate(std::move(result_state4)));
+
+ // If invalidating result state 2 correctly decremented the current hit count
+ // to 2 and adding state 4 correctly incremented it to 3, then adding this
+ // result state should trigger the eviction of state 1.
+ ResultState result_state5 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/9),
+ CreateScoredDocumentHit(/*document_id=*/10)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state5,
+ result_state_manager.RankAndPaginate(std::move(result_state5)));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state2.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/5))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state4,
+ result_state_manager.GetNextPage(page_result_state4.next_page_token));
+ EXPECT_THAT(page_result_state4.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/7))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state5,
+ result_state_manager.GetNextPage(page_result_state5.next_page_token));
+ EXPECT_THAT(page_result_state5.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/9))));
+}
+
+TEST(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) {
+ ResultState result_state1 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
+ CreateScoredDocumentHit(/*document_id=*/2)},
+ /*num_per_page=*/1);
+ ResultState result_state2 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/3),
+ CreateScoredDocumentHit(/*document_id=*/4)},
+ /*num_per_page=*/1);
+ ResultState result_state3 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/5),
+ CreateScoredDocumentHit(/*document_id=*/6)},
+ /*num_per_page=*/1);
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from RankAndPaginate).
+ // Each result state has a page size of 1 and a result set of 2 hits. So each
+ // result will take up one hit of our three hit budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state3,
+ result_state_manager.RankAndPaginate(std::move(result_state3)));
+
+ // GetNextPage for result state 1 should return its result and decrement the
+ // number of cached hits to 2.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state1,
+ result_state_manager.GetNextPage(page_result_state1.next_page_token));
+ EXPECT_THAT(page_result_state1.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/1))));
+
+ // If retrieving the next page for result state 1 correctly decremented the
+ // current hit count to 2, then adding state 4 should still be within our
+ // budget and no other result states should be evicted.
+ ResultState result_state4 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/7),
+ CreateScoredDocumentHit(/*document_id=*/8)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state4,
+ result_state_manager.RankAndPaginate(std::move(result_state4)));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state2,
+ result_state_manager.GetNextPage(page_result_state2.next_page_token));
+ EXPECT_THAT(page_result_state2.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/3))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/5))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state4,
+ result_state_manager.GetNextPage(page_result_state4.next_page_token));
+ EXPECT_THAT(page_result_state4.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/7))));
+}
+
+TEST(ResultStateManagerTest,
+ GetNextPageShouldDecreaseCurrentHitsCountByExactlyOnePage) {
+ ResultState result_state1 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
+ CreateScoredDocumentHit(/*document_id=*/2)},
+ /*num_per_page=*/1);
+ ResultState result_state2 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/3),
+ CreateScoredDocumentHit(/*document_id=*/4)},
+ /*num_per_page=*/1);
+ ResultState result_state3 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/5),
+ CreateScoredDocumentHit(/*document_id=*/6)},
+ /*num_per_page=*/1);
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from RankAndPaginate).
+ // Each result state has a page size of 1 and a result set of 2 hits. So each
+ // result will take up one hit of our three hit budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state3,
+ result_state_manager.RankAndPaginate(std::move(result_state3)));
+
+ // GetNextPage for result state 1 should return its result and decrement the
+ // number of cached hits to 2.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state1,
+ result_state_manager.GetNextPage(page_result_state1.next_page_token));
+ EXPECT_THAT(page_result_state1.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/1))));
+
+ // If retrieving the next page for result state 1 correctly decremented the
+ // current hit count to 2, then adding state 4 should still be within our
+ // budget and no other result states should be evicted.
+ ResultState result_state4 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/7),
+ CreateScoredDocumentHit(/*document_id=*/8)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state4,
+ result_state_manager.RankAndPaginate(std::move(result_state4)));
+
+ // If retrieving the next page for result state 1 correctly decremented the
+ // current hit count to 2 and adding state 4 correctly incremented it to 3,
+ // then adding this result state should trigger the eviction of state 2.
+ ResultState result_state5 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/9),
+ CreateScoredDocumentHit(/*document_id=*/10)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state5,
+ result_state_manager.RankAndPaginate(std::move(result_state5)));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state2.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/5))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state4,
+ result_state_manager.GetNextPage(page_result_state4.next_page_token));
+ EXPECT_THAT(page_result_state4.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/7))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state5,
+ result_state_manager.GetNextPage(page_result_state5.next_page_token));
+ EXPECT_THAT(page_result_state5.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/9))));
+}
+
+TEST(ResultStateManagerTest, AddingOverBudgetResultStateShouldEvictAllStates) {
+ ResultState result_state1 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
+ CreateScoredDocumentHit(/*document_id=*/2),
+ CreateScoredDocumentHit(/*document_id=*/3)},
+ /*num_per_page=*/1);
+ ResultState result_state2 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/4),
+ CreateScoredDocumentHit(/*document_id=*/5)},
+ /*num_per_page=*/1);
+
+ // Add the first two states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from RankAndPaginate).
+ // Each result state has a page size of 1. So 3 hits will remain cached.
+ ResultStateManager result_state_manager(/*max_total_hits=*/4);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+
+ // Add a result state that is larger than the entire budget. This should
+ // result in all previous result states being evicted, the first hit from
+ // result state 3 being returned and the next four hits being cached (the last
+ // hit should be dropped because it exceeds the max).
+ ResultState result_state3 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/6),
+ CreateScoredDocumentHit(/*document_id=*/7),
+ CreateScoredDocumentHit(/*document_id=*/8),
+ CreateScoredDocumentHit(/*document_id=*/9),
+ CreateScoredDocumentHit(/*document_id=*/10),
+ CreateScoredDocumentHit(/*document_id=*/11)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state3,
+ result_state_manager.RankAndPaginate(std::move(result_state3)));
+
+ // GetNextPage for result state 1 and 2 should return NOT_FOUND.
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state2.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Only the next four results in state 3 should be retrievable.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/10))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/9))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/8))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/7))));
+
+ // The final result should have been dropped because it exceeded the budget.
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state3.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST(ResultStateManagerTest,
+ AddingResultStateShouldEvictOverBudgetResultState) {
+ ResultStateManager result_state_manager(/*max_total_hits=*/4);
+ // Add a result state that is larger than the entire budget. The entire result
+ // state will still be cached.
+ ResultState result_state1 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/0),
+ CreateScoredDocumentHit(/*document_id=*/1),
+ CreateScoredDocumentHit(/*document_id=*/2),
+ CreateScoredDocumentHit(/*document_id=*/3),
+ CreateScoredDocumentHit(/*document_id=*/4),
+ CreateScoredDocumentHit(/*document_id=*/5)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+
+ // Add a result state. Because state2 + state1 is larger than the budget,
+ // state1 should be evicted.
+ ResultState result_state2 =
+ CreateResultState({CreateScoredDocumentHit(/*document_id=*/6),
+ CreateScoredDocumentHit(/*document_id=*/7)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+
+ // state1 should have been evicted and state2 should still be retrievable.
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state2,
+ result_state_manager.GetNextPage(page_result_state2.next_page_token));
+ EXPECT_THAT(page_result_state2.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
+ /*document_id=*/6))));
+}
+
TEST(ResultStateManagerTest, ShouldGetSnippetContext) {
ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/1);
result_spec.mutable_snippet_spec()->set_num_to_snippet(5);
@@ -339,8 +804,7 @@
query_terms_map, search_spec, CreateScoringSpec(), result_spec);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state,
result_state_manager.RankAndPaginate(std::move(original_result_state)));
@@ -374,8 +838,7 @@
query_terms_map, search_spec, CreateScoringSpec(), result_spec);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state,
result_state_manager.RankAndPaginate(std::move(original_result_state)));
@@ -400,8 +863,7 @@
/*num_per_page=*/2);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max());
// First page, 2 results
ICING_ASSERT_OK_AND_ASSIGN(
@@ -435,41 +897,47 @@
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST(ResultStateManagerTest, ShouldStoreMaxNumberOfScoredDocumentHits) {
- ResultState original_result_state =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/4),
- CreateScoredDocumentHit(/*document_id=*/5)},
- /*num_per_page=*/2);
+TEST(ResultStateManagerTest, ShouldStoreAllHits) {
+ ScoredDocumentHit scored_hit_1 = CreateScoredDocumentHit(/*document_id=*/1);
+ ScoredDocumentHit scored_hit_2 = CreateScoredDocumentHit(/*document_id=*/2);
+ ScoredDocumentHit scored_hit_3 = CreateScoredDocumentHit(/*document_id=*/3);
+ ScoredDocumentHit scored_hit_4 = CreateScoredDocumentHit(/*document_id=*/4);
+ ScoredDocumentHit scored_hit_5 = CreateScoredDocumentHit(/*document_id=*/5);
- ResultStateManager result_state_manager(
- /*max_hits_per_query=*/3,
- /*max_result_states=*/std::numeric_limits<int>::max());
+ ResultState original_result_state = CreateResultState(
+ {scored_hit_1, scored_hit_2, scored_hit_3, scored_hit_4, scored_hit_5},
+ /*num_per_page=*/2);
- // The 5 input scored document hits will be truncated to 3.
+ ResultStateManager result_state_manager(/*max_total_hits=*/4);
+
+ // The 5 input scored document hits will not be truncated. The first page of
+ // two hits will be returned immediately and the other three hits will fit
+ // within our caching budget.
// First page, 2 results
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(original_result_state)));
- EXPECT_THAT(
- page_result_state1.scored_document_hits,
- ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4))));
+ EXPECT_THAT(page_result_state1.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_5),
+ EqualsScoredDocumentHit(scored_hit_4)));
uint64_t next_page_token = page_result_state1.next_page_token;
- // Second page, 1 results.
+ // Second page, 2 results.
ICING_ASSERT_OK_AND_ASSIGN(PageResultState page_result_state2,
result_state_manager.GetNextPage(next_page_token));
EXPECT_THAT(page_result_state2.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(
- CreateScoredDocumentHit(/*document_id=*/3))));
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_3),
+ EqualsScoredDocumentHit(scored_hit_2)));
- // No third page.
+ // Third page, 1 result.
+ ICING_ASSERT_OK_AND_ASSIGN(PageResultState page_result_state3,
+ result_state_manager.GetNextPage(next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_1)));
+
+ // Fourth page, 0 results.
EXPECT_THAT(result_state_manager.GetNextPage(next_page_token),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
diff --git a/icing/result/result-state.h b/icing/result/result-state.h
index be92b85..de810fb 100644
--- a/icing/result/result-state.h
+++ b/icing/result/result-state.h
@@ -67,6 +67,10 @@
// increased when GetNextPage() is called.
int num_returned() const { return num_returned_; }
+ // The number of results yet to be returned. This number is decreased when
+ // GetNextPage is called.
+ int num_remaining() const { return scored_document_hits_.size(); }
+
private:
// The scored document hits. It represents a heap data structure when ranking
// is required so that we can get top K hits in O(KlgN) time. If no ranking is
diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc
index d4a5f79..0510d55 100644
--- a/icing/result/snippet-retriever.cc
+++ b/icing/result/snippet-retriever.cc
@@ -15,6 +15,7 @@
#include "icing/result/snippet-retriever.h"
#include <algorithm>
+#include <iterator>
#include <memory>
#include <string>
#include <string_view>
@@ -25,9 +26,12 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
#include "icing/proto/term.pb.h"
#include "icing/query/query-terms.h"
#include "icing/schema/schema-store.h"
+#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/tokenization/language-segmenter.h"
@@ -43,6 +47,33 @@
namespace {
+const PropertyProto* GetProperty(const DocumentProto& document,
+ std::string_view property_name) {
+ for (const PropertyProto& property : document.properties()) {
+ if (property.name() == property_name) {
+ return &property;
+ }
+ }
+ return nullptr;
+}
+
+inline std::string AddPropertyToPath(const std::string& current_path,
+ std::string_view property) {
+ if (current_path.empty()) {
+ return std::string(property);
+ }
+ return absl_ports::StrCat(current_path, kPropertySeparator, property);
+}
+
+inline std::string AddIndexToPath(int values_size, int index,
+ const std::string& property_path) {
+ if (values_size == 1) {
+ return property_path;
+ }
+ return absl_ports::StrCat(property_path, kLBracket, std::to_string(index),
+ kRBracket);
+}
+
class TokenMatcher {
public:
virtual ~TokenMatcher() = default;
@@ -189,20 +220,12 @@
struct SectionData {
std::string_view section_name;
std::string_view section_subcontent;
- // Identifies which subsection of the section content, section_subcontent has
- // come from.
- // Ex. "recipient.address" :
- // ["foo@google.com", "bar@google.com", "baz@google.com"]
- // The subcontent_index of "bar@google.com" is 1.
- int subcontent_index;
};
libtextclassifier3::StatusOr<SnippetMatchProto> RetrieveMatch(
const ResultSpecProto::SnippetSpecProto& snippet_spec,
const SectionData& value, Tokenizer::Iterator* iterator) {
SnippetMatchProto snippet_match;
- snippet_match.set_values_index(value.subcontent_index);
-
Token match = iterator->GetToken();
int match_pos = match.text.data() - value.section_subcontent.data();
int match_mid = match_pos + match.text.length() / 2;
@@ -243,33 +266,109 @@
int max_matches_remaining;
};
-libtextclassifier3::StatusOr<SnippetProto::EntryProto> RetrieveMatches(
- const TokenMatcher* matcher, const MatchOptions& match_options,
- const SectionData& value, const Tokenizer* tokenizer) {
- SnippetProto::EntryProto snippet_entry;
- snippet_entry.set_property_name(std::string(value.section_name));
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator,
- tokenizer->Tokenize(value.section_subcontent));
- while (iterator->Advance()) {
- if (snippet_entry.snippet_matches_size() >=
- match_options.max_matches_remaining) {
- break;
+// Retrieves snippets in the string values of current_property.
+// Tokenizer is provided to tokenize string content and matcher is provided to
+// indicate when a token matches content in the query.
+//
+// current_property is the property with the string values to snippet.
+// property_path is the path in the document to current_property.
+//
+// MatchOptions holds the snippet spec and number of desired matches remaining.
+// Each call to GetEntriesFromProperty will decrement max_matches_remaining
+// by the number of entries that it adds to snippet_proto.
+//
+// The SnippetEntries found for matched content will be added to snippet_proto.
+void GetEntriesFromProperty(const PropertyProto* current_property,
+ const std::string& property_path,
+ const TokenMatcher* matcher,
+ const Tokenizer* tokenizer,
+ MatchOptions* match_options,
+ SnippetProto* snippet_proto) {
+  // Snippet each string value of this property, collecting matched tokens.
+ for (int i = 0; i < current_property->string_values_size(); ++i) {
+ SnippetProto::EntryProto snippet_entry;
+ snippet_entry.set_property_name(AddIndexToPath(
+ current_property->string_values_size(), /*index=*/i, property_path));
+ std::string_view value = current_property->string_values(i);
+ std::unique_ptr<Tokenizer::Iterator> iterator =
+ tokenizer->Tokenize(value).ValueOrDie();
+ while (iterator->Advance()) {
+ Token token = iterator->GetToken();
+ if (matcher->Matches(token)) {
+        // RetrieveMatch is expected to succeed for a token the matcher just
+        // accepted; ValueOrDie() terminates on failure, since an error here
+        // leaves the tokenizer iterator in an invalid state anyway.
+ SectionData data = {property_path, value};
+ SnippetMatchProto match =
+ RetrieveMatch(match_options->snippet_spec, data, iterator.get())
+ .ValueOrDie();
+ snippet_entry.mutable_snippet_matches()->Add(std::move(match));
+ if (--match_options->max_matches_remaining <= 0) {
+ *snippet_proto->add_entries() = std::move(snippet_entry);
+ return;
+ }
+ }
}
- Token token = iterator->GetToken();
- if (matcher->Matches(token)) {
- // If there was an error while retrieving the match, the tokenizer
- // iterator is probably in an invalid state. There's nothing we can do
- // here, so just return.
- ICING_ASSIGN_OR_RETURN(
- SnippetMatchProto match,
- RetrieveMatch(match_options.snippet_spec, value, iterator.get()));
- snippet_entry.mutable_snippet_matches()->Add(std::move(match));
+ if (!snippet_entry.snippet_matches().empty()) {
+ *snippet_proto->add_entries() = std::move(snippet_entry);
}
}
- if (snippet_entry.snippet_matches().empty()) {
- return absl_ports::NotFoundError("No matches found in value!");
+}
+
+// Retrieves snippets in document from content at section_path.
+// Tokenizer is provided to tokenize string content and matcher is provided to
+// indicate when a token matches content in the query.
+//
+// section_path_index refers to the current property that is held by document.
+// current_path is equivalent to the first section_path_index values in
+// section_path, but with value indices present.
+//
+// For example, suppose that a hit appeared somewhere in the "bcc.emailAddress".
+// The arguments for RetrieveSnippetForSection might be
+// {section_path=["bcc", "emailAddress"], section_path_index=0, current_path=""}
+// on the first call and
+// {section_path=["bcc", "emailAddress"], section_path_index=1,
+// current_path="bcc[1]"} on the second recursive call.
+//
+// MatchOptions holds the snippet spec and number of desired matches remaining.
+// Each call to RetrieveSnippetForSection will decrement max_matches_remaining
+// by the number of entries that it adds to snippet_proto.
+//
+// The SnippetEntries found for matched content will be added to snippet_proto.
+void RetrieveSnippetForSection(
+ const DocumentProto& document, const TokenMatcher* matcher,
+ const Tokenizer* tokenizer,
+ const std::vector<std::string_view>& section_path, int section_path_index,
+ const std::string& current_path, MatchOptions* match_options,
+ SnippetProto* snippet_proto) {
+ std::string_view next_property_name = section_path.at(section_path_index);
+ const PropertyProto* current_property =
+ GetProperty(document, next_property_name);
+ if (current_property == nullptr) {
+ ICING_VLOG(1) << "No property " << next_property_name << " found at path "
+ << current_path;
+ return;
}
- return snippet_entry;
+ std::string property_path =
+ AddPropertyToPath(current_path, next_property_name);
+ if (section_path_index == section_path.size() - 1) {
+ // We're at the end. Let's check our values.
+ GetEntriesFromProperty(current_property, property_path, matcher, tokenizer,
+ match_options, snippet_proto);
+ } else {
+ // Still got more to go. Let's look through our subdocuments.
+ std::vector<SnippetProto::EntryProto> entries;
+ for (int i = 0; i < current_property->document_values_size(); ++i) {
+ std::string new_path = AddIndexToPath(
+ current_property->document_values_size(), /*index=*/i, property_path);
+ RetrieveSnippetForSection(current_property->document_values(i), matcher,
+ tokenizer, section_path, section_path_index + 1,
+ new_path, match_options, snippet_proto);
+ if (match_options->max_matches_remaining <= 0) {
+ break;
+ }
+ }
+ }
}
} // namespace
@@ -304,6 +403,11 @@
// Remove this section from the mask.
section_id_mask &= ~(1u << section_id);
+ MatchOptions match_options = {snippet_spec};
+ match_options.max_matches_remaining =
+ std::min(snippet_spec.num_to_snippet() - snippet_proto.entries_size(),
+ snippet_spec.num_matches_per_property());
+
// Determine the section name and match type.
auto section_metadata_or =
schema_store_.GetSectionMetadata(type_id, section_id);
@@ -311,7 +415,9 @@
continue;
}
const SectionMetadata* metadata = section_metadata_or.ValueOrDie();
- MatchOptions match_options = {snippet_spec};
+ std::vector<std::string_view> section_path =
+ absl_ports::StrSplit(metadata->path, kPropertySeparator);
+
// Match type must be as restrictive as possible. Prefix matches for a
// snippet should only be included if both the query is Prefix and the
// section has prefixes enabled.
@@ -330,38 +436,18 @@
if (!matcher_or.ok()) {
continue;
}
- match_options.max_matches_remaining =
- snippet_spec.num_matches_per_property();
+ std::unique_ptr<TokenMatcher> matcher = std::move(matcher_or).ValueOrDie();
- // Retrieve values and snippet them.
- auto values_or =
- schema_store_.GetStringSectionContent(document, metadata->path);
- if (!values_or.ok()) {
- continue;
- }
auto tokenizer_or = tokenizer_factory::CreateIndexingTokenizer(
metadata->tokenizer, &language_segmenter_);
if (!tokenizer_or.ok()) {
// If we couldn't create the tokenizer properly, just skip this section.
continue;
}
- std::vector<std::string_view> values = values_or.ValueOrDie();
- for (int value_index = 0; value_index < values.size(); ++value_index) {
- if (match_options.max_matches_remaining <= 0) {
- break;
- }
- SectionData value = {metadata->path, values.at(value_index), value_index};
- auto entry_or =
- RetrieveMatches(matcher_or.ValueOrDie().get(), match_options, value,
- tokenizer_or.ValueOrDie().get());
-
- // Drop any entries that encountered errors or didn't find any matches.
- if (entry_or.ok()) {
- match_options.max_matches_remaining -=
- entry_or.ValueOrDie().snippet_matches_size();
- snippet_proto.mutable_entries()->Add(std::move(entry_or).ValueOrDie());
- }
- }
+ std::unique_ptr<Tokenizer> tokenizer = std::move(tokenizer_or).ValueOrDie();
+ RetrieveSnippetForSection(
+ document, matcher.get(), tokenizer.get(), section_path,
+ /*section_path_index=*/0, "", &match_options, &snippet_proto);
}
return snippet_proto;
}
diff --git a/icing/result/snippet-retriever_test.cc b/icing/result/snippet-retriever_test.cc
index ecda400..1cf4e5a 100644
--- a/icing/result/snippet-retriever_test.cc
+++ b/icing/result/snippet-retriever_test.cc
@@ -27,7 +27,9 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
#include "icing/query/query-terms.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section-manager.h"
#include "icing/store/document-id.h"
@@ -49,10 +51,30 @@
namespace {
+using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::SizeIs;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED =
+ PropertyConfigProto_Cardinality_Code_REPEATED;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
+constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
+
+std::vector<std::string_view> GetPropertyPaths(const SnippetProto& snippet) {
+ std::vector<std::string_view> paths;
+ for (const SnippetProto::EntryProto& entry : snippet.entries()) {
+ paths.push_back(entry.property_name());
+ }
+ return paths;
+}
+
class SnippetRetrieverTest : public testing::Test {
protected:
void SetUp() override {
@@ -75,25 +97,22 @@
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- SchemaProto schema;
- SchemaTypeConfigProto* type_config = schema.add_types();
- type_config->set_schema_type("email");
- PropertyConfigProto* prop_config = type_config->add_properties();
- prop_config->set_property_name("subject");
- prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
- prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- prop_config->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- prop_config = type_config->add_properties();
- prop_config->set_property_name("body");
- prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
- prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- prop_config->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
@@ -156,11 +175,13 @@
// "three". len=4, orig_window= "thre"
snippet_spec_.set_max_window_bytes(4);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq(""));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre(""));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsInWhitespace) {
@@ -180,11 +201,14 @@
// len=14, orig_window=" two three fou"
snippet_spec_.set_max_window_bytes(14);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("two three"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("two three"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsMidToken) {
@@ -203,11 +227,14 @@
// len=16, orig_window="e two three four"
snippet_spec_.set_max_window_bytes(16);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("two three four"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("two three four"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInPunctuation) {
@@ -226,11 +253,14 @@
// len=20, orig_window="one two three four.."
snippet_spec_.set_max_window_bytes(20);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four.."));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four.."));
}
TEST_F(SnippetRetrieverTest,
@@ -251,11 +281,14 @@
// len=26, orig_window="pside down in Australia\xC2"
snippet_spec_.set_max_window_bytes(24);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("down in Australia"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("down in Australia"));
}
TEST_F(SnippetRetrieverTest,
@@ -276,11 +309,14 @@
// len=26, orig_window="upside down in Australia\xC2\xBF"
snippet_spec_.set_max_window_bytes(26);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("upside down in Australia¿"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("upside down in Australia¿"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsBeforeValueStart) {
@@ -299,11 +335,14 @@
// len=22, orig_window="one two three four..."
snippet_spec_.set_max_window_bytes(22);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four..."));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four..."));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInWhitespace) {
@@ -322,11 +361,14 @@
// len=26, orig_window="one two three four.... "
snippet_spec_.set_max_window_bytes(26);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four...."));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four...."));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsMidToken) {
@@ -345,11 +387,14 @@
// len=32, orig_window="one two three four.... fiv"
snippet_spec_.set_max_window_bytes(32);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four...."));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four...."));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeEqualToValueSize) {
@@ -368,11 +413,14 @@
// len=34, orig_window="one two three four.... five"
snippet_spec_.set_max_window_bytes(34);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four.... five"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four.... five"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeLargerThanValueSize) {
@@ -391,11 +439,14 @@
// len=36, orig_window="one two three four.... five"
snippet_spec_.set_max_window_bytes(36);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four.... five"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four.... five"));
}
TEST_F(SnippetRetrieverTest, PrefixSnippeting) {
@@ -409,14 +460,17 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"f"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets. 'f' should match prefix-enabled property 'subject', but
// not exact-only property 'body'
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("subject foo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo"));
}
TEST_F(SnippetRetrieverTest, ExactSnippeting) {
@@ -431,8 +485,7 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"f"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), IsEmpty());
@@ -452,13 +505,15 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"foo"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), IsEmpty());
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre(""));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo"));
}
TEST_F(SnippetRetrieverTest, SnippetingMultipleMatches) {
@@ -474,20 +529,25 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(2));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
EXPECT_THAT(
- GetWindow(document, snippet, "body", 0),
- Eq("Concerning the subject of foo, we need to begin considering"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo"));
- EXPECT_THAT(GetWindow(document, snippet, "body", 1),
- Eq("our options regarding body bar."));
- EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("bar"));
+ GetWindows(content, snippet.entries(0)),
+ ElementsAre("Concerning the subject of foo, we need to begin considering",
+ "our options regarding body bar."));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("foo", "bar"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)),
+ ElementsAre("subject foo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("foo"));
}
TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrict) {
@@ -505,18 +565,19 @@
SectionIdMask section_mask = 0b00000001;
SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
EXPECT_THAT(
- GetWindow(document, snippet, "body", 0),
- Eq("Concerning the subject of foo, we need to begin considering"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo"));
- EXPECT_THAT(GetWindow(document, snippet, "body", 1),
- Eq("our options regarding body bar."));
- EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("bar"));
+ GetWindows(content, snippet.entries(0)),
+ ElementsAre("Concerning the subject of foo, we need to begin considering",
+ "our options regarding body bar."));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("foo", "bar"));
}
TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrictedTerm) {
@@ -536,25 +597,26 @@
SectionRestrictQueryTermsMap query_terms{{"", {"subject"}},
{"body", {"foo"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(2));
- // 'subject' section should only have the one match for "subject".
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("subject"));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 1), IsEmpty());
- EXPECT_THAT(GetMatch(document, snippet, "subject", 1), IsEmpty());
-
- // 'body' section should have matches for "subject" and "foo".
- EXPECT_THAT(GetWindow(document, snippet, "body", 0),
- Eq("Concerning the subject of foo, we need to begin"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("subject"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
EXPECT_THAT(
- GetWindow(document, snippet, "body", 1),
- Eq("Concerning the subject of foo, we need to begin considering"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("foo"));
+ GetWindows(content, snippet.entries(0)),
+ ElementsAre(
+ "Concerning the subject of foo, we need to begin",
+ "Concerning the subject of foo, we need to begin considering"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("subject", "foo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)),
+ ElementsAre("subject foo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("subject"));
}
TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesOneMatchPerProperty) {
@@ -573,19 +635,24 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(2));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
EXPECT_THAT(
- GetWindow(document, snippet, "body", 0),
- Eq("Concerning the subject of foo, we need to begin considering"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo"));
- EXPECT_THAT(GetWindow(document, snippet, "body", 1), IsEmpty());
- EXPECT_THAT(GetMatch(document, snippet, "body", 1), IsEmpty());
+ GetWindows(content, snippet.entries(0)),
+ ElementsAre(
+ "Concerning the subject of foo, we need to begin considering"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)),
+ ElementsAre("subject foo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("foo"));
}
TEST_F(SnippetRetrieverTest, PrefixSnippetingNormalization) {
@@ -599,12 +666,14 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"md"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("MDI team"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("MDI"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("MDI team"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("MDI"));
}
TEST_F(SnippetRetrieverTest, ExactSnippetingNormalization) {
@@ -619,13 +688,398 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"zurich"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", 0),
- Eq("Some members are in Zürich."));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("Zürich"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("Some members are in Zürich."));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("Zürich"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingTestOneLevel) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("SingleLevelType")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("X")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Y")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Z")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"};
+ DocumentProto document;
+ document.set_schema("SingleLevelType");
+ PropertyProto* prop = document.add_properties();
+ prop->set_name("X");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = document.add_properties();
+ prop->set_name("Y");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = document.add_properties();
+ prop->set_name("Z");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+
+ SectionIdMask section_mask = 0b00000111;
+ SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(6));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("X[1]"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("X[3]"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
+
+ EXPECT_THAT(GetPropertyPaths(snippet),
+ ElementsAre("X[1]", "X[3]", "Y[1]", "Y[3]", "Z[1]", "Z[3]"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevel) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("SingleLevelType")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("X")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Y")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Z")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("MultiLevelType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("A")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("B")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("C")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"};
+ DocumentProto subdocument;
+ PropertyProto* prop = subdocument.add_properties();
+ prop->set_name("X");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = subdocument.add_properties();
+ prop->set_name("Y");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = subdocument.add_properties();
+ prop->set_name("Z");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+
+ DocumentProto document;
+ document.set_schema("MultiLevelType");
+ prop = document.add_properties();
+ prop->set_name("A");
+ *prop->add_document_values() = subdocument;
+
+ prop = document.add_properties();
+ prop->set_name("B");
+ *prop->add_document_values() = subdocument;
+
+ prop = document.add_properties();
+ prop->set_name("C");
+ *prop->add_document_values() = subdocument;
+
+ SectionIdMask section_mask = 0b111111111;
+ SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(18));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("A.X[1]"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("A.X[3]"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
+
+ EXPECT_THAT(
+ GetPropertyPaths(snippet),
+ ElementsAre("A.X[1]", "A.X[3]", "A.Y[1]", "A.Y[3]", "A.Z[1]", "A.Z[3]",
+ "B.X[1]", "B.X[3]", "B.Y[1]", "B.Y[3]", "B.Z[1]", "B.Z[3]",
+ "C.X[1]", "C.X[3]", "C.Y[1]", "C.Y[3]", "C.Z[1]", "C.Z[3]"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelRepeated) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("SingleLevelType")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("X")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Y")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Z")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("MultiLevelType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("A")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("B")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("C")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"};
+ DocumentProto subdocument;
+ PropertyProto* prop = subdocument.add_properties();
+ prop->set_name("X");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = subdocument.add_properties();
+ prop->set_name("Y");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = subdocument.add_properties();
+ prop->set_name("Z");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+
+ DocumentProto document;
+ document.set_schema("MultiLevelType");
+ prop = document.add_properties();
+ prop->set_name("A");
+ *prop->add_document_values() = subdocument;
+ *prop->add_document_values() = subdocument;
+
+ prop = document.add_properties();
+ prop->set_name("B");
+ *prop->add_document_values() = subdocument;
+ *prop->add_document_values() = subdocument;
+
+ prop = document.add_properties();
+ prop->set_name("C");
+ *prop->add_document_values() = subdocument;
+ *prop->add_document_values() = subdocument;
+
+ SectionIdMask section_mask = 0b111111111;
+ SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(36));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("A[0].X[1]"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("A[0].X[3]"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
+
+ EXPECT_THAT(GetPropertyPaths(snippet),
+ ElementsAre("A[0].X[1]", "A[0].X[3]", "A[1].X[1]", "A[1].X[3]",
+ "A[0].Y[1]", "A[0].Y[3]", "A[1].Y[1]", "A[1].Y[3]",
+ "A[0].Z[1]", "A[0].Z[3]", "A[1].Z[1]", "A[1].Z[3]",
+ "B[0].X[1]", "B[0].X[3]", "B[1].X[1]", "B[1].X[3]",
+ "B[0].Y[1]", "B[0].Y[3]", "B[1].Y[1]", "B[1].Y[3]",
+ "B[0].Z[1]", "B[0].Z[3]", "B[1].Z[1]", "B[1].Z[3]",
+ "C[0].X[1]", "C[0].X[3]", "C[1].X[1]", "C[1].X[3]",
+ "C[0].Y[1]", "C[0].Y[3]", "C[1].Y[1]", "C[1].Y[3]",
+ "C[0].Z[1]", "C[0].Z[3]", "C[1].Z[1]", "C[1].Z[3]"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelSingleValue) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("SingleLevelType")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("X")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Y")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Z")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("MultiLevelType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("A")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("B")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("C")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ DocumentProto subdocument;
+ PropertyProto* prop = subdocument.add_properties();
+ prop->set_name("X");
+ prop->add_string_values("polo");
+ prop = subdocument.add_properties();
+ prop->set_name("Y");
+ prop->add_string_values("marco");
+ prop = subdocument.add_properties();
+ prop->set_name("Z");
+ prop->add_string_values("polo");
+
+ DocumentProto document;
+ document.set_schema("MultiLevelType");
+ prop = document.add_properties();
+ prop->set_name("A");
+ *prop->add_document_values() = subdocument;
+ *prop->add_document_values() = subdocument;
+
+ prop = document.add_properties();
+ prop->set_name("B");
+ *prop->add_document_values() = subdocument;
+ *prop->add_document_values() = subdocument;
+
+ prop = document.add_properties();
+ prop->set_name("C");
+ *prop->add_document_values() = subdocument;
+ *prop->add_document_values() = subdocument;
+
+ SectionIdMask section_mask = 0b111111111;
+ SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(12));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("A[0].X"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("A[1].X"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
+
+ EXPECT_THAT(
+ GetPropertyPaths(snippet),
+ ElementsAre("A[0].X", "A[1].X", "A[0].Z", "A[1].Z", "B[0].X", "B[1].X",
+ "B[0].Z", "B[1].Z", "C[0].X", "C[1].X", "C[0].Z", "C[1].Z"));
}
} // namespace
diff --git a/icing/schema-builder.h b/icing/schema-builder.h
new file mode 100644
index 0000000..59ed7c5
--- /dev/null
+++ b/icing/schema-builder.h
@@ -0,0 +1,130 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_BUILDER_H_
+#define ICING_SCHEMA_BUILDER_H_
+
+#include <cstdint>
+#include <initializer_list>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/proto/schema.pb.h"
+
+namespace icing {
+namespace lib {
+
+class PropertyConfigBuilder {
+ public:
+ PropertyConfigBuilder() = default;
+ explicit PropertyConfigBuilder(PropertyConfigProto property)
+ : property_(std::move(property)) {}
+
+ PropertyConfigBuilder& SetName(std::string_view name) {
+ property_.set_property_name(std::string(name));
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetDataType(
+ PropertyConfigProto::DataType::Code data_type) {
+ property_.set_data_type(data_type);
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetDataTypeString(
+ TermMatchType::Code match_type,
+ StringIndexingConfig::TokenizerType::Code tokenizer) {
+ property_.set_data_type(PropertyConfigProto::DataType::STRING);
+ property_.mutable_string_indexing_config()->set_term_match_type(match_type);
+ property_.mutable_string_indexing_config()->set_tokenizer_type(tokenizer);
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetDataTypeDocument(std::string_view schema_type,
+ bool index_nested_properties) {
+ property_.set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property_.set_schema_type(std::string(schema_type));
+ property_.mutable_document_indexing_config()->set_index_nested_properties(
+ index_nested_properties);
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetCardinality(
+ PropertyConfigProto::Cardinality::Code cardinality) {
+ property_.set_cardinality(cardinality);
+ return *this;
+ }
+
+ PropertyConfigProto Build() const { return std::move(property_); }
+
+ private:
+ PropertyConfigProto property_;
+};
+
+class SchemaTypeConfigBuilder {
+ public:
+ SchemaTypeConfigBuilder() = default;
+ SchemaTypeConfigBuilder(SchemaTypeConfigProto type_config)
+ : type_config_(std::move(type_config)) {}
+
+ SchemaTypeConfigBuilder& SetType(std::string_view type) {
+ type_config_.set_schema_type(std::string(type));
+ return *this;
+ }
+
+ SchemaTypeConfigBuilder& SetVersion(int version) {
+ type_config_.set_version(version);
+ return *this;
+ }
+
+ SchemaTypeConfigBuilder& AddProperty(PropertyConfigProto property) {
+ *type_config_.add_properties() = std::move(property);
+ return *this;
+ }
+ SchemaTypeConfigBuilder& AddProperty(PropertyConfigBuilder property_builder) {
+ *type_config_.add_properties() = property_builder.Build();
+ return *this;
+ }
+
+ SchemaTypeConfigProto Build() { return std::move(type_config_); }
+
+ private:
+ SchemaTypeConfigProto type_config_;
+};
+
+class SchemaBuilder {
+ public:
+ SchemaBuilder() = default;
+ SchemaBuilder(SchemaProto schema) : schema_(std::move(schema)) {}
+
+ SchemaBuilder& AddType(SchemaTypeConfigProto type) {
+ *schema_.add_types() = std::move(type);
+ return *this;
+ }
+ SchemaBuilder& AddType(SchemaTypeConfigBuilder type_builder) {
+ *schema_.add_types() = type_builder.Build();
+ return *this;
+ }
+
+ SchemaProto Build() { return std::move(schema_); }
+
+ private:
+ SchemaProto schema_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_BUILDER_H_
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index b43d2a4..7040a31 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -104,7 +104,7 @@
libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
- const Clock* clock, NativeInitializeStats* initialize_stats) {
+ const Clock* clock, InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
@@ -122,7 +122,7 @@
schema_file_(*filesystem, MakeSchemaFilename(base_dir_)) {}
SchemaStore::~SchemaStore() {
- if (initialized_) {
+ if (has_schema_successfully_set_) {
if (!PersistToDisk().ok()) {
ICING_LOG(ERROR) << "Error persisting to disk in SchemaStore destructor";
}
@@ -130,7 +130,7 @@
}
libtextclassifier3::Status SchemaStore::Initialize(
- NativeInitializeStats* initialize_stats) {
+ InitializeStatsProto* initialize_stats) {
auto schema_proto_or = GetSchema();
if (absl_ports::IsNotFound(schema_proto_or.status())) {
// Don't have an existing schema proto, that's fine
@@ -139,6 +139,7 @@
// Real error when trying to read the existing schema
return schema_proto_or.status();
}
+ has_schema_successfully_set_ = true;
if (!InitializeDerivedFiles().ok()) {
ICING_VLOG(3)
@@ -147,7 +148,7 @@
std::unique_ptr<Timer> regenerate_timer = clock_.GetNewTimer();
if (initialize_stats != nullptr) {
initialize_stats->set_schema_store_recovery_cause(
- NativeInitializeStats::IO_ERROR);
+ InitializeStatsProto::IO_ERROR);
}
ICING_RETURN_IF_ERROR(RegenerateDerivedFiles());
if (initialize_stats != nullptr) {
@@ -156,7 +157,6 @@
}
}
- initialized_ = true;
if (initialize_stats != nullptr) {
initialize_stats->set_num_schema_types(type_config_map_.size());
}
@@ -253,9 +253,12 @@
header.magic = SchemaStore::Header::kMagic;
header.checksum = checksum.Get();
+ ScopedFd scoped_fd(
+ filesystem_.OpenForWrite(MakeHeaderFilename(base_dir_).c_str()));
// This should overwrite the header.
- if (!filesystem_.Write(MakeHeaderFilename(base_dir_).c_str(), &header,
- sizeof(header))) {
+ if (!scoped_fd.is_valid() ||
+ !filesystem_.Write(scoped_fd.get(), &header, sizeof(header)) ||
+ !filesystem_.DataSync(scoped_fd.get())) {
return absl_ports::InternalError(absl_ports::StrCat(
"Failed to write SchemaStore header: ", MakeHeaderFilename(base_dir_)));
}
@@ -285,18 +288,11 @@
libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const {
Crc32 total_checksum;
-
- auto schema_proto_or = GetSchema();
- if (absl_ports::IsNotFound(schema_proto_or.status())) {
+ if (!has_schema_successfully_set_) {
// Nothing to checksum
return total_checksum;
- } else if (!schema_proto_or.ok()) {
- // Some real error. Pass it up
- return schema_proto_or.status();
}
-
- // Guaranteed to have a schema proto now
- const SchemaProto* schema_proto = schema_proto_or.ValueOrDie();
+ ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema());
Crc32 schema_checksum;
schema_checksum.Append(schema_proto->SerializeAsString());
@@ -390,6 +386,7 @@
// Write the schema (and potentially overwrite a previous schema)
ICING_RETURN_IF_ERROR(
schema_file_.Write(std::make_unique<SchemaProto>(new_schema)));
+ has_schema_successfully_set_ = true;
ICING_RETURN_IF_ERROR(RegenerateDerivedFiles());
}
@@ -399,14 +396,7 @@
libtextclassifier3::StatusOr<const SchemaTypeConfigProto*>
SchemaStore::GetSchemaTypeConfig(std::string_view schema_type) const {
- auto schema_proto_or = GetSchema();
- if (absl_ports::IsNotFound(schema_proto_or.status())) {
- return absl_ports::FailedPreconditionError("Schema not set yet.");
- } else if (!schema_proto_or.ok()) {
- // Some other real error, pass it up
- return schema_proto_or.status();
- }
-
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
const auto& type_config_iter =
type_config_map_.find(std::string(schema_type));
if (type_config_iter == type_config_map_.end()) {
@@ -418,39 +408,42 @@
libtextclassifier3::StatusOr<SchemaTypeId> SchemaStore::GetSchemaTypeId(
std::string_view schema_type) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
return schema_type_mapper_->Get(schema_type);
}
libtextclassifier3::StatusOr<std::vector<std::string_view>>
SchemaStore::GetStringSectionContent(const DocumentProto& document,
std::string_view section_path) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
return section_manager_->GetStringSectionContent(document, section_path);
}
libtextclassifier3::StatusOr<std::vector<std::string_view>>
SchemaStore::GetStringSectionContent(const DocumentProto& document,
SectionId section_id) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
return section_manager_->GetStringSectionContent(document, section_id);
}
libtextclassifier3::StatusOr<const SectionMetadata*>
SchemaStore::GetSectionMetadata(SchemaTypeId schema_type_id,
SectionId section_id) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
return section_manager_->GetSectionMetadata(schema_type_id, section_id);
}
libtextclassifier3::StatusOr<std::vector<Section>> SchemaStore::ExtractSections(
const DocumentProto& document) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
return section_manager_->ExtractSections(document);
}
libtextclassifier3::Status SchemaStore::PersistToDisk() {
- if (schema_type_mapper_ != nullptr) {
- // It's possible we haven't had a schema set yet, so SchemaTypeMapper hasn't
- // been initialized and is still a nullptr
- ICING_RETURN_IF_ERROR(schema_type_mapper_->PersistToDisk());
+ if (!has_schema_successfully_set_) {
+ return libtextclassifier3::Status::OK;
}
-
+ ICING_RETURN_IF_ERROR(schema_type_mapper_->PersistToDisk());
// Write the header
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
@@ -458,5 +451,35 @@
return libtextclassifier3::Status::OK;
}
+SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const {
+ SchemaStoreStorageInfoProto storage_info;
+ int64_t directory_size = filesystem_.GetDiskUsage(base_dir_.c_str());
+ if (directory_size != Filesystem::kBadFileSize) {
+ storage_info.set_schema_store_size(directory_size);
+ } else {
+ storage_info.set_schema_store_size(-1);
+ }
+ ICING_ASSIGN_OR_RETURN(const SchemaProto* schema, GetSchema(), storage_info);
+ storage_info.set_num_schema_types(schema->types_size());
+ int total_sections = 0;
+ int num_types_sections_exhausted = 0;
+ for (const SchemaTypeConfigProto& type : schema->types()) {
+ auto sections_list_or =
+ section_manager_->GetMetadataList(type.schema_type());
+ if (!sections_list_or.ok()) {
+ continue;
+ }
+ total_sections += sections_list_or.ValueOrDie()->size();
+ if (sections_list_or.ValueOrDie()->size() == kMaxSectionId + 1) {
+ ++num_types_sections_exhausted;
+ }
+ }
+
+ storage_info.set_num_total_sections(total_sections);
+ storage_info.set_num_schema_types_sections_exhausted(
+ num_types_sections_exhausted);
+ return storage_info;
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index 3854704..dd1edb8 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -29,6 +29,7 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/schema/schema-util.h"
#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
@@ -115,7 +116,7 @@
// INTERNAL_ERROR on any IO errors
static libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> Create(
const Filesystem* filesystem, const std::string& base_dir,
- const Clock* clock, NativeInitializeStats* initialize_stats = nullptr);
+ const Clock* clock, InitializeStatsProto* initialize_stats = nullptr);
// Not copyable
SchemaStore(const SchemaStore&) = delete;
@@ -167,6 +168,7 @@
//
// Returns:
// SchemaTypeId on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
// NOT_FOUND_ERROR if we don't know about the schema type
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<SchemaTypeId> GetSchemaTypeId(
@@ -176,6 +178,7 @@
//
// Returns:
// A string of content on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
// NOT_FOUND if:
// 1. Property is optional and not found in the document
// 2. section_path is invalid
@@ -188,6 +191,7 @@
//
// Returns:
// A string of content on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
// INVALID_ARGUMENT if section id is invalid
// NOT_FOUND if type config name of document not found
libtextclassifier3::StatusOr<std::vector<std::string_view>>
@@ -199,6 +203,7 @@
//
// Returns:
// pointer to SectionMetadata on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
// INVALID_ARGUMENT if schema type id or section is invalid
libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata(
SchemaTypeId schema_type_id, SectionId section_id) const;
@@ -209,6 +214,7 @@
//
// Returns:
// A list of sections on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
// NOT_FOUND if type config name of document not found
libtextclassifier3::StatusOr<std::vector<Section>> ExtractSections(
const DocumentProto& document) const;
@@ -228,6 +234,12 @@
// INTERNAL_ERROR on compute error
libtextclassifier3::StatusOr<Crc32> ComputeChecksum() const;
+ // Calculates the StorageInfo for the Schema Store.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ SchemaStoreStorageInfoProto GetStorageInfo() const;
+
private:
// Use SchemaStore::Create instead.
explicit SchemaStore(const Filesystem* filesystem, std::string base_dir,
@@ -238,8 +250,7 @@
// Returns:
// OK on success
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status Initialize(
- NativeInitializeStats* initialize_stats);
+ libtextclassifier3::Status Initialize(InitializeStatsProto* initialize_stats);
// Creates sub-components and verifies the integrity of each sub-component.
//
@@ -275,16 +286,20 @@
// Returns any IO errors.
libtextclassifier3::Status ResetSchemaTypeMapper();
+ libtextclassifier3::Status CheckSchemaSet() const {
+ return has_schema_successfully_set_
+ ? libtextclassifier3::Status::OK
+ : absl_ports::FailedPreconditionError("Schema not set yet.");
+ }
+
const Filesystem& filesystem_;
const std::string base_dir_;
const Clock& clock_;
- // Used internally to indicate whether the class has been initialized. This is
- // to guard against cases where the object has been created, but Initialize
- // fails in the constructor. If we have successfully exited the constructor,
- // then this field can be ignored. Clients of SchemaStore should not need to
- // worry about this field.
- bool initialized_ = false;
+ // Used internally to indicate whether the class has been successfully
+ // initialized with a valid schema. Will be false if Initialize failed or no
+ // schema has ever been set.
+ bool has_schema_successfully_set_ = false;
// Cached schema
FileBackedProto<SchemaProto> schema_file_;
diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc
index 7df3dd9..d97948f 100644
--- a/icing/schema/schema-store_test.cc
+++ b/icing/schema/schema-store_test.cc
@@ -30,8 +30,9 @@
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/testing/common-matchers.h"
-#include "icing/testing/tmp-directory.h"
#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
namespace icing {
namespace lib {
@@ -41,9 +42,24 @@
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::ElementsAre;
using ::testing::Eq;
+using ::testing::Ge;
using ::testing::Not;
using ::testing::Pointee;
+PropertyConfigProto CreateProperty(
+ std::string_view name, PropertyConfigProto::DataType::Code datatype,
+ PropertyConfigProto::Cardinality::Code cardinality,
+ TermMatchType::Code match_type,
+ StringIndexingConfig::TokenizerType::Code tokenizer_type) {
+ PropertyConfigProto property;
+ property.set_property_name(std::string(name));
+ property.set_data_type(datatype);
+ property.set_cardinality(cardinality);
+ property.mutable_string_indexing_config()->set_term_match_type(match_type);
+ property.mutable_string_indexing_config()->set_tokenizer_type(tokenizer_type);
+ return property;
+}
+
class SchemaStoreTest : public ::testing::Test {
protected:
SchemaStoreTest() : test_dir_(GetTestTempDir() + "/icing") {
@@ -54,13 +70,10 @@
// Add an indexed property so we generate section metadata on it
auto property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ *property = CreateProperty("subject", PropertyConfigProto::DataType::STRING,
+ PropertyConfigProto::Cardinality::OPTIONAL,
+ TermMatchType::EXACT_ONLY,
+ StringIndexingConfig::TokenizerType::PLAIN);
}
void TearDown() override {
@@ -74,8 +87,9 @@
};
TEST_F(SchemaStoreTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(SchemaStore::Create(/*filesystem=*/nullptr, test_dir_, &fake_clock_),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ SchemaStore::Create(/*filesystem=*/nullptr, test_dir_, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_F(SchemaStoreTest, CorruptSchemaError) {
@@ -190,7 +204,36 @@
}
TEST_F(SchemaStoreTest, CreateNoPreviousSchemaOk) {
- EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+
+ // The apis to retrieve information about the schema should fail gracefully.
+ EXPECT_THAT(store->GetSchema(),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(store->GetSchemaTypeConfig("foo"),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->GetSchemaTypeId("foo"),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->GetSectionMetadata(/*schema_type_id=*/0, /*section_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ // The apis to extract content from a document should fail gracefully.
+ DocumentProto doc;
+ PropertyProto* prop = doc.add_properties();
+ prop->set_name("name");
+ prop->add_string_values("foo bar baz");
+
+ EXPECT_THAT(store->GetStringSectionContent(doc, /*section_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->GetStringSectionContent(doc, "name"),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->ExtractSections(doc),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ // The apis to persist and checksum data should succeed.
+ EXPECT_THAT(store->ComputeChecksum(), IsOkAndHolds(Crc32()));
+ EXPECT_THAT(store->PersistToDisk(), IsOk());
}
TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) {
@@ -204,7 +247,8 @@
IsOkAndHolds(EqualsSetSchemaResult(result)));
schema_store.reset();
- EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_), IsOk());
+ EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_),
+ IsOk());
}
TEST_F(SchemaStoreTest, MultipleCreateOk) {
@@ -670,6 +714,69 @@
EXPECT_THAT(*actual_schema, EqualsProto(schema));
}
+TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+
+ // Create a schema with two types: one simple type and one type that uses all
+ // 16 sections.
+ SchemaProto schema;
+ auto type = schema.add_types();
+ type->set_schema_type("email");
+ PropertyConfigProto prop = CreateProperty(
+ "subject", PropertyConfigProto::DataType::STRING,
+ PropertyConfigProto::Cardinality::OPTIONAL, TermMatchType::EXACT_ONLY,
+ StringIndexingConfig::TokenizerType::PLAIN);
+ *type->add_properties() = prop;
+
+ type = schema.add_types();
+ type->set_schema_type("fullSectionsType");
+ prop.set_property_name("prop0");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop1");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop2");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop3");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop4");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop5");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop6");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop7");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop8");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop9");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop10");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop11");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop12");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop13");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop14");
+ *type->add_properties() = prop;
+ prop.set_property_name("prop15");
+ *type->add_properties() = prop;
+
+ SchemaStore::SetSchemaResult result;
+ result.success = true;
+ EXPECT_THAT(schema_store->SetSchema(schema),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+
+ SchemaStoreStorageInfoProto storage_info = schema_store->GetStorageInfo();
+ EXPECT_THAT(storage_info.schema_store_size(), Ge(0));
+ EXPECT_THAT(storage_info.num_schema_types(), Eq(2));
+ EXPECT_THAT(storage_info.num_total_sections(), Eq(17));
+ EXPECT_THAT(storage_info.num_schema_types_sections_exhausted(), Eq(1));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/schema/section-manager.cc b/icing/schema/section-manager.cc
index a10e9b9..a0893e6 100644
--- a/icing/schema/section-manager.cc
+++ b/icing/schema/section-manager.cc
@@ -165,16 +165,6 @@
return values;
}
-// Helper function to get metadata list of a type config
-libtextclassifier3::StatusOr<std::vector<SectionMetadata>> GetMetadataList(
- const KeyMapper<SchemaTypeId>& schema_type_mapper,
- const std::vector<std::vector<SectionMetadata>>& section_metadata_cache,
- const std::string& type_config_name) {
- ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
- schema_type_mapper.Get(type_config_name));
- return section_metadata_cache.at(schema_type_id);
-}
-
} // namespace
SectionManager::SectionManager(
@@ -263,18 +253,16 @@
"Section id %d is greater than the max value %d", section_id,
kMaxSectionId));
}
- ICING_ASSIGN_OR_RETURN(
- const std::vector<SectionMetadata>& metadata_list,
- GetMetadataList(schema_type_mapper_, section_metadata_cache_,
- document.schema()));
- if (section_id >= metadata_list.size()) {
+ ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
+ GetMetadataList(document.schema()));
+ if (section_id >= metadata_list->size()) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Section with id %d doesn't exist in type config %s", section_id,
document.schema().c_str()));
}
// The index of metadata list is the same as the section id, so we can use
// section id as the index.
- return GetStringSectionContent(document, metadata_list[section_id].path);
+ return GetStringSectionContent(document, metadata_list->at(section_id).path);
}
libtextclassifier3::StatusOr<const SectionMetadata*>
@@ -300,12 +288,10 @@
libtextclassifier3::StatusOr<std::vector<Section>>
SectionManager::ExtractSections(const DocumentProto& document) const {
- ICING_ASSIGN_OR_RETURN(
- const std::vector<SectionMetadata>& metadata_list,
- GetMetadataList(schema_type_mapper_, section_metadata_cache_,
- document.schema()));
+ ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
+ GetMetadataList(document.schema()));
std::vector<Section> sections;
- for (const auto& section_metadata : metadata_list) {
+ for (const auto& section_metadata : *metadata_list) {
auto section_content_or =
GetStringSectionContent(document, section_metadata.path);
// Adds to result vector if section is found in document
@@ -317,5 +303,12 @@
return sections;
}
+libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
+SectionManager::GetMetadataList(const std::string& type_config_name) const {
+ ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+ schema_type_mapper_.Get(type_config_name));
+ return &section_metadata_cache_.at(schema_type_id);
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/schema/section-manager.h b/icing/schema/section-manager.h
index 191a169..51eb133 100644
--- a/icing/schema/section-manager.h
+++ b/icing/schema/section-manager.h
@@ -30,7 +30,9 @@
namespace icing {
namespace lib {
-inline constexpr char kPropertySeparator[] = ".";
+inline constexpr std::string_view kPropertySeparator = ".";
+inline constexpr std::string_view kLBracket = "[";
+inline constexpr std::string_view kRBracket = "]";
// This class provides section-related operations. It assigns sections according
// to type configs and extracts section / sections from documents.
@@ -94,6 +96,12 @@
libtextclassifier3::StatusOr<std::vector<Section>> ExtractSections(
const DocumentProto& document) const;
+ // Returns:
+ // - On success, the section metadatas for the specified type
+ // - NOT_FOUND if the type config name is not present in the schema
+ libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
+ GetMetadataList(const std::string& type_config_name) const;
+
private:
// Use SectionManager::Create() to instantiate
explicit SectionManager(
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 72bf736..59944fe 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -189,6 +189,17 @@
return expiration_timestamp_ms;
}
+void IncrementDeletedOrExpired(FileBackedVector<int64_t>* document_id_mapper,
+ DocumentId document_id, int* num_deleted_out,
+ int* num_expired_out) {
+ auto location_or = document_id_mapper->Get(document_id);
+ if (location_or.ok() && *location_or.ValueOrDie() == kDocDeletedFlag) {
+ ++(*num_deleted_out);
+ } else {
+ ++(*num_expired_out);
+ }
+}
+
} // namespace
DocumentStore::DocumentStore(const Filesystem* filesystem,
@@ -203,13 +214,13 @@
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
const DocumentProto& document, int32_t num_tokens,
- NativePutDocumentStats* put_document_stats) {
+ PutDocumentStatsProto* put_document_stats) {
return Put(DocumentProto(document), num_tokens, put_document_stats);
}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
DocumentProto&& document, int32_t num_tokens,
- NativePutDocumentStats* put_document_stats) {
+ PutDocumentStatsProto* put_document_stats) {
document.mutable_internal_fields()->set_length_in_tokens(num_tokens);
return InternalPut(document, put_document_stats);
}
@@ -226,7 +237,7 @@
libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
- NativeInitializeStats* initialize_stats) {
+ InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
ICING_RETURN_ERROR_IF_NULL(schema_store);
@@ -243,7 +254,7 @@
}
libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
- NativeInitializeStats* initialize_stats) {
+ InitializeStatsProto* initialize_stats) {
auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
filesystem_, MakeDocumentLogFilename(base_dir_),
FileBackedProtoLog<DocumentWrapper>::Options(
@@ -264,16 +275,16 @@
<< "Data loss in document log, regenerating derived files.";
if (initialize_stats != nullptr) {
initialize_stats->set_document_store_recovery_cause(
- NativeInitializeStats::DATA_LOSS);
+ InitializeStatsProto::DATA_LOSS);
if (create_result.data_loss == DataLoss::PARTIAL) {
// Ground truth is partially lost.
initialize_stats->set_document_store_data_status(
- NativeInitializeStats::PARTIAL_LOSS);
+ InitializeStatsProto::PARTIAL_LOSS);
} else {
// Ground truth is completely lost.
initialize_stats->set_document_store_data_status(
- NativeInitializeStats::COMPLETE_LOSS);
+ InitializeStatsProto::COMPLETE_LOSS);
}
}
std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
@@ -294,7 +305,7 @@
"regenerating derived files for DocumentStore.";
if (initialize_stats != nullptr) {
initialize_stats->set_document_store_recovery_cause(
- NativeInitializeStats::IO_ERROR);
+ InitializeStatsProto::IO_ERROR);
}
std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
libtextclassifier3::Status status = RegenerateDerivedFiles();
@@ -788,6 +799,11 @@
}
Crc32 corpus_score_cache_checksum = std::move(checksum_or).ValueOrDie();
+ // NOTE: We purposely don't include usage_store checksum here because we can't
+ // regenerate it from ground truth documents. If it gets corrupted, we'll just
+ // clear all usage reports, but we shouldn't throw everything else in the
+ // document store out.
+
total_checksum.Append(std::to_string(document_log_checksum.Get()));
total_checksum.Append(std::to_string(document_key_mapper_checksum.Get()));
total_checksum.Append(std::to_string(document_id_mapper_checksum.Get()));
@@ -819,8 +835,11 @@
header.checksum = checksum.Get();
// This should overwrite the header.
- if (!filesystem_->Write(MakeHeaderFilename(base_dir_).c_str(), &header,
- sizeof(header))) {
+ ScopedFd sfd(
+ filesystem_->OpenForWrite(MakeHeaderFilename(base_dir_).c_str()));
+ if (!sfd.is_valid() ||
+ !filesystem_->Write(sfd.get(), &header, sizeof(header)) ||
+ !filesystem_->DataSync(sfd.get())) {
return absl_ports::InternalError(absl_ports::StrCat(
"Failed to write DocStore header: ", MakeHeaderFilename(base_dir_)));
}
@@ -828,7 +847,7 @@
}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut(
- DocumentProto& document, NativePutDocumentStats* put_document_stats) {
+ DocumentProto& document, PutDocumentStatsProto* put_document_stats) {
std::unique_ptr<Timer> put_timer = clock_.GetNewTimer();
ICING_RETURN_IF_ERROR(document_validator_.Validate(document));
@@ -1404,30 +1423,62 @@
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<int64_t> DocumentStore::GetDiskUsage() const {
- ICING_ASSIGN_OR_RETURN(const int64_t document_log_disk_usage,
- document_log_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t document_key_mapper_disk_usage,
- document_key_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t document_id_mapper_disk_usage,
- document_id_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t score_cache_disk_usage,
- score_cache_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t filter_cache_disk_usage,
- filter_cache_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t namespace_mapper_disk_usage,
- namespace_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t corpus_mapper_disk_usage,
- corpus_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t corpus_score_cache_disk_usage,
- corpus_score_cache_->GetDiskUsage());
+int64_t GetValueOrDefault(const libtextclassifier3::StatusOr<int64_t>& value_or,
+ int64_t default_value) {
+ return (value_or.ok()) ? value_or.ValueOrDie() : default_value;
+}
- int64_t disk_usage = document_log_disk_usage +
- document_key_mapper_disk_usage +
- document_id_mapper_disk_usage + score_cache_disk_usage +
- filter_cache_disk_usage + namespace_mapper_disk_usage +
- corpus_mapper_disk_usage + corpus_score_cache_disk_usage;
- return disk_usage;
+DocumentStorageInfoProto DocumentStore::GetMemberStorageInfo() const {
+ DocumentStorageInfoProto storage_info;
+ storage_info.set_document_log_size(
+ GetValueOrDefault(document_log_->GetDiskUsage(), -1));
+ storage_info.set_key_mapper_size(
+ GetValueOrDefault(document_key_mapper_->GetDiskUsage(), -1));
+ storage_info.set_document_id_mapper_size(
+ GetValueOrDefault(document_id_mapper_->GetDiskUsage(), -1));
+ storage_info.set_score_cache_size(
+ GetValueOrDefault(score_cache_->GetDiskUsage(), -1));
+ storage_info.set_filter_cache_size(
+ GetValueOrDefault(filter_cache_->GetDiskUsage(), -1));
+ storage_info.set_namespace_id_mapper_size(
+ GetValueOrDefault(namespace_mapper_->GetDiskUsage(), -1));
+ storage_info.set_corpus_mapper_size(
+ GetValueOrDefault(corpus_mapper_->GetDiskUsage(), -1));
+ storage_info.set_corpus_score_cache_size(
+ GetValueOrDefault(corpus_score_cache_->GetDiskUsage(), -1));
+ return storage_info;
+}
+
+DocumentStorageInfoProto DocumentStore::CalculateDocumentStatusCounts(
+ DocumentStorageInfoProto storage_info) const {
+ int num_alive = 0;
+ int num_expired = 0;
+ int num_deleted = 0;
+ for (DocumentId document_id = 0;
+ document_id < document_id_mapper_->num_elements(); ++document_id) {
+ if (DoesDocumentExist(document_id)) {
+ ++num_alive;
+ } else {
+ IncrementDeletedOrExpired(document_id_mapper_.get(), document_id,
+ &num_deleted, &num_expired);
+ }
+ }
+ storage_info.set_num_alive_documents(num_alive);
+ storage_info.set_num_deleted_documents(num_deleted);
+ storage_info.set_num_expired_documents(num_expired);
+ return storage_info;
+}
+
+DocumentStorageInfoProto DocumentStore::GetStorageInfo() const {
+ DocumentStorageInfoProto storage_info = GetMemberStorageInfo();
+ int64_t directory_size = filesystem_->GetDiskUsage(base_dir_.c_str());
+ if (directory_size != Filesystem::kBadFileSize) {
+ storage_info.set_document_store_size(directory_size);
+ } else {
+ storage_info.set_document_store_size(-1);
+ }
+ storage_info.set_num_namespaces(namespace_mapper_->num_keys());
+ return CalculateDocumentStatusCounts(std::move(storage_info));
}
libtextclassifier3::Status DocumentStore::UpdateSchemaStore(
@@ -1577,7 +1628,8 @@
}
libtextclassifier3::Status DocumentStore::OptimizeInto(
- const std::string& new_directory, const LanguageSegmenter* lang_segmenter) {
+ const std::string& new_directory, const LanguageSegmenter* lang_segmenter,
+ OptimizeStatsProto* stats) {
// Validates directory
if (new_directory == base_dir_) {
return absl_ports::InvalidArgumentError(
@@ -1592,10 +1644,14 @@
// Writes all valid docs into new document store (new directory)
int size = document_id_mapper_->num_elements();
+ int num_deleted = 0;
+ int num_expired = 0;
for (DocumentId document_id = 0; document_id < size; document_id++) {
auto document_or = Get(document_id, /*clear_internal_fields=*/false);
if (absl_ports::IsNotFound(document_or.status())) {
// Skip nonexistent documents
+ IncrementDeletedOrExpired(document_id_mapper_.get(), document_id,
+ &num_deleted, &num_expired);
continue;
} else if (!document_or.ok()) {
// Real error, pass up
@@ -1640,7 +1696,11 @@
ICING_RETURN_IF_ERROR(
new_doc_store->SetUsageScores(new_document_id, usage_scores));
}
-
+ if (stats != nullptr) {
+ stats->set_num_original_documents(size);
+ stats->set_num_deleted_documents(num_deleted);
+ stats->set_num_expired_documents(num_expired);
+ }
ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk());
return libtextclassifier3::Status::OK;
}
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index b2908f0..3b8408d 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -29,6 +29,8 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/store/corpus-associated-scoring-data.h"
#include "icing/store/corpus-id.h"
@@ -122,7 +124,7 @@
static libtextclassifier3::StatusOr<DocumentStore::CreateResult> Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
- NativeInitializeStats* initialize_stats = nullptr);
+ InitializeStatsProto* initialize_stats = nullptr);
// Returns the maximum DocumentId that the DocumentStore has assigned. If
// there has not been any DocumentIds assigned, i.e. the DocumentStore is
@@ -152,10 +154,10 @@
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<DocumentId> Put(
const DocumentProto& document, int32_t num_tokens = 0,
- NativePutDocumentStats* put_document_stats = nullptr);
+ PutDocumentStatsProto* put_document_stats = nullptr);
libtextclassifier3::StatusOr<DocumentId> Put(
DocumentProto&& document, int32_t num_tokens = 0,
- NativePutDocumentStats* put_document_stats = nullptr);
+ PutDocumentStatsProto* put_document_stats = nullptr);
// Finds and returns the document identified by the given key (namespace +
// uri). If 'clear_internal_fields' is true, document level data that's
@@ -351,16 +353,11 @@
// INTERNAL on I/O error
libtextclassifier3::Status PersistToDisk();
- // Calculates and returns the disk usage in bytes. Rounds up to the nearest
- // block size.
+ // Calculates the StorageInfo for the Document Store.
//
- // Returns:
- // Disk usage on success
- // INTERNAL_ERROR on IO error
- //
- // TODO(tjbarron): consider returning a struct which has the breakdown of each
- // component.
- libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ DocumentStorageInfoProto GetStorageInfo() const;
// Update any derived data off of the SchemaStore with the new SchemaStore.
// This may include pointers, SchemaTypeIds, etc.
@@ -407,6 +404,8 @@
// reassigned so any files / classes that are based on old document ids may be
// outdated.
//
+ // stats will be set if non-null.
+ //
// NOTE: The tasks in this method are too expensive to be executed in
// real-time. The caller should decide how frequently and when to call this
// method based on device usage.
@@ -416,8 +415,8 @@
// INVALID_ARGUMENT if new_directory is same as current base directory
// INTERNAL_ERROR on IO error
libtextclassifier3::Status OptimizeInto(
- const std::string& new_directory,
- const LanguageSegmenter* lang_segmenter);
+ const std::string& new_directory, const LanguageSegmenter* lang_segmenter,
+ OptimizeStatsProto* stats = nullptr);
// Calculates status for a potential Optimize call. Includes how many docs
// there are vs how many would be optimized away. And also includes an
@@ -508,7 +507,7 @@
bool initialized_ = false;
libtextclassifier3::StatusOr<DataLoss> Initialize(
- NativeInitializeStats* initialize_stats);
+ InitializeStatsProto* initialize_stats);
// Creates sub-components and verifies the integrity of each sub-component.
//
@@ -576,8 +575,8 @@
// if it doesn't exist.
bool HeaderExists();
- // Update and replace the header file. Creates the header file if it doesn't
- // exist.
+ // Update, replace and persist the header file. Creates the header file if it
+ // doesn't exist.
//
// Returns:
// OK on success
@@ -586,7 +585,7 @@
libtextclassifier3::StatusOr<DocumentId> InternalPut(
DocumentProto& document,
- NativePutDocumentStats* put_document_stats = nullptr);
+ PutDocumentStatsProto* put_document_stats = nullptr);
// Helper function to do batch deletes. Documents with the given
// "namespace_id" and "schema_type_id" will be deleted. If callers don't need
@@ -688,6 +687,20 @@
// Sets usage scores for the given document.
libtextclassifier3::Status SetUsageScores(
DocumentId document_id, const UsageStore::UsageScores& usage_scores);
+
+ // Returns:
+ // - on success, a DocumentStorageInfoProto with the fields relating to the
+ // size of Document Store member variables populated.
+ // - INTERNAL on failure to get file size
+ DocumentStorageInfoProto GetMemberStorageInfo() const;
+
+ // Returns:
+ // - on success, the storage_info that was passed in but with the number of
+ // alive, deleted and expired documents also set.
+ // - OUT_OF_RANGE, this should never happen. This could only be returned if
+ // the document_id_mapper somehow became larger than the filter cache.
+ DocumentStorageInfoProto CalculateDocumentStatusCounts(
+ DocumentStorageInfoProto storage_info) const;
};
} // namespace lib
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index 7754373..440b48f 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -31,6 +31,7 @@
#include "icing/portable/equals-proto.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/store/corpus-associated-scoring-data.h"
#include "icing/store/corpus-id.h"
@@ -55,6 +56,7 @@
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::_;
using ::testing::Eq;
+using ::testing::Ge;
using ::testing::Gt;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
@@ -436,16 +438,16 @@
// Validates that deleting something non-existing won't append anything to
// ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
EXPECT_THAT(
document_store->Delete("nonexistent_namespace", "nonexistent_uri"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
@@ -566,7 +568,7 @@
// Validates that deleting something non-existing won't append anything to
// ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
EXPECT_THAT(doc_store
@@ -575,9 +577,9 @@
.status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
TEST_F(DocumentStoreTest, HardDeleteByNamespaceNonexistentNamespaceNotFound) {
@@ -590,7 +592,7 @@
// Validates that deleting something non-existing won't append anything to
// ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
EXPECT_THAT(doc_store
@@ -599,9 +601,9 @@
.status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNoExistingDocumentsNotFound) {
@@ -665,7 +667,7 @@
document4.set_namespace_("namespace.1");
document4.set_uri("uri2");
- int64_t ground_truth_size_before;
+ int64_t document_log_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -686,7 +688,7 @@
EXPECT_THAT(group_result.status, IsOk());
EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
- ground_truth_size_before = filesystem_.GetFileSize(
+ document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
} // Destructors should update checksum and persist all data to file.
@@ -710,9 +712,9 @@
std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+ EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -908,7 +910,7 @@
// Validates that deleting something non-existing won't append anything to
// ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
EXPECT_THAT(document_store
@@ -917,10 +919,10 @@
.status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
@@ -933,7 +935,7 @@
// Validates that deleting something non-existing won't append anything to
// ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
EXPECT_THAT(document_store
@@ -942,10 +944,10 @@
.status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNoExistingDocumentsNotFound) {
@@ -1016,7 +1018,7 @@
.SetSchema("message")
.SetCreationTimestampMs(1)
.Build();
- int64_t ground_truth_size_before;
+ int64_t document_log_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -1036,7 +1038,7 @@
EXPECT_THAT(group_result.status, IsOk());
EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
- ground_truth_size_before = filesystem_.GetFileSize(
+ document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
} // Destructors should update checksum and persist all data to file.
@@ -1060,9 +1062,9 @@
std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+ EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(document_store->Get(email_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -1100,7 +1102,7 @@
.SetSchema("message")
.SetCreationTimestampMs(1)
.Build();
- int64_t ground_truth_size_before;
+ int64_t document_log_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -1125,7 +1127,7 @@
EXPECT_THAT(document_store->Get(message_document_id),
IsOkAndHolds(EqualsProto(message_document)));
- ground_truth_size_before = filesystem_.GetFileSize(
+ document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
} // Destructors should update checksum and persist all data to file.
@@ -1156,9 +1158,9 @@
std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+ EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(document_store->Get(email_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -1507,7 +1509,7 @@
/*num_docs=*/1, /*sum_length_in_tokens=*/4)));
}
-TEST_F(DocumentStoreTest, GetDiskUsage) {
+TEST_F(DocumentStoreTest, GetStorageInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1515,8 +1517,8 @@
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_doc_store_size,
- doc_store->GetDiskUsage());
+ DocumentStorageInfoProto doc_store_storage_info = doc_store->GetStorageInfo();
+ int64_t empty_doc_store_size = doc_store_storage_info.document_store_size();
EXPECT_THAT(empty_doc_store_size, Gt(0));
DocumentProto document = DocumentBuilder()
@@ -1525,15 +1527,16 @@
.AddStringProperty("subject", "foo")
.Build();
- // Since our GetDiskUsage can only get sizes in increments of block_size, we
+ // Since GetStorageInfo can only get sizes in increments of block_size, we
// need to insert enough documents so the disk usage will increase by at least
// 1 block size. The number 100 is a bit arbitrary, gotten from manually
// testing.
for (int i = 0; i < 100; ++i) {
ICING_ASSERT_OK(doc_store->Put(document));
}
- EXPECT_THAT(doc_store->GetDiskUsage(),
- IsOkAndHolds(Gt(empty_doc_store_size)));
+ doc_store_storage_info = doc_store->GetStorageInfo();
+ EXPECT_THAT(doc_store_storage_info.document_store_size(),
+ Gt(empty_doc_store_size));
// Bad file system
MockFilesystem mock_filesystem;
@@ -1546,8 +1549,8 @@
std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem =
std::move(create_result.document_store);
- EXPECT_THAT(doc_store_with_mock_filesystem->GetDiskUsage(),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ doc_store_storage_info = doc_store_with_mock_filesystem->GetStorageInfo();
+ EXPECT_THAT(doc_store_storage_info.document_store_size(), Eq(-1));
}
TEST_F(DocumentStoreTest, MaxDocumentId) {
@@ -2231,7 +2234,7 @@
EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum));
}
-TEST_F(DocumentStoreTest, ComputeChecksumChangesOnModification) {
+TEST_F(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2247,6 +2250,24 @@
IsOkAndHolds(Not(Eq(checksum))));
}
+TEST_F(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_EXPECT_OK(document_store->Put(test_document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum());
+
+ UsageReport usage_report =
+ CreateUsageReport(test_document1_.namespace_(), test_document1_.uri(),
+ /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
+ ICING_EXPECT_OK(document_store->ReportUsage(usage_report));
+ EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
+}
+
TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
const std::string schema_store_dir = schema_store_dir_ + "_custom";
@@ -3438,17 +3459,66 @@
ASSERT_THAT(filesystem_.CopyFile(src.c_str(), dst.c_str()), true);
}
- NativeInitializeStats initializeStats;
+ InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get(), &initializeStats));
+ schema_store_.get(), &initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
// The store_cache trigger regeneration because its element size is
// inconsistent: expected 20 (current new size), actual 12 (as per the v0
// score_cache).
- EXPECT_TRUE(initializeStats.has_document_store_recovery_cause());
+ EXPECT_TRUE(initialize_stats.has_document_store_recovery_cause());
+}
+
+TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // Add three documents.
+ DocumentProto document1 = test_document1_;
+ document1.set_namespace_("namespace.1");
+ document1.set_uri("uri1");
+ ICING_ASSERT_OK(doc_store->Put(document1));
+
+ DocumentProto document2 = test_document1_;
+ document2.set_namespace_("namespace.1");
+ document2.set_uri("uri2");
+ document2.set_creation_timestamp_ms(fake_clock_.GetSystemTimeMilliseconds());
+ document2.set_ttl_ms(100);
+ ICING_ASSERT_OK(doc_store->Put(document2));
+
+ DocumentProto document3 = test_document1_;
+ document3.set_namespace_("namespace.1");
+ document3.set_uri("uri3");
+ ICING_ASSERT_OK(doc_store->Put(document3));
+
+ // Delete the first doc.
+ ICING_ASSERT_OK(doc_store->Delete(document1.namespace_(), document1.uri()));
+
+ // Expire the second doc.
+ fake_clock_.SetSystemTimeMilliseconds(document2.creation_timestamp_ms() +
+ document2.ttl_ms() + 1);
+
+ DocumentStorageInfoProto storage_info = doc_store->GetStorageInfo();
+ EXPECT_THAT(storage_info.num_alive_documents(), Eq(1));
+ EXPECT_THAT(storage_info.num_deleted_documents(), Eq(1));
+ EXPECT_THAT(storage_info.num_expired_documents(), Eq(1));
+ EXPECT_THAT(storage_info.document_store_size(), Ge(0));
+ EXPECT_THAT(storage_info.document_log_size(), Ge(0));
+ EXPECT_THAT(storage_info.key_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.document_id_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.score_cache_size(), Ge(0));
+ EXPECT_THAT(storage_info.filter_cache_size(), Ge(0));
+ EXPECT_THAT(storage_info.corpus_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.corpus_score_cache_size(), Ge(0));
+ EXPECT_THAT(storage_info.namespace_id_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.num_namespaces(), Eq(1));
}
} // namespace
diff --git a/icing/store/usage-store.cc b/icing/store/usage-store.cc
index 54896dc..7e5cebf 100644
--- a/icing/store/usage-store.cc
+++ b/icing/store/usage-store.cc
@@ -218,6 +218,10 @@
return usage_score_cache_->GetElementsFileSize();
}
+libtextclassifier3::StatusOr<int64_t> UsageStore::GetDiskUsage() const {
+ return usage_score_cache_->GetDiskUsage();
+}
+
libtextclassifier3::Status UsageStore::TruncateTo(DocumentId num_documents) {
if (num_documents >= usage_score_cache_->num_elements()) {
// No need to truncate
diff --git a/icing/store/usage-store.h b/icing/store/usage-store.h
index b7de970..fd77df4 100644
--- a/icing/store/usage-store.h
+++ b/icing/store/usage-store.h
@@ -157,6 +157,14 @@
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const;
+ // Calculates and returns the disk usage in bytes. Rounds up to the nearest
+ // block size.
+ //
+ // Returns:
+ // Disk usage on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+
// Resizes the storage so that only the usage scores of and before
// last_document_id are stored.
//
diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc
index 220c226..b2dbe4b 100644
--- a/icing/store/usage-store_test.cc
+++ b/icing/store/usage-store_test.cc
@@ -577,6 +577,41 @@
IsOkAndHolds(Gt(empty_file_size)));
}
+TEST_F(UsageStoreTest, GetDiskUsageEmpty) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // There's some internal metadata, so our disk usage will round up to 1 block.
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_disk_usage,
+ usage_store->GetDiskUsage());
+ EXPECT_THAT(empty_disk_usage, Gt(0));
+}
+
+TEST_F(UsageStoreTest, GetDiskUsageNonEmpty) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // There's some internal metadata, so our disk usage will round up to 1 block.
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_disk_usage,
+ usage_store->GetDiskUsage());
+
+ // Since our GetDiskUsage can only get sizes in increments of block_size, we
+ // need to insert enough usage reports so the disk usage will increase by at
+ // least 1 block size. The number 200 is a bit arbitrary, gotten from manually
+ // testing.
+ UsageReport usage_report = CreateUsageReport(
+ "namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
+ for (int i = 0; i < 200; ++i) {
+ usage_store->AddUsageReport(usage_report, /*document_id=*/i);
+ }
+
+ // We need to persist since iOS won't see the new disk allocations until after
+ // everything gets written.
+ usage_store->PersistToDisk();
+
+ EXPECT_THAT(usage_store->GetDiskUsage(), IsOkAndHolds(Gt(empty_disk_usage)));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index b7f54ba..dcb8bf3 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -267,7 +267,7 @@
}
}
-string ProtoStatusCodeToString(StatusProto::Code code) {
+std::string ProtoStatusCodeToString(StatusProto::Code code) {
switch (code) {
case StatusProto::OK:
return "OK";
diff --git a/icing/testing/snippet-helpers.cc b/icing/testing/snippet-helpers.cc
index fde0004..6a017ef 100644
--- a/icing/testing/snippet-helpers.cc
+++ b/icing/testing/snippet-helpers.cc
@@ -17,28 +17,37 @@
#include <algorithm>
#include <string_view>
+#include "icing/absl_ports/str_join.h"
#include "icing/proto/search.pb.h"
+#include "icing/schema/section-manager.h"
namespace icing {
namespace lib {
-const SnippetMatchProto* GetSnippetMatch(const SnippetProto& snippet_proto,
- const std::string& property_name,
- int snippet_index) {
- auto iterator = std::find_if(
- snippet_proto.entries().begin(), snippet_proto.entries().end(),
- [&property_name](const SnippetProto::EntryProto& entry) {
- return entry.property_name() == property_name;
- });
- if (iterator == snippet_proto.entries().end() ||
- iterator->snippet_matches_size() <= snippet_index) {
- return nullptr;
+namespace {
+
+// Returns the property index and the property name with the index removed.
+// Examples:
+// GetPropertyIndex("foo") will return ["foo", 0]
+// GetPropertyIndex("foo[5]") will return ["foo", 5]
+std::pair<std::string_view, int> GetPropertyIndex(std::string_view property) {
+ size_t l_bracket = property.find(kLBracket);
+ if (l_bracket == std::string_view::npos || l_bracket >= property.length()) {
+ return {property, 0};
}
- return &iterator->snippet_matches(snippet_index);
+ size_t r_bracket = property.find(kRBracket, l_bracket);
+ if (r_bracket == std::string_view::npos || r_bracket - l_bracket < 2) {
+ return {property, 0};
+ }
+ std::string index_string =
+ std::string(property.substr(l_bracket + 1, r_bracket - l_bracket - 1));
+ return {property.substr(0, l_bracket), std::stoi(index_string)};
}
+} // namespace
+
const PropertyProto* GetProperty(const DocumentProto& document,
- const std::string& property_name) {
+ std::string_view property_name) {
const PropertyProto* property = nullptr;
for (const PropertyProto& prop : document.properties()) {
if (prop.name() == property_name) {
@@ -48,32 +57,55 @@
return property;
}
-std::string GetWindow(const DocumentProto& document,
- const SnippetProto& snippet_proto,
- const std::string& property_name, int snippet_index) {
- const SnippetMatchProto* match =
- GetSnippetMatch(snippet_proto, property_name, snippet_index);
- const PropertyProto* property = GetProperty(document, property_name);
- if (match == nullptr || property == nullptr) {
- return "";
+std::vector<std::string_view> GetWindows(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto) {
+ std::vector<std::string_view> windows;
+ for (const SnippetMatchProto& match : snippet_proto.snippet_matches()) {
+ windows.push_back(
+ content.substr(match.window_position(), match.window_bytes()));
}
- std::string_view value = property->string_values(match->values_index());
- return std::string(
- value.substr(match->window_position(), match->window_bytes()));
+ return windows;
}
-std::string GetMatch(const DocumentProto& document,
- const SnippetProto& snippet_proto,
- const std::string& property_name, int snippet_index) {
- const SnippetMatchProto* match =
- GetSnippetMatch(snippet_proto, property_name, snippet_index);
- const PropertyProto* property = GetProperty(document, property_name);
- if (match == nullptr || property == nullptr) {
- return "";
+std::vector<std::string_view> GetMatches(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto) {
+ std::vector<std::string_view> matches;
+ for (const SnippetMatchProto& match : snippet_proto.snippet_matches()) {
+ matches.push_back(content.substr(match.exact_match_position(),
+ match.exact_match_bytes()));
}
- std::string_view value = property->string_values(match->values_index());
- return std::string(
- value.substr(match->exact_match_position(), match->exact_match_bytes()));
+ return matches;
+}
+
+std::string_view GetString(const DocumentProto* document,
+ std::string_view property_path) {
+ std::vector<std::string_view> properties =
+ absl_ports::StrSplit(property_path, kPropertySeparator);
+ for (int i = 0; i < properties.size(); ++i) {
+ std::string_view property = properties.at(i);
+ int property_index;
+ std::tie(property, property_index) = GetPropertyIndex(property);
+ const PropertyProto* prop = GetProperty(*document, property);
+ if (prop == nullptr) {
+ // requested property doesn't exist in the document. Return empty string.
+ return "";
+ }
+ if (i == properties.size() - 1) {
+ // The last property. Get the string_value
+ if (prop->string_values_size() - 1 < property_index) {
+ // The requested string doesn't exist. Return empty string.
+ return "";
+ }
+ return prop->string_values(property_index);
+ } else if (prop->document_values_size() - 1 < property_index) {
+ // The requested subproperty doesn't exist. return an empty string.
+ return "";
+ } else {
+ // Go to the next subproperty.
+ document = &prop->document_values(property_index);
+ }
+ }
+ return "";
}
} // namespace lib
diff --git a/icing/testing/snippet-helpers.h b/icing/testing/snippet-helpers.h
index 124e421..defadeb 100644
--- a/icing/testing/snippet-helpers.h
+++ b/icing/testing/snippet-helpers.h
@@ -23,36 +23,32 @@
namespace icing {
namespace lib {
-// Retrieve pointer to the snippet_index'th SnippetMatchProto within the
-// EntryProto identified by property_name within snippet_proto.
-// Returns nullptr
-// - if there is no EntryProto within snippet_proto corresponding to
-// property_name.
-// - if there is no SnippetMatchProto at snippet_index within the EntryProto
-const SnippetMatchProto* GetSnippetMatch(const SnippetProto& snippet_proto,
- const std::string& property_name,
- int snippet_index);
-
// Retrieve pointer to the PropertyProto identified by property_name.
// Returns nullptr if no such property exists.
+//
+// NOTE: This function does not handle nesting or indexes. "foo.bar" will return
+// a nullptr even if document contains a property called "foo" that contains a
+// subproperty called "bar".
const PropertyProto* GetProperty(const DocumentProto& document,
const std::string& property_name);
-// Retrieves the window defined by the SnippetMatchProto returned by
-// GetSnippetMatch(snippet_proto, property_name, snippet_index) for the property
-// returned by GetProperty(document, property_name).
-// Returns "" if no such property, snippet or window exists.
-std::string GetWindow(const DocumentProto& document,
- const SnippetProto& snippet_proto,
- const std::string& property_name, int snippet_index);
+// Retrieves all windows defined by the snippet_proto for the content.
+std::vector<std::string_view> GetWindows(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto);
-// Retrieves the match defined by the SnippetMatchProto returned by
-// GetSnippetMatch(snippet_proto, property_name, snippet_index) for the property
-// returned by GetProperty(document, property_name).
-// Returns "" if no such property or snippet exists.
-std::string GetMatch(const DocumentProto& document,
- const SnippetProto& snippet_proto,
- const std::string& property_name, int snippet_index);
+// Retrieves all matches defined by the snippet_proto for the content.
+std::vector<std::string_view> GetMatches(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto);
+
+// Retrieves the string value held in the document corresponding to the
+// property_path.
+// Example:
+// - GetString(doc, "foo") will retrieve the first string value in the
+// property "foo" in document or an empty string if it doesn't exist.
+// - GetString(doc, "foo[1].bar[2]") will retrieve the third string value in
+// the subproperty "bar" of the second document value in the property "foo".
+std::string_view GetString(const DocumentProto* document,
+ std::string_view property_path);
} // namespace lib
} // namespace icing
diff --git a/java/src/com/google/android/icing/BreakIteratorBatcher.java b/java/src/com/google/android/icing/BreakIteratorBatcher.java
index 58efbfc..2b87327 100644
--- a/java/src/com/google/android/icing/BreakIteratorBatcher.java
+++ b/java/src/com/google/android/icing/BreakIteratorBatcher.java
@@ -14,9 +14,6 @@
package com.google.android.icing;
-import androidx.annotation.NonNull;
-import androidx.annotation.RestrictTo;
-
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
@@ -38,20 +35,17 @@
* utf16Boundaries = brkItrBatcher.next(5);
* assertThat(utf16Boundaries).asList().containsExactly(9);
* }</pre>
- *
- * @hide
*/
-@RestrictTo(RestrictTo.Scope.LIBRARY_GROUP)
public class BreakIteratorBatcher {
private final BreakIterator iterator;
- public BreakIteratorBatcher(@NonNull Locale locale) {
+ public BreakIteratorBatcher(Locale locale) {
this.iterator = BreakIterator.getWordInstance(locale);
}
/* Direct calls to BreakIterator */
- public void setText(@NonNull String text) {
+ public void setText(String text) {
iterator.setText(text);
}
@@ -73,7 +67,6 @@
* the end of the text (returns BreakIterator#DONE), then only the results of the previous calls
* in that batch will be returned.
*/
- @NonNull
public int[] next(int batchSize) {
List<Integer> breakIndices = new ArrayList<>(batchSize);
for (int i = 0; i < batchSize; ++i) {
diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java
index 88d0578..ff0aadf 100644
--- a/java/src/com/google/android/icing/IcingSearchEngine.java
+++ b/java/src/com/google/android/icing/IcingSearchEngine.java
@@ -41,6 +41,7 @@
import com.google.android.icing.proto.SearchSpecProto;
import com.google.android.icing.proto.SetSchemaResultProto;
import com.google.android.icing.proto.StatusProto;
+import com.google.android.icing.proto.StorageInfoResultProto;
import com.google.android.icing.proto.UsageReport;
import com.google.protobuf.ExtensionRegistryLite;
import com.google.protobuf.InvalidProtocolBufferException;
@@ -501,6 +502,29 @@
}
@NonNull
+ public StorageInfoResultProto getStorageInfo() {
+ throwIfClosed();
+
+ byte[] storageInfoResultProtoBytes = nativeGetStorageInfo(this);
+ if (storageInfoResultProtoBytes == null) {
+ Log.e(TAG, "Received null StorageInfoResultProto from native.");
+ return StorageInfoResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return StorageInfoResultProto.parseFrom(
+ storageInfoResultProtoBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing StorageInfoResultProto.", e);
+ return StorageInfoResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
public ResetResultProto reset() {
throwIfClosed();
@@ -574,5 +598,7 @@
private static native byte[] nativeGetOptimizeInfo(IcingSearchEngine instance);
+ private static native byte[] nativeGetStorageInfo(IcingSearchEngine instance);
+
private static native byte[] nativeReset(IcingSearchEngine instance);
}
diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
index 56edaf1..409cdb7 100644
--- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
+++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
@@ -45,6 +45,7 @@
import com.google.android.icing.proto.SearchSpecProto;
import com.google.android.icing.proto.SetSchemaResultProto;
import com.google.android.icing.proto.StatusProto;
+import com.google.android.icing.proto.StorageInfoResultProto;
import com.google.android.icing.proto.StringIndexingConfig;
import com.google.android.icing.proto.StringIndexingConfig.TokenizerType;
import com.google.android.icing.proto.TermMatchType;
@@ -417,6 +418,14 @@
}
@Test
+ public void testGetStorageInfo() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ StorageInfoResultProto storageInfoResultProto = icingSearchEngine.getStorageInfo();
+ assertStatusOk(storageInfoResultProto.getStatus());
+ }
+
+ @Test
public void testGetAllNamespaces() throws Exception {
assertStatusOk(icingSearchEngine.initialize().getStatus());
diff --git a/proto/icing/proto/document.proto b/proto/icing/proto/document.proto
index d55b7e2..9a4e5b9 100644
--- a/proto/icing/proto/document.proto
+++ b/proto/icing/proto/document.proto
@@ -110,11 +110,11 @@
// go/icing-library-apis.
optional StatusProto status = 1;
- // Stats of the function call. Inside NativePutDocumentStats, the function
+ // Stats of the function call. Inside PutDocumentStatsProto, the function
// call latency 'latency_ms' will always be populated. The other fields will
// be accurate only when the status above is OK. See logging.proto for
// details.
- optional NativePutDocumentStats native_put_document_stats = 2;
+ optional PutDocumentStatsProto put_document_stats = 2;
}
// Result of a call to IcingSearchEngine.Get
@@ -167,7 +167,7 @@
optional StatusProto status = 1;
// Stats for delete execution performance.
- optional NativeDeleteStats delete_stats = 2;
+ optional DeleteStatsProto delete_stats = 2;
}
// Result of a call to IcingSearchEngine.DeleteByNamespace
@@ -186,7 +186,7 @@
optional StatusProto status = 1;
// Stats for delete execution performance.
- optional NativeDeleteStats delete_stats = 2;
+ optional DeleteStatsProto delete_stats = 2;
}
// Result of a call to IcingSearchEngine.DeleteBySchemaType
@@ -205,7 +205,7 @@
optional StatusProto status = 1;
// Stats for delete execution performance.
- optional NativeDeleteStats delete_stats = 2;
+ optional DeleteStatsProto delete_stats = 2;
}
// Result of a call to IcingSearchEngine.DeleteByQuery
@@ -224,5 +224,5 @@
optional StatusProto status = 1;
// Stats for delete execution performance.
- optional NativeDeleteStats delete_stats = 2;
+ optional DeleteStatsProto delete_stats = 2;
}
diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto
index ae2944c..ab2556d 100644
--- a/proto/icing/proto/initialize.proto
+++ b/proto/icing/proto/initialize.proto
@@ -16,12 +16,11 @@
package icing.lib;
-import "icing/proto/status.proto";
import "icing/proto/logging.proto";
+import "icing/proto/status.proto";
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
-
option objc_class_prefix = "ICNG";
// Next tag: 5
@@ -89,11 +88,11 @@
// go/icing-library-apis.
optional StatusProto status = 1;
- // Stats of the function call. Inside NativeInitializeStats, the function call
+ // Stats of the function call. Inside InitializeStatsProto, the function call
// latency 'latency_ms' will always be populated. The other fields will be
// accurate only when the status above is OK or WARNING_DATA_LOSS. See
// logging.proto for details.
- optional NativeInitializeStats native_initialize_stats = 2;
+ optional InitializeStatsProto initialize_stats = 2;
// TODO(b/147699081): Add a field to indicate lost_schema and lost_documents.
// go/icing-library-apis.
diff --git a/proto/icing/proto/internal/optimize.proto b/proto/icing/proto/internal/optimize.proto
new file mode 100644
index 0000000..4ed3d73
--- /dev/null
+++ b/proto/icing/proto/internal/optimize.proto
@@ -0,0 +1,29 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package icing.lib;
+
+option java_package = "com.google.android.icing.internal.proto";
+option java_multiple_files = true;
+option objc_class_prefix = "ICNG";
+
+// A status that is saved internally in Icing to track information about how
+// often Optimize runs.
+// Next tag: 2
+message OptimizeStatusProto {
+ // The Epoch time at which the last successful optimize ran.
+ optional int64 last_successful_optimize_run_time_ms = 1;
+}
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
index 09ec756..e9509d4 100644
--- a/proto/icing/proto/logging.proto
+++ b/proto/icing/proto/logging.proto
@@ -24,7 +24,7 @@
// Stats of the top-level function IcingSearchEngine::Initialize().
// Next tag: 11
-message NativeInitializeStats {
+message InitializeStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -95,7 +95,7 @@
// Stats of the top-level function IcingSearchEngine::Put().
// Next tag: 7
-message NativePutDocumentStats {
+message PutDocumentStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -126,7 +126,7 @@
// Stats of the top-level function IcingSearchEngine::Search() and
// IcingSearchEngine::GetNextPage().
// Next tag: 15
-message NativeQueryStats {
+message QueryStatsProto {
// Number of terms in the query string.
optional int32 num_terms = 1;
@@ -178,7 +178,7 @@
// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType,
// IcingSearchEngine::DeleteByQuery.
// Next tag: 4
-message NativeDeleteStats {
+message DeleteStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -204,4 +204,4 @@
// Number of documents deleted by this call.
optional int32 num_documents_deleted = 3;
-}
\ No newline at end of file
+}
diff --git a/proto/icing/proto/optimize.proto b/proto/icing/proto/optimize.proto
index 1baa64c..42290f3 100644
--- a/proto/icing/proto/optimize.proto
+++ b/proto/icing/proto/optimize.proto
@@ -23,7 +23,7 @@
option objc_class_prefix = "ICNG";
// Result of a call to IcingSearchEngine.Optimize
-// Next tag: 2
+// Next tag: 3
message OptimizeResultProto {
// Status code can be one of:
// OK
@@ -35,12 +35,13 @@
// See status.proto for more details.
optional StatusProto status = 1;
+ optional OptimizeStatsProto optimize_stats = 2;
// TODO(b/147699081): Add a field to indicate lost_schema and lost_documents.
// go/icing-library-apis.
}
// Result of a call to IcingSearchEngine.GetOptimizeInfo
-// Next tag: 4
+// Next tag: 5
message GetOptimizeInfoResultProto {
// Status code can be one of:
// OK
@@ -57,4 +58,37 @@
// Estimated bytes that could be recovered. The exact size per document isn't
// tracked, so this is based off an average document size.
optional int64 estimated_optimizable_bytes = 3;
+
+ // The amount of time since the last optimize ran.
+ optional int64 time_since_last_optimize_ms = 4;
+}
+
+// Next tag: 10
+message OptimizeStatsProto {
+ // Overall time used for the function call.
+ optional int32 latency_ms = 1;
+
+ // Time used to optimize the document store.
+ optional int32 document_store_optimize_latency_ms = 2;
+
+ // Time used to restore the index.
+ optional int32 index_restoration_latency_ms = 3;
+
+ // Number of documents before the optimization.
+ optional int32 num_original_documents = 4;
+
+ // Number of documents deleted.
+ optional int32 num_deleted_documents = 5;
+
+ // Number of documents expired.
+ optional int32 num_expired_documents = 6;
+
+ // Size of storage before the optimize.
+ optional int64 storage_size_before = 7;
+
+ // Size of storage after the optimize.
+ optional int64 storage_size_after = 8;
+
+ // The amount of time since the last optimize ran.
+ optional int64 time_since_last_optimize_ms = 9;
}
diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto
index 6c4e3c9..afa9126 100644
--- a/proto/icing/proto/search.proto
+++ b/proto/icing/proto/search.proto
@@ -107,19 +107,6 @@
// The representation of a single match within a DocumentProto property.
// Next tag: 6
message SnippetMatchProto {
- // Properties may have multiple values. values_index indicates which of these
- // multiple string values the match occurred in. For properties with only one
- // value, the values_index will always be 0.
- // Ex. "Recipients" [
- // { { "Name" : "Daffy Duck" }
- // { "EmailAddress" : "daffduck@gmail.com" } },
- // { { "Name" : "Donald Duck" }
- // { "EmailAddress" : "donduck@gmail.com" } }
- // "Daffy Duck" is the string value with a value_index of 0 for property
- // "Recipients.Name". "Donald Duck" is the string value with a value_index of
- // 1 for property "Recipients.Name".
- optional int32 values_index = 1;
-
// The position and length within the matched string at which the exact
// match begins.
optional int32 exact_match_position = 2;
@@ -130,6 +117,8 @@
optional int32 window_position = 4;
optional int32 window_bytes = 5;
+
+ reserved 1;
}
// A Proto representing all snippets for a single DocumentProto.
@@ -139,9 +128,29 @@
// property values in the corresponding DocumentProto.
// Next tag: 3
message EntryProto {
- // A '.'-delimited sequence of property names indicating which property in
- // the DocumentProto these snippets correspond to.
- // Example properties: 'body', 'sender.name', 'sender.emailaddress', etc.
+ // A property path indicating which property in the DocumentProto these
+ // snippets correspond to. Property paths will contain 1) property names,
+ // 2) the property separator character '.' used to represent nested property
+ // and 3) indices surrounded by brackets to represent a specific value in
+ // that property.
+ //
+ // Example properties:
+ // - 'body' : the first and only string value of a top-level
+ // property called 'body'.
+ // - 'sender.name' : the first and only string value of a property
+ // called 'name' that is a subproperty of a
+ // property called 'sender'.
+ // - 'bcc[1].emailaddress': the first and only string value of a property
+ // called 'emailaddress' that is a subproperty of
+ // the second document value of a property called
+ // 'bcc'.
+ // - 'attachments[0]' : the first (of more than one) string value of a
+ // property called 'attachments'.
+ // NOTE: If there is only a single value for a property (like
+ // 'sender.name'), then no value index will be added to the property path.
+ // An index of [0] is implied. If there is more than one value for a
+ // property, then the value index will be added to the property path (like
+ // 'attachments[0]').
optional string property_name = 1;
repeated SnippetMatchProto snippet_matches = 2;
@@ -198,7 +207,7 @@
// LINT.ThenChange(//depot/google3/icing/result/result-state-manager.h:kInvalidNextPageToken)
// Stats for query execution performance.
- optional NativeQueryStats query_stats = 5;
+ optional QueryStatsProto query_stats = 5;
}
// Next tag: 3
diff --git a/proto/icing/proto/storage.proto b/proto/icing/proto/storage.proto
new file mode 100644
index 0000000..9e952fe
--- /dev/null
+++ b/proto/icing/proto/storage.proto
@@ -0,0 +1,139 @@
+syntax = "proto2";
+
+package icing.lib;
+
+import "icing/proto/status.proto";
+
+option java_package = "com.google.android.icing.proto";
+option java_multiple_files = true;
+option objc_class_prefix = "ICNG";
+
+// Next tag: 14
+message DocumentStorageInfoProto {
+ // Number of alive documents.
+ optional int32 num_alive_documents = 1;
+
+ // Number of deleted documents.
+ optional int32 num_deleted_documents = 2;
+
+ // Number of expired documents.
+ optional int32 num_expired_documents = 3;
+
+ // Total size of the document store in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 document_store_size = 4;
+
+ // Total size of the ground truth in bytes. The ground truth may
+ // include deleted or expired documents. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 document_log_size = 5;
+
+ // Size of the key mapper in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 key_mapper_size = 6;
+
+ // Size of the document id mapper in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 document_id_mapper_size = 7;
+
+ // Size of the score cache in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 score_cache_size = 8;
+
+ // Size of the filter cache in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 filter_cache_size = 9;
+
+ // Size of the corpus mapper in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 corpus_mapper_size = 10;
+
+ // Size of the corpus score cache in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 corpus_score_cache_size = 11;
+
+ // Size of the namespace id mapper in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 namespace_id_mapper_size = 12;
+
+ // Number of namespaces seen from the current documents.
+ optional int32 num_namespaces = 13;
+}
+
+// Next tag: 5
+message SchemaStoreStorageInfoProto {
+ // Size of the schema store in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 schema_store_size = 1;
+
+ // Total number of schema types.
+ optional int32 num_schema_types = 2;
+
+ // Total number of all sections across all types.
+ optional int32 num_total_sections = 3;
+
+ // Total number of types at the current section limit.
+ optional int32 num_schema_types_sections_exhausted = 4;
+}
+
+// Next tag: 9
+message IndexStorageInfoProto {
+ // Total size of the index in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 index_size = 1;
+
+ // Size of the lite index lexicon in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 lite_index_lexicon_size = 2;
+
+ // Size of the lite index hit buffer in bytes. Will be set to -1 if an IO
+ // error is encountered while calculating this field.
+ optional int64 lite_index_hit_buffer_size = 3;
+
+ // Size of the main index lexicon in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 main_index_lexicon_size = 4;
+
+ // Size of the main index storage in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 main_index_storage_size = 5;
+
+ // Size of one main index block in bytes.
+ optional int64 main_index_block_size = 6;
+
+ // Number of main index blocks.
+ optional int32 num_blocks = 7;
+
+ // Percentage of the main index blocks that are free, assuming
+ // allocated blocks are fully used.
+ optional float min_free_fraction = 8;
+}
+
+// Next tag: 5
+message StorageInfoProto {
+ // Total size of Icing's storage in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 total_storage_size = 1;
+
+ // Storage information of the document store.
+ optional DocumentStorageInfoProto document_storage_info = 2;
+
+ // Storage information of the schema store.
+ optional SchemaStoreStorageInfoProto schema_store_storage_info = 3;
+
+ // Storage information of the index.
+ optional IndexStorageInfoProto index_storage_info = 4;
+}
+
+// Next tag: 3
+message StorageInfoResultProto {
+ // Status code can be one of:
+ // OK
+ // FAILED_PRECONDITION
+ //
+ // See status.proto for more details.
+ optional StatusProto status = 1;
+
+ // Storage information of Icing.
+ optional StorageInfoProto storage_info = 2;
+}
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index af8248d..155baba 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=351841227)
+set(synced_AOSP_CL_number=360753101)