Merge remote-tracking branch 'goog/androidx-platform-dev'
* goog/androidx-platform-dev:
Update Icing from upstream.
Add Sync CL number to AOSP external icing.
Update Icing from upstream.
Update Icing from upstream.
Migrate existing jarjar plugin users to shadow plugin
Test: Presubmit
Change-Id: I00137d99897df8567d31a50dcb3c58bf9a5662e6
diff --git a/CMakeLists.txt b/CMakeLists.txt
index e520663..a740924 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -47,27 +47,6 @@
set(ICU_TARGET_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/icu-target")
add_subdirectory(${ICU_SOURCE_DIR} ${ICU_TARGET_BINARY_DIR})
-# Creates a file deps_name.cmake to save dependencies in it. This file should be
-# treated as part of CMakeLists.txt. This file should stay in .gitignore
-# TODO: When supporting cmake v3.12 or higher, use CONFIGURE_DEPENDS in the glob
-# and remove this section.
-function(update_deps_file deps_name deps)
- set(DEPS_FILE ${deps_name}.cmake.gen)
- set(CONTENT "# generated by make process.\nset(Tmp_${deps_name} ${deps})\n")
- set(EXISTING_CONTENT "")
- if(EXISTS ${DEPS_FILE})
- file(READ ${DEPS_FILE} EXISTING_CONTENT)
- endif()
- # Compare the new contents with the existing file, if it exists and is the same
- # we don't want to trigger a make by changing its timestamp.
- if(NOT EXISTING_CONTENT STREQUAL CONTENT)
- file(WRITE ${DEPS_FILE} ${CONTENT})
- endif()
- # Include the file so it's tracked as a generation dependency we don't
- # need the content.
- include(${DEPS_FILE})
-endfunction(update_deps_file)
-
# Glob Icing proto sources. Results look like this: icing/proto/document.proto
file(
GLOB_RECURSE
@@ -76,7 +55,6 @@
"*.proto")
message(STATUS "Icing_PROTO_FILES=${Icing_PROTO_FILES}")
-update_deps_file("IcingProtoFiles" "${Icing_PROTO_FILES}")
# Run protoc on Icing_PROTO_FILES to generate pb.cc and pb.h files
# The DEPENDS section of add_custom_command could trigger a remake if any proto
@@ -115,7 +93,9 @@
icing/*.cc icing/*.h
)
-update_deps_file("IcingCCSources" "${Icing_CC_SOURCES}")
+# TODO(b/170611579): When supporting cmake v3.12 or higher, use CONFIGURE_DEPENDS
+# in the glob and remove this section.
+include(synced_AOSP_CL_number.txt)
# Exclude the same types of files as Android.bp. See the comments there.
list(FILTER Icing_CC_SOURCES EXCLUDE REGEX "^icing/.*[^a-zA-Z0-9]test[^a-zA-Z0-9].*$")
diff --git a/build.gradle b/build.gradle
index 6d13dc2..437f57f 100644
--- a/build.gradle
+++ b/build.gradle
@@ -14,22 +14,18 @@
* limitations under the License.
*/
-import org.anarres.gradle.plugin.jarjar.JarjarTask
-
import static androidx.build.SupportConfig.*
import static androidx.build.dependencies.DependenciesKt.*
buildscript {
dependencies {
classpath('gradle.plugin.com.google.protobuf:protobuf-gradle-plugin:0.8.13')
- classpath('org.anarres.jarjar:jarjar-gradle:1.0.1')
}
}
plugins {
id('com.android.library')
id('com.google.protobuf')
- id('org.anarres.jarjar')
}
android {
@@ -82,43 +78,26 @@
}
}
-// Create jarjar artifact for all variants (debug/release)
+// Create export artifact for all variants (debug/release) for JarJaring
android.libraryVariants.all { variant ->
def variantName = variant.name
def suffix = variantName.capitalize()
- def jarjarTask = tasks.create("jarjar${suffix}", JarjarTask) {
- destinationName "icing-java-${variantName}-jarjar.jar"
+ def exportJarTask = tasks.register("exportJar${suffix}", Jar) {
+ archiveBaseName.set("icing-${variantName}")
-
- dependsOn protoLiteJarWithoutProtoFiles
- from files(protoLiteJarWithoutProtoFiles.archiveFile.get().getAsFile())
+ // The proto-lite dependency includes .proto files, which are not used by icing. When apps
+ // depend on appsearch as well as proto-lite directly, these files conflict since jarjar
+ // only renames the java classes. Remove them here since they are unused.
+ // Expand the jar and remove any .proto files.
+ from(zipTree(configurations.detachedConfiguration(
+ dependencies.create(PROTOBUF_LITE)).getSingleFile())) {
+ exclude("**/*.proto")
+ }
from files(variant.javaCompileProvider.get().destinationDir)
dependsOn variant.javaCompileProvider.get()
- classRename 'com.google.protobuf.**', 'com.google.android.icing.protobuf.@1'
}
- def jarjarConf = configurations.register("jarjar${suffix}")
- artifacts.add("${jarjarConf.name}", jarjarTask.destinationPath) {
- name "icing-java-${variantName}-jarjar"
- type 'jar'
- builtBy jarjarTask
- }
+ def exportConfiguration = configurations.register("export${suffix}")
+ artifacts.add(exportConfiguration.name, exportJarTask.flatMap { it.archiveFile })
}
-
-// The proto-lite dependency includes .proto files, which are not used by icing. When apps depend on
-// appsearch as well as proto-lite directly, these files conflict since jarjar only renames the java
-// classes. Remove them here since they are unused.
-tasks.register("protoLiteJarWithoutProtoFiles", Jar){
- // Get proto lite dependency as a jar file:
- def jarFile = configurations.detachedConfiguration(
- dependencies.create('com.google.protobuf:protobuf-javalite:3.10.0')).getSingleFile()
-
- // Expand the jar and remove any .proto files.
- from(zipTree(jarFile)) {
- exclude("**/*.proto")
- }
-
- into 'icing-proto-lite-dep-stripped'
-}
-
diff --git a/icing/document-builder.h b/icing/document-builder.h
index 4c95b89..ba68ec5 100644
--- a/icing/document-builder.h
+++ b/icing/document-builder.h
@@ -71,11 +71,6 @@
return *this;
}
- DocumentBuilder& ClearCustomProperties() {
- document_.clear_custom_properties();
- return *this;
- }
-
// Takes a property name and any number of string values.
template <typename... V>
DocumentBuilder& AddStringProperty(std::string property_name,
@@ -83,14 +78,6 @@
return AddStringProperty(std::move(property_name), {string_values...});
}
- // Takes a custom property name and any number of string values.
- template <typename... V>
- DocumentBuilder& AddCustomStringProperty(std::string property_name,
- V... string_values) {
- return AddCustomStringProperty(std::move(property_name),
- {string_values...});
- }
-
// Takes a property name and any number of int64_t values.
template <typename... V>
DocumentBuilder& AddInt64Property(std::string property_name,
@@ -98,13 +85,6 @@
return AddInt64Property(std::move(property_name), {int64_values...});
}
- // Takes a custom property name and any number of int64_t values.
- template <typename... V>
- DocumentBuilder& AddCustomInt64Property(std::string property_name,
- V... int64_values) {
- return AddCustomInt64Property(std::move(property_name), {int64_values...});
- }
-
// Takes a property name and any number of double values.
template <typename... V>
DocumentBuilder& AddDoubleProperty(std::string property_name,
@@ -112,14 +92,6 @@
return AddDoubleProperty(std::move(property_name), {double_values...});
}
- // Takes a custom property name and any number of double values.
- template <typename... V>
- DocumentBuilder& AddCustomDoubleProperty(std::string property_name,
- V... double_values) {
- return AddCustomDoubleProperty(std::move(property_name),
- {double_values...});
- }
-
// Takes a property name and any number of boolean values.
template <typename... V>
DocumentBuilder& AddBooleanProperty(std::string property_name,
@@ -127,28 +99,12 @@
return AddBooleanProperty(std::move(property_name), {boolean_values...});
}
- // Takes a custom property name and any number of boolean values.
- template <typename... V>
- DocumentBuilder& AddCustomBooleanProperty(std::string property_name,
- V... boolean_values) {
- return AddCustomBooleanProperty(std::move(property_name),
- {boolean_values...});
- }
-
// Takes a property name and any number of bytes values.
template <typename... V>
DocumentBuilder& AddBytesProperty(std::string property_name,
V... bytes_values) {
return AddBytesProperty(std::move(property_name), {bytes_values...});
}
-
- // Takes a custom property name and any number of bytes values.
- template <typename... V>
- DocumentBuilder& AddCustomBytesProperty(std::string property_name,
- V... bytes_values) {
- return AddCustomBytesProperty(std::move(property_name), {bytes_values...});
- }
-
// Takes a property name and any number of document values.
template <typename... V>
DocumentBuilder& AddDocumentProperty(std::string property_name,
@@ -156,14 +112,6 @@
return AddDocumentProperty(std::move(property_name), {document_values...});
}
- // Takes a custom property name and any number of document values.
- template <typename... V>
- DocumentBuilder& AddCustomDocumentProperty(std::string property_name,
- V&&... document_values) {
- return AddCustomDocumentProperty(std::move(property_name),
- {document_values...});
- }
-
DocumentProto Build() const { return document_; }
private:
@@ -180,17 +128,6 @@
return *this;
}
- DocumentBuilder& AddCustomStringProperty(
- std::string property_name,
- std::initializer_list<std::string_view> string_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (std::string_view string_value : string_values) {
- custom_property->mutable_string_values()->Add(std::string(string_value));
- }
- return *this;
- }
-
DocumentBuilder& AddInt64Property(
std::string property_name, std::initializer_list<int64_t> int64_values) {
auto property = document_.add_properties();
@@ -201,16 +138,6 @@
return *this;
}
- DocumentBuilder& AddCustomInt64Property(
- std::string property_name, std::initializer_list<int64_t> int64_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (int64_t int64_value : int64_values) {
- custom_property->mutable_int64_values()->Add(int64_value);
- }
- return *this;
- }
-
DocumentBuilder& AddDoubleProperty(
std::string property_name, std::initializer_list<double> double_values) {
auto property = document_.add_properties();
@@ -221,16 +148,6 @@
return *this;
}
- DocumentBuilder& AddCustomDoubleProperty(
- std::string property_name, std::initializer_list<double> double_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (double double_value : double_values) {
- custom_property->mutable_double_values()->Add(double_value);
- }
- return *this;
- }
-
DocumentBuilder& AddBooleanProperty(
std::string property_name, std::initializer_list<bool> boolean_values) {
auto property = document_.add_properties();
@@ -241,16 +158,6 @@
return *this;
}
- DocumentBuilder& AddCustomBooleanProperty(
- std::string property_name, std::initializer_list<bool> boolean_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (bool boolean_value : boolean_values) {
- custom_property->mutable_boolean_values()->Add(boolean_value);
- }
- return *this;
- }
-
DocumentBuilder& AddBytesProperty(
std::string property_name,
std::initializer_list<std::string> bytes_values) {
@@ -262,17 +169,6 @@
return *this;
}
- DocumentBuilder& AddCustomBytesProperty(
- std::string property_name,
- std::initializer_list<std::string> bytes_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (const std::string& bytes_value : bytes_values) {
- custom_property->mutable_bytes_values()->Add(std::string(bytes_value));
- }
- return *this;
- }
-
DocumentBuilder& AddDocumentProperty(
std::string property_name,
std::initializer_list<DocumentProto> document_values) {
@@ -283,18 +179,6 @@
}
return *this;
}
-
- DocumentBuilder& AddCustomDocumentProperty(
- std::string property_name,
- std::initializer_list<DocumentProto> document_values) {
- auto custom_property = document_.add_custom_properties();
- custom_property->set_name(std::move(property_name));
- for (DocumentProto document_value : document_values) {
- custom_property->mutable_document_values()->Add(
- std::move(document_value));
- }
- return *this;
- }
};
} // namespace lib
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index aa5a031..763c93b 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -72,6 +72,7 @@
#include "icing/legacy/core/icing-string-util.h"
#include "icing/portable/zlib.h"
#include "icing/util/crc32.h"
+#include "icing/util/data-loss.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
@@ -172,10 +173,10 @@
// happen if the file is corrupted or some previously added data was
// unpersisted. This may be used to signal that any derived data off of the
// proto log may need to be regenerated.
- enum DataStatus { NO_DATA_LOSS, PARTIAL_LOSS, COMPLETE_LOSS } data_status;
+ DataLoss data_loss;
bool has_data_loss() {
- return data_status == PARTIAL_LOSS || data_status == COMPLETE_LOSS;
+ return data_loss == DataLoss::PARTIAL || data_loss == DataLoss::COMPLETE;
}
};
@@ -186,11 +187,11 @@
// added data was unpersisted, the log will rewind to the last-good state. The
// log saves these checkpointed "good" states when PersistToDisk() is called
// or the log is safely destructed. If the log rewinds successfully to the
- // last-good state, then the returned CreateResult.data_status indicates
+ // last-good state, then the returned CreateResult.data_loss indicates
// whether it has a data loss and what kind of data loss it is (partial or
// complete) so that any derived data may know that it needs to be updated. If
// the log re-initializes successfully without any data loss,
- // CreateResult.data_status will be NO_DATA_LOSS.
+ // CreateResult.data_loss will be NONE.
//
// Params:
// filesystem: Handles system level calls
@@ -370,7 +371,7 @@
// Initializes a new proto log.
//
// Returns:
- // std::unique_ptr<FileBackedProtoLog> that can be used immediately
+ // std::unique_ptr<CreateResult> on success
// INTERNAL_ERROR on IO error
static libtextclassifier3::StatusOr<CreateResult> InitializeNewFile(
const Filesystem* filesystem, const std::string& file_path,
@@ -381,7 +382,7 @@
// content will be lost.
//
// Returns:
- // std::unique_ptr<FileBackedProtoLog> that can be used immediately
+ // std::unique_ptr<CreateResult> on success
// INTERNAL_ERROR on IO error or internal inconsistencies in the file
// INVALID_ARGUMENT_ERROR if options aren't consistent with previous
// instances
@@ -516,7 +517,7 @@
std::unique_ptr<FileBackedProtoLog<ProtoT>>(
new FileBackedProtoLog<ProtoT>(filesystem, file_path,
std::move(header))),
- /*data_status=*/CreateResult::NO_DATA_LOSS};
+ /*data_loss=*/DataLoss::NONE};
return create_result;
}
@@ -566,7 +567,7 @@
}
header->max_proto_size = options.max_proto_size;
- typename CreateResult::DataStatus data_status = CreateResult::NO_DATA_LOSS;
+ DataLoss data_loss = DataLoss::NONE;
ICING_ASSIGN_OR_RETURN(Crc32 calculated_log_checksum,
ComputeChecksum(filesystem, file_path, Crc32(),
sizeof(Header), file_size));
@@ -589,12 +590,12 @@
// Check if it matches our last rewind state. If so, this becomes our last
// good state and we can safely truncate and recover from here.
last_known_good = header->rewind_offset;
- data_status = CreateResult::PARTIAL_LOSS;
+ data_loss = DataLoss::PARTIAL;
} else {
// Otherwise, we're going to truncate the entire log and this resets the
// checksum to an empty log state.
header->log_checksum = 0;
- data_status = CreateResult::COMPLETE_LOSS;
+ data_loss = DataLoss::COMPLETE;
}
if (!filesystem->Truncate(file_path.c_str(), last_known_good)) {
@@ -610,7 +611,7 @@
std::unique_ptr<FileBackedProtoLog<ProtoT>>(
new FileBackedProtoLog<ProtoT>(filesystem, file_path,
std::move(header))),
- data_status};
+ data_loss};
return create_result;
}
diff --git a/icing/file/file-backed-proto-log_test.cc b/icing/file/file-backed-proto-log_test.cc
index 7410d2b..d429277 100644
--- a/icing/file/file-backed-proto-log_test.cc
+++ b/icing/file/file-backed-proto-log_test.cc
@@ -339,6 +339,7 @@
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_TRUE(create_result.has_data_loss());
+ ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
// Lost everything in the log since the rewind position doesn't help if
// there's been data corruption within the persisted region
@@ -408,6 +409,7 @@
max_proto_size_)));
auto proto_log = std::move(create_result.proto_log);
ASSERT_TRUE(create_result.has_data_loss());
+ ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
// Check that everything was persisted across instances
ASSERT_THAT(proto_log->ReadProto(document1_offset),
diff --git a/icing/file/file-backed-proto.h b/icing/file/file-backed-proto.h
index aede8de..15a1953 100644
--- a/icing/file/file-backed-proto.h
+++ b/icing/file/file-backed-proto.h
@@ -83,7 +83,7 @@
//
// TODO(cassiewang) The implementation today loses old data if Write() fails.
// We should write to a tmp file first and rename the file to fix this.
- // TODO(samzheng) Change to Write(ProtoT&& proto)
+ // TODO(cassiewang) Change to Write(ProtoT&& proto)
libtextclassifier3::Status Write(std::unique_ptr<ProtoT> proto)
ICING_LOCKS_EXCLUDED(mutex_);
diff --git a/icing/file/filesystem.cc b/icing/file/filesystem.cc
index 4a76c01..6a596f5 100644
--- a/icing/file/filesystem.cc
+++ b/icing/file/filesystem.cc
@@ -464,6 +464,20 @@
return success;
}
+bool Filesystem::CopyFile(const char* src, const char* dst) const {
+ ScopedFd src_fd(OpenForRead(src));
+ ScopedFd dst_fd(OpenForWrite(dst));
+ if (!src_fd.is_valid() || !dst_fd.is_valid()) {
+ return false;
+ }
+ uint64_t size = GetFileSize(*src_fd);
+ std::unique_ptr<uint8_t[]> buf = std::make_unique<uint8_t[]>(size);
+ if (!Read(*src_fd, buf.get(), size)) {
+ return false;
+ }
+ return Write(*dst_fd, buf.get(), size);
+}
+
bool Filesystem::PWrite(int fd, off_t offset, const void* data,
size_t data_size) const {
size_t write_len = data_size;
diff --git a/icing/file/filesystem.h b/icing/file/filesystem.h
index b85f3a0..d3c7787 100644
--- a/icing/file/filesystem.h
+++ b/icing/file/filesystem.h
@@ -83,6 +83,9 @@
// success or if the directory did not yet exist.
virtual bool DeleteDirectoryRecursively(const char* dir_name) const;
+ // Copies the src file to the dst file.
+ virtual bool CopyFile(const char* src, const char* dst) const;
+
// Returns true if a file exists. False if the file doesn't exist.
// If there is an error getting stat on the file, it logs the error and //
// asserts.
diff --git a/icing/file/mock-filesystem.h b/icing/file/mock-filesystem.h
index b89295e..88475cd 100644
--- a/icing/file/mock-filesystem.h
+++ b/icing/file/mock-filesystem.h
@@ -225,6 +225,8 @@
MOCK_METHOD(bool, DeleteDirectoryRecursively, (const char* dir_name),
(const));
+ MOCK_METHOD(bool, CopyFile, (const char* src, const char* dst), (const));
+
MOCK_METHOD(bool, FileExists, (const char* file_name), (const));
MOCK_METHOD(bool, DirectoryExists, (const char* dir_name), (const));
diff --git a/icing/helpers/icu/icu-data-file-helper.cc b/icing/helpers/icu/icu-data-file-helper.cc
index 5cf6a1d..6607c40 100644
--- a/icing/helpers/icu/icu-data-file-helper.cc
+++ b/icing/helpers/icu/icu-data-file-helper.cc
@@ -49,8 +49,6 @@
return absl_ports::InternalError("Unable to open file at provided path");
}
- // TODO(samzheng): figure out why icing::MemoryMappedFile causes
- // segmentation fault here.
const void* data =
mmap(nullptr, file_size, PROT_READ, MAP_PRIVATE, fd.get(), 0);
diff --git a/icing/icing-search-engine-with-icu-file_test.cc b/icing/icing-search-engine-with-icu-file_test.cc
index 1cb8620..5a9327e 100644
--- a/icing/icing-search-engine-with-icu-file_test.cc
+++ b/icing/icing-search-engine-with-icu-file_test.cc
@@ -27,6 +27,7 @@
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
namespace icing {
@@ -114,7 +115,8 @@
// The token is a random number so we don't verify it.
expected_search_result_proto.set_next_page_token(
search_result_proto.next_page_token());
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
} // namespace
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 08ceafd..d915d65 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -60,7 +60,6 @@
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
-#include "icing/util/timer.h"
#include "unicode/uloc.h"
namespace icing {
@@ -264,7 +263,7 @@
<< options_.base_dir();
// Measure the latency of the initialization process.
- Timer initialize_timer;
+ std::unique_ptr<Timer> initialize_timer = clock_->GetNewTimer();
InitializeResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
@@ -273,7 +272,8 @@
if (initialized_) {
// Already initialized.
result_status->set_code(StatusProto::OK);
- initialize_stats->set_latency_ms(initialize_timer.GetElapsedMilliseconds());
+ initialize_stats->set_latency_ms(
+ initialize_timer->GetElapsedMilliseconds());
initialize_stats->set_num_documents(document_store_->num_documents());
return result_proto;
}
@@ -284,7 +284,8 @@
libtextclassifier3::Status status = InitializeMembers(initialize_stats);
if (!status.ok()) {
TransformStatus(status, result_status);
- initialize_stats->set_latency_ms(initialize_timer.GetElapsedMilliseconds());
+ initialize_stats->set_latency_ms(
+ initialize_timer->GetElapsedMilliseconds());
return result_proto;
}
@@ -336,10 +337,10 @@
// index.
initialize_stats->set_index_restoration_cause(
NativeInitializeStats::INCONSISTENT_WITH_GROUND_TRUTH);
- Timer index_restore_timer;
+ std::unique_ptr<Timer> index_restore_timer = clock_->GetNewTimer();
status = RestoreIndexIfNeeded();
initialize_stats->set_index_restoration_latency_ms(
- index_restore_timer.GetElapsedMilliseconds());
+ index_restore_timer->GetElapsedMilliseconds());
}
}
}
@@ -348,7 +349,7 @@
initialized_ = true;
}
TransformStatus(status, result_status);
- initialize_stats->set_latency_ms(initialize_timer.GetElapsedMilliseconds());
+ initialize_stats->set_latency_ms(initialize_timer->GetElapsedMilliseconds());
return result_proto;
}
@@ -398,7 +399,7 @@
}
ICING_ASSIGN_OR_RETURN(
schema_store_, SchemaStore::Create(filesystem_.get(), schema_store_dir,
- initialize_stats));
+ clock_.get(), initialize_stats));
return libtextclassifier3::Status::OK;
}
@@ -415,9 +416,10 @@
absl_ports::StrCat("Could not create directory: ", document_dir));
}
ICING_ASSIGN_OR_RETURN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(filesystem_.get(), document_dir, clock_.get(),
schema_store_.get(), initialize_stats));
+ document_store_ = std::move(create_result.document_store);
return libtextclassifier3::Status::OK;
}
@@ -451,10 +453,10 @@
Index::Create(index_options, filesystem_.get(),
icing_filesystem_.get()));
- Timer restore_timer;
+ std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
ICING_RETURN_IF_ERROR(RestoreIndexIfNeeded());
initialize_stats->set_index_restoration_latency_ms(
- restore_timer.GetElapsedMilliseconds());
+ restore_timer->GetElapsedMilliseconds());
} else {
// Index was created fine.
index_ = std::move(index_or).ValueOrDie();
@@ -497,20 +499,20 @@
NativeInitializeStats* initialize_stats, bool log_document_store_stats) {
// Measure the latency of the data recovery. The cause of the recovery should
// be logged by the caller.
- Timer timer;
+ std::unique_ptr<Timer> timer = clock_->GetNewTimer();
ICING_RETURN_IF_ERROR(
document_store_->UpdateSchemaStore(schema_store_.get()));
if (initialize_stats != nullptr && log_document_store_stats) {
initialize_stats->set_document_store_recovery_latency_ms(
- timer.GetElapsedMilliseconds());
+ timer->GetElapsedMilliseconds());
}
// Restart timer.
- timer = Timer();
+ timer = clock_->GetNewTimer();
ICING_RETURN_IF_ERROR(index_->Reset());
ICING_RETURN_IF_ERROR(RestoreIndexIfNeeded());
if (initialize_stats != nullptr) {
initialize_stats->set_index_restoration_latency_ms(
- timer.GetElapsedMilliseconds());
+ timer->GetElapsedMilliseconds());
}
const std::string header_file =
@@ -673,7 +675,7 @@
PutResultProto IcingSearchEngine::Put(DocumentProto&& document) {
ICING_VLOG(1) << "Writing document to document store";
- Timer put_timer;
+ std::unique_ptr<Timer> put_timer = clock_->GetNewTimer();
PutResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
@@ -687,24 +689,24 @@
if (!initialized_) {
result_status->set_code(StatusProto::FAILED_PRECONDITION);
result_status->set_message("IcingSearchEngine has not been initialized!");
- put_document_stats->set_latency_ms(put_timer.GetElapsedMilliseconds());
+ put_document_stats->set_latency_ms(put_timer->GetElapsedMilliseconds());
return result_proto;
}
auto document_id_or = document_store_->Put(document, put_document_stats);
if (!document_id_or.ok()) {
TransformStatus(document_id_or.status(), result_status);
- put_document_stats->set_latency_ms(put_timer.GetElapsedMilliseconds());
+ put_document_stats->set_latency_ms(put_timer->GetElapsedMilliseconds());
return result_proto;
}
DocumentId document_id = document_id_or.ValueOrDie();
auto index_processor_or = IndexProcessor::Create(
schema_store_.get(), language_segmenter_.get(), normalizer_.get(),
- index_.get(), CreateIndexProcessorOptions(options_));
+ index_.get(), CreateIndexProcessorOptions(options_), clock_.get());
if (!index_processor_or.ok()) {
TransformStatus(index_processor_or.status(), result_status);
- put_document_stats->set_latency_ms(put_timer.GetElapsedMilliseconds());
+ put_document_stats->set_latency_ms(put_timer->GetElapsedMilliseconds());
return result_proto;
}
std::unique_ptr<IndexProcessor> index_processor =
@@ -714,7 +716,7 @@
index_processor->IndexDocument(document, document_id, put_document_stats);
TransformStatus(status, result_status);
- put_document_stats->set_latency_ms(put_timer.GetElapsedMilliseconds());
+ put_document_stats->set_latency_ms(put_timer->GetElapsedMilliseconds());
return result_proto;
}
@@ -747,6 +749,11 @@
StatusProto* result_status = result_proto.mutable_status();
absl_ports::unique_lock l(&mutex_);
+ if (!initialized_) {
+ result_status->set_code(StatusProto::FAILED_PRECONDITION);
+ result_status->set_message("IcingSearchEngine has not been initialized!");
+ return result_proto;
+ }
libtextclassifier3::Status status =
document_store_->ReportUsage(usage_report);
@@ -759,6 +766,11 @@
StatusProto* result_status = result_proto.mutable_status();
absl_ports::shared_lock l(&mutex_);
+ if (!initialized_) {
+ result_status->set_code(StatusProto::FAILED_PRECONDITION);
+ result_status->set_message("IcingSearchEngine has not been initialized!");
+ return result_proto;
+ }
std::vector<std::string> namespaces = document_store_->GetAllNamespaces();
@@ -784,6 +796,10 @@
return result_proto;
}
+ NativeDeleteStats* delete_stats = result_proto.mutable_delete_stats();
+ delete_stats->set_delete_type(NativeDeleteStats::DeleteType::SINGLE);
+
+ std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
// TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
libtextclassifier3::Status status = document_store_->Delete(name_space, uri);
@@ -796,6 +812,8 @@
}
result_status->set_code(StatusProto::OK);
+ delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
+ delete_stats->set_num_documents_deleted(1);
return result_proto;
}
@@ -812,18 +830,24 @@
return delete_result;
}
+ NativeDeleteStats* delete_stats = delete_result.mutable_delete_stats();
+ delete_stats->set_delete_type(NativeDeleteStats::DeleteType::NAMESPACE);
+
+ std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
// TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
- libtextclassifier3::Status status =
+ DocumentStore::DeleteByGroupResult doc_store_result =
document_store_->DeleteByNamespace(name_space);
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
+ if (!doc_store_result.status.ok()) {
+ ICING_LOG(ERROR) << doc_store_result.status.error_message()
<< "Failed to delete Namespace: " << name_space;
- TransformStatus(status, result_status);
+ TransformStatus(doc_store_result.status, result_status);
return delete_result;
}
result_status->set_code(StatusProto::OK);
+ delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
+ delete_stats->set_num_documents_deleted(doc_store_result.num_docs_deleted);
return delete_result;
}
@@ -840,27 +864,33 @@
return delete_result;
}
+ NativeDeleteStats* delete_stats = delete_result.mutable_delete_stats();
+ delete_stats->set_delete_type(NativeDeleteStats::DeleteType::SCHEMA_TYPE);
+
+ std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
// TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
// that can support error logging.
- libtextclassifier3::Status status =
+ DocumentStore::DeleteByGroupResult doc_store_result =
document_store_->DeleteBySchemaType(schema_type);
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
+ if (!doc_store_result.status.ok()) {
+ ICING_LOG(ERROR) << doc_store_result.status.error_message()
<< "Failed to delete SchemaType: " << schema_type;
- TransformStatus(status, result_status);
+ TransformStatus(doc_store_result.status, result_status);
return delete_result;
}
result_status->set_code(StatusProto::OK);
+ delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
+ delete_stats->set_num_documents_deleted(doc_store_result.num_docs_deleted);
return delete_result;
}
-DeleteResultProto IcingSearchEngine::DeleteByQuery(
+DeleteByQueryResultProto IcingSearchEngine::DeleteByQuery(
const SearchSpecProto& search_spec) {
ICING_VLOG(1) << "Deleting documents for query " << search_spec.query()
<< " from doc store";
- DeleteResultProto result_proto;
+ DeleteByQueryResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
absl_ports::unique_lock l(&mutex_);
@@ -870,6 +900,10 @@
return result_proto;
}
+ NativeDeleteStats* delete_stats = result_proto.mutable_delete_stats();
+ delete_stats->set_delete_type(NativeDeleteStats::DeleteType::QUERY);
+
+ std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
libtextclassifier3::Status status =
ValidateSearchSpec(search_spec, performance_configuration_);
if (!status.ok()) {
@@ -896,13 +930,12 @@
QueryProcessor::QueryResults query_results =
std::move(query_results_or).ValueOrDie();
- ICING_LOG(ERROR) << "Deleting the docs that matched the query.";
- bool found_results = false;
+ ICING_VLOG(2) << "Deleting the docs that matched the query.";
+ int num_deleted = 0;
while (query_results.root_iterator->Advance().ok()) {
- ICING_LOG(ERROR)
- << "Deleting doc "
- << query_results.root_iterator->doc_hit_info().document_id();
- found_results = true;
+ ICING_VLOG(3) << "Deleting doc "
+ << query_results.root_iterator->doc_hit_info().document_id();
+ ++num_deleted;
status = document_store_->Delete(
query_results.root_iterator->doc_hit_info().document_id());
if (!status.ok()) {
@@ -910,13 +943,15 @@
return result_proto;
}
}
- if (found_results) {
+ if (num_deleted > 0) {
result_proto.mutable_status()->set_code(StatusProto::OK);
} else {
result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
result_proto.mutable_status()->set_message(
"No documents matched the query to delete by!");
}
+ delete_stats->set_latency_ms(delete_timer->GetElapsedMilliseconds());
+ delete_stats->set_num_documents_deleted(num_deleted);
return result_proto;
}
@@ -1139,6 +1174,9 @@
return result_proto;
}
+ NativeQueryStats* query_stats = result_proto.mutable_query_stats();
+ std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
+
libtextclassifier3::Status status = ValidateResultSpec(result_spec);
if (!status.ok()) {
TransformStatus(status, result_status);
@@ -1150,6 +1188,15 @@
return result_proto;
}
+ query_stats->set_num_namespaces_filtered(
+ search_spec.namespace_filters_size());
+ query_stats->set_num_schema_types_filtered(
+ search_spec.schema_type_filters_size());
+ query_stats->set_ranking_strategy(scoring_spec.rank_by());
+ query_stats->set_is_first_page(true);
+ query_stats->set_requested_page_size(result_spec.num_per_page());
+
+ std::unique_ptr<Timer> component_timer = clock_->GetNewTimer();
// Gets unordered results from query processor
auto query_processor_or = QueryProcessor::Create(
index_.get(), language_segmenter_.get(), normalizer_.get(),
@@ -1168,7 +1215,16 @@
}
QueryProcessor::QueryResults query_results =
std::move(query_results_or).ValueOrDie();
+ query_stats->set_parse_query_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ int term_count = 0;
+ for (const auto& section_and_terms : query_results.query_terms) {
+ term_count += section_and_terms.second.size();
+ }
+ query_stats->set_num_terms(term_count);
+
+ component_timer = clock_->GetNewTimer();
// Scores but does not rank the results.
libtextclassifier3::StatusOr<std::unique_ptr<ScoringProcessor>>
scoring_processor_or =
@@ -1182,6 +1238,9 @@
std::vector<ScoredDocumentHit> result_document_hits =
scoring_processor->Score(std::move(query_results.root_iterator),
performance_configuration_.num_to_score);
+ query_stats->set_scoring_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ query_stats->set_num_documents_scored(result_document_hits.size());
// Returns early for empty result
if (result_document_hits.empty()) {
@@ -1189,6 +1248,7 @@
return result_proto;
}
+ component_timer = clock_->GetNewTimer();
// Ranks and paginates results
libtextclassifier3::StatusOr<PageResultState> page_result_state_or =
result_state_manager_.RankAndPaginate(ResultState(
@@ -1200,7 +1260,10 @@
}
PageResultState page_result_state =
std::move(page_result_state_or).ValueOrDie();
+ query_stats->set_ranking_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ component_timer = clock_->GetNewTimer();
// Retrieves the document protos and snippets if requested
auto result_retriever_or =
ResultRetriever::Create(document_store_.get(), schema_store_.get(),
@@ -1234,6 +1297,14 @@
if (page_result_state.next_page_token != kInvalidNextPageToken) {
result_proto.set_next_page_token(page_result_state.next_page_token);
}
+ query_stats->set_document_retrieval_latency_ms(
+ component_timer->GetElapsedMilliseconds());
+ query_stats->set_latency_ms(overall_timer->GetElapsedMilliseconds());
+ query_stats->set_num_results_returned_current_page(
+ result_proto.results_size());
+ query_stats->set_num_results_snippeted(
+ std::min(result_proto.results_size(),
+ result_spec.snippet_spec().num_to_snippet()));
return result_proto;
}
@@ -1250,6 +1321,10 @@
return result_proto;
}
+ NativeQueryStats* query_stats = result_proto.mutable_query_stats();
+ query_stats->set_is_first_page(false);
+
+ std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
libtextclassifier3::StatusOr<PageResultState> page_result_state_or =
result_state_manager_.GetNextPage(next_page_token);
@@ -1266,6 +1341,7 @@
PageResultState page_result_state =
std::move(page_result_state_or).ValueOrDie();
+ query_stats->set_requested_page_size(page_result_state.requested_page_size);
// Retrieves the document protos.
auto result_retriever_or =
@@ -1297,6 +1373,21 @@
if (page_result_state.next_page_token != kInvalidNextPageToken) {
result_proto.set_next_page_token(page_result_state.next_page_token);
}
+
+ // The only thing that we're doing is document retrieval. So document
+ // retrieval latency and overall latency are the same and can use the same
+ // timer.
+ query_stats->set_document_retrieval_latency_ms(
+ overall_timer->GetElapsedMilliseconds());
+ query_stats->set_latency_ms(overall_timer->GetElapsedMilliseconds());
+ query_stats->set_num_results_returned_current_page(
+ result_proto.results_size());
+ int num_left_to_snippet =
+ std::max(page_result_state.snippet_context.snippet_spec.num_to_snippet() -
+ page_result_state.num_previously_returned,
+ 0);
+ query_stats->set_num_results_snippeted(
+ std::min(result_proto.results_size(), num_left_to_snippet));
return result_proto;
}
@@ -1357,21 +1448,21 @@
// Tries to rebuild document store if swapping fails, to avoid leaving the
// system in the broken state for future operations.
- auto document_store_or =
+ auto create_result_or =
DocumentStore::Create(filesystem_.get(), current_document_dir,
clock_.get(), schema_store_.get());
// TODO(b/144458732): Implement a more robust version of
// TC_ASSIGN_OR_RETURN that can support error logging.
- if (!document_store_or.ok()) {
+ if (!create_result_or.ok()) {
// Unable to create DocumentStore from the old file. Mark as uninitialized
// and return INTERNAL.
initialized_ = false;
ICING_LOG(ERROR) << "Failed to create document store instance";
return absl_ports::Annotate(
absl_ports::InternalError("Failed to create document store instance"),
- document_store_or.status().error_message());
+ create_result_or.status().error_message());
}
- document_store_ = std::move(document_store_or).ValueOrDie();
+ document_store_ = std::move(create_result_or.ValueOrDie().document_store);
// Potential data loss
// TODO(b/147373249): Find a way to detect true data loss error
@@ -1380,10 +1471,10 @@
}
// Recreates the doc store instance
- auto document_store_or =
+ auto create_result_or =
DocumentStore::Create(filesystem_.get(), current_document_dir,
clock_.get(), schema_store_.get());
- if (!document_store_or.ok()) {
+ if (!create_result_or.ok()) {
// Unable to create DocumentStore from the new file. Mark as uninitialized
// and return INTERNAL.
initialized_ = false;
@@ -1391,7 +1482,7 @@
"Document store has been optimized, but a valid document store "
"instance can't be created");
}
- document_store_ = std::move(document_store_or).ValueOrDie();
+ document_store_ = std::move(create_result_or.ValueOrDie().document_store);
// Deletes tmp directory
if (!filesystem_->DeleteDirectoryRecursively(
@@ -1432,9 +1523,9 @@
ICING_ASSIGN_OR_RETURN(
std::unique_ptr<IndexProcessor> index_processor,
- IndexProcessor::Create(schema_store_.get(), language_segmenter_.get(),
- normalizer_.get(), index_.get(),
- CreateIndexProcessorOptions(options_)));
+ IndexProcessor::Create(
+ schema_store_.get(), language_segmenter_.get(), normalizer_.get(),
+ index_.get(), CreateIndexProcessorOptions(options_), clock_.get()));
ICING_VLOG(1) << "Restoring index by replaying documents from document id "
<< first_document_to_reindex << " to document id "
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index 70a9c07..b2bb4f1 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -287,7 +287,7 @@
// NOT_FOUND if the query doesn't match any documents
// FAILED_PRECONDITION IcingSearchEngine has not been initialized yet
// INTERNAL_ERROR on IO error
- DeleteResultProto DeleteByQuery(const SearchSpecProto& search_spec)
+ DeleteByQueryResultProto DeleteByQuery(const SearchSpecProto& search_spec)
ICING_LOCKS_EXCLUDED(mutex_);
// Retrieves, scores, ranks, and returns the results according to the specs.
@@ -404,15 +404,18 @@
bool initialized_ ICING_GUARDED_BY(mutex_) = false;
// Abstraction for accessing time values.
- std::unique_ptr<Clock> clock_;
+ const std::unique_ptr<const Clock> clock_;
// Provides key thresholds that affects the running time and memory of major
// components in Icing search engine.
- PerformanceConfiguration performance_configuration_;
+ const PerformanceConfiguration performance_configuration_;
- // Used to manage pagination state of query results. A lock is not needed here
- // because ResultStateManager has its own reader-writer lock.
- ResultStateManager result_state_manager_;
+ // Used to manage pagination state of query results. Even though
+ // ResultStateManager has its own reader-writer lock, mutex_ must still be
+ // acquired first in order to adhere to the global lock ordering:
+ // 1. mutex_
+ // 2. result_state_manager_.lock_
+ ResultStateManager result_state_manager_ ICING_GUARDED_BY(mutex_);
// Used to provide reader and writer locks
absl_ports::shared_mutex mutex_;
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index a6d96e0..9d33a82 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -117,7 +117,7 @@
// Creates a vector containing num_words randomly-generated words for use by
// documents.
template <typename Rand>
-std::vector<std::string> CreateLanguage(int num_words, Rand* r) {
+std::vector<std::string> CreateLanguages(int num_words, Rand* r) {
std::vector<std::string> language;
std::normal_distribution<> norm_dist(kAvgTokenLen, kTokenStdDev);
while (--num_words >= 0) {
@@ -175,6 +175,165 @@
std::string dir_;
};
+std::vector<DocumentProto> GenerateRandomDocuments(
+ EvenDistributionTypeSelector* type_selector, int num_docs) {
+ std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces);
+ EvenDistributionNamespaceSelector namespace_selector(namespaces);
+
+ std::default_random_engine random;
+ std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
+ UniformDistributionLanguageTokenGenerator<std::default_random_engine>
+ token_generator(language, &random);
+
+ DocumentGenerator<
+ EvenDistributionNamespaceSelector, EvenDistributionTypeSelector,
+ UniformDistributionLanguageTokenGenerator<std::default_random_engine>>
+ generator(&namespace_selector, type_selector, &token_generator,
+ kAvgDocumentSize * kContentSizePct);
+
+ std::vector<DocumentProto> random_docs;
+ random_docs.reserve(num_docs);
+ for (int i = 0; i < num_docs; i++) {
+ random_docs.push_back(generator.generateDoc());
+ }
+ return random_docs;
+}
+
+void BM_IndexLatency(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ std::default_random_engine random;
+ int num_types = kAvgNumNamespaces * kAvgNumTypes;
+ ExactStringPropertyGenerator property_generator;
+ SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
+ /*num_properties=*/state.range(1), &property_generator);
+ SchemaProto schema = schema_generator.GenerateSchema(num_types);
+ EvenDistributionTypeSelector type_selector(schema);
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ int num_docs = state.range(0);
+ const std::vector<DocumentProto> random_docs =
+ GenerateRandomDocuments(&type_selector, num_docs);
+ Timer timer;
+ for (const DocumentProto& doc : random_docs) {
+ ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
+ }
+ int64_t time_taken_ns = timer.GetElapsedNanoseconds();
+ int64_t time_per_doc_ns = time_taken_ns / num_docs;
+ std::cout << "Number of indexed documents:\t" << num_docs
+ << "\t\tNumber of indexed sections:\t" << state.range(1)
+ << "\t\tTime taken (ms):\t" << time_taken_ns / 1000000
+ << "\t\tTime taken per doc (us):\t" << time_per_doc_ns / 1000
+ << std::endl;
+}
+BENCHMARK(BM_IndexLatency)
+ // Arguments: num_indexed_documents, num_sections
+ ->ArgPair(1, 1)
+ ->ArgPair(2, 1)
+ ->ArgPair(8, 1)
+ ->ArgPair(32, 1)
+ ->ArgPair(128, 1)
+ ->ArgPair(1 << 10, 1)
+ ->ArgPair(1 << 13, 1)
+ ->ArgPair(1 << 15, 1)
+ ->ArgPair(1 << 17, 1)
+ ->ArgPair(1, 5)
+ ->ArgPair(2, 5)
+ ->ArgPair(8, 5)
+ ->ArgPair(32, 5)
+ ->ArgPair(128, 5)
+ ->ArgPair(1 << 10, 5)
+ ->ArgPair(1 << 13, 5)
+ ->ArgPair(1 << 15, 5)
+ ->ArgPair(1 << 17, 5)
+ ->ArgPair(1, 10)
+ ->ArgPair(2, 10)
+ ->ArgPair(8, 10)
+ ->ArgPair(32, 10)
+ ->ArgPair(128, 10)
+ ->ArgPair(1 << 10, 10)
+ ->ArgPair(1 << 13, 10)
+ ->ArgPair(1 << 15, 10)
+ ->ArgPair(1 << 17, 10);
+
+void BM_IndexThroughput(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ std::default_random_engine random;
+ int num_types = kAvgNumNamespaces * kAvgNumTypes;
+ ExactStringPropertyGenerator property_generator;
+ SchemaGenerator<ExactStringPropertyGenerator> schema_generator(
+ /*num_properties=*/state.range(1), &property_generator);
+ SchemaProto schema = schema_generator.GenerateSchema(num_types);
+ EvenDistributionTypeSelector type_selector(schema);
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ int num_docs = state.range(0);
+ const std::vector<DocumentProto> random_docs =
+ GenerateRandomDocuments(&type_selector, num_docs);
+ for (auto s : state) {
+ for (const DocumentProto& doc : random_docs) {
+ ASSERT_THAT(icing->Put(doc).status(), ProtoIsOk());
+ }
+ }
+ state.SetItemsProcessed(state.iterations() * num_docs);
+}
+BENCHMARK(BM_IndexThroughput)
+ // Arguments: num_indexed_documents, num_sections
+ ->ArgPair(1, 1)
+ ->ArgPair(2, 1)
+ ->ArgPair(8, 1)
+ ->ArgPair(32, 1)
+ ->ArgPair(128, 1)
+ ->ArgPair(1 << 10, 1)
+ ->ArgPair(1 << 13, 1)
+ ->ArgPair(1 << 15, 1)
+ ->ArgPair(1 << 17, 1)
+ ->ArgPair(1, 5)
+ ->ArgPair(2, 5)
+ ->ArgPair(8, 5)
+ ->ArgPair(32, 5)
+ ->ArgPair(128, 5)
+ ->ArgPair(1 << 10, 5)
+ ->ArgPair(1 << 13, 5)
+ ->ArgPair(1 << 15, 5)
+ ->ArgPair(1 << 17, 5)
+ ->ArgPair(1, 10)
+ ->ArgPair(2, 10)
+ ->ArgPair(8, 10)
+ ->ArgPair(32, 10)
+ ->ArgPair(128, 10)
+ ->ArgPair(1 << 10, 10)
+ ->ArgPair(1 << 13, 10)
+ ->ArgPair(1 << 15, 10)
+ ->ArgPair(1 << 17, 10);
+
void BM_MutlipleIndices(benchmark::State& state) {
// Initialize the filesystem
std::string test_dir = GetTestTempDir() + "/icing/benchmark";
@@ -202,11 +361,8 @@
options.set_index_merge_size(kIcingFullIndexSize / num_indices);
auto icing = std::make_unique<IcingSearchEngine>(options);
- InitializeResultProto init_result = icing->Initialize();
- ASSERT_THAT(init_result.status().code(), Eq(StatusProto::OK));
-
- SetSchemaResultProto schema_result = icing->SetSchema(schema);
- ASSERT_THAT(schema_result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
icings.push_back(std::move(icing));
}
@@ -214,7 +370,7 @@
std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces);
EvenDistributionNamespaceSelector namespace_selector(namespaces);
- std::vector<std::string> language = CreateLanguage(kLanguageSize, &random);
+ std::vector<std::string> language = CreateLanguages(kLanguageSize, &random);
UniformDistributionLanguageTokenGenerator<std::default_random_engine>
token_generator(language, &random);
@@ -231,8 +387,7 @@
ASSERT_THAT(put_result.status().code(), Eq(StatusProto::UNKNOWN));
continue;
}
- put_result = icings.at(i % icings.size())->Put(doc);
- ASSERT_THAT(put_result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(icings.at(i % icings.size())->Put(doc).status(), ProtoIsOk());
}
// QUERY!
@@ -255,13 +410,13 @@
continue;
}
result = icings.at(0)->Search(search_spec, scoring_spec, result_spec);
- ASSERT_THAT(result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(result.status(), ProtoIsOk());
while (!result.results().empty()) {
num_results += result.results_size();
if (!icings.empty()) {
result = icings.at(0)->GetNextPage(result.next_page_token());
}
- ASSERT_THAT(result.status().code(), Eq(StatusProto::OK));
+ ASSERT_THAT(result.status(), ProtoIsOk());
}
}
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index b642a94..f4249f3 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -41,6 +41,7 @@
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/platform.h"
#include "icing/testing/random-string.h"
#include "icing/testing/snippet-helpers.h"
#include "icing/testing/test-data.h"
@@ -99,17 +100,19 @@
class IcingSearchEngineTest : public testing::Test {
protected:
void SetUp() override {
-#ifndef ICING_REVERSE_JNI_SEGMENTATION
- // If we've specified using the reverse-JNI method for segmentation (i.e.
- // not ICU), then we won't have the ICU data file included to set up.
- // Technically, we could choose to use reverse-JNI for segmentation AND
- // include an ICU data file, but that seems unlikely and our current BUILD
- // setup doesn't do this.
- // File generated via icu_data_file rule in //icing/BUILD.
- std::string icu_data_file_path =
- GetTestFilePath("icing/icu.dat");
- ICING_ASSERT_OK(icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
-#endif // ICING_REVERSE_JNI_SEGMENTATION
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ // File generated via icu_data_file rule in //icing/BUILD.
+ std::string icu_data_file_path =
+ GetTestFilePath("icing/icu.dat");
+ ICING_ASSERT_OK(
+ icu_data_file_helper::SetUpICUDataFile(icu_data_file_path));
+ }
+
filesystem_.CreateDirectoryRecursively(GetTestBaseDir().c_str());
}
@@ -191,6 +194,56 @@
TermMatchType::PREFIX);
subj->mutable_string_indexing_config()->set_tokenizer_type(
StringIndexingConfig::TokenizerType::PLAIN);
+ return schema;
+}
+
+SchemaProto CreatePersonAndEmailSchema() {
+ SchemaProto schema;
+
+ auto* person_type = schema.add_types();
+ person_type->set_schema_type("Person");
+ auto* name = person_type->add_properties();
+ name->set_property_name("name");
+ name->set_data_type(PropertyConfigProto::DataType::STRING);
+ name->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ name->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ name->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ auto* address = person_type->add_properties();
+ address->set_property_name("emailAddress");
+ address->set_data_type(PropertyConfigProto::DataType::STRING);
+ address->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ address->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ address->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ auto* type = schema.add_types();
+ type->set_schema_type("Email");
+
+ auto* body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ auto* subj = type->add_properties();
+ subj->set_property_name("subject");
+ subj->set_data_type(PropertyConfigProto::DataType::STRING);
+ subj->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ subj->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ subj->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ auto* sender = type->add_properties();
+ sender->set_property_name("sender");
+ sender->set_schema_type("Person");
+ sender->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ sender->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ sender->mutable_document_indexing_config()->set_index_nested_properties(true);
return schema;
}
@@ -344,22 +397,26 @@
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// The query token is also truncated to length of 1, so "me"->"m" matches "m"
search_spec.set_query("me");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// The query token is still truncated to length of 1, so "massage"->"m"
// matches "m"
search_spec.set_query("massage");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -392,9 +449,11 @@
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, FailToCreateDocStore) {
@@ -748,9 +807,10 @@
SearchResultProto empty_result;
empty_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(empty_result));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStats(empty_result));
SchemaProto schema_with_indexed_property = CreateMessageSchema();
// Index restoration should be triggered here because new schema requires more
@@ -762,9 +822,10 @@
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
@@ -1040,9 +1101,11 @@
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) {
@@ -1074,7 +1137,8 @@
// The token is a random number so we don't verify it.
expected_search_result_proto.set_next_page_token(
search_result_proto.next_page_token());
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) {
@@ -1090,8 +1154,10 @@
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), result_spec),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) {
@@ -1110,8 +1176,10 @@
StatusProto::INVALID_ARGUMENT);
expected_search_result_proto.mutable_status()->set_message(
"ResultSpecProto.num_per_page cannot be negative.");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(), result_spec),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) {
@@ -1152,17 +1220,19 @@
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
CreateMessageDocument("namespace", "uri");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto actual_results =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
search_spec.set_query("foo");
SearchResultProto empty_result;
empty_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(empty_result));
+ actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStats(empty_result));
}
}
@@ -1183,7 +1253,8 @@
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) {
@@ -1223,7 +1294,8 @@
uint64_t next_page_token = search_result_proto.next_page_token();
// Since the token is a random number, we don't need to verify
expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Second page, 2 results
expected_search_result_proto.clear_results();
@@ -1231,8 +1303,9 @@
document3;
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Third page, 1 result
expected_search_result_proto.clear_results();
@@ -1241,13 +1314,15 @@
// Because there are no more results, we should not return the next page
// token.
expected_search_result_proto.clear_next_page_token();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// No more results
expected_search_result_proto.clear_results();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) {
@@ -1290,7 +1365,8 @@
uint64_t next_page_token = search_result_proto.next_page_token();
// Since the token is a random number, we don't need to verify
expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Second page, 2 results
expected_search_result_proto.clear_results();
@@ -1298,8 +1374,9 @@
document3;
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Third page, 1 result
expected_search_result_proto.clear_results();
@@ -1308,13 +1385,15 @@
// Because there are no more results, we should not return the next page
// token.
expected_search_result_proto.clear_next_page_token();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// No more results
expected_search_result_proto.clear_results();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) {
@@ -1426,7 +1505,8 @@
uint64_t next_page_token = search_result_proto.next_page_token();
// Since the token is a random number, we don't need to verify
expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Now document1 is still to be fetched.
// Invalidates token
@@ -1435,8 +1515,9 @@
// Tries to fetch the second page, no result since it's invalidated
expected_search_result_proto.clear_results();
expected_search_result_proto.clear_next_page_token();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -1468,7 +1549,8 @@
uint64_t next_page_token = search_result_proto.next_page_token();
// Since the token is a random number, we don't need to verify
expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto, EqualsProto(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Now document1 is still to be fetched.
OptimizeResultProto optimize_result_proto;
@@ -1480,8 +1562,9 @@
// invalidated during Optimize()
expected_search_result_proto.clear_results();
expected_search_result_proto.clear_next_page_token();
- EXPECT_THAT(icing.GetNextPage(next_page_token),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.GetNextPage(next_page_token);
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) {
@@ -1802,7 +1885,13 @@
.AddStringProperty("subject", "message body2")
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
@@ -1820,7 +1909,14 @@
// Delete the first type. The first doc should be irretrievable. The
// second should still be present.
- EXPECT_THAT(icing.DeleteBySchemaType("message").status(), ProtoIsOk());
+ DeleteBySchemaTypeResultProto result_proto =
+ icing.DeleteBySchemaType("message");
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ NativeDeleteStats exp_stats;
+ exp_stats.set_delete_type(NativeDeleteStats::DeleteType::SCHEMA_TYPE);
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(1);
+ EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
expected_get_result_proto.mutable_status()->set_message(
@@ -1843,9 +1939,11 @@
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec.set_query("message");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteSchemaTypeByQuery) {
@@ -1912,9 +2010,11 @@
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteByNamespace) {
@@ -1940,7 +2040,12 @@
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
@@ -1963,7 +2068,14 @@
// Delete namespace1. Document1 and document2 should be irretrievable.
// Document3 should still be present.
- EXPECT_THAT(icing.DeleteByNamespace("namespace1").status(), ProtoIsOk());
+ DeleteByNamespaceResultProto result_proto =
+ icing.DeleteByNamespace("namespace1");
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ NativeDeleteStats exp_stats;
+ exp_stats.set_delete_type(NativeDeleteStats::DeleteType::NAMESPACE);
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(2);
+ EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
expected_get_result_proto.mutable_status()->set_message(
@@ -1993,9 +2105,11 @@
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
search_spec.set_query("message");
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteNamespaceByQuery) {
@@ -2057,9 +2171,11 @@
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteByQuery) {
@@ -2078,7 +2194,12 @@
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(7);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
@@ -2099,7 +2220,13 @@
SearchSpecProto search_spec;
search_spec.set_query("body1");
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- EXPECT_THAT(icing.DeleteByQuery(search_spec).status(), ProtoIsOk());
+ DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec);
+ EXPECT_THAT(result_proto.status(), ProtoIsOk());
+ NativeDeleteStats exp_stats;
+ exp_stats.set_delete_type(NativeDeleteStats::DeleteType::QUERY);
+ exp_stats.set_latency_ms(7);
+ exp_stats.set_num_documents_deleted(1);
+ EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
expected_get_result_proto.mutable_status()->set_code(StatusProto::NOT_FOUND);
expected_get_result_proto.mutable_status()->set_message(
@@ -2122,9 +2249,11 @@
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteByQueryNotFound) {
@@ -2189,9 +2318,11 @@
document2;
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document1;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SetSchemaShouldWorkAfterOptimization) {
@@ -2251,16 +2382,20 @@
ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
// Validates that Search() works right after Optimize()
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // Destroys IcingSearchEngine to make sure nothing is cached.
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, IcingShouldWorkFineIfOptimizationIsAborted) {
@@ -2311,9 +2446,11 @@
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document1;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -2369,9 +2506,11 @@
// Searching old content returns nothing because original file directory is
// missing
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
search_spec.set_query("n");
@@ -2379,9 +2518,10 @@
new_document;
// Searching new content returns the new document
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) {
@@ -2437,9 +2577,11 @@
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
// Searching old content returns nothing because original files are missing
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
search_spec.set_query("n");
@@ -2447,9 +2589,10 @@
new_document;
// Searching new content returns the new document
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) {
@@ -2498,9 +2641,11 @@
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
// Check that the document is returned as part of search results
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) {
@@ -2547,9 +2692,11 @@
EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
// Check that the document is not returned as part of search results
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) {
@@ -2584,9 +2731,11 @@
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
// The message isn't indexed, so we get nothing
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// With just the schema type filter, we can search for the message
search_spec.Clear();
@@ -2595,9 +2744,10 @@
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
message_document;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Since SchemaTypeIds are assigned based on order in the SchemaProto, this
// will force a change in the DocumentStore's cached SchemaTypeIds
@@ -2626,9 +2776,10 @@
search_spec.add_schema_type_filters("message");
// We can still search for the message document
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) {
@@ -2655,9 +2806,11 @@
ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri"),
EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str()));
@@ -2671,9 +2824,11 @@
EqualsProto(expected_get_result_proto));
// Checks that the index is still ok so we can search over it
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Checks that Schema is still since it'll be needed to validate the document
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
@@ -2704,9 +2859,11 @@
ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri"),
EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
// Change the header's magic value
@@ -2724,9 +2881,11 @@
EqualsProto(expected_get_result_proto));
// Checks that the index is still ok so we can search over it
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Checks that Schema is still since it'll be needed to validate the document
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
@@ -2757,9 +2916,11 @@
ProtoIsOk());
EXPECT_THAT(icing.Get("namespace", "uri"),
EqualsProto(expected_get_result_proto));
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
// Change the header's checksum value
@@ -2778,9 +2939,11 @@
EqualsProto(expected_get_result_proto));
// Checks that the index is still ok so we can search over it
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
// Checks that Schema is still since it'll be needed to validate the document
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
@@ -2892,9 +3055,11 @@
SearchResultProto expected_search_result_proto;
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
{
@@ -2930,9 +3095,10 @@
property->mutable_string_indexing_config()->set_tokenizer_type(
StringIndexingConfig::TokenizerType::PLAIN);
+ FakeClock fake_clock;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(filesystem(), GetSchemaDir()));
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
ICING_EXPECT_OK(schema_store->SetSchema(new_schema));
} // Will persist new schema
@@ -2971,9 +3137,11 @@
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2_with_additional_property;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, RecoverFromInconsistentDocumentStore) {
@@ -2989,17 +3157,20 @@
} // This should shut down IcingSearchEngine and persist anything it needs to
{
+ FakeClock fake_clock;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(filesystem(), GetSchemaDir()));
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
ICING_EXPECT_OK(schema_store->SetSchema(CreateMessageSchema()));
// Puts a second document into DocumentStore but doesn't index it.
- FakeClock fake_clock;
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(filesystem(), GetDocumentDir(), &fake_clock,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(document2));
}
@@ -3033,9 +3204,11 @@
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document1;
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) {
@@ -3055,9 +3228,11 @@
EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
ProtoIsOk());
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
// Pretend we lost the entire index
@@ -3068,9 +3243,11 @@
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
// Check that our index is ok by searching over the restored index
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) {
@@ -3090,9 +3267,11 @@
EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
ProtoIsOk());
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
// Pretend index is corrupted
@@ -3105,9 +3284,11 @@
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
// Check that our index is ok by searching over the restored index
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) {
@@ -3165,9 +3346,10 @@
ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) {
@@ -3223,9 +3405,10 @@
// order.
ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) {
@@ -3278,9 +3461,10 @@
ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageCount) {
@@ -3348,9 +3532,10 @@
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -3405,9 +3590,10 @@
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageTimestamp) {
@@ -3474,9 +3660,10 @@
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -3531,9 +3718,10 @@
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) {
@@ -3599,9 +3787,10 @@
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) {
@@ -3660,9 +3849,10 @@
ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
scoring_spec.set_order_by(ScoringSpecProto::Order::ASC);
- EXPECT_THAT(icing.Search(search_spec, scoring_spec,
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto = icing.Search(
+ search_spec, scoring_spec, ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -3740,9 +3930,11 @@
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
CreateMessageDocument("namespace", "uri");
- ASSERT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(expected_search_result_proto));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStats(expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
@@ -3764,9 +3956,10 @@
// Can't search for it
SearchResultProto empty_result;
empty_result.mutable_status()->set_code(StatusProto::OK);
- EXPECT_THAT(icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance()),
- EqualsProto(empty_result));
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, GetDefaultScoringSpec(),
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStats(empty_result));
}
TEST_F(IcingSearchEngineTest, PersistToDisk) {
@@ -4524,11 +4717,16 @@
}
TEST_F(IcingSearchEngineTest, InitializeShouldLogFunctionLatency) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
EXPECT_THAT(initialize_result_proto.native_initialize_stats().latency_ms(),
- Gt(0));
+ Eq(10));
}
TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfDocuments) {
@@ -4580,7 +4778,14 @@
TEST_F(IcingSearchEngineTest,
InitializeShouldNotLogRecoveryCauseForFirstTimeInitialize) {
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ // Even though the fake timer will return 10, all the latency numbers related
+ // to recovery / restoration should be 0 during the first-time initialization.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
@@ -4637,7 +4842,12 @@
{
// Document store will rewind to previous checkpoint. The cause should be
// DATA_LOSS and the data status should be PARTIAL_LOSS.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
@@ -4645,7 +4855,7 @@
Eq(NativeInitializeStats::DATA_LOSS));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.document_store_recovery_latency_ms(),
- Gt(0));
+ Eq(10));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.document_store_data_status(),
Eq(NativeInitializeStats::PARTIAL_LOSS));
@@ -4669,21 +4879,14 @@
DocumentProto document1 = DocumentBuilder()
.SetKey("icing", "fake_type/1")
.SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
+ .AddStringProperty("body", "message body")
.Build();
- DocumentProto document2 = DocumentBuilder()
- .SetKey("icing", "fake_type/2")
- .SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
- .Build();
-
{
// Initialize and put a document.
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(document2).status(), ProtoIsOk());
}
{
@@ -4708,7 +4911,12 @@
{
// Document store will completely rewind. The cause should be DATA_LOSS and
// the data status should be COMPLETE_LOSS.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
@@ -4716,7 +4924,7 @@
Eq(NativeInitializeStats::DATA_LOSS));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.document_store_recovery_latency_ms(),
- Gt(0));
+ Eq(10));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.document_store_data_status(),
Eq(NativeInitializeStats::COMPLETE_LOSS));
@@ -4725,9 +4933,9 @@
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.index_restoration_cause(),
Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH));
- // Here we don't check index_restoration_latency_ms because the index
- // restoration is super fast when document store is emtpy. We won't get a
- // latency that is greater than 1 ms.
+ EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(10));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.schema_store_recovery_cause(),
Eq(NativeInitializeStats::NONE));
@@ -4761,7 +4969,12 @@
{
// Index is empty but ground truth is not. Index should be restored due to
// the inconsistency.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
@@ -4769,7 +4982,7 @@
Eq(NativeInitializeStats::INCONSISTENT_WITH_GROUND_TRUTH));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.index_restoration_latency_ms(),
- Gt(0));
+ Eq(10));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.document_store_recovery_cause(),
Eq(NativeInitializeStats::NONE));
@@ -4790,23 +5003,17 @@
TEST_F(IcingSearchEngineTest,
InitializeShouldLogRecoveryCauseTotalChecksumMismatch) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
{
- // Initialize and index some documents.
+ // Initialize and put one document.
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // We need to index enough documents to make
- // DocumentStore::UpdateSchemaStore() run longer than 1 ms.
- for (int i = 0; i < 50; ++i) {
- DocumentProto document =
- DocumentBuilder()
- .SetKey("icing", "fake_type/" + std::to_string(i))
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
}
{
@@ -4819,7 +5026,12 @@
{
// Both document store and index should be recovered from checksum mismatch.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
@@ -4827,13 +5039,13 @@
Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.index_restoration_latency_ms(),
- Gt(0));
+ Eq(10));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.document_store_recovery_cause(),
Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.document_store_recovery_latency_ms(),
- Gt(0));
+ Eq(10));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.document_store_data_status(),
Eq(NativeInitializeStats::NO_DATA_LOSS));
@@ -4847,23 +5059,17 @@
}
TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCauseIndexIOError) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
{
- // Initialize and index some documents.
+ // Initialize and put one document.
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // We need to index enough documents to make RestoreIndexIfNeeded() run
- // longer than 1 ms.
- for (int i = 0; i < 50; ++i) {
- DocumentProto document =
- DocumentBuilder()
- .SetKey("icing", "fake_type/" + std::to_string(i))
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
}
// lambda to fail OpenForWrite on lite index hit buffer once.
@@ -4884,10 +5090,12 @@
ON_CALL(*mock_icing_filesystem, OpenForWrite)
.WillByDefault(open_write_lambda);
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::make_unique<Filesystem>(),
std::move(mock_icing_filesystem),
- std::make_unique<FakeClock>(), GetTestJniCache());
+ std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
@@ -4896,7 +5104,7 @@
Eq(NativeInitializeStats::IO_ERROR));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.index_restoration_latency_ms(),
- Gt(0));
+ Eq(10));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.document_store_recovery_cause(),
Eq(NativeInitializeStats::NONE));
@@ -4915,23 +5123,17 @@
}
TEST_F(IcingSearchEngineTest, InitializeShouldLogRecoveryCauseDocStoreIOError) {
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message body")
+ .Build();
{
- // Initialize and index some documents.
+ // Initialize and put one document.
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
-
- // We need to index enough documents to make RestoreIndexIfNeeded() run
- // longer than 1 ms.
- for (int i = 0; i < 50; ++i) {
- DocumentProto document =
- DocumentBuilder()
- .SetKey("icing", "fake_type/" + std::to_string(i))
- .SetSchema("Message")
- .AddStringProperty("body", "message body")
- .Build();
- ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
- }
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
}
// lambda to fail Read on document store header once.
@@ -4954,10 +5156,12 @@
ON_CALL(*mock_filesystem, Read(A<const char*>(), _, _))
.WillByDefault(read_lambda);
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
std::make_unique<IcingFilesystem>(),
- std::make_unique<FakeClock>(), GetTestJniCache());
+ std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
@@ -4966,7 +5170,7 @@
Eq(NativeInitializeStats::IO_ERROR));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.document_store_recovery_latency_ms(),
- Gt(0));
+ Eq(10));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.document_store_data_status(),
Eq(NativeInitializeStats::NO_DATA_LOSS));
@@ -5000,7 +5204,12 @@
}
{
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
@@ -5008,7 +5217,7 @@
Eq(NativeInitializeStats::IO_ERROR));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.schema_store_recovery_latency_ms(),
- Gt(0));
+ Eq(10));
EXPECT_THAT(initialize_result_proto.native_initialize_stats()
.document_store_recovery_cause(),
Eq(NativeInitializeStats::NONE));
@@ -5085,28 +5294,34 @@
.AddStringProperty("body", "message body")
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
PutResultProto put_result_proto = icing.Put(document);
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(put_result_proto.native_put_document_stats().latency_ms(), Gt(0));
+ EXPECT_THAT(put_result_proto.native_put_document_stats().latency_ms(),
+ Eq(10));
}
TEST_F(IcingSearchEngineTest, PutDocumentShouldLogDocumentStoreStats) {
- // Create a large enough document so that document_store_latency_ms can be
- // longer than 1 ms.
- std::default_random_engine random;
- std::string random_string_10000 =
- RandomString(kAlNumAlphabet, /*len=*/10000, &random);
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
- .AddStringProperty("body", random_string_10000)
+ .AddStringProperty("body", "message body")
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
@@ -5114,28 +5329,31 @@
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
EXPECT_THAT(
put_result_proto.native_put_document_stats().document_store_latency_ms(),
- Gt(0));
+ Eq(10));
EXPECT_THAT(put_result_proto.native_put_document_stats().document_size(),
Eq(document.ByteSizeLong()));
}
TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexingStats) {
- // Create a large enough document so that index_latency_ms can be longer than
- // 1 ms.
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
- .AddStringProperty("body", kIpsumText)
+ .AddStringProperty("body", "message body")
.Build();
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
PutResultProto put_result_proto = icing.Put(document);
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
EXPECT_THAT(put_result_proto.native_put_document_stats().index_latency_ms(),
- Gt(0));
+ Eq(10));
// No merge should happen.
EXPECT_THAT(
put_result_proto.native_put_document_stats().index_merge_latency_ms(),
@@ -5144,11 +5362,11 @@
EXPECT_FALSE(put_result_proto.native_put_document_stats()
.tokenization_stats()
.exceeded_max_token_num());
- // kIpsumText has 137 tokens.
+ // The input document has 2 tokens.
EXPECT_THAT(put_result_proto.native_put_document_stats()
.tokenization_stats()
.num_tokens_indexed(),
- Eq(137));
+ Eq(2));
}
TEST_F(IcingSearchEngineTest, PutDocumentShouldLogWhetherNumTokensExceeds) {
@@ -5179,8 +5397,6 @@
}
TEST_F(IcingSearchEngineTest, PutDocumentShouldLogIndexMergeLatency) {
- // Create 2 large enough documents so that index_merge_latency_ms can be
- // longer than 1 ms.
DocumentProto document1 = DocumentBuilder()
.SetKey("icing", "fake_type/1")
.SetSchema("Message")
@@ -5195,7 +5411,12 @@
// Create an icing instance with index_merge_size = document1's size.
IcingSearchEngineOptions icing_options = GetDefaultIcingOptions();
icing_options.set_index_merge_size(document1.ByteSizeLong());
- IcingSearchEngine icing(icing_options, GetTestJniCache());
+
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(icing_options, std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
EXPECT_THAT(icing.Put(document1).status(), ProtoIsOk());
@@ -5205,7 +5426,290 @@
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
EXPECT_THAT(
put_result_proto.native_put_document_stats().index_merge_latency_ms(),
- Gt(0));
+ Eq(10));
+}
+
+TEST_F(IcingSearchEngineTest, SearchWithProjectionEmptyFieldPath) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query that will match those documents and use an empty field
+ // mask to request NO properties.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("hello");
+
+ ResultSpecProto result_spec;
+ // Retrieve only one result at a time to make sure that projection works when
+ // retrieving all pages.
+ result_spec.set_num_per_page(1);
+ ResultSpecProto::TypePropertyMask* email_field_mask =
+ result_spec.add_type_property_masks();
+ email_field_mask->set_schema_type("Email");
+ email_field_mask->add_paths("");
+
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that the returned results contain no properties.
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_two));
+
+ results = icing.GetNextPage(results.next_page_token());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+ DocumentProto projected_document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_one));
+}
+
+TEST_F(IcingSearchEngineTest, SearchWithProjectionMultipleFieldPaths) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreatePersonAndEmailSchema()).status(),
+ ProtoIsOk());
+
+ // 1. Add two email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ // 2. Issue a query that will match those documents and request only
+ // 'sender.name' and 'subject' properties.
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("hello");
+
+ ResultSpecProto result_spec;
+ // Retrieve only one result at a time to make sure that projection works when
+ // retrieving all pages.
+ result_spec.set_num_per_page(1);
+ ResultSpecProto::TypePropertyMask* email_field_mask =
+ result_spec.add_type_property_masks();
+ email_field_mask->set_schema_type("Email");
+ email_field_mask->add_paths("sender.name");
+ email_field_mask->add_paths("subject");
+
+ SearchResultProto results =
+ icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+
+ // 3. Verify that the returned results only contain the 'sender.name'
+ // property.
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .Build())
+ .AddStringProperty("subject", "Goodnight Moon!")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_two));
+
+ results = icing.GetNextPage(results.next_page_token());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(1));
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .Build())
+ .AddStringProperty("subject", "Hello World!")
+ .Build();
+ EXPECT_THAT(results.results(0).document(),
+ EqualsProto(projected_document_one));
+}
+
+TEST_F(IcingSearchEngineTest, NativeQueryStatsTest) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates and inserts 5 documents
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ DocumentProto document4 = CreateMessageDocument("namespace", "uri4");
+ DocumentProto document5 = CreateMessageDocument("namespace", "uri5");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.add_namespace_filters("namespace");
+ search_spec.add_schema_type_filters(document1.schema());
+ search_spec.set_query("message");
+
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(2);
+ result_spec.mutable_snippet_spec()->set_max_window_bytes(64);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(1);
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(3);
+
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+ // Searches and gets the first page, 2 results with 2 snippets
+ SearchResultProto search_result =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Ne(kInvalidNextPageToken));
+
+ // Check the stats
+ NativeQueryStats exp_stats;
+ exp_stats.set_num_terms(1);
+ exp_stats.set_num_namespaces_filtered(1);
+ exp_stats.set_num_schema_types_filtered(1);
+ exp_stats.set_ranking_strategy(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+ exp_stats.set_is_first_page(true);
+ exp_stats.set_requested_page_size(2);
+ exp_stats.set_num_results_returned_current_page(2);
+ exp_stats.set_num_documents_scored(5);
+ exp_stats.set_num_results_snippeted(2);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_parse_query_latency_ms(5);
+ exp_stats.set_scoring_latency_ms(5);
+ exp_stats.set_ranking_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ // Second page, 2 result with 1 snippet
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(2));
+ ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
+
+ exp_stats = NativeQueryStats();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(2);
+ exp_stats.set_num_results_returned_current_page(2);
+ exp_stats.set_num_results_snippeted(1);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
+
+ // Third page, 1 result with 0 snippets
+ search_result = icing.GetNextPage(search_result.next_page_token());
+ ASSERT_THAT(search_result.status(), ProtoIsOk());
+ ASSERT_THAT(search_result.results(), SizeIs(1));
+ ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
+
+ exp_stats = NativeQueryStats();
+ exp_stats.set_is_first_page(false);
+ exp_stats.set_requested_page_size(2);
+ exp_stats.set_num_results_returned_current_page(1);
+ exp_stats.set_num_results_snippeted(0);
+ exp_stats.set_latency_ms(5);
+ exp_stats.set_document_retrieval_latency_ms(5);
+ EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
}
} // namespace
diff --git a/icing/index/hit/doc-hit-info.cc b/icing/index/hit/doc-hit-info.cc
index 80dbbde..8e418c8 100644
--- a/icing/index/hit/doc-hit-info.cc
+++ b/icing/index/hit/doc-hit-info.cc
@@ -34,26 +34,28 @@
}
// Doesn't matter which way we compare this array, as long as
// DocHitInfo is unequal when it is unequal.
- return memcmp(max_hit_score_, other.max_hit_score_, sizeof(max_hit_score_)) <
- 0;
+ return memcmp(hit_term_frequency_, other.hit_term_frequency_,
+ sizeof(hit_term_frequency_)) < 0;
}
-void DocHitInfo::UpdateSection(SectionId section_id, Hit::Score hit_score) {
+void DocHitInfo::UpdateSection(SectionId section_id,
+ Hit::TermFrequency hit_term_frequency) {
SectionIdMask section_id_mask = (1u << section_id);
- if (hit_section_ids_mask() & section_id_mask) {
- max_hit_score_[section_id] =
- std::max(max_hit_score_[section_id], hit_score);
- } else {
- max_hit_score_[section_id] = hit_score;
- hit_section_ids_mask_ |= section_id_mask;
+ if ((hit_section_ids_mask() & section_id_mask)) {
+ // If the sectionId is already embedded in the hit_section_ids_mask,
+ // then the term frequencies should always match. So there is no
+ // need to update anything.
+ return;
}
+ hit_term_frequency_[section_id] = hit_term_frequency;
+ hit_section_ids_mask_ |= section_id_mask;
}
void DocHitInfo::MergeSectionsFrom(const DocHitInfo& other) {
SectionIdMask other_mask = other.hit_section_ids_mask();
while (other_mask) {
SectionId section_id = __builtin_ctz(other_mask);
- UpdateSection(section_id, other.max_hit_score(section_id));
+ UpdateSection(section_id, other.hit_term_frequency(section_id));
other_mask &= ~(1u << section_id);
}
}
diff --git a/icing/index/hit/doc-hit-info.h b/icing/index/hit/doc-hit-info.h
index 386822d..8171960 100644
--- a/icing/index/hit/doc-hit-info.h
+++ b/icing/index/hit/doc-hit-info.h
@@ -26,17 +26,18 @@
namespace lib {
// DocHitInfo provides a collapsed view of all hits for a specific term and doc.
-// Hits contain a document_id, section_id and a hit score. The information in
-// multiple hits is collapse into a DocHitInfo by providing a SectionIdMask of
-// all sections that contained a hit for this term as well as the highest hit
-// score of any hit for each section.
+// Hits contain a document_id, section_id and a term frequency. The
+// information in multiple hits is collapse into a DocHitInfo by providing a
+// SectionIdMask of all sections that contained a hit for this term as well as
+// the highest term frequency of any hit for each section.
class DocHitInfo {
public:
explicit DocHitInfo(DocumentId document_id_in = kInvalidDocumentId,
SectionIdMask hit_section_ids_mask = kSectionIdMaskNone)
: document_id_(document_id_in),
hit_section_ids_mask_(hit_section_ids_mask) {
- memset(max_hit_score_, Hit::kMaxHitScore, sizeof(max_hit_score_));
+ memset(hit_term_frequency_, Hit::kDefaultTermFrequency,
+ sizeof(hit_term_frequency_));
}
DocumentId document_id() const { return document_id_; }
@@ -49,8 +50,8 @@
hit_section_ids_mask_ = section_id_mask;
}
- Hit::Score max_hit_score(SectionId section_id) const {
- return max_hit_score_[section_id];
+ Hit::TermFrequency hit_term_frequency(SectionId section_id) const {
+ return hit_term_frequency_[section_id];
}
bool operator<(const DocHitInfo& other) const;
@@ -58,12 +59,14 @@
return (*this < other) == (other < *this);
}
- // Updates the hit_section_ids_mask and max_hit_score for the section, if
- // necessary.
- void UpdateSection(SectionId section_id, Hit::Score hit_score);
+ // Updates the hit_section_ids_mask and hit_term_frequency for the
+ // section, if necessary.
+ void UpdateSection(SectionId section_id,
+ Hit::TermFrequency hit_term_frequency);
- // Merges the sections of other into this. The hit_section_ids_masks are or'd
- // and the max hit score for each section between the two is set.
+ // Merges the sections of other into this. The hit_section_ids_masks are or'd;
+ // if this.hit_term_frequency_[sectionId] has already been defined,
+ // other.hit_term_frequency_[sectionId] value is ignored.
//
// This does not affect the DocumentId of this or other. If callers care about
// only merging sections for DocHitInfos with the same DocumentId, callers
@@ -73,14 +76,15 @@
private:
DocumentId document_id_;
SectionIdMask hit_section_ids_mask_;
- Hit::Score max_hit_score_[kMaxSectionId + 1];
+ Hit::TermFrequency hit_term_frequency_[kMaxSectionId + 1];
} __attribute__((packed));
static_assert(sizeof(DocHitInfo) == 22, "");
// TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions.
static_assert(icing_is_packed_pod<DocHitInfo>::value, "go/icing-ubsan");
-static_assert(sizeof(Hit::Score) == 1,
- "Change how max_hit_score_ is initialized if changing the type "
- "of Hit::Score");
+static_assert(
+ sizeof(Hit::TermFrequency) == 1,
+ "Change how hit_term_frequency_ is initialized if changing the type "
+ "of Hit::TermFrequency");
} // namespace lib
} // namespace icing
diff --git a/icing/index/hit/doc-hit-info_test.cc b/icing/index/hit/doc-hit-info_test.cc
index d8adbc1..15c0de9 100644
--- a/icing/index/hit/doc-hit-info_test.cc
+++ b/icing/index/hit/doc-hit-info_test.cc
@@ -31,44 +31,43 @@
constexpr DocumentId kSomeDocumentId = 12;
constexpr DocumentId kSomeOtherDocumentId = 54;
-TEST(DocHitInfoTest, InitialMaxHitScores) {
+TEST(DocHitInfoTest, InitialMaxHitTermFrequencies) {
DocHitInfo info(kSomeDocumentId);
for (SectionId i = 0; i <= kMaxSectionId; ++i) {
- EXPECT_THAT(info.max_hit_score(i), Eq(Hit::kMaxHitScore));
+ EXPECT_THAT(info.hit_term_frequency(i), Eq(Hit::kDefaultTermFrequency));
}
}
-TEST(DocHitInfoTest, UpdateHitScores) {
+TEST(DocHitInfoTest, UpdateHitTermFrequenciesForTheFirstTime) {
DocHitInfo info(kSomeDocumentId);
- ASSERT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore));
+ ASSERT_THAT(info.hit_term_frequency(3), Eq(Hit::kDefaultTermFrequency));
- // Updating a section for the first time, should change its max hit score,
- // even though the hit score (16) may be lower than the current value returned
- // by info.max_hit_score(3) (kMaxHitScore)
+ // Updating a section for the first time, should change its hit
+ // term_frequency
info.UpdateSection(3, 16);
- EXPECT_THAT(info.max_hit_score(3), Eq(16));
+ EXPECT_THAT(info.hit_term_frequency(3), Eq(16));
+}
- // Updating a section with a hit score lower than the previously set one
- // should not update max hit score.
+TEST(DocHitInfoTest, UpdateSectionLowerHitTermFrequencyHasNoEffect) {
+ DocHitInfo info(kSomeDocumentId);
+ info.UpdateSection(3, 16);
+ ASSERT_THAT(info.hit_term_frequency(3), Eq(16));
+
+ // Updating a section with a term frequency lower than the previously set
+ // one should have no effect.
info.UpdateSection(3, 15);
- EXPECT_THAT(info.max_hit_score(3), Eq(16));
+ EXPECT_THAT(info.hit_term_frequency(3), Eq(16));
+}
- // Updating a section with a hit score higher than the previously set one
- // should update the max hit score.
- info.UpdateSection(3, 17);
- EXPECT_THAT(info.max_hit_score(3), Eq(17));
-
- // Updating a section with kMaxHitScore should *always* set the max hit
- // score to kMaxHitScore (regardless of what value kMaxHitScore is
- // defined with).
- info.UpdateSection(3, Hit::kMaxHitScore);
- EXPECT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore));
-
- // Updating a section that has had kMaxHitScore explicitly set, should
- // *never* change the max hit score (regardless of what value kMaxHitScore
- // is defined with).
+TEST(DocHitInfoTest, UpdateSectionHigherHitTermFrequencyHasNoEffect) {
+ DocHitInfo info(kSomeDocumentId);
info.UpdateSection(3, 16);
- EXPECT_THAT(info.max_hit_score(3), Eq(Hit::kMaxHitScore));
+ ASSERT_THAT(info.hit_term_frequency(3), Eq(16));
+
+ // Updating a section with a term frequency higher than the previously set
+ // one should have no effect.
+ info.UpdateSection(3, 17);
+ EXPECT_THAT(info.hit_term_frequency(3), Eq(16));
}
TEST(DocHitInfoTest, UpdateSectionIdMask) {
@@ -93,7 +92,7 @@
DocHitInfo info2(kSomeOtherDocumentId);
info2.UpdateSection(7, 12);
info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(7), Eq(12));
+ EXPECT_THAT(info1.hit_term_frequency(7), Eq(12));
EXPECT_THAT(info1.document_id(), Eq(kSomeDocumentId));
}
@@ -104,7 +103,7 @@
info1.UpdateSection(3, 16);
DocHitInfo info2(kSomeDocumentId);
info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(3), Eq(16));
+ EXPECT_THAT(info1.hit_term_frequency(3), Eq(16));
}
TEST(DocHitInfoTest, MergeSectionsFromAddsNewSection) {
@@ -114,29 +113,29 @@
DocHitInfo info2(kSomeDocumentId);
info2.UpdateSection(7, 12);
info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(7), Eq(12));
+ EXPECT_THAT(info1.hit_term_frequency(7), Eq(12));
}
-TEST(DocHitInfoTest, MergeSectionsFromSetsHigherHitScore) {
- // Merging should override the value of a section in info1 if the same section
- // is present in info2 with a higher hit score.
+TEST(DocHitInfoTest, MergeSectionsFromHigherHitTermFrequencyHasNoEffect) {
+ // Merging should not override the value of a section in info1 if the same
+ // section is present in info2.
DocHitInfo info1(kSomeDocumentId);
info1.UpdateSection(2, 77);
DocHitInfo info2(kSomeDocumentId);
info2.UpdateSection(2, 89);
info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(2), Eq(89));
+ EXPECT_THAT(info1.hit_term_frequency(2), Eq(77));
}
-TEST(DocHitInfoTest, MergeSectionsFromDoesNotSetLowerHitScore) {
+TEST(DocHitInfoTest, MergeSectionsFromLowerHitScoreHasNoEffect) {
// Merging should not override the hit score of a section in info1 if the same
- // section is present in info2 but with a lower hit score.
+ // section is present in info2.
DocHitInfo info1(kSomeDocumentId);
info1.UpdateSection(5, 108);
DocHitInfo info2(kSomeDocumentId);
info2.UpdateSection(5, 13);
info1.MergeSectionsFrom(info2);
- EXPECT_THAT(info1.max_hit_score(5), Eq(108));
+ EXPECT_THAT(info1.hit_term_frequency(5), Eq(108));
}
TEST(DocHitInfoTest, Comparison) {
@@ -150,7 +149,7 @@
DocHitInfo high_section_id_info(kDocumentId);
high_section_id_info.UpdateSection(1, 12);
- high_section_id_info.UpdateSection(6, Hit::kMaxHitScore);
+ high_section_id_info.UpdateSection(6, Hit::kDefaultTermFrequency);
std::vector<DocHitInfo> infos{info, high_document_id_info,
high_section_id_info};
@@ -160,10 +159,10 @@
// There are no requirements for how DocHitInfos with the same DocumentIds and
// hit masks will compare, but they must not be equal.
- DocHitInfo different_hit_score_info(kDocumentId);
- different_hit_score_info.UpdateSection(1, 76);
- EXPECT_THAT(info < different_hit_score_info,
- Ne(different_hit_score_info < info));
+ DocHitInfo different_term_frequency_info(kDocumentId);
+ different_term_frequency_info.UpdateSection(1, 76);
+ EXPECT_THAT(info < different_term_frequency_info,
+ Ne(different_term_frequency_info < info));
}
} // namespace lib
diff --git a/icing/index/hit/hit.cc b/icing/index/hit/hit.cc
index 1852bd5..2a5a0d9 100644
--- a/icing/index/hit/hit.cc
+++ b/icing/index/hit/hit.cc
@@ -30,8 +30,9 @@
// This hit represents a prefix of a longer term. If exact matches are
// required, then this hit should be ignored.
kPrefixHit = 1,
- // Whether or not the hit has a hit score other than kMaxHitScore.
- kHasScore = 2,
+ // Whether or not the hit has a term_frequency other than
+ // kDefaultTermFrequency.
+ kHasTermFrequency = 2,
kNumFlags = 3,
};
static_assert(kDocumentIdBits + kSectionIdBits + kNumFlags <=
@@ -51,9 +52,10 @@
} // namespace
-Hit::Hit(SectionId section_id, DocumentId document_id, Hit::Score score,
- bool is_in_prefix_section, bool is_prefix_hit)
- : score_(score) {
+Hit::Hit(SectionId section_id, DocumentId document_id,
+ Hit::TermFrequency term_frequency, bool is_in_prefix_section,
+ bool is_prefix_hit)
+ : term_frequency_(term_frequency) {
// Values are stored so that when sorted, they appear in document_id
// descending, section_id ascending, order. Also, all else being
// equal, non-prefix hits sort before prefix hits. So inverted
@@ -64,7 +66,8 @@
kSectionIdBits + kNumFlags, kDocumentIdBits,
&temp_value);
bit_util::BitfieldSet(section_id, kNumFlags, kSectionIdBits, &temp_value);
- bit_util::BitfieldSet(score != kMaxHitScore, kHasScore, 1, &temp_value);
+ bit_util::BitfieldSet(term_frequency != kDefaultTermFrequency,
+ kHasTermFrequency, 1, &temp_value);
bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, 1, &temp_value);
bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection, 1, &temp_value);
value_ = temp_value;
@@ -81,8 +84,8 @@
return bit_util::BitfieldGet(value(), kNumFlags, kSectionIdBits);
}
-bool Hit::has_score() const {
- return bit_util::BitfieldGet(value(), kHasScore, 1);
+bool Hit::has_term_frequency() const {
+ return bit_util::BitfieldGet(value(), kHasTermFrequency, 1);
}
bool Hit::is_prefix_hit() const {
diff --git a/icing/index/hit/hit.h b/icing/index/hit/hit.h
index d1be204..525a5e5 100644
--- a/icing/index/hit/hit.h
+++ b/icing/index/hit/hit.h
@@ -31,18 +31,17 @@
// - a SectionId
// referring to the document and section that the hit corresponds to, as well as
// metadata about the hit:
-// - whether the Hit has a Score other than the default value
+// - whether the Hit has a TermFrequency other than the default value
// - whether the Hit does not appear exactly in the document, but instead
// represents a term that is a prefix of a term in the document
// - whether the Hit came from a section that has prefix expansion enabled
-// and a score for the hit. Ranging from [0,255] a higher score indicates a
-// higher quality hit.
+// and a term frequency for the hit.
// The hit is the most basic unit of the index and, when grouped together by
// term, can be used to encode what terms appear in what documents.
class Hit {
public:
// The datatype used to encode Hit information: the document_id, section_id
- // and the has_score, prefix hit and in prefix section flags.
+ // and the has_term_frequency, prefix hit and in prefix section flags.
using Value = uint32_t;
// WARNING: Changing this value will invalidate any pre-existing posting lists
@@ -53,25 +52,27 @@
// the max in a descending sort.
static constexpr Value kMaxDocumentIdSortValue = 0;
- // A score reflecting the "quality" of this hit. The higher the score, the
- // higher quality the hit.
- using Score = uint8_t;
- // By default, hits are given the highest possible score.
- static constexpr Score kMaxHitScore = std::numeric_limits<Score>::max();
+ // The Term Frequency of a Hit.
+ using TermFrequency = uint8_t;
+ // Max TermFrequency is 255.
+ static constexpr TermFrequency kMaxTermFrequency =
+ std::numeric_limits<TermFrequency>::max();
+ static constexpr TermFrequency kDefaultTermFrequency = 1;
- explicit Hit(Value value = kInvalidValue, Score score = kMaxHitScore)
- : value_(value), score_(score) {}
- Hit(SectionId section_id, DocumentId document_id, Score score,
- bool is_in_prefix_section = false, bool is_prefix_hit = false);
+ explicit Hit(Value value = kInvalidValue,
+ TermFrequency term_frequency = kDefaultTermFrequency)
+ : value_(value), term_frequency_(term_frequency) {}
+ Hit(SectionId section_id, DocumentId document_id,
+ TermFrequency term_frequency, bool is_in_prefix_section = false,
+ bool is_prefix_hit = false);
bool is_valid() const { return value() != kInvalidValue; }
Value value() const { return value_; }
DocumentId document_id() const;
SectionId section_id() const;
- // Whether or not the hit contains a non-default score. Hits with non-default
- // score are considered to be of lower quality.
- bool has_score() const;
- Score score() const { return score_; }
+ // Whether or not the hit contains a valid term frequency.
+ bool has_term_frequency() const;
+ TermFrequency term_frequency() const { return term_frequency_; }
bool is_prefix_hit() const;
bool is_in_prefix_section() const;
@@ -83,10 +84,10 @@
};
private:
- // Value and score must be in this order.
+ // Value and TermFrequency must be in this order.
// Value bits layout: 5 unused + 20 document_id + 4 section id + 3 flags.
Value value_;
- Score score_;
+ TermFrequency term_frequency_;
} __attribute__((packed));
static_assert(sizeof(Hit) == 5, "");
// TODO(b/138991332) decide how to remove/replace all is_packed_pod assertions.
diff --git a/icing/index/hit/hit_test.cc b/icing/index/hit/hit_test.cc
index 17db66b..d47ca37 100644
--- a/icing/index/hit/hit_test.cc
+++ b/icing/index/hit/hit_test.cc
@@ -33,46 +33,46 @@
static constexpr DocumentId kSomeDocumentId = 24;
static constexpr SectionId kSomeSectionid = 5;
-static constexpr Hit::Score kSomeHitScore = 57;
+static constexpr Hit::TermFrequency kSomeTermFrequency = 57;
-TEST(HitTest, HasScoreFlag) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
- EXPECT_THAT(h1.has_score(), IsFalse());
- EXPECT_THAT(h1.score(), Eq(Hit::kMaxHitScore));
+TEST(HitTest, HasTermFrequencyFlag) {
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
+ EXPECT_THAT(h1.has_term_frequency(), IsFalse());
+ EXPECT_THAT(h1.term_frequency(), Eq(Hit::kDefaultTermFrequency));
- Hit h2(kSomeSectionid, kSomeDocumentId, kSomeHitScore);
- EXPECT_THAT(h2.has_score(), IsTrue());
- EXPECT_THAT(h2.score(), Eq(kSomeHitScore));
+ Hit h2(kSomeSectionid, kSomeDocumentId, kSomeTermFrequency);
+ EXPECT_THAT(h2.has_term_frequency(), IsTrue());
+ EXPECT_THAT(h2.term_frequency(), Eq(kSomeTermFrequency));
}
TEST(HitTest, IsPrefixHitFlag) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
EXPECT_THAT(h1.is_prefix_hit(), IsFalse());
- Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/false);
EXPECT_THAT(h2.is_prefix_hit(), IsFalse());
- Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false, /*is_prefix_hit=*/true);
EXPECT_THAT(h3.is_prefix_hit(), IsTrue());
}
TEST(HitTest, IsInPrefixSectionFlag) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
EXPECT_THAT(h1.is_in_prefix_section(), IsFalse());
- Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ Hit h2(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
EXPECT_THAT(h2.is_in_prefix_section(), IsFalse());
- Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore,
+ Hit h3(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
EXPECT_THAT(h3.is_in_prefix_section(), IsTrue());
}
TEST(HitTest, Accessors) {
- Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kMaxHitScore);
+ Hit h1(kSomeSectionid, kSomeDocumentId, Hit::kDefaultTermFrequency);
EXPECT_THAT(h1.document_id(), Eq(kSomeDocumentId));
EXPECT_THAT(h1.section_id(), Eq(kSomeSectionid));
}
@@ -88,47 +88,53 @@
Hit explicit_valid(kSomeValue);
EXPECT_THAT(explicit_valid.is_valid(), IsTrue());
- Hit maximum_document_id_hit(kSomeSectionid, kMaxDocumentId, kSomeHitScore);
+ Hit maximum_document_id_hit(kSomeSectionid, kMaxDocumentId,
+ kSomeTermFrequency);
EXPECT_THAT(maximum_document_id_hit.is_valid(), IsTrue());
- Hit maximum_section_id_hit(kMaxSectionId, kSomeDocumentId, kSomeHitScore);
+ Hit maximum_section_id_hit(kMaxSectionId, kSomeDocumentId,
+ kSomeTermFrequency);
EXPECT_THAT(maximum_section_id_hit.is_valid(), IsTrue());
- Hit minimum_document_id_hit(kSomeSectionid, 0, kSomeHitScore);
+ Hit minimum_document_id_hit(kSomeSectionid, 0, kSomeTermFrequency);
EXPECT_THAT(minimum_document_id_hit.is_valid(), IsTrue());
- Hit minimum_section_id_hit(0, kSomeDocumentId, kSomeHitScore);
+ Hit minimum_section_id_hit(0, kSomeDocumentId, kSomeTermFrequency);
EXPECT_THAT(minimum_section_id_hit.is_valid(), IsTrue());
}
TEST(HitTest, Comparison) {
- Hit hit(1, 243, Hit::kMaxHitScore);
+ Hit hit(1, 243, Hit::kDefaultTermFrequency);
// DocumentIds are sorted in ascending order. So a hit with a lower
// document_id should be considered greater than one with a higher
// document_id.
- Hit higher_document_id_hit(1, 2409, Hit::kMaxHitScore);
- Hit higher_section_id_hit(15, 243, Hit::kMaxHitScore);
- // Whether or not a hit score was set is considered, but the score itself is
- // not.
- Hit hitscore_hit(1, 243, 12);
- Hit prefix_hit(1, 243, Hit::kMaxHitScore, /*is_in_prefix_section=*/false,
+ Hit higher_document_id_hit(1, 2409, Hit::kDefaultTermFrequency);
+ Hit higher_section_id_hit(15, 243, Hit::kDefaultTermFrequency);
+ // Whether or not a term frequency was set is considered, but the term
+ // frequency itself is not.
+ Hit term_frequency_hit(1, 243, 12);
+ Hit prefix_hit(1, 243, Hit::kDefaultTermFrequency,
+ /*is_in_prefix_section=*/false,
/*is_prefix_hit=*/true);
- Hit hit_in_prefix_section(1, 243, Hit::kMaxHitScore,
+ Hit hit_in_prefix_section(1, 243, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true,
/*is_prefix_hit=*/false);
- std::vector<Hit> hits{
- hit, higher_document_id_hit, higher_section_id_hit, hitscore_hit,
- prefix_hit, hit_in_prefix_section};
+ std::vector<Hit> hits{hit,
+ higher_document_id_hit,
+ higher_section_id_hit,
+ term_frequency_hit,
+ prefix_hit,
+ hit_in_prefix_section};
std::sort(hits.begin(), hits.end());
- EXPECT_THAT(hits,
- ElementsAre(higher_document_id_hit, hit, hit_in_prefix_section,
- prefix_hit, hitscore_hit, higher_section_id_hit));
+ EXPECT_THAT(
+ hits, ElementsAre(higher_document_id_hit, hit, hit_in_prefix_section,
+ prefix_hit, term_frequency_hit, higher_section_id_hit));
- Hit higher_hitscore_hit(1, 243, 108);
- // Hit score value is not considered when comparing hits.
- EXPECT_THAT(hitscore_hit, Not(Lt(higher_hitscore_hit)));
- EXPECT_THAT(higher_hitscore_hit, Not(Lt(hitscore_hit)));
+ Hit higher_term_frequency_hit(1, 243, 108);
+ // The term frequency value is not considered when comparing hits.
+ EXPECT_THAT(term_frequency_hit, Not(Lt(higher_term_frequency_hit)));
+ EXPECT_THAT(higher_term_frequency_hit, Not(Lt(term_frequency_hit)));
}
} // namespace
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
index 9e57993..892263b 100644
--- a/icing/index/index-processor.cc
+++ b/icing/index/index-processor.cc
@@ -37,7 +37,6 @@
#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer.h"
#include "icing/util/status-macros.h"
-#include "icing/util/timer.h"
namespace icing {
namespace lib {
@@ -46,20 +45,22 @@
IndexProcessor::Create(const SchemaStore* schema_store,
const LanguageSegmenter* lang_segmenter,
const Normalizer* normalizer, Index* index,
- const IndexProcessor::Options& options) {
+ const IndexProcessor::Options& options,
+ const Clock* clock) {
ICING_RETURN_ERROR_IF_NULL(schema_store);
ICING_RETURN_ERROR_IF_NULL(lang_segmenter);
ICING_RETURN_ERROR_IF_NULL(normalizer);
ICING_RETURN_ERROR_IF_NULL(index);
+ ICING_RETURN_ERROR_IF_NULL(clock);
return std::unique_ptr<IndexProcessor>(new IndexProcessor(
- schema_store, lang_segmenter, normalizer, index, options));
+ schema_store, lang_segmenter, normalizer, index, options, clock));
}
libtextclassifier3::Status IndexProcessor::IndexDocument(
const DocumentProto& document, DocumentId document_id,
NativePutDocumentStats* put_document_stats) {
- Timer index_timer;
+ std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
if (index_->last_added_document_id() != kInvalidDocumentId &&
document_id <= index_->last_added_document_id()) {
@@ -84,6 +85,8 @@
tokenizer->Tokenize(subcontent));
while (itr->Advance()) {
if (++num_tokens > options_.max_tokens_per_document) {
+ // Index all tokens buffered so far.
+ editor.IndexAllBufferedTerms();
if (put_document_stats != nullptr) {
put_document_stats->mutable_tokenization_stats()
->set_exceeded_max_token_num(true);
@@ -95,16 +98,16 @@
return absl_ports::ResourceExhaustedError(
"Max number of tokens reached!");
case Options::TokenLimitBehavior::kSuppressError:
- return libtextclassifier3::Status::OK;
+ return overall_status;
}
}
std::string term = normalizer_.NormalizeTerm(itr->GetToken().text);
- // Add this term to the index. Even if adding this hit fails, we keep
+ // Add this term to Hit buffer. Even if adding this hit fails, we keep
// trying to add more hits because it's possible that future hits could
// still be added successfully. For instance if the lexicon is full, we
// might fail to add a hit for a new term, but should still be able to
// add hits for terms that are already in the index.
- auto status = editor.AddHit(term.c_str());
+ auto status = editor.BufferTerm(term.c_str());
if (overall_status.ok() && !status.ok()) {
// If we've succeeded to add everything so far, set overall_status to
// represent this new failure. If we've already failed, no need to
@@ -114,11 +117,20 @@
}
}
}
+ // Add all the seen terms to the index with their term frequency.
+ auto status = editor.IndexAllBufferedTerms();
+ if (overall_status.ok() && !status.ok()) {
+ // If we've succeeded so far, set overall_status to
+ // represent this new failure. If we've already failed, no need to
+ // update the status - we're already going to return a resource
+ // exhausted error.
+ overall_status = status;
+ }
}
if (put_document_stats != nullptr) {
put_document_stats->set_index_latency_ms(
- index_timer.GetElapsedMilliseconds());
+ index_timer->GetElapsedMilliseconds());
put_document_stats->mutable_tokenization_stats()->set_num_tokens_indexed(
num_tokens);
}
@@ -127,7 +139,7 @@
if (overall_status.ok() && index_->WantsMerge()) {
ICING_VLOG(1) << "Merging the index at docid " << document_id << ".";
- Timer merge_timer;
+ std::unique_ptr<Timer> merge_timer = clock_.GetNewTimer();
libtextclassifier3::Status merge_status = index_->Merge();
if (!merge_status.ok()) {
@@ -146,7 +158,7 @@
if (put_document_stats != nullptr) {
put_document_stats->set_index_merge_latency_ms(
- merge_timer.GetElapsedMilliseconds());
+ merge_timer->GetElapsedMilliseconds());
}
}
diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h
index 91719d0..2eb4ad8 100644
--- a/icing/index/index-processor.h
+++ b/icing/index/index-processor.h
@@ -59,7 +59,8 @@
// FAILED_PRECONDITION if any of the pointers is null.
static libtextclassifier3::StatusOr<std::unique_ptr<IndexProcessor>> Create(
const SchemaStore* schema_store, const LanguageSegmenter* lang_segmenter,
- const Normalizer* normalizer, Index* index, const Options& options);
+ const Normalizer* normalizer, Index* index, const Options& options,
+ const Clock* clock);
// Add document to the index, associated with document_id. If the number of
// tokens in the document exceeds max_tokens_per_document, then only the first
@@ -88,12 +89,13 @@
IndexProcessor(const SchemaStore* schema_store,
const LanguageSegmenter* lang_segmenter,
const Normalizer* normalizer, Index* index,
- const Options& options)
+ const Options& options, const Clock* clock)
: schema_store_(*schema_store),
lang_segmenter_(*lang_segmenter),
normalizer_(*normalizer),
index_(index),
- options_(options) {}
+ options_(options),
+ clock_(*clock) {}
std::string NormalizeToken(const Token& token);
@@ -102,6 +104,7 @@
const Normalizer& normalizer_;
Index* const index_;
const Options options_;
+ const Clock& clock_;
};
} // namespace lib
diff --git a/icing/index/index-processor_benchmark.cc b/icing/index/index-processor_benchmark.cc
index 584cb9b..96a390b 100644
--- a/icing/index/index-processor_benchmark.cc
+++ b/icing/index/index-processor_benchmark.cc
@@ -147,10 +147,10 @@
.ValueOrDie();
}
-std::unique_ptr<SchemaStore> CreateSchemaStore() {
+std::unique_ptr<SchemaStore> CreateSchemaStore(const Clock* clock) {
Filesystem filesystem;
std::unique_ptr<SchemaStore> schema_store =
- SchemaStore::Create(&filesystem, GetTestTempDir()).ValueOrDie();
+ SchemaStore::Create(&filesystem, GetTestTempDir(), clock).ValueOrDie();
SchemaProto schema;
CreateFakeTypeConfig(schema.add_types());
@@ -170,14 +170,14 @@
std::unique_ptr<IndexProcessor> CreateIndexProcessor(
const SchemaStore* schema_store,
const LanguageSegmenter* language_segmenter, const Normalizer* normalizer,
- Index* index) {
+ Index* index, const Clock* clock) {
IndexProcessor::Options processor_options{};
processor_options.max_tokens_per_document = 1024 * 1024 * 10;
processor_options.token_limit_behavior =
IndexProcessor::Options::TokenLimitBehavior::kReturnError;
return IndexProcessor::Create(schema_store, language_segmenter, normalizer,
- index, processor_options)
+ index, processor_options, clock)
.ValueOrDie();
}
@@ -200,10 +200,11 @@
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore();
+ Clock clock;
+ std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
std::unique_ptr<IndexProcessor> index_processor =
CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
- normalizer.get(), index.get());
+ normalizer.get(), index.get(), &clock);
DocumentProto input_document = CreateDocumentWithOneProperty(state.range(0));
@@ -250,10 +251,11 @@
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore();
+ Clock clock;
+ std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
std::unique_ptr<IndexProcessor> index_processor =
CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
- normalizer.get(), index.get());
+ normalizer.get(), index.get(), &clock);
DocumentProto input_document =
CreateDocumentWithTenProperties(state.range(0));
@@ -301,10 +303,11 @@
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore();
+ Clock clock;
+ std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
std::unique_ptr<IndexProcessor> index_processor =
CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
- normalizer.get(), index.get());
+ normalizer.get(), index.get(), &clock);
DocumentProto input_document =
CreateDocumentWithDiacriticLetters(state.range(0));
@@ -352,10 +355,11 @@
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore();
+ Clock clock;
+ std::unique_ptr<SchemaStore> schema_store = CreateSchemaStore(&clock);
std::unique_ptr<IndexProcessor> index_processor =
CreateIndexProcessor(schema_store.get(), language_segmenter.get(),
- normalizer.get(), index.get());
+ normalizer.get(), index.get(), &clock);
DocumentProto input_document = CreateDocumentWithHiragana(state.range(0));
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index e193842..bdd9575 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -19,6 +19,7 @@
#include <memory>
#include <string>
#include <string_view>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -44,6 +45,8 @@
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
@@ -102,10 +105,12 @@
class IndexProcessorTest : public Test {
protected:
void SetUp() override {
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
index_dir_ = GetTestTempDir() + "/index_test";
Index::Options options(index_dir_, /*index_merge_size=*/1024 * 1024);
@@ -120,11 +125,11 @@
ICING_ASSERT_OK_AND_ASSIGN(
normalizer_,
normalizer_factory::Create(
-
/*max_term_byte_size=*/std::numeric_limits<int32_t>::max()));
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store_, SchemaStore::Create(&filesystem_, GetTestTempDir()));
+ schema_store_,
+ SchemaStore::Create(&filesystem_, GetTestTempDir(), &fake_clock_));
SchemaProto schema = CreateFakeSchema();
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
@@ -137,7 +142,7 @@
index_processor_,
IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
normalizer_.get(), index_.get(),
- processor_options));
+ processor_options, &fake_clock_));
mock_icing_filesystem_ = std::make_unique<IcingMockFilesystem>();
}
@@ -149,6 +154,7 @@
Filesystem filesystem_;
IcingFilesystem icing_filesystem_;
+ FakeClock fake_clock_;
std::string index_dir_;
std::unique_ptr<LanguageSegmenter> lang_segmenter_;
@@ -238,24 +244,26 @@
processor_options.token_limit_behavior =
IndexProcessor::Options::TokenLimitBehavior::kReturnError;
- EXPECT_THAT(IndexProcessor::Create(/*schema_store=*/nullptr,
- lang_segmenter_.get(), normalizer_.get(),
- index_.get(), processor_options),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ IndexProcessor::Create(/*schema_store=*/nullptr, lang_segmenter_.get(),
+ normalizer_.get(), index_.get(), processor_options,
+ &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(IndexProcessor::Create(
- schema_store_.get(), /*lang_segmenter=*/nullptr,
- normalizer_.get(), index_.get(), processor_options),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ IndexProcessor::Create(schema_store_.get(), /*lang_segmenter=*/nullptr,
+ normalizer_.get(), index_.get(), processor_options,
+ &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
/*normalizer=*/nullptr, index_.get(),
- processor_options),
+ processor_options, &fake_clock_),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
normalizer_.get(), /*index=*/nullptr,
- processor_options),
+ processor_options, &fake_clock_),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
@@ -285,9 +293,11 @@
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("hello", kSectionIdMaskAll,
TermMatchType::EXACT_ONLY));
- EXPECT_THAT(GetHits(std::move(itr)),
- ElementsAre(EqualsDocHitInfo(
- kDocumentId0, std::vector<SectionId>{kExactSectionId})));
+ std::vector<DocHitInfo> hits = GetHits(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
+ {kExactSectionId, 1}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId0, expectedMap)));
ICING_ASSERT_OK_AND_ASSIGN(
itr, index_->GetIterator("hello", 1U << kPrefixedSectionId,
@@ -306,12 +316,18 @@
EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId0), IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+ std::string coffeeRepeatedString = "coffee";
+ for (int i = 0; i < Hit::kMaxTermFrequency + 1; i++) {
+ coffeeRepeatedString += " coffee";
+ }
+
document =
DocumentBuilder()
.SetKey("icing", "fake_type/2")
.SetSchema(std::string(kFakeType))
- .AddStringProperty(std::string(kExactProperty), "pitbull")
- .AddStringProperty(std::string(kPrefixedProperty), "mr. world wide")
+ .AddStringProperty(std::string(kExactProperty), coffeeRepeatedString)
+ .AddStringProperty(std::string(kPrefixedProperty),
+ "mr. world world wide")
.Build();
EXPECT_THAT(index_processor_->IndexDocument(document, kDocumentId1), IsOk());
EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
@@ -319,19 +335,32 @@
ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("world", kSectionIdMaskAll,
TermMatchType::EXACT_ONLY));
+ std::vector<DocHitInfo> hits = GetHits(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap1{
+ {kPrefixedSectionId, 2}};
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap2{
+ {kExactSectionId, 1}};
EXPECT_THAT(
- GetHits(std::move(itr)),
- ElementsAre(EqualsDocHitInfo(kDocumentId1,
- std::vector<SectionId>{kPrefixedSectionId}),
- EqualsDocHitInfo(kDocumentId0,
- std::vector<SectionId>{kExactSectionId})));
+ hits, ElementsAre(
+ EqualsDocHitInfoWithTermFrequency(kDocumentId1, expectedMap1),
+ EqualsDocHitInfoWithTermFrequency(kDocumentId0, expectedMap2)));
ICING_ASSERT_OK_AND_ASSIGN(
itr, index_->GetIterator("world", 1U << kPrefixedSectionId,
TermMatchType::EXACT_ONLY));
- EXPECT_THAT(GetHits(std::move(itr)),
- ElementsAre(EqualsDocHitInfo(
- kDocumentId1, std::vector<SectionId>{kPrefixedSectionId})));
+ hits = GetHits(std::move(itr));
+ std::unordered_map<SectionId, Hit::TermFrequency> expectedMap{
+ {kPrefixedSectionId, 2}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId1, expectedMap)));
+
+ ICING_ASSERT_OK_AND_ASSIGN(itr,
+ index_->GetIterator("coffee", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
+ hits = GetHits(std::move(itr));
+ expectedMap = {{kExactSectionId, Hit::kMaxTermFrequency}};
+ EXPECT_THAT(hits, ElementsAre(EqualsDocHitInfoWithTermFrequency(
+ kDocumentId1, expectedMap)));
}
TEST_F(IndexProcessorTest, DocWithNestedProperty) {
@@ -389,7 +418,8 @@
ICING_ASSERT_OK_AND_ASSIGN(
index_processor_,
IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
- normalizer_.get(), index_.get(), options));
+ normalizer_.get(), index_.get(), options,
+ &fake_clock_));
DocumentProto document =
DocumentBuilder()
@@ -428,7 +458,8 @@
ICING_ASSERT_OK_AND_ASSIGN(
index_processor_,
IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
- normalizer_.get(), index_.get(), options));
+ normalizer_.get(), index_.get(), options,
+ &fake_clock_));
DocumentProto document =
DocumentBuilder()
@@ -468,7 +499,8 @@
ICING_ASSERT_OK_AND_ASSIGN(
index_processor_,
IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
- normalizer.get(), index_.get(), options));
+ normalizer.get(), index_.get(), options,
+ &fake_clock_));
DocumentProto document =
DocumentBuilder()
@@ -590,6 +622,23 @@
}
TEST_F(IndexProcessorTest, NonAsciiIndexing) {
+ language_segmenter_factory::SegmenterOptions segmenter_options(
+ ULOC_SIMPLIFIED_CHINESE);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ lang_segmenter_,
+ language_segmenter_factory::Create(std::move(segmenter_options)));
+
+ IndexProcessor::Options processor_options;
+ processor_options.max_tokens_per_document = 1000;
+ processor_options.token_limit_behavior =
+ IndexProcessor::Options::TokenLimitBehavior::kReturnError;
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ index_processor_,
+ IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
+ normalizer_.get(), index_.get(),
+ processor_options, &fake_clock_));
+
DocumentProto document =
DocumentBuilder()
.SetKey("icing", "fake_type/1")
@@ -618,8 +667,8 @@
ICING_ASSERT_OK_AND_ASSIGN(
index_processor_,
IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
- normalizer_.get(), index_.get(),
- processor_options));
+ normalizer_.get(), index_.get(), processor_options,
+ &fake_clock_));
// This is the maximum token length that an empty lexicon constructed for a
// lite index with merge size of 1MiB can support.
@@ -679,8 +728,8 @@
ICING_ASSERT_OK_AND_ASSIGN(
index_processor_,
IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
- normalizer_.get(), index_.get(),
- processor_options));
+ normalizer_.get(), index_.get(), processor_options,
+ &fake_clock_));
DocumentId doc_id = 0;
// Have determined experimentally that indexing 3373 documents with this text
// will cause the LiteIndex to fill up. Further indexing will fail unless the
@@ -736,8 +785,8 @@
ICING_ASSERT_OK_AND_ASSIGN(
index_processor_,
IndexProcessor::Create(schema_store_.get(), lang_segmenter_.get(),
- normalizer_.get(), index_.get(),
- processor_options));
+ normalizer_.get(), index_.get(), processor_options,
+ &fake_clock_));
// 3. Index one document. This should fit in the LiteIndex without requiring a
// merge.
diff --git a/icing/index/index.cc b/icing/index/index.cc
index 1fb0dc0..bd41b51 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -71,7 +71,7 @@
}
// Helper function to check if a term is in the given namespaces.
-// TODO(samzheng): Implement a method PropertyReadersAll.HasAnyProperty().
+// TODO(tjbarron): Implement a method PropertyReadersAll.HasAnyProperty().
bool IsTermInNamespaces(
const IcingDynamicTrie::PropertyReadersAll& property_reader,
uint32_t value_index, const std::vector<NamespaceId>& namespace_ids) {
@@ -277,8 +277,7 @@
std::move(main_term_metadata_list), num_to_return);
}
-libtextclassifier3::Status Index::Editor::AddHit(const char* term,
- Hit::Score score) {
+libtextclassifier3::Status Index::Editor::BufferTerm(const char* term) {
// Step 1: See if this term is already in the lexicon
uint32_t tvi;
auto tvi_or = lite_index_->GetTermId(term);
@@ -287,8 +286,10 @@
if (tvi_or.ok()) {
tvi = tvi_or.ValueOrDie();
if (seen_tokens_.find(tvi) != seen_tokens_.end()) {
- ICING_VLOG(1) << "A hit for term " << term
- << " has already been added. Skipping.";
+ ICING_VLOG(1) << "Updating term frequency for term " << term;
+ if (seen_tokens_[tvi] != Hit::kMaxTermFrequency) {
+ ++seen_tokens_[tvi];
+ }
return libtextclassifier3::Status::OK;
}
ICING_VLOG(1) << "Term " << term
@@ -302,14 +303,20 @@
ICING_ASSIGN_OR_RETURN(
tvi, lite_index_->InsertTerm(term, term_match_type_, namespace_id_));
}
- seen_tokens_.insert(tvi);
+ // Token seen for the first time in the current document.
+ seen_tokens_[tvi] = 1;
+ return libtextclassifier3::Status::OK;
+}
- // Step 3: Add the hit itself
- Hit hit(section_id_, document_id_, score,
- term_match_type_ == TermMatchType::PREFIX);
- ICING_ASSIGN_OR_RETURN(uint32_t term_id,
- term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- return lite_index_->AddHit(term_id, hit);
+libtextclassifier3::Status Index::Editor::IndexAllBufferedTerms() {
+ for (auto itr = seen_tokens_.begin(); itr != seen_tokens_.end(); itr++) {
+ Hit hit(section_id_, document_id_, /*term_frequency=*/itr->second,
+ term_match_type_ == TermMatchType::PREFIX);
+ ICING_ASSIGN_OR_RETURN(
+ uint32_t term_id, term_id_codec_->EncodeTvi(itr->first, TviType::LITE));
+ ICING_RETURN_IF_ERROR(lite_index_->AddHit(term_id, hit));
+ }
+ return libtextclassifier3::Status::OK;
}
} // namespace lib
diff --git a/icing/index/index.h b/icing/index/index.h
index 1305b2c..a4ea719 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -197,14 +197,16 @@
namespace_id_(namespace_id),
section_id_(section_id) {}
- libtextclassifier3::Status AddHit(const char* term,
- Hit::Score score = Hit::kMaxHitScore);
+ // Buffer the term in seen_tokens_.
+ libtextclassifier3::Status BufferTerm(const char* term);
+ // Index all the terms stored in seen_tokens_.
+ libtextclassifier3::Status IndexAllBufferedTerms();
private:
// The Editor is able to store previously seen terms as TermIds. This is
// is more efficient than a client doing this externally because TermIds are
// not exposed to clients.
- std::unordered_set<uint32_t> seen_tokens_;
+ std::unordered_map<uint32_t, Hit::TermFrequency> seen_tokens_;
const TermIdCodec* term_id_codec_;
LiteIndex* lite_index_;
DocumentId document_id_;
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index 1d12274..3479ab1 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -177,7 +177,8 @@
TEST_F(IndexTest, AdvancePastEnd) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -200,7 +201,8 @@
TEST_F(IndexTest, AdvancePastEndAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -225,7 +227,8 @@
TEST_F(IndexTest, SingleHitSingleTermIndex) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -240,7 +243,8 @@
TEST_F(IndexTest, SingleHitSingleTermIndexAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -257,8 +261,9 @@
TEST_F(IndexTest, SingleHitMultiTermIndex) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -273,8 +278,9 @@
TEST_F(IndexTest, SingleHitMultiTermIndexAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -291,8 +297,9 @@
TEST_F(IndexTest, NoHitMultiTermIndex) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -305,8 +312,9 @@
TEST_F(IndexTest, NoHitMultiTermIndexAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -321,15 +329,18 @@
TEST_F(IndexTest, MultiHitMultiTermIndex) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -345,15 +356,18 @@
TEST_F(IndexTest, MultiHitMultiTermIndexAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -371,11 +385,13 @@
TEST_F(IndexTest, MultiHitSectionRestrict) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
SectionIdMask desired_section = 1U << kSectionId2;
ICING_ASSERT_OK_AND_ASSIGN(
@@ -391,11 +407,13 @@
TEST_F(IndexTest, MultiHitSectionRestrictAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -415,12 +433,13 @@
EXPECT_THAT(size, Eq(0));
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(size, index_->GetElementsSize());
EXPECT_THAT(size, Gt(0));
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(int64_t new_size, index_->GetElementsSize());
EXPECT_THAT(new_size, Eq(size));
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -435,7 +454,8 @@
TEST_F(IndexTest, PrefixHit) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -450,7 +470,8 @@
TEST_F(IndexTest, PrefixHitAfterMerge) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -467,11 +488,13 @@
TEST_F(IndexTest, MultiPrefixHit) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -488,11 +511,13 @@
TEST_F(IndexTest, MultiPrefixHitAfterMerge) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -511,11 +536,13 @@
TEST_F(IndexTest, NoExactHitInPrefixQuery) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -529,11 +556,13 @@
TEST_F(IndexTest, NoExactHitInPrefixQueryAfterMerge) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -549,8 +578,9 @@
TEST_F(IndexTest, PrefixHitDedupe) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -564,8 +594,9 @@
TEST_F(IndexTest, PrefixHitDedupeAfterMerge) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -621,8 +652,9 @@
TEST_F(IndexTest, NonAsciiTerms) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("こんにちは"), IsOk());
- ASSERT_THAT(edit.AddHit("あなた"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("こんにちは"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("あなた"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -642,8 +674,9 @@
TEST_F(IndexTest, NonAsciiTermsAfterMerge) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("こんにちは"), IsOk());
- ASSERT_THAT(edit.AddHit("あなた"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("こんにちは"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("あなた"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
@@ -684,7 +717,11 @@
index_->Edit(document_id, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
size_t idx = uniform(random);
- status = edit.AddHit(query_terms.at(idx).c_str());
+ status = edit.BufferTerm(query_terms.at(idx).c_str());
+ if (!status.ok()) {
+ break;
+ }
+ status = edit.IndexAllBufferedTerms();
if (!status.ok()) {
break;
}
@@ -696,11 +733,10 @@
Index::Editor edit =
index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_THAT(edit.AddHit("bar"),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_THAT(edit.AddHit("baz"),
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("baz"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
for (int i = 0; i < query_terms.size(); i += 25) {
@@ -737,7 +773,11 @@
index_->Edit(document_id, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
size_t idx = uniform(random);
- status = edit.AddHit(query_terms.at(idx).c_str());
+ status = edit.BufferTerm(query_terms.at(idx).c_str());
+ if (!status.ok()) {
+ break;
+ }
+ status = edit.IndexAllBufferedTerms();
if (!status.ok()) {
break;
}
@@ -751,11 +791,10 @@
Index::Editor edit =
index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_THAT(edit.AddHit("bar"),
- StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_THAT(edit.AddHit("baz"),
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("baz"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
EXPECT_THAT(index_->last_added_document_id(), Eq(document_id - 1));
@@ -763,9 +802,10 @@
ICING_ASSERT_OK(index_->Merge());
edit =
index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY, 0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
- EXPECT_THAT(edit.AddHit("baz"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("baz"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("bar", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
@@ -790,8 +830,9 @@
// Add some content to the index
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("bar"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// Close the index.
index_.reset();
@@ -820,8 +861,9 @@
// Add some content to the index
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("bar"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
EXPECT_THAT(index_->PersistToDisk(), IsOk());
// Close the index.
@@ -847,8 +889,9 @@
// Add some content to the index
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("bar"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
EXPECT_THAT(index_->PersistToDisk(), IsOk());
@@ -881,7 +924,8 @@
TEST_F(IndexTest, FindTermByPrefixShouldReturnEmpty) {
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"foo", /*namespace_ids=*/{0},
/*num_to_return=*/0),
@@ -903,8 +947,9 @@
TEST_F(IndexTest, FindTermByPrefixShouldReturnCorrectResult) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("bar"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// "b" should only match "bar" but not "foo".
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"b", /*namespace_ids=*/{0},
@@ -923,9 +968,10 @@
TEST_F(IndexTest, FindTermByPrefixShouldRespectNumToReturn) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fo"), IsOk());
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// We have 3 results but only 2 should be returned.
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
@@ -944,13 +990,15 @@
Index::Editor edit1 =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit1.AddHit("fo"), IsOk());
- EXPECT_THAT(edit1.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk());
Index::Editor edit2 =
index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/1);
- EXPECT_THAT(edit2.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
// namespace with id 0 has 2 results.
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
@@ -982,17 +1030,20 @@
Index::Editor edit1 =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit1.AddHit("fo"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk());
Index::Editor edit2 =
index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/1);
- EXPECT_THAT(edit2.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
Index::Editor edit3 =
index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/2);
- EXPECT_THAT(edit3.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit3.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk());
// Should return "foo" and "fool" which are in namespaces with ids 1 and 2.
EXPECT_THAT(
@@ -1015,17 +1066,20 @@
Index::Editor edit1 =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit1.AddHit("fo"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("fo"), IsOk());
+ EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk());
Index::Editor edit2 =
index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/1);
- EXPECT_THAT(edit2.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
Index::Editor edit3 =
index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/2);
- EXPECT_THAT(edit3.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit3.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit3.IndexAllBufferedTerms(), IsOk());
// Should return "fo", "foo" and "fool" across all namespaces.
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{},
@@ -1049,13 +1103,15 @@
Index::Editor edit1 =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit1.AddHit("foo"), IsOk());
- EXPECT_THAT(edit1.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit1.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit1.IndexAllBufferedTerms(), IsOk());
Index::Editor edit2 =
index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit2.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit2.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit2.IndexAllBufferedTerms(), IsOk());
// 'foo' has 1 hit, 'fool' has 2 hits.
EXPECT_THAT(
@@ -1079,30 +1135,38 @@
Index::Editor edit =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId4, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId5, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId6, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId7, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// 'foo' has 1 hit, 'fool' has 8 hits.
EXPECT_THAT(
@@ -1125,14 +1189,16 @@
Index::Editor edit =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// 'foo' has 1 hit in the main index, 'fool' has 1 hit in the main index and
// 1 hit in the lite index.
@@ -1147,13 +1213,15 @@
Index::Editor edit =
index_->Edit(kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// 'foo' has 1 hit in the main index, 'fool' has 1 hit in the lite index.
EXPECT_THAT(index_->FindTermsByPrefix(/*prefix=*/"f", /*namespace_ids=*/{0},
@@ -1171,7 +1239,8 @@
// Add an element.
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(size, index_->GetElementsSize());
EXPECT_THAT(size, Gt(0));
@@ -1183,19 +1252,23 @@
TEST_F(IndexTest, ExactResultsFromLiteAndMain) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foot"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("footer"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("footer"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -1212,19 +1285,23 @@
TEST_F(IndexTest, PrefixResultsFromLiteAndMain) {
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
- EXPECT_THAT(edit.AddHit("fool"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foot"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("footer"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("footer"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- EXPECT_THAT(edit.AddHit("foo"), IsOk());
+ EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
@@ -1244,19 +1321,23 @@
// then add another doc to the lite index.
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("footer"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("footer"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
std::string out0;
index_->GetDebugInfo(/*verbosity=*/0, &out0);
@@ -1269,7 +1350,8 @@
// Add one more doc to the lite index. Debug strings should change.
edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("far"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("far"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
std::string out2;
index_->GetDebugInfo(/*verbosity=*/0, &out2);
@@ -1298,13 +1380,16 @@
// then add another doc to the lite index.
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId0, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// After this merge the index should have posting lists for
// "fool" {(doc0,sec3)},
@@ -1315,7 +1400,8 @@
// Add one more doc to the lite index.
edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("far"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("far"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// After this merge the index should add a posting list for "far" and a
// backfill branch point for "f". In addition to the posting lists described
@@ -1343,11 +1429,13 @@
// then add another doc to the lite index.
Index::Editor edit = index_->Edit(
kDocumentId0, kSectionId2, TermMatchType::EXACT_ONLY, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
- ASSERT_THAT(edit.AddHit("fool"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("fool"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// After this merge the index should have posting lists for
// "fool" {(doc0,sec2)},
// "foot" {(doc1,sec3)},
@@ -1356,14 +1444,17 @@
edit = index_->Edit(kDocumentId2, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("footer"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("footer"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// Add one more doc to the lite index. Debug strings should change.
edit = index_->Edit(kDocumentId3, kSectionId2, TermMatchType::EXACT_ONLY,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("far"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("far"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// After this merge the index should add posting lists for "far" and "footer"
// and a backfill branch point for "f". The new posting lists should be
@@ -1400,7 +1491,8 @@
// Add one document to the lite index
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// Clipping to invalid should have no effect.
ICING_EXPECT_OK(index_->TruncateTo(kInvalidDocumentId));
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1420,7 +1512,8 @@
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// Clipping to invalid should still have no effect even if both indices have
// hits.
@@ -1447,7 +1540,8 @@
// Add one document to the lite index
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
// Clipping to invalid should have no effect.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1467,7 +1561,8 @@
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
// Clipping to invalid should still have no effect even if both indices have
// hits.
@@ -1487,14 +1582,16 @@
// Add one document to the lite index and merge it into main.
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
// Add another document to the lite index.
edit = index_->Edit(kDocumentId1, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk());
@@ -1513,17 +1610,20 @@
// Add two documents to the lite index and merge them into main.
Index::Editor edit = index_->Edit(kDocumentId0, kSectionId2,
TermMatchType::PREFIX, /*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foo"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foul"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foul"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK(index_->Merge());
// Add another document to the lite index.
edit = index_->Edit(kDocumentId2, kSectionId3, TermMatchType::PREFIX,
/*namespace_id=*/0);
- ASSERT_THAT(edit.AddHit("foot"), IsOk());
+ ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk());
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.cc b/icing/index/iterator/doc-hit-info-iterator-and.cc
index f224583..66f87bd 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-and.cc
@@ -38,8 +38,6 @@
// When combining ANDed iterators, n-ary operator has better performance when
// number of operands > 3 according to benchmark cl/243720660
-// TODO (samzheng): Tune this number when it's necessary, e.g. implementation
-// changes.
inline constexpr int kBinaryAndIteratorPerformanceThreshold = 3;
// The minimum number of iterators needed to construct a And iterator. The And
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
index 9eb147a..e0a8cd0 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
@@ -63,14 +63,16 @@
auto type_config = schema.add_types();
type_config->set_schema_type("email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
}
void TearDown() override {
@@ -228,14 +230,16 @@
auto type_config = schema.add_types();
type_config->set_schema_type("email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
}
void TearDown() override {
@@ -383,14 +387,16 @@
type_config = schema.add_types();
type_config->set_schema_type(schema3_);
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
}
void TearDown() override {
@@ -521,14 +527,16 @@
auto type_config = schema.add_types();
type_config->set_schema_type(email_schema_);
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
}
void TearDown() override {
@@ -711,14 +719,16 @@
type_config = schema.add_types();
type_config->set_schema_type(schema2_);
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
}
void TearDown() override {
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.cc b/icing/index/iterator/doc-hit-info-iterator-or.cc
index 9d18753..8f00f88 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-or.cc
@@ -29,8 +29,6 @@
// When combining Or iterators, n-ary operator has better performance when
// number of operands > 2 according to benchmark cl/243321264
-// TODO (samzheng): Tune this number when it's necessary, e.g. implementation
-// changes.
constexpr int kBinaryOrIteratorPerformanceThreshold = 2;
} // namespace
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index b29217c..91e0cbe 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -72,14 +72,16 @@
// First and only indexed property, so it gets the first id of 0
indexed_section_id_ = 0;
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ICING_ASSERT_OK(schema_store_->SetSchema(schema_));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
}
void TearDown() override {
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index 89240ee..e0379b8 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -365,7 +365,7 @@
last_document_id = document_id;
}
if (hits_out != nullptr) {
- hits_out->back().UpdateSection(hit.section_id(), hit.score());
+ hits_out->back().UpdateSection(hit.section_id(), hit.term_frequency());
}
}
return count;
@@ -448,7 +448,7 @@
// Binary search for our term_id. Make sure we get the first
// element. Using kBeginSortValue ensures this for the hit value.
TermIdHitPair term_id_hit_pair(
- term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kMaxHitScore));
+ term_id, Hit(Hit::kMaxDocumentIdSortValue, Hit::kDefaultTermFrequency));
const TermIdHitPair::Value* array =
hit_buffer_.array_cast<TermIdHitPair::Value>();
diff --git a/icing/index/lite/term-id-hit-pair.h b/icing/index/lite/term-id-hit-pair.h
index 191f766..61ec502 100644
--- a/icing/index/lite/term-id-hit-pair.h
+++ b/icing/index/lite/term-id-hit-pair.h
@@ -29,39 +29,42 @@
class TermIdHitPair {
public:
- // Layout bits: 24 termid + 32 hit value + 8 hit score.
+ // Layout bits: 24 termid + 32 hit value + 8 hit term frequency.
using Value = uint64_t;
static constexpr int kTermIdBits = 24;
static constexpr int kHitValueBits = sizeof(Hit::Value) * 8;
- static constexpr int kHitScoreBits = sizeof(Hit::Score) * 8;
+ static constexpr int kHitTermFrequencyBits = sizeof(Hit::TermFrequency) * 8;
static const Value kInvalidValue;
explicit TermIdHitPair(Value v = kInvalidValue) : value_(v) {}
TermIdHitPair(uint32_t term_id, const Hit& hit) {
- static_assert(
- kTermIdBits + kHitValueBits + kHitScoreBits <= sizeof(Value) * 8,
- "TermIdHitPairTooBig");
+ static_assert(kTermIdBits + kHitValueBits + kHitTermFrequencyBits <=
+ sizeof(Value) * 8,
+ "TermIdHitPairTooBig");
value_ = 0;
// Term id goes into the most significant bits because it takes
// precedent in sorts.
- bit_util::BitfieldSet(term_id, kHitValueBits + kHitScoreBits, kTermIdBits,
+ bit_util::BitfieldSet(term_id, kHitValueBits + kHitTermFrequencyBits,
+ kTermIdBits, &value_);
+ bit_util::BitfieldSet(hit.value(), kHitTermFrequencyBits, kHitValueBits,
&value_);
- bit_util::BitfieldSet(hit.value(), kHitScoreBits, kHitValueBits, &value_);
- bit_util::BitfieldSet(hit.score(), 0, kHitScoreBits, &value_);
+ bit_util::BitfieldSet(hit.term_frequency(), 0, kHitTermFrequencyBits,
+ &value_);
}
uint32_t term_id() const {
- return bit_util::BitfieldGet(value_, kHitValueBits + kHitScoreBits,
+ return bit_util::BitfieldGet(value_, kHitValueBits + kHitTermFrequencyBits,
kTermIdBits);
}
Hit hit() const {
- return Hit(bit_util::BitfieldGet(value_, kHitScoreBits, kHitValueBits),
- bit_util::BitfieldGet(value_, 0, kHitScoreBits));
+ return Hit(
+ bit_util::BitfieldGet(value_, kHitTermFrequencyBits, kHitValueBits),
+ bit_util::BitfieldGet(value_, 0, kHitTermFrequencyBits));
}
Value value() const { return value_; }
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.cc b/icing/index/main/doc-hit-info-iterator-term-main.cc
index a60764d..5553c1e 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.cc
+++ b/icing/index/main/doc-hit-info-iterator-term-main.cc
@@ -114,7 +114,8 @@
hit.document_id() != cached_doc_hit_infos_.back().document_id()) {
cached_doc_hit_infos_.push_back(DocHitInfo(hit.document_id()));
}
- cached_doc_hit_infos_.back().UpdateSection(hit.section_id(), hit.score());
+ cached_doc_hit_infos_.back().UpdateSection(hit.section_id(),
+ hit.term_frequency());
}
return libtextclassifier3::Status::OK;
}
@@ -162,7 +163,8 @@
hit.document_id() != cached_doc_hit_infos_.back().document_id()) {
cached_doc_hit_infos_.push_back(DocHitInfo(hit.document_id()));
}
- cached_doc_hit_infos_.back().UpdateSection(hit.section_id(), hit.score());
+ cached_doc_hit_infos_.back().UpdateSection(hit.section_id(),
+ hit.term_frequency());
}
return libtextclassifier3::Status::OK;
}
diff --git a/icing/index/main/flash-index-storage_test.cc b/icing/index/main/flash-index-storage_test.cc
index cf899b3..7e15524 100644
--- a/icing/index/main/flash-index-storage_test.cc
+++ b/icing/index/main/flash-index-storage_test.cc
@@ -160,10 +160,10 @@
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits1 = {
- Hit(/*section_id=*/1, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/6, /*document_id=*/2, /*score=*/19),
- Hit(/*section_id=*/5, /*document_id=*/2, /*score=*/100),
- Hit(/*section_id=*/8, /*document_id=*/5, /*score=*/197)};
+ Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
+ Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
+ Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(posting_list_holder1.posting_list.PrependHit(hit));
}
@@ -183,10 +183,10 @@
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits2 = {
- Hit(/*section_id=*/4, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/8, /*document_id=*/4, /*score=*/19),
- Hit(/*section_id=*/9, /*document_id=*/7, /*score=*/100),
- Hit(/*section_id=*/6, /*document_id=*/7, /*score=*/197)};
+ Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
+ Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
+ Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
for (const Hit& hit : hits2) {
ICING_ASSERT_OK(posting_list_holder2.posting_list.PrependHit(hit));
}
@@ -217,10 +217,10 @@
EXPECT_THAT(posting_list_holder3.posting_list.GetHits(),
IsOkAndHolds(IsEmpty()));
std::vector<Hit> hits3 = {
- Hit(/*section_id=*/7, /*document_id=*/1, /*score=*/62),
- Hit(/*section_id=*/12, /*document_id=*/3, /*score=*/45),
- Hit(/*section_id=*/11, /*document_id=*/18, /*score=*/12),
- Hit(/*section_id=*/7, /*document_id=*/100, /*score=*/74)};
+ Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62),
+ Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45),
+ Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12),
+ Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)};
for (const Hit& hit : hits3) {
ICING_ASSERT_OK(posting_list_holder3.posting_list.PrependHit(hit));
}
@@ -256,10 +256,10 @@
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits1 = {
- Hit(/*section_id=*/1, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/6, /*document_id=*/2, /*score=*/19),
- Hit(/*section_id=*/5, /*document_id=*/2, /*score=*/100),
- Hit(/*section_id=*/8, /*document_id=*/5, /*score=*/197)};
+ Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
+ Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
+ Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(posting_list_holder1.posting_list.PrependHit(hit));
}
@@ -279,10 +279,10 @@
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits2 = {
- Hit(/*section_id=*/4, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/8, /*document_id=*/4, /*score=*/19),
- Hit(/*section_id=*/9, /*document_id=*/7, /*score=*/100),
- Hit(/*section_id=*/6, /*document_id=*/7, /*score=*/197)};
+ Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
+ Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
+ Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
for (const Hit& hit : hits2) {
ICING_ASSERT_OK(posting_list_holder2.posting_list.PrependHit(hit));
}
@@ -313,10 +313,10 @@
EXPECT_THAT(posting_list_holder3.posting_list.GetHits(),
IsOkAndHolds(IsEmpty()));
std::vector<Hit> hits3 = {
- Hit(/*section_id=*/7, /*document_id=*/1, /*score=*/62),
- Hit(/*section_id=*/12, /*document_id=*/3, /*score=*/45),
- Hit(/*section_id=*/11, /*document_id=*/18, /*score=*/12),
- Hit(/*section_id=*/7, /*document_id=*/100, /*score=*/74)};
+ Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62),
+ Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45),
+ Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12),
+ Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)};
for (const Hit& hit : hits3) {
ICING_ASSERT_OK(posting_list_holder3.posting_list.PrependHit(hit));
}
@@ -354,10 +354,10 @@
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits1 = {
- Hit(/*section_id=*/1, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/6, /*document_id=*/2, /*score=*/19),
- Hit(/*section_id=*/5, /*document_id=*/2, /*score=*/100),
- Hit(/*section_id=*/8, /*document_id=*/5, /*score=*/197)};
+ Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
+ Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
+ Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(posting_list_holder1.posting_list.PrependHit(hit));
}
@@ -377,10 +377,10 @@
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits2 = {
- Hit(/*section_id=*/4, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/8, /*document_id=*/4, /*score=*/19),
- Hit(/*section_id=*/9, /*document_id=*/7, /*score=*/100),
- Hit(/*section_id=*/6, /*document_id=*/7, /*score=*/197)};
+ Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
+ Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
+ Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
for (const Hit& hit : hits2) {
ICING_ASSERT_OK(posting_list_holder2.posting_list.PrependHit(hit));
}
@@ -425,10 +425,10 @@
EXPECT_THAT(posting_list_holder3.posting_list.GetHits(),
IsOkAndHolds(IsEmpty()));
std::vector<Hit> hits3 = {
- Hit(/*section_id=*/7, /*document_id=*/1, /*score=*/62),
- Hit(/*section_id=*/12, /*document_id=*/3, /*score=*/45),
- Hit(/*section_id=*/11, /*document_id=*/18, /*score=*/12),
- Hit(/*section_id=*/7, /*document_id=*/100, /*score=*/74)};
+ Hit(/*section_id=*/7, /*document_id=*/1, /*term_frequency=*/62),
+ Hit(/*section_id=*/12, /*document_id=*/3, /*term_frequency=*/45),
+ Hit(/*section_id=*/11, /*document_id=*/18, /*term_frequency=*/12),
+ Hit(/*section_id=*/7, /*document_id=*/100, /*term_frequency=*/74)};
for (const Hit& hit : hits3) {
ICING_ASSERT_OK(posting_list_holder3.posting_list.PrependHit(hit));
}
@@ -466,10 +466,10 @@
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits1 = {
- Hit(/*section_id=*/1, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/6, /*document_id=*/2, /*score=*/19),
- Hit(/*section_id=*/5, /*document_id=*/2, /*score=*/100),
- Hit(/*section_id=*/8, /*document_id=*/5, /*score=*/197)};
+ Hit(/*section_id=*/1, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/6, /*document_id=*/2, /*term_frequency=*/19),
+ Hit(/*section_id=*/5, /*document_id=*/2, /*term_frequency=*/100),
+ Hit(/*section_id=*/8, /*document_id=*/5, /*term_frequency=*/197)};
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(posting_list_holder1.posting_list.PrependHit(hit));
}
@@ -492,10 +492,10 @@
EXPECT_THAT(flash_index_storage.empty(), IsFalse());
std::vector<Hit> hits2 = {
- Hit(/*section_id=*/4, /*document_id=*/0, /*score=*/12),
- Hit(/*section_id=*/8, /*document_id=*/4, /*score=*/19),
- Hit(/*section_id=*/9, /*document_id=*/7, /*score=*/100),
- Hit(/*section_id=*/6, /*document_id=*/7, /*score=*/197)};
+ Hit(/*section_id=*/4, /*document_id=*/0, /*term_frequency=*/12),
+ Hit(/*section_id=*/8, /*document_id=*/4, /*term_frequency=*/19),
+ Hit(/*section_id=*/9, /*document_id=*/7, /*term_frequency=*/100),
+ Hit(/*section_id=*/6, /*document_id=*/7, /*term_frequency=*/197)};
for (const Hit& hit : hits2) {
ICING_ASSERT_OK(posting_list_holder2.posting_list.PrependHit(hit));
}
diff --git a/icing/index/main/index-block.cc b/icing/index/main/index-block.cc
index 652dbc6..4590d06 100644
--- a/icing/index/main/index-block.cc
+++ b/icing/index/main/index-block.cc
@@ -51,7 +51,7 @@
uint32_t IndexBlock::ApproximateFullPostingListHitsForBlock(
uint32_t block_size, int posting_list_index_bits) {
- // Assume 50% compressed and most don't have scores.
+ // Assume 50% compressed and most don't have term frequencies.
uint32_t bytes_per_hit = sizeof(Hit::Value) / 2;
return (block_size - sizeof(BlockHeader)) /
((1u << posting_list_index_bits) * bytes_per_hit);
diff --git a/icing/index/main/index-block_test.cc b/icing/index/main/index-block_test.cc
index 08ba57d..322918d 100644
--- a/icing/index/main/index-block_test.cc
+++ b/icing/index/main/index-block_test.cc
@@ -105,11 +105,11 @@
ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize));
std::vector<Hit> test_hits{
- Hit(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore),
- Hit(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore),
- Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99),
- Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17),
- Hit(/*section_id=*/10, /*document_id=*/10, Hit::kMaxHitScore),
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99),
+ Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
};
PostingListIndex allocated_index;
{
@@ -152,18 +152,18 @@
ASSERT_TRUE(CreateFileWithSize(filesystem, flash_file, kBlockSize));
std::vector<Hit> hits_in_posting_list1{
- Hit(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore),
- Hit(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore),
- Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99),
- Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17),
- Hit(/*section_id=*/10, /*document_id=*/10, Hit::kMaxHitScore),
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99),
+ Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
};
std::vector<Hit> hits_in_posting_list2{
- Hit(/*section_id=*/12, /*document_id=*/220, /*score=*/88),
- Hit(/*section_id=*/17, /*document_id=*/265, Hit::kMaxHitScore),
- Hit(/*section_id=*/0, /*document_id=*/287, /*score=*/2),
- Hit(/*section_id=*/11, /*document_id=*/306, /*score=*/12),
- Hit(/*section_id=*/10, /*document_id=*/306, Hit::kMaxHitScore),
+ Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88),
+ Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2),
+ Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12),
+ Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency),
};
PostingListIndex allocated_index_1;
PostingListIndex allocated_index_2;
@@ -242,11 +242,11 @@
// Add hits to the first posting list.
std::vector<Hit> hits_in_posting_list1{
- Hit(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore),
- Hit(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore),
- Hit(/*section_id=*/5, /*document_id=*/1, /*score=*/99),
- Hit(/*section_id=*/3, /*document_id=*/3, /*score=*/17),
- Hit(/*section_id=*/10, /*document_id=*/10, Hit::kMaxHitScore),
+ Hit(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/5, /*document_id=*/1, /*term_frequency=*/99),
+ Hit(/*section_id=*/3, /*document_id=*/3, /*term_frequency=*/17),
+ Hit(/*section_id=*/10, /*document_id=*/10, Hit::kDefaultTermFrequency),
};
ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_1,
block.AllocatePostingList());
@@ -261,11 +261,11 @@
// Add hits to the second posting list.
std::vector<Hit> hits_in_posting_list2{
- Hit(/*section_id=*/12, /*document_id=*/220, /*score=*/88),
- Hit(/*section_id=*/17, /*document_id=*/265, Hit::kMaxHitScore),
- Hit(/*section_id=*/0, /*document_id=*/287, /*score=*/2),
- Hit(/*section_id=*/11, /*document_id=*/306, /*score=*/12),
- Hit(/*section_id=*/10, /*document_id=*/306, Hit::kMaxHitScore),
+ Hit(/*section_id=*/12, /*document_id=*/220, /*term_frequency=*/88),
+ Hit(/*section_id=*/17, /*document_id=*/265, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/0, /*document_id=*/287, /*term_frequency=*/2),
+ Hit(/*section_id=*/11, /*document_id=*/306, /*term_frequency=*/12),
+ Hit(/*section_id=*/10, /*document_id=*/306, Hit::kDefaultTermFrequency),
};
ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_2,
block.AllocatePostingList());
@@ -288,9 +288,9 @@
EXPECT_TRUE(block.has_free_posting_lists());
std::vector<Hit> hits_in_posting_list3{
- Hit(/*section_id=*/12, /*document_id=*/0, /*score=*/88),
- Hit(/*section_id=*/17, /*document_id=*/1, Hit::kMaxHitScore),
- Hit(/*section_id=*/0, /*document_id=*/2, /*score=*/2),
+ Hit(/*section_id=*/12, /*document_id=*/0, /*term_frequency=*/88),
+ Hit(/*section_id=*/17, /*document_id=*/1, Hit::kDefaultTermFrequency),
+ Hit(/*section_id=*/0, /*document_id=*/2, /*term_frequency=*/2),
};
ICING_ASSERT_OK_AND_ASSIGN(PostingListIndex allocated_index_3,
block.AllocatePostingList());
diff --git a/icing/index/main/main-index-merger.cc b/icing/index/main/main-index-merger.cc
index 8142b79..f49dc74 100644
--- a/icing/index/main/main-index-merger.cc
+++ b/icing/index/main/main-index-merger.cc
@@ -33,8 +33,8 @@
class HitSelector {
public:
- // Returns whether or not term_id_hit_pair has the same term_id, document_id and section_id
- // as the previously selected hits.
+ // Returns whether or not term_id_hit_pair has the same term_id, document_id
+ // and section_id as the previously selected hits.
bool IsEquivalentHit(const TermIdHitPair& term_id_hit_pair) {
return prev_.term_id() == term_id_hit_pair.term_id() &&
prev_.hit().document_id() == term_id_hit_pair.hit().document_id() &&
@@ -56,20 +56,25 @@
// This function may add between 0-2 hits depending on whether the HitSelector
// holds both a valid exact hit and a valid prefix hit, one of those or none.
size_t InsertSelectedHits(size_t pos, std::vector<TermIdHitPair>* hits) {
- // Given highest scoring prefix/exact hits for a given
- // term+docid+sectionid, push needed hits into hits array at offset
- // pos. Return new pos.
+ // Given the prefix/exact hits for a given term+docid+sectionid, push needed
+ // hits into hits array at offset pos. Return new pos.
if (best_prefix_hit_.hit().is_valid() && best_exact_hit_.hit().is_valid()) {
- // Output both if scores are unequal. Otherwise only exact hit is
- // sufficient because 1) they have the same scores and 2) any prefix query
- // will also accept an exact hit.
(*hits)[pos++] = best_exact_hit_;
- if (best_prefix_hit_.hit().score() != best_exact_hit_.hit().score()) {
- (*hits)[pos++] = best_prefix_hit_;
- // Ensure sorted.
- if (best_prefix_hit_.hit() < best_exact_hit_.hit()) {
- std::swap((*hits)[pos - 1], (*hits)[pos - 2]);
- }
+ const Hit& prefix_hit = best_prefix_hit_.hit();
+      // The prefix hit has term frequency equal to the sum of the term
+      // frequencies, capped at kMaxTermFrequency.
+ Hit::TermFrequency final_term_frequency = std::min(
+ static_cast<int>(Hit::kMaxTermFrequency),
+ prefix_hit.term_frequency() + best_exact_hit_.hit().term_frequency());
+ best_prefix_hit_ = TermIdHitPair(
+ best_prefix_hit_.term_id(),
+ Hit(prefix_hit.section_id(), prefix_hit.document_id(),
+ final_term_frequency, prefix_hit.is_in_prefix_section(),
+ prefix_hit.is_prefix_hit()));
+ (*hits)[pos++] = best_prefix_hit_;
+ // Ensure sorted.
+ if (best_prefix_hit_.hit() < best_exact_hit_.hit()) {
+ std::swap((*hits)[pos - 1], (*hits)[pos - 2]);
}
} else if (best_prefix_hit_.hit().is_valid()) {
(*hits)[pos++] = best_prefix_hit_;
@@ -88,16 +93,38 @@
private:
void SelectPrefixHitIfBetter(const TermIdHitPair& term_id_hit_pair) {
- if (!best_prefix_hit_.hit().is_valid() ||
- best_prefix_hit_.hit().score() < term_id_hit_pair.hit().score()) {
+ if (!best_prefix_hit_.hit().is_valid()) {
best_prefix_hit_ = term_id_hit_pair;
+ } else {
+ const Hit& hit = term_id_hit_pair.hit();
+ // Create a new prefix hit with term_frequency as the sum of the term
+ // frequencies. The term frequency is capped at kMaxTermFrequency.
+ Hit::TermFrequency final_term_frequency = std::min(
+ static_cast<int>(Hit::kMaxTermFrequency),
+ hit.term_frequency() + best_prefix_hit_.hit().term_frequency());
+ best_prefix_hit_ = TermIdHitPair(
+ term_id_hit_pair.term_id(),
+ Hit(hit.section_id(), hit.document_id(), final_term_frequency,
+ best_prefix_hit_.hit().is_in_prefix_section(),
+ best_prefix_hit_.hit().is_prefix_hit()));
}
}
void SelectExactHitIfBetter(const TermIdHitPair& term_id_hit_pair) {
- if (!best_exact_hit_.hit().is_valid() ||
- best_exact_hit_.hit().score() < term_id_hit_pair.hit().score()) {
+ if (!best_exact_hit_.hit().is_valid()) {
best_exact_hit_ = term_id_hit_pair;
+ } else {
+ const Hit& hit = term_id_hit_pair.hit();
+ // Create a new exact hit with term_frequency as the sum of the term
+      // frequencies. The term frequency is capped at kMaxTermFrequency.
+ Hit::TermFrequency final_term_frequency = std::min(
+ static_cast<int>(Hit::kMaxTermFrequency),
+ hit.term_frequency() + best_exact_hit_.hit().term_frequency());
+ best_exact_hit_ = TermIdHitPair(
+ term_id_hit_pair.term_id(),
+ Hit(hit.section_id(), hit.document_id(), final_term_frequency,
+ best_exact_hit_.hit().is_in_prefix_section(),
+ best_exact_hit_.hit().is_prefix_hit()));
}
}
@@ -166,10 +193,10 @@
// {"foot", docid0, sectionid0}
// {"fool", docid0, sectionid0}
//
-// When duplicates are encountered, we prefer the hit with the highest hit
-// score. If there is both an exact and prefix hit for the same term, we prefer
-// the exact hit, unless they have different scores, in which case we keep both
-// them.
+// When two or more prefix hits are duplicates, merge into one hit with term
+// frequency as the sum of the term frequencies. If there is both an exact and
+// prefix hit for the same term, keep the exact hit as it is, update the prefix
+// hit so that its term frequency is the sum of the term frequencies.
void DedupeHits(
std::vector<TermIdHitPair>* hits, const TermIdCodec& term_id_codec,
const std::unordered_map<uint32_t, int>& main_tvi_to_block_index) {
@@ -252,7 +279,7 @@
size_t offset = itr_prefixes->second.first;
size_t len = itr_prefixes->second.second;
size_t offset_end_exclusive = offset + len;
- Hit prefix_hit(hit.section_id(), hit.document_id(), hit.score(),
+ Hit prefix_hit(hit.section_id(), hit.document_id(), hit.term_frequency(),
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
for (; offset < offset_end_exclusive; ++offset) {
// Take the tvi (in the main lexicon) of each prefix term.
diff --git a/icing/index/main/main-index-merger_test.cc b/icing/index/main/main-index-merger_test.cc
index 59d3e82..8a2f691 100644
--- a/icing/index/main/main-index-merger_test.cc
+++ b/icing/index/main/main-index-merger_test.cc
@@ -86,10 +86,10 @@
uint32_t fool_term_id,
term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
@@ -125,10 +125,10 @@
uint32_t fool_term_id,
term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
@@ -138,7 +138,8 @@
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t foo_term_id,
term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN));
- Hit doc1_prefix_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_prefix_hit(/*section_id=*/0, /*document_id=*/1,
+ Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
uint32_t foot_main_tvi = 5;
@@ -172,7 +173,7 @@
TermIdHitPair(foo_term_id, doc1_prefix_hit)));
}
-TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentScores) {
+TEST_F(MainIndexMergerTest, DedupePrefixAndExactWithDifferentTermFrequencies) {
// 1. Index one doc in the Lite Index:
// - Doc0 {"foot" "foo" is_in_prefix_section=TRUE}
ICING_ASSERT_OK_AND_ASSIGN(
@@ -187,10 +188,11 @@
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
- Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+ Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
- Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0,
+ Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, foo_doc0_hit));
@@ -200,9 +202,10 @@
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t foo_main_term_id,
term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN));
- // The prefix hit for 'foot' should have the same score as the exact hit for
- // 'foot'.
- Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+  // The prefix hit for 'foot' should have term frequency equal to the sum of
+  // the term frequencies. The final prefix hit has term frequency equal to 58.
+ Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0,
+ /*term_frequency=*/58,
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
uint32_t foot_main_tvi = 5;
@@ -221,8 +224,8 @@
// 3. TranslateAndExpand should;
// a. Translate lite term ids to main term ids based on the map
// b. Expand 'foot' to have a hit for 'foo'
- // c. Keep both the exact hit for 'foo' and the prefix hit for 'foot'
- // because they have different scores.
+ // c. Keep both the exact hit for 'foo' and the prefix hit for 'foot', the
+ // latter with term frequency as the sum of the term frequencies.
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
@@ -234,7 +237,7 @@
TermIdHitPair(foo_main_term_id, doc0_prefix_hit)));
}
-TEST_F(MainIndexMergerTest, DedupeWithExactSameScores) {
+TEST_F(MainIndexMergerTest, DedupeWithExactSameTermFrequencies) {
// 1. Index one doc in the Lite Index:
// - Doc0 {"foot" "foo" is_in_prefix_section=TRUE}
ICING_ASSERT_OK_AND_ASSIGN(
@@ -249,12 +252,17 @@
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
term_id_codec_->EncodeTvi(foo_tvi, TviType::LITE));
- Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+ Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
- Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+ Hit foo_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/57,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, foo_doc0_hit));
+ // The prefix hit should take the sum as term_frequency - 114.
+ Hit prefix_foo_doc0_hit(/*section_id=*/0, /*document_id=*/0,
+ /*term_frequency=*/114,
+ /*is_in_prefix_section=*/true,
+ /*is_prefix_hit=*/true);
// 2. Build up a fake LexiconMergeOutputs
// This is some made up number that doesn't matter for this test.
@@ -279,16 +287,17 @@
// 3. TranslateAndExpand should;
// a. Translate lite term ids to main term ids based on the map
// b. Expand 'foot' to have a hit for 'foo'
- // c. Keep only the exact hit for 'foo' since they both have the same hit
- // score.
+ // c. Keep both the exact hit for 'foo' and the prefix hit for 'foot', the
+ // latter with term frequency as the sum of the term frequencies.
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
lexicon_outputs));
- EXPECT_THAT(
- expanded_term_id_hit_pairs,
- UnorderedElementsAre(TermIdHitPair(foot_main_term_id, foot_doc0_hit),
- TermIdHitPair(foo_main_term_id, foo_doc0_hit)));
+ EXPECT_THAT(expanded_term_id_hit_pairs,
+ UnorderedElementsAre(
+ TermIdHitPair(foot_main_term_id, foot_doc0_hit),
+ TermIdHitPair(foo_main_term_id, foo_doc0_hit),
+ TermIdHitPair(foo_main_term_id, prefix_foo_doc0_hit)));
}
TEST_F(MainIndexMergerTest, DedupePrefixExpansion) {
@@ -307,10 +316,12 @@
uint32_t fool_term_id,
term_id_codec_->EncodeTvi(fool_tvi, TviType::LITE));
- Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0, /*score=*/57,
+ Hit foot_doc0_hit(/*section_id=*/0, /*document_id=*/0,
+ /*term_frequency=*/Hit::kMaxTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, foot_doc0_hit));
- Hit fool_doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit fool_doc0_hit(/*section_id=*/0, /*document_id=*/0,
+ Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, fool_doc0_hit));
@@ -320,9 +331,10 @@
ICING_ASSERT_OK_AND_ASSIGN(
uint32_t foo_term_id,
term_id_codec_->EncodeTvi(foo_main_tvi, TviType::MAIN));
- // The prefix hit should take the best score - MaxHitScore when merging these
- // two.
- Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ // The prefix hit should take the sum as term frequency - 256, capped at
+ // kMaxTermFrequency.
+ Hit doc0_prefix_hit(/*section_id=*/0, /*document_id=*/0,
+ /*term_frequency=*/Hit::kMaxTermFrequency,
/*is_in_prefix_section=*/true, /*is_prefix_hit=*/true);
uint32_t foot_main_tvi = 5;
@@ -349,8 +361,8 @@
// 3. TranslateAndExpand should;
// a. Translate lite term ids to main term ids based on the map
// b. Expand 'foot' and 'fool' to have hits for 'foo'
- // c. Merge the prefix hits from 'foot' and 'fool', taking the best hit
- // score.
+ // c. Merge the prefix hits from 'foot' and 'fool', taking the sum as
+ // term frequency.
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<TermIdHitPair> expanded_term_id_hit_pairs,
MainIndexMerger::TranslateAndExpandLiteHits(*lite_index_, *term_id_codec_,
diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc
index 2a5ba83..636f631 100644
--- a/icing/index/main/main-index.cc
+++ b/icing/index/main/main-index.cc
@@ -173,11 +173,12 @@
ICING_ASSIGN_OR_RETURN(PostingListAccessor pl_accessor,
PostingListAccessor::CreateFromExisting(
flash_index_storage_.get(), posting_list_id));
- GetPrefixAccessorResult result = {std::make_unique<PostingListAccessor>(std::move(pl_accessor)), exact};
+ GetPrefixAccessorResult result = {
+ std::make_unique<PostingListAccessor>(std::move(pl_accessor)), exact};
return result;
}
-// TODO(samzheng): Implement a method PropertyReadersAll.HasAnyProperty().
+// TODO(tjbarron): Implement a method PropertyReadersAll.HasAnyProperty().
bool IsTermInNamespaces(
const IcingDynamicTrie::PropertyReadersAll& property_reader,
uint32_t value_index, const std::vector<NamespaceId>& namespace_ids) {
@@ -578,7 +579,8 @@
}
// A backfill hit is a prefix hit in a prefix section.
- const Hit backfill_hit(hit.section_id(), hit.document_id(), hit.score(),
+ const Hit backfill_hit(hit.section_id(), hit.document_id(),
+ hit.term_frequency(),
/*is_in_prefix_section=*/true,
/*is_prefix_hit=*/true);
if (backfill_hit == last_added_hit) {
diff --git a/icing/index/main/main-index_test.cc b/icing/index/main/main-index_test.cc
index abe7181..74139be 100644
--- a/icing/index/main/main-index_test.cc
+++ b/icing/index/main/main-index_test.cc
@@ -145,7 +145,7 @@
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
@@ -182,7 +182,7 @@
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
@@ -219,18 +219,18 @@
ICING_ASSERT_OK_AND_ASSIGN(uint32_t far_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
- Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kMaxHitScore,
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc2_hit));
ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc2_hit));
@@ -292,18 +292,18 @@
ICING_ASSERT_OK_AND_ASSIGN(uint32_t far_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc1_hit));
- Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kMaxHitScore,
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc2_hit));
ICING_ASSERT_OK(lite_index_->AddHit(far_term_id, doc2_hit));
@@ -345,14 +345,14 @@
ICING_ASSERT_OK_AND_ASSIGN(uint32_t fall_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc3_hit(/*section_id=*/0, /*document_id=*/3, Hit::kMaxHitScore,
+ Hit doc3_hit(/*section_id=*/0, /*document_id=*/3, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc3_hit));
ICING_ASSERT_OK(lite_index_->AddHit(four_term_id, doc3_hit));
ICING_ASSERT_OK(lite_index_->AddHit(foul_term_id, doc3_hit));
ICING_ASSERT_OK(lite_index_->AddHit(fall_term_id, doc3_hit));
- Hit doc4_hit(/*section_id=*/0, /*document_id=*/4, Hit::kMaxHitScore,
+ Hit doc4_hit(/*section_id=*/0, /*document_id=*/4, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(four_term_id, doc4_hit));
ICING_ASSERT_OK(lite_index_->AddHit(foul_term_id, doc4_hit));
@@ -404,15 +404,15 @@
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit));
- Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kMaxHitScore,
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc2_hit));
@@ -453,15 +453,15 @@
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foo_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc0_hit));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc1_hit));
- Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kMaxHitScore,
+ Hit doc2_hit(/*section_id=*/0, /*document_id=*/2, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(foo_term_id, doc2_hit));
@@ -500,17 +500,17 @@
for (DocumentId document_id = 0; document_id < 2048; ++document_id) {
Hit doc_hit0(/*section_id=*/0, /*document_id=*/document_id,
- Hit::kMaxHitScore,
+ Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit0));
Hit doc_hit1(/*section_id=*/1, /*document_id=*/document_id,
- Hit::kMaxHitScore,
+ Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit1));
Hit doc_hit2(/*section_id=*/2, /*document_id=*/document_id,
- Hit::kMaxHitScore,
+ Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc_hit2));
}
@@ -543,7 +543,7 @@
ICING_ASSERT_OK_AND_ASSIGN(uint32_t fool_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kMaxHitScore,
+ Hit doc0_hit(/*section_id=*/0, /*document_id=*/0, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/true);
ICING_ASSERT_OK(lite_index_->AddHit(fool_term_id, doc0_hit));
@@ -570,7 +570,7 @@
ICING_ASSERT_OK_AND_ASSIGN(uint32_t foot_term_id,
term_id_codec_->EncodeTvi(tvi, TviType::LITE));
- Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kMaxHitScore,
+ Hit doc1_hit(/*section_id=*/0, /*document_id=*/1, Hit::kDefaultTermFrequency,
/*is_in_prefix_section=*/false);
ICING_ASSERT_OK(lite_index_->AddHit(foot_term_id, doc1_hit));
diff --git a/icing/index/main/posting-list-accessor_test.cc b/icing/index/main/posting-list-accessor_test.cc
index 8a5ef07..a539fe4 100644
--- a/icing/index/main/posting-list-accessor_test.cc
+++ b/icing/index/main/posting-list-accessor_test.cc
@@ -82,7 +82,7 @@
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
// Add a single hit. This will fit in a min-sized posting list.
- Hit hit1(/*section_id=*/1, /*document_id=*/0, Hit::kMaxHitScore);
+ Hit hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency);
ICING_ASSERT_OK(pl_accessor.PrependHit(hit1));
PostingListAccessor::FinalizeResult result1 =
PostingListAccessor::Finalize(std::move(pl_accessor));
@@ -324,14 +324,14 @@
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
- Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kMaxHitScore);
+ Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency);
ICING_ASSERT_OK(pl_accessor.PrependHit(hit1));
- Hit hit2(/*section_id=*/6, /*document_id=*/1, Hit::kMaxHitScore);
+ Hit hit2(/*section_id=*/6, /*document_id=*/1, Hit::kDefaultTermFrequency);
EXPECT_THAT(pl_accessor.PrependHit(hit2),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
- Hit hit3(/*section_id=*/2, /*document_id=*/0, Hit::kMaxHitScore);
+ Hit hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency);
EXPECT_THAT(pl_accessor.PrependHit(hit3),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
@@ -364,7 +364,7 @@
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
- Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kMaxHitScore);
+ Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency);
ICING_ASSERT_OK(pl_accessor.PrependHit(hit1));
PostingListAccessor::FinalizeResult result1 =
PostingListAccessor::Finalize(std::move(pl_accessor));
diff --git a/icing/index/main/posting-list-used.cc b/icing/index/main/posting-list-used.cc
index a439c45..62e73e5 100644
--- a/icing/index/main/posting-list-used.cc
+++ b/icing/index/main/posting-list-used.cc
@@ -30,8 +30,8 @@
namespace {
-uint32_t GetScoreByteSize(const Hit &hit) {
- return hit.has_score() ? sizeof(Hit::Score) : 0;
+uint32_t GetTermFrequencyByteSize(const Hit &hit) {
+ return hit.has_term_frequency() ? sizeof(Hit::TermFrequency) : 0;
}
} // namespace
@@ -153,21 +153,21 @@
uint64_t delta = cur.value() - hit.value();
uint8_t delta_buf[VarInt::kMaxEncodedLen64];
size_t delta_len = VarInt::Encode(delta, delta_buf);
- uint32_t cur_score_bytes = GetScoreByteSize(cur);
+ uint32_t cur_term_frequency_bytes = GetTermFrequencyByteSize(cur);
uint32_t pad_end = GetPadEnd(posting_list_utils::kSpecialHitsSize);
- if (pad_end >=
- posting_list_utils::kSpecialHitsSize + delta_len + cur_score_bytes) {
- // Pad area has enough space for delta and score of existing hit
- // (cur). Write delta at pad_end - delta_len - cur_score_bytes.
+ if (pad_end >= posting_list_utils::kSpecialHitsSize + delta_len +
+ cur_term_frequency_bytes) {
+ // Pad area has enough space for delta and term_frequency of existing hit
+ // (cur). Write delta at pad_end - delta_len - cur_term_frequency_bytes.
uint8_t *delta_offset =
- posting_list_buffer_ + pad_end - delta_len - cur_score_bytes;
+ posting_list_buffer_ + pad_end - delta_len - cur_term_frequency_bytes;
memcpy(delta_offset, delta_buf, delta_len);
- // Now copy score.
- Hit::Score score = cur.score();
- uint8_t *score_offset = delta_offset + delta_len;
- memcpy(score_offset, &score, cur_score_bytes);
+ // Now copy term_frequency.
+ Hit::TermFrequency term_frequency = cur.term_frequency();
+ uint8_t *term_frequency_offset = delta_offset + delta_len;
+ memcpy(term_frequency_offset, &term_frequency, cur_term_frequency_bytes);
// Now first hit is the new hit, at special position 1. Safe to ignore the
// return value because 1 < kNumSpecialHits.
@@ -224,12 +224,12 @@
uint64_t delta = cur_value - hit.value();
uint8_t delta_buf[VarInt::kMaxEncodedLen64];
size_t delta_len = VarInt::Encode(delta, delta_buf);
- uint32_t hit_score_bytes = GetScoreByteSize(hit);
+ uint32_t hit_term_frequency_bytes = GetTermFrequencyByteSize(hit);
// offset now points to one past the end of the first hit.
offset += sizeof(Hit::Value);
if (posting_list_utils::kSpecialHitsSize + sizeof(Hit::Value) + delta_len +
- hit_score_bytes <=
+ hit_term_frequency_bytes <=
offset) {
// Enough space for delta in compressed area.
@@ -237,15 +237,15 @@
offset -= delta_len;
memcpy(posting_list_buffer_ + offset, delta_buf, delta_len);
- // Prepend new hit with (possibly) its score. We know that there is room
- // for 'hit' because of the if statement above, so calling ValueOrDie is
- // safe.
+ // Prepend new hit with (possibly) its term_frequency. We know that there is
+ // room for 'hit' because of the if statement above, so calling ValueOrDie
+ // is safe.
offset = PrependHitUncompressed(hit, offset).ValueOrDie();
// offset is guaranteed to be valid here. So it's safe to ignore the return
// value. The if above will guarantee that offset >= kSpecialHitSize and <
// size_in_bytes_ because the if ensures that there is enough room between
// offset and kSpecialHitSize to fit the delta of the previous hit, any
- // score and the uncompressed hit.
+ // term_frequency and the uncompressed hit.
set_start_byte_offset(offset);
} else if (posting_list_utils::kSpecialHitsSize + delta_len <= offset) {
// Only have space for delta. The new hit must be put in special
@@ -273,14 +273,11 @@
// move first hit to special position 1 and put new hit in
// special position 0.
Hit cur(cur_value);
- if (cur.has_score()) {
- // offset is < kSpecialHitsSize + delta_len. delta_len is at most 5 bytes.
- // Therefore, offset must be less than kSpecialHitSize + 5. Since posting
- // list size must be divisible by sizeof(Hit) (5), it is guaranteed that
- // offset < size_in_bytes, so it is safe to call ValueOrDie here.
- cur = Hit(cur_value, ReadScore(offset).ValueOrDie());
- offset += sizeof(Hit::Score);
- }
+ // offset is < kSpecialHitsSize + delta_len. delta_len is at most 5 bytes.
+ // Therefore, offset must be less than kSpecialHitSize + 5. Since posting
+ // list size must be divisible by sizeof(Hit) (5), it is guaranteed that
+ // offset < size_in_bytes, so it is safe to ignore the return value here.
+ ConsumeTermFrequencyIfPresent(&cur, &offset);
// Safe to ignore the return value of PadToEnd because offset must be less
// than size_in_bytes_. Otherwise, this function already would have returned
// FAILED_PRECONDITION.
@@ -437,18 +434,17 @@
val += delta;
}
Hit hit(val);
- if (hit.has_score()) {
- auto score_or = ReadScore(offset);
- if (!score_or.ok()) {
- // This posting list has been corrupted somehow. The first hit of the
- // posting list claims to have a score, but there's no more room in the
- // posting list for that score to exist. Return an empty vector and zero
- // to indicate no hits retrieved.
+ libtextclassifier3::Status status =
+ ConsumeTermFrequencyIfPresent(&hit, &offset);
+ if (!status.ok()) {
+ // This posting list has been corrupted somehow. The first hit of the
+ // posting list claims to have a term frequency, but there's no more room
+ // in the posting list for that term frequency to exist. Return an empty
+ // vector and zero to indicate no hits retrieved.
+ if (out != nullptr) {
out->clear();
- return absl_ports::InternalError("Posting list has been corrupted!");
}
- hit = Hit(val, score_or.ValueOrDie());
- offset += sizeof(Hit::Score);
+ return absl_ports::InternalError("Posting list has been corrupted!");
}
if (out != nullptr) {
out->push_back(hit);
@@ -475,21 +471,21 @@
offset -= sizeof(Hit::Value);
memcpy(posting_list_buffer_ + offset, &val, sizeof(Hit::Value));
} else {
- // val won't fit in compressed area. Also see if there is a score.
+ // val won't fit in compressed area. Also see if there is a
+ // term_frequency.
Hit hit(val);
- if (hit.has_score()) {
- auto score_or = ReadScore(offset);
- if (!score_or.ok()) {
- // This posting list has been corrupted somehow. The first hit of
- // the posting list claims to have a score, but there's no more room
- // in the posting list for that score to exist. Return an empty
- // vector and zero to indicate no hits retrieved. Do not pop
- // anything.
+ libtextclassifier3::Status status =
+ ConsumeTermFrequencyIfPresent(&hit, &offset);
+ if (!status.ok()) {
+ // This posting list has been corrupted somehow. The first hit of
+ // the posting list claims to have a term frequency, but there's no
+ // more room in the posting list for that term frequency to exist.
+ // Return an empty vector and zero to indicate no hits retrieved. Do
+ // not pop anything.
+ if (out != nullptr) {
out->clear();
- return absl_ports::InternalError(
- "Posting list has been corrupted!");
}
- hit = Hit(val, score_or.ValueOrDie());
+ return absl_ports::InternalError("Posting list has been corrupted!");
}
// Okay to ignore the return value here because 1 < kNumSpecialHits.
mutable_this->set_special_hit(1, hit);
@@ -640,7 +636,7 @@
libtextclassifier3::StatusOr<uint32_t> PostingListUsed::PrependHitUncompressed(
const Hit &hit, uint32_t offset) {
- if (hit.has_score()) {
+ if (hit.has_term_frequency()) {
if (offset < posting_list_utils::kSpecialHitsSize + sizeof(Hit)) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Not enough room to prepend Hit at offset %d.", offset));
@@ -659,16 +655,23 @@
return offset;
}
-libtextclassifier3::StatusOr<Hit::Score> PostingListUsed::ReadScore(
- uint32_t offset) const {
- if (offset + sizeof(Hit::Score) > size_in_bytes_) {
+libtextclassifier3::Status PostingListUsed::ConsumeTermFrequencyIfPresent(
+ Hit *hit, uint32_t *offset) const {
+ if (!hit->has_term_frequency()) {
+ // No term frequency to consume. Everything is fine.
+ return libtextclassifier3::Status::OK;
+ }
+ if (*offset + sizeof(Hit::TermFrequency) > size_in_bytes_) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"offset %d must not point past the end of the posting list of size %d.",
- offset, size_in_bytes_));
+ *offset, size_in_bytes_));
}
- Hit::Score score;
- memcpy(&score, posting_list_buffer_ + offset, sizeof(Hit::Score));
- return score;
+ Hit::TermFrequency term_frequency;
+ memcpy(&term_frequency, posting_list_buffer_ + *offset,
+ sizeof(Hit::TermFrequency));
+ *hit = Hit(hit->value(), term_frequency);
+ *offset += sizeof(Hit::TermFrequency);
+ return libtextclassifier3::Status::OK;
}
} // namespace lib
diff --git a/icing/index/main/posting-list-used.h b/icing/index/main/posting-list-used.h
index 8bc9c8d..1b2e24e 100644
--- a/icing/index/main/posting-list-used.h
+++ b/icing/index/main/posting-list-used.h
@@ -155,12 +155,12 @@
// starts somewhere between [kSpecialHitsSize, kSpecialHitsSize + sizeof(Hit)
// - 1] and ends at size_in_bytes - 1.
//
- // Hit scores are stored after the hit value, compressed or
+ // Hit term frequencies are stored after the hit value, compressed or
// uncompressed. For the first two special hits, we always have a
- // space for the score. For hits in the compressed area, we only have
- // the score following the hit value of hit.has_score() is true. This
- // allows good compression in the common case where hits don't have a
- // specific score.
+ // space for the term frequency. For hits in the compressed area, we only have
+  // the term frequency following the hit value if hit.has_term_frequency() is
+ // true. This allows good compression in the common case where hits don't have
+ // a valid term frequency.
//
// EXAMPLE
// Posting list storage. Posting list size: 20 bytes
@@ -175,7 +175,8 @@
// | 16 |Hit::kInvalidVal| 0x000 | 0x07FFF998 |
// +-------------+----------------+-----------------+----------------------+
//
- // Add Hit 0x07FFF684 (DocumentId = 18, SectionId = 0, Flags = 4, Score=125)
+ // Add Hit 0x07FFF684 (DocumentId = 18, SectionId = 0, Flags = 4,
+ // TermFrequency=125)
// (Hit 0x07FFF998 - Hit 0x07FFF684 = 788)
// +--bytes 0-4--+----- 5-9 ------+-- 10-12 --+-- 13-16 --+- 17 -+-- 18-19 --+
// | 13 |Hit::kInvalidVal| 0x000 | 0x07FFF684| 125 | 788 |
@@ -187,9 +188,9 @@
// | 9 |Hit::kInvVal| 0x00 |0x07FFF4D2| 434 | 125 | 788 |
// +-------------+------------+--------+----------+---------+------+---------+
//
- // Add Hit 0x07FFF40E (DocumentId = 23, SectionId = 1, Flags = 6, Score = 87)
- // (Hit 0x07FFF684 - Hit 0x07FFF4D2 = 196)
- // ALMOST FULL!
+ // Add Hit 0x07FFF40E (DocumentId = 23, SectionId = 1, Flags = 6,
+ // TermFrequency = 87)
+ // (Hit 0x07FFF684 - Hit 0x07FFF4D2 = 196) ALMOST FULL!
// +--bytes 0-4-+---- 5-9 ----+- 10-12 -+- 13-14 -+- 15-16 -+- 17 -+- 18-19 -+
// |Hit::kInvVal|0x07FFF40E,87| 0x000 | 196 | 434 | 125 | 788 |
// +-------------+------------+---------+---------+---------+------+---------+
@@ -302,13 +303,17 @@
libtextclassifier3::StatusOr<uint32_t> PrependHitUncompressed(
const Hit &hit, uint32_t offset);
- // Reads the score located at offset and returns it. Callers are responsible
- // for ensuring that the bytes starting at offset actually represent a score.
+ // If hit has a term frequency, consumes the term frequency at offset, updates
+ // hit to include the term frequency and updates offset to reflect that the
+ // term frequency has been consumed.
//
// RETURNS:
- // - The score located at offset, if successful
- // - INVALID_ARGUMENT if offset + sizeof(Hit::Score) >= size_in_bytes_
- libtextclassifier3::StatusOr<Hit::Score> ReadScore(uint32_t offset) const;
+ // - OK, if successful
+ // - INVALID_ARGUMENT if hit has a term frequency and offset +
+  //   sizeof(Hit::TermFrequency) >
+ // size_in_bytes_
+ libtextclassifier3::Status ConsumeTermFrequencyIfPresent(
+ Hit *hit, uint32_t *offset) const;
// A byte array of size size_in_bytes_ containing encoded hits for this
// posting list.
diff --git a/icing/index/main/posting-list-used_test.cc b/icing/index/main/posting-list-used_test.cc
index eb62aeb..044d0c1 100644
--- a/icing/index/main/posting-list-used_test.cc
+++ b/icing/index/main/posting-list-used_test.cc
@@ -73,37 +73,37 @@
static_cast<void *>(hits_buf.get()), kHitsSize));
// Make used.
- Hit hit0(/*section_id=*/0, 0, /*score=*/56);
+ Hit hit0(/*section_id=*/0, 0, /*term_frequency=*/56);
pl_used.PrependHit(hit0);
// Size = sizeof(uncompressed hit0)
int expected_size = sizeof(Hit);
EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit0)));
- Hit hit1(/*section_id=*/0, 1, Hit::kMaxHitScore);
+ Hit hit1(/*section_id=*/0, 1, Hit::kDefaultTermFrequency);
pl_used.PrependHit(hit1);
// Size = sizeof(uncompressed hit1)
- // + sizeof(hit0-hit1) + sizeof(hit0::score)
- expected_size += 2 + sizeof(Hit::Score);
+ // + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
+ expected_size += 2 + sizeof(Hit::TermFrequency);
EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit1, hit0)));
- Hit hit2(/*section_id=*/0, 2, /*score=*/56);
+ Hit hit2(/*section_id=*/0, 2, /*term_frequency=*/56);
pl_used.PrependHit(hit2);
// Size = sizeof(uncompressed hit2)
// + sizeof(hit1-hit2)
- // + sizeof(hit0-hit1) + sizeof(hit0::score)
+ // + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
expected_size += 2;
EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit2, hit1, hit0)));
- Hit hit3(/*section_id=*/0, 3, Hit::kMaxHitScore);
+ Hit hit3(/*section_id=*/0, 3, Hit::kDefaultTermFrequency);
pl_used.PrependHit(hit3);
// Size = sizeof(uncompressed hit3)
- // + sizeof(hit2-hit3) + sizeof(hit2::score)
+ // + sizeof(hit2-hit3) + sizeof(hit2::term_frequency)
// + sizeof(hit1-hit2)
- // + sizeof(hit0-hit1) + sizeof(hit0::score)
- expected_size += 2 + sizeof(Hit::Score);
+ // + sizeof(hit0-hit1) + sizeof(hit0::term_frequency)
+ expected_size += 2 + sizeof(Hit::TermFrequency);
EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
EXPECT_THAT(pl_used.GetHits(),
IsOkAndHolds(ElementsAre(hit3, hit2, hit1, hit0)));
@@ -122,7 +122,7 @@
// Adding hit0: EMPTY -> NOT_FULL
// Adding hit1: NOT_FULL -> NOT_FULL
// Adding hit2: NOT_FULL -> NOT_FULL
- Hit hit0(/*section_id=*/0, 0, Hit::kMaxHitScore);
+ Hit hit0(/*section_id=*/0, 0, Hit::kDefaultTermFrequency);
Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2);
Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2);
ICING_EXPECT_OK(pl_used.PrependHit(hit0));
@@ -189,7 +189,8 @@
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(IsEmpty()));
// Add a hit, PL should shift to ALMOST_FULL state
- Hit hit0(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/false,
+ Hit hit0(/*section_id=*/0, 0, /*term_frequency=*/0,
+ /*is_in_prefix_section=*/false,
/*is_prefix_hit=*/true);
ICING_EXPECT_OK(pl_used.PrependHit(hit0));
// Size = sizeof(uncompressed hit0)
@@ -197,9 +198,10 @@
EXPECT_THAT(pl_used.BytesUsed(), Le(expected_size));
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit0)));
- // Add the smallest hit possible - no score and a delta of 1. PL should shift
- // to FULL state.
- Hit hit1(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/true,
+ // Add the smallest hit possible - no term_frequency and a delta of 1. PL
+ // should shift to FULL state.
+ Hit hit1(/*section_id=*/0, 0, /*term_frequency=*/0,
+ /*is_in_prefix_section=*/true,
/*is_prefix_hit=*/false);
ICING_EXPECT_OK(pl_used.PrependHit(hit1));
// Size = sizeof(uncompressed hit1) + sizeof(uncompressed hit0)
@@ -208,7 +210,8 @@
EXPECT_THAT(pl_used.GetHits(), IsOkAndHolds(ElementsAre(hit1, hit0)));
// Try to add the smallest hit possible. Should fail
- Hit hit2(/*section_id=*/0, 0, /*score=*/0, /*is_in_prefix_section=*/false,
+ Hit hit2(/*section_id=*/0, 0, /*term_frequency=*/0,
+ /*is_in_prefix_section=*/false,
/*is_prefix_hit=*/false);
EXPECT_THAT(pl_used.PrependHit(hit2),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -227,7 +230,7 @@
static_cast<void *>(hits_buf.get()), size));
std::vector<HitElt> hits_in;
- hits_in.emplace_back(Hit(1, 0, Hit::kMaxHitScore));
+ hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
hits_in.emplace_back(
@@ -268,7 +271,7 @@
static_cast<void *>(hits_buf.get()), size));
std::vector<HitElt> hits_in;
- hits_in.emplace_back(Hit(1, 0, Hit::kMaxHitScore));
+ hits_in.emplace_back(Hit(1, 0, Hit::kDefaultTermFrequency));
hits_in.emplace_back(
CreateHit(hits_in.rbegin()->hit, /*desired_byte_length=*/1));
hits_in.emplace_back(
@@ -332,7 +335,7 @@
// 14-11 Hit #11
// 10 <unused>
// 9-5 kSpecialHit
- // 4-0 Offset=22
+ // 4-0 Offset=11
// ----------------------
byte_size += 11;
@@ -423,9 +426,9 @@
TEST(PostingListTest, PostingListPrependHitArrayTooManyHits) {
static constexpr int kNumHits = 128;
static constexpr int kDeltaSize = 1;
- static constexpr int kScoreSize = 1;
+ static constexpr int kTermFrequencySize = 1;
static constexpr size_t kHitsSize =
- ((kNumHits * (kDeltaSize + kScoreSize)) / 5) * 5;
+ ((kNumHits * (kDeltaSize + kTermFrequencySize)) / 5) * 5;
std::unique_ptr<char[]> hits_buf = std::make_unique<char[]>(kHitsSize);
@@ -654,5 +657,56 @@
IsOkAndHolds(ElementsAreArray(hits2.rbegin(), hits2.rend())));
}
+TEST(PostingListTest, PopHitsWithTermFrequencies) {
+ int size = 2 * posting_list_utils::min_posting_list_size();
+ std::unique_ptr<char[]> hits_buf1 = std::make_unique<char[]>(size);
+ ICING_ASSERT_OK_AND_ASSIGN(PostingListUsed pl_used,
+ PostingListUsed::CreateFromUnitializedRegion(
+ static_cast<void *>(hits_buf1.get()), size));
+
+  // This posting list is 20-bytes. Create four hits that will have deltas of
+  // two bytes each and all of whom will have a non-default term_frequency.
+  // This posting list will be almost_full.
+  //
+  // ----------------------
+  // 19     term_frequency(Hit #0)
+  // 18-17  delta(Hit #0)
+  // 16     term_frequency(Hit #1)
+  // 15-14  delta(Hit #1)
+  // 13     term_frequency(Hit #2)
+  // 12-11  delta(Hit #2)
+  // 10     <unused>
+  // 9-5    Hit #3
+  // 4-0    kInvalidHitVal
+  // ----------------------
+  Hit hit0(/*section_id=*/0, /*document_id=*/0, /*term_frequency=*/5);
+ Hit hit1 = CreateHit(hit0, /*desired_byte_length=*/2);
+ Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/2);
+ Hit hit3 = CreateHit(hit2, /*desired_byte_length=*/2);
+ ICING_ASSERT_OK(pl_used.PrependHit(hit0));
+ ICING_ASSERT_OK(pl_used.PrependHit(hit1));
+ ICING_ASSERT_OK(pl_used.PrependHit(hit2));
+ ICING_ASSERT_OK(pl_used.PrependHit(hit3));
+
+ ICING_ASSERT_OK_AND_ASSIGN(std::vector<Hit> hits_out, pl_used.GetHits());
+ EXPECT_THAT(hits_out, ElementsAre(hit3, hit2, hit1, hit0));
+
+ // Now, pop the last hit. The posting list should contain the first three
+ // hits.
+ //
+  // ----------------------
+  // 19     term_frequency(Hit #0)
+  // 18-17  delta(Hit #0)
+  // 16     term_frequency(Hit #1)
+  // 15-14  delta(Hit #1)
+  // 13-10  <unused>
+  // 9-5    Hit #2
+  // 4-0    kInvalidHitVal
+  // ----------------------
+ ICING_ASSERT_OK(pl_used.PopFrontHits(1));
+ ICING_ASSERT_OK_AND_ASSIGN(hits_out, pl_used.GetHits());
+ EXPECT_THAT(hits_out, ElementsAre(hit2, hit1, hit0));
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc
index 71752dd..a18a183 100644
--- a/icing/jni/icing-search-engine-jni.cc
+++ b/icing/jni/icing-search-engine-jni.cc
@@ -27,9 +27,15 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/proto/usage.pb.h"
#include "icing/util/status-macros.h"
namespace {
+
+// JNI string constants
+// Matches field name of IcingSearchEngine#nativePointer.
+const char kNativePointerField[] = "nativePointer";
+
bool ParseProtoFromJniByteArray(JNIEnv* env, jbyteArray bytes,
google::protobuf::MessageLite* protobuf) {
int bytes_size = env->GetArrayLength(bytes);
@@ -57,8 +63,11 @@
return ret;
}
-icing::lib::IcingSearchEngine* GetIcingSearchEnginePointer(
- jlong native_pointer) {
+icing::lib::IcingSearchEngine* GetIcingSearchEnginePointer(JNIEnv* env,
+ jobject object) {
+ jclass cls = env->GetObjectClass(object);
+ jfieldID field_id = env->GetFieldID(cls, kNativePointerField, "J");
+ jlong native_pointer = env->GetLongField(object, field_id);
return reinterpret_cast<icing::lib::IcingSearchEngine*>(native_pointer);
}
@@ -96,11 +105,19 @@
return reinterpret_cast<jlong>(icing);
}
+JNIEXPORT void JNICALL
+Java_com_google_android_icing_IcingSearchEngine_nativeDestroy(
+ JNIEnv* env, jclass clazz, jobject object) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
+ delete icing;
+}
+
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeInitialize(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::InitializeResultProto initialize_result_proto =
icing->Initialize();
@@ -110,10 +127,10 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeSetSchema(
- JNIEnv* env, jclass clazz, jlong native_pointer, jbyteArray schema_bytes,
+ JNIEnv* env, jclass clazz, jobject object, jbyteArray schema_bytes,
jboolean ignore_errors_and_delete_documents) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::SchemaProto schema_proto;
if (!ParseProtoFromJniByteArray(env, schema_bytes, &schema_proto)) {
@@ -129,9 +146,9 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeGetSchema(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::GetSchemaResultProto get_schema_result_proto = icing->GetSchema();
@@ -140,9 +157,9 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeGetSchemaType(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring schema_type) {
+ JNIEnv* env, jclass clazz, jobject object, jstring schema_type) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
const char* native_schema_type =
env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr);
@@ -154,10 +171,9 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativePut(
- JNIEnv* env, jclass clazz, jlong native_pointer,
- jbyteArray document_bytes) {
+ JNIEnv* env, jclass clazz, jobject object, jbyteArray document_bytes) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::DocumentProto document_proto;
if (!ParseProtoFromJniByteArray(env, document_bytes, &document_proto)) {
@@ -173,10 +189,10 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeGet(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring name_space,
+ JNIEnv* env, jclass clazz, jobject object, jstring name_space,
jstring uri) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
const char* native_name_space =
env->GetStringUTFChars(name_space, /*isCopy=*/nullptr);
@@ -188,10 +204,28 @@
}
JNIEXPORT jbyteArray JNICALL
-Java_com_google_android_icing_IcingSearchEngine_nativeGetAllNamespaces(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+Java_com_google_android_icing_IcingSearchEngine_nativeReportUsage(
+ JNIEnv* env, jclass clazz, jobject object, jbyteArray usage_report_bytes) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
+
+ icing::lib::UsageReport usage_report;
+ if (!ParseProtoFromJniByteArray(env, usage_report_bytes, &usage_report)) {
+ ICING_LOG(ERROR) << "Failed to parse UsageReport in nativeReportUsage";
+ return nullptr;
+ }
+
+ icing::lib::ReportUsageResultProto report_usage_result_proto =
+ icing->ReportUsage(usage_report);
+
+ return SerializeProtoToJniByteArray(env, report_usage_result_proto);
+}
+
+JNIEXPORT jbyteArray JNICALL
+Java_com_google_android_icing_IcingSearchEngine_nativeGetAllNamespaces(
+ JNIEnv* env, jclass clazz, jobject object) {
+ icing::lib::IcingSearchEngine* icing =
+ GetIcingSearchEnginePointer(env, object);
icing::lib::GetAllNamespacesResultProto get_all_namespaces_result_proto =
icing->GetAllNamespaces();
@@ -201,10 +235,9 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeGetNextPage(
- JNIEnv* env, jclass clazz, jlong native_pointer,
- jlong next_page_token) {
+ JNIEnv* env, jclass clazz, jobject object, jlong next_page_token) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::SearchResultProto next_page_result_proto =
icing->GetNextPage(next_page_token);
@@ -214,10 +247,9 @@
JNIEXPORT void JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeInvalidateNextPageToken(
- JNIEnv* env, jclass clazz, jlong native_pointer,
- jlong next_page_token) {
+ JNIEnv* env, jclass clazz, jobject object, jlong next_page_token) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing->InvalidateNextPageToken(next_page_token);
@@ -226,11 +258,10 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeSearch(
- JNIEnv* env, jclass clazz, jlong native_pointer,
- jbyteArray search_spec_bytes, jbyteArray scoring_spec_bytes,
- jbyteArray result_spec_bytes) {
+ JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes,
+ jbyteArray scoring_spec_bytes, jbyteArray result_spec_bytes) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::SearchSpecProto search_spec_proto;
if (!ParseProtoFromJniByteArray(env, search_spec_bytes, &search_spec_proto)) {
@@ -259,10 +290,10 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeDelete(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring name_space,
+ JNIEnv* env, jclass clazz, jobject object, jstring name_space,
jstring uri) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
const char* native_name_space =
env->GetStringUTFChars(name_space, /*isCopy=*/nullptr);
@@ -275,9 +306,9 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByNamespace(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring name_space) {
+ JNIEnv* env, jclass clazz, jobject object, jstring name_space) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
const char* native_name_space =
env->GetStringUTFChars(name_space, /*isCopy=*/nullptr);
@@ -289,9 +320,9 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeDeleteBySchemaType(
- JNIEnv* env, jclass clazz, jlong native_pointer, jstring schema_type) {
+ JNIEnv* env, jclass clazz, jobject object, jstring schema_type) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
const char* native_schema_type =
env->GetStringUTFChars(schema_type, /*isCopy=*/nullptr);
@@ -303,17 +334,16 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeDeleteByQuery(
- JNIEnv* env, jclass clazz, jlong native_pointer,
- jbyteArray search_spec_bytes) {
+ JNIEnv* env, jclass clazz, jobject object, jbyteArray search_spec_bytes) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::SearchSpecProto search_spec_proto;
if (!ParseProtoFromJniByteArray(env, search_spec_bytes, &search_spec_proto)) {
ICING_LOG(ERROR) << "Failed to parse SearchSpecProto in nativeSearch";
return nullptr;
}
- icing::lib::DeleteResultProto delete_result_proto =
+ icing::lib::DeleteByQueryResultProto delete_result_proto =
icing->DeleteByQuery(search_spec_proto);
return SerializeProtoToJniByteArray(env, delete_result_proto);
@@ -321,9 +351,9 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativePersistToDisk(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::PersistToDiskResultProto persist_to_disk_result_proto =
icing->PersistToDisk();
@@ -333,9 +363,9 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeOptimize(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::OptimizeResultProto optimize_result_proto = icing->Optimize();
@@ -344,9 +374,9 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeGetOptimizeInfo(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::GetOptimizeInfoResultProto get_optimize_info_result_proto =
icing->GetOptimizeInfo();
@@ -356,9 +386,9 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeReset(
- JNIEnv* env, jclass clazz, jlong native_pointer) {
+ JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
- GetIcingSearchEnginePointer(native_pointer);
+ GetIcingSearchEnginePointer(env, object);
icing::lib::ResetResultProto reset_result_proto = icing->Reset();
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
index 900cce5..eb8b7a4 100644
--- a/icing/query/query-processor_benchmark.cc
+++ b/icing/query/query-processor_benchmark.cc
@@ -24,11 +24,11 @@
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
-#include "icing/testing/fake-clock.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/transform/normalizer-factory.h"
+#include "icing/util/clock.h"
#include "icing/util/logging.h"
#include "unicode/uloc.h"
@@ -70,7 +70,8 @@
const std::string& token) {
Index::Editor editor =
index->Edit(document_id, section_id, term_match_type, /*namespace_id=*/0);
- ICING_ASSERT_OK(editor.AddHit(token.c_str()));
+ ICING_ASSERT_OK(editor.BufferTerm(token.c_str()));
+ ICING_ASSERT_OK(editor.IndexAllBufferedTerms());
}
std::unique_ptr<Index> CreateIndex(const IcingFilesystem& icing_filesystem,
@@ -114,19 +115,22 @@
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- FakeClock fake_clock;
SchemaProto schema;
auto type_config = schema.add_types();
type_config->set_schema_type("type1");
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, schema_dir));
+ Clock clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir, &clock));
ICING_ASSERT_OK(schema_store->SetSchema(schema));
- std::unique_ptr<DocumentStore> document_store =
- DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
+ DocumentStore::CreateResult create_result =
+ DocumentStore::Create(&filesystem, doc_store_dir, &clock,
schema_store.get())
.ValueOrDie();
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
DocumentId document_id = document_store
->Put(DocumentBuilder()
@@ -143,7 +147,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index.get(), language_segmenter.get(),
normalizer.get(), document_store.get(),
- schema_store.get(), &fake_clock));
+ schema_store.get(), &clock));
SearchSpecProto search_spec;
search_spec.set_query(input_string);
@@ -228,19 +232,22 @@
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- FakeClock fake_clock;
SchemaProto schema;
auto type_config = schema.add_types();
type_config->set_schema_type("type1");
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, schema_dir));
+ Clock clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir, &clock));
ICING_ASSERT_OK(schema_store->SetSchema(schema));
- std::unique_ptr<DocumentStore> document_store =
- DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
+ DocumentStore::CreateResult create_result =
+ DocumentStore::Create(&filesystem, doc_store_dir, &clock,
schema_store.get())
.ValueOrDie();
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
DocumentId document_id = document_store
->Put(DocumentBuilder()
@@ -271,7 +278,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index.get(), language_segmenter.get(),
normalizer.get(), document_store.get(),
- schema_store.get(), &fake_clock));
+ schema_store.get(), &clock));
const std::string query_string = absl_ports::StrCat(
input_string_a, " ", input_string_b, " ", input_string_c, " ",
@@ -360,19 +367,22 @@
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- FakeClock fake_clock;
SchemaProto schema;
auto type_config = schema.add_types();
type_config->set_schema_type("type1");
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, schema_dir));
+ Clock clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir, &clock));
ICING_ASSERT_OK(schema_store->SetSchema(schema));
- std::unique_ptr<DocumentStore> document_store =
- DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
+ DocumentStore::CreateResult create_result =
+ DocumentStore::Create(&filesystem, doc_store_dir, &clock,
schema_store.get())
.ValueOrDie();
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
DocumentId document_id = document_store
->Put(DocumentBuilder()
@@ -392,7 +402,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index.get(), language_segmenter.get(),
normalizer.get(), document_store.get(),
- schema_store.get(), &fake_clock));
+ schema_store.get(), &clock));
SearchSpecProto search_spec;
search_spec.set_query(input_string);
@@ -477,19 +487,22 @@
std::unique_ptr<LanguageSegmenter> language_segmenter =
language_segmenter_factory::Create(std::move(options)).ValueOrDie();
std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
- FakeClock fake_clock;
SchemaProto schema;
auto type_config = schema.add_types();
type_config->set_schema_type("type1");
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, schema_dir));
+ Clock clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, schema_dir, &clock));
ICING_ASSERT_OK(schema_store->SetSchema(schema));
- std::unique_ptr<DocumentStore> document_store =
- DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
+ DocumentStore::CreateResult create_result =
+ DocumentStore::Create(&filesystem, doc_store_dir, &clock,
schema_store.get())
.ValueOrDie();
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
DocumentId document_id = document_store
->Put(DocumentBuilder()
@@ -509,7 +522,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index.get(), language_segmenter.get(),
normalizer.get(), document_store.get(),
- schema_store.get(), &fake_clock));
+ schema_store.get(), &clock));
SearchSpecProto search_spec;
search_spec.set_query(input_string);
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index 16bd120..7546ae4 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -39,6 +39,7 @@
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/jni-test-helpers.h"
+#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
@@ -95,17 +96,17 @@
filesystem_.CreateDirectoryRecursively(index_dir_.c_str());
filesystem_.CreateDirectoryRecursively(store_dir_.c_str());
-#ifndef ICING_REVERSE_JNI_SEGMENTATION
- // If we've specified using the reverse-JNI method for segmentation (i.e.
- // not ICU), then we won't have the ICU data file included to set up.
- // Technically, we could choose to use reverse-JNI for segmentation AND
- // include an ICU data file, but that seems unlikely and our current BUILD
- // setup doesn't do this.
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
-#endif // ICING_REVERSE_JNI_SEGMENTATION
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ // If we've specified using the reverse-JNI method for segmentation (i.e.
+ // not ICU), then we won't have the ICU data file included to set up.
+ // Technically, we could choose to use reverse-JNI for segmentation AND
+ // include an ICU data file, but that seems unlikely and our current BUILD
+ // setup doesn't do this.
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
Index::Options options(index_dir_,
/*index_merge_size=*/1024 * 1024);
@@ -127,7 +128,8 @@
TermMatchType::Code term_match_type, const std::string& token) {
Index::Editor editor = index_->Edit(document_id, section_id,
term_match_type, /*namespace_id=*/0);
- return editor.AddHit(token.c_str());
+ auto status = editor.BufferTerm(token.c_str());
+ return status.ok() ? editor.IndexAllBufferedTerms() : status;
}
void TearDown() override {
@@ -188,14 +190,16 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
@@ -235,14 +239,16 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
@@ -282,14 +288,16 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -338,14 +346,16 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -391,14 +401,16 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -444,18 +456,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -500,18 +514,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -556,18 +572,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -612,18 +630,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -674,18 +694,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -736,18 +758,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -797,18 +821,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -891,8 +917,8 @@
}
{
- // OR gets precedence over AND, this is parsed as (kitten AND ((foo OR bar)
- // OR cat))
+ // OR gets precedence over AND, this is parsed as (kitten AND ((foo OR
+ // bar) OR cat))
SearchSpecProto search_spec;
search_spec.set_query("kitten foo OR bar OR cat");
search_spec.set_term_match_type(term_match_type);
@@ -914,18 +940,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -985,18 +1013,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1057,18 +1087,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1128,18 +1160,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that the DocHitInfoIterators will see that the
- // document exists and not filter out the DocumentId as deleted.
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that the DocHitInfoIterators will see
+ // that the document exists and not filter out the DocumentId as deleted.
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1199,18 +1233,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that they'll bump the last_added_document_id,
- // which will give us the proper exclusion results
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that they'll bump the
+ // last_added_document_id, which will give us the proper exclusion results
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1247,9 +1283,9 @@
ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
query_processor->ParseSearch(search_spec));
- // We don't know have the section mask to indicate what section "world" came.
- // It doesn't matter which section it was in since the query doesn't care. It
- // just wanted documents that didn't have "hello"
+ // We don't know have the section mask to indicate what section "world"
+ // came. It doesn't matter which section it was in since the query doesn't
+ // care. It just wanted documents that didn't have "hello"
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()),
ElementsAre(DocHitInfo(document_id2, kSectionIdMaskNone)));
EXPECT_THAT(results.query_terms, IsEmpty());
@@ -1260,18 +1296,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that they'll bump the last_added_document_id,
- // which will give us the proper exclusion results
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that they'll bump the
+ // last_added_document_id, which will give us the proper exclusion results
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1319,18 +1357,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that they'll bump the last_added_document_id,
- // which will give us the proper exclusion results
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that they'll bump the
+ // last_added_document_id, which will give us the proper exclusion results
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1375,8 +1415,8 @@
ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
query_processor->ParseSearch(search_spec));
- // The query is interpreted as "exclude all documents that have animal, and
- // exclude all documents that have cat". Since both documents contain
+ // The query is interpreted as "exclude all documents that have animal,
+ // and exclude all documents that have cat". Since both documents contain
// animal, there are no results.
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
EXPECT_THAT(results.query_terms, IsEmpty());
@@ -1390,8 +1430,8 @@
ICING_ASSERT_OK_AND_ASSIGN(QueryProcessor::QueryResults results,
query_processor->ParseSearch(search_spec));
- // The query is interpreted as "exclude all documents that have animal, and
- // include all documents that have cat". Since both documents contain
+ // The query is interpreted as "exclude all documents that have animal,
+ // and include all documents that have cat". Since both documents contain
// animal, there are no results.
EXPECT_THAT(GetDocHitInfos(results.root_iterator.get()), IsEmpty());
EXPECT_THAT(results.query_terms, SizeIs(1));
@@ -1404,18 +1444,20 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
- // These documents don't actually match to the tokens in the index. We're just
- // inserting the documents so that they'll bump the last_added_document_id,
- // which will give us the proper exclusion results
+ // These documents don't actually match to the tokens in the index. We're
+ // just inserting the documents so that they'll bump the
+ // last_added_document_id, which will give us the proper exclusion results
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store_->Put(DocumentBuilder()
.SetKey("namespace", "1")
@@ -1491,14 +1533,16 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1560,14 +1604,16 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1630,14 +1676,16 @@
AddSchemaType(&schema, "email");
AddSchemaType(&schema, "message");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1699,14 +1747,16 @@
AddIndexedProperty(email_type, "subject");
int subject_section_id = 0;
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1752,24 +1802,28 @@
// Create the schema and document store
SchemaProto schema;
SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
AddIndexedProperty(email_type, "a"); // Section "a" would get sectionId 0
AddIndexedProperty(email_type, "foo");
int email_foo_section_id = 1;
SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
AddIndexedProperty(message_type, "foo");
int message_foo_section_id = 0;
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1829,23 +1883,27 @@
// Create the schema and document store
SchemaProto schema;
SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
AddIndexedProperty(email_type, "foo");
int email_foo_section_id = 0;
SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
AddIndexedProperty(message_type, "foo");
int message_foo_section_id = 0;
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1883,8 +1941,8 @@
schema_store_.get(), &fake_clock_));
SearchSpecProto search_spec;
- // Create a section filter '<section name>:<query term>', but only look within
- // documents of email schema
+ // Create a section filter '<section name>:<query term>', but only look
+ // within documents of email schema
search_spec.set_query("foo:animal");
search_spec.add_schema_type_filters("email");
search_spec.set_term_match_type(term_match_type);
@@ -1905,23 +1963,27 @@
// Create the schema and document store
SchemaProto schema;
SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
AddIndexedProperty(email_type, "foo");
int email_foo_section_id = 0;
SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
AddIndexedProperty(message_type, "bar");
int message_foo_section_id = 0;
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -1946,9 +2008,9 @@
term_match_type, "animal"),
IsOk());
- // Message document has content "animal", but put in in the same section id as
- // the indexed email section id, the same id as indexed property "foo" in the
- // message type
+ // Message document has content "animal", but put it in the same section id
+ // as the indexed email section id, the same id as indexed property "foo" in
+ // the message type
ASSERT_THAT(AddTokenToIndex(message_document_id, message_foo_section_id,
term_match_type, "animal"),
IsOk());
@@ -1961,8 +2023,8 @@
schema_store_.get(), &fake_clock_));
SearchSpecProto search_spec;
- // Create a section filter '<section name>:<query term>', but only look within
- // documents of email schema
+ // Create a section filter '<section name>:<query term>', but only look
+ // within documents of email schema
search_spec.set_query("foo:animal");
search_spec.set_term_match_type(term_match_type);
@@ -1983,14 +2045,16 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2017,8 +2081,8 @@
schema_store_.get(), &fake_clock_));
SearchSpecProto search_spec;
- // Create a section filter '<section name>:<query term>', but only look within
- // documents of email schema
+ // Create a section filter '<section name>:<query term>', but only look
+ // within documents of email schema
search_spec.set_query("nonexistent:animal");
search_spec.set_term_match_type(term_match_type);
@@ -2039,14 +2103,16 @@
SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
AddUnindexedProperty(email_type, "foo");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2073,8 +2139,8 @@
schema_store_.get(), &fake_clock_));
SearchSpecProto search_spec;
- // Create a section filter '<section name>:<query term>', but only look within
- // documents of email schema
+ // Create a section filter '<section name>:<query term>', but only look
+ // within documents of email schema
search_spec.set_query("foo:animal");
search_spec.set_term_match_type(term_match_type);
@@ -2092,23 +2158,27 @@
// Create the schema and document store
SchemaProto schema;
SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
AddIndexedProperty(email_type, "foo");
int email_foo_section_id = 0;
SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message");
- // SectionIds are assigned in ascending order per schema type, alphabetically.
+ // SectionIds are assigned in ascending order per schema type,
+ // alphabetically.
AddIndexedProperty(message_type, "foo");
int message_foo_section_id = 0;
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// These documents don't actually match to the tokens in the index. We're
// inserting the documents to get the appropriate number of documents and
@@ -2172,14 +2242,16 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store_->Put(DocumentBuilder()
@@ -2226,14 +2298,16 @@
SchemaProto schema;
AddSchemaType(&schema, "email");
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store_->Put(DocumentBuilder()
diff --git a/icing/result/page-result-state.h b/icing/result/page-result-state.h
index a26c44e..5932b56 100644
--- a/icing/result/page-result-state.h
+++ b/icing/result/page-result-state.h
@@ -18,6 +18,7 @@
#include <cstdint>
#include <vector>
+#include "icing/result/projection-tree.h"
#include "icing/result/snippet-context.h"
#include "icing/scoring/scored-document-hit.h"
@@ -29,11 +30,14 @@
PageResultState(std::vector<ScoredDocumentHit> scored_document_hits_in,
uint64_t next_page_token_in,
SnippetContext snippet_context_in,
- int num_previously_returned_in)
+ std::unordered_map<std::string, ProjectionTree> tree_map,
+ int num_previously_returned_in, int num_per_page_in)
: scored_document_hits(std::move(scored_document_hits_in)),
next_page_token(next_page_token_in),
snippet_context(std::move(snippet_context_in)),
- num_previously_returned(num_previously_returned_in) {}
+ projection_tree_map(std::move(tree_map)),
+ num_previously_returned(num_previously_returned_in),
+ requested_page_size(num_per_page_in) {}
// Results of one page
std::vector<ScoredDocumentHit> scored_document_hits;
@@ -44,8 +48,15 @@
// Information needed for snippeting.
SnippetContext snippet_context;
+ // Information needed for projection.
+ std::unordered_map<std::string, ProjectionTree> projection_tree_map;
+
// Number of results that have been returned in previous pages.
int num_previously_returned;
+
+ // The page size for this query. This should always be >=
+ // scored_document_hits.size().
+ int requested_page_size;
};
} // namespace lib
diff --git a/icing/result/projection-tree.cc b/icing/result/projection-tree.cc
new file mode 100644
index 0000000..382fcb4
--- /dev/null
+++ b/icing/result/projection-tree.cc
@@ -0,0 +1,50 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/projection-tree.h"
+
+#include <algorithm>
+
+#include "icing/absl_ports/str_join.h"
+#include "icing/schema/section-manager.h"
+
+namespace icing {
+namespace lib {
+
+ProjectionTree::ProjectionTree(
+ const ResultSpecProto::TypePropertyMask& type_field_mask) {
+ for (const std::string& field_mask : type_field_mask.paths()) {
+ Node* current_node = &root_;
+ for (std::string_view sub_field_mask :
+ absl_ports::StrSplit(field_mask, kPropertySeparator)) {
+ current_node = AddChildNode(sub_field_mask, &current_node->children);
+ }
+ }
+}
+
+ProjectionTree::Node* ProjectionTree::AddChildNode(
+ std::string_view property_name, std::vector<Node>* current_children) {
+ auto itr = std::find_if(current_children->begin(), current_children->end(),
+ [&property_name](const Node& node) {
+ return node.name == property_name;
+ });
+ if (itr != current_children->end()) {
+ return &(*itr);
+ }
+ current_children->push_back(ProjectionTree::Node(property_name));
+ return &current_children->back();
+}
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/projection-tree.h b/icing/result/projection-tree.h
new file mode 100644
index 0000000..a87a8fc
--- /dev/null
+++ b/icing/result/projection-tree.h
@@ -0,0 +1,55 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_RESULT_PROJECTION_TREE_H_
+#define ICING_RESULT_PROJECTION_TREE_H_
+
+#include <string_view>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include "icing/proto/search.pb.h"
+
+namespace icing {
+namespace lib {
+
+class ProjectionTree {
+ public:
+ static constexpr std::string_view kSchemaTypeWildcard = "*";
+
+ struct Node {
+ explicit Node(std::string_view name = "") : name(name) {}
+
+ std::string_view name;
+ std::vector<Node> children;
+ };
+
+ explicit ProjectionTree(
+ const ResultSpecProto::TypePropertyMask& type_field_mask);
+
+ const Node& root() const { return root_; }
+
+ private:
+ // Add a child node with property_name to current_children and returns a
+ // pointer to the child node.
+ Node* AddChildNode(std::string_view property_name,
+ std::vector<Node>* current_children);
+
+ Node root_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_RESULT_PROJECTION_TREE_H_
diff --git a/icing/result/projection-tree_test.cc b/icing/result/projection-tree_test.cc
new file mode 100644
index 0000000..77d1d21
--- /dev/null
+++ b/icing/result/projection-tree_test.cc
@@ -0,0 +1,102 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/result/projection-tree.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/proto/search.pb.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+TEST(ProjectionTreeTest, CreateEmptyFieldMasks) {
+ ResultSpecProto::TypePropertyMask type_field_mask;
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ EXPECT_THAT(tree.root().children, IsEmpty());
+}
+
+TEST(ProjectionTreeTest, CreateTreeTopLevel) {
+ ResultSpecProto::TypePropertyMask type_field_mask;
+ type_field_mask.add_paths("subject");
+
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ ASSERT_THAT(tree.root().children, SizeIs(1));
+ ASSERT_THAT(tree.root().children.at(0).name, Eq("subject"));
+ ASSERT_THAT(tree.root().children.at(0).children, IsEmpty());
+}
+
+TEST(ProjectionTreeTest, CreateTreeMultipleTopLevel) {
+ ResultSpecProto::TypePropertyMask type_field_mask;
+ type_field_mask.add_paths("subject");
+ type_field_mask.add_paths("body");
+
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ ASSERT_THAT(tree.root().children, SizeIs(2));
+ ASSERT_THAT(tree.root().children.at(0).name, Eq("subject"));
+ ASSERT_THAT(tree.root().children.at(0).children, IsEmpty());
+ ASSERT_THAT(tree.root().children.at(1).name, Eq("body"));
+ ASSERT_THAT(tree.root().children.at(1).children, IsEmpty());
+}
+
+TEST(ProjectionTreeTest, CreateTreeNested) {
+ ResultSpecProto::TypePropertyMask type_field_mask;
+ type_field_mask.add_paths("subject.body");
+ type_field_mask.add_paths("body");
+
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ ASSERT_THAT(tree.root().children, SizeIs(2));
+ ASSERT_THAT(tree.root().children.at(0).name, Eq("subject"));
+ ASSERT_THAT(tree.root().children.at(0).children, SizeIs(1));
+ ASSERT_THAT(tree.root().children.at(0).children.at(0).name, Eq("body"));
+ ASSERT_THAT(tree.root().children.at(0).children.at(0).children, IsEmpty());
+ ASSERT_THAT(tree.root().children.at(1).name, Eq("body"));
+ ASSERT_THAT(tree.root().children.at(1).children, IsEmpty());
+}
+
+TEST(ProjectionTreeTest, CreateTreeNestedSharedNode) {
+ ResultSpecProto::TypePropertyMask type_field_mask;
+ type_field_mask.add_paths("sender.name.first");
+ type_field_mask.add_paths("sender.emailAddress");
+
+ ProjectionTree tree(type_field_mask);
+ EXPECT_THAT(tree.root().name, IsEmpty());
+ ASSERT_THAT(tree.root().children, SizeIs(1));
+ ASSERT_THAT(tree.root().children.at(0).name, Eq("sender"));
+ ASSERT_THAT(tree.root().children.at(0).children, SizeIs(2));
+ ASSERT_THAT(tree.root().children.at(0).children.at(0).name, Eq("name"));
+ ASSERT_THAT(tree.root().children.at(0).children.at(0).children, SizeIs(1));
+ ASSERT_THAT(tree.root().children.at(0).children.at(0).children.at(0).name,
+ Eq("first"));
+ ASSERT_THAT(tree.root().children.at(0).children.at(0).children.at(0).children,
+ IsEmpty());
+ ASSERT_THAT(tree.root().children.at(0).children.at(1).name,
+ Eq("emailAddress"));
+ ASSERT_THAT(tree.root().children.at(0).children.at(1).children, IsEmpty());
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/result/result-retriever.cc b/icing/result/result-retriever.cc
index f09d834..0b8ad4a 100644
--- a/icing/result/result-retriever.cc
+++ b/icing/result/result-retriever.cc
@@ -14,15 +14,56 @@
#include "icing/result/result-retriever.h"
+#include <string_view>
+#include <utility>
+
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/result/page-result-state.h"
+#include "icing/result/projection-tree.h"
#include "icing/result/snippet-context.h"
#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
+
+namespace {
+
+void Project(const std::vector<ProjectionTree::Node>& projection_tree,
+ google::protobuf::RepeatedPtrField<PropertyProto>* properties) {
+ int num_kept = 0;
+ for (int cur_pos = 0; cur_pos < properties->size(); ++cur_pos) {
+ PropertyProto* prop = properties->Mutable(cur_pos);
+ auto itr = std::find_if(projection_tree.begin(), projection_tree.end(),
+ [&prop](const ProjectionTree::Node& node) {
+ return node.name == prop->name();
+ });
+ if (itr == projection_tree.end()) {
+ // Property is not present in the projection tree. Just skip it.
+ continue;
+ }
+ // This property should be kept.
+ properties->SwapElements(num_kept, cur_pos);
+ ++num_kept;
+ if (itr->children.empty()) {
+ // A field mask does refer to this property, but it has no children. So
+ // we should take the entire property, with all of its
+ // subproperties/values
+ continue;
+ }
+ // The field mask refers to children of this property. Recurse through the
+ // document values that this property holds and project the children
+ // requested by this field mask.
+ for (DocumentProto& subproperty : *(prop->mutable_document_values())) {
+ Project(itr->children, subproperty.mutable_properties());
+ }
+ }
+ properties->DeleteSubrange(num_kept, properties->size() - num_kept);
+}
+
+} // namespace
+
libtextclassifier3::StatusOr<std::unique_ptr<ResultRetriever>>
ResultRetriever::Create(const DocumentStore* doc_store,
const SchemaStore* schema_store,
@@ -56,6 +97,9 @@
remaining_num_to_snippet = 0;
}
+ auto wildcard_projection_tree_itr =
+ page_result_state.projection_tree_map.find(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
for (const auto& scored_document_hit :
page_result_state.scored_document_hits) {
libtextclassifier3::StatusOr<DocumentProto> document_or =
@@ -74,6 +118,19 @@
}
}
+ // Apply projection
+ auto itr = page_result_state.projection_tree_map.find(
+ document_or.ValueOrDie().schema());
+
+ if (itr != page_result_state.projection_tree_map.end()) {
+ Project(itr->second.root().children,
+ document_or.ValueOrDie().mutable_properties());
+ } else if (wildcard_projection_tree_itr !=
+ page_result_state.projection_tree_map.end()) {
+ Project(wildcard_projection_tree_itr->second.root().children,
+ document_or.ValueOrDie().mutable_properties());
+ }
+
SearchResultProto::ResultProto result;
// Add the snippet if requested.
if (snippet_context.snippet_spec.num_matches_per_property() > 0 &&
diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc
index 1d1f824..98cc75a 100644
--- a/icing/result/result-retriever_test.cc
+++ b/icing/result/result-retriever_test.cc
@@ -16,6 +16,8 @@
#include <limits>
#include <memory>
+#include <string_view>
+#include <unordered_map>
#include "gtest/gtest.h"
#include "icing/document-builder.h"
@@ -26,10 +28,13 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/result/projection-tree.h"
#include "icing/schema/schema-store.h"
+#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
+#include "icing/testing/platform.h"
#include "icing/testing/snippet-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
@@ -56,46 +61,101 @@
}
void SetUp() override {
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
language_segmenter_factory::SegmenterOptions options(ULOC_US);
ICING_ASSERT_OK_AND_ASSIGN(
language_segmenter_,
language_segmenter_factory::Create(std::move(options)));
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/10000));
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- PropertyConfigProto* prop_config = type_config->add_properties();
- prop_config->set_property_name("subject");
- prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
- prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- prop_config->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- prop_config = type_config->add_properties();
- prop_config->set_property_name("body");
- prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
- prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- prop_config->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ ASSERT_THAT(schema_store_->SetSchema(CreatePersonAndEmailSchema()), IsOk());
}
void TearDown() override {
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
+ SchemaProto CreatePersonAndEmailSchema() {
+ SchemaProto schema;
+
+ auto* type = schema.add_types();
+ type->set_schema_type("Email");
+
+ auto* subj = type->add_properties();
+ subj->set_property_name("name");
+ subj->set_data_type(PropertyConfigProto::DataType::STRING);
+ subj->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ subj->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ subj->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ auto* body = type->add_properties();
+ body->set_property_name("body");
+ body->set_data_type(PropertyConfigProto::DataType::STRING);
+ body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ body->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::EXACT_ONLY);
+ body->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ auto* sender = type->add_properties();
+ sender->set_property_name("sender");
+ sender->set_schema_type("Person");
+ sender->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ sender->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ sender->mutable_document_indexing_config()->set_index_nested_properties(
+ true);
+
+ auto* person_type = schema.add_types();
+ person_type->set_schema_type("Person");
+ auto* name = person_type->add_properties();
+ name->set_property_name("name");
+ name->set_data_type(PropertyConfigProto::DataType::STRING);
+ name->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ name->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ name->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+ auto* address = person_type->add_properties();
+ address->set_property_name("emailAddress");
+ address->set_data_type(PropertyConfigProto::DataType::STRING);
+ address->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ address->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ address->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ return schema;
+ }
+
+ SectionId GetSectionId(const std::string& type, const std::string& property) {
+ auto type_id_or = schema_store_->GetSchemaTypeId(type);
+ if (!type_id_or.ok()) {
+ return kInvalidSectionId;
+ }
+ SchemaTypeId type_id = type_id_or.ValueOrDie();
+ for (SectionId section_id = 0; section_id <= kMaxSectionId; ++section_id) {
+ auto metadata_or = schema_store_->GetSectionMetadata(type_id, section_id);
+ if (!metadata_or.ok()) {
+ break;
+ }
+ const SectionMetadata* metadata = metadata_or.ValueOrDie();
+ if (metadata->path == property) {
+ return metadata->id;
+ }
+ }
+ return kInvalidSectionId;
+ }
+
const Filesystem filesystem_;
const std::string test_dir_;
std::unique_ptr<LanguageSegmenter> language_segmenter_;
@@ -114,14 +174,22 @@
DocumentProto CreateDocument(int id) {
return DocumentBuilder()
- .SetKey("icing", "email/" + std::to_string(id))
- .SetSchema("email")
- .AddStringProperty("subject", "subject foo " + std::to_string(id))
+ .SetKey("icing", "Email/" + std::to_string(id))
+ .SetSchema("Email")
+ .AddStringProperty("name", "subject foo " + std::to_string(id))
.AddStringProperty("body", "body bar " + std::to_string(id))
.SetCreationTimestampMs(1574365086666 + id)
.Build();
}
+SectionIdMask CreateSectionIdMask(const std::vector<SectionId>& section_ids) {
+ SectionIdMask mask = 0;
+ for (SectionId section_id : section_ids) {
+ mask |= (1u << section_id);
+ }
+ return mask;
+}
+
TEST_F(ResultRetrieverTest, CreationWithNullPointerShouldFail) {
EXPECT_THAT(
ResultRetriever::Create(/*doc_store=*/nullptr, schema_store_.get(),
@@ -129,9 +197,11 @@
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
EXPECT_THAT(
ResultRetriever::Create(doc_store.get(), /*schema_store=*/nullptr,
@@ -149,9 +219,12 @@
TEST_F(ResultRetrieverTest, ShouldRetrieveSimpleResults) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(CreateDocument(/*id=*/1)));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
@@ -159,10 +232,13 @@
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
doc_store->Put(CreateDocument(/*id=*/3)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -181,7 +257,10 @@
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
EXPECT_THAT(
result_retriever->RetrieveResults(page_result_state),
IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2),
@@ -190,19 +269,25 @@
TEST_F(ResultRetrieverTest, IgnoreErrors) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(CreateDocument(/*id=*/1)));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
doc_store->Put(CreateDocument(/*id=*/2)));
DocumentId invalid_document_id = -1;
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {invalid_document_id, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {invalid_document_id, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -220,7 +305,10 @@
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
EXPECT_THAT(
result_retriever->RetrieveResults(page_result_state),
IsOkAndHolds(ElementsAre(EqualsProto(result1), EqualsProto(result2))));
@@ -228,19 +316,25 @@
TEST_F(ResultRetrieverTest, NotIgnoreErrors) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(CreateDocument(/*id=*/1)));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
doc_store->Put(CreateDocument(/*id=*/2)));
DocumentId invalid_document_id = -1;
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {invalid_document_id, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {invalid_document_id, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -253,16 +347,18 @@
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
DocumentId non_existing_document_id = 4;
page_result_state.scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {non_existing_document_id, /*hit_section_id_mask=*/0b00000011,
- /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {non_existing_document_id, hit_section_id_mask, /*score=*/0}};
EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -272,17 +368,23 @@
ON_CALL(mock_filesystem, OpenForRead(_)).WillByDefault(Return(false));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&mock_filesystem, test_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(CreateDocument(/*id=*/1)));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
doc_store->Put(CreateDocument(/*id=*/2)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
@@ -296,16 +398,22 @@
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
EXPECT_THAT(result_retriever->RetrieveResults(page_result_state),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
TEST_F(ResultRetrieverTest, DefaultSnippetSpecShouldDisableSnippeting) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(CreateDocument(/*id=*/1)));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
@@ -313,10 +421,13 @@
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
doc_store->Put(CreateDocument(/*id=*/3)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -328,7 +439,10 @@
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> results,
result_retriever->RetrieveResults(page_result_state));
@@ -343,9 +457,12 @@
TEST_F(ResultRetrieverTest, SimpleSnippeted) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(CreateDocument(/*id=*/1)));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
@@ -353,10 +470,13 @@
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
doc_store->Put(CreateDocument(/*id=*/3)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -367,16 +487,18 @@
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> result,
result_retriever->RetrieveResults(page_result_state));
EXPECT_THAT(result, SizeIs(3));
EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1)));
- EXPECT_THAT(
- GetWindow(result[0].document(), result[0].snippet(), "subject", 0),
- Eq("subject foo 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "subject", 0),
+ EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "name", 0),
+ Eq("subject foo 1"));
+ EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "name", 0),
Eq("foo"));
EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0),
Eq("body bar 1"));
@@ -384,10 +506,9 @@
Eq("bar"));
EXPECT_THAT(result[1].document(), EqualsProto(CreateDocument(/*id=*/2)));
- EXPECT_THAT(
- GetWindow(result[1].document(), result[1].snippet(), "subject", 0),
- Eq("subject foo 2"));
- EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "subject", 0),
+ EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "name", 0),
+ Eq("subject foo 2"));
+ EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "name", 0),
Eq("foo"));
EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "body", 0),
Eq("body bar 2"));
@@ -395,10 +516,9 @@
Eq("bar"));
EXPECT_THAT(result[2].document(), EqualsProto(CreateDocument(/*id=*/3)));
- EXPECT_THAT(
- GetWindow(result[2].document(), result[2].snippet(), "subject", 0),
- Eq("subject foo 3"));
- EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "subject", 0),
+ EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "name", 0),
+ Eq("subject foo 3"));
+ EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "name", 0),
Eq("foo"));
EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "body", 0),
Eq("body bar 3"));
@@ -408,9 +528,12 @@
TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(CreateDocument(/*id=*/1)));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
@@ -421,10 +544,13 @@
ResultSpecProto::SnippetSpecProto snippet_spec = CreateSnippetSpec();
snippet_spec.set_num_to_snippet(1);
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -434,16 +560,18 @@
snippet_spec, TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> result,
result_retriever->RetrieveResults(page_result_state));
EXPECT_THAT(result, SizeIs(3));
EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1)));
- EXPECT_THAT(
- GetWindow(result[0].document(), result[0].snippet(), "subject", 0),
- Eq("subject foo 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "subject", 0),
+ EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "name", 0),
+ Eq("subject foo 1"));
+ EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "name", 0),
Eq("foo"));
EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0),
Eq("body bar 1"));
@@ -461,9 +589,12 @@
TEST_F(ResultRetrieverTest, ShouldSnippetAllResults) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(CreateDocument(/*id=*/1)));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
@@ -471,10 +602,13 @@
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
doc_store->Put(CreateDocument(/*id=*/3)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -487,7 +621,10 @@
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/0);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/3);
ICING_ASSERT_OK_AND_ASSIGN(
std::vector<SearchResultProto::ResultProto> result,
@@ -503,9 +640,12 @@
TEST_F(ResultRetrieverTest, ShouldSnippetSomeResults) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(CreateDocument(/*id=*/1)));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
@@ -513,10 +653,13 @@
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
doc_store->Put(CreateDocument(/*id=*/3)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -529,7 +672,10 @@
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/3);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/3,
+ /*num_per_page_in=*/3);
// num_to_snippet = 5, num_previously_returned_in = 3,
// We can return 5 - 3 = 2 snippets.
@@ -544,9 +690,12 @@
TEST_F(ResultRetrieverTest, ShouldNotSnippetAnyResults) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(CreateDocument(/*id=*/1)));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
@@ -554,10 +703,13 @@
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
doc_store->Put(CreateDocument(/*id=*/3)));
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id2, /*hit_section_id_mask=*/0b00000011, /*score=*/0},
- {document_id3, /*hit_section_id_mask=*/0b00000011, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id3, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -570,7 +722,10 @@
TermMatchType::EXACT_ONLY);
PageResultState page_result_state(
std::move(scored_document_hits), /*next_page_token_in=*/1,
- std::move(snippet_context), /*num_previously_returned_in=*/6);
+ std::move(snippet_context),
+ std::unordered_map<std::string, ProjectionTree>(),
+ /*num_previously_returned_in=*/6,
+ /*num_per_page_in=*/3);
// num_to_snippet = 5, num_previously_returned_in = 6,
// We can't return any snippets for this page.
@@ -583,6 +738,1187 @@
EXPECT_THAT(result[2].snippet().entries(), IsEmpty());
}
+TEST_F(ResultRetrieverTest, ProjectionTopLevelLeadNodeFieldPath) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("name");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results only contain the 'name' property.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionNestedLeafNodeFieldPath) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("sender.name");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results only contain the 'sender.name'
+ // property.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionIntermediateNodeFieldPath) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("sender");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results only contain the 'sender'
+ // property and all of the subproperties of 'sender'.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionMultipleNestedFieldPaths) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("sender.name");
+ type_property_mask.add_paths("sender.emailAddress");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results only contain the 'sender.name' and
+  // 'sender.emailAddress' properties.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Meg Ryan")
+ .AddStringProperty("emailAddress", "shopgirl@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty(
+ "sender", DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Tom Hanks")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionEmptyFieldPath) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results contain *no* properties.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionInvalidFieldPath) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("nonExistentProperty");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results contain *no* properties.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionValidAndInvalidFieldPath) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two Email documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .AddStringProperty("body",
+ "Count all the sheep and tell them 'Hello'.")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("name");
+ type_property_mask.add_paths("nonExistentProperty");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned results only contain the 'name' property.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Goodnight Moon!")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionMultipleTypesNoWildcards) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask type_property_mask;
+ type_property_mask.set_schema_type("Email");
+ type_property_mask.add_paths("name");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned Email results only contain the 'name'
+ // property and the returned Person results have all of their properties.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionMultipleTypesWildcard) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask wildcard_type_property_mask;
+ wildcard_type_property_mask.set_schema_type(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+ wildcard_type_property_mask.add_paths("name");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {std::string(ProjectionTree::kSchemaTypeWildcard),
+ ProjectionTree(wildcard_type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned Email results only contain the 'name'
+ // property and the returned Person results only contain the 'name' property.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionMultipleTypesWildcardWithOneOverride) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask email_type_property_mask;
+ email_type_property_mask.set_schema_type("Email");
+ email_type_property_mask.add_paths("body");
+ ResultSpecProto::TypePropertyMask wildcard_type_property_mask;
+ wildcard_type_property_mask.set_schema_type(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+ wildcard_type_property_mask.add_paths("name");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(email_type_property_mask)});
+ type_projection_tree_map.insert(
+ {std::string(ProjectionTree::kSchemaTypeWildcard),
+ ProjectionTree(wildcard_type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned Email results only contain the 'body'
+ // property and the returned Person results only contain the 'name' property.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest, ProjectionSingleTypesWildcardAndOverride) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .AddStringProperty("emailAddress", "mr.body123@gmail.com")
+ .Build())
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask email_type_property_mask;
+ email_type_property_mask.set_schema_type("Email");
+ email_type_property_mask.add_paths("sender.name");
+ ResultSpecProto::TypePropertyMask wildcard_type_property_mask;
+ wildcard_type_property_mask.set_schema_type(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+ wildcard_type_property_mask.add_paths("name");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(email_type_property_mask)});
+ type_projection_tree_map.insert(
+ {std::string(ProjectionTree::kSchemaTypeWildcard),
+ ProjectionTree(wildcard_type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned Email results only contain the 'sender.name'
+ // property and the returned Person results only contain the 'name' property.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
+TEST_F(ResultRetrieverTest,
+ ProjectionSingleTypesWildcardAndOverrideNestedProperty) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // 1. Add two documents
+ DocumentProto document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddStringProperty("name", "Hello World!")
+ .AddStringProperty(
+ "body", "Oh what a beautiful morning! Oh what a beautiful day!")
+ .AddDocumentProperty(
+ "sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .AddStringProperty("emailAddress", "mr.body123@gmail.com")
+ .Build())
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ doc_store->Put(document_one));
+
+ DocumentProto document_two =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .AddStringProperty("name", "Joe Fox")
+ .AddStringProperty("emailAddress", "ny152@aol.com")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ doc_store->Put(document_two));
+
+ // 2. Setup the scored results.
+ std::vector<SectionId> hit_section_ids = {GetSectionId("Email", "name"),
+ GetSectionId("Email", "body")};
+ SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ {document_id1, hit_section_id_mask, /*score=*/0},
+ {document_id2, hit_section_id_mask, /*score=*/0}};
+
+ ResultSpecProto::TypePropertyMask email_type_property_mask;
+ email_type_property_mask.set_schema_type("Email");
+ email_type_property_mask.add_paths("sender.name");
+ ResultSpecProto::TypePropertyMask wildcard_type_property_mask;
+ wildcard_type_property_mask.set_schema_type(
+ std::string(ProjectionTree::kSchemaTypeWildcard));
+ wildcard_type_property_mask.add_paths("sender");
+ std::unordered_map<std::string, ProjectionTree> type_projection_tree_map;
+ type_projection_tree_map.insert(
+ {"Email", ProjectionTree(email_type_property_mask)});
+ type_projection_tree_map.insert(
+ {std::string(ProjectionTree::kSchemaTypeWildcard),
+ ProjectionTree(wildcard_type_property_mask)});
+
+ SnippetContext snippet_context(
+ /*query_terms_in=*/{},
+ ResultSpecProto::SnippetSpecProto::default_instance(),
+ TermMatchType::EXACT_ONLY);
+ PageResultState page_result_state(
+ std::move(scored_document_hits), /*next_page_token_in=*/1,
+ std::move(snippet_context), std::move(type_projection_tree_map),
+ /*num_previously_returned_in=*/0,
+ /*num_per_page_in=*/2);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<ResultRetriever> result_retriever,
+ ResultRetriever::Create(doc_store.get(), schema_store_.get(),
+ language_segmenter_.get(), normalizer_.get()));
+
+ // 3. Verify that the returned Email results only contain the 'sender.name'
+ // property and the returned Person results contain no properties.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<SearchResultProto::ResultProto> result,
+ result_retriever->RetrieveResults(page_result_state));
+ ASSERT_THAT(result, SizeIs(2));
+
+ DocumentProto projected_document_one =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Email")
+ .AddDocumentProperty("sender",
+ DocumentBuilder()
+ .SetKey("namespace", "uri")
+ .SetSchema("Person")
+ .AddStringProperty("name", "Mr. Body")
+ .Build())
+ .Build();
+ EXPECT_THAT(result[0].document(), EqualsProto(projected_document_one));
+
+ DocumentProto projected_document_two = DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetCreationTimestampMs(1000)
+ .SetSchema("Person")
+ .Build();
+ EXPECT_THAT(result[1].document(), EqualsProto(projected_document_two));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/result/result-state-manager.cc b/icing/result/result-state-manager.cc
index ff5dbf0..0f27d9e 100644
--- a/icing/result/result-state-manager.cc
+++ b/icing/result/result-state-manager.cc
@@ -39,26 +39,32 @@
// Gets the number before calling GetNextPage() because num_returned() may
// change after returning more results.
int num_previously_returned = result_state.num_returned();
+ int num_per_page = result_state.num_per_page();
std::vector<ScoredDocumentHit> page_result_document_hits =
result_state.GetNextPage();
+ SnippetContext snippet_context_copy = result_state.snippet_context();
+
+ std::unordered_map<std::string, ProjectionTree> projection_tree_map_copy =
+ result_state.projection_tree_map();
if (!result_state.HasMoreResults()) {
// No more pages, won't store ResultState, returns directly
return PageResultState(
std::move(page_result_document_hits), kInvalidNextPageToken,
- result_state.snippet_context(), num_previously_returned);
+ std::move(snippet_context_copy), std::move(projection_tree_map_copy),
+ num_previously_returned, num_per_page);
}
absl_ports::unique_lock l(&mutex_);
// ResultState has multiple pages, storing it
- SnippetContext snippet_context_copy = result_state.snippet_context();
uint64_t next_page_token = Add(std::move(result_state));
return PageResultState(std::move(page_result_document_hits), next_page_token,
std::move(snippet_context_copy),
- num_previously_returned);
+ std::move(projection_tree_map_copy),
+ num_previously_returned, num_per_page);
}
uint64_t ResultStateManager::Add(ResultState result_state) {
@@ -83,6 +89,7 @@
}
int num_returned = state_iterator->second.num_returned();
+ int num_per_page = state_iterator->second.num_per_page();
std::vector<ScoredDocumentHit> result_of_page =
state_iterator->second.GetNextPage();
if (result_of_page.empty()) {
@@ -97,13 +104,17 @@
SnippetContext snippet_context_copy =
state_iterator->second.snippet_context();
+ std::unordered_map<std::string, ProjectionTree> projection_tree_map_copy =
+ state_iterator->second.projection_tree_map();
+
if (!state_iterator->second.HasMoreResults()) {
InternalInvalidateResultState(next_page_token);
next_page_token = kInvalidNextPageToken;
}
- return PageResultState(result_of_page, next_page_token,
- std::move(snippet_context_copy), num_returned);
+ return PageResultState(
+ result_of_page, next_page_token, std::move(snippet_context_copy),
+ std::move(projection_tree_map_copy), num_returned, num_per_page);
}
void ResultStateManager::InvalidateResultState(uint64_t next_page_token) {
diff --git a/icing/result/result-state.cc b/icing/result/result-state.cc
index bf28f52..f1479b9 100644
--- a/icing/result/result-state.cc
+++ b/icing/result/result-state.cc
@@ -14,6 +14,7 @@
#include "icing/result/result-state.h"
+#include "icing/result/projection-tree.h"
#include "icing/scoring/ranker.h"
#include "icing/util/logging.h"
@@ -46,6 +47,11 @@
num_returned_(0),
scored_document_hit_comparator_(scoring_spec.order_by() ==
ScoringSpecProto::Order::DESC) {
+ for (const ResultSpecProto::TypePropertyMask& type_field_mask :
+ result_spec.type_property_masks()) {
+ projection_tree_map_.insert(
+ {type_field_mask.schema_type(), ProjectionTree(type_field_mask)});
+ }
BuildHeapInPlace(&scored_document_hits_, scored_document_hit_comparator_);
}
diff --git a/icing/result/result-state.h b/icing/result/result-state.h
index 82e783b..be92b85 100644
--- a/icing/result/result-state.h
+++ b/icing/result/result-state.h
@@ -15,10 +15,12 @@
#ifndef ICING_RESULT_RESULT_STATE_H_
#define ICING_RESULT_RESULT_STATE_H_
+#include <iostream>
#include <vector>
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/result/projection-tree.h"
#include "icing/result/snippet-context.h"
#include "icing/scoring/scored-document-hit.h"
@@ -52,6 +54,15 @@
// constructor.
const SnippetContext& snippet_context() const { return snippet_context_; }
+ // Returns a vector of TypePropertyMasks generated from the specs passed in
+ // via constructor.
+ const std::unordered_map<std::string, ProjectionTree>& projection_tree_map()
+ const {
+ return projection_tree_map_;
+ }
+
+ int num_per_page() const { return num_per_page_; }
+
// The number of results that have already been returned. This number is
// increased when GetNextPage() is called.
int num_returned() const { return num_returned_; }
@@ -65,6 +76,9 @@
// Information needed for snippeting.
SnippetContext snippet_context_;
+ // Information needed for projection.
+ std::unordered_map<std::string, ProjectionTree> projection_tree_map_;
+
// Number of results to return in each page.
int num_per_page_;
diff --git a/icing/result/snippet-retriever_test.cc b/icing/result/snippet-retriever_test.cc
index e552cf2..ecda400 100644
--- a/icing/result/snippet-retriever_test.cc
+++ b/icing/result/snippet-retriever_test.cc
@@ -33,6 +33,8 @@
#include "icing/store/document-id.h"
#include "icing/store/key-mapper.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
+#include "icing/testing/platform.h"
#include "icing/testing/snippet-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
@@ -57,18 +59,22 @@
test_dir_ = GetTestTempDir() + "/icing";
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
+
language_segmenter_factory::SegmenterOptions options(ULOC_US);
ICING_ASSERT_OK_AND_ASSIGN(
language_segmenter_,
language_segmenter_factory::Create(std::move(options)));
// Setup the schema
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
SchemaProto schema;
SchemaTypeConfigProto* type_config = schema.add_types();
type_config->set_schema_type("email");
@@ -110,6 +116,7 @@
}
Filesystem filesystem_;
+ FakeClock fake_clock_;
std::unique_ptr<SchemaStore> schema_store_;
std::unique_ptr<LanguageSegmenter> language_segmenter_;
std::unique_ptr<SnippetRetriever> snippet_retriever_;
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index 9173031..e54cc0c 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -39,7 +39,6 @@
#include "icing/util/crc32.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
-#include "icing/util/timer.h"
namespace icing {
namespace lib {
@@ -105,18 +104,21 @@
libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
- NativeInitializeStats* initialize_stats) {
+ const Clock* clock, NativeInitializeStats* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
+ ICING_RETURN_ERROR_IF_NULL(clock);
- std::unique_ptr<SchemaStore> schema_store =
- std::unique_ptr<SchemaStore>(new SchemaStore(filesystem, base_dir));
+ std::unique_ptr<SchemaStore> schema_store = std::unique_ptr<SchemaStore>(
+ new SchemaStore(filesystem, base_dir, clock));
ICING_RETURN_IF_ERROR(schema_store->Initialize(initialize_stats));
return schema_store;
}
-SchemaStore::SchemaStore(const Filesystem* filesystem, std::string base_dir)
+SchemaStore::SchemaStore(const Filesystem* filesystem, std::string base_dir,
+ const Clock* clock)
: filesystem_(*filesystem),
base_dir_(std::move(base_dir)),
+ clock_(*clock),
schema_file_(*filesystem, MakeSchemaFilename(base_dir_)) {}
SchemaStore::~SchemaStore() {
@@ -142,7 +144,7 @@
ICING_VLOG(3)
<< "Couldn't find derived files or failed to initialize them, "
"regenerating derived files for SchemaStore.";
- Timer regenerate_timer;
+ std::unique_ptr<Timer> regenerate_timer = clock_.GetNewTimer();
if (initialize_stats != nullptr) {
initialize_stats->set_schema_store_recovery_cause(
NativeInitializeStats::IO_ERROR);
@@ -150,7 +152,7 @@
ICING_RETURN_IF_ERROR(RegenerateDerivedFiles());
if (initialize_stats != nullptr) {
initialize_stats->set_schema_store_recovery_latency_ms(
- regenerate_timer.GetElapsedMilliseconds());
+ regenerate_timer->GetElapsedMilliseconds());
}
}
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index 76f36b4..cff7abd 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -34,6 +34,7 @@
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/key-mapper.h"
+#include "icing/util/clock.h"
#include "icing/util/crc32.h"
namespace icing {
@@ -114,7 +115,7 @@
// INTERNAL_ERROR on any IO errors
static libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> Create(
const Filesystem* filesystem, const std::string& base_dir,
- NativeInitializeStats* initialize_stats = nullptr);
+ const Clock* clock, NativeInitializeStats* initialize_stats = nullptr);
// Not copyable
SchemaStore(const SchemaStore&) = delete;
@@ -227,7 +228,8 @@
private:
// Use SchemaStore::Create instead.
- explicit SchemaStore(const Filesystem* filesystem, std::string base_dir);
+ explicit SchemaStore(const Filesystem* filesystem, std::string base_dir,
+ const Clock* clock);
// Handles initializing the SchemaStore and regenerating any data if needed.
//
@@ -273,6 +275,7 @@
const Filesystem& filesystem_;
const std::string base_dir_;
+ const Clock& clock_;
// Used internally to indicate whether the class has been initialized. This is
// to guard against cases where the object has been created, but Initialize
diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc
index 4a458b2..7df3dd9 100644
--- a/icing/schema/schema-store_test.cc
+++ b/icing/schema/schema-store_test.cc
@@ -31,6 +31,7 @@
#include "icing/store/document-filter-data.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
+#include "icing/testing/fake-clock.h"
namespace icing {
namespace lib {
@@ -69,17 +70,19 @@
const Filesystem filesystem_;
const std::string test_dir_;
SchemaProto schema_;
+ const FakeClock fake_clock_;
};
TEST_F(SchemaStoreTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(SchemaStore::Create(/*filesystem=*/nullptr, test_dir_),
+ EXPECT_THAT(SchemaStore::Create(/*filesystem=*/nullptr, test_dir_, &fake_clock_),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_F(SchemaStoreTest, CorruptSchemaError) {
{
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
// Set it for the first time
SchemaStore::SetSchemaResult result;
@@ -105,14 +108,15 @@
serialized_schema.size());
// If ground truth was corrupted, we won't know what to do
- EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_),
+ EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
TEST_F(SchemaStoreTest, RecoverCorruptDerivedFileOk) {
{
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
// Set it for the first time
SchemaStore::SetSchemaResult result;
@@ -134,8 +138,9 @@
absl_ports::StrCat(test_dir_, "/schema_type_mapper");
filesystem_.DeleteDirectoryRecursively(schema_type_mapper_dir.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
// Everything looks fine, ground truth and derived data
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -146,8 +151,9 @@
TEST_F(SchemaStoreTest, RecoverBadChecksumOk) {
{
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
// Set it for the first time
SchemaStore::SetSchemaResult result;
@@ -172,8 +178,9 @@
filesystem_.DeleteFile(header_file.c_str());
filesystem_.Write(header_file.c_str(), &header, sizeof(header));
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
// Everything looks fine, ground truth and derived data
ICING_ASSERT_OK_AND_ASSIGN(const SchemaProto* actual_schema,
@@ -183,12 +190,13 @@
}
TEST_F(SchemaStoreTest, CreateNoPreviousSchemaOk) {
- EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_), IsOk());
+ EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_), IsOk());
}
TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
SchemaStore::SetSchemaResult result;
result.success = true;
@@ -196,7 +204,7 @@
IsOkAndHolds(EqualsSetSchemaResult(result)));
schema_store.reset();
- EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_), IsOk());
+ EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_), IsOk());
}
TEST_F(SchemaStoreTest, MultipleCreateOk) {
@@ -206,8 +214,9 @@
properties->set_name("subject");
properties->add_string_values("subject_content");
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
SchemaStore::SetSchemaResult result;
result.success = true;
@@ -225,8 +234,8 @@
EXPECT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
schema_store.reset();
- ICING_ASSERT_OK_AND_ASSIGN(schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
// Verify that our in-memory structures are ok
EXPECT_THAT(schema_store->GetSchemaTypeConfig("email"),
@@ -240,8 +249,9 @@
}
TEST_F(SchemaStoreTest, SetNewSchemaOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
// Set it for the first time
SchemaStore::SetSchemaResult result;
@@ -254,8 +264,9 @@
}
TEST_F(SchemaStoreTest, SetSameSchemaOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
// Set it for the first time
SchemaStore::SetSchemaResult result;
@@ -274,8 +285,9 @@
}
TEST_F(SchemaStoreTest, SetIncompatibleSchemaOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
// Set it for the first time
SchemaStore::SetSchemaResult result;
@@ -298,8 +310,9 @@
}
TEST_F(SchemaStoreTest, SetSchemaWithAddedTypeOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
SchemaProto schema;
auto type = schema.add_types();
@@ -326,8 +339,9 @@
}
TEST_F(SchemaStoreTest, SetSchemaWithDeletedTypeOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
SchemaProto schema;
auto type = schema.add_types();
@@ -381,8 +395,9 @@
}
TEST_F(SchemaStoreTest, SetSchemaWithReorderedTypesOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
SchemaProto schema;
auto type = schema.add_types();
@@ -420,8 +435,9 @@
}
TEST_F(SchemaStoreTest, SetSchemaThatRequiresReindexingOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
SchemaProto schema;
auto type = schema.add_types();
@@ -460,8 +476,9 @@
}
TEST_F(SchemaStoreTest, SetSchemaWithIncompatibleTypesOk) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
SchemaProto schema;
auto type = schema.add_types();
@@ -514,8 +531,9 @@
}
TEST_F(SchemaStoreTest, GetSchemaTypeId) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
schema_.clear_types();
@@ -539,16 +557,18 @@
}
TEST_F(SchemaStoreTest, ComputeChecksumDefaultOnEmptySchemaStore) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
Crc32 default_checksum;
EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(default_checksum));
}
TEST_F(SchemaStoreTest, ComputeChecksumSameBetweenCalls) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
SchemaProto foo_schema;
auto type_config = foo_schema.add_types();
@@ -563,8 +583,9 @@
}
TEST_F(SchemaStoreTest, ComputeChecksumSameAcrossInstances) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
SchemaProto foo_schema;
auto type_config = foo_schema.add_types();
@@ -577,14 +598,15 @@
// Destroy the previous instance and recreate SchemaStore
schema_store.reset();
- ICING_ASSERT_OK_AND_ASSIGN(schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
EXPECT_THAT(schema_store->ComputeChecksum(), IsOkAndHolds(checksum));
}
TEST_F(SchemaStoreTest, ComputeChecksumChangesOnModification) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
SchemaProto foo_schema;
auto type_config = foo_schema.add_types();
@@ -607,16 +629,18 @@
}
TEST_F(SchemaStoreTest, PersistToDiskFineForEmptySchemaStore) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
// Persisting is fine and shouldn't affect anything
ICING_EXPECT_OK(schema_store->PersistToDisk());
}
TEST_F(SchemaStoreTest, PersistToDiskPreservesAcrossInstances) {
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
SchemaProto schema;
auto type_config = schema.add_types();
@@ -640,8 +664,8 @@
schema_store.reset();
// And we get the same schema back on reinitialization
- ICING_ASSERT_OK_AND_ASSIGN(schema_store,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store, SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ICING_ASSERT_OK_AND_ASSIGN(actual_schema, schema_store->GetSchema());
EXPECT_THAT(*actual_schema, EqualsProto(schema));
}
diff --git a/icing/schema/schema-util.cc b/icing/schema/schema-util.cc
index a755e88..49e7096 100644
--- a/icing/schema/schema-util.cc
+++ b/icing/schema/schema-util.cc
@@ -107,6 +107,21 @@
// already.
std::unordered_set<std::string_view> known_property_names;
+ // Tracks which schemas reference other schemas. This is used to detect
+ // infinite loops between indexed schema references (e.g. A -> B -> C -> A).
+ // We could get into an infinite loop while trying to assign section ids.
+ //
+ // The key is the "child" schema that is being referenced within another
+ // schema.
+ // The value is a set of all the direct/indirect "parent" schemas that
+ // reference the "child" schema.
+ //
+ // For example, if A has a nested document property of type B, then A is the
+ // "parent" and B is the "child" and so schema_references will contain
+ // schema_references[B] == {A}.
+ std::unordered_map<std::string_view, std::unordered_set<std::string_view>>
+ schema_references;
+
for (const auto& type_config : schema.types()) {
std::string_view schema_type(type_config.schema_type());
ICING_RETURN_IF_ERROR(ValidateSchemaType(schema_type));
@@ -120,6 +135,7 @@
// We only care about properties being unique within one type_config
known_property_names.clear();
+
for (const auto& property_config : type_config.properties()) {
std::string_view property_name(property_config.property_name());
ICING_RETURN_IF_ERROR(ValidatePropertyName(property_name, schema_type));
@@ -149,10 +165,55 @@
schema_type, ".", property_name, "'"));
}
+ if (property_schema_type == schema_type) {
+ // The schema refers to itself. This also causes an infinite loop.
+ //
+ // TODO(b/171996137): When clients can opt out of indexing document
+ // properties, then we don't need to do this if the document property
+ // isn't indexed. We only care about infinite loops while we're trying
+ // to assign section ids for indexing.
+ return absl_ports::InvalidArgumentError(
+ absl_ports::StrCat("Infinite loop detected in type configs. '",
+ schema_type, "' references itself."));
+ }
+
// Need to make sure we eventually see/validate this schema_type
if (known_schema_types.count(property_schema_type) == 0) {
unknown_schema_types.insert(property_schema_type);
}
+
+ // Start tracking the parent schemas that reference this nested schema
+ // for infinite loop detection.
+ //
+ // TODO(b/171996137): When clients can opt out of indexing document
+ // properties, then we don't need to do this if the document property
+ // isn't indexed. We only care about infinite loops while we're trying
+ // to assign section ids for indexing.
+ std::unordered_set<std::string_view> parent_schemas;
+ parent_schemas.insert(schema_type);
+
+ for (const auto& parent : parent_schemas) {
+ // Check for any indirect parents
+ auto indirect_parents_iter = schema_references.find(parent);
+ if (indirect_parents_iter == schema_references.end()) {
+ continue;
+ }
+
+ // Our "parent" schema has parents as well. They're our indirect
+ // parents now.
+ for (const std::string_view& indirect_parent :
+ indirect_parents_iter->second) {
+ if (indirect_parent == property_schema_type) {
+ // We're our own indirect parent! Infinite loop found.
+ return absl_ports::InvalidArgumentError(absl_ports::StrCat(
+ "Infinite loop detected in type configs. '",
+ property_schema_type, "' references itself."));
+ }
+ parent_schemas.insert(indirect_parent);
+ }
+ }
+
+ schema_references.insert({property_schema_type, parent_schemas});
}
ICING_RETURN_IF_ERROR(ValidateCardinality(property_config.cardinality(),
@@ -166,7 +227,7 @@
}
}
- // An Document property claimed to be of a schema_type that we never
+ // A Document property claimed to be of a schema_type that we never
// saw/validated
if (!unknown_schema_types.empty()) {
return absl_ports::UnknownError(
@@ -272,7 +333,7 @@
const SchemaTypeConfigProto& type_config) {
ParsedPropertyConfigs parsed_property_configs;
- // TODO(samzheng): consider caching property_config_map for some properties,
+ // TODO(cassiewang): consider caching property_config_map for some properties,
// e.g. using LRU cache. Or changing schema.proto to use go/protomap.
for (const PropertyConfigProto& property_config : type_config.properties()) {
parsed_property_configs.property_config_map.emplace(
diff --git a/icing/schema/schema-util.h b/icing/schema/schema-util.h
index ccb2eea..7b989a8 100644
--- a/icing/schema/schema-util.h
+++ b/icing/schema/schema-util.h
@@ -81,10 +81,17 @@
// SchemaTypeConfigProto.schema_type
// 10. Property names can only be alphanumeric.
// 11. Any STRING data types have a valid string_indexing_config
+ // 12. A SchemaTypeConfigProto cannot have a property whose schema_type is
+ // itself, thus creating an infinite loop.
+ // 13. Two SchemaTypeConfigProtos cannot have properties that reference each
+ // other's schema_type, thus creating an infinite loop.
+ //
+ // TODO(b/171996137): Clarify 12 and 13 are only for indexed properties, once
+ // document properties can be opted out of indexing.
//
// Returns:
// ALREADY_EXISTS for case 1 and 2
- // INVALID_ARGUMENT for 3-11
+ // INVALID_ARGUMENT for 3-13
// OK otherwise
static libtextclassifier3::Status Validate(const SchemaProto& schema);
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
index ed3bde7..61a861c 100644
--- a/icing/schema/schema-util_test.cc
+++ b/icing/schema/schema-util_test.cc
@@ -29,6 +29,7 @@
namespace {
using ::testing::Eq;
+using ::testing::HasSubstr;
// Properties/fields in a schema type
constexpr char kEmailType[] = "EmailMessage";
@@ -84,7 +85,7 @@
}
};
-TEST_F(SchemaUtilTest, Valid_Empty) {
+TEST_F(SchemaUtilTest, EmptySchemaProtoIsValid) {
ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
}
@@ -98,7 +99,7 @@
ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
}
-TEST_F(SchemaUtilTest, Valid_ClearedPropertyConfigs) {
+TEST_F(SchemaUtilTest, ClearedPropertyConfigsIsValid) {
// No property fields is technically ok, but probably not realistic.
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -107,7 +108,7 @@
ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedSchemaType) {
+TEST_F(SchemaUtilTest, ClearedSchemaTypeIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
type->clear_schema_type();
@@ -116,7 +117,7 @@
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_EmptySchemaType) {
+TEST_F(SchemaUtilTest, EmptySchemaTypeIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
type->set_schema_type("");
@@ -133,7 +134,7 @@
ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedPropertyName) {
+TEST_F(SchemaUtilTest, ClearedPropertyNameIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -146,7 +147,7 @@
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_EmptyPropertyName) {
+TEST_F(SchemaUtilTest, EmptyPropertyNameIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -184,7 +185,7 @@
ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
}
-TEST_F(SchemaUtilTest, Invalid_DuplicatePropertyName) {
+TEST_F(SchemaUtilTest, DuplicatePropertyNameIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -202,7 +203,7 @@
StatusIs(libtextclassifier3::StatusCode::ALREADY_EXISTS));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedDataType) {
+TEST_F(SchemaUtilTest, ClearedDataTypeIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -215,7 +216,7 @@
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_UnknownDataType) {
+TEST_F(SchemaUtilTest, UnknownDataTypeIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -228,7 +229,7 @@
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedCardinality) {
+TEST_F(SchemaUtilTest, ClearedCardinalityIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -241,7 +242,7 @@
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_UnknownCardinality) {
+TEST_F(SchemaUtilTest, UnknownCardinalityIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -254,7 +255,7 @@
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_ClearedPropertySchemaType) {
+TEST_F(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -282,7 +283,7 @@
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_NoMatchingSchemaType) {
+TEST_F(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) {
auto type = schema_proto_.add_types();
*type = CreateSchemaTypeConfig(kEmailType);
@@ -293,7 +294,8 @@
property->set_schema_type("NewSchemaType");
ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
- StatusIs(libtextclassifier3::StatusCode::UNKNOWN));
+ StatusIs(libtextclassifier3::StatusCode::UNKNOWN,
+ HasSubstr("Undefined 'schema_type'")));
}
TEST_F(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
@@ -618,6 +620,153 @@
EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
}
+TEST_F(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) {
+ SchemaProto schema;
+
+ // Create a parent schema
+ auto type = schema.add_types();
+ type->set_schema_type("ParentSchema");
+
+ // Create multiple references to the same child schema
+ auto property = type->add_properties();
+ property->set_property_name("ChildProperty1");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("ChildSchema");
+ property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+ property = type->add_properties();
+ property->set_property_name("ChildProperty2");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("ChildSchema");
+ property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+ // Create a child schema
+ type = schema.add_types();
+ type->set_schema_type("ChildSchema");
+
+ EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
+}
+
+TEST_F(SchemaUtilTest, InvalidSelfReference) {
+ SchemaProto schema;
+
+ // Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema
+ auto type = schema.add_types();
+ type->set_schema_type("OwnSchema");
+
+ // Reference a child schema, so far so good
+ auto property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("OwnSchema");
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Infinite loop")));
+}
+
+TEST_F(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) {
+ SchemaProto schema;
+
+ // Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema
+ auto type = schema.add_types();
+ type->set_schema_type("OwnSchema");
+
+ // Reference a child schema, so far so good
+ auto property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("OwnSchema");
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ property = type->add_properties();
+ property->set_property_name("SomeString");
+ property->set_data_type(PropertyConfigProto::DataType::STRING);
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ property->mutable_string_indexing_config()->set_term_match_type(
+ TermMatchType::PREFIX);
+ property->mutable_string_indexing_config()->set_tokenizer_type(
+ StringIndexingConfig::TokenizerType::PLAIN);
+
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Infinite loop")));
+}
+
+TEST_F(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
+ SchemaProto schema;
+
+ // Create a schema for the parent schema
+ auto type = schema.add_types();
+ type->set_schema_type("A");
+
+ // Reference schema B, so far so good
+ auto property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("B");
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // Create the child schema
+ type = schema.add_types();
+ type->set_schema_type("B");
+
+ // Reference the schema A, causing an infinite loop of references.
+ property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("A");
+ property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+ // Two degrees of referencing: A -> B -> A
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Infinite loop")));
+}
+
+TEST_F(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
+ SchemaProto schema;
+
+ // Create a schema for the parent schema
+ auto type = schema.add_types();
+ type->set_schema_type("A");
+
+ // Reference schema B, so far so good
+ auto property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("B");
+ property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+
+ // Create the child schema
+ type = schema.add_types();
+ type->set_schema_type("B");
+
+ // Reference schema C, so far so good
+ property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("C");
+ property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+ // Create the child schema
+ type = schema.add_types();
+ type->set_schema_type("C");
+
+ // Reference schema A, no good
+ property = type->add_properties();
+ property->set_property_name("NestedDocument");
+ property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property->set_schema_type("A");
+ property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+
+ // Three degrees of referencing: A -> B -> C -> A
+ EXPECT_THAT(SchemaUtil::Validate(schema),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
+ HasSubstr("Infinite loop")));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/schema/section-manager.cc b/icing/schema/section-manager.cc
index 73aa947..0285cef 100644
--- a/icing/schema/section-manager.cc
+++ b/icing/schema/section-manager.cc
@@ -48,39 +48,6 @@
using TypeSectionMap =
std::unordered_map<std::string, const std::vector<SectionMetadata>>;
-// This state helps detect infinite loops (e.g. two type configs referencing
-// each other) when assigning sections. The combination of 'number of section
-// assigned' and 'current schema name' represents a unique state in the
-// section-assign process. If the same state is seen the second time, that means
-// an infinite loop.
-struct SectionAssigningState {
- size_t num_sections_assigned;
- std::string current_schema_name;
-
- SectionAssigningState(size_t num_sections_assigned_in,
- std::string&& current_schema_name_in)
- : num_sections_assigned(num_sections_assigned_in),
- current_schema_name(std::move(current_schema_name_in)) {}
-};
-
-// Provides a hash value of this struct so that it can be stored in a hash
-// set.
-struct SectionAssigningStateHasher {
- size_t operator()(const SectionAssigningState& state) const {
- size_t str_hash = std::hash<std::string>()(state.current_schema_name);
- size_t int_hash = std::hash<size_t>()(state.num_sections_assigned);
- // Combine the two hashes by taking the upper 16-bits of the string hash and
- // the lower 16-bits of the int hash.
- return (str_hash & 0xFFFF0000) | (int_hash & 0x0000FFFF);
- }
-};
-
-bool operator==(const SectionAssigningState& lhs,
- const SectionAssigningState& rhs) {
- return lhs.num_sections_assigned == rhs.num_sections_assigned &&
- lhs.current_schema_name == rhs.current_schema_name;
-}
-
// Helper function to concatenate a path and a property name
std::string ConcatenatePath(const std::string& path,
const std::string& next_property_name) {
@@ -90,28 +57,14 @@
return absl_ports::StrCat(path, kPropertySeparator, next_property_name);
}
-// Helper function to recursively identify sections from a type config and add
-// them to a section metadata list
libtextclassifier3::Status AssignSections(
- const SchemaTypeConfigProto& type_config,
+ const SchemaTypeConfigProto& current_type_config,
const std::string& current_section_path,
const SchemaUtil::TypeConfigMap& type_config_map,
- std::unordered_set<SectionAssigningState, SectionAssigningStateHasher>*
- visited_states,
std::vector<SectionMetadata>* metadata_list) {
- if (!visited_states
- ->emplace(metadata_list->size(),
- std::string(type_config.schema_type()))
- .second) {
- // Failed to insert, the same state has been seen before, there's an
- // infinite loop in type configs
- return absl_ports::InvalidArgumentError(
- "Infinite loop detected in type configs");
- }
-
// Sorts properties by name's alphabetical order so that order doesn't affect
// section assigning.
- auto sorted_properties = type_config.properties();
+ auto sorted_properties = current_type_config.properties();
std::sort(sorted_properties.pointer_begin(), sorted_properties.pointer_end(),
[](const PropertyConfigProto* p1, const PropertyConfigProto* p2) {
return p1->property_name() < p2->property_name();
@@ -137,7 +90,7 @@
AssignSections(nested_type_config,
ConcatenatePath(current_section_path,
property_config.property_name()),
- type_config_map, visited_states, metadata_list));
+ type_config_map, metadata_list));
}
}
@@ -162,6 +115,7 @@
"allowed: %d",
kMaxSectionId - kMinSectionId + 1));
}
+
// Creates section metadata from property config
metadata_list->emplace_back(
new_section_id,
@@ -182,17 +136,14 @@
std::vector<std::vector<SectionMetadata>> section_metadata_cache(
schema_type_mapper.num_keys());
- std::unordered_set<SectionAssigningState, SectionAssigningStateHasher>
- visited_states;
for (const auto& name_and_type : type_config_map) {
// Assigns sections for each type config
- visited_states.clear();
const std::string& type_config_name = name_and_type.first;
const SchemaTypeConfigProto& type_config = name_and_type.second;
std::vector<SectionMetadata> metadata_list;
- ICING_RETURN_IF_ERROR(
- AssignSections(type_config, /*current_section_path*/ "",
- type_config_map, &visited_states, &metadata_list));
+ ICING_RETURN_IF_ERROR(AssignSections(type_config,
+ /*current_section_path*/ "",
+ type_config_map, &metadata_list));
// Insert the section metadata list at the index of the type's SchemaTypeId
ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc
index 1a4d324..2d995df 100644
--- a/icing/schema/section-manager_test.cc
+++ b/icing/schema/section-manager_test.cc
@@ -163,67 +163,6 @@
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
-TEST_F(SectionManagerTest, CreationWithSchemaInfiniteLoopShouldFail) {
- // Creates 2 type configs that reference each other
- SchemaTypeConfigProto type_config1;
- type_config1.set_schema_type("type1");
- auto property1 = type_config1.add_properties();
- property1->set_property_name("property1");
- property1->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property1->set_schema_type("type2"); // Here we reference type2
- property1->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property1->mutable_document_indexing_config()->set_index_nested_properties(
- true);
-
- SchemaTypeConfigProto type_config2;
- type_config2.set_schema_type("type2");
- auto property2 = type_config2.add_properties();
- property2->set_property_name("property2");
- property2->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- // Here we reference type1, which references type2 causing the infinite loop
- property2->set_schema_type("type1");
- property2->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property2->mutable_document_indexing_config()->set_index_nested_properties(
- true);
-
- SchemaUtil::TypeConfigMap type_config_map;
- type_config_map.emplace("type1", type_config1);
- type_config_map.emplace("type2", type_config2);
-
- EXPECT_THAT(
- SectionManager::Create(type_config_map, schema_type_mapper_.get()),
- StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
- HasSubstr("Infinite loop detected")));
-}
-
-TEST_F(SectionManagerTest, CreationWithSchemaSelfReferenceShouldFail) {
- // Creates a type config that has a section and references to self.
- SchemaTypeConfigProto type_config;
- type_config.set_schema_type("type");
- auto property1 = type_config.add_properties();
- property1->set_property_name("property1");
- property1->set_data_type(PropertyConfigProto::DataType::STRING);
- property1->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- property1->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- auto property2 = type_config.add_properties();
- property2->set_property_name("property2");
- property2->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property2->mutable_document_indexing_config()->set_index_nested_properties(
- true);
- // Here we're referencing our own type, causing an infinite loop
- property2->set_schema_type("type");
- property2->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- SchemaUtil::TypeConfigMap type_config_map;
- type_config_map.emplace("type", type_config);
-
- EXPECT_THAT(
- SectionManager::Create(type_config_map, schema_type_mapper_.get()),
- StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE,
- HasSubstr("Too many properties")));
-}
-
TEST_F(SectionManagerTest, CreationWithTooManyPropertiesShouldFail) {
SchemaTypeConfigProto type_config;
type_config.set_schema_type("type");
diff --git a/icing/schema/section.h b/icing/schema/section.h
index 7669c97..058f261 100644
--- a/icing/schema/section.h
+++ b/icing/schema/section.h
@@ -45,7 +45,6 @@
kMaxSectionId < 8 * sizeof(SectionIdMask),
"SectionIdMask is not large enough to represent all section values!");
-// TODO(samzheng): add more metadata when needed, e.g. tokenizer type,
struct SectionMetadata {
// Dot-joined property names, representing the location of section inside an
// document. E.g. "property1.property2"
diff --git a/icing/scoring/score-and-rank_benchmark.cc b/icing/scoring/score-and-rank_benchmark.cc
index c3ed40a..e940e98 100644
--- a/icing/scoring/score-and-rank_benchmark.cc
+++ b/icing/scoring/score-and-rank_benchmark.cc
@@ -99,14 +99,17 @@
filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir));
-
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, base_dir, &clock));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem, document_store_dir, &clock,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
@@ -198,14 +201,17 @@
filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir));
-
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, base_dir, &clock));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem, document_store_dir, &clock,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
@@ -298,14 +304,17 @@
filesystem.CreateDirectoryRecursively(document_store_dir.c_str());
filesystem.CreateDirectoryRecursively(schema_store_dir.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem, base_dir));
-
Clock clock;
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem, base_dir, &clock));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem, document_store_dir, &clock,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK(schema_store->SetSchema(CreateSchemaWithEmailType()));
diff --git a/icing/scoring/scorer.cc b/icing/scoring/scorer.cc
index 42ec09a..0739532 100644
--- a/icing/scoring/scorer.cc
+++ b/icing/scoring/scorer.cc
@@ -144,6 +144,9 @@
case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP:
return std::make_unique<UsageScorer>(document_store, rank_by,
default_score);
+ case ScoringSpecProto::RankingStrategy::
+ RELEVANCE_SCORE_NONFUNCTIONAL_PLACEHOLDER:
+ [[fallthrough]];
case ScoringSpecProto::RankingStrategy::NONE:
return std::make_unique<NoScorer>(default_score);
}
diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc
index 06bf484..b669eb1 100644
--- a/icing/scoring/scorer_test.cc
+++ b/icing/scoring/scorer_test.cc
@@ -53,12 +53,14 @@
fake_clock2_.SetSystemTimeMilliseconds(1572200000000);
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store_, SchemaStore::Create(&filesystem_, schema_store_dir_));
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock1_));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock1_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
SchemaProto test_email_schema;
diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc
index 0da25f6..14b2a20 100644
--- a/icing/scoring/scoring-processor_test.cc
+++ b/icing/scoring/scoring-processor_test.cc
@@ -49,13 +49,15 @@
filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(schema_store_,
- SchemaStore::Create(&filesystem_, test_dir_));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ICING_ASSERT_OK_AND_ASSIGN(
- document_store_,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
schema_store_.get()));
+ document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
SchemaProto test_email_schema;
diff --git a/icing/store/corpus-id.h b/icing/store/corpus-id.h
new file mode 100644
index 0000000..a8f21ba
--- /dev/null
+++ b/icing/store/corpus-id.h
@@ -0,0 +1,30 @@
+// Copyright (C) 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_CORPUS_ID_H_
+#define ICING_STORE_CORPUS_ID_H_
+
+#include <cstdint>
+
+namespace icing {
+namespace lib {
+
+// Identifier for corpus, i.e. a <namespace, schema_type> pair>, in
+// DocumentProto. Generated in DocumentStore.
+using CorpusId = int32_t;
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_CORPUS_ID_H_
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 8ddde14..6a664a3 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -37,16 +37,18 @@
#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/logging.pb.h"
#include "icing/schema/schema-store.h"
+#include "icing/store/corpus-id.h"
#include "icing/store/document-associated-score-data.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/enable-bm25f.h"
#include "icing/store/key-mapper.h"
#include "icing/store/namespace-id.h"
#include "icing/util/clock.h"
#include "icing/util/crc32.h"
+#include "icing/util/data-loss.h"
#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
-#include "icing/util/timer.h"
namespace icing {
namespace lib {
@@ -62,12 +64,14 @@
constexpr char kFilterCacheFilename[] = "filter_cache";
constexpr char kNamespaceMapperFilename[] = "namespace_mapper";
constexpr char kUsageStoreDirectoryName[] = "usage_store";
+constexpr char kCorpusIdMapperFilename[] = "corpus_mapper";
constexpr int32_t kUriMapperMaxSize = 12 * 1024 * 1024; // 12 MiB
// 384 KiB for a KeyMapper would allow each internal array to have a max of
// 128 KiB for storage.
constexpr int32_t kNamespaceMapperMaxSize = 3 * 128 * 1024; // 384 KiB
+constexpr int32_t kCorpusMapperMaxSize = 3 * 128 * 1024; // 384 KiB
DocumentWrapper CreateDocumentWrapper(DocumentProto&& document) {
DocumentWrapper document_wrapper;
@@ -130,6 +134,10 @@
return absl_ports::StrCat(base_dir, "/", kUsageStoreDirectoryName);
}
+std::string MakeCorpusMapperFilename(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kCorpusIdMapperFilename);
+}
+
// TODO(adorokhine): This class internally uses an 8-byte fingerprint of the
// Key and stores the key/value in a file-backed-trie that adds an ~80 byte
// overhead per key. As we know that these fingerprints are always 8-bytes in
@@ -200,21 +208,26 @@
}
}
-libtextclassifier3::StatusOr<std::unique_ptr<DocumentStore>>
-DocumentStore::Create(const Filesystem* filesystem, const std::string& base_dir,
- const Clock* clock, const SchemaStore* schema_store,
- NativeInitializeStats* initialize_stats) {
+libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
+ const Filesystem* filesystem, const std::string& base_dir,
+ const Clock* clock, const SchemaStore* schema_store,
+ NativeInitializeStats* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
ICING_RETURN_ERROR_IF_NULL(schema_store);
auto document_store = std::unique_ptr<DocumentStore>(
new DocumentStore(filesystem, base_dir, clock, schema_store));
- ICING_RETURN_IF_ERROR(document_store->Initialize(initialize_stats));
- return document_store;
+ ICING_ASSIGN_OR_RETURN(DataLoss data_loss,
+ document_store->Initialize(initialize_stats));
+
+ CreateResult create_result;
+ create_result.document_store = std::move(document_store);
+ create_result.data_loss = data_loss;
+ return create_result;
}
-libtextclassifier3::Status DocumentStore::Initialize(
+libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
NativeInitializeStats* initialize_stats) {
auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
filesystem_, MakeDocumentLogFilename(base_dir_),
@@ -238,8 +251,7 @@
initialize_stats->set_document_store_recovery_cause(
NativeInitializeStats::DATA_LOSS);
- if (create_result.data_status ==
- FileBackedProtoLog<DocumentWrapper>::CreateResult::PARTIAL_LOSS) {
+ if (create_result.data_loss == DataLoss::PARTIAL) {
// Ground truth is partially lost.
initialize_stats->set_document_store_data_status(
NativeInitializeStats::PARTIAL_LOSS);
@@ -249,11 +261,11 @@
NativeInitializeStats::COMPLETE_LOSS);
}
}
- Timer document_recovery_timer;
+ std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
libtextclassifier3::Status status = RegenerateDerivedFiles();
if (initialize_stats != nullptr) {
initialize_stats->set_document_store_recovery_latency_ms(
- document_recovery_timer.GetElapsedMilliseconds());
+ document_recovery_timer->GetElapsedMilliseconds());
}
if (!status.ok()) {
ICING_LOG(ERROR)
@@ -269,11 +281,11 @@
initialize_stats->set_document_store_recovery_cause(
NativeInitializeStats::IO_ERROR);
}
- Timer document_recovery_timer;
+ std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
libtextclassifier3::Status status = RegenerateDerivedFiles();
if (initialize_stats != nullptr) {
initialize_stats->set_document_store_recovery_latency_ms(
- document_recovery_timer.GetElapsedMilliseconds());
+ document_recovery_timer->GetElapsedMilliseconds());
}
if (!status.ok()) {
ICING_LOG(ERROR)
@@ -288,7 +300,7 @@
initialize_stats->set_num_documents(document_id_mapper_->num_elements());
}
- return libtextclassifier3::Status::OK;
+ return create_result.data_loss;
}
libtextclassifier3::Status DocumentStore::InitializeDerivedFiles() {
@@ -354,6 +366,13 @@
usage_store_,
UsageStore::Create(filesystem_, MakeUsageStoreDirectoryName(base_dir_)));
+ if (enableBm25f()) {
+ ICING_ASSIGN_OR_RETURN(
+ corpus_mapper_, KeyMapper<CorpusId>::Create(
+ *filesystem_, MakeCorpusMapperFilename(base_dir_),
+ kCorpusMapperMaxSize));
+ }
+
// Ensure the usage store is the correct size.
ICING_RETURN_IF_ERROR(
usage_store_->TruncateTo(document_id_mapper_->num_elements()));
@@ -373,6 +392,9 @@
ICING_RETURN_IF_ERROR(ResetDocumentAssociatedScoreCache());
ICING_RETURN_IF_ERROR(ResetFilterCache());
ICING_RETURN_IF_ERROR(ResetNamespaceMapper());
+ if (enableBm25f()) {
+ ICING_RETURN_IF_ERROR(ResetCorpusMapper());
+ }
// Creates a new UsageStore instance. Note that we don't reset the data in
// usage store here because we're not able to regenerate the usage scores.
@@ -514,6 +536,14 @@
namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(),
namespace_mapper_->num_keys()));
+ if (enableBm25f()) {
+ // Update corpus maps
+ std::string corpus =
+ MakeFingerprint(document_wrapper.document().namespace_(),
+ document_wrapper.document().schema());
+ corpus_mapper_->GetOrPut(corpus, corpus_mapper_->num_keys());
+ }
+
int64_t expiration_timestamp_ms = CalculateExpirationTimestampMs(
document_wrapper.document().creation_timestamp_ms(),
document_wrapper.document().ttl_ms());
@@ -640,6 +670,27 @@
return libtextclassifier3::Status::OK;
}
+libtextclassifier3::Status DocumentStore::ResetCorpusMapper() {
+ if (enableBm25f()) {
+ // TODO(b/139734457): Replace ptr.reset()->Delete->Create flow with Reset().
+ corpus_mapper_.reset();
+ // TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
+ // that can support error logging.
+ libtextclassifier3::Status status = KeyMapper<CorpusId>::Delete(
+ *filesystem_, MakeCorpusMapperFilename(base_dir_));
+ if (!status.ok()) {
+ ICING_LOG(ERROR) << status.error_message()
+ << "Failed to delete old corpus_id mapper";
+ return status;
+ }
+ ICING_ASSIGN_OR_RETURN(
+ corpus_mapper_, KeyMapper<CorpusId>::Create(
+ *filesystem_, MakeCorpusMapperFilename(base_dir_),
+ kCorpusMapperMaxSize));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
libtextclassifier3::StatusOr<Crc32> DocumentStore::ComputeChecksum() const {
Crc32 total_checksum;
@@ -693,6 +744,10 @@
total_checksum.Append(std::to_string(score_cache_checksum.Get()));
total_checksum.Append(std::to_string(filter_cache_checksum.Get()));
total_checksum.Append(std::to_string(namespace_mapper_checksum.Get()));
+ if (enableBm25f()) {
+ Crc32 corpus_mapper_checksum = corpus_mapper_->ComputeChecksum();
+ total_checksum.Append(std::to_string(corpus_mapper_checksum.Get()));
+ }
return total_checksum;
}
@@ -726,7 +781,7 @@
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
DocumentProto&& document, NativePutDocumentStats* put_document_stats) {
- Timer put_timer;
+ std::unique_ptr<Timer> put_timer = clock_.GetNewTimer();
ICING_RETURN_IF_ERROR(document_validator_.Validate(document));
if (put_document_stats != nullptr) {
@@ -783,6 +838,12 @@
NamespaceId namespace_id,
namespace_mapper_->GetOrPut(name_space, namespace_mapper_->num_keys()));
+ if (enableBm25f()) {
+ // Update corpus maps
+ ICING_RETURN_IF_ERROR(corpus_mapper_->GetOrPut(
+ MakeFingerprint(name_space, schema), corpus_mapper_->num_keys()));
+ }
+
ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
schema_store_->GetSchemaTypeId(schema));
@@ -808,7 +869,7 @@
if (put_document_stats != nullptr) {
put_document_stats->set_document_store_latency_ms(
- put_timer.GetElapsedMilliseconds());
+ put_timer->GetElapsedMilliseconds());
}
return new_document_id;
@@ -1025,6 +1086,14 @@
return namespace_mapper_->Get(name_space);
}
+libtextclassifier3::StatusOr<CorpusId> DocumentStore::GetCorpusId(
+ const std::string_view name_space, const std::string_view schema) const {
+ if (enableBm25f()) {
+ return corpus_mapper_->Get(MakeFingerprint(name_space, schema));
+ }
+ return absl_ports::NotFoundError("corpus_mapper disabled");
+}
+
libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const {
auto score_data_or = score_cache_->Get(document_id);
@@ -1073,17 +1142,18 @@
return usage_store_->AddUsageReport(usage_report, document_id);
}
-libtextclassifier3::Status DocumentStore::DeleteByNamespace(
+DocumentStore::DeleteByGroupResult DocumentStore::DeleteByNamespace(
std::string_view name_space, bool soft_delete) {
+ DeleteByGroupResult result;
auto namespace_id_or = namespace_mapper_->Get(name_space);
if (!namespace_id_or.ok()) {
- return absl_ports::Annotate(
+ result.status = absl_ports::Annotate(
namespace_id_or.status(),
absl_ports::StrCat("Failed to find namespace: ", name_space));
+ return result;
}
NamespaceId namespace_id = namespace_id_or.ValueOrDie();
- int num_updated_documents = 0;
if (soft_delete) {
// To delete an entire namespace, we append a tombstone that only contains
// the deleted bit and the name of the deleted namespace.
@@ -1096,36 +1166,43 @@
ICING_LOG(ERROR) << status.error_message()
<< "Failed to delete namespace. namespace = "
<< name_space;
- return status;
+ result.status = std::move(status);
+ return result;
}
}
- ICING_ASSIGN_OR_RETURN(
- num_updated_documents,
- BatchDelete(namespace_id, kInvalidSchemaTypeId, soft_delete));
-
- if (num_updated_documents <= 0) {
- // Treat the fact that no existing documents had this namespace to be the
- // same as this namespace not existing at all.
- return absl_ports::NotFoundError(
- absl_ports::StrCat("Namespace '", name_space, "' doesn't exist"));
+ auto num_deleted_or =
+ BatchDelete(namespace_id, kInvalidSchemaTypeId, soft_delete);
+ if (!num_deleted_or.ok()) {
+ result.status = std::move(num_deleted_or).status();
+ return result;
}
- return libtextclassifier3::Status::OK;
+ result.num_docs_deleted = num_deleted_or.ValueOrDie();
+ if (result.num_docs_deleted <= 0) {
+ // Treat the fact that no existing documents had this namespace to be the
+ // same as this namespace not existing at all.
+ result.status = absl_ports::NotFoundError(
+ absl_ports::StrCat("Namespace '", name_space, "' doesn't exist"));
+ return result;
+ }
+
+ return result;
}
-libtextclassifier3::Status DocumentStore::DeleteBySchemaType(
+DocumentStore::DeleteByGroupResult DocumentStore::DeleteBySchemaType(
std::string_view schema_type, bool soft_delete) {
+ DeleteByGroupResult result;
auto schema_type_id_or = schema_store_->GetSchemaTypeId(schema_type);
if (!schema_type_id_or.ok()) {
- return absl_ports::Annotate(
+ result.status = absl_ports::Annotate(
schema_type_id_or.status(),
absl_ports::StrCat("Failed to find schema type. schema_type: ",
schema_type));
+ return result;
}
SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie();
- int num_updated_documents = 0;
if (soft_delete) {
// To soft-delete an entire schema type, we append a tombstone that only
// contains the deleted bit and the name of the deleted schema type.
@@ -1138,20 +1215,26 @@
ICING_LOG(ERROR) << status.error_message()
<< "Failed to delete schema_type. schema_type = "
<< schema_type;
- return status;
+ result.status = std::move(status);
+ return result;
}
}
- ICING_ASSIGN_OR_RETURN(
- num_updated_documents,
- BatchDelete(kInvalidNamespaceId, schema_type_id, soft_delete));
-
- if (num_updated_documents <= 0) {
- return absl_ports::NotFoundError(absl_ports::StrCat(
- "No documents found with schema type '", schema_type, "'"));
+ auto num_deleted_or =
+ BatchDelete(kInvalidNamespaceId, schema_type_id, soft_delete);
+ if (!num_deleted_or.ok()) {
+ result.status = std::move(num_deleted_or).status();
+ return result;
}
- return libtextclassifier3::Status::OK;
+ result.num_docs_deleted = num_deleted_or.ValueOrDie();
+ if (result.num_docs_deleted <= 0) {
+ result.status = absl_ports::NotFoundError(absl_ports::StrCat(
+ "No documents found with schema type '", schema_type, "'"));
+ return result;
+ }
+
+ return result;
}
libtextclassifier3::StatusOr<int> DocumentStore::BatchDelete(
@@ -1226,6 +1309,10 @@
ICING_RETURN_IF_ERROR(namespace_mapper_->PersistToDisk());
ICING_RETURN_IF_ERROR(usage_store_->PersistToDisk());
+ if (enableBm25f()) {
+ ICING_RETURN_IF_ERROR(corpus_mapper_->PersistToDisk());
+ }
+
// Update the combined checksum and write to header file.
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
@@ -1247,9 +1334,16 @@
ICING_ASSIGN_OR_RETURN(const int64_t namespace_mapper_disk_usage,
namespace_mapper_->GetDiskUsage());
- return document_log_disk_usage + document_key_mapper_disk_usage +
- document_id_mapper_disk_usage + score_cache_disk_usage +
- filter_cache_disk_usage + namespace_mapper_disk_usage;
+ int64_t disk_usage = document_log_disk_usage +
+ document_key_mapper_disk_usage +
+ document_id_mapper_disk_usage + score_cache_disk_usage +
+ filter_cache_disk_usage + namespace_mapper_disk_usage;
+ if (enableBm25f()) {
+ ICING_ASSIGN_OR_RETURN(const int64_t corpus_mapper_disk_usage,
+ corpus_mapper_->GetDiskUsage());
+ disk_usage += corpus_mapper_disk_usage;
+ }
+ return disk_usage;
}
libtextclassifier3::Status DocumentStore::UpdateSchemaStore(
@@ -1406,9 +1500,11 @@
"New directory is the same as the current one.");
}
- ICING_ASSIGN_OR_RETURN(auto new_doc_store,
+ ICING_ASSIGN_OR_RETURN(auto doc_store_create_result,
DocumentStore::Create(filesystem_, new_directory,
&clock_, schema_store_));
+ std::unique_ptr<DocumentStore> new_doc_store =
+ std::move(doc_store_create_result.document_store);
// Writes all valid docs into new document store (new directory)
int size = document_id_mapper_->num_elements();
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index d6ffbaa..78590a5 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -30,6 +30,7 @@
#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/logging.pb.h"
#include "icing/schema/schema-store.h"
+#include "icing/store/corpus-id.h"
#include "icing/store/document-associated-score-data.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
@@ -38,6 +39,7 @@
#include "icing/store/usage-store.h"
#include "icing/util/clock.h"
#include "icing/util/crc32.h"
+#include "icing/util/data-loss.h"
#include "icing/util/document-validator.h"
namespace icing {
@@ -70,6 +72,26 @@
int32_t optimizable_docs = 0;
};
+ struct DeleteByGroupResult {
+ // Status representing whether or not the operation succeeded. See the
+ // comments above the function that returns this result to determine what
+ // possible statuses could be returned.
+ libtextclassifier3::Status status;
+
+ int num_docs_deleted = 0;
+ };
+
+ struct CreateResult {
+ // A successfully initialized document store.
+ std::unique_ptr<DocumentStore> document_store;
+
+ // The data status after initializing from a previous state. Data loss can
+ // happen if the file is corrupted or some previously added data was
+ // unpersisted. This may be used to signal that any derived data off of the
+ // document store may need to be regenerated.
+ DataLoss data_loss;
+ };
+
// Not copyable
DocumentStore(const DocumentStore&) = delete;
DocumentStore& operator=(const DocumentStore&) = delete;
@@ -92,10 +114,10 @@
// were regenerated. This may be helpful in logs.
//
// Returns:
- // A DocumentStore on success
+ // A DocumentStore::CreateResult on success
// FAILED_PRECONDITION on any null pointer input
// INTERNAL_ERROR on IO error
- static libtextclassifier3::StatusOr<std::unique_ptr<DocumentStore>> Create(
+ static libtextclassifier3::StatusOr<DocumentStore::CreateResult> Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
NativeInitializeStats* initialize_stats = nullptr);
@@ -209,6 +231,15 @@
libtextclassifier3::StatusOr<NamespaceId> GetNamespaceId(
std::string_view name_space) const;
+ // Returns the CorpusId associated with the given namespace and schema.
+ //
+ // Returns:
+ // A CorpusId on success
+ // NOT_FOUND if the key doesn't exist
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<CorpusId> GetCorpusId(
+ const std::string_view name_space, const std::string_view schema) const;
+
// Returns the DocumentAssociatedScoreData of the document specified by the
// DocumentId.
//
@@ -272,8 +303,8 @@
// OK on success
// NOT_FOUND if namespace doesn't exist
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status DeleteByNamespace(std::string_view name_space,
- bool soft_delete = false);
+ DeleteByGroupResult DeleteByNamespace(std::string_view name_space,
+ bool soft_delete = false);
// Deletes all documents belonging to the given schema type. The documents
// will be marked as deleted if 'soft_delete' is true, otherwise they will be
@@ -289,8 +320,8 @@
// OK on success
// NOT_FOUND if schema_type doesn't exist
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status DeleteBySchemaType(std::string_view schema_type,
- bool soft_delete = false);
+ DeleteByGroupResult DeleteBySchemaType(std::string_view schema_type,
+ bool soft_delete = false);
// Syncs all the data and metadata changes to disk.
//
@@ -306,7 +337,7 @@
// Disk usage on success
// INTERNAL_ERROR on IO error
//
- // TODO(samzheng): consider returning a struct which has the breakdown of each
+ // TODO(tjbarron): consider returning a struct which has the breakdown of each
// component.
libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
@@ -426,6 +457,12 @@
// DocumentStore. Namespaces may be removed from the mapper during compaction.
std::unique_ptr<KeyMapper<NamespaceId>> namespace_mapper_;
+ // Maps a corpus, i.e. a (namespace, schema type) pair, to a densely-assigned
+ // unique id. A coprus is assigned an
+ // id when the first document belonging to that corpus is added to the
+ // DocumentStore. Corpus ids may be removed from the mapper during compaction.
+ std::unique_ptr<KeyMapper<CorpusId>> corpus_mapper_;
+
// A storage class that caches all usage scores. Usage scores are not
// considered as ground truth. Usage scores are associated with document ids
// so they need to be updated when document ids change.
@@ -438,7 +475,7 @@
// worry about this field.
bool initialized_ = false;
- libtextclassifier3::Status Initialize(
+ libtextclassifier3::StatusOr<DataLoss> Initialize(
NativeInitializeStats* initialize_stats);
// Creates sub-components and verifies the integrity of each sub-component.
@@ -491,6 +528,12 @@
// Returns OK or any IO errors.
libtextclassifier3::Status ResetNamespaceMapper();
+ // Resets the unique_ptr to the corpus_mapper, deletes the underlying file,
+ // and re-creates a new instance of the corpus_mapper.
+ //
+ // Returns OK or any IO errors.
+ libtextclassifier3::Status ResetCorpusMapper();
+
// Checks if the header exists already. This does not create the header file
// if it doesn't exist.
bool HeaderExists();
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index d97ec46..29bf8bb 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -33,9 +33,12 @@
#include "icing/schema/schema-store.h"
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
+#include "icing/store/enable-bm25f.h"
#include "icing/store/namespace-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
+#include "icing/testing/platform.h"
+#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/util/crc32.h"
@@ -98,6 +101,7 @@
}
void SetUp() override {
+ setEnableBm25f(true);
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
@@ -126,7 +130,8 @@
StringIndexingConfig::TokenizerType::PLAIN);
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store_, SchemaStore::Create(&filesystem_, schema_store_dir_));
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
}
@@ -182,9 +187,11 @@
TEST_F(DocumentStoreTest, PutAndGetInSameNamespaceOk) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
// Both documents have namespace of "icing"
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
@@ -200,9 +207,11 @@
TEST_F(DocumentStoreTest, PutAndGetAcrossNamespacesOk) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
// Can handle different namespaces with same url
DocumentProto foo_document = DocumentBuilder()
@@ -231,9 +240,11 @@
// document and old doc ids are not getting reused.
TEST_F(DocumentStoreTest, PutSameKey) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
// Creates two documents with the same key (namespace + uri)
DocumentProto document1 = DocumentProto(test_document1_);
@@ -258,9 +269,12 @@
TEST_F(DocumentStoreTest, IsDocumentExisting) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(DocumentProto(test_document1_)));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
@@ -287,9 +301,12 @@
TEST_F(DocumentStoreTest, GetSoftDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(DocumentProto(test_document1_)));
EXPECT_THAT(
document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
@@ -305,9 +322,12 @@
TEST_F(DocumentStoreTest, GetHardDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(DocumentProto(test_document1_)));
EXPECT_THAT(
document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
@@ -330,9 +350,12 @@
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(document));
EXPECT_THAT(document_store->Get("namespace", "uri"),
IsOkAndHolds(EqualsProto(document)));
@@ -355,9 +378,12 @@
TEST_F(DocumentStoreTest, GetInvalidDocumentId) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
doc_store->Put(DocumentProto(test_document1_)));
@@ -379,9 +405,11 @@
TEST_F(DocumentStoreTest, DeleteNonexistentDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Validates that deleting something non-existing won't append anything to
// ground truth
@@ -399,9 +427,12 @@
TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(test_document1_));
// First time is OK
@@ -416,9 +447,11 @@
TEST_F(DocumentStoreTest, SoftDeleteByNamespaceOk) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
DocumentProto document1 = test_document1_;
document1.set_namespace_("namespace.1");
@@ -442,8 +475,10 @@
// DELETE namespace.1. document1 and document 4 should be deleted. document2
// and document3 should still be retrievable.
- ICING_EXPECT_OK(
- doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/true));
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/true);
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document2.namespace_(), document2.uri()),
@@ -456,9 +491,11 @@
TEST_F(DocumentStoreTest, HardDeleteByNamespaceOk) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
DocumentProto document1 = test_document1_;
document1.set_namespace_("namespace.1");
@@ -482,8 +519,10 @@
// DELETE namespace.1. document1 and document 4 should be deleted. document2
// and document3 should still be retrievable.
- ICING_EXPECT_OK(
- doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/false));
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/false);
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document2.namespace_(), document2.uri()),
@@ -496,17 +535,21 @@
TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNonexistentNamespaceNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
// Validates that deleting something non-existing won't append anything to
// ground truth
int64_t ground_truth_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace",
- /*soft_delete=*/true),
+ EXPECT_THAT(doc_store
+ ->DeleteByNamespace("nonexistent_namespace",
+ /*soft_delete=*/true)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t ground_truth_size_after = filesystem_.GetFileSize(
@@ -516,17 +559,21 @@
TEST_F(DocumentStoreTest, HardDeleteByNamespaceNonexistentNamespaceNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
// Validates that deleting something non-existing won't append anything to
// ground truth
int64_t ground_truth_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace",
- /*soft_delete=*/false),
+ EXPECT_THAT(doc_store
+ ->DeleteByNamespace("nonexistent_namespace",
+ /*soft_delete=*/false)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t ground_truth_size_after = filesystem_.GetFileSize(
@@ -536,9 +583,12 @@
TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(test_document1_));
ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
test_document1_.uri()));
@@ -546,16 +596,21 @@
// At this point, there are no existing documents with the namespace, even
// though Icing's derived files know about this namespace. We should still
// return NOT_FOUND since nothing existing has this namespace.
- EXPECT_THAT(document_store->DeleteByNamespace(test_document1_.namespace_(),
- /*soft_delete=*/true),
+ EXPECT_THAT(document_store
+ ->DeleteByNamespace(test_document1_.namespace_(),
+ /*soft_delete=*/true)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
TEST_F(DocumentStoreTest, HardDeleteByNamespaceNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(test_document1_));
ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
test_document1_.uri()));
@@ -563,8 +618,10 @@
// At this point, there are no existing documents with the namespace, even
// though Icing's derived files know about this namespace. We should still
// return NOT_FOUND since nothing existing has this namespace.
- EXPECT_THAT(document_store->DeleteByNamespace(test_document1_.namespace_(),
- /*soft_delete=*/false),
+ EXPECT_THAT(document_store
+ ->DeleteByNamespace(test_document1_.namespace_(),
+ /*soft_delete=*/false)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -588,9 +645,12 @@
int64_t ground_truth_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK(doc_store->Put(document1));
ICING_ASSERT_OK(doc_store->Put(document2));
ICING_ASSERT_OK(doc_store->Put(document3));
@@ -598,7 +658,10 @@
// DELETE namespace.1. document1 and document 4 should be deleted. document2
// and document3 should still be retrievable.
- ICING_EXPECT_OK(doc_store->DeleteByNamespace("namespace.1"));
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace.1");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
ground_truth_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
@@ -617,9 +680,11 @@
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
int64_t ground_truth_size_after = filesystem_.GetFileSize(
@@ -650,14 +715,16 @@
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
ICING_ASSERT_OK(schema_store->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
DocumentProto email_document_1 = DocumentBuilder()
.SetKey("namespace1", "1")
@@ -693,8 +760,10 @@
// Delete the "email" type and ensure that it works across both
// email_document's namespaces. And that other documents aren't affected.
- ICING_EXPECT_OK(
- document_store->DeleteBySchemaType("email", /*soft_delete=*/true));
+ DocumentStore::DeleteByGroupResult group_result =
+ document_store->DeleteBySchemaType("email", /*soft_delete=*/true);
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(document_store->Get(email_1_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(document_store->Get(email_2_document_id),
@@ -705,8 +774,10 @@
IsOkAndHolds(EqualsProto(person_document)));
// Delete the "message" type and check that other documents aren't affected
- ICING_EXPECT_OK(
- document_store->DeleteBySchemaType("message", /*soft_delete=*/true));
+ group_result =
+ document_store->DeleteBySchemaType("message", /*soft_delete=*/true);
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
EXPECT_THAT(document_store->Get(email_1_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(document_store->Get(email_2_document_id),
@@ -731,14 +802,16 @@
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
ICING_ASSERT_OK(schema_store->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
DocumentProto email_document_1 = DocumentBuilder()
.SetKey("namespace1", "1")
@@ -774,8 +847,10 @@
// Delete the "email" type and ensure that it works across both
// email_document's namespaces. And that other documents aren't affected.
- ICING_EXPECT_OK(
- document_store->DeleteBySchemaType("email", /*soft_delete=*/false));
+ DocumentStore::DeleteByGroupResult group_result =
+ document_store->DeleteBySchemaType("email", /*soft_delete=*/false);
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(document_store->Get(email_1_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(document_store->Get(email_2_document_id),
@@ -786,8 +861,10 @@
IsOkAndHolds(EqualsProto(person_document)));
// Delete the "message" type and check that other documents aren't affected
- ICING_EXPECT_OK(
- document_store->DeleteBySchemaType("message", /*soft_delete=*/false));
+ group_result =
+ document_store->DeleteBySchemaType("message", /*soft_delete=*/false);
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
EXPECT_THAT(document_store->Get(email_1_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(document_store->Get(email_2_document_id),
@@ -800,17 +877,21 @@
TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Validates that deleting something non-existing won't append anything to
// ground truth
int64_t ground_truth_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type",
- /*soft_delete=*/true),
+ EXPECT_THAT(document_store
+ ->DeleteBySchemaType("nonexistent_type",
+ /*soft_delete=*/true)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t ground_truth_size_after = filesystem_.GetFileSize(
@@ -821,17 +902,21 @@
TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Validates that deleting something non-existing won't append anything to
// ground truth
int64_t ground_truth_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type",
- /*soft_delete=*/false),
+ EXPECT_THAT(document_store
+ ->DeleteBySchemaType("nonexistent_type",
+ /*soft_delete=*/false)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
int64_t ground_truth_size_after = filesystem_.GetFileSize(
@@ -842,29 +927,39 @@
TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(test_document1_));
ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
test_document1_.uri()));
- EXPECT_THAT(document_store->DeleteBySchemaType(test_document1_.schema(),
- /*soft_delete=*/true),
+ EXPECT_THAT(document_store
+ ->DeleteBySchemaType(test_document1_.schema(),
+ /*soft_delete=*/true)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_EXPECT_OK(document_store->Put(test_document1_));
ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
test_document1_.uri()));
- EXPECT_THAT(document_store->DeleteBySchemaType(test_document1_.schema(),
- /*soft_delete=*/false),
+ EXPECT_THAT(document_store
+ ->DeleteBySchemaType(test_document1_.schema(),
+ /*soft_delete=*/false)
+ .status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
@@ -880,7 +975,7 @@
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
ICING_ASSERT_OK(schema_store->SetSchema(schema));
@@ -901,9 +996,11 @@
int64_t ground_truth_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(email_document_id,
document_store->Put(email_document));
@@ -911,7 +1008,10 @@
document_store->Put(message_document));
// Delete "email". "message" documents should still be retrievable.
- ICING_EXPECT_OK(document_store->DeleteBySchemaType("email"));
+ DocumentStore::DeleteByGroupResult group_result =
+ document_store->DeleteBySchemaType("email");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
ground_truth_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
@@ -930,9 +1030,11 @@
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
int64_t ground_truth_size_after = filesystem_.GetFileSize(
@@ -957,7 +1059,7 @@
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
ICING_ASSERT_OK(schema_store->SetSchema(schema));
@@ -978,9 +1080,11 @@
int64_t ground_truth_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(email_document_id,
document_store->Put(email_document));
@@ -988,7 +1092,10 @@
document_store->Put(message_document));
// Delete "email". "message" documents should still be retrievable.
- ICING_EXPECT_OK(document_store->DeleteBySchemaType("email"));
+ DocumentStore::DeleteByGroupResult group_result =
+ document_store->DeleteBySchemaType("email");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
EXPECT_THAT(document_store->Get(email_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -1019,9 +1126,11 @@
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
int64_t ground_truth_size_after = filesystem_.GetFileSize(
@@ -1036,9 +1145,11 @@
TEST_F(DocumentStoreTest, OptimizeInto) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
DocumentProto document1 = DocumentBuilder()
.SetKey("namespace", "uri1")
@@ -1118,9 +1229,12 @@
{
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(document_id1,
doc_store->Put(DocumentProto(test_document1_)));
ICING_ASSERT_OK_AND_ASSIGN(document_id2,
@@ -1150,9 +1264,12 @@
// Successfully recover from a data loss issue.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
EXPECT_THAT(doc_store->Get(document_id1),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
@@ -1174,9 +1291,12 @@
{
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(document_id1,
doc_store->Put(DocumentProto(test_document1_)));
ICING_ASSERT_OK_AND_ASSIGN(document_id2,
@@ -1209,9 +1329,12 @@
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
EXPECT_THAT(doc_store->Get(document_id1),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
@@ -1233,9 +1356,12 @@
{
// Can put and delete fine.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(document_id1,
doc_store->Put(DocumentProto(test_document1_)));
ICING_ASSERT_OK_AND_ASSIGN(document_id2,
@@ -1264,9 +1390,12 @@
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
EXPECT_THAT(doc_store->Get(document_id1),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
EXPECT_THAT(doc_store->Get(document_id2),
@@ -1285,9 +1414,12 @@
TEST_F(DocumentStoreTest, GetDiskUsage) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_doc_store_size,
doc_store->GetDiskUsage());
EXPECT_THAT(empty_doc_store_size, Gt(0));
@@ -1313,18 +1445,23 @@
ON_CALL(mock_filesystem, GetDiskUsage(A<const char*>()))
.WillByDefault(Return(Filesystem::kBadFileSize));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem,
+ create_result,
DocumentStore::Create(&mock_filesystem, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem =
+ std::move(create_result.document_store);
+
EXPECT_THAT(doc_store_with_mock_filesystem->GetDiskUsage(),
StatusIs(libtextclassifier3::StatusCode::INTERNAL));
}
TEST_F(DocumentStoreTest, MaxDocumentId) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
// Since the DocumentStore is empty, we get an invalid DocumentId
EXPECT_THAT(doc_store->last_added_document_id(), Eq(kInvalidDocumentId));
@@ -1344,9 +1481,11 @@
TEST_F(DocumentStoreTest, GetNamespaceId) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
DocumentProto document_namespace1 =
DocumentBuilder().SetKey("namespace1", "1").SetSchema("email").Build();
@@ -1363,15 +1502,23 @@
// DocumentStore
EXPECT_THAT(doc_store->GetNamespaceId("namespace2"), IsOkAndHolds(Eq(1)));
+ // DELETE namespace1 - document_namespace1 is deleted.
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace1");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
+
// NamespaceMapper doesn't care if the document has been deleted
EXPECT_THAT(doc_store->GetNamespaceId("namespace1"), IsOkAndHolds(Eq(0)));
}
TEST_F(DocumentStoreTest, GetDuplicateNamespaceId) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
DocumentProto document1 =
DocumentBuilder().SetKey("namespace", "1").SetSchema("email").Build();
@@ -1387,19 +1534,123 @@
TEST_F(DocumentStoreTest, NonexistentNamespaceNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
EXPECT_THAT(doc_store->GetNamespaceId("nonexistent_namespace"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, SoftDeletionDoesNotClearFilterCache) {
+TEST_F(DocumentStoreTest, GetCorpusIdReturnsNotFoundWhenFeatureIsDisabled) {
+ setEnableBm25f(false);
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "1").SetSchema("email").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "2").SetSchema("email").Build();
+
+ ICING_ASSERT_OK(doc_store->Put(document1));
+ ICING_ASSERT_OK(doc_store->Put(document2));
+
+ EXPECT_THAT(doc_store->GetCorpusId("namespace", "email"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+ HasSubstr("corpus_mapper disabled")));
+}
+
+TEST_F(DocumentStoreTest, GetCorpusDuplicateCorpusId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "1").SetSchema("email").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "2").SetSchema("email").Build();
+
+ ICING_ASSERT_OK(doc_store->Put(document1));
+ ICING_ASSERT_OK(doc_store->Put(document2));
+
+  // CorpusId of 0 since it was the first corpus seen by the DocumentStore
+ EXPECT_THAT(doc_store->GetCorpusId("namespace", "email"),
+ IsOkAndHolds(Eq(0)));
+}
+
+TEST_F(DocumentStoreTest, GetCorpusId) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto document_corpus1 =
+ DocumentBuilder().SetKey("namespace1", "1").SetSchema("email").Build();
+ DocumentProto document_corpus2 =
+ DocumentBuilder().SetKey("namespace2", "2").SetSchema("email").Build();
+
+ ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_corpus1)));
+ ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_corpus2)));
+
+ // CorpusId of 0 since it was the first corpus seen by the DocumentStore
+ EXPECT_THAT(doc_store->GetCorpusId("namespace1", "email"),
+ IsOkAndHolds(Eq(0)));
+
+ // CorpusId of 1 since it was the second corpus seen by the
+ // DocumentStore
+ EXPECT_THAT(doc_store->GetCorpusId("namespace2", "email"),
+ IsOkAndHolds(Eq(1)));
+
+ // DELETE namespace1 - document_corpus1 is deleted.
+ DocumentStore::DeleteByGroupResult group_result =
+ doc_store->DeleteByNamespace("namespace1");
+ EXPECT_THAT(group_result.status, IsOk());
+ EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
+
+ // CorpusMapper doesn't care if the document has been deleted
+ EXPECT_THAT(doc_store->GetNamespaceId("namespace1"), IsOkAndHolds(Eq(0)));
+}
+
+TEST_F(DocumentStoreTest, NonexistentCorpusNotFound) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ EXPECT_THAT(
+ doc_store->GetCorpusId("nonexistent_namespace", "nonexistent_schema"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ DocumentProto document_corpus =
+ DocumentBuilder().SetKey("namespace1", "1").SetSchema("email").Build();
+ ICING_ASSERT_OK(doc_store->Put(DocumentProto(document_corpus)));
+
+ EXPECT_THAT(doc_store->GetCorpusId("nonexistent_namespace", "email"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(doc_store->GetCorpusId("namespace1", "nonexistent_schema"),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(DocumentStoreTest, SoftDeletionDoesNotClearFilterCache) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
doc_store->Put(test_document1_));
@@ -1418,9 +1669,11 @@
TEST_F(DocumentStoreTest, HardDeleteClearsFilterCache) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
doc_store->Put(test_document1_));
@@ -1440,9 +1693,11 @@
TEST_F(DocumentStoreTest, SoftDeletionDoesNotClearScoreCache) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
doc_store->Put(test_document1_));
@@ -1460,9 +1715,11 @@
TEST_F(DocumentStoreTest, HardDeleteClearsScoreCache) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
doc_store->Put(test_document1_));
@@ -1480,9 +1737,11 @@
TEST_F(DocumentStoreTest, SoftDeleteDoesNotClearUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
doc_store->Put(test_document1_));
@@ -1508,9 +1767,11 @@
TEST_F(DocumentStoreTest, HardDeleteShouldClearUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
doc_store->Put(test_document1_));
@@ -1545,9 +1806,11 @@
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
@@ -1567,9 +1830,11 @@
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
@@ -1591,9 +1856,11 @@
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
@@ -1618,9 +1885,11 @@
int64_t fake_real_time = 100;
fake_clock_.SetSystemTimeMilliseconds(fake_real_time);
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id,
@@ -1649,9 +1918,11 @@
.Build();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> doc_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
doc_store->Put(document1));
@@ -1669,9 +1940,11 @@
TEST_F(DocumentStoreTest, ComputeChecksumSameBetweenCalls) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
ICING_EXPECT_OK(document_store->Put(test_document1_));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum());
@@ -1682,9 +1955,11 @@
TEST_F(DocumentStoreTest, ComputeChecksumSameAcrossInstances) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
ICING_EXPECT_OK(document_store->Put(test_document1_));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum());
@@ -1692,17 +1967,20 @@
// Destroy the previous instance and recreate DocumentStore
document_store.reset();
ICING_ASSERT_OK_AND_ASSIGN(
- document_store, DocumentStore::Create(&filesystem_, document_store_dir_,
- &fake_clock_, schema_store_.get()));
+ create_result, DocumentStore::Create(&filesystem_, document_store_dir_,
+ &fake_clock_, schema_store_.get()));
+ document_store = std::move(create_result.document_store);
EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum));
}
TEST_F(DocumentStoreTest, ComputeChecksumChangesOnModification) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
ICING_EXPECT_OK(document_store->Put(test_document1_));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum());
@@ -1739,7 +2017,7 @@
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
SchemaProto schema;
auto type_config = schema.add_types();
type_config->set_schema_type("email");
@@ -1753,9 +2031,11 @@
schema_store->GetSchemaTypeId("message"));
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Insert and verify a "email "document
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1802,7 +2082,7 @@
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
SchemaProto schema;
auto type_config = schema.add_types();
type_config->set_schema_type("email");
@@ -1814,9 +2094,11 @@
// Successfully recover from a corrupt derived file issue. We don't fail just
// because the "message" schema type is missing
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// "email" document is fine
EXPECT_THAT(document_store->Get(email_document_id),
@@ -1857,7 +2139,7 @@
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
ICING_EXPECT_OK(schema_store->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id,
@@ -1879,9 +2161,12 @@
// Add the documents and check SchemaTypeIds match
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
document_store->Put(email_document));
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1948,7 +2233,7 @@
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
ICING_EXPECT_OK(schema_store->SetSchema(schema));
// Add two documents, with and without a subject
@@ -1969,9 +2254,12 @@
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_without_subject_document_id,
document_store->Put(email_without_subject));
EXPECT_THAT(document_store->Get(email_without_subject_document_id),
@@ -2016,7 +2304,7 @@
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
ICING_EXPECT_OK(schema_store->SetSchema(schema));
// Add a "email" and "message" document
@@ -2036,9 +2324,12 @@
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
document_store->Put(email_document));
EXPECT_THAT(document_store->Get(email_document_id),
@@ -2082,7 +2373,7 @@
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
ICING_EXPECT_OK(schema_store->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId old_email_schema_type_id,
@@ -2104,9 +2395,12 @@
// Add the documents and check SchemaTypeIds match
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
document_store->Put(email_document));
ICING_ASSERT_OK_AND_ASSIGN(
@@ -2175,7 +2469,7 @@
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
ICING_EXPECT_OK(schema_store->SetSchema(schema));
// Add two documents, with and without a subject
@@ -2196,9 +2490,12 @@
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_without_subject_document_id,
document_store->Put(email_without_subject));
EXPECT_THAT(document_store->Get(email_without_subject_document_id),
@@ -2246,7 +2543,7 @@
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir));
+ SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
ICING_EXPECT_OK(schema_store->SetSchema(schema));
// Add a "email" and "message" document
@@ -2266,9 +2563,12 @@
// Insert documents and check they're ok
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_document_id,
document_store->Put(email_document));
EXPECT_THAT(document_store->Get(email_document_id),
@@ -2302,9 +2602,11 @@
TEST_F(DocumentStoreTest, GetOptimizeInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Nothing should be optimizable yet
ICING_ASSERT_OK_AND_ASSIGN(DocumentStore::OptimizeInfo optimize_info,
@@ -2337,9 +2639,10 @@
ICING_ASSERT_OK(document_store->OptimizeInto(optimized_dir));
document_store.reset();
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> optimized_document_store,
- DocumentStore::Create(&filesystem_, optimized_dir, &fake_clock_,
- schema_store_.get()));
+ create_result, DocumentStore::Create(&filesystem_, optimized_dir,
+ &fake_clock_, schema_store_.get()));
+ std::unique_ptr<DocumentStore> optimized_document_store =
+ std::move(create_result.document_store);
ICING_ASSERT_OK_AND_ASSIGN(optimize_info,
optimized_document_store->GetOptimizeInfo());
@@ -2350,9 +2653,11 @@
TEST_F(DocumentStoreTest, GetAllNamespaces) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Empty namespaces to start with
EXPECT_THAT(document_store->GetAllNamespaces(), IsEmpty());
@@ -2418,9 +2723,12 @@
TEST_F(DocumentStoreTest, ReportUsageWithDifferentTimestampsAndGetUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store->Put(test_document1_));
@@ -2494,9 +2802,12 @@
TEST_F(DocumentStoreTest, ReportUsageWithDifferentTypesAndGetUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
document_store->Put(test_document1_));
@@ -2537,9 +2848,12 @@
DocumentId document_id;
{
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(document_id,
document_store->Put(test_document1_));
@@ -2567,9 +2881,11 @@
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Usage scores should be the same.
ASSERT_THAT(document_store->GetUsageScores(document_id),
@@ -2581,9 +2897,12 @@
DocumentId document_id;
{
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(
document_id, document_store->Put(DocumentProto(test_document1_)));
@@ -2612,9 +2931,11 @@
// Successfully recover from a data loss issue.
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
// Usage scores should still be available.
ASSERT_THAT(document_store->GetUsageScores(document_id),
@@ -2623,9 +2944,12 @@
TEST_F(DocumentStoreTest, UsageScoresShouldBeCopiedOverToUpdatedDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id,
document_store->Put(DocumentProto(test_document1_)));
@@ -2656,9 +2980,12 @@
TEST_F(DocumentStoreTest,
UsageScoresShouldNotBeCopiedOverFromOldSoftDeletedDocs) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id,
document_store->Put(DocumentProto(test_document1_)));
@@ -2691,9 +3018,12 @@
TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> document_store,
+ DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id1,
document_store->Put(DocumentProto(test_document1_)));
@@ -2720,9 +3050,10 @@
// Get optimized document store
ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<DocumentStore> optimized_document_store,
- DocumentStore::Create(&filesystem_, optimized_dir, &fake_clock_,
- schema_store_.get()));
+ create_result, DocumentStore::Create(&filesystem_, optimized_dir,
+ &fake_clock_, schema_store_.get()));
+ std::unique_ptr<DocumentStore> optimized_document_store =
+ std::move(create_result.document_store);
// Usage scores should be the same.
// The original document_id2 should have become document_id2 - 1.
@@ -2730,6 +3061,136 @@
IsOkAndHolds(expected_scores));
}
+TEST_F(DocumentStoreTest, DetectPartialDataLoss) {
+ {
+ // Can put and delete fine.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(DocumentProto(test_document1_)));
+ EXPECT_THAT(doc_store->Get(document_id),
+ IsOkAndHolds(EqualsProto(test_document1_)));
+ }
+
+ // "Corrupt" the content written in the log by adding non-checksummed data to
+ // it. This will mess up the checksum of the proto log, forcing it to rewind
+ // to the last saved point and triggering data loss.
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+ const std::string serialized_document = document.SerializeAsString();
+
+ const std::string document_log_file =
+ absl_ports::StrCat(document_store_dir_, "/document_log");
+ int64_t file_size = filesystem_.GetFileSize(document_log_file.c_str());
+ filesystem_.PWrite(document_log_file.c_str(), file_size,
+ serialized_document.data(), serialized_document.size());
+
+ // Successfully recover from a data loss issue.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
+}
+
+TEST_F(DocumentStoreTest, DetectCompleteDataLoss) {
+ int64_t corruptible_offset;
+ const std::string document_log_file =
+ absl_ports::StrCat(document_store_dir_, "/document_log");
+ {
+ // Can put and delete fine.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ EXPECT_THAT(create_result.data_loss, Eq(DataLoss::NONE));
+
+ // There's some space at the beginning of the file (e.g. header, kmagic,
+ // etc) that is necessary to initialize the FileBackedProtoLog. We can't
+ // corrupt that region, so we need to figure out the offset at which
+ // documents will be written to - which is the file size after
+ // initialization.
+ corruptible_offset = filesystem_.GetFileSize(document_log_file.c_str());
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ doc_store->Put(DocumentProto(test_document1_)));
+ EXPECT_THAT(doc_store->Get(document_id),
+ IsOkAndHolds(EqualsProto(test_document1_)));
+ }
+
+ // "Corrupt" the persisted content written in the log. We can't recover if
+ // the persisted data was corrupted.
+ std::string corruption = "abc";
+ filesystem_.PWrite(document_log_file.c_str(), /*offset=*/corruptible_offset,
+ corruption.data(), corruption.size());
+
+  // Initialization should still succeed, but report complete data loss.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
+}
+
+TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
+ // The directory testdata/v0/document_store contains only the scoring_cache
+ // and the document_store_header (holding the crc for the scoring_cache). If
+ // the current code is compatible with the format of the v0 scoring_cache,
+ // then an empty document store should be initialized, but the non-empty
+ // scoring_cache should be retained.
+  // Since the current document-associated-score-data is compatible with the
+ // score_cache in testdata/v0/document_store, the document store should be
+ // initialized without having to re-generate the derived files.
+
+ // Create dst directory
+ ASSERT_THAT(filesystem_.CreateDirectory(document_store_dir_.c_str()), true);
+
+ // Get src files
+ std::string document_store_v0;
+ if (IsAndroidPlatform() || IsIosPlatform()) {
+ document_store_v0 = GetTestFilePath(
+ "icing/testdata/v0/document_store_android_ios_compatible");
+ } else {
+ document_store_v0 =
+ GetTestFilePath("icing/testdata/v0/document_store");
+ }
+ std::vector<std::string> document_store_files;
+ Filesystem filesystem;
+ filesystem.ListDirectory(document_store_v0.c_str(), &document_store_files);
+
+ VLOG(1) << "Copying files " << document_store_v0 << ' '
+ << document_store_files.size();
+ for (size_t i = 0; i != document_store_files.size(); i++) {
+ std::string src =
+ absl_ports::StrCat(document_store_v0, "/", document_store_files[i]);
+ std::string dst =
+ absl_ports::StrCat(document_store_dir_, "/", document_store_files[i]);
+ ASSERT_THAT(filesystem_.CopyFile(src.c_str(), dst.c_str()), true);
+ }
+
+ NativeInitializeStats initializeStats;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get(), &initializeStats));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ // Regeneration never happens.
+ EXPECT_EQ(initializeStats.document_store_recovery_cause(),
+ NativeInitializeStats::NONE);
+}
+
} // namespace
} // namespace lib
diff --git a/icing/store/enable-bm25f.h b/icing/store/enable-bm25f.h
new file mode 100644
index 0000000..cee94d1
--- /dev/null
+++ b/icing/store/enable-bm25f.h
@@ -0,0 +1,31 @@
+// Copyright (C) 2020 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_STORE_ENABLE_BM25F_H_
+#define ICING_STORE_ENABLE_BM25F_H_
+
+namespace icing {
+namespace lib {
+
+inline bool enable_bm25f_ = false;
+
+inline bool enableBm25f() { return enable_bm25f_; }
+
+// Setter for testing purposes. It should never be called in production code.
+inline void setEnableBm25f(bool enable_bm25f) { enable_bm25f_ = enable_bm25f; }
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_STORE_ENABLE_BM25F_H_
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index 31d41fc..a15e64e 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -23,6 +23,8 @@
#include "icing/absl_ports/str_join.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/search.proto.h"
+#include "icing/proto/search.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/util/status-macros.h"
@@ -58,6 +60,44 @@
actual.hit_section_ids_mask() == section_mask;
}
+// Used to match a DocHitInfo
+MATCHER_P2(EqualsDocHitInfoWithTermFrequency, document_id,
+ section_ids_to_term_frequencies_map, "") {
+ const DocHitInfo& actual = arg;
+ SectionIdMask section_mask = kSectionIdMaskNone;
+
+ bool term_frequency_as_expected = true;
+ std::vector<Hit::TermFrequency> expected_tfs;
+ std::vector<Hit::TermFrequency> actual_tfs;
+ for (auto itr = section_ids_to_term_frequencies_map.begin();
+ itr != section_ids_to_term_frequencies_map.end(); itr++) {
+ SectionId section_id = itr->first;
+ section_mask |= 1U << section_id;
+ expected_tfs.push_back(itr->second);
+ actual_tfs.push_back(actual.hit_term_frequency(section_id));
+ if (actual.hit_term_frequency(section_id) != itr->second) {
+ term_frequency_as_expected = false;
+ }
+ }
+ std::string actual_term_frequencies = absl_ports::StrCat(
+ "[", absl_ports::StrJoin(actual_tfs, ",", absl_ports::NumberFormatter()),
+ "]");
+ std::string expected_term_frequencies = absl_ports::StrCat(
+ "[",
+ absl_ports::StrJoin(expected_tfs, ",", absl_ports::NumberFormatter()),
+ "]");
+ *result_listener << IcingStringUtil::StringPrintf(
+ "(actual is {document_id=%d, section_mask=%d, term_frequencies=%s}, but "
+ "expected was "
+ "{document_id=%d, section_mask=%d, term_frequencies=%s}.)",
+ actual.document_id(), actual.hit_section_ids_mask(),
+ actual_term_frequencies.c_str(), document_id, section_mask,
+ expected_term_frequencies.c_str());
+ return actual.document_id() == document_id &&
+ actual.hit_section_ids_mask() == section_mask &&
+ term_frequency_as_expected;
+}
+
// Used to match a ScoredDocumentHit
MATCHER_P(EqualsScoredDocumentHit, expected_scored_document_hit, "") {
if (arg.document_id() != expected_scored_document_hit.document_id() ||
@@ -334,6 +374,18 @@
return ExplainMatchResult(error_matcher, arg.message(), result_listener);
}
+MATCHER_P(EqualsSearchResultIgnoreStats, expected, "") {
+ SearchResultProto actual_copy = arg;
+ actual_copy.clear_query_stats();
+ actual_copy.clear_debug_info();
+
+ SearchResultProto expected_copy = expected;
+ expected_copy.clear_query_stats();
+ expected_copy.clear_debug_info();
+ return ExplainMatchResult(testing::EqualsProto(expected_copy), actual_copy,
+ result_listener);
+}
+
// TODO(tjbarron) Remove this once icing has switched to depend on TC3 Status
#define ICING_STATUS_MACROS_CONCAT_NAME(x, y) \
ICING_STATUS_MACROS_CONCAT_IMPL(x, y)
diff --git a/icing/testing/fake-clock.h b/icing/testing/fake-clock.h
index 54b56c3..f9f3654 100644
--- a/icing/testing/fake-clock.h
+++ b/icing/testing/fake-clock.h
@@ -20,6 +20,22 @@
namespace icing {
namespace lib {
+// A fake timer class for tests. It makes sure that the elapsed time changes
+// every time it's requested.
+class FakeTimer : public Timer {
+ public:
+ int64_t GetElapsedMilliseconds() override {
+ return fake_elapsed_milliseconds_;
+ }
+
+ void SetElapsedMilliseconds(int64_t elapsed_milliseconds) {
+ fake_elapsed_milliseconds_ = elapsed_milliseconds;
+ }
+
+ private:
+ int64_t fake_elapsed_milliseconds_ = 0;
+};
+
// Wrapper around real-time clock functions. This is separated primarily so
// tests can override this clock and inject it into the class under test.
class FakeClock : public Clock {
@@ -30,8 +46,17 @@
milliseconds_ = milliseconds;
}
+ std::unique_ptr<Timer> GetNewTimer() const override {
+ return std::make_unique<FakeTimer>(fake_timer_);
+ }
+
+ void SetTimerElapsedMilliseconds(int64_t timer_elapsed_milliseconds) {
+ fake_timer_.SetElapsedMilliseconds(timer_elapsed_milliseconds);
+ }
+
private:
int64_t milliseconds_ = 0;
+ FakeTimer fake_timer_;
};
} // namespace lib
diff --git a/icing/testing/fake-clock_test.cc b/icing/testing/fake-clock_test.cc
index 3c75ae9..4b36727 100644
--- a/icing/testing/fake-clock_test.cc
+++ b/icing/testing/fake-clock_test.cc
@@ -24,7 +24,7 @@
using ::testing::Eq;
-TEST(FakeClockTest, GetSetOk) {
+TEST(FakeClockTest, GetSetSystemTimeOk) {
FakeClock fake_clock;
EXPECT_THAT(fake_clock.GetSystemTimeMilliseconds(), Eq(0));
@@ -35,6 +35,17 @@
EXPECT_THAT(fake_clock.GetSystemTimeMilliseconds(), Eq(-1));
}
+TEST(FakeClockTest, GetSetTimerElapsedTimeOk) {
+ FakeClock fake_clock;
+ EXPECT_THAT(fake_clock.GetNewTimer()->GetElapsedMilliseconds(), Eq(0));
+
+ fake_clock.SetTimerElapsedMilliseconds(10);
+ EXPECT_THAT(fake_clock.GetNewTimer()->GetElapsedMilliseconds(), Eq(10));
+
+ fake_clock.SetTimerElapsedMilliseconds(-1);
+ EXPECT_THAT(fake_clock.GetNewTimer()->GetElapsedMilliseconds(), Eq(-1));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/testing/hit-test-utils.cc b/icing/testing/hit-test-utils.cc
index 0e2eb2a..7ad8a64 100644
--- a/icing/testing/hit-test-utils.cc
+++ b/icing/testing/hit-test-utils.cc
@@ -19,17 +19,18 @@
// Returns a hit that has a delta of desired_byte_length from last_hit.
Hit CreateHit(Hit last_hit, int desired_byte_length) {
- Hit hit =
- (last_hit.section_id() == kMinSectionId)
- ? Hit(kMaxSectionId, last_hit.document_id() + 1, last_hit.score())
- : Hit(last_hit.section_id() - 1, last_hit.document_id(),
- last_hit.score());
+ Hit hit = (last_hit.section_id() == kMinSectionId)
+ ? Hit(kMaxSectionId, last_hit.document_id() + 1,
+ last_hit.term_frequency())
+ : Hit(last_hit.section_id() - 1, last_hit.document_id(),
+ last_hit.term_frequency());
uint8_t buf[5];
while (VarInt::Encode(last_hit.value() - hit.value(), buf) <
desired_byte_length) {
hit = (hit.section_id() == kMinSectionId)
- ? Hit(kMaxSectionId, hit.document_id() + 1, hit.score())
- : Hit(hit.section_id() - 1, hit.document_id(), hit.score());
+ ? Hit(kMaxSectionId, hit.document_id() + 1, hit.term_frequency())
+ : Hit(hit.section_id() - 1, hit.document_id(),
+ hit.term_frequency());
}
return hit;
}
@@ -42,8 +43,8 @@
if (num_hits < 1) {
return hits;
}
- hits.push_back(
- Hit(/*section_id=*/1, /*document_id=*/start_docid, Hit::kMaxHitScore));
+ hits.push_back(Hit(/*section_id=*/1, /*document_id=*/start_docid,
+ Hit::kDefaultTermFrequency));
while (hits.size() < num_hits) {
hits.push_back(CreateHit(hits.back(), desired_byte_length));
}
diff --git a/icing/testing/icu-i18n-test-utils.cc b/icing/testing/icu-i18n-test-utils.cc
index 09878db..50dc26c 100644
--- a/icing/testing/icu-i18n-test-utils.cc
+++ b/icing/testing/icu-i18n-test-utils.cc
@@ -29,7 +29,7 @@
uint8_t utf8_buffer[4]; // U8_APPEND writes 0 to 4 bytes
int utf8_index = 0;
- UBool has_error = FALSE;
+ UBool has_error = false;
// utf8_index is advanced to the end of the contents if successful
U8_APPEND(utf8_buffer, utf8_index, sizeof(utf8_buffer), uchar, has_error);
diff --git a/icing/testing/platform.h b/icing/testing/platform.h
new file mode 100644
index 0000000..ad612d5
--- /dev/null
+++ b/icing/testing/platform.h
@@ -0,0 +1,58 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_TESTING_PLATFORM_H_
+#define ICING_TESTING_PLATFORM_H_
+
+// This file is meant to hold util functions for tests that help the test
+// determine which platform-specific configuration it may be running in.
+namespace icing {
+namespace lib {
+
+// Returns true if the test was built with the CFStringTokenizer as the
+// implementation of LanguageSegmenter.
+inline bool IsCfStringTokenization() {
+#if defined(__APPLE__) && !defined(ICING_IOS_ICU4C_SEGMENTATION)
+ return true;
+#endif // defined(__APPLE__) && !defined(ICING_IOS_ICU4C_SEGMENTATION)
+ return false;
+}
+
+inline bool IsReverseJniTokenization() {
+#ifdef ICING_REVERSE_JNI_SEGMENTATION
+ return true;
+#endif // ICING_REVERSE_JNI_SEGMENTATION
+ return false;
+}
+
+// Whether the running test is an Android test.
+inline bool IsAndroidPlatform() {
+#if defined(__ANDROID__)
+ return true;
+#endif // defined(__ANDROID__)
+ return false;
+}
+
+// Whether the running test is an iOS test.
+inline bool IsIosPlatform() {
+#if defined(__APPLE__)
+ return true;
+#endif // defined(__APPLE__)
+ return false;
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_TESTING_PLATFORM_H_
diff --git a/icing/testing/schema-generator.h b/icing/testing/schema-generator.h
index 863f43f..78430cc 100644
--- a/icing/testing/schema-generator.h
+++ b/icing/testing/schema-generator.h
@@ -40,6 +40,7 @@
}
};
+// Schema generator with random number of properties
template <typename Rand, typename PropertyGenerator>
class RandomSchemaGenerator {
public:
@@ -71,6 +72,37 @@
PropertyGenerator* prop_generator_;
};
+// Schema generator with number of properties specified by the caller
+template <typename PropertyGenerator>
+class SchemaGenerator {
+ public:
+ explicit SchemaGenerator(int num_properties,
+ PropertyGenerator* prop_generator)
+ : num_properties_(num_properties), prop_generator_(prop_generator) {}
+
+ SchemaProto GenerateSchema(int num_types) {
+ SchemaProto schema;
+ while (--num_types >= 0) {
+ SetType(schema.add_types(), "Type" + std::to_string(num_types),
+ num_properties_);
+ }
+ return schema;
+ }
+
+ private:
+ void SetType(SchemaTypeConfigProto* type_config, std::string_view name,
+ int num_properties) const {
+ type_config->set_schema_type(name.data(), name.length());
+ while (--num_properties >= 0) {
+ std::string prop_name = "Prop" + std::to_string(num_properties);
+ (*type_config->add_properties()) = (*prop_generator_)(prop_name);
+ }
+ }
+
+ int num_properties_;
+ PropertyGenerator* prop_generator_;
+};
+
} // namespace lib
} // namespace icing
diff --git a/icing/tokenization/icu/icu-language-segmenter-factory.cc b/icing/tokenization/icu/icu-language-segmenter-factory.cc
index 9213fbe..363bc6d 100644
--- a/icing/tokenization/icu/icu-language-segmenter-factory.cc
+++ b/icing/tokenization/icu/icu-language-segmenter-factory.cc
@@ -32,7 +32,7 @@
// A LanguageSegmenter on success
// INVALID_ARGUMENT if locale string is invalid
//
-// TODO(samzheng): Figure out if we want to verify locale strings and notify
+// TODO(b/156383798): Figure out if we want to verify locale strings and notify
// users. Right now illegal locale strings will be ignored by ICU. ICU
// components will be created with its default locale.
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create(
diff --git a/icing/tokenization/icu/icu-language-segmenter_test.cc b/icing/tokenization/icu/icu-language-segmenter_test.cc
index e60f6d5..c0d6d43 100644
--- a/icing/tokenization/icu/icu-language-segmenter_test.cc
+++ b/icing/tokenization/icu/icu-language-segmenter_test.cc
@@ -395,7 +395,6 @@
IsOkAndHolds(ElementsAre("āăąḃḅḇčćç")));
}
-// TODO(samzheng): test cases for more languages (e.g. top 20 in the world)
TEST_P(IcuLanguageSegmenterAllLocalesTest, WhitespaceSplitLanguages) {
ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
language_segmenter_factory::Create(GetOptions()));
@@ -408,7 +407,6 @@
IsOkAndHolds(ElementsAre("나는", " ", "매일", " ", "출근합니다", ".")));
}
-// TODO(samzheng): more mixed languages test cases
TEST_P(IcuLanguageSegmenterAllLocalesTest, MixedLanguages) {
ICING_ASSERT_OK_AND_ASSIGN(auto language_segmenter,
language_segmenter_factory::Create(GetOptions()));
diff --git a/icing/tokenization/language-segmenter-iterator_test.cc b/icing/tokenization/language-segmenter-iterator_test.cc
index a1b031a..2b1911e 100644
--- a/icing/tokenization/language-segmenter-iterator_test.cc
+++ b/icing/tokenization/language-segmenter-iterator_test.cc
@@ -17,6 +17,7 @@
#include "icing/absl_ports/str_cat.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
@@ -35,10 +36,12 @@
class LanguageSegmenterIteratorTest : public testing::Test {
protected:
void SetUp() override {
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
}
};
diff --git a/icing/tokenization/plain-tokenizer_test.cc b/icing/tokenization/plain-tokenizer_test.cc
index df0981b..f578567 100644
--- a/icing/tokenization/plain-tokenizer_test.cc
+++ b/icing/tokenization/plain-tokenizer_test.cc
@@ -21,6 +21,7 @@
#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/icu-i18n-test-utils.h"
+#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/tokenizer-factory.h"
@@ -35,10 +36,12 @@
class PlainTokenizerTest : public ::testing::Test {
protected:
void SetUp() override {
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
}
};
@@ -132,14 +135,29 @@
EqualsToken(Token::REGULAR, "World"))));
// Full-width punctuation marks are filtered out.
- EXPECT_THAT(
- plain_tokenizer->TokenizeAll("你好,世界!你好:世界。“你好”世界?"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "你好"),
- EqualsToken(Token::REGULAR, "世界"),
- EqualsToken(Token::REGULAR, "你好"),
- EqualsToken(Token::REGULAR, "世界"),
- EqualsToken(Token::REGULAR, "你好"),
- EqualsToken(Token::REGULAR, "世界"))));
+ std::vector<std::string_view> exp_tokens;
+ if (IsCfStringTokenization()) {
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll("你好,世界!你好:世界。“你好”世界?"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "你"),
+ EqualsToken(Token::REGULAR, "好"),
+ EqualsToken(Token::REGULAR, "世界"),
+ EqualsToken(Token::REGULAR, "你"),
+ EqualsToken(Token::REGULAR, "好"),
+ EqualsToken(Token::REGULAR, "世界"),
+ EqualsToken(Token::REGULAR, "你"),
+ EqualsToken(Token::REGULAR, "好"),
+ EqualsToken(Token::REGULAR, "世界"))));
+ } else {
+ EXPECT_THAT(
+ plain_tokenizer->TokenizeAll("你好,世界!你好:世界。“你好”世界?"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "你好"),
+ EqualsToken(Token::REGULAR, "世界"),
+ EqualsToken(Token::REGULAR, "你好"),
+ EqualsToken(Token::REGULAR, "世界"),
+ EqualsToken(Token::REGULAR, "你好"),
+ EqualsToken(Token::REGULAR, "世界"))));
+ }
}
TEST_F(PlainTokenizerTest, SpecialCharacters) {
@@ -166,7 +184,10 @@
}
TEST_F(PlainTokenizerTest, CJKT) {
- language_segmenter_factory::SegmenterOptions options(ULOC_US);
+ // In plain tokenizer, CJKT characters are handled the same way as non-CJKT
+ // characters, just add these tests as sanity checks.
+ // Chinese
+ language_segmenter_factory::SegmenterOptions options(ULOC_SIMPLIFIED_CHINESE);
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
language_segmenter_factory::Create(std::move(options)));
@@ -174,11 +195,6 @@
tokenizer_factory::CreateIndexingTokenizer(
StringIndexingConfig::TokenizerType::PLAIN,
language_segmenter.get()));
-
- // In plain tokenizer, CJKT characters are handled the same way as non-CJKT
- // characters, just add these tests as sanity checks.
-
- // Chinese
EXPECT_THAT(plain_tokenizer->TokenizeAll("我每天走路去上班。"),
IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "我"),
EqualsToken(Token::REGULAR, "每天"),
@@ -186,16 +202,38 @@
EqualsToken(Token::REGULAR, "去"),
EqualsToken(Token::REGULAR, "上班"))));
// Japanese
- EXPECT_THAT(
- plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"),
- IsOkAndHolds(ElementsAre(
- EqualsToken(Token::REGULAR, "私"), EqualsToken(Token::REGULAR, "は"),
- EqualsToken(Token::REGULAR, "毎日"),
- EqualsToken(Token::REGULAR, "仕事"),
- EqualsToken(Token::REGULAR, "に"), EqualsToken(Token::REGULAR, "歩"),
- EqualsToken(Token::REGULAR, "い"),
- EqualsToken(Token::REGULAR, "てい"),
- EqualsToken(Token::REGULAR, "ます"))));
+ options = language_segmenter_factory::SegmenterOptions(ULOC_JAPANESE);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(plain_tokenizer,
+ tokenizer_factory::CreateIndexingTokenizer(
+ StringIndexingConfig::TokenizerType::PLAIN,
+ language_segmenter.get()));
+ if (IsCfStringTokenization()) {
+ EXPECT_THAT(plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "私"),
+ EqualsToken(Token::REGULAR, "は"),
+ EqualsToken(Token::REGULAR, "毎日"),
+ EqualsToken(Token::REGULAR, "仕事"),
+ EqualsToken(Token::REGULAR, "に"),
+ EqualsToken(Token::REGULAR, "歩い"),
+ EqualsToken(Token::REGULAR, "て"),
+ EqualsToken(Token::REGULAR, "い"),
+ EqualsToken(Token::REGULAR, "ます"))));
+ } else {
+ EXPECT_THAT(plain_tokenizer->TokenizeAll("私は毎日仕事に歩いています。"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "私"),
+ EqualsToken(Token::REGULAR, "は"),
+ EqualsToken(Token::REGULAR, "毎日"),
+ EqualsToken(Token::REGULAR, "仕事"),
+ EqualsToken(Token::REGULAR, "に"),
+ EqualsToken(Token::REGULAR, "歩"),
+ EqualsToken(Token::REGULAR, "い"),
+ EqualsToken(Token::REGULAR, "てい"),
+ EqualsToken(Token::REGULAR, "ます"))));
+ }
+
// Khmer
EXPECT_THAT(plain_tokenizer->TokenizeAll("ញុំដើរទៅធ្វើការរាល់ថ្ងៃ។"),
IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "ញុំ"),
@@ -210,13 +248,27 @@
EqualsToken(Token::REGULAR, "출근합니다"))));
// Thai
- EXPECT_THAT(plain_tokenizer->TokenizeAll("ฉันเดินไปทำงานทุกวัน"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "ฉัน"),
- EqualsToken(Token::REGULAR, "เดิน"),
- EqualsToken(Token::REGULAR, "ไป"),
- EqualsToken(Token::REGULAR, "ทำงาน"),
- EqualsToken(Token::REGULAR, "ทุก"),
- EqualsToken(Token::REGULAR, "วัน"))));
+ // DIFFERENCE!! Disagreement over how to segment "ทุกวัน" (iOS groups).
+ // This difference persists even when locale is set to THAI
+ if (IsCfStringTokenization()) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<Token> tokens,
+ plain_tokenizer->TokenizeAll("ฉันเดินไปทำงานทุกวัน"));
+
+ EXPECT_THAT(tokens, ElementsAre(EqualsToken(Token::REGULAR, "ฉัน"),
+ EqualsToken(Token::REGULAR, "เดิน"),
+ EqualsToken(Token::REGULAR, "ไป"),
+ EqualsToken(Token::REGULAR, "ทำงาน"),
+ EqualsToken(Token::REGULAR, "ทุกวัน")));
+ } else {
+ EXPECT_THAT(plain_tokenizer->TokenizeAll("ฉันเดินไปทำงานทุกวัน"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::REGULAR, "ฉัน"),
+ EqualsToken(Token::REGULAR, "เดิน"),
+ EqualsToken(Token::REGULAR, "ไป"),
+ EqualsToken(Token::REGULAR, "ทำงาน"),
+ EqualsToken(Token::REGULAR, "ทุก"),
+ EqualsToken(Token::REGULAR, "วัน"))));
+ }
}
TEST_F(PlainTokenizerTest, ResetToTokenAfterSimple) {
diff --git a/icing/tokenization/raw-query-tokenizer.cc b/icing/tokenization/raw-query-tokenizer.cc
index 50b25c5..205d3a2 100644
--- a/icing/tokenization/raw-query-tokenizer.cc
+++ b/icing/tokenization/raw-query-tokenizer.cc
@@ -247,7 +247,7 @@
//
// NOTE: Please update the state transition table above if this is updated.
//
-// TODO(samzheng): support syntax "-property1:term1", right now we don't allow
+// TODO(tjbarron): support syntax "-property1:term1", right now we don't allow
// exclusion and property restriction applied on the same term.
// TODO(b/141007791): figure out how we'd like to support special characters
// like "+", "&", "@", "#" in indexing and query tokenizers.
diff --git a/icing/tokenization/raw-query-tokenizer_test.cc b/icing/tokenization/raw-query-tokenizer_test.cc
index d4af9ed..e1a666b 100644
--- a/icing/tokenization/raw-query-tokenizer_test.cc
+++ b/icing/tokenization/raw-query-tokenizer_test.cc
@@ -18,6 +18,7 @@
#include "gtest/gtest.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/tokenizer-factory.h"
@@ -33,10 +34,12 @@
class RawQueryTokenizerTest : public ::testing::Test {
protected:
void SetUp() override {
- ICING_ASSERT_OK(
- // File generated via icu_data_file rule in //icing/BUILD.
- icu_data_file_helper::SetUpICUDataFile(
- GetTestFilePath("icing/icu.dat")));
+ if (!IsCfStringTokenization() && !IsReverseJniTokenization()) {
+ ICING_ASSERT_OK(
+ // File generated via icu_data_file rule in //icing/BUILD.
+ icu_data_file_helper::SetUpICUDataFile(
+ GetTestFilePath("icing/icu.dat")));
+ }
}
};
@@ -466,16 +469,35 @@
language_segmenter.get()));
// Exclusion only applies to the term right after it.
- EXPECT_THAT(raw_query_tokenizer->TokenizeAll("-今天天气很好"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""),
- EqualsToken(Token::REGULAR, "今天"),
- EqualsToken(Token::REGULAR, "天气"),
- EqualsToken(Token::REGULAR, "很好"))));
+ if (IsCfStringTokenization()) {
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("-今天天气很好"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::REGULAR, "今天"),
+ EqualsToken(Token::REGULAR, "天气"),
+ EqualsToken(Token::REGULAR, "很"),
+ EqualsToken(Token::REGULAR, "好"))));
+ } else {
+ EXPECT_THAT(
+ raw_query_tokenizer->TokenizeAll("-今天天气很好"),
+ IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::REGULAR, "今天"),
+ EqualsToken(Token::REGULAR, "天气"),
+ EqualsToken(Token::REGULAR, "很好"))));
+ }
- EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll("property1:你好"),
- IsOkAndHolds(ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
- EqualsToken(Token::REGULAR, "你好"))));
+ if (IsCfStringTokenization()) {
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:你好"),
+ IsOkAndHolds(
+ ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
+ EqualsToken(Token::REGULAR, "你"),
+ EqualsToken(Token::REGULAR, "好"))));
+ } else {
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll("property1:你好"),
+ IsOkAndHolds(
+ ElementsAre(EqualsToken(Token::QUERY_PROPERTY, "property1"),
+ EqualsToken(Token::REGULAR, "你好"))));
+ }
EXPECT_THAT(
raw_query_tokenizer->TokenizeAll("标题:你好"),
@@ -567,21 +589,42 @@
tokenizer_factory::CreateQueryTokenizer(tokenizer_factory::RAW_QUERY,
language_segmenter.get()));
- EXPECT_THAT(
- raw_query_tokenizer->TokenizeAll(
- "こんにちはgood afternoon, title:今天 OR (ในวันนี้ -B12)"),
- IsOkAndHolds(ElementsAre(
- EqualsToken(Token::REGULAR, "こんにちは"),
- EqualsToken(Token::REGULAR, "good"),
- EqualsToken(Token::REGULAR, "afternoon"),
- EqualsToken(Token::QUERY_PROPERTY, "title"),
- EqualsToken(Token::REGULAR, "今天"), EqualsToken(Token::QUERY_OR, ""),
- EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
- EqualsToken(Token::REGULAR, "ใน"), EqualsToken(Token::REGULAR, "วัน"),
- EqualsToken(Token::REGULAR, "นี้"),
- EqualsToken(Token::QUERY_EXCLUSION, ""),
- EqualsToken(Token::REGULAR, "B12"),
- EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ if (IsCfStringTokenization()) {
+ EXPECT_THAT(raw_query_tokenizer->TokenizeAll(
+ "こんにちはgood afternoon, title:今天 OR (ในวันนี้ -B12)"),
+ IsOkAndHolds(ElementsAre(
+ EqualsToken(Token::REGULAR, "こんにちは"),
+ EqualsToken(Token::REGULAR, "good"),
+ EqualsToken(Token::REGULAR, "afternoon"),
+ EqualsToken(Token::QUERY_PROPERTY, "title"),
+ EqualsToken(Token::REGULAR, "今天"),
+ EqualsToken(Token::QUERY_OR, ""),
+ EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::REGULAR, "ใน"),
+ EqualsToken(Token::REGULAR, "วันนี้"),
+ EqualsToken(Token::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::REGULAR, "B12"),
+ EqualsToken(Token::QUERY_RIGHT_PARENTHESES, ""))));
+ } else {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::vector<Token> tokens,
+ raw_query_tokenizer->TokenizeAll(
+ "こんにちはgood afternoon, title:今天 OR (ในวันนี้ -B12)"));
+ EXPECT_THAT(tokens,
+ ElementsAre(EqualsToken(Token::REGULAR, "こんにちは"),
+ EqualsToken(Token::REGULAR, "good"),
+ EqualsToken(Token::REGULAR, "afternoon"),
+ EqualsToken(Token::QUERY_PROPERTY, "title"),
+ EqualsToken(Token::REGULAR, "今天"),
+ EqualsToken(Token::QUERY_OR, ""),
+ EqualsToken(Token::QUERY_LEFT_PARENTHESES, ""),
+ EqualsToken(Token::REGULAR, "ใน"),
+ EqualsToken(Token::REGULAR, "วัน"),
+ EqualsToken(Token::REGULAR, "นี้"),
+ EqualsToken(Token::QUERY_EXCLUSION, ""),
+ EqualsToken(Token::REGULAR, "B12"),
+ EqualsToken(Token::QUERY_RIGHT_PARENTHESES, "")));
+ }
}
} // namespace
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-factory.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-factory.cc
index db973f3..0da4c2d 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-factory.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-factory.cc
@@ -34,7 +34,7 @@
// A LanguageSegmenter on success
// INVALID_ARGUMENT if locale string is invalid
//
-// TODO(samzheng): Figure out if we want to verify locale strings and notify
+// TODO(b/156383798): Figure out if we want to verify locale strings and notify
// users. Right now illegal locale strings will be ignored by ICU. ICU
// components will be created with its default locale.
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create(
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc
index 1cd6fa3..2c268ff 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc
@@ -423,7 +423,6 @@
IsOkAndHolds(ElementsAre("āăąḃḅḇčćç")));
}
-// TODO(samzheng): test cases for more languages (e.g. top 20 in the world)
TEST_P(ReverseJniLanguageSegmenterTest, WhitespaceSplitLanguages) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
@@ -438,7 +437,6 @@
IsOkAndHolds(ElementsAre("나는", " ", "매일", " ", "출근합니다", ".")));
}
-// TODO(samzheng): more mixed languages test cases
TEST_P(ReverseJniLanguageSegmenterTest, MixedLanguages) {
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
diff --git a/icing/tokenization/simple/space-language-segmenter-factory.cc b/icing/tokenization/simple/space-language-segmenter-factory.cc
index 1cca603..856ba0a 100644
--- a/icing/tokenization/simple/space-language-segmenter-factory.cc
+++ b/icing/tokenization/simple/space-language-segmenter-factory.cc
@@ -27,7 +27,7 @@
// A LanguageSegmenter on success
// INVALID_ARGUMENT if locale string is invalid
//
-// TODO(samzheng): Figure out if we want to verify locale strings and notify
+// TODO(b/156383798): Figure out if we want to verify locale strings and notify
// users. Right now illegal locale strings will be ignored by ICU. ICU
// components will be created with its default locale.
libtextclassifier3::StatusOr<std::unique_ptr<LanguageSegmenter>> Create(
diff --git a/icing/tokenization/token.h b/icing/tokenization/token.h
index 0bb3aaf..dda9efc 100644
--- a/icing/tokenization/token.h
+++ b/icing/tokenization/token.h
@@ -20,10 +20,6 @@
namespace icing {
namespace lib {
-// TODO(samzheng) Add group id support if needed. Right now in raw query we
-// don't need group ids since all our query operators (OR, Exclusion, Property
-// Restriction) only apply to the token right after them (vs. applying to
-// multiple tokens after them). The "groups" of tokens can be easily recognized.
struct Token {
enum Type {
// Common types
diff --git a/icing/transform/icu/icu-normalizer.cc b/icing/transform/icu/icu-normalizer.cc
index 0bb8326..eb0eead 100644
--- a/icing/transform/icu/icu-normalizer.cc
+++ b/icing/transform/icu/icu-normalizer.cc
@@ -41,7 +41,8 @@
// form decomposition) and NFKC (compatible normalization form composition)
// are applied as well as some other rules we need. More information at
// http://www.unicode.org/reports/tr15/
-// TODO(samzheng) Figure out if we need to support small hiragana to katakana
+//
+// Please note that the following rules don't support small hiragana to katakana
// transformation.
constexpr UChar kTransformRulesUtf16[] =
u"Lower; " // Lowercase
@@ -74,7 +75,7 @@
}
// Maximum number of pieces a Unicode character can be decomposed into.
- // TODO(samzheng) figure out if this number is proper.
+ // TODO(tjbarron) figure out if this number is proper.
constexpr int kDecompositionBufferCapacity = 5;
// A buffer used to store Unicode decomposition mappings of only one
diff --git a/icing/transform/icu/icu-normalizer_test.cc b/icing/transform/icu/icu-normalizer_test.cc
index 83fa972..f5d20ff 100644
--- a/icing/transform/icu/icu-normalizer_test.cc
+++ b/icing/transform/icu/icu-normalizer_test.cc
@@ -125,7 +125,6 @@
// Our current ICU rules can't handle Hebrew properly, e.g. the accents in
// "אָלֶף־בֵּית עִבְרִי"
// will be removed.
- // TODO (samzheng): figure out how we should handle Hebrew.
}
TEST_F(IcuNormalizerTest, FullWidthCharsToASCII) {
diff --git a/icing/util/clock.cc b/icing/util/clock.cc
index 7843bc4..270b5f0 100644
--- a/icing/util/clock.cc
+++ b/icing/util/clock.cc
@@ -16,16 +16,11 @@
#include <chrono> // NOLINT. Abseil library is not available in AOSP so we have
// to use chrono to get current time in milliseconds.
+#include <memory>
namespace icing {
namespace lib {
-int64_t Clock::GetSystemTimeMilliseconds() const {
- return std::chrono::duration_cast<std::chrono::milliseconds>(
- std::chrono::system_clock::now().time_since_epoch())
- .count();
-}
-
int64_t GetSteadyTimeNanoseconds() {
return std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::steady_clock::now().time_since_epoch())
@@ -38,5 +33,15 @@
.count();
}
+int64_t Clock::GetSystemTimeMilliseconds() const {
+ return std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::system_clock::now().time_since_epoch())
+ .count();
+}
+
+std::unique_ptr<Timer> Clock::GetNewTimer() const {
+ return std::make_unique<Timer>();
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/util/clock.h b/icing/util/clock.h
index a37fe58..2bb7818 100644
--- a/icing/util/clock.h
+++ b/icing/util/clock.h
@@ -16,21 +16,11 @@
#define ICING_UTIL_CLOCK_H_
#include <cstdint>
+#include <memory>
namespace icing {
namespace lib {
-// Wrapper around real-time clock functions. This is separated primarily so
-// tests can override this clock and inject it into the class under test.
-class Clock {
- public:
- virtual ~Clock() = default;
-
- // Returns the current time in milliseconds, it's guaranteed that the return
- // value is non-negative.
- virtual int64_t GetSystemTimeMilliseconds() const;
-};
-
// Returns the current steady time in nanoseconds. The steady clock is different
// from the system clock. It's monotonic and never returns a lower value than a
// previous call, while a system clock can be occasionally adjusted.
@@ -42,6 +32,43 @@
// adjusted.
int64_t GetSteadyTimeMilliseconds();
+// Used to calculate the elapsed time.
+class Timer {
+ public:
+ // Creates and starts the timer.
+ Timer() : start_timestamp_nanoseconds_(GetSteadyTimeNanoseconds()) {}
+
+ virtual ~Timer() = default;
+
+ // Returns the elapsed time from when timer started.
+ virtual int64_t GetElapsedMilliseconds() {
+ return GetElapsedNanoseconds() / 1000000;
+ }
+
+ // Returns the elapsed time from when timer started.
+ virtual int64_t GetElapsedNanoseconds() {
+ return GetSteadyTimeNanoseconds() - start_timestamp_nanoseconds_;
+ }
+
+ private:
+ int64_t start_timestamp_nanoseconds_;
+};
+
+// Wrapper around real-time clock functions. This is separated primarily so
+// tests can override this clock and inject it into the class under test.
+class Clock {
+ public:
+ virtual ~Clock() = default;
+
+ // Returns the current time in milliseconds, it's guaranteed that the return
+ // value is non-negative.
+ virtual int64_t GetSystemTimeMilliseconds() const;
+
+ // Returns a timer used to calculate the elapsed time. The timer starts when
+ // the method returns.
+ virtual std::unique_ptr<Timer> GetNewTimer() const;
+};
+
} // namespace lib
} // namespace icing
diff --git a/icing/util/crc32.h b/icing/util/crc32.h
index e8c7c8f..5befe44 100644
--- a/icing/util/crc32.h
+++ b/icing/util/crc32.h
@@ -28,10 +28,6 @@
// implementation.
//
// See https://www.zlib.net/manual.html#Checksum for more details.
-//
-// TODO (samzheng): investigate/benchmark swapping zlib crc32 with
-// util/hash/crc32c.h. Regarding util/hash/crc32c.h, CRC32C::Extend crashes as
-// described in b/145837799.
class Crc32 {
public:
// Default to the checksum of an empty string, that is "0".
diff --git a/icing/util/data-loss.h b/icing/util/data-loss.h
new file mode 100644
index 0000000..cb19ce2
--- /dev/null
+++ b/icing/util/data-loss.h
@@ -0,0 +1,36 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_UTIL_DATA_LOSS_H_
+#define ICING_UTIL_DATA_LOSS_H_
+
+namespace icing {
+namespace lib {
+
+enum DataLoss {
+ // No data loss happened. Everything initialized correctly.
+ NONE,
+
+  // Any changes made after a persist-to-disk call are lost. This includes
+  // adding new data, removing old data, and modifying existing data.
+ PARTIAL,
+
+ // All data is lost. IcingSearchEngine has completely reset.
+ COMPLETE
+};
+
+}
+} // namespace icing
+
+#endif // ICING_UTIL_DATA_LOSS_H_
diff --git a/icing/util/document-validator.cc b/icing/util/document-validator.cc
index 36b84f8..fb1fc4b 100644
--- a/icing/util/document-validator.cc
+++ b/icing/util/document-validator.cc
@@ -96,12 +96,12 @@
if (property_iter == parsed_property_configs.property_config_map.end()) {
return absl_ports::NotFoundError(absl_ports::StrCat(
"Property config '", property.name(), "' not found for key: (",
- document.namespace_(), ", ", document.uri(), ")."));
+ document.namespace_(), ", ", document.uri(),
+ ") of type: ", document.schema(), "."));
}
const PropertyConfigProto& property_config = *property_iter->second;
// Get the property value size according to data type.
- // TODO (samzheng): make sure values of other data types are empty.
int value_size = 0;
if (property_config.data_type() == PropertyConfigProto::DataType::STRING) {
value_size = property.string_values_size();
diff --git a/icing/util/document-validator.h b/icing/util/document-validator.h
index 34a3217..036d1fa 100644
--- a/icing/util/document-validator.h
+++ b/icing/util/document-validator.h
@@ -56,8 +56,6 @@
// In addition, all nested DocumentProto will also be validated towards the
// requirements above.
//
- // DocumentProto.custom_properties are not validated.
- //
// Returns:
// OK on success
// FAILED_PRECONDITION if no schema is set yet
diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc
index 16bdf78..ad5a93e 100644
--- a/icing/util/document-validator_test.cc
+++ b/icing/util/document-validator_test.cc
@@ -23,6 +23,7 @@
#include "icing/proto/schema.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/fake-clock.h"
#include "icing/testing/tmp-directory.h"
namespace icing {
@@ -57,7 +58,8 @@
CreateConversationTypeConfig(type_config);
ICING_ASSERT_OK_AND_ASSIGN(
- schema_store_, SchemaStore::Create(&filesystem_, GetTestTempDir()));
+ schema_store_,
+ SchemaStore::Create(&filesystem_, GetTestTempDir(), &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
document_validator_ =
@@ -121,6 +123,7 @@
std::unique_ptr<DocumentValidator> document_validator_;
std::unique_ptr<SchemaStore> schema_store_;
Filesystem filesystem_;
+ FakeClock fake_clock_;
};
TEST_F(DocumentValidatorTest, ValidateSimpleSchemasOk) {
@@ -192,18 +195,6 @@
HasSubstr("'WrongPropertyName' not found")));
}
-TEST_F(DocumentValidatorTest, ValidateAllCustomPropertyOk) {
- DocumentProto email =
- SimpleEmailBuilder()
- // A nonexistent property, would've triggered a NotFound message
- .AddCustomStringProperty("WrongPropertyName", kDefaultString)
- // 'subject' property should've been a string according to the schema
- .AddCustomBooleanProperty(kPropertySubject, false, true)
- .Build();
-
- EXPECT_THAT(document_validator_->Validate(email), IsOk());
-}
-
TEST_F(DocumentValidatorTest, ValidateExactlyOneRequiredValueOk) {
// Required property should have exactly 1 value
DocumentProto email =
@@ -334,7 +325,7 @@
// Set a schema with only the 'Email' type
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, custom_schema_dir));
+ SchemaStore::Create(&filesystem_, custom_schema_dir, &fake_clock_));
ASSERT_THAT(schema_store->SetSchema(email_schema), IsOk());
DocumentValidator document_validator(schema_store.get());
diff --git a/icing/util/i18n-utils.cc b/icing/util/i18n-utils.cc
index d6754d5..cd0a227 100644
--- a/icing/util/i18n-utils.cc
+++ b/icing/util/i18n-utils.cc
@@ -156,7 +156,7 @@
uint8_t utf8_buffer[4]; // U8_APPEND writes 0 to 4 bytes
int utf8_index = 0;
- UBool has_error = FALSE;
+ UBool has_error = false;
// utf8_index is advanced to the end of the contents if successful
U8_APPEND(utf8_buffer, utf8_index, sizeof(utf8_buffer), uchar, has_error);
diff --git a/icing/util/timer.h b/icing/util/timer.h
deleted file mode 100644
index da872fe..0000000
--- a/icing/util/timer.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_UTIL_TIMER_H_
-#define ICING_UTIL_TIMER_H_
-
-#include <cstdint>
-
-#include "icing/util/clock.h"
-
-namespace icing {
-namespace lib {
-
-// A util class to calculate the elapsed time.
-class Timer {
- public:
- // Timer starts.
- Timer() : start_timestamp_milliseconds_(GetSteadyTimeMilliseconds()) {}
-
- // Returns the elapsed time from when timer started.
- int64_t GetElapsedMilliseconds() {
- return GetSteadyTimeMilliseconds() - start_timestamp_milliseconds_;
- }
-
- private:
- int64_t start_timestamp_milliseconds_;
-};
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_UTIL_TIMER_H_
diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java
index 76fa33d..f4d8312 100644
--- a/java/src/com/google/android/icing/IcingSearchEngine.java
+++ b/java/src/com/google/android/icing/IcingSearchEngine.java
@@ -17,6 +17,7 @@
import android.util.Log;
import androidx.annotation.NonNull;
import com.google.android.icing.proto.DeleteByNamespaceResultProto;
+import com.google.android.icing.proto.DeleteByQueryResultProto;
import com.google.android.icing.proto.DeleteBySchemaTypeResultProto;
import com.google.android.icing.proto.DeleteResultProto;
import com.google.android.icing.proto.DocumentProto;
@@ -30,6 +31,7 @@
import com.google.android.icing.proto.OptimizeResultProto;
import com.google.android.icing.proto.PersistToDiskResultProto;
import com.google.android.icing.proto.PutResultProto;
+import com.google.android.icing.proto.ReportUsageResultProto;
import com.google.android.icing.proto.ResetResultProto;
import com.google.android.icing.proto.ResultSpecProto;
import com.google.android.icing.proto.SchemaProto;
@@ -38,17 +40,27 @@
import com.google.android.icing.proto.SearchSpecProto;
import com.google.android.icing.proto.SetSchemaResultProto;
import com.google.android.icing.proto.StatusProto;
+import com.google.android.icing.proto.UsageReport;
import com.google.protobuf.ExtensionRegistryLite;
import com.google.protobuf.InvalidProtocolBufferException;
+import java.io.Closeable;
-/** Java wrapper to access native APIs in external/icing/icing/icing-search-engine.h */
-public final class IcingSearchEngine {
+/**
+ * Java wrapper to access native APIs in external/icing/icing/icing-search-engine.h
+ *
+ * <p>If this instance has been closed, the instance is no longer usable.
+ *
+ * <p>NOTE: This class is NOT thread-safe.
+ */
+public final class IcingSearchEngine implements Closeable {
private static final String TAG = "IcingSearchEngine";
private static final ExtensionRegistryLite EXTENSION_REGISTRY_LITE =
ExtensionRegistryLite.getEmptyRegistry();
- private final long nativePointer;
+ private long nativePointer;
+
+ private boolean closed = false;
static {
// NOTE: This can fail with an UnsatisfiedLinkError
@@ -64,9 +76,34 @@
}
}
+ private void throwIfClosed() {
+ if (closed) {
+ throw new IllegalStateException("Trying to use a closed IcingSearchEngine instance.");
+ }
+ }
+
+ @Override
+ public void close() {
+ throwIfClosed();
+
+ if (nativePointer != 0) {
+ nativeDestroy(this);
+ }
+ nativePointer = 0;
+ closed = true;
+ }
+
+ @Override
+ protected void finalize() throws Throwable {
+ super.finalize();
+ close();
+ }
+
@NonNull
public InitializeResultProto initialize() {
- byte[] initializeResultBytes = nativeInitialize(nativePointer);
+ throwIfClosed();
+
+ byte[] initializeResultBytes = nativeInitialize(this);
if (initializeResultBytes == null) {
Log.e(TAG, "Received null InitializeResult from native.");
return InitializeResultProto.newBuilder()
@@ -92,8 +129,10 @@
@NonNull
public SetSchemaResultProto setSchema(
@NonNull SchemaProto schema, boolean ignoreErrorsAndDeleteDocuments) {
+ throwIfClosed();
+
byte[] setSchemaResultBytes =
- nativeSetSchema(nativePointer, schema.toByteArray(), ignoreErrorsAndDeleteDocuments);
+ nativeSetSchema(this, schema.toByteArray(), ignoreErrorsAndDeleteDocuments);
if (setSchemaResultBytes == null) {
Log.e(TAG, "Received null SetSchemaResultProto from native.");
return SetSchemaResultProto.newBuilder()
@@ -113,7 +152,9 @@
@NonNull
public GetSchemaResultProto getSchema() {
- byte[] getSchemaResultBytes = nativeGetSchema(nativePointer);
+ throwIfClosed();
+
+ byte[] getSchemaResultBytes = nativeGetSchema(this);
if (getSchemaResultBytes == null) {
Log.e(TAG, "Received null GetSchemaResultProto from native.");
return GetSchemaResultProto.newBuilder()
@@ -133,7 +174,9 @@
@NonNull
public GetSchemaTypeResultProto getSchemaType(@NonNull String schemaType) {
- byte[] getSchemaTypeResultBytes = nativeGetSchemaType(nativePointer, schemaType);
+ throwIfClosed();
+
+ byte[] getSchemaTypeResultBytes = nativeGetSchemaType(this, schemaType);
if (getSchemaTypeResultBytes == null) {
Log.e(TAG, "Received null GetSchemaTypeResultProto from native.");
return GetSchemaTypeResultProto.newBuilder()
@@ -153,7 +196,9 @@
@NonNull
public PutResultProto put(@NonNull DocumentProto document) {
- byte[] putResultBytes = nativePut(nativePointer, document.toByteArray());
+ throwIfClosed();
+
+ byte[] putResultBytes = nativePut(this, document.toByteArray());
if (putResultBytes == null) {
Log.e(TAG, "Received null PutResultProto from native.");
return PutResultProto.newBuilder()
@@ -173,7 +218,9 @@
@NonNull
public GetResultProto get(@NonNull String namespace, @NonNull String uri) {
- byte[] getResultBytes = nativeGet(nativePointer, namespace, uri);
+ throwIfClosed();
+
+ byte[] getResultBytes = nativeGet(this, namespace, uri);
if (getResultBytes == null) {
Log.e(TAG, "Received null GetResultProto from native.");
return GetResultProto.newBuilder()
@@ -192,8 +239,32 @@
}
@NonNull
+ public ReportUsageResultProto reportUsage(@NonNull UsageReport usageReport) {
+ throwIfClosed();
+
+ byte[] reportUsageResultBytes = nativeReportUsage(this, usageReport.toByteArray());
+ if (reportUsageResultBytes == null) {
+ Log.e(TAG, "Received null ReportUsageResultProto from native.");
+ return ReportUsageResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return ReportUsageResultProto.parseFrom(reportUsageResultBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing ReportUsageResultProto.", e);
+ return ReportUsageResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
public GetAllNamespacesResultProto getAllNamespaces() {
- byte[] getAllNamespacesResultBytes = nativeGetAllNamespaces(nativePointer);
+ throwIfClosed();
+
+ byte[] getAllNamespacesResultBytes = nativeGetAllNamespaces(this);
if (getAllNamespacesResultBytes == null) {
Log.e(TAG, "Received null GetAllNamespacesResultProto from native.");
return GetAllNamespacesResultProto.newBuilder()
@@ -217,12 +288,11 @@
@NonNull SearchSpecProto searchSpec,
@NonNull ScoringSpecProto scoringSpec,
@NonNull ResultSpecProto resultSpec) {
+ throwIfClosed();
+
byte[] searchResultBytes =
nativeSearch(
- nativePointer,
- searchSpec.toByteArray(),
- scoringSpec.toByteArray(),
- resultSpec.toByteArray());
+ this, searchSpec.toByteArray(), scoringSpec.toByteArray(), resultSpec.toByteArray());
if (searchResultBytes == null) {
Log.e(TAG, "Received null SearchResultProto from native.");
return SearchResultProto.newBuilder()
@@ -242,7 +312,9 @@
@NonNull
public SearchResultProto getNextPage(long nextPageToken) {
- byte[] searchResultBytes = nativeGetNextPage(nativePointer, nextPageToken);
+ throwIfClosed();
+
+ byte[] searchResultBytes = nativeGetNextPage(this, nextPageToken);
if (searchResultBytes == null) {
Log.e(TAG, "Received null SearchResultProto from native.");
return SearchResultProto.newBuilder()
@@ -262,12 +334,16 @@
@NonNull
public void invalidateNextPageToken(long nextPageToken) {
- nativeInvalidateNextPageToken(nativePointer, nextPageToken);
+ throwIfClosed();
+
+ nativeInvalidateNextPageToken(this, nextPageToken);
}
@NonNull
public DeleteResultProto delete(@NonNull String namespace, @NonNull String uri) {
- byte[] deleteResultBytes = nativeDelete(nativePointer, namespace, uri);
+ throwIfClosed();
+
+ byte[] deleteResultBytes = nativeDelete(this, namespace, uri);
if (deleteResultBytes == null) {
Log.e(TAG, "Received null DeleteResultProto from native.");
return DeleteResultProto.newBuilder()
@@ -287,7 +363,9 @@
@NonNull
public DeleteByNamespaceResultProto deleteByNamespace(@NonNull String namespace) {
- byte[] deleteByNamespaceResultBytes = nativeDeleteByNamespace(nativePointer, namespace);
+ throwIfClosed();
+
+ byte[] deleteByNamespaceResultBytes = nativeDeleteByNamespace(this, namespace);
if (deleteByNamespaceResultBytes == null) {
Log.e(TAG, "Received null DeleteByNamespaceResultProto from native.");
return DeleteByNamespaceResultProto.newBuilder()
@@ -308,7 +386,9 @@
@NonNull
public DeleteBySchemaTypeResultProto deleteBySchemaType(@NonNull String schemaType) {
- byte[] deleteBySchemaTypeResultBytes = nativeDeleteBySchemaType(nativePointer, schemaType);
+ throwIfClosed();
+
+ byte[] deleteBySchemaTypeResultBytes = nativeDeleteBySchemaType(this, schemaType);
if (deleteBySchemaTypeResultBytes == null) {
Log.e(TAG, "Received null DeleteBySchemaTypeResultProto from native.");
return DeleteBySchemaTypeResultProto.newBuilder()
@@ -328,21 +408,22 @@
}
@NonNull
- public DeleteResultProto deleteByQuery(@NonNull SearchSpecProto searchSpec) {
- byte[] deleteResultBytes = nativeDeleteByQuery(nativePointer, searchSpec.toByteArray());
+ public DeleteByQueryResultProto deleteByQuery(@NonNull SearchSpecProto searchSpec) {
+ throwIfClosed();
+
+ byte[] deleteResultBytes = nativeDeleteByQuery(this, searchSpec.toByteArray());
if (deleteResultBytes == null) {
Log.e(TAG, "Received null DeleteResultProto from native.");
- return DeleteResultProto.newBuilder()
+ return DeleteByQueryResultProto.newBuilder()
.setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
.build();
}
try {
- return DeleteResultProto.parseFrom(
- deleteResultBytes, EXTENSION_REGISTRY_LITE);
+ return DeleteByQueryResultProto.parseFrom(deleteResultBytes, EXTENSION_REGISTRY_LITE);
} catch (InvalidProtocolBufferException e) {
Log.e(TAG, "Error parsing DeleteResultProto.", e);
- return DeleteResultProto.newBuilder()
+ return DeleteByQueryResultProto.newBuilder()
.setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
.build();
}
@@ -350,7 +431,9 @@
@NonNull
public PersistToDiskResultProto persistToDisk() {
- byte[] persistToDiskResultBytes = nativePersistToDisk(nativePointer);
+ throwIfClosed();
+
+ byte[] persistToDiskResultBytes = nativePersistToDisk(this);
if (persistToDiskResultBytes == null) {
Log.e(TAG, "Received null PersistToDiskResultProto from native.");
return PersistToDiskResultProto.newBuilder()
@@ -370,7 +453,9 @@
@NonNull
public OptimizeResultProto optimize() {
- byte[] optimizeResultBytes = nativeOptimize(nativePointer);
+ throwIfClosed();
+
+ byte[] optimizeResultBytes = nativeOptimize(this);
if (optimizeResultBytes == null) {
Log.e(TAG, "Received null OptimizeResultProto from native.");
return OptimizeResultProto.newBuilder()
@@ -390,7 +475,9 @@
@NonNull
public GetOptimizeInfoResultProto getOptimizeInfo() {
- byte[] getOptimizeInfoResultBytes = nativeGetOptimizeInfo(nativePointer);
+ throwIfClosed();
+
+ byte[] getOptimizeInfoResultBytes = nativeGetOptimizeInfo(this);
if (getOptimizeInfoResultBytes == null) {
Log.e(TAG, "Received null GetOptimizeInfoResultProto from native.");
return GetOptimizeInfoResultProto.newBuilder()
@@ -411,7 +498,9 @@
@NonNull
public ResetResultProto reset() {
- byte[] resetResultBytes = nativeReset(nativePointer);
+ throwIfClosed();
+
+ byte[] resetResultBytes = nativeReset(this);
if (resetResultBytes == null) {
Log.e(TAG, "Received null ResetResultProto from native.");
return ResetResultProto.newBuilder()
@@ -431,41 +520,54 @@
private static native long nativeCreate(byte[] icingSearchEngineOptionsBytes);
- private static native byte[] nativeInitialize(long nativePointer);
+ private static native void nativeDestroy(IcingSearchEngine instance);
+
+ private static native byte[] nativeInitialize(IcingSearchEngine instance);
private static native byte[] nativeSetSchema(
- long nativePointer, byte[] schemaBytes, boolean ignoreErrorsAndDeleteDocuments);
+ IcingSearchEngine instance, byte[] schemaBytes, boolean ignoreErrorsAndDeleteDocuments);
- private static native byte[] nativeGetSchema(long nativePointer);
+ private static native byte[] nativeGetSchema(IcingSearchEngine instance);
- private static native byte[] nativeGetSchemaType(long nativePointer, String schemaType);
+ private static native byte[] nativeGetSchemaType(IcingSearchEngine instance, String schemaType);
- private static native byte[] nativePut(long nativePointer, byte[] documentBytes);
+ private static native byte[] nativePut(IcingSearchEngine instance, byte[] documentBytes);
- private static native byte[] nativeGet(long nativePointer, String namespace, String uri);
+ private static native byte[] nativeGet(IcingSearchEngine instance, String namespace, String uri);
- private static native byte[] nativeGetAllNamespaces(long nativePointer);
+ private static native byte[] nativeReportUsage(
+ IcingSearchEngine instance, byte[] usageReportBytes);
+
+ private static native byte[] nativeGetAllNamespaces(IcingSearchEngine instance);
private static native byte[] nativeSearch(
- long nativePointer, byte[] searchSpecBytes, byte[] scoringSpecBytes, byte[] resultSpecBytes);
+ IcingSearchEngine instance,
+ byte[] searchSpecBytes,
+ byte[] scoringSpecBytes,
+ byte[] resultSpecBytes);
- private static native byte[] nativeGetNextPage(long nativePointer, long nextPageToken);
+ private static native byte[] nativeGetNextPage(IcingSearchEngine instance, long nextPageToken);
- private static native void nativeInvalidateNextPageToken(long nativePointer, long nextPageToken);
+ private static native void nativeInvalidateNextPageToken(
+ IcingSearchEngine instance, long nextPageToken);
- private static native byte[] nativeDelete(long nativePointer, String namespace, String uri);
+ private static native byte[] nativeDelete(
+ IcingSearchEngine instance, String namespace, String uri);
- private static native byte[] nativeDeleteByNamespace(long nativePointer, String namespace);
+ private static native byte[] nativeDeleteByNamespace(
+ IcingSearchEngine instance, String namespace);
- private static native byte[] nativeDeleteBySchemaType(long nativePointer, String schemaType);
+ private static native byte[] nativeDeleteBySchemaType(
+ IcingSearchEngine instance, String schemaType);
- private static native byte[] nativeDeleteByQuery(long nativePointer, byte[] searchSpecBytes);
+ private static native byte[] nativeDeleteByQuery(
+ IcingSearchEngine instance, byte[] searchSpecBytes);
- private static native byte[] nativePersistToDisk(long nativePointer);
+ private static native byte[] nativePersistToDisk(IcingSearchEngine instance);
- private static native byte[] nativeOptimize(long nativePointer);
+ private static native byte[] nativeOptimize(IcingSearchEngine instance);
- private static native byte[] nativeGetOptimizeInfo(long nativePointer);
+ private static native byte[] nativeGetOptimizeInfo(IcingSearchEngine instance);
- private static native byte[] nativeReset(long nativePointer);
+ private static native byte[] nativeReset(IcingSearchEngine instance);
}
diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
index 4c05a7a..6f07e1a 100644
--- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
+++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
@@ -18,6 +18,7 @@
import static com.google.common.truth.Truth.assertWithMessage;
import com.google.android.icing.proto.DeleteByNamespaceResultProto;
+import com.google.android.icing.proto.DeleteByQueryResultProto;
import com.google.android.icing.proto.DeleteBySchemaTypeResultProto;
import com.google.android.icing.proto.DeleteResultProto;
import com.google.android.icing.proto.DocumentProto;
@@ -33,6 +34,7 @@
import com.google.android.icing.proto.PropertyConfigProto;
import com.google.android.icing.proto.PropertyProto;
import com.google.android.icing.proto.PutResultProto;
+import com.google.android.icing.proto.ReportUsageResultProto;
import com.google.android.icing.proto.ResetResultProto;
import com.google.android.icing.proto.ResultSpecProto;
import com.google.android.icing.proto.SchemaProto;
@@ -45,10 +47,12 @@
import com.google.android.icing.proto.StringIndexingConfig;
import com.google.android.icing.proto.StringIndexingConfig.TokenizerType;
import com.google.android.icing.proto.TermMatchType;
+import com.google.android.icing.proto.UsageReport;
import com.google.android.icing.IcingSearchEngine;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
+import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
@@ -68,6 +72,8 @@
private File tempDir;
+ private IcingSearchEngine icingSearchEngine;
+
private static SchemaTypeConfigProto createEmailTypeConfig() {
return SchemaTypeConfigProto.newBuilder()
.setSchemaType(EMAIL_TYPE)
@@ -104,77 +110,72 @@
@Before
public void setUp() throws Exception {
tempDir = temporaryFolder.newFolder();
+ IcingSearchEngineOptions options =
+ IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
+ icingSearchEngine = new IcingSearchEngine(options);
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ icingSearchEngine.close();
}
@Test
public void testInitialize() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
-
- InitializeResultProto initializeResultProto = icing.initialize();
+ InitializeResultProto initializeResultProto = icingSearchEngine.initialize();
assertStatusOk(initializeResultProto.getStatus());
}
@Test
public void testSetAndGetSchema() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertStatusOk(icing.initialize().getStatus());
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
SetSchemaResultProto setSchemaResultProto =
- icing.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false);
+ icingSearchEngine.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false);
assertStatusOk(setSchemaResultProto.getStatus());
- GetSchemaResultProto getSchemaResultProto = icing.getSchema();
+ GetSchemaResultProto getSchemaResultProto = icingSearchEngine.getSchema();
assertStatusOk(getSchemaResultProto.getStatus());
assertThat(getSchemaResultProto.getSchema()).isEqualTo(schema);
GetSchemaTypeResultProto getSchemaTypeResultProto =
- icing.getSchemaType(emailTypeConfig.getSchemaType());
+ icingSearchEngine.getSchemaType(emailTypeConfig.getSchemaType());
assertStatusOk(getSchemaTypeResultProto.getStatus());
assertThat(getSchemaTypeResultProto.getSchemaTypeConfig()).isEqualTo(emailTypeConfig);
}
@Test
public void testPutAndGetDocuments() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertStatusOk(icing.initialize().getStatus());
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
assertThat(
- icing
+ icingSearchEngine
.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
.getStatus()
.getCode())
.isEqualTo(StatusProto.Code.OK);
DocumentProto emailDocument = createEmailDocument("namespace", "uri");
- PutResultProto putResultProto = icing.put(emailDocument);
+ PutResultProto putResultProto = icingSearchEngine.put(emailDocument);
assertStatusOk(putResultProto.getStatus());
- GetResultProto getResultProto = icing.get("namespace", "uri");
+ GetResultProto getResultProto = icingSearchEngine.get("namespace", "uri");
assertStatusOk(getResultProto.getStatus());
assertThat(getResultProto.getDocument()).isEqualTo(emailDocument);
}
@Test
public void testSearch() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertStatusOk(icing.initialize().getStatus());
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
assertThat(
- icing
+ icingSearchEngine
.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
.getStatus()
.getCode())
@@ -184,7 +185,7 @@
createEmailDocument("namespace", "uri").toBuilder()
.addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("foo"))
.build();
- assertStatusOk(icing.put(emailDocument).getStatus());
+ assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
SearchSpecProto searchSpec =
SearchSpecProto.newBuilder()
@@ -193,7 +194,7 @@
.build();
SearchResultProto searchResultProto =
- icing.search(
+ icingSearchEngine.search(
searchSpec,
ScoringSpecProto.getDefaultInstance(),
ResultSpecProto.getDefaultInstance());
@@ -204,15 +205,12 @@
@Test
public void testGetNextPage() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertStatusOk(icing.initialize().getStatus());
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
assertThat(
- icing
+ icingSearchEngine
.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
.getStatus()
.getCode())
@@ -225,8 +223,8 @@
.addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("foo"))
.build();
documents.put("uri:" + i, emailDocument);
- assertWithMessage(icing.put(emailDocument).getStatus().getMessage())
- .that(icing.put(emailDocument).getStatus().getCode())
+ assertWithMessage(icingSearchEngine.put(emailDocument).getStatus().getMessage())
+ .that(icingSearchEngine.put(emailDocument).getStatus().getCode())
.isEqualTo(StatusProto.Code.OK);
}
@@ -238,7 +236,8 @@
ResultSpecProto resultSpecProto = ResultSpecProto.newBuilder().setNumPerPage(1).build();
SearchResultProto searchResultProto =
- icing.search(searchSpec, ScoringSpecProto.getDefaultInstance(), resultSpecProto);
+ icingSearchEngine.search(
+ searchSpec, ScoringSpecProto.getDefaultInstance(), resultSpecProto);
assertStatusOk(searchResultProto.getStatus());
assertThat(searchResultProto.getResultsCount()).isEqualTo(1);
DocumentProto resultDocument = searchResultProto.getResults(0).getDocument();
@@ -246,7 +245,7 @@
// fetch rest pages
for (int i = 1; i < 5; i++) {
- searchResultProto = icing.getNextPage(searchResultProto.getNextPageToken());
+ searchResultProto = icingSearchEngine.getNextPage(searchResultProto.getNextPageToken());
assertWithMessage(searchResultProto.getStatus().getMessage())
.that(searchResultProto.getStatus().getCode())
.isEqualTo(StatusProto.Code.OK);
@@ -256,120 +255,109 @@
}
// invalidate rest result
- icing.invalidateNextPageToken(searchResultProto.getNextPageToken());
+ icingSearchEngine.invalidateNextPageToken(searchResultProto.getNextPageToken());
- searchResultProto = icing.getNextPage(searchResultProto.getNextPageToken());
+ searchResultProto = icingSearchEngine.getNextPage(searchResultProto.getNextPageToken());
assertStatusOk(searchResultProto.getStatus());
assertThat(searchResultProto.getResultsCount()).isEqualTo(0);
}
@Test
public void testDelete() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertStatusOk(icing.initialize().getStatus());
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
assertThat(
- icing
+ icingSearchEngine
.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
.getStatus()
.getCode())
.isEqualTo(StatusProto.Code.OK);
DocumentProto emailDocument = createEmailDocument("namespace", "uri");
- assertStatusOk(icing.put(emailDocument).getStatus());
+ assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
- DeleteResultProto deleteResultProto = icing.delete("namespace", "uri");
+ DeleteResultProto deleteResultProto = icingSearchEngine.delete("namespace", "uri");
assertStatusOk(deleteResultProto.getStatus());
- GetResultProto getResultProto = icing.get("namespace", "uri");
+ GetResultProto getResultProto = icingSearchEngine.get("namespace", "uri");
assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
}
@Test
public void testDeleteByNamespace() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertStatusOk(icing.initialize().getStatus());
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
assertThat(
- icing
+ icingSearchEngine
.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
.getStatus()
.getCode())
.isEqualTo(StatusProto.Code.OK);
DocumentProto emailDocument = createEmailDocument("namespace", "uri");
- assertStatusOk(icing.put(emailDocument).getStatus());
+ assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
DeleteByNamespaceResultProto deleteByNamespaceResultProto =
- icing.deleteByNamespace("namespace");
+ icingSearchEngine.deleteByNamespace("namespace");
assertStatusOk(deleteByNamespaceResultProto.getStatus());
- GetResultProto getResultProto = icing.get("namespace", "uri");
+ GetResultProto getResultProto = icingSearchEngine.get("namespace", "uri");
assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
}
@Test
public void testDeleteBySchemaType() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertStatusOk(icing.initialize().getStatus());
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
assertThat(
- icing
+ icingSearchEngine
.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
.getStatus()
.getCode())
.isEqualTo(StatusProto.Code.OK);
DocumentProto emailDocument = createEmailDocument("namespace", "uri");
- assertStatusOk(icing.put(emailDocument).getStatus());
+ assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
DeleteBySchemaTypeResultProto deleteBySchemaTypeResultProto =
- icing.deleteBySchemaType(EMAIL_TYPE);
+ icingSearchEngine.deleteBySchemaType(EMAIL_TYPE);
assertStatusOk(deleteBySchemaTypeResultProto.getStatus());
- GetResultProto getResultProto = icing.get("namespace", "uri");
+ GetResultProto getResultProto = icingSearchEngine.get("namespace", "uri");
assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
}
-
@Test
public void testDeleteByQuery() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertStatusOk(icing.initialize().getStatus());
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
assertThat(
- icing
- .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
- .getStatus()
- .getCode())
+ icingSearchEngine
+ .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
+ .getStatus()
+ .getCode())
.isEqualTo(StatusProto.Code.OK);
DocumentProto emailDocument1 =
createEmailDocument("namespace", "uri1").toBuilder()
- .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("foo"))
- .build();;
- assertStatusOk(icing.put(emailDocument1).getStatus());
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("foo"))
+ .build();
+
+ assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus());
DocumentProto emailDocument2 =
createEmailDocument("namespace", "uri2").toBuilder()
- .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("bar"))
- .build();;
- assertStatusOk(icing.put(emailDocument2).getStatus());
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues("bar"))
+ .build();
+
+ assertStatusOk(icingSearchEngine.put(emailDocument2).getStatus());
SearchSpecProto searchSpec =
SearchSpecProto.newBuilder()
@@ -378,7 +366,7 @@
.build();
SearchResultProto searchResultProto =
- icing.search(
+ icingSearchEngine.search(
searchSpec,
ScoringSpecProto.getDefaultInstance(),
ResultSpecProto.getDefaultInstance());
@@ -386,45 +374,36 @@
assertThat(searchResultProto.getResultsCount()).isEqualTo(1);
assertThat(searchResultProto.getResults(0).getDocument()).isEqualTo(emailDocument1);
- DeleteResultProto deleteResultProto = icing.deleteByQuery(searchSpec);
+ DeleteByQueryResultProto deleteResultProto = icingSearchEngine.deleteByQuery(searchSpec);
assertStatusOk(deleteResultProto.getStatus());
- GetResultProto getResultProto = icing.get("namespace", "uri1");
+ GetResultProto getResultProto = icingSearchEngine.get("namespace", "uri1");
assertThat(getResultProto.getStatus().getCode()).isEqualTo(StatusProto.Code.NOT_FOUND);
- getResultProto = icing.get("namespace", "uri2");
+ getResultProto = icingSearchEngine.get("namespace", "uri2");
assertStatusOk(getResultProto.getStatus());
}
@Test
public void testPersistToDisk() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertStatusOk(icing.initialize().getStatus());
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
- PersistToDiskResultProto persistToDiskResultProto = icing.persistToDisk();
+ PersistToDiskResultProto persistToDiskResultProto = icingSearchEngine.persistToDisk();
assertStatusOk(persistToDiskResultProto.getStatus());
}
@Test
public void testOptimize() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertStatusOk(icing.initialize().getStatus());
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
- OptimizeResultProto optimizeResultProto = icing.optimize();
+ OptimizeResultProto optimizeResultProto = icingSearchEngine.optimize();
assertStatusOk(optimizeResultProto.getStatus());
}
@Test
public void testGetOptimizeInfo() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertStatusOk(icing.initialize().getStatus());
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
- GetOptimizeInfoResultProto getOptimizeInfoResultProto = icing.getOptimizeInfo();
+ GetOptimizeInfoResultProto getOptimizeInfoResultProto = icingSearchEngine.getOptimizeInfo();
assertStatusOk(getOptimizeInfoResultProto.getStatus());
assertThat(getOptimizeInfoResultProto.getOptimizableDocs()).isEqualTo(0);
assertThat(getOptimizeInfoResultProto.getEstimatedOptimizableBytes()).isEqualTo(0);
@@ -432,39 +411,63 @@
@Test
public void testGetAllNamespaces() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertStatusOk(icing.initialize().getStatus());
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
assertThat(
- icing
- .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
- .getStatus()
- .getCode())
+ icingSearchEngine
+ .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
+ .getStatus()
+ .getCode())
.isEqualTo(StatusProto.Code.OK);
DocumentProto emailDocument = createEmailDocument("namespace", "uri");
- assertStatusOk(icing.put(emailDocument).getStatus());
+ assertStatusOk(icingSearchEngine.put(emailDocument).getStatus());
- GetAllNamespacesResultProto getAllNamespacesResultProto = icing.getAllNamespaces();
+ GetAllNamespacesResultProto getAllNamespacesResultProto = icingSearchEngine.getAllNamespaces();
assertStatusOk(getAllNamespacesResultProto.getStatus());
assertThat(getAllNamespacesResultProto.getNamespacesList()).containsExactly("namespace");
}
@Test
public void testReset() throws Exception {
- IcingSearchEngineOptions options =
- IcingSearchEngineOptions.newBuilder().setBaseDir(tempDir.getCanonicalPath()).build();
- IcingSearchEngine icing = new IcingSearchEngine(options);
- assertStatusOk(icing.initialize().getStatus());
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
- ResetResultProto resetResultProto = icing.reset();
+ ResetResultProto resetResultProto = icingSearchEngine.reset();
assertStatusOk(resetResultProto.getStatus());
}
+ @Test
+ public void testReportUsage() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ // Set schema and put a document.
+ SchemaTypeConfigProto emailTypeConfig = createEmailTypeConfig();
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(emailTypeConfig).build();
+ assertThat(
+ icingSearchEngine
+ .setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false)
+ .getStatus()
+ .getCode())
+ .isEqualTo(StatusProto.Code.OK);
+
+ DocumentProto emailDocument = createEmailDocument("namespace", "uri");
+ PutResultProto putResultProto = icingSearchEngine.put(emailDocument);
+ assertStatusOk(putResultProto.getStatus());
+
+ // Report usage
+ UsageReport usageReport =
+ UsageReport.newBuilder()
+ .setDocumentNamespace("namespace")
+ .setDocumentUri("uri")
+ .setUsageTimestampMs(1)
+ .setUsageType(UsageReport.UsageType.USAGE_TYPE1)
+ .build();
+ ReportUsageResultProto reportUsageResultProto = icingSearchEngine.reportUsage(usageReport);
+ assertStatusOk(reportUsageResultProto.getStatus());
+ }
+
private static void assertStatusOk(StatusProto status) {
assertWithMessage(status.getMessage()).that(status.getCode()).isEqualTo(StatusProto.Code.OK);
}
diff --git a/proto/icing/proto/document.proto b/proto/icing/proto/document.proto
index ff215bd..ae73917 100644
--- a/proto/icing/proto/document.proto
+++ b/proto/icing/proto/document.proto
@@ -16,8 +16,8 @@
package icing.lib;
-import "icing/proto/status.proto";
import "icing/proto/logging.proto";
+import "icing/proto/status.proto";
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
@@ -50,11 +50,6 @@
// already defined in the schema for this Document's schema_type.
repeated PropertyProto properties = 5;
- // OPTIONAL: Properties that will not be validated against the schema,
- // indexed, or be searchable. The properties will be stored in the Documents,
- // but never looked at by Icing.
- repeated PropertyProto custom_properties = 6;
-
// OPTIONAL: Score of the document which could be used during search result
// ranking. Negative values will lead to validation errors. The default is the
// lowest score 0.
@@ -69,6 +64,8 @@
// TODO(cassiewang): Benchmark if fixed64 or some other proto type is better
// in terms of space/time efficiency. Both for ttl_ms and timestamp fields
optional int64 ttl_ms = 8 [default = 0];
+
+ reserved 6;
}
// Holds a property field of the Document.
@@ -146,7 +143,7 @@
}
// Result of a call to IcingSearchEngine.Delete
-// Next tag: 2
+// Next tag: 3
message DeleteResultProto {
// Status code can be one of:
// OK
@@ -159,10 +156,13 @@
// TODO(b/147699081): Fix error codes: +ABORTED.
// go/icing-library-apis.
optional StatusProto status = 1;
+
+ // Stats for delete execution performance.
+ optional NativeDeleteStats delete_stats = 2;
}
// Result of a call to IcingSearchEngine.DeleteByNamespace
-// Next tag: 2
+// Next tag: 3
message DeleteByNamespaceResultProto {
// Status code can be one of:
// OK
@@ -175,10 +175,13 @@
// TODO(b/147699081): Fix error codes: +ABORTED.
// go/icing-library-apis.
optional StatusProto status = 1;
+
+ // Stats for delete execution performance.
+ optional NativeDeleteStats delete_stats = 2;
}
// Result of a call to IcingSearchEngine.DeleteBySchemaType
-// Next tag: 2
+// Next tag: 3
message DeleteBySchemaTypeResultProto {
// Status code can be one of:
// OK
@@ -191,4 +194,26 @@
// TODO(b/147699081): Fix error codes: +ABORTED.
// go/icing-library-apis.
optional StatusProto status = 1;
+
+ // Stats for delete execution performance.
+ optional NativeDeleteStats delete_stats = 2;
+}
+
+// Result of a call to IcingSearchEngine.DeleteByQuery
+// Next tag: 3
+message DeleteByQueryResultProto {
+ // Status code can be one of:
+ // OK
+ // FAILED_PRECONDITION
+ // NOT_FOUND
+ // INTERNAL
+ //
+ // See status.proto for more details.
+ //
+ // TODO(b/147699081): Fix error codes: +ABORTED.
+ // go/icing-library-apis.
+ optional StatusProto status = 1;
+
+ // Stats for delete execution performance.
+ optional NativeDeleteStats delete_stats = 2;
}
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
index be6c9f4..09ec756 100644
--- a/proto/icing/proto/logging.proto
+++ b/proto/icing/proto/logging.proto
@@ -16,9 +16,10 @@
package icing.lib;
+import "icing/proto/scoring.proto";
+
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
-
option objc_class_prefix = "ICNG";
// Stats of the top-level function IcingSearchEngine::Initialize().
@@ -121,3 +122,86 @@
}
optional TokenizationStats tokenization_stats = 6;
}
+
+// Stats of the top-level function IcingSearchEngine::Search() and
+// IcingSearchEngine::GetNextPage().
+// Next tag: 15
+message NativeQueryStats {
+ // Number of terms in the query string.
+ optional int32 num_terms = 1;
+
+ // Number of namespaces filtered.
+ optional int32 num_namespaces_filtered = 2;
+
+ // Number of schema types filtered.
+ optional int32 num_schema_types_filtered = 3;
+
+ // Strategy of scoring and ranking.
+ optional ScoringSpecProto.RankingStrategy.Code ranking_strategy = 4;
+
+ // Whether the function call is querying the first page. If it’s
+ // not, Icing will fetch the results from cache so that some steps
+ // may be skipped.
+ optional bool is_first_page = 5;
+
+ // The requested number of results in one page.
+ optional int32 requested_page_size = 6;
+
+ // The actual number of results returned in the current page.
+ optional int32 num_results_returned_current_page = 7;
+
+ // Number of documents scored.
+ optional int32 num_documents_scored = 8;
+
+ // How many of the results in the page returned were snippeted.
+ optional bool num_results_snippeted = 9;
+
+ // Overall time used for the function call.
+ optional int32 latency_ms = 10;
+
+ // Time used to parse the query, including 2 parts: tokenizing and
+ // transforming tokens into an iterator tree.
+ optional int32 parse_query_latency_ms = 11;
+
+ // Time used to score the raw results.
+ optional int32 scoring_latency_ms = 12;
+
+ // Time used to rank the scored results.
+ optional int32 ranking_latency_ms = 13;
+
+ // Time used to fetch the document protos. Note that it includes the
+ // time to snippet if ‘has_snippets’ is true.
+ optional int32 document_retrieval_latency_ms = 14;
+}
+
+// Stats of the top-level functions IcingSearchEngine::Delete,
+// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType,
+// IcingSearchEngine::DeleteByQuery.
+// Next tag: 4
+message NativeDeleteStats {
+ // Overall time used for the function call.
+ optional int32 latency_ms = 1;
+
+ message DeleteType {
+ enum Code {
+ // Default. Should never be used.
+ UNKNOWN = 0;
+
+ // Delete one document.
+ SINGLE = 1;
+
+ // Delete by query.
+ QUERY = 2;
+
+ // Delete by namespace.
+ NAMESPACE = 3;
+
+ // Delete by schema type.
+ SCHEMA_TYPE = 4;
+ }
+ }
+ optional DeleteType.Code delete_type = 2;
+
+ // Number of documents deleted by this call.
+ optional int32 num_documents_deleted = 3;
+}
\ No newline at end of file
diff --git a/proto/icing/proto/scoring.proto b/proto/icing/proto/scoring.proto
index 3a99b09..bfa7aec 100644
--- a/proto/icing/proto/scoring.proto
+++ b/proto/icing/proto/scoring.proto
@@ -18,7 +18,6 @@
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
-
option objc_class_prefix = "ICNG";
// Encapsulates the configurations on how Icing should score and rank the search
@@ -64,6 +63,12 @@
// Ranked by last used timestamp with usage type 3. The timestamps are
// compared in seconds.
USAGE_TYPE3_LAST_USED_TIMESTAMP = 8;
+
+ // Placeholder for ranking by relevance score, currently computed as BM25F
+ // score.
+  // TODO(b/173156803): once the implementation is ready, rename to
+ // RELEVANCE_SCORE.
+ RELEVANCE_SCORE_NONFUNCTIONAL_PLACEHOLDER = 9;
}
}
optional RankingStrategy.Code rank_by = 1;
diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto
index abbfc32..f63acfa 100644
--- a/proto/icing/proto/search.proto
+++ b/proto/icing/proto/search.proto
@@ -17,6 +17,7 @@
package icing.lib;
import "icing/proto/document.proto";
+import "icing/proto/logging.proto";
import "icing/proto/status.proto";
import "icing/proto/term.proto";
@@ -64,7 +65,7 @@
// Client-supplied specifications on what to include/how to format the search
// results.
-// Next tag: 4
+// Next tag: 5
message ResultSpecProto {
// The results will be returned in pages, and num_per_page specifies the
// number of documents in one page.
@@ -96,6 +97,25 @@
optional int32 max_window_bytes = 3;
}
optional SnippetSpecProto snippet_spec = 3;
+
+ // How to specify a subset of properties to retrieve. If no type property mask
+ // has been specified for a schema type, then *all* properties of that schema
+ // type will be retrieved.
+ // Next tag: 3
+ message TypePropertyMask {
+ // The schema type to which these property masks should apply.
+ // If the schema type is the wildcard ("*"), then the type property masks
+ // will apply to all results of types that don't have their own, specific
+ // type property mask entry.
+ optional string schema_type = 1;
+
+ // The property masks specifying the property to be retrieved. Property
+    // masks must be composed only of property names and property separators (the
+ // '.' character). For example, "subject", "recipients.name". Specifying no
+ // property masks will result in *no* properties being retrieved.
+ repeated string paths = 2;
+ }
+ repeated TypePropertyMask type_property_masks = 4;
}
// The representation of a single match within a DocumentProto property.
@@ -145,7 +165,7 @@
}
// Icing lib-supplied results from a search results.
-// Next tag: 5
+// Next tag: 6
message SearchResultProto {
// Status code can be one of:
// OK
@@ -173,19 +193,14 @@
repeated ResultProto results = 2;
// Various debug fields. Not populated if ResultSpecProto.debug_info = false.
+ // Next tag: 4
message DebugInfoProto {
- // The number of results that actually matched the SearchSpecProto. This is
- // different from the number of `documents` returned since the user can
- // set a ResultSpecProto.limit on how many results are returned to them.
- optional uint64 num_results = 1;
-
- // Latency to parse and execute the query, in milliseconds.
- optional uint64 latency_ms = 2;
-
// The internal representation of the actual query string that was executed.
// This may be different from the SearchSpecProto.query if the original
// query was malformed.
optional string executed_query = 3;
+
+ reserved 1, 2;
}
optional DebugInfoProto debug_info = 3;
@@ -195,4 +210,7 @@
// LINT.IfChange(next_page_token)
optional uint64 next_page_token = 4;
// LINT.ThenChange(//depot/google3/icing/result/result-state-manager.h:kInvalidNextPageToken)
+
+ // Stats for query execution performance.
+ optional NativeQueryStats query_stats = 5;
}
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
new file mode 100644
index 0000000..be9e98c
--- /dev/null
+++ b/synced_AOSP_CL_number.txt
@@ -0,0 +1 @@
+set(synced_AOSP_CL_number=349594076)