[LSC] Add LOCAL_LICENSE_KINDS to external/icing am: a650630c0d am: 7c143aefa0
Original change: https://googleplex-android-review.googlesource.com/c/platform/external/icing/+/13740841
Change-Id: Ib75562e41e9ebac94422e727440bff00f76e795c
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a740924..70f6852 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,7 +45,7 @@
# Compile libandroidicu
set(ICU_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../icu/libandroidicu")
set(ICU_TARGET_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/icu-target")
-add_subdirectory(${ICU_SOURCE_DIR} ${ICU_TARGET_BINARY_DIR})
+add_subdirectory("${ICU_SOURCE_DIR}/static_shim" ${ICU_TARGET_BINARY_DIR})
# Glob Icing proto sources. Results look like this: icing/proto/document.proto
file(
diff --git a/icing/file/destructible-file.h b/icing/file/destructible-file.h
new file mode 100644
index 0000000..006dcb4
--- /dev/null
+++ b/icing/file/destructible-file.h
@@ -0,0 +1,72 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_FILE_DESTRUCTIBLE_FILE_H_
+#define ICING_FILE_DESTRUCTIBLE_FILE_H_
+
+#include <unistd.h>
+
+#include <string>
+
+#include "icing/file/filesystem.h"
+#include "icing/util/logging.h"
+
+namespace icing {
+namespace lib {
+
+// Move-only handle that opens `filepath` for write on construction and, when
+// destroyed while holding a valid descriptor, closes the descriptor and deletes
+// the underlying file.
+class DestructibleFile {
+ public:
+  // Opens `filepath` for write through `filesystem`.
+  //
+  // `filesystem` is dereferenced here and again in the destructor, so it must
+  // be non-null and must outlive this object. Use is_valid() to check whether
+  // a usable (non-negative) descriptor was obtained.
+  explicit DestructibleFile(const std::string& filepath,
+                            const Filesystem* filesystem)
+      : filesystem_(filesystem),
+        filepath_(filepath),
+        // Members are initialized in declaration order, so filesystem_ and
+        // filepath_ are already set when fd_ is computed.
+        fd_(filesystem_->OpenForWrite(filepath_.c_str())) {}
+
+  DestructibleFile(const DestructibleFile&) = delete;
+
+  // Starts out as an invalid handle and then swaps state with `other`, leaving
+  // `other` invalid (its destructor becomes a no-op).
+  DestructibleFile(DestructibleFile&& other) noexcept
+      : filesystem_(nullptr), fd_(-1) {
+    *this = std::move(other);
+  }
+
+  DestructibleFile& operator=(const DestructibleFile&) = delete;
+
+  // Swaps state with `other`. Whatever file this object held before the
+  // assignment is now held by `other` and is closed and deleted when `other`
+  // is destroyed, not at the point of assignment.
+  DestructibleFile& operator=(DestructibleFile&& other) noexcept {
+    std::swap(fd_, other.fd_);
+    std::swap(filesystem_, other.filesystem_);
+    std::swap(filepath_, other.filepath_);
+    return *this;
+  }
+
+  // Closes the descriptor and deletes the file, but only for a valid handle.
+  // A failed deletion is logged rather than reported.
+  ~DestructibleFile() {
+    if (is_valid()) {
+      close(fd_);
+      if (!filesystem_->DeleteFile(filepath_.c_str())) {
+        ICING_VLOG(1) << "Failed to delete file " << filepath_;
+      }
+    }
+  }
+
+  // True when this object holds a non-negative file descriptor.
+  bool is_valid() const { return fd_ >= 0; }
+
+  // Returns the raw descriptor; ownership stays with this object.
+  int get_fd() const { return fd_; }
+
+ private:
+  const Filesystem* filesystem_;
+  std::string filepath_;
+  int fd_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_DESTRUCTIBLE_FILE_H_
diff --git a/icing/file/destructible-file_test.cc b/icing/file/destructible-file_test.cc
new file mode 100644
index 0000000..61316d1
--- /dev/null
+++ b/icing/file/destructible-file_test.cc
@@ -0,0 +1,117 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/destructible-file.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Verifies that a file which already exists on disk is removed once the
+// DestructibleFile wrapping it goes out of scope.
+TEST(DestructibleFileTest, DeletesFileProperly) {
+  Filesystem filesystem;
+  const std::string target_path = GetTestTempDir() + "/file1";
+
+  // Step 1: put a small file on disk through a plain scoped descriptor.
+  {
+    ScopedFd writer(filesystem.OpenForWrite(target_path.c_str()));
+    ASSERT_TRUE(writer.is_valid());
+    int payload = 127;
+    ASSERT_TRUE(filesystem.Write(writer.get(), &payload, sizeof(payload)));
+  }
+
+  // Step 2: re-open the same path with a DestructibleFile and let it die.
+  {
+    DestructibleFile handle(target_path, &filesystem);
+    ASSERT_TRUE(handle.is_valid());
+  }
+
+  // Step 3: the destructor above should have removed the file.
+  EXPECT_FALSE(filesystem.FileExists(target_path.c_str()));
+}
+
+// Verifies that move assignment hands the assigned-to object's old file over
+// to the moved-from object, which deletes it when it is destroyed, while the
+// newly acquired file stays alive.
+TEST(DestructibleFileTest, MoveAssignDeletesFileProperly) {
+ Filesystem filesystem;
+ std::string filepath1 = GetTestTempDir() + "/file1";
+ std::string filepath2 = GetTestTempDir() + "/file2";
+
+ // 1. Create file1
+ DestructibleFile destructible1(filepath1, &filesystem);
+ ASSERT_TRUE(destructible1.is_valid());
+ int i = 127;
+ ASSERT_TRUE(filesystem.Write(destructible1.get_fd(), &i, sizeof(i)));
+
+ {
+ // 2. Create file2
+ DestructibleFile destructible2(filepath2, &filesystem);
+ ASSERT_TRUE(destructible2.is_valid());
+ i = 458;
+ ASSERT_TRUE(filesystem.Write(destructible2.get_fd(), &i, sizeof(i)));
+
+ // Move assign destructible2 into destructible1
+ destructible1 = std::move(destructible2);
+ }
+
+ // 3. file1 shouldn't exist. Move assignment swaps state, so destructible2
+ // took over file1 and deleted it when it went out of scope above.
+ EXPECT_FALSE(filesystem.FileExists(filepath1.c_str()));
+
+ // 4. file2 should still exist because it moved into destructible1 from
+ // destructible2.
+ EXPECT_TRUE(filesystem.FileExists(filepath2.c_str()));
+}
+
+// Verifies that move construction transfers responsibility for the file: the
+// file survives the moved-from object and is deleted only when the final
+// owner is destroyed.
+TEST(DestructibleFileTest, MoveConstructionDeletesFileProperly) {
+  Filesystem filesystem;
+  const std::string target_path = GetTestTempDir() + "/file1";
+
+  // Step 1: an empty holder that receives a move-constructed handle below.
+  std::unique_ptr<DestructibleFile> heap_owner;
+  {
+    // Step 2: create file1 through a scoped handle and write to it.
+    DestructibleFile scoped_owner(target_path, &filesystem);
+    ASSERT_TRUE(scoped_owner.is_valid());
+    int payload = 458;
+    ASSERT_TRUE(
+        filesystem.Write(scoped_owner.get_fd(), &payload, sizeof(payload)));
+
+    // Move construct the heap-allocated handle from the scoped one.
+    heap_owner = std::make_unique<DestructibleFile>(std::move(scoped_owner));
+  }
+
+  // Step 3: the scoped handle is gone, but the file must still be present
+  // because the heap-allocated handle took it over.
+  ASSERT_TRUE(heap_owner->is_valid());
+  EXPECT_TRUE(filesystem.FileExists(target_path.c_str()));
+
+  {
+    // Step 4: move construct a third handle from the heap-allocated one.
+    DestructibleFile final_owner(std::move(*heap_owner));
+    ASSERT_TRUE(final_owner.is_valid());
+  }
+
+  // Step 5: the third handle was destroyed at the end of its scope, so the
+  // file must be gone now.
+  EXPECT_FALSE(filesystem.FileExists(target_path.c_str()));
+}
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/file-backed-proto-log.h b/icing/file/file-backed-proto-log.h
index 763c93b..9ccd81b 100644
--- a/icing/file/file-backed-proto-log.h
+++ b/icing/file/file-backed-proto-log.h
@@ -70,6 +70,7 @@
#include "icing/file/filesystem.h"
#include "icing/file/memory-mapped-file.h"
#include "icing/legacy/core/icing-string-util.h"
+#include "icing/portable/platform.h"
#include "icing/portable/zlib.h"
#include "icing/util/crc32.h"
#include "icing/util/data-loss.h"
@@ -422,7 +423,8 @@
static constexpr int kDeflateCompressionLevel = 3;
// Chunks of the file to mmap at a time, so we don't mmap the entire file.
- static constexpr int kMmapChunkSize = 4 * 1024;
+ // Only used on 32-bit devices and on devices whose architecture is unknown.
+ static constexpr int kMmapChunkSize = 4 * 1024 * 1024; // 4MiB
ScopedFd fd_;
const Filesystem* const filesystem_;
@@ -631,6 +633,14 @@
file_path.c_str(), static_cast<long long>(start)));
}
+ if (end < start) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Ending checksum offset of file '%s' must be greater than start "
+ "'%lld', was '%lld'",
+ file_path.c_str(), static_cast<long long>(start),
+ static_cast<long long>(end)));
+ }
+
int64_t file_size = filesystem->GetFileSize(file_path.c_str());
if (end > file_size) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
@@ -640,17 +650,41 @@
static_cast<long long>(end)));
}
- for (int i = start; i < end; i += kMmapChunkSize) {
- // Don't read past the file size.
- int next_chunk_size = kMmapChunkSize;
- if ((i + kMmapChunkSize) >= end) {
- next_chunk_size = end - i;
+ Architecture architecture = GetArchitecture();
+ switch (architecture) {
+ case Architecture::BIT_64: {
+ // Don't mmap in chunks here since mmapping can be harmful on 64-bit
+ // devices where mmap/munmap calls need the mmap write semaphore, which
+ // blocks mmap/munmap/mprotect and all page faults from executing while
+ // they run. On 64-bit devices, this doesn't actually load into memory, it
+ // just makes the file faultable. So the whole file should be ok.
+ // b/185822878.
+ ICING_RETURN_IF_ERROR(mmapped_file.Remap(start, end - start));
+ auto mmap_str = std::string_view(mmapped_file.region(), end - start);
+ new_crc.Append(mmap_str);
+ break;
}
+ case Architecture::BIT_32:
+ [[fallthrough]];
+ case Architecture::UNKNOWN: {
+ // 32-bit devices only have 4GB of virtual address space. Mmap in chunks
+ // to not use up too much of it at once. If we're unknown, then also chunk
+ // it because we're not sure what the device can handle.
+ for (int i = start; i < end; i += kMmapChunkSize) {
+ // Don't read past the file size.
+ int next_chunk_size = kMmapChunkSize;
+ if ((i + kMmapChunkSize) >= end) {
+ next_chunk_size = end - i;
+ }
- ICING_RETURN_IF_ERROR(mmapped_file.Remap(i, next_chunk_size));
+ ICING_RETURN_IF_ERROR(mmapped_file.Remap(i, next_chunk_size));
- auto mmap_str = std::string_view(mmapped_file.region(), next_chunk_size);
- new_crc.Append(mmap_str);
+ auto mmap_str =
+ std::string_view(mmapped_file.region(), next_chunk_size);
+ new_crc.Append(mmap_str);
+ }
+ break;
+ }
}
return new_crc;
@@ -670,7 +704,8 @@
static_cast<long long>(proto_size), header_->max_proto_size));
}
- // At this point, we've guaranteed that proto_size is under kMaxProtoSize (see
+ // At this point, we've guaranteed that proto_size is under kMaxProtoSize
+ // (see
// ::Create), so we can safely store it in an int.
int final_size = 0;
@@ -735,8 +770,8 @@
MemoryMappedFile mmapped_file(*filesystem_, file_path_,
MemoryMappedFile::Strategy::READ_ONLY);
if (file_offset >= file_size) {
- // file_size points to the next byte to write at, so subtract one to get the
- // inclusive, actual size of file.
+ // file_size points to the next byte to write at, so subtract one to get
+ // the inclusive, actual size of file.
return absl_ports::OutOfRangeError(
IcingStringUtil::StringPrintf("Trying to read from a location, %lld, "
"out of range of the file size, %lld",
@@ -778,8 +813,8 @@
int64_t file_offset) {
int64_t file_size = filesystem_->GetFileSize(fd_.get());
if (file_offset >= file_size) {
- // file_size points to the next byte to write at, so subtract one to get the
- // inclusive, actual size of file.
+ // file_size points to the next byte to write at, so subtract one to get
+ // the inclusive, actual size of file.
return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
"Trying to erase data at a location, %lld, "
"out of range of the file size, %lld",
@@ -798,12 +833,12 @@
ICING_RETURN_IF_ERROR(mmapped_file.Remap(file_offset + sizeof(metadata),
GetProtoSize(metadata)));
- // We need to update the crc checksum if the erased area is before the rewind
- // position.
+ // We need to update the crc checksum if the erased area is before the
+ // rewind position.
if (file_offset + sizeof(metadata) < header_->rewind_offset) {
// We need to calculate [original string xor 0s].
- // The xored string is the same as the original string because 0 xor 0 = 0,
- // 1 xor 0 = 1.
+ // The xored string is the same as the original string because 0 xor 0 =
+ // 0, 1 xor 0 = 1.
const std::string_view xored_str(mmapped_file.region(),
mmapped_file.region_size());
@@ -896,7 +931,8 @@
template <typename ProtoT>
typename FileBackedProtoLog<ProtoT>::Iterator
FileBackedProtoLog<ProtoT>::GetIterator() {
- return Iterator(*filesystem_, file_path_, /*initial_offset=*/sizeof(Header));
+ return Iterator(*filesystem_, file_path_,
+ /*initial_offset=*/sizeof(Header));
}
template <typename ProtoT>
@@ -959,7 +995,8 @@
header_->header_checksum = header_->CalculateHeaderChecksum();
if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(),
- sizeof(Header))) {
+ sizeof(Header)) ||
+ !filesystem_->DataSync(fd_.get())) {
return absl_ports::InternalError(
absl_ports::StrCat("Failed to update header to: ", file_path_));
}
diff --git a/icing/file/file-backed-proto-log_benchmark.cc b/icing/file/file-backed-proto-log_benchmark.cc
index 26e0fb0..766cc64 100644
--- a/icing/file/file-backed-proto-log_benchmark.cc
+++ b/icing/file/file-backed-proto-log_benchmark.cc
@@ -164,6 +164,48 @@
// 16MiB, and we need some extra space for the
// rest of the document properties
+// Benchmarks FileBackedProtoLog::ComputeChecksum() on a log pre-populated with
+// state.range(0) documents, each carrying a 1KiB random string property.
+static void BM_ComputeChecksum(benchmark::State& state) {
+  const Filesystem filesystem;
+  const std::string file_path = GetTestTempDir() + "/proto.log";
+
+  // Start from a clean slate in case an earlier run left the file behind.
+  filesystem.DeleteFile(file_path.c_str());
+
+  bool compress = true;
+  int max_proto_size = (1 << 24) - 1;  // 16 MiB
+  FileBackedProtoLog<DocumentProto>::Options options(compress, max_proto_size);
+  auto proto_log = FileBackedProtoLog<DocumentProto>::Create(
+                       &filesystem, file_path, options)
+                       .ValueOrDie()
+                       .proto_log;
+
+  // Build one ~1KiB document that gets written over and over.
+  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+  int string_length = 1024;
+  std::default_random_engine random;
+  const std::string rand_str =
+      RandomString(kAlNumAlphabet, string_length, &random);
+  auto document_properties = document.add_properties();
+  document_properties->set_name("string property");
+  document_properties->add_string_values(rand_str);
+
+  // Fill the log before the timed section.
+  int num_docs = state.range(0);
+  int docs_written = 0;
+  while (docs_written < num_docs) {
+    ICING_ASSERT_OK(proto_log->WriteProto(document));
+    ++docs_written;
+  }
+
+  // Timed section: only the checksum computation is measured.
+  for (auto _ : state) {
+    testing::DoNotOptimize(proto_log->ComputeChecksum());
+  }
+
+  // Cleanup after ourselves
+  filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_ComputeChecksum)->Range(1024, 1 << 20);
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/file/file-backed-vector.h b/icing/file/file-backed-vector.h
index 3ecef54..2443cb2 100644
--- a/icing/file/file-backed-vector.h
+++ b/icing/file/file-backed-vector.h
@@ -175,7 +175,27 @@
// synced by the system and the checksum will be updated.
~FileBackedVector();
- // Accesses the element at idx.
+ // Gets a copy of the element at idx.
+ //
+ // This is useful if you think the FileBackedVector may grow before you need
+ // to access this return value. When the FileBackedVector grows, the
+ // underlying mmap will be unmapped and remapped, which will invalidate any
+ // pointers to the previously mapped region. Getting a copy will avoid
+ // referencing the now-invalidated region.
+ //
+ // Returns:
+ // OUT_OF_RANGE_ERROR if idx < 0 or > num_elements()
+ libtextclassifier3::StatusOr<T> GetCopy(int32_t idx) const;
+
+ // Gets a pointer to the element at idx.
+ //
+ // WARNING: Subsequent calls to Set may invalidate the pointer returned by
+ // Get.
+ //
+ // This is useful if you do not think the FileBackedVector will grow before
+ // you need to reference this value, and you want to avoid a copy. When the
+ // FileBackedVector grows, the underlying mmap will be unmapped and remapped,
+ // which will invalidate this pointer to the previously mapped region.
//
// Returns:
// OUT_OF_RANGE_ERROR if idx < 0 or > num_elements()
@@ -183,6 +203,10 @@
// Writes the value at idx.
//
+ // May grow the underlying file and mmapped region as needed to fit the new
+ // value. If it does grow, then any pointers to previous values returned
+ // from Get() may be invalidated.
+ //
// Returns:
// OUT_OF_RANGE_ERROR if idx < 0 or file cannot be grown idx size
libtextclassifier3::Status Set(int32_t idx, const T& value);
@@ -468,6 +492,13 @@
}
template <typename T>
+// Returns the element at idx by value. The lookup is delegated to Get(idx),
+// and the pointed-to element is copied into the returned StatusOr.
+libtextclassifier3::StatusOr<T> FileBackedVector<T>::GetCopy(
+ int32_t idx) const {
+ // NOTE(review): ICING_ASSIGN_OR_RETURN presumably returns early with Get()'s
+ // error status when the lookup fails - confirm against the macro definition.
+ ICING_ASSIGN_OR_RETURN(const T* value, Get(idx));
+ return *value;
+}
+
+template <typename T>
libtextclassifier3::StatusOr<const T*> FileBackedVector<T>::Get(
int32_t idx) const {
if (idx < 0) {
@@ -492,8 +523,6 @@
IcingStringUtil::StringPrintf("Index, %d, was less than 0", idx));
}
- int32_t start_byte = idx * sizeof(T);
-
ICING_RETURN_IF_ERROR(GrowIfNecessary(idx + 1));
if (idx + 1 > header_->num_elements) {
@@ -518,6 +547,8 @@
changes_end_ = 0;
header_->vector_checksum = 0;
} else {
+ int32_t start_byte = idx * sizeof(T);
+
changes_.push_back(idx);
saved_original_buffer_.append(
reinterpret_cast<char*>(const_cast<T*>(array())) + start_byte,
diff --git a/icing/file/portable-file-backed-proto-log.h b/icing/file/portable-file-backed-proto-log.h
new file mode 100644
index 0000000..95c3949
--- /dev/null
+++ b/icing/file/portable-file-backed-proto-log.h
@@ -0,0 +1,1173 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// File-backed log of protos with append-only writes and position based reads.
+//
+// There should only be one instance of a PortableFileBackedProtoLog of the same
+// file at a time; using multiple instances at the same time may lead to
+// undefined behavior.
+//
+// The entire checksum is computed on initialization to verify the contents are
+// valid. On failure, the log will be truncated to the last verified state when
+// PersistToDisk() was called. If the log cannot successfully restore the last
+// state due to disk corruption or some other inconsistency, then the entire log
+// will be lost.
+//
+// Each proto written to the file will have a metadata written just before it.
+// The metadata consists of
+// {
+// 1 byte of kProtoMagic;
+// 3 bytes of the proto size
+// n bytes of the proto itself
+// }
+//
+// All metadata is written in a portable format, encoded with htonl before
+// writing to file and decoded with ntohl when reading from file.
+//
+// Example usage:
+// ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+// PortableFileBackedProtoLog<DocumentProto>::Create(filesystem,
+// file_path_,
+// options));
+// auto proto_log = std::move(create_result.proto_log);
+//
+// Document document;
+// document.set_namespace("com.google.android.example");
+// document.set_uri("www.google.com");
+//
+// int64_t document_offset = proto_log->WriteProto(document);
+// Document same_document = proto_log->ReadProto(document_offset);
+// proto_log->PersistToDisk();
+
+#ifndef ICING_FILE_PORTABLE_FILE_BACKED_PROTO_LOG_H_
+#define ICING_FILE_PORTABLE_FILE_BACKED_PROTO_LOG_H_
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+#include <string>
+#include <string_view>
+#include <utility>
+#include <vector>
+
+#include "icing/text_classifier/lib3/utils/base/status.h"
+#include "icing/text_classifier/lib3/utils/base/statusor.h"
+#include <google/protobuf/io/gzip_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/memory-mapped-file.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/portable/endian.h"
+#include "icing/portable/platform.h"
+#include "icing/portable/zlib.h"
+#include "icing/util/bit-util.h"
+#include "icing/util/crc32.h"
+#include "icing/util/data-loss.h"
+#include "icing/util/logging.h"
+#include "icing/util/status-macros.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Number of bytes we reserve for the heading at the beginning of the proto log.
+// We reserve this so the header can grow without running into the contents of
+// the proto log, triggering an unnecessary migration of the data.
+constexpr int kHeaderReservedBytes = 256;
+
+// Returns true when every one of the first `size` bytes of `buffer` is zero.
+// A non-positive `size` describes no bytes at all and therefore also yields
+// true; `buffer` is not dereferenced in that case.
+bool IsEmptyBuffer(const char* buffer, int size) {
+  if (size <= 0) {
+    // Guard against forming an invalid [buffer, buffer + size) range.
+    return true;
+  }
+  return std::all_of(buffer, buffer + size,
+                     [](const char byte) { return byte == 0; });
+}
+
+// Extracts the proto size from a metadata word.
+// Metadata layout: the top 8 bits hold the magic, the low 24 bits the size.
+int GetProtoSize(int metadata) {
+  constexpr int kProtoSizeMask = (1 << 24) - 1;
+  return metadata & kProtoSizeMask;
+}
+
+// Helper function to get stored proto magic from the metadata.
+// Metadata format: 8 bits magic + 24 bits size
+//
+// The shift is done on the unsigned representation so the result is
+// well-defined even when the top bit of the magic byte is set (which makes
+// `metadata` negative as an int).
+uint8_t GetProtoMagic(int metadata) {
+  return static_cast<uint8_t>(static_cast<uint32_t>(metadata) >> 24);
+}
+
+} // namespace
+
+template <typename ProtoT>
+class PortableFileBackedProtoLog {
+ public:
+ // Configuration passed to Create() when opening or creating a proto log.
+ struct Options {
+ // Whether to compress each proto before writing to the proto log.
+ bool compress;
+
+ // Byte-size limit for each proto written to the store. This does not
+ // include the bytes needed for the metadata of each proto.
+ //
+ // NOTE: Currently, we only support protos up to 16MiB. We store the proto
+ // size in 3 bytes within the metadata.
+ //
+ // NOTE: This limit is only enforced for future writes. If the store
+ // previously had a higher limit, then reading older entries could return
+ // larger protos.
+ //
+ // NOTE: The max_proto_size is the upper limit for input protos into the
+ // ProtoLog. Even if the proto is larger than max_proto_size, but compresses
+ // to a smaller size, ProtoLog will not accept it. Protos that result in a
+ // compressed size larger than max_proto_size are also not accepted.
+ const int32_t max_proto_size;
+
+ // Must specify values for options.
+ Options() = delete;
+ // compress_in: value for `compress`.
+ // max_proto_size_in: value for `max_proto_size`; defaults to kMaxProtoSize,
+ // which is declared outside this struct.
+ explicit Options(bool compress_in,
+ const int32_t max_proto_size_in = kMaxProtoSize)
+ : compress(compress_in), max_proto_size(max_proto_size_in) {}
+ };
+
+ // Header stored at the beginning of the file before the rest of the log
+ // contents. Stores metadata on the log.
+ class Header {
+ public:
+ // Magic value that magic_nbytes is initialized with.
+ static constexpr int32_t kMagic = 0xf4c6f67a;
+
+ // Current version of the on-disk format; see file_format_version_nbytes.
+ static constexpr int32_t kFileFormatVersion = 0;
+
+ // Computes a CRC over the raw bytes of this Header that follow
+ // header_checksum_nbytes. The bytes are hashed exactly as stored, i.e. with
+ // the fields in network byte order.
+ //
+ // NOTE(review): the length computation assumes magic_nbytes and
+ // header_checksum_nbytes are the first two members with no padding between
+ // them. The hashed range runs to sizeof(Header), so any trailing padding
+ // bytes are included - confirm Headers are always zero-initialized before
+ // being filled in.
+ uint32_t CalculateHeaderChecksum() const {
+ Crc32 crc;
+
+ // Get a string_view of all the fields of the Header, excluding the
+ // magic_nbytes and header_checksum_nbytes
+ std::string_view header_str(reinterpret_cast<const char*>(this) +
+ offsetof(Header, header_checksum_nbytes) +
+ sizeof(header_checksum_nbytes),
+ sizeof(Header) - sizeof(magic_nbytes) -
+ sizeof(header_checksum_nbytes));
+ crc.Append(header_str);
+ return crc.Get();
+ }
+
+ // The accessors below take and return values in host byte order and
+ // convert to/from the network byte order in which the fields are stored.
+ //
+ // NOTE(review): the log/header checksum accessors use int32_t while the
+ // backing fields and CalculateHeaderChecksum() use uint32_t - confirm that
+ // callers compare them consistently.
+ int32_t GetMagic() const { return gntohl(magic_nbytes); }
+
+ void SetMagic(int32_t magic_in) { magic_nbytes = ghtonl(magic_in); }
+
+ int32_t GetFileFormatVersion() const {
+ return gntohl(file_format_version_nbytes);
+ }
+
+ void SetFileFormatVersion(int32_t file_format_version_in) {
+ file_format_version_nbytes = ghtonl(file_format_version_in);
+ }
+
+ int32_t GetMaxProtoSize() const { return gntohl(max_proto_size_nbytes); }
+
+ void SetMaxProtoSize(int32_t max_proto_size_in) {
+ max_proto_size_nbytes = ghtonl(max_proto_size_in);
+ }
+
+ int32_t GetLogChecksum() const { return gntohl(log_checksum_nbytes); }
+
+ void SetLogChecksum(int32_t log_checksum_in) {
+ log_checksum_nbytes = ghtonl(log_checksum_in);
+ }
+
+ int64_t GetRewindOffset() const { return gntohll(rewind_offset_nbytes); }
+
+ void SetRewindOffset(int64_t rewind_offset_in) {
+ rewind_offset_nbytes = ghtonll(rewind_offset_in);
+ }
+
+ int32_t GetHeaderChecksum() const { return gntohl(header_checksum_nbytes); }
+
+ void SetHeaderChecksum(int32_t header_checksum_in) {
+ header_checksum_nbytes = ghtonl(header_checksum_in);
+ }
+
+ // Reads the compress flag: the flags are converted to host order and the
+ // single bit at kCompressBit is extracted.
+ bool GetCompressFlag() const {
+ uint16_t host_order_flags = gntohs(flags_nbytes);
+ return bit_util::BitfieldGet(host_order_flags, kCompressBit, /*len=*/1);
+ }
+
+ // Writes the compress flag: the flags are converted to host order, the bit
+ // at kCompressBit is updated, and the result is stored back in network
+ // order.
+ void SetCompressFlag(bool compress) {
+ uint16_t host_order_flags = gntohs(flags_nbytes);
+ bit_util::BitfieldSet(compress, kCompressBit,
+ /*len=*/1, &host_order_flags);
+ flags_nbytes = ghtons(host_order_flags);
+ }
+
+ private:
+ // The least-significant bit offset at which the compress flag is stored in
+ // 'flags_nbytes'. Represents whether the protos in the log are compressed
+ // or not.
+ static constexpr int32_t kCompressBit = 0;
+
+ // Holds the magic as a quick sanity check against file corruption.
+ //
+ // Field is in network-byte order.
+ int32_t magic_nbytes = ghtonl(kMagic);
+
+ // Must be at the beginning after kMagic. Contains the crc checksum of
+ // the following fields.
+ //
+ // Field is in network-byte order.
+ uint32_t header_checksum_nbytes = 0;
+
+ // Last known good offset at which the log and its checksum were updated.
+ // If we crash between writing to the log and updating the checksum, we can
+ // try to rewind the log to this offset and verify the checksum is still
+ // valid instead of throwing away the entire log.
+ //
+ // Field is in network-byte order.
+ int64_t rewind_offset_nbytes = ghtonll(kHeaderReservedBytes);
+
+ // Version number tracking how we serialize the file to disk. If we change
+ // how/what we write to disk, this version should be updated and this class
+ // should handle a migration.
+ //
+ // Currently at kFileFormatVersion.
+ //
+ // Field is in network-byte order.
+ int32_t file_format_version_nbytes = 0;
+
+ // The maximum proto size that can be written to the log.
+ //
+ // Field is in network-byte order.
+ int32_t max_proto_size_nbytes = 0;
+
+ // Checksum of the log elements, doesn't include the header fields.
+ //
+ // Field is in network-byte order.
+ uint32_t log_checksum_nbytes = 0;
+
+ // Bits are used to hold various flags.
+ // Lowest bit is whether the protos are compressed or not.
+ //
+ // Field is in network-byte order.
+ uint16_t flags_nbytes = 0;
+
+ // NOTE: New fields should *almost always* be added to the end here. Since
+ // this class may have already been written to disk, appending fields
+ // increases the chances that changes are backwards-compatible.
+ };
+ static_assert(sizeof(Header) <= kHeaderReservedBytes,
+ "Header has grown past our reserved bytes!");
+
+  // Result bundle returned by Create(): the initialized log plus a description
+  // of any data that was lost while restoring it from a previous state.
+  struct CreateResult {
+    // A successfully initialized log.
+    std::unique_ptr<PortableFileBackedProtoLog<ProtoT>> proto_log;
+
+    // The data status after initializing from a previous state. Data loss can
+    // happen if the file is corrupted or some previously added data was
+    // unpersisted. This may be used to signal that any derived data off of the
+    // proto log may need to be regenerated.
+    DataLoss data_loss;
+
+    // Returns true when data_loss reports either partial or complete loss.
+    // Const-qualified so it can also be called on a const CreateResult.
+    bool has_data_loss() const {
+      return data_loss == DataLoss::PARTIAL || data_loss == DataLoss::COMPLETE;
+    }
+  };
+
+ // Factory method to create, initialize, and return a
+ // PortableFileBackedProtoLog. Will create the file if it doesn't exist.
+ //
+ // If on re-initialization the log detects disk corruption or some previously
+ // added data was unpersisted, the log will rewind to the last-good state. The
+ // log saves these checkpointed "good" states when PersistToDisk() is called
+ // or the log is safely destructed. If the log rewinds successfully to the
+ // last-good state, then the returned CreateResult.data_loss indicates
+ // whether it has a data loss and what kind of data loss it is (partial or
+ // complete) so that any derived data may know that it needs to be updated. If
+ // the log re-initializes successfully without any data loss,
+ // CreateResult.data_loss will be NONE.
+ //
+ // Params:
+ // filesystem: Handles system level calls
+ // file_path: Path of the underlying file. Directory of the file should
+ // already exist
+ // options: Configuration options for the proto log
+ //
+ // Returns:
+ // PortableFileBackedProtoLog::CreateResult on success
+ // INVALID_ARGUMENT on an invalid option
+ // INTERNAL_ERROR on IO error
+ static libtextclassifier3::StatusOr<CreateResult> Create(
+ const Filesystem* filesystem, const std::string& file_path,
+ const Options& options);
+
+ // Not copyable
+ PortableFileBackedProtoLog(const PortableFileBackedProtoLog&) = delete;
+ PortableFileBackedProtoLog& operator=(const PortableFileBackedProtoLog&) =
+ delete;
+
+ // This will update the checksum of the log as well.
+ ~PortableFileBackedProtoLog();
+
+ // Writes the serialized proto to the underlying file. Writes are applied
+ // directly to the underlying file. Users do not need to sync the file after
+ // writing.
+ //
+ // Returns:
+ // Offset of the newly appended proto in file on success
+ // INVALID_ARGUMENT if proto is too large, as decided by
+ // Options.max_proto_size
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> WriteProto(const ProtoT& proto);
+
+ // Reads out a proto located at file_offset from the file.
+ //
+ // Returns:
+ // A proto on success
+ // NOT_FOUND if the proto at the given offset has been erased
+ // OUT_OF_RANGE_ERROR if file_offset exceeds file size
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<ProtoT> ReadProto(int64_t file_offset) const;
+
+ // Erases the data of a proto located at file_offset from the file.
+ //
+ // Returns:
+ // OK on success
+ // OUT_OF_RANGE_ERROR if file_offset exceeds file size
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::Status EraseProto(int64_t file_offset);
+
+ // Calculates and returns the disk usage in bytes. Rounds up to the nearest
+ // block size.
+ //
+ // Returns:
+ // Disk usage on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+
+ // Returns the file size of all the elements held in the log. File size is in
+ // bytes. This excludes the size of any internal metadata of the log, e.g. the
+ // log's header.
+ //
+ // Returns:
+ // File size on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const;
+
+ // An iterator helping to find offsets of all the protos in file.
+ // Example usage:
+ //
+ // while (iterator.Advance().ok()) {
+ // int64_t offset = iterator.GetOffset();
+ // // Do something
+ // }
+ class Iterator {
+ public:
+ // Creates an iterator over the log stored at file_path.
+ // NOTE(review): initial_offset is presumably the file offset of the first
+ // proto, i.e. just past the header - confirm against GetIterator().
+ Iterator(const Filesystem& filesystem, const std::string& file_path,
+ int64_t initial_offset);
+
+ // Advances to the position of next proto whether it has been erased or not.
+ //
+ // Returns:
+ // OK on success
+ // OUT_OF_RANGE_ERROR if it reaches the end
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::Status Advance();
+
+ // Returns the file offset of current proto.
+ int64_t GetOffset();
+
+ private:
+ // Sentinel offset value.
+ // NOTE(review): presumably marks an iterator that has not been advanced
+ // yet - confirm against the member definitions.
+ static constexpr int64_t kInvalidOffset = -1;
+ // Used to read proto metadata
+ MemoryMappedFile mmapped_file_;
+ // Offset of first proto
+ int64_t initial_offset_;
+ // NOTE(review): presumably the offset reported by GetOffset() - confirm.
+ int64_t current_offset_;
+ // NOTE(review): presumably the size of the underlying file, used to detect
+ // the end of the log - confirm.
+ int64_t file_size_;
+ };
+
+ // Returns an iterator of current proto log. The caller needs to keep the
+ // proto log unchanged while using the iterator, otherwise unexpected
+ // behaviors could happen.
+ Iterator GetIterator();
+
+ // Persists all changes since initialization or the last call to
+ // PersistToDisk(). Any changes that aren't persisted may be lost if the
+ // system fails to close safely.
+ //
+ // Example use case:
+ //
+ // Document document;
+ // document.set_namespace("com.google.android.example");
+ // document.set_uri("www.google.com");
+ //
+ // {
+ // ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+ // PortableFileBackedProtoLog<DocumentProto>::Create(filesystem,
+ // file_path,
+ // options));
+ // auto proto_log = std::move(create_result.proto_log);
+ //
+ // int64_t document_offset = proto_log->WriteProto(document);
+ //
+ // // We lose the document here since it wasn't persisted.
+ // // *SYSTEM CRASH*
+ // }
+ //
+ // {
+ // // Can still successfully create after a crash since the log can
+ // // rewind/truncate to recover into a previously good state
+ // ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+ // PortableFileBackedProtoLog<DocumentProto>::Create(filesystem,
+ // file_path,
+ // options));
+ // auto proto_log = std::move(create_result.proto_log);
+ //
+ // // Lost the proto since we didn't PersistToDisk before the crash
+ // proto_log->ReadProto(document_offset)); // INVALID_ARGUMENT error
+ //
+ // int64_t document_offset = proto_log->WriteProto(document));
+ //
+ // // Persisted this time, so we should be ok.
+ // ICING_ASSERT_OK(proto_log->PersistToDisk());
+ // }
+ //
+ // {
+ // ICING_ASSERT_OK_AND_ASSIGN(auto create_result,
+ // PortableFileBackedProtoLog<DocumentProto>::Create(filesystem,
+ // file_path,
+ // options));
+ // auto proto_log = std::move(create_result.proto_log);
+ //
+ // // SUCCESS
+ // Document same_document = proto_log->ReadProto(document_offset));
+ // }
+ //
+ // NOTE: Since all protos are already written to the file directly, this
+ // just updates the checksum and rewind position. Without these updates,
+ // future initializations will truncate the file and discard unpersisted
+ // changes.
+ //
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::Status PersistToDisk();
+
+ // Calculates the checksum of the log contents, from the end of the reserved
+ // header bytes to the current end of the file. Excludes the header content.
+ // Returns:
+ // Crc of the log content
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<Crc32> ComputeChecksum();
+
+ private:
+ // Object can only be instantiated via the ::Create factory. Opens file_path for append.
+ PortableFileBackedProtoLog(const Filesystem* filesystem,
+ const std::string& file_path,
+ std::unique_ptr<Header> header); // ownership of header moves to the log
+
+ // Initializes a new proto log: sizes the file to the reserved header bytes
+ // and writes a fresh header built from `options`.
+ // Returns:
+ // CreateResult with DataLoss::NONE on success
+ // INTERNAL_ERROR on IO error
+ static libtextclassifier3::StatusOr<CreateResult> InitializeNewFile(
+ const Filesystem* filesystem, const std::string& file_path,
+ const Options& options);
+
+ // Verifies that the existing proto log is in a good state. If not in a good
+ // state, then the proto log may be truncated to the last good state and
+ // content will be lost.
+ //
+ // Returns:
+ // CreateResult on success; its data_loss value reports what was truncated
+ // INTERNAL_ERROR on IO error or internal inconsistencies in the file
+ // INVALID_ARGUMENT_ERROR if options aren't consistent with previous
+ // instances
+ static libtextclassifier3::StatusOr<CreateResult> InitializeExistingFile(
+ const Filesystem* filesystem, const std::string& file_path,
+ const Options& options, int64_t file_size);
+
+ // Takes an initial checksum and updates it with the content between `start`
+ // and `end` offsets in the file.
+ // `start` must not be negative and `end` must not be smaller than `start`.
+ // Returns:
+ // Crc of the content between `start`, inclusive, and `end`, exclusive.
+ // INTERNAL_ERROR on IO error
+ // INVALID_ARGUMENT_ERROR if start and end aren't within the file size
+ static libtextclassifier3::StatusOr<Crc32> ComputeChecksum(
+ const Filesystem* filesystem, const std::string& file_path,
+ Crc32 initial_crc, int64_t start, int64_t end);
+
+ // Reads out the metadata of a proto located at file_offset from the file.
+ // Metadata will be returned in host byte order endianness.
+ // `mmapped_file` is remapped onto the metadata bytes as a side effect.
+ // Returns:
+ // Proto's metadata on success
+ // OUT_OF_RANGE_ERROR if file_offset is at or past file_size
+ // INTERNAL_ERROR if the metadata doesn't fit before file_size, its magic byte is wrong, or any IO errors happen
+ static libtextclassifier3::StatusOr<int32_t> ReadProtoMetadata(
+ MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size);
+
+ // Writes metadata of a proto to the fd. Takes in a host byte order endianness
+ // metadata and converts it into a portable metadata before writing.
+ // The portable form is produced with ghtonl().
+ // Returns:
+ // OK on success
+ // INTERNAL_ERROR on any IO errors
+ static libtextclassifier3::Status WriteProtoMetadata(
+ const Filesystem* filesystem, int fd, int32_t host_order_metadata);
+
+ // Magic number added in front of every proto. Used when reading out protos
+ // as a first check for corruption in each entry in the file. Even if there is
+ // a corruption, the best we can do is roll back to our last recovery point
+ // and throw away un-flushed data. We can discard/reuse this byte if needed so
+ // that we have 4 bytes to store the size of protos, and increase the size of
+ // protos we support.
+ static constexpr uint8_t kProtoMagic = 0x5C;
+
+ // Our internal max for protos.
+ // This is the largest value that fits in 3 bytes.
+ // WARNING: Changing this to a larger number may invalidate our assumption
+ // that the proto size can safely be stored in the last 3 bytes of the proto
+ // header.
+ static constexpr int kMaxProtoSize = (1 << 24) - 1; // 16MiB - 1 byte
+ static_assert(kMaxProtoSize <= 0x00FFFFFF,
+ "kMaxProtoSize doesn't fit in 3 bytes");
+
+ // Level of compression, BEST_SPEED = 1, BEST_COMPRESSION = 9
+ static constexpr int kDeflateCompressionLevel = 3;
+
+ // Chunks of the file to mmap at a time, so we don't mmap the entire file.
+ // Only used on 32-bit devices, or when the architecture is unknown
+ static constexpr int kMmapChunkSize = 4 * 1024 * 1024; // 4MiB
+
+ ScopedFd fd_; // opened for append on the log file by the constructor
+ const Filesystem* const filesystem_; // raw pointer handed to the constructor; never deleted by this class
+ const std::string file_path_; // path of the log file
+ std::unique_ptr<Header> header_; // in-memory header; written to the file by PersistToDisk() and EraseProto()
+};
+
+template <typename ProtoT>
+constexpr uint8_t PortableFileBackedProtoLog<ProtoT>::kProtoMagic; // out-of-class definition of the in-class constant; presumably so it can be odr-used before C++17
+
+template <typename ProtoT>
+PortableFileBackedProtoLog<ProtoT>::PortableFileBackedProtoLog(
+    const Filesystem* filesystem, const std::string& file_path,
+    std::unique_ptr<Header> header)
+    : filesystem_(filesystem), file_path_(file_path),
+      header_(std::move(header)) {
+  // fd_ was default-constructed; attach it to the log opened in append mode.
+  fd_.reset(filesystem_->OpenForAppend(file_path_.c_str()));
+}
+
+template <typename ProtoT>
+PortableFileBackedProtoLog<ProtoT>::~PortableFileBackedProtoLog() {
+  // Last chance to flush the header; a failure can only be logged here.
+  const libtextclassifier3::Status persist_status = PersistToDisk();
+  if (persist_status.ok()) return;
+  ICING_LOG(WARNING) << "Error persisting to disk during destruction of "
+                        "PortableFileBackedProtoLog: " << file_path_;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<
+    typename PortableFileBackedProtoLog<ProtoT>::CreateResult>
+PortableFileBackedProtoLog<ProtoT>::Create(const Filesystem* filesystem,
+                                           const std::string& file_path,
+                                           const Options& options) {
+  // The size of each proto is stored in 3 bytes, so the configured maximum
+  // has to be positive and can't exceed kMaxProtoSize (just under 16MiB).
+  if (options.max_proto_size <= 0) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "options.max_proto_size must be greater than 0, was %d",
+        options.max_proto_size));
+  }
+  if (options.max_proto_size > kMaxProtoSize) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "options.max_proto_size must be under 16MiB, was %d",
+        options.max_proto_size));
+  }
+
+  // No file yet: start a brand new log.
+  if (!filesystem->FileExists(file_path.c_str())) {
+    return InitializeNewFile(filesystem, file_path, options);
+  }
+
+  const int64_t existing_size = filesystem->GetFileSize(file_path.c_str());
+  if (existing_size == Filesystem::kBadFileSize) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Bad file size '", file_path, "'"));
+  }
+
+  // An empty file is set up like a new one; anything else gets validated.
+  return existing_size == 0
+             ? InitializeNewFile(filesystem, file_path, options)
+             : InitializeExistingFile(filesystem, file_path, options,
+                                      existing_size);
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<
+    typename PortableFileBackedProtoLog<ProtoT>::CreateResult>
+PortableFileBackedProtoLog<ProtoT>::InitializeNewFile(
+    const Filesystem* filesystem, const std::string& file_path,
+    const Options& options) {
+  const char* const path = file_path.c_str();
+  // Reserve room for the header before anything is written.
+  const bool resized = filesystem->Truncate(path, kHeaderReservedBytes);
+  if (!resized) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to initialize file size: ", file_path));
+  }
+
+  // Fill in a fresh header from the options, then stamp its checksum.
+  auto header = std::make_unique<Header>();
+  header->SetCompressFlag(options.compress);
+  header->SetMaxProtoSize(options.max_proto_size);
+  header->SetHeaderChecksum(header->CalculateHeaderChecksum());
+  if (!filesystem->Write(path, header.get(), sizeof(Header))) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to write header for file: ", file_path));
+  }
+
+  // The constructor is private, so std::make_unique cannot be used here.
+  std::unique_ptr<PortableFileBackedProtoLog<ProtoT>> new_log(
+      new PortableFileBackedProtoLog<ProtoT>(filesystem, file_path,
+                                             std::move(header)));
+  CreateResult create_result = {std::move(new_log), DataLoss::NONE};
+  return create_result;
+}
+
+template <typename ProtoT> // Opens an existing log: validates the header, then verifies the log checksum and truncates the file if it does not match.
+libtextclassifier3::StatusOr<
+ typename PortableFileBackedProtoLog<ProtoT>::CreateResult>
+PortableFileBackedProtoLog<ProtoT>::InitializeExistingFile(
+ const Filesystem* filesystem, const std::string& file_path,
+ const Options& options, int64_t file_size) {
+ if (file_size < kHeaderReservedBytes) { // too small to hold the reserved header bytes
+ return absl_ports::InternalError(
+ absl_ports::StrCat("File header too short for: ", file_path));
+ }
+
+ std::unique_ptr<Header> header = std::make_unique<Header>();
+ if (!filesystem->PRead(file_path.c_str(), header.get(), sizeof(Header),
+ /*offset=*/0)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to read header for file: ", file_path));
+ }
+
+ // Make sure the header is still valid before we use any of its values. This
+ // is covered by the header_checksum check below, but this is a quick check
+ // that can save us from an extra crc computation.
+ if (header->GetMagic() != Header::kMagic) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Invalid header kMagic for file: ", file_path));
+ }
+
+ if (header->GetHeaderChecksum() != header->CalculateHeaderChecksum()) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Invalid header checksum for: ", file_path));
+ }
+
+ if (header->GetFileFormatVersion() != Header::kFileFormatVersion) {
+ // If this changes, we might need to handle a migration rather than throwing
+ // an error.
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Invalid header file format version: ", file_path));
+ }
+
+ if (header->GetCompressFlag() != options.compress) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Inconsistent compress option, expected %d, actual %d",
+ header->GetCompressFlag(), options.compress));
+ }
+
+ if (header->GetMaxProtoSize() > options.max_proto_size) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Max proto size cannot be smaller than previous "
+ "instantiations, previous size %d, wanted size %d",
+ header->GetMaxProtoSize(), options.max_proto_size));
+ }
+ header->SetMaxProtoSize(options.max_proto_size); // equal or larger (checked above); changed in memory only here
+
+ DataLoss data_loss = DataLoss::NONE;
+ ICING_ASSIGN_OR_RETURN(
+ Crc32 calculated_log_checksum,
+ ComputeChecksum(filesystem, file_path, Crc32(),
+ /*start=*/kHeaderReservedBytes, /*end=*/file_size));
+
+ // Double check that the log checksum is the same as the one that was
+ // persisted last time. If not, we start recovery logic.
+ if (header->GetLogChecksum() != calculated_log_checksum.Get()) {
+ // Need to rewind the proto log since the checksums don't match.
+ // Worst case, we have to rewind the entire log back to just the header
+ int64_t last_known_good = kHeaderReservedBytes;
+
+ // Calculate the checksum of the log contents just up to the last rewind
+ // offset point. This will be valid if we just appended contents to the log
+ // without updating the checksum, and we can rewind back to this point
+ // safely.
+ ICING_ASSIGN_OR_RETURN(calculated_log_checksum,
+ ComputeChecksum(filesystem, file_path, Crc32(),
+ /*start=*/kHeaderReservedBytes,
+ /*end=*/header->GetRewindOffset()));
+ if (header->GetLogChecksum() == calculated_log_checksum.Get()) {
+ // Check if it matches our last rewind state. If so, this becomes our last
+ // good state and we can safely truncate and recover from here.
+ last_known_good = header->GetRewindOffset();
+ data_loss = DataLoss::PARTIAL;
+ } else {
+ // Otherwise, we're going to truncate the entire log and this resets the
+ // checksum to an empty log state.
+ header->SetLogChecksum(0); // in-memory only; this function does not write the header back
+ data_loss = DataLoss::COMPLETE;
+ }
+
+ if (!filesystem->Truncate(file_path.c_str(), last_known_good)) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Error truncating file: ", file_path));
+ }
+
+ ICING_LOG(INFO) << "Truncated '" << file_path << "' to size "
+ << last_known_good;
+ }
+
+ CreateResult create_result = {
+ std::unique_ptr<PortableFileBackedProtoLog<ProtoT>>(
+ new PortableFileBackedProtoLog<ProtoT>(filesystem, file_path,
+ std::move(header))),
+ data_loss};
+
+ return create_result;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<Crc32>
+PortableFileBackedProtoLog<ProtoT>::ComputeChecksum(
+    const Filesystem* filesystem, const std::string& file_path,
+    Crc32 initial_crc, int64_t start, int64_t end) {
+  // Extends `initial_crc` with the bytes of `file_path` in [start, end).
+  auto mmapped_file = MemoryMappedFile(*filesystem, file_path,
+                                       MemoryMappedFile::Strategy::READ_ONLY);
+  Crc32 new_crc(initial_crc.Get());
+
+  if (start < 0) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Starting checksum offset of file '%s' must be greater than 0, was "
+        "%lld",
+        file_path.c_str(), static_cast<long long>(start)));
+  }
+
+  if (end < start) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Ending checksum offset of file '%s' must be greater than start "
+        "'%lld', was '%lld'",
+        file_path.c_str(), static_cast<long long>(start),
+        static_cast<long long>(end)));
+  }
+
+  int64_t file_size = filesystem->GetFileSize(file_path.c_str());
+  if (file_size == Filesystem::kBadFileSize) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Bad file size '", file_path, "'"));
+  }
+  if (end > file_size) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "Ending checksum offset of file '%s' must be within "
+        "file size of %lld, was %lld",
+        file_path.c_str(), static_cast<long long>(file_size),
+        static_cast<long long>(end)));
+  }
+
+  switch (GetArchitecture()) {
+    case Architecture::BIT_64: {
+      // Don't mmap in chunks here since mmapping can be harmful on 64-bit
+      // devices where mmap/munmap calls need the mmap write semaphore, which
+      // blocks mmap/munmap/mprotect and all page faults from executing while
+      // they run. On 64-bit devices, this doesn't actually load into memory, it
+      // just makes the file faultable. So the whole file should be ok.
+      // b/185822878.
+      ICING_RETURN_IF_ERROR(mmapped_file.Remap(start, end - start));
+      auto mmap_str = std::string_view(mmapped_file.region(), end - start);
+      new_crc.Append(mmap_str);
+      break;
+    }
+    case Architecture::BIT_32:
+      [[fallthrough]];
+    case Architecture::UNKNOWN: {
+      // 32-bit devices only have 4GB of RAM. Mmap in chunks to not use up too
+      // much memory at once. If we're unknown, then also chunk it because we're
+      // not sure what the device can handle.
+      // Offsets stay 64-bit: an int counter would overflow on logs past 2GiB.
+      for (int64_t offset = start; offset < end; offset += kMmapChunkSize) {
+        // Don't read past `end`: the last chunk may be shorter than the rest.
+        const int64_t remaining = end - offset;
+        const int64_t chunk_size =
+            remaining < kMmapChunkSize ? remaining : kMmapChunkSize;
+        ICING_RETURN_IF_ERROR(mmapped_file.Remap(offset, chunk_size));
+        auto mmap_str = std::string_view(mmapped_file.region(), chunk_size);
+        new_crc.Append(mmap_str);
+      }
+      break;
+    }
+  }
+  return new_crc;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<int64_t>
+PortableFileBackedProtoLog<ProtoT>::WriteProto(const ProtoT& proto) {
+  // Appends `proto` (compressed if the header says so) behind its metadata
+  // word and returns the file position captured before the write.
+  int64_t proto_size = proto.ByteSizeLong();
+  int64_t current_position = filesystem_->GetCurrentPosition(fd_.get());
+
+  if (proto_size > header_->GetMaxProtoSize()) {
+    return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+        "proto_size, %lld, was too large to write. Max is %d",
+        static_cast<long long>(proto_size), header_->GetMaxProtoSize()));
+  }
+
+  // At this point, we've guaranteed that proto_size is under kMaxProtoSize
+  // (see ::Create), so we can safely store it in an int.
+  int final_size = 0;
+
+  std::string proto_str;
+  google::protobuf::io::StringOutputStream proto_stream(&proto_str);
+
+  if (header_->GetCompressFlag()) {
+    google::protobuf::io::GzipOutputStream::Options options;
+    options.format = google::protobuf::io::GzipOutputStream::ZLIB;
+    options.compression_level = kDeflateCompressionLevel;
+
+    google::protobuf::io::GzipOutputStream compressing_stream(&proto_stream,
+                                                              options);
+
+    bool success = proto.SerializeToZeroCopyStream(&compressing_stream) &&
+                   compressing_stream.Close();
+    if (!success) {
+      return absl_ports::InternalError("Error compressing proto.");
+    }
+
+    final_size = proto_str.size();
+
+    // In case the compressed proto is larger than the original proto, we also
+    // can't write it.
+    if (final_size > header_->GetMaxProtoSize()) {
+      return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+          "Compressed proto size, %d, was greater than "
+          "max_proto_size, %d",
+          final_size, header_->GetMaxProtoSize()));
+    }
+  } else {
+    // No compression: serialize straight into the buffer. A failure must not
+    // be ignored, or an incomplete proto would be written to the log.
+    if (!proto.SerializeToZeroCopyStream(&proto_stream)) {
+      return absl_ports::InternalError("Error serializing proto.");
+    }
+    final_size = proto_str.size();
+  }
+
+  // 1st byte for magic, next 3 bytes for proto size.
+  const int32_t host_order_metadata = (kProtoMagic << 24) | final_size;
+
+  // Write the metadata now that the (possibly compressed) size is known.
+  ICING_RETURN_IF_ERROR(
+      WriteProtoMetadata(filesystem_, fd_.get(), host_order_metadata));
+
+  // Write the serialized proto
+  if (!filesystem_->Write(fd_.get(), proto_str.data(), proto_str.size())) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to write proto to: ", file_path_));
+  }
+
+  return current_position;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<ProtoT>
+PortableFileBackedProtoLog<ProtoT>::ReadProto(int64_t file_offset) const {
+  int64_t file_size = filesystem_->GetFileSize(fd_.get());
+  MemoryMappedFile mmapped_file(*filesystem_, file_path_,
+                                MemoryMappedFile::Strategy::READ_ONLY);
+  if (file_offset >= file_size) {
+    // file_size is one past the last valid offset; report the last valid one.
+    return absl_ports::OutOfRangeError(
+        IcingStringUtil::StringPrintf("Trying to read from a location, %lld, "
+                                      "out of range of the file size, %lld",
+                                      static_cast<long long>(file_offset),
+                                      static_cast<long long>(file_size - 1)));
+  }
+
+  // Read out the metadata, which says how many bytes the stored proto takes.
+  ICING_ASSIGN_OR_RETURN(
+      int32_t metadata,
+      ReadProtoMetadata(&mmapped_file, file_offset, file_size));
+  const int64_t payload_offset = file_offset + sizeof(metadata);
+  int stored_size = GetProtoSize(metadata);
+
+  // That size comes from disk: make sure the payload lies inside the file.
+  if (payload_offset + stored_size > file_size) {
+    return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+        "Proto at offset %lld with size %d runs past the file size, %lld",
+        static_cast<long long>(file_offset), stored_size,
+        static_cast<long long>(file_size)));
+  }
+  ICING_RETURN_IF_ERROR(mmapped_file.Remap(payload_offset, stored_size));
+  if (IsEmptyBuffer(mmapped_file.region(), mmapped_file.region_size())) {
+    return absl_ports::NotFoundError("The proto data has been erased.");
+  }
+  // Deserialize proto. NOTE(review): the parse result is not checked here.
+  google::protobuf::io::ArrayInputStream proto_stream(
+      mmapped_file.mutable_region(), stored_size);
+  ProtoT proto;
+  if (header_->GetCompressFlag()) {
+    google::protobuf::io::GzipInputStream decompress_stream(&proto_stream);
+    proto.ParseFromZeroCopyStream(&decompress_stream);
+  } else {
+    proto.ParseFromZeroCopyStream(&proto_stream);
+  }
+  return proto;
+}
+
+template <typename ProtoT> // Zeroes the stored bytes of the proto at file_offset; the metadata word in front of it is left in place.
+libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::EraseProto(
+ int64_t file_offset) {
+ int64_t file_size = filesystem_->GetFileSize(fd_.get());
+ if (file_offset >= file_size) {
+ // file_size points to the next byte to write at, so subtract one to get
+ // the inclusive, actual size of file.
+ return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+ "Trying to erase data at a location, %lld, "
+ "out of range of the file size, %lld",
+ static_cast<long long>(file_offset),
+ static_cast<long long>(file_size - 1)));
+ }
+
+ MemoryMappedFile mmapped_file(
+ *filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_WRITE_AUTO_SYNC);
+
+ // Read out the metadata
+ ICING_ASSIGN_OR_RETURN(
+ int32_t metadata,
+ ReadProtoMetadata(&mmapped_file, file_offset, file_size));
+
+ ICING_RETURN_IF_ERROR(mmapped_file.Remap(file_offset + sizeof(metadata),
+ GetProtoSize(metadata))); // NOTE(review): the size read from disk is not checked against file_size in this function
+
+ // We need to update the crc checksum if the erased area is before the
+ // rewind position.
+ if (file_offset + sizeof(metadata) < header_->GetRewindOffset()) {
+ // We need to calculate [original string xor 0s].
+ // The xored string is the same as the original string because 0 xor 0 =
+ // 0, 1 xor 0 = 1.
+ const std::string_view xored_str(mmapped_file.region(),
+ mmapped_file.region_size());
+
+ Crc32 crc(header_->GetLogChecksum());
+ ICING_ASSIGN_OR_RETURN(
+ uint32_t new_crc,
+ crc.UpdateWithXor(xored_str,
+ /*full_data_size=*/header_->GetRewindOffset() -
+ kHeaderReservedBytes,
+ /*position=*/file_offset + sizeof(metadata) -
+ kHeaderReservedBytes));
+
+ header_->SetLogChecksum(new_crc); // the header is rewritten below, before the data itself is zeroed
+ header_->SetHeaderChecksum(header_->CalculateHeaderChecksum());
+
+ if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(),
+ sizeof(Header))) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Failed to update header to: ", file_path_));
+ }
+ }
+
+ memset(mmapped_file.mutable_region(), '\0', mmapped_file.region_size()); // zero the proto bytes through the read-write mapping
+ return libtextclassifier3::Status::OK;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<int64_t>
+PortableFileBackedProtoLog<ProtoT>::GetDiskUsage() const {
+  // Ask the filesystem about the log file; the sentinel value means failure.
+  const int64_t disk_usage = filesystem_->GetDiskUsage(file_path_.c_str());
+  if (disk_usage != Filesystem::kBadFileSize) return disk_usage;
+
+  return absl_ports::InternalError("Failed to get disk usage of proto log");
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<int64_t>
+PortableFileBackedProtoLog<ProtoT>::GetElementsFileSize() const {
+  const int64_t whole_file = filesystem_->GetFileSize(file_path_.c_str());
+  const bool size_known = whole_file != Filesystem::kBadFileSize;
+  // Only the proto entries count: drop the bytes reserved for the header.
+  if (size_known) return whole_file - kHeaderReservedBytes;
+  return absl_ports::InternalError(
+      "Failed to get file size of elments in the proto log");
+}
+
+template <typename ProtoT>
+PortableFileBackedProtoLog<ProtoT>::Iterator::Iterator(
+    const Filesystem& filesystem, const std::string& file_path,
+    int64_t initial_offset)
+    : mmapped_file_(filesystem, file_path,
+                    MemoryMappedFile::Strategy::READ_ONLY),
+      initial_offset_(initial_offset),
+      current_offset_(kInvalidOffset),
+      file_size_(filesystem.GetFileSize(file_path.c_str())) {
+  // The file size is captured once, here. If it cannot be determined, fall
+  // back to 0 so that every Advance() call reports out-of-range.
+  const bool size_unknown = (file_size_ == Filesystem::kBadFileSize);
+  if (size_unknown) file_size_ = 0;
+}
+
+template <typename ProtoT>
+libtextclassifier3::Status
+PortableFileBackedProtoLog<ProtoT>::Iterator::Advance() {
+  if (current_offset_ != kInvalidOffset) {
+    // Hop over the current entry: its metadata word plus the stored proto.
+    ICING_ASSIGN_OR_RETURN(
+        int32_t entry_metadata,
+        ReadProtoMetadata(&mmapped_file_, current_offset_, file_size_));
+    current_offset_ += sizeof(entry_metadata) + GetProtoSize(entry_metadata);
+  } else {
+    // Very first call: begin at the offset supplied to the constructor.
+    current_offset_ = initial_offset_;
+  }
+
+  // Landing at or beyond the end of the file means there are no more protos.
+  if (current_offset_ >= file_size_) {
+    return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+        "The next proto offset, %lld, is out of file range [0, %lld)",
+        static_cast<long long>(current_offset_),
+        static_cast<long long>(file_size_)));
+  }
+  return libtextclassifier3::Status::OK;
+}
+
+template <typename ProtoT>
+int64_t PortableFileBackedProtoLog<ProtoT>::Iterator::GetOffset() {
+ return current_offset_; // still kInvalidOffset if Advance() has not been called yet
+}
+
+template <typename ProtoT>
+typename PortableFileBackedProtoLog<ProtoT>::Iterator
+PortableFileBackedProtoLog<ProtoT>::GetIterator() {
+ return Iterator(*filesystem_, file_path_, // the iterator opens its own read-only mapping of the log file
+ /*initial_offset=*/kHeaderReservedBytes); // protos start right after the reserved header bytes
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<int32_t>
+PortableFileBackedProtoLog<ProtoT>::ReadProtoMetadata(
+    MemoryMappedFile* mmapped_file, int64_t file_offset, int64_t file_size) {
+  // Size on disk of the metadata word that precedes every proto.
+  constexpr int kMetadataBytes = sizeof(int32_t);
+
+  // The entry has to start inside the file...
+  if (file_offset >= file_size) {
+    return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
+        "offset, %lld, is out of file range [0, %lld)",
+        static_cast<long long>(file_offset),
+        static_cast<long long>(file_size)));
+  }
+
+  // ...and its metadata word has to end strictly before the end of the file.
+  if (file_offset + kMetadataBytes >= file_size) {
+    return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+        "Wrong metadata offset %lld, metadata doesn't fit in "
+        "with file range [0, %lld)",
+        static_cast<long long>(file_offset),
+        static_cast<long long>(file_size)));
+  }
+
+  // Map the metadata word and copy it out in its on-disk (portable) form.
+  ICING_RETURN_IF_ERROR(mmapped_file->Remap(file_offset, kMetadataBytes));
+  int32_t raw_metadata;
+  memcpy(&raw_metadata, mmapped_file->region(), kMetadataBytes);
+
+  // Convert to host byte order, then verify the magic byte before returning.
+  const int32_t host_order_metadata = gntohl(raw_metadata);
+  const uint8_t magic_on_disk = GetProtoMagic(host_order_metadata);
+  if (magic_on_disk == kProtoMagic) return host_order_metadata;
+
+  return absl_ports::InternalError(IcingStringUtil::StringPrintf(
+      "Failed to read kProtoMagic, expected %d, actual %d", kProtoMagic,
+      magic_on_disk));
+}
+
+template <typename ProtoT>
+libtextclassifier3::Status
+PortableFileBackedProtoLog<ProtoT>::WriteProtoMetadata(
+    const Filesystem* filesystem, int fd, int32_t host_order_metadata) {
+  // On disk the metadata word is kept in a portable byte order.
+  const int32_t wire_metadata = ghtonl(host_order_metadata);
+  const int wire_size = sizeof(wire_metadata);
+
+  const bool written = filesystem->Write(fd, &wire_metadata, wire_size);
+  if (written) {
+    return libtextclassifier3::Status::OK;
+  }
+
+  return absl_ports::InternalError(
+      absl_ports::StrCat("Failed to write proto metadata."));
+}
+
+template <typename ProtoT>
+libtextclassifier3::Status PortableFileBackedProtoLog<ProtoT>::PersistToDisk() {
+  int64_t file_size = filesystem_->GetFileSize(file_path_.c_str());
+  if (file_size == Filesystem::kBadFileSize) {
+    // Don't checksum or record a size the filesystem couldn't report.
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Bad file size '", file_path_, "'"));
+  }
+  if (file_size == header_->GetRewindOffset()) {
+    // No new protos appended, don't need to update the checksum.
+    return libtextclassifier3::Status::OK;
+  }
+
+  // A file shorter than the rewind offset has shrunk, so the entire log is
+  // checksummed again; otherwise the new tail extends the stored checksum.
+  const bool file_shrunk = file_size < header_->GetRewindOffset();
+  const int64_t crc_start =
+      file_shrunk ? kHeaderReservedBytes : header_->GetRewindOffset();
+  ICING_ASSIGN_OR_RETURN(
+      Crc32 crc,
+      ComputeChecksum(filesystem_, file_path_,
+                      file_shrunk ? Crc32() : Crc32(header_->GetLogChecksum()),
+                      crc_start, /*end=*/file_size));
+
+  header_->SetLogChecksum(crc.Get());
+  header_->SetRewindOffset(file_size);
+  header_->SetHeaderChecksum(header_->CalculateHeaderChecksum());
+
+  if (!filesystem_->PWrite(fd_.get(), /*offset=*/0, header_.get(),
+                           sizeof(Header)) ||
+      !filesystem_->DataSync(fd_.get())) {
+    return absl_ports::InternalError(
+        absl_ports::StrCat("Failed to update header to: ", file_path_));
+  }
+
+  return libtextclassifier3::Status::OK;
+}
+
+template <typename ProtoT>
+libtextclassifier3::StatusOr<Crc32>
+PortableFileBackedProtoLog<ProtoT>::ComputeChecksum() {  // whole log body
+  const int64_t log_end = filesystem_->GetFileSize(file_path_.c_str());
+  return ComputeChecksum(filesystem_, file_path_, Crc32(),
+                         /*start=*/kHeaderReservedBytes, /*end=*/log_end);
+}
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_FILE_PORTABLE_FILE_BACKED_PROTO_LOG_H_
diff --git a/icing/file/portable-file-backed-proto-log_benchmark.cc b/icing/file/portable-file-backed-proto-log_benchmark.cc
new file mode 100644
index 0000000..b1dfe12
--- /dev/null
+++ b/icing/file/portable-file-backed-proto-log_benchmark.cc
@@ -0,0 +1,211 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <cstdint>
+#include <random>
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/portable-file-backed-proto-log.h"
+#include "icing/legacy/core/icing-string-util.h"
+#include "icing/proto/document.pb.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/tmp-directory.h"
+
+// go/microbenchmarks
+//
+// To build and run on a local machine:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// icing/file:portable-file-backed-proto-log_benchmark
+//
+// $ blaze-bin/icing/file/portable-file-backed-proto-log_benchmark
+// --benchmarks=all
+//
+//
+// To build and run on an Android device (must be connected and rooted):
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// icing/file:portable-file-backed-proto-log_benchmark
+//
+// $ adb root
+//
+// $ adb push
+// blaze-bin/icing/file/portable-file-backed-proto-log_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/portable-file-backed-proto-log_benchmark
+// --benchmarks=all
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+static void BM_Write(benchmark::State& state) {
+  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;
+
+  // Each benchmark argument is the length of the random string payload.
+  const int string_length = state.range(0);
+  const Filesystem filesystem;
+  const std::string file_path = IcingStringUtil::StringPrintf(
+      "%s%s%d%s", GetTestTempDir().c_str(), "/proto_", string_length, ".log");
+
+  // Make sure it doesn't already exist.
+  filesystem.DeleteFile(file_path.c_str());
+
+  const bool compress = true;
+  const int max_proto_size = (1 << 24) - 1;  // 16 MiB
+  auto proto_log = ProtoLog::Create(&filesystem, file_path,
+                                    ProtoLog::Options(compress, max_proto_size))
+                       .ValueOrDie()
+                       .proto_log;
+
+  // One document whose only property holds a random alphanumeric string.
+  std::default_random_engine random;
+  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+  auto* string_property = document.add_properties();
+  string_property->set_name("string property");
+  string_property->add_string_values(
+      RandomString(kAlNumAlphabet, string_length, &random));
+
+  // Timed section: append the same document over and over.
+  for (auto _ : state) {
+    testing::DoNotOptimize(proto_log->WriteProto(document));
+  }
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
+                          string_length);
+
+  // Cleanup after ourselves
+  filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_Write)
+    ->Arg(1)
+    ->Arg(32)
+    ->Arg(512)
+    ->Arg(1024)
+    ->Arg(4 * 1024)
+    ->Arg(8 * 1024)
+    ->Arg(16 * 1024)
+    ->Arg(32 * 1024)
+    ->Arg(256 * 1024)
+    ->Arg(2 * 1024 * 1024)
+    ->Arg(8 * 1024 * 1024)
+    ->Arg(15 * 1024 * 1024);  // We do 15MiB here since our max proto size is
+                              // 16MiB, and we need some extra space for the
+                              // rest of the document properties
+
+static void BM_Read(benchmark::State& state) {
+  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;
+
+  // Each benchmark argument is the length of the random string payload.
+  const int string_length = state.range(0);
+  const Filesystem filesystem;
+  const std::string file_path = IcingStringUtil::StringPrintf(
+      "%s%s%d%s", GetTestTempDir().c_str(), "/proto_", string_length, ".log");
+
+  // Make sure it doesn't already exist.
+  filesystem.DeleteFile(file_path.c_str());
+
+  const bool compress = true;
+  const int max_proto_size = (1 << 24) - 1;  // 16 MiB
+  auto proto_log = ProtoLog::Create(&filesystem, file_path,
+                                    ProtoLog::Options(compress, max_proto_size))
+                       .ValueOrDie()
+                       .proto_log;
+
+  // One document whose only property holds a random alphanumeric string.
+  std::default_random_engine random;
+  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+  auto* string_property = document.add_properties();
+  string_property->set_name("string property");
+  string_property->add_string_values(
+      RandomString(kAlNumAlphabet, string_length, &random));
+
+  // Write it once up front so that the timed loop only measures reads.
+  ICING_ASSERT_OK_AND_ASSIGN(int64_t write_offset,
+                             proto_log->WriteProto(document));
+
+  for (auto _ : state) {
+    testing::DoNotOptimize(proto_log->ReadProto(write_offset));
+  }
+  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
+                          string_length);
+
+  // Cleanup after ourselves
+  filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_Read)
+    ->Arg(1)
+    ->Arg(32)
+    ->Arg(512)
+    ->Arg(1024)
+    ->Arg(4 * 1024)
+    ->Arg(8 * 1024)
+    ->Arg(16 * 1024)
+    ->Arg(32 * 1024)
+    ->Arg(256 * 1024)
+    ->Arg(2 * 1024 * 1024)
+    ->Arg(8 * 1024 * 1024)
+    ->Arg(15 * 1024 * 1024);  // We do 15MiB here since our max proto size is
+                              // 16MiB, and we need some extra space for the
+                              // rest of the document properties
+
+static void BM_ComputeChecksum(benchmark::State& state) {
+  using ProtoLog = PortableFileBackedProtoLog<DocumentProto>;
+
+  const Filesystem filesystem;
+  const std::string file_path = GetTestTempDir() + "/proto.log";
+
+  // Make sure it doesn't already exist.
+  filesystem.DeleteFile(file_path.c_str());
+
+  const bool compress = true;
+  const int max_proto_size = (1 << 24) - 1;  // 16 MiB
+  auto proto_log = ProtoLog::Create(&filesystem, file_path,
+                                    ProtoLog::Options(compress, max_proto_size))
+                       .ValueOrDie()
+                       .proto_log;
+
+  // Make each document 1KiB
+  const int string_length = 1024;
+  std::default_random_engine random;
+  DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+  auto* string_property = document.add_properties();
+  string_property->set_name("string property");
+  string_property->add_string_values(
+      RandomString(kAlNumAlphabet, string_length, &random));
+
+  // The benchmark argument is the number of documents written to the log
+  // before the checksum is timed.
+  const int num_docs = state.range(0);
+  for (int written = 0; written < num_docs; ++written) {
+    ICING_ASSERT_OK(proto_log->WriteProto(document));
+  }
+
+  // Timed section: checksum the whole log.
+  for (auto _ : state) {
+    testing::DoNotOptimize(proto_log->ComputeChecksum());
+  }
+
+  // Cleanup after ourselves
+  filesystem.DeleteFile(file_path.c_str());
+}
+BENCHMARK(BM_ComputeChecksum)->Range(1024, 1 << 20);
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/file/portable-file-backed-proto-log_test.cc b/icing/file/portable-file-backed-proto-log_test.cc
new file mode 100644
index 0000000..dfb67aa
--- /dev/null
+++ b/icing/file/portable-file-backed-proto-log_test.cc
@@ -0,0 +1,727 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/file/portable-file-backed-proto-log.h"
+
+#include <cstdint>
+#include <cstdlib>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/file/mock-filesystem.h"
+#include "icing/portable/equals-proto.h"
+#include "icing/proto/document.pb.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+using ::icing::lib::portable_equals_proto::EqualsProto;
+using ::testing::A;
+using ::testing::Eq;
+using ::testing::Gt;
+using ::testing::HasSubstr;
+using ::testing::Not;
+using ::testing::NotNull;
+using ::testing::Pair;
+using ::testing::Return;
+
+class PortableFileBackedProtoLogTest : public ::testing::Test {
+ protected:
+ // Adds a user-defined default constructor because a const member variable
+ // may make the compiler implicitly delete the default constructor.
+ // https://stackoverflow.com/a/47368753
+ PortableFileBackedProtoLogTest() {}
+
+ void SetUp() override {
+ file_path_ = GetTestTempDir() + "/proto_log";
+ filesystem_.DeleteFile(file_path_.c_str());
+ }
+
+ void TearDown() override { filesystem_.DeleteFile(file_path_.c_str()); }
+
+ const Filesystem filesystem_;
+ std::string file_path_;
+ bool compress_ = true;
+ int64_t max_proto_size_ = 256 * 1024; // 256 KiB
+};
+
+TEST_F(PortableFileBackedProtoLogTest, Initialize) {
+ // max_proto_size must be greater than 0
+ int invalid_max_proto_size = 0;
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, invalid_max_proto_size)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
+ max_proto_size_)));
+ EXPECT_THAT(create_result.proto_log, NotNull());
+ EXPECT_FALSE(create_result.has_data_loss());
+
+ // Can't recreate the same file with different options.
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ !compress_, max_proto_size_)),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PortableFileBackedProtoLogTest, ReservedSpaceForHeader) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
+ max_proto_size_)));
+
+ // With no protos written yet, the log file should be exactly the size of the
+ // reserved header space.
+ ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()), kHeaderReservedBytes);
+}
+
+TEST_F(PortableFileBackedProtoLogTest, WriteProtoTooLarge) {
+ int max_proto_size = 1;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
+ max_proto_size)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ // Proto is too large for the max_proto_size of 1 byte configured above.
+ ASSERT_THAT(proto_log->WriteProto(document),
+ StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
+}
+
+TEST_F(PortableFileBackedProtoLogTest, ReadProtoWrongKProtoMagic) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
+ max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write a proto
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t file_offset,
+ proto_log->WriteProto(document));
+
+ // Four bytes of metadata that don't carry the kProtoMagic that
+ // PortableFileBackedProtoLog expects at the start of a proto entry.
+ uint32_t wrong_magic = 0x7E000000;
+
+ // Sanity check that we opened the file correctly
+ int fd = filesystem_.OpenForWrite(file_path_.c_str());
+ ASSERT_GT(fd, 0);
+
+ // Write the wrong kProtoMagic in. The kProtoMagic is stored at the beginning
+ // of a proto entry.
+ filesystem_.PWrite(fd, file_offset, &wrong_magic, sizeof(wrong_magic));
+
+ ASSERT_THAT(proto_log->ReadProto(file_offset),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+}
+
+TEST_F(PortableFileBackedProtoLogTest, ReadWriteUncompressedProto) {
+ int last_offset;
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/false, max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write the first proto
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace1", "uri1").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(int written_position,
+ proto_log->WriteProto(document1));
+
+ int document1_offset = written_position;
+
+ // Check that what we read is what we wrote
+ ASSERT_THAT(proto_log->ReadProto(written_position),
+ IsOkAndHolds(EqualsProto(document1)));
+
+ // Write a second proto that's close to the max size. Leave some room for
+ // the rest of the proto properties.
+ std::string long_str(max_proto_size_ - 1024, 'a');
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .AddStringProperty("long_str", long_str)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(written_position,
+ proto_log->WriteProto(document2));
+
+ int document2_offset = written_position;
+ last_offset = written_position;
+ ASSERT_GT(document2_offset, document1_offset);
+
+ // Check the second proto
+ ASSERT_THAT(proto_log->ReadProto(written_position),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+ }
+
+ {
+ // Make a new proto_log with the same file_path, and make sure we
+ // can still write to the same underlying file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/false, max_proto_size_)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write a third proto
+ DocumentProto document3 =
+ DocumentBuilder().SetKey("namespace3", "uri3").Build();
+
+ ASSERT_THAT(recreated_proto_log->WriteProto(document3),
+ IsOkAndHolds(Gt(last_offset)));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, ReadWriteCompressedProto) {
+ int last_offset;
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write the first proto
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace1", "uri1").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(int written_position,
+ proto_log->WriteProto(document1));
+
+ int document1_offset = written_position;
+
+ // Check that what we read is what we wrote
+ ASSERT_THAT(proto_log->ReadProto(written_position),
+ IsOkAndHolds(EqualsProto(document1)));
+
+ // Write a second proto that's close to the max size. Leave some room for
+ // the rest of the proto properties.
+ std::string long_str(max_proto_size_ - 1024, 'a');
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace2", "uri2")
+ .AddStringProperty("long_str", long_str)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(written_position,
+ proto_log->WriteProto(document2));
+
+ int document2_offset = written_position;
+ last_offset = written_position;
+ ASSERT_GT(document2_offset, document1_offset);
+
+ // Check the second proto
+ ASSERT_THAT(proto_log->ReadProto(written_position),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+ }
+
+ {
+ // Make a new proto_log with the same file_path, and make sure we
+ // can still write to the same underlying file.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ /*compress_in=*/true, max_proto_size_)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write a third proto
+ DocumentProto document3 =
+ DocumentBuilder().SetKey("namespace3", "uri3").Build();
+
+ ASSERT_THAT(recreated_proto_log->WriteProto(document3),
+ IsOkAndHolds(Gt(last_offset)));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, CorruptHeader) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ EXPECT_FALSE(create_result.has_data_loss());
+ }
+
+ int corrupt_value = 24;
+
+ // Offset after the kMagic and the header_checksum.
+ int offset_after_checksum = 8;
+ filesystem_.PWrite(file_path_.c_str(), offset_after_checksum, &corrupt_value,
+ sizeof(corrupt_value));
+
+ {
+ // Reinitialize the same proto_log
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL,
+ HasSubstr("Invalid header checksum")));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, DifferentMagic) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)));
+ auto recreated_proto_log = std::move(create_result.proto_log);
+ EXPECT_FALSE(create_result.has_data_loss());
+
+ // Corrupt the magic that's stored at the beginning of the header.
+ int invalid_magic = -1;
+ filesystem_.PWrite(file_path_.c_str(), /*offset=*/0, &invalid_magic,
+ sizeof(invalid_magic));
+ }
+
+ {
+ // Reinitialize the same proto_log
+ ASSERT_THAT(PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)),
+ StatusIs(libtextclassifier3::StatusCode::INTERNAL,
+ HasSubstr("Invalid header kMagic")));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, CorruptContent) {
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ EXPECT_FALSE(create_result.has_data_loss());
+
+ DocumentProto document =
+ DocumentBuilder().SetKey("namespace1", "uri1").Build();
+
+ // Write and persist a document.
+ ICING_ASSERT_OK_AND_ASSIGN(int document_offset,
+ proto_log->WriteProto(document));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ // "Corrupt" the content written in the log.
+ document.set_uri("invalid");
+ std::string serialized_document = document.SerializeAsString();
+ filesystem_.PWrite(file_path_.c_str(), document_offset,
+ serialized_document.data(), serialized_document.size());
+ }
+
+ {
+ // We can recover, but we have data loss.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_TRUE(create_result.has_data_loss());
+ ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
+
+ // Lost everything in the log since the rewind position doesn't help if
+ // there's been data corruption within the persisted region
+ ASSERT_EQ(filesystem_.GetFileSize(file_path_.c_str()),
+ kHeaderReservedBytes);
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, PersistToDisk) {
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace1", "uri1").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace2", "uri2").Build();
+ int document1_offset, document2_offset;
+ int log_size;
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Write and persist the first proto
+ ICING_ASSERT_OK_AND_ASSIGN(document1_offset,
+ proto_log->WriteProto(document1));
+ ICING_ASSERT_OK(proto_log->PersistToDisk());
+
+ // Write, but don't explicitly persist the second proto
+ ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
+ proto_log->WriteProto(document2));
+
+ // Check that what we read is what we wrote
+ ASSERT_THAT(proto_log->ReadProto(document1_offset),
+ IsOkAndHolds(EqualsProto(document1)));
+ ASSERT_THAT(proto_log->ReadProto(document2_offset),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ log_size = filesystem_.GetFileSize(file_path_.c_str());
+ ASSERT_GT(log_size, 0);
+ }
+
+ {
+ // The header rewind position and checksum aren't updated in this "system
+ // crash" scenario.
+
+ std::string bad_proto =
+ "some incomplete proto that we didn't finish writing before the "
+ "system crashed";
+ filesystem_.PWrite(file_path_.c_str(), log_size, bad_proto.data(),
+ bad_proto.size());
+
+ // Double check that we actually wrote something to the underlying file
+ ASSERT_GT(filesystem_.GetFileSize(file_path_.c_str()), log_size);
+ }
+
+ {
+ // We can recover, but we have data loss
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_TRUE(create_result.has_data_loss());
+ ASSERT_THAT(create_result.data_loss, Eq(DataLoss::PARTIAL));
+
+ // Check that everything was persisted across instances
+ ASSERT_THAT(proto_log->ReadProto(document1_offset),
+ IsOkAndHolds(EqualsProto(document1)));
+ ASSERT_THAT(proto_log->ReadProto(document2_offset),
+ IsOkAndHolds(EqualsProto(document2)));
+
+ // We correctly rewound to the last good state.
+ ASSERT_EQ(log_size, filesystem_.GetFileSize(file_path_.c_str()));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, Iterator) {
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "uri1").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "uri2").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
+ max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ {
+ // Empty iterator
+ auto iterator = proto_log->GetIterator();
+ ASSERT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ }
+
+ {
+ // Iterates through some documents
+ ICING_ASSERT_OK(proto_log->WriteProto(document1));
+ ICING_ASSERT_OK(proto_log->WriteProto(document2));
+ auto iterator = proto_log->GetIterator();
+ // 1st proto
+ ICING_ASSERT_OK(iterator.Advance());
+ ASSERT_THAT(proto_log->ReadProto(iterator.GetOffset()),
+ IsOkAndHolds(EqualsProto(document1)));
+ // 2nd proto
+ ICING_ASSERT_OK(iterator.Advance());
+ ASSERT_THAT(proto_log->ReadProto(iterator.GetOffset()),
+ IsOkAndHolds(EqualsProto(document2)));
+ // Tries to advance
+ ASSERT_THAT(iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ }
+
+ {
+ // Iterator with bad filesystem
+ MockFilesystem mock_filesystem;
+ ON_CALL(mock_filesystem, GetFileSize(A<const char *>()))
+ .WillByDefault(Return(Filesystem::kBadFileSize));
+ PortableFileBackedProtoLog<DocumentProto>::Iterator bad_iterator(
+ mock_filesystem, file_path_, /*initial_offset=*/0);
+ ASSERT_THAT(bad_iterator.Advance(),
+ StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, ComputeChecksum) {
+ DocumentProto document = DocumentBuilder().SetKey("namespace", "uri").Build();
+ Crc32 checksum;
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ ICING_EXPECT_OK(proto_log->WriteProto(document));
+
+ ICING_ASSERT_OK_AND_ASSIGN(checksum, proto_log->ComputeChecksum());
+
+ // Calling it twice with no changes should get us the same checksum
+ EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
+ }
+
+ {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Checksum should be consistent across instances
+ EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
+
+ // PersistToDisk shouldn't affect the checksum value
+ ICING_EXPECT_OK(proto_log->PersistToDisk());
+ EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
+
+ // Check that modifying the log leads to a different checksum
+ ICING_EXPECT_OK(proto_log->WriteProto(document));
+ EXPECT_THAT(proto_log->ComputeChecksum(), IsOkAndHolds(Not(Eq(checksum))));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldSetZero) {
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "uri1").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
+ max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Writes and erases proto
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
+ proto_log->WriteProto(document1));
+ ICING_ASSERT_OK(proto_log->EraseProto(document1_offset));
+
+ // Checks if the erased area is set to 0.
+ int64_t file_size = filesystem_.GetFileSize(file_path_.c_str());
+ MemoryMappedFile mmapped_file(filesystem_, file_path_,
+ MemoryMappedFile::Strategy::READ_ONLY);
+
+ // document1_offset + sizeof(int) is the start byte of the proto where
+ // sizeof(int) is the size of the proto metadata.
+ mmapped_file.Remap(document1_offset + sizeof(int), file_size - 1);
+ for (size_t i = 0; i < mmapped_file.region_size(); ++i) {
+ ASSERT_THAT(mmapped_file.region()[i], Eq(0));
+ }
+}
+
+TEST_F(PortableFileBackedProtoLogTest, EraseProtoShouldReturnNotFound) {
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "uri1").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "uri2").Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(compress_,
+ max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Writes 2 protos
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
+ proto_log->WriteProto(document1));
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document2_offset,
+ proto_log->WriteProto(document2));
+
+ // Erases the first proto
+ ICING_ASSERT_OK(proto_log->EraseProto(document1_offset));
+
+ // The first proto has been erased.
+ ASSERT_THAT(proto_log->ReadProto(document1_offset),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ // The second proto should be returned.
+ ASSERT_THAT(proto_log->ReadProto(document2_offset),
+ IsOkAndHolds(EqualsProto(document2)));
+}
+
+TEST_F(PortableFileBackedProtoLogTest, ChecksumShouldBeCorrectWithErasedProto) {
+ DocumentProto document1 =
+ DocumentBuilder().SetKey("namespace", "uri1").Build();
+ DocumentProto document2 =
+ DocumentBuilder().SetKey("namespace", "uri2").Build();
+ DocumentProto document3 =
+ DocumentBuilder().SetKey("namespace", "uri3").Build();
+ DocumentProto document4 =
+ DocumentBuilder().SetKey("namespace", "uri4").Build();
+
+ int64_t document2_offset;
+ int64_t document3_offset;
+
+ {
+ // Erase data after the rewind position. This won't update the checksum
+ // immediately.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Writes 3 protos
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t document1_offset,
+ proto_log->WriteProto(document1));
+ ICING_ASSERT_OK_AND_ASSIGN(document2_offset,
+ proto_log->WriteProto(document2));
+ ICING_ASSERT_OK_AND_ASSIGN(document3_offset,
+ proto_log->WriteProto(document3));
+
+ // Erases the 1st proto, checksum won't be updated immediately because the
+ // rewind position is 0.
+ ICING_ASSERT_OK(proto_log->EraseProto(document1_offset));
+
+ EXPECT_THAT(proto_log->ComputeChecksum(),
+ IsOkAndHolds(Eq(Crc32(2175574628))));
+ } // New checksum is updated in destructor.
+
+ {
+ // Erase data before the rewind position. This will update the checksum
+ // immediately.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Erases the 2nd proto that is now before the rewind position. Checksum
+ // is updated.
+ ICING_ASSERT_OK(proto_log->EraseProto(document2_offset));
+
+ EXPECT_THAT(proto_log->ComputeChecksum(),
+ IsOkAndHolds(Eq(Crc32(790877774))));
+ }
+
+ {
+ // Append data and erase data before the rewind position. This will update
+ // the checksum twice: in EraseProto() and destructor.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ ASSERT_FALSE(create_result.has_data_loss());
+
+ // Append a new document which is after the rewind position.
+ ICING_ASSERT_OK(proto_log->WriteProto(document4));
+
+ // Erases the 3rd proto that is now before the rewind position. Checksum
+ // is updated.
+ ICING_ASSERT_OK(proto_log->EraseProto(document3_offset));
+
+ EXPECT_THAT(proto_log->ComputeChecksum(),
+ IsOkAndHolds(Eq(Crc32(2344803210))));
+ } // Checksum is updated with the newly appended document.
+
+ {
+ // A successful creation means that the checksum matches.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PortableFileBackedProtoLog<DocumentProto>::CreateResult create_result,
+ PortableFileBackedProtoLog<DocumentProto>::Create(
+ &filesystem_, file_path_,
+ PortableFileBackedProtoLog<DocumentProto>::Options(
+ compress_, max_proto_size_)));
+ auto proto_log = std::move(create_result.proto_log);
+ EXPECT_FALSE(create_result.has_data_loss());
+ }
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine-with-icu-file_test.cc b/icing/icing-search-engine-with-icu-file_test.cc
index 5a9327e..48e81e5 100644
--- a/icing/icing-search-engine-with-icu-file_test.cc
+++ b/icing/icing-search-engine-with-icu-file_test.cc
@@ -27,6 +27,7 @@
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/tmp-directory.h"
@@ -36,6 +37,14 @@
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::Eq;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED =
+ PropertyConfigProto_Cardinality_Code_REQUIRED;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
+
std::string GetTestBaseDir() {
return GetTestTempDir() + "/icing_with_icu_files";
}
@@ -55,23 +64,6 @@
.Build();
}
-SchemaProto CreateMessageSchema() {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- return schema;
-}
-
ScoringSpecProto GetDefaultScoringSpec() {
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
@@ -81,15 +73,31 @@
TEST(IcingSearchEngineWithIcuFileTest, ShouldInitialize) {
IcingSearchEngine icing(GetDefaultIcingOptions());
EXPECT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ EXPECT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
}
TEST(IcingSearchEngineWithIcuFileTest, ShouldIndexAndSearch) {
IcingSearchEngine icing(GetDefaultIcingOptions());
ASSERT_THAT(icing.Initialize().status().code(), Eq(StatusProto::OK));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status().code(),
- Eq(StatusProto::OK));
+
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status().code(), Eq(StatusProto::OK));
DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
ASSERT_THAT(icing.Put(document_one).status().code(), Eq(StatusProto::OK));
@@ -115,8 +123,8 @@
// The token is a random number so we don't verify it.
expected_search_result_proto.set_next_page_token(
search_result_proto.next_page_token());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
} // namespace
diff --git a/icing/icing-search-engine.cc b/icing/icing-search-engine.cc
index 791368a..e9865e4 100644
--- a/icing/icing-search-engine.cc
+++ b/icing/icing-search-engine.cc
@@ -27,6 +27,8 @@
#include "icing/absl_ports/canonical_errors.h"
#include "icing/absl_ports/mutex.h"
#include "icing/absl_ports/str_cat.h"
+#include "icing/file/destructible-file.h"
+#include "icing/file/file-backed-proto.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/index-processor.h"
@@ -35,6 +37,7 @@
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
+#include "icing/proto/internal/optimize.pb.h"
#include "icing/proto/logging.pb.h"
#include "icing/proto/optimize.pb.h"
#include "icing/proto/persist.pb.h"
@@ -73,8 +76,8 @@
constexpr std::string_view kDocumentSubfolderName = "document_dir";
constexpr std::string_view kIndexSubfolderName = "index_dir";
constexpr std::string_view kSchemaSubfolderName = "schema_dir";
-constexpr std::string_view kIcingSearchEngineHeaderFilename =
- "icing_search_engine_header";
+constexpr std::string_view kSetSchemaMarkerFilename = "set_schema_marker";
+constexpr std::string_view kOptimizeStatusFilename = "optimize_status";
libtextclassifier3::Status ValidateOptions(
const IcingSearchEngineOptions& options) {
@@ -94,6 +97,21 @@
return absl_ports::InvalidArgumentError(
"ResultSpecProto.num_per_page cannot be negative.");
}
+ std::unordered_set<std::string> unique_namespaces;
+ for (const ResultSpecProto::ResultGrouping& result_grouping :
+ result_spec.result_groupings()) {
+ if (result_grouping.max_results() <= 0) {
+ return absl_ports::InvalidArgumentError(
+ "Cannot specify a result grouping with max results <= 0.");
+ }
+ for (const std::string& name_space : result_grouping.namespaces()) {
+ if (unique_namespaces.count(name_space) > 0) {
+ return absl_ports::InvalidArgumentError(
+ "Namespaces must be unique across result groups.");
+ }
+ unique_namespaces.insert(name_space);
+ }
+ }
return libtextclassifier3::Status::OK;
}
@@ -119,10 +137,6 @@
return index_processor_options;
}
-std::string MakeHeaderFilename(const std::string& base_dir) {
- return absl_ports::StrCat(base_dir, "/", kIcingSearchEngineHeaderFilename);
-}
-
// Document store files are in a standalone subfolder for easier file
// management. We can delete and recreate the subfolder and not touch/affect
// anything else.
@@ -150,6 +164,9 @@
std::string MakeSchemaDirectoryPath(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kSchemaSubfolderName);
}
+std::string MakeSetSchemaMarkerFilePath(const std::string& base_dir) {
+ return absl_ports::StrCat(base_dir, "/", kSetSchemaMarkerFilename);
+}
void TransformStatus(const libtextclassifier3::Status& internal_status,
StatusProto* status_proto) {
@@ -238,15 +255,13 @@
filesystem_(std::move(filesystem)),
icing_filesystem_(std::move(icing_filesystem)),
clock_(std::move(clock)),
- result_state_manager_(performance_configuration_.max_num_hits_per_query,
- performance_configuration_.max_num_cache_results),
jni_cache_(std::move(jni_cache)) {
ICING_VLOG(1) << "Creating IcingSearchEngine in dir: " << options_.base_dir();
}
IcingSearchEngine::~IcingSearchEngine() {
if (initialized_) {
- if (PersistToDisk().status().code() != StatusProto::OK) {
+ if (PersistToDisk(PersistType::FULL).status().code() != StatusProto::OK) {
ICING_LOG(ERROR)
<< "Error persisting to disk in IcingSearchEngine destructor";
}
@@ -270,8 +285,8 @@
InitializeResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
- NativeInitializeStats* initialize_stats =
- result_proto.mutable_native_initialize_stats();
+ InitializeStatsProto* initialize_stats =
+ result_proto.mutable_initialize_stats();
if (initialized_) {
// Already initialized.
result_status->set_code(StatusProto::OK);
@@ -281,73 +296,7 @@
return result_proto;
}
- // Releases result / query cache if any
- result_state_manager_.InvalidateAllResultStates();
-
libtextclassifier3::Status status = InitializeMembers(initialize_stats);
- if (!status.ok()) {
- TransformStatus(status, result_status);
- initialize_stats->set_latency_ms(
- initialize_timer->GetElapsedMilliseconds());
- return result_proto;
- }
-
- // Even if each subcomponent initialized fine independently, we need to
- // check if they're consistent with each other.
- if (!CheckConsistency().ok()) {
- // The total checksum doesn't match the stored value, it could be one of the
- // following cases:
- // 1. Icing is initialized the first time in this directory.
- // 2. Non-checksumed changes have been made to some files.
- if (index_->last_added_document_id() == kInvalidDocumentId &&
- document_store_->last_added_document_id() == kInvalidDocumentId &&
- absl_ports::IsNotFound(schema_store_->GetSchema().status())) {
- // First time initialize. Not recovering but creating all the files.
- // We need to explicitly clear the recovery-related fields because some
- // sub-components may not be able to tell if the storage is being
- // initialized the first time or has lost some files. Sub-components may
- // already have set these fields in earlier steps.
- *initialize_stats = NativeInitializeStats();
- status = RegenerateDerivedFiles();
- } else {
- ICING_VLOG(1)
- << "IcingSearchEngine in inconsistent state, regenerating all "
- "derived data";
- // Total checksum mismatch may not be the root cause of document store
- // recovery. Preserve the root cause that was set by the document store.
- bool should_log_document_store_recovery_cause =
- initialize_stats->document_store_recovery_cause() ==
- NativeInitializeStats::NONE;
- if (should_log_document_store_recovery_cause) {
- initialize_stats->set_document_store_recovery_cause(
- NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH);
- }
- initialize_stats->set_index_restoration_cause(
- NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH);
- status = RegenerateDerivedFiles(initialize_stats,
- should_log_document_store_recovery_cause);
- }
- } else {
- DocumentId last_stored_document_id =
- document_store_->last_added_document_id();
- DocumentId last_indexed_document_id = index_->last_added_document_id();
- if (last_stored_document_id != last_indexed_document_id) {
- if (last_stored_document_id == kInvalidDocumentId) {
- // Document store is empty but index is not. Reset the index.
- status = index_->Reset();
- } else {
- // Index is inconsistent with the document store, we need to restore the
- // index.
- initialize_stats->set_index_restoration_cause(
- NativeInitializeStats::INCONSISTENT_WITH_GROUND_TRUTH);
- std::unique_ptr<Timer> index_restore_timer = clock_->GetNewTimer();
- status = RestoreIndexIfNeeded();
- initialize_stats->set_index_restoration_latency_ms(
- index_restore_timer->GetElapsedMilliseconds());
- }
- }
- }
-
if (status.ok() || absl_ports::IsDataLoss(status)) {
initialized_ = true;
}
@@ -357,11 +306,10 @@
}
libtextclassifier3::Status IcingSearchEngine::InitializeMembers(
- NativeInitializeStats* initialize_stats) {
+ InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
ICING_RETURN_IF_ERROR(InitializeOptions());
ICING_RETURN_IF_ERROR(InitializeSchemaStore(initialize_stats));
- ICING_RETURN_IF_ERROR(InitializeDocumentStore(initialize_stats));
// TODO(b/156383798) : Resolve how to specify the locale.
language_segmenter_factory::SegmenterOptions segmenter_options(
@@ -372,9 +320,75 @@
TC3_ASSIGN_OR_RETURN(normalizer_,
normalizer_factory::Create(options_.max_token_length()));
- ICING_RETURN_IF_ERROR(InitializeIndex(initialize_stats));
+ std::string marker_filepath =
+ MakeSetSchemaMarkerFilePath(options_.base_dir());
+ libtextclassifier3::Status status;
+ if (absl_ports::IsNotFound(schema_store_->GetSchema().status())) {
+ // The schema was either lost or never set before. Wipe out the doc store
+ // and index directories and initialize them from scratch.
+ const std::string doc_store_dir =
+ MakeDocumentDirectoryPath(options_.base_dir());
+ const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
+ if (!filesystem_->DeleteDirectoryRecursively(doc_store_dir.c_str()) ||
+ !filesystem_->DeleteDirectoryRecursively(index_dir.c_str())) {
+ return absl_ports::InternalError(absl_ports::StrCat(
+ "Could not delete directories: ", index_dir, " and ", doc_store_dir));
+ }
+ ICING_RETURN_IF_ERROR(InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/false, initialize_stats));
+ status = InitializeIndex(initialize_stats);
+ } else if (filesystem_->FileExists(marker_filepath.c_str())) {
+ // If the marker file is still around then something wonky happened when we
+ // last tried to set the schema.
+ ICING_RETURN_IF_ERROR(InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/true, initialize_stats));
+ initialize_stats->set_document_store_recovery_cause(
+ InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
- return libtextclassifier3::Status::OK;
+ // We're going to need to build the index from scratch. So just delete its
+ // files now.
+ const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
+ Index::Options index_options(index_dir, options_.index_merge_size());
+ if (!filesystem_->DeleteDirectoryRecursively(index_dir.c_str()) ||
+ !filesystem_->CreateDirectoryRecursively(index_dir.c_str())) {
+ return absl_ports::InternalError(
+ absl_ports::StrCat("Could not recreate directory: ", index_dir));
+ }
+ ICING_ASSIGN_OR_RETURN(index_,
+ Index::Create(index_options, filesystem_.get(),
+ icing_filesystem_.get()));
+
+ std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
+ IndexRestorationResult restore_result = RestoreIndexIfNeeded();
+ status = std::move(restore_result.status);
+ // DATA_LOSS means that we have successfully initialized and re-added
+ // content to the index. Some indexed content was lost, but otherwise the
+ // index is in a valid state and can be queried.
+ if (!status.ok() && !absl_ports::IsDataLoss(status)) {
+ return status;
+ }
+
+ // Delete the marker file to indicate that everything is now in sync with
+ // whatever changes were made to the schema.
+ filesystem_->DeleteFile(marker_filepath.c_str());
+
+ initialize_stats->set_index_restoration_latency_ms(
+ restore_timer->GetElapsedMilliseconds());
+ initialize_stats->set_index_restoration_cause(
+ InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC);
+ } else {
+ ICING_RETURN_IF_ERROR(InitializeDocumentStore(
+ /*force_recovery_and_revalidate_documents=*/false, initialize_stats));
+ status = InitializeIndex(initialize_stats);
+ if (!status.ok() && !absl_ports::IsDataLoss(status)) {
+ return status;
+ }
+ }
+
+ result_state_manager_ = std::make_unique<ResultStateManager>(
+ performance_configuration_.max_num_total_hits, *document_store_);
+
+ return status;
}
libtextclassifier3::Status IcingSearchEngine::InitializeOptions() {
@@ -390,7 +404,7 @@
}
libtextclassifier3::Status IcingSearchEngine::InitializeSchemaStore(
- NativeInitializeStats* initialize_stats) {
+ InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
const std::string schema_store_dir =
@@ -408,7 +422,8 @@
}
libtextclassifier3::Status IcingSearchEngine::InitializeDocumentStore(
- NativeInitializeStats* initialize_stats) {
+ bool force_recovery_and_revalidate_documents,
+ InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
const std::string document_dir =
@@ -420,15 +435,16 @@
}
ICING_ASSIGN_OR_RETURN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(filesystem_.get(), document_dir, clock_.get(),
- schema_store_.get(), initialize_stats));
+ DocumentStore::Create(
+ filesystem_.get(), document_dir, clock_.get(), schema_store_.get(),
+ force_recovery_and_revalidate_documents, initialize_stats));
document_store_ = std::move(create_result.document_store);
return libtextclassifier3::Status::OK;
}
libtextclassifier3::Status IcingSearchEngine::InitializeIndex(
- NativeInitializeStats* initialize_stats) {
+ InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(initialize_stats);
const std::string index_dir = MakeIndexDirectoryPath(options_.base_dir());
@@ -439,6 +455,7 @@
}
Index::Options index_options(index_dir, options_.index_merge_size());
+ InitializeStatsProto::RecoveryCause recovery_cause;
auto index_or =
Index::Create(index_options, filesystem_.get(), icing_filesystem_.get());
if (!index_or.ok()) {
@@ -448,88 +465,28 @@
absl_ports::StrCat("Could not recreate directory: ", index_dir));
}
- initialize_stats->set_index_restoration_cause(
- NativeInitializeStats::IO_ERROR);
+ recovery_cause = InitializeStatsProto::IO_ERROR;
// Try recreating it from scratch and re-indexing everything.
ICING_ASSIGN_OR_RETURN(index_,
Index::Create(index_options, filesystem_.get(),
icing_filesystem_.get()));
-
- std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
- ICING_RETURN_IF_ERROR(RestoreIndexIfNeeded());
- initialize_stats->set_index_restoration_latency_ms(
- restore_timer->GetElapsedMilliseconds());
} else {
// Index was created fine.
index_ = std::move(index_or).ValueOrDie();
+ // If a recover does have to happen, then it must be because the index is
+ // out of sync with the document store.
+ recovery_cause = InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH;
}
- return libtextclassifier3::Status::OK;
-}
-
-libtextclassifier3::Status IcingSearchEngine::CheckConsistency() {
- if (!HeaderExists()) {
- // Without a header file, we have no checksum and can't even detect
- // inconsistencies
- return absl_ports::NotFoundError("No header file found.");
- }
-
- // Header does exist, verify that the header looks fine.
- IcingSearchEngine::Header header;
- if (!filesystem_->Read(MakeHeaderFilename(options_.base_dir()).c_str(),
- &header, sizeof(header))) {
- return absl_ports::InternalError(absl_ports::StrCat(
- "Couldn't read: ", MakeHeaderFilename(options_.base_dir())));
- }
-
- if (header.magic != IcingSearchEngine::Header::kMagic) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Invalid header kMagic for file: ",
- MakeHeaderFilename(options_.base_dir())));
- }
-
- ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- if (checksum.Get() != header.checksum) {
- return absl_ports::InternalError(
- "IcingSearchEngine checksum doesn't match");
- }
-
- return libtextclassifier3::Status::OK;
-}
-
-libtextclassifier3::Status IcingSearchEngine::RegenerateDerivedFiles(
- NativeInitializeStats* initialize_stats, bool log_document_store_stats) {
- // Measure the latency of the data recovery. The cause of the recovery should
- // be logged by the caller.
- std::unique_ptr<Timer> timer = clock_->GetNewTimer();
- ICING_RETURN_IF_ERROR(
- document_store_->UpdateSchemaStore(schema_store_.get()));
- if (initialize_stats != nullptr && log_document_store_stats) {
- initialize_stats->set_document_store_recovery_latency_ms(
- timer->GetElapsedMilliseconds());
- }
- // Restart timer.
- timer = clock_->GetNewTimer();
- ICING_RETURN_IF_ERROR(index_->Reset());
- ICING_RETURN_IF_ERROR(RestoreIndexIfNeeded());
- if (initialize_stats != nullptr) {
+ std::unique_ptr<Timer> restore_timer = clock_->GetNewTimer();
+ IndexRestorationResult restore_result = RestoreIndexIfNeeded();
+ if (restore_result.needed_restoration) {
initialize_stats->set_index_restoration_latency_ms(
- timer->GetElapsedMilliseconds());
+ restore_timer->GetElapsedMilliseconds());
+ initialize_stats->set_index_restoration_cause(recovery_cause);
}
-
- const std::string header_file =
- MakeHeaderFilename(options_.base_dir().c_str());
- if (HeaderExists()) {
- if (!filesystem_->DeleteFile(header_file.c_str())) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Unable to delete file: ", header_file));
- }
- }
- ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
-
- return libtextclassifier3::Status::OK;
+ return restore_result.status;
}
SetSchemaResultProto IcingSearchEngine::SetSchema(
@@ -564,6 +521,15 @@
}
bool lost_previous_schema = lost_previous_schema_or.ValueOrDie();
+ std::string marker_filepath =
+ MakeSetSchemaMarkerFilePath(options_.base_dir());
+ // Create the marker file indicating that we are going to apply a schema
+ // change. No need to write anything to the marker file - its existence is the
+ // only thing that matters. The marker file is used to indicate if we
+ // encountered a crash or a power loss while updating the schema and other
+ // files. So set it up to be deleted as long as we return from this function.
+ DestructibleFile marker_file(marker_filepath, filesystem_.get());
+
auto set_schema_result_or = schema_store_->SetSchema(
std::move(new_schema), ignore_errors_and_delete_documents);
if (!set_schema_result_or.ok()) {
@@ -611,8 +577,12 @@
return result_proto;
}
- status = RestoreIndexIfNeeded();
- if (!status.ok()) {
+ IndexRestorationResult restore_result = RestoreIndexIfNeeded();
+ // DATA_LOSS means content was re-added and the index is valid/queryable,
+ // though some indexed content was lost. Any other failure must be reported
+ // from restore_result.status; `status` is not assigned the result here.
+ if (!restore_result.status.ok() &&
+ !absl_ports::IsDataLoss(restore_result.status)) {
- TransformStatus(status, result_status);
+ TransformStatus(restore_result.status, result_status);
return result_proto;
}
@@ -623,6 +593,7 @@
result_status->set_code(StatusProto::FAILED_PRECONDITION);
result_status->set_message("Schema is incompatible.");
}
+
return result_proto;
}
@@ -682,8 +653,8 @@
PutResultProto result_proto;
StatusProto* result_status = result_proto.mutable_status();
- NativePutDocumentStats* put_document_stats =
- result_proto.mutable_native_put_document_stats();
+ PutDocumentStatsProto* put_document_stats =
+ result_proto.mutable_put_document_stats();
// Lock must be acquired before validation because the DocumentStore uses
// the schema file to validate, and the schema could be changed in
@@ -833,8 +804,8 @@
return result_proto;
}
- NativeDeleteStats* delete_stats = result_proto.mutable_delete_stats();
- delete_stats->set_delete_type(NativeDeleteStats::DeleteType::SINGLE);
+ DeleteStatsProto* delete_stats = result_proto.mutable_delete_stats();
+ delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SINGLE);
std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
// TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
@@ -867,8 +838,8 @@
return delete_result;
}
- NativeDeleteStats* delete_stats = delete_result.mutable_delete_stats();
- delete_stats->set_delete_type(NativeDeleteStats::DeleteType::NAMESPACE);
+ DeleteStatsProto* delete_stats = delete_result.mutable_delete_stats();
+ delete_stats->set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE);
std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
// TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
@@ -901,8 +872,8 @@
return delete_result;
}
- NativeDeleteStats* delete_stats = delete_result.mutable_delete_stats();
- delete_stats->set_delete_type(NativeDeleteStats::DeleteType::SCHEMA_TYPE);
+ DeleteStatsProto* delete_stats = delete_result.mutable_delete_stats();
+ delete_stats->set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE);
std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
// TODO(b/144458732): Implement a more robust version of TC_RETURN_IF_ERROR
@@ -937,8 +908,8 @@
return result_proto;
}
- NativeDeleteStats* delete_stats = result_proto.mutable_delete_stats();
- delete_stats->set_delete_type(NativeDeleteStats::DeleteType::QUERY);
+ DeleteStatsProto* delete_stats = result_proto.mutable_delete_stats();
+ delete_stats->set_delete_type(DeleteStatsProto::DeleteType::QUERY);
std::unique_ptr<Timer> delete_timer = clock_->GetNewTimer();
libtextclassifier3::Status status =
@@ -951,7 +922,7 @@
// Gets unordered results from query processor
auto query_processor_or = QueryProcessor::Create(
index_.get(), language_segmenter_.get(), normalizer_.get(),
- document_store_.get(), schema_store_.get(), clock_.get());
+ document_store_.get(), schema_store_.get());
if (!query_processor_or.ok()) {
TransformStatus(query_processor_or.status(), result_status);
return result_proto;
@@ -969,6 +940,7 @@
ICING_VLOG(2) << "Deleting the docs that matched the query.";
int num_deleted = 0;
+
while (query_results.root_iterator->Advance().ok()) {
ICING_VLOG(3) << "Deleting doc "
<< query_results.root_iterator->doc_hit_info().document_id();
@@ -980,6 +952,7 @@
return result_proto;
}
}
+
if (num_deleted > 0) {
result_proto.mutable_status()->set_code(StatusProto::OK);
} else {
@@ -992,7 +965,8 @@
return result_proto;
}
-PersistToDiskResultProto IcingSearchEngine::PersistToDisk() {
+PersistToDiskResultProto IcingSearchEngine::PersistToDisk(
+ PersistType::Code persist_type) {
ICING_VLOG(1) << "Persisting data to disk";
PersistToDiskResultProto result_proto;
@@ -1005,7 +979,7 @@
return result_proto;
}
- auto status = InternalPersistToDisk();
+ auto status = InternalPersistToDisk(persist_type);
TransformStatus(status, result_status);
return result_proto;
}
@@ -1029,11 +1003,18 @@
return result_proto;
}
- // Releases result / query cache if any
- result_state_manager_.InvalidateAllResultStates();
+ std::unique_ptr<Timer> optimize_timer = clock_->GetNewTimer();
+ OptimizeStatsProto* optimize_stats = result_proto.mutable_optimize_stats();
+ int64_t before_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
+ if (before_size != Filesystem::kBadFileSize) {
+ optimize_stats->set_storage_size_before(before_size);
+ } else {
+ // Set -1 as a sentinel value when failures occur.
+ optimize_stats->set_storage_size_before(-1);
+ }
// Flushes data to disk before doing optimization
- auto status = InternalPersistToDisk();
+ auto status = InternalPersistToDisk(PersistType::FULL);
if (!status.ok()) {
TransformStatus(status, result_status);
return result_proto;
@@ -1041,7 +1022,11 @@
// TODO(b/143646633): figure out if we need to optimize index and doc store
// at the same time.
- libtextclassifier3::Status optimization_status = OptimizeDocumentStore();
+ std::unique_ptr<Timer> optimize_doc_store_timer = clock_->GetNewTimer();
+ libtextclassifier3::Status optimization_status =
+ OptimizeDocumentStore(optimize_stats);
+ optimize_stats->set_document_store_optimize_latency_ms(
+ optimize_doc_store_timer->GetElapsedMilliseconds());
if (!optimization_status.ok() &&
!absl_ports::IsDataLoss(optimization_status)) {
@@ -1055,6 +1040,7 @@
// The status is either OK or DATA_LOSS. The optimized document store is
// guaranteed to work, so we update index according to the new document store.
+ std::unique_ptr<Timer> optimize_index_timer = clock_->GetNewTimer();
libtextclassifier3::Status index_reset_status = index_->Reset();
if (!index_reset_status.ok()) {
status = absl_ports::Annotate(
@@ -1064,17 +1050,52 @@
return result_proto;
}
- libtextclassifier3::Status index_restoration_status = RestoreIndexIfNeeded();
- if (!index_restoration_status.ok()) {
+ IndexRestorationResult index_restoration_status = RestoreIndexIfNeeded();
+ optimize_stats->set_index_restoration_latency_ms(
+ optimize_index_timer->GetElapsedMilliseconds());
+ // DATA_LOSS means that we have successfully re-added content to the index.
+ // Some indexed content was lost, but otherwise the index is in a valid state
+ // and can be queried.
+ if (!index_restoration_status.status.ok() &&
+ !absl_ports::IsDataLoss(index_restoration_status.status)) {
status = absl_ports::Annotate(
absl_ports::InternalError(
"Failed to reindex documents after optimization."),
- index_restoration_status.error_message());
+ index_restoration_status.status.error_message());
TransformStatus(status, result_status);
return result_proto;
}
+ // Read the optimize status to get the time that we last ran.
+ std::string optimize_status_filename =
+ absl_ports::StrCat(options_.base_dir(), "/", kOptimizeStatusFilename);
+ FileBackedProto<OptimizeStatusProto> optimize_status_file(
+ *filesystem_, optimize_status_filename);
+ auto optimize_status_or = optimize_status_file.Read();
+ int64_t current_time = clock_->GetSystemTimeMilliseconds();
+ if (optimize_status_or.ok()) {
+ // If we have trouble reading the status or this is the first time that
+ // we've ever run, don't set this field.
+ optimize_stats->set_time_since_last_optimize_ms(
+ current_time - optimize_status_or.ValueOrDie()
+ ->last_successful_optimize_run_time_ms());
+ }
+
+ // Update the status for this run and write it.
+ auto optimize_status = std::make_unique<OptimizeStatusProto>();
+ optimize_status->set_last_successful_optimize_run_time_ms(current_time);
+ optimize_status_file.Write(std::move(optimize_status));
+
+ int64_t after_size = filesystem_->GetDiskUsage(options_.base_dir().c_str());
+ if (after_size != Filesystem::kBadFileSize) {
+ optimize_stats->set_storage_size_after(after_size);
+ } else {
+ // Set -1 as a sentinel value when failures occur.
+ optimize_stats->set_storage_size_after(-1);
+ }
+ optimize_stats->set_latency_ms(optimize_timer->GetElapsedMilliseconds());
+
TransformStatus(optimization_status, result_status);
return result_proto;
}
@@ -1092,6 +1113,22 @@
return result_proto;
}
+ // Read the optimize status to get the time that we last ran.
+ std::string optimize_status_filename =
+ absl_ports::StrCat(options_.base_dir(), "/", kOptimizeStatusFilename);
+ FileBackedProto<OptimizeStatusProto> optimize_status_file(
+ *filesystem_, optimize_status_filename);
+ auto optimize_status_or = optimize_status_file.Read();
+ int64_t current_time = clock_->GetSystemTimeMilliseconds();
+
+ if (optimize_status_or.ok()) {
+ // If we have trouble reading the status or this is the first time that
+ // we've ever run, don't set this field.
+ result_proto.set_time_since_last_optimize_ms(
+ current_time - optimize_status_or.ValueOrDie()
+ ->last_successful_optimize_run_time_ms());
+ }
+
// Get stats from DocumentStore
auto doc_store_optimize_info_or = document_store_->GetOptimizeInfo();
if (!doc_store_optimize_info_or.ok()) {
@@ -1127,74 +1164,41 @@
return result_proto;
}
-libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk() {
+StorageInfoResultProto IcingSearchEngine::GetStorageInfo() {
+ // Reports disk usage of the base directory plus per-component storage info.
+ StorageInfoResultProto result;
+ absl_ports::shared_lock l(&mutex_);
+ if (!initialized_) {
+ StatusProto* not_ready = result.mutable_status();
+ not_ready->set_code(StatusProto::FAILED_PRECONDITION);
+ not_ready->set_message("IcingSearchEngine has not been initialized!");
+ return result;
+ }
+
+ auto* storage_info = result.mutable_storage_info();
+ // A failed disk-usage query is reported as the sentinel -1.
+ const int64_t total_size =
+ filesystem_->GetDiskUsage(options_.base_dir().c_str());
+ storage_info->set_total_storage_size(
+ total_size != Filesystem::kBadFileSize ? total_size : -1);
+ *storage_info->mutable_document_storage_info() =
+ document_store_->GetStorageInfo();
+ *storage_info->mutable_schema_store_storage_info() =
+ schema_store_->GetStorageInfo();
+ *storage_info->mutable_index_storage_info() = index_->GetStorageInfo();
+ result.mutable_status()->set_code(StatusProto::OK);
+ return result;
+}
+
+libtextclassifier3::Status IcingSearchEngine::InternalPersistToDisk(
+ PersistType::Code persist_type) {
+ if (persist_type == PersistType::LITE) {
+ return document_store_->PersistToDisk(persist_type);
+ }
ICING_RETURN_IF_ERROR(schema_store_->PersistToDisk());
- ICING_RETURN_IF_ERROR(document_store_->PersistToDisk());
+ ICING_RETURN_IF_ERROR(document_store_->PersistToDisk(PersistType::FULL));
ICING_RETURN_IF_ERROR(index_->PersistToDisk());
- // Update the combined checksum and write to header file.
- ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
- ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
-
- return libtextclassifier3::Status::OK;
-}
-
-libtextclassifier3::StatusOr<Crc32> IcingSearchEngine::ComputeChecksum() {
- Crc32 total_checksum;
- // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
- // that can support error logging.
- auto checksum_or = schema_store_->ComputeChecksum();
- if (!checksum_or.ok()) {
- ICING_LOG(ERROR) << checksum_or.status().error_message()
- << "Failed to compute checksum of SchemaStore";
- return checksum_or.status();
- }
-
- Crc32 schema_store_checksum = std::move(checksum_or).ValueOrDie();
-
- // TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
- // that can support error logging.
- checksum_or = document_store_->ComputeChecksum();
- if (!checksum_or.ok()) {
- ICING_LOG(ERROR) << checksum_or.status().error_message()
- << "Failed to compute checksum of DocumentStore";
- return checksum_or.status();
- }
- Crc32 document_store_checksum = std::move(checksum_or).ValueOrDie();
-
- total_checksum.Append(std::to_string(document_store_checksum.Get()));
- total_checksum.Append(std::to_string(schema_store_checksum.Get()));
-
- return total_checksum;
-}
-
-bool IcingSearchEngine::HeaderExists() {
- if (!filesystem_->FileExists(
- MakeHeaderFilename(options_.base_dir()).c_str())) {
- return false;
- }
-
- int64_t file_size =
- filesystem_->GetFileSize(MakeHeaderFilename(options_.base_dir()).c_str());
-
- // If it's been truncated to size 0 before, we consider it to be a new file
- return file_size != 0 && file_size != Filesystem::kBadFileSize;
-}
-
-libtextclassifier3::Status IcingSearchEngine::UpdateHeader(
- const Crc32& checksum) {
- // Write the header
- IcingSearchEngine::Header header;
- header.magic = IcingSearchEngine::Header::kMagic;
- header.checksum = checksum.Get();
-
- // This should overwrite the header.
- if (!filesystem_->Write(MakeHeaderFilename(options_.base_dir()).c_str(),
- &header, sizeof(header))) {
- return absl_ports::InternalError(
- absl_ports::StrCat("Failed to write IcingSearchEngine header: ",
- MakeHeaderFilename(options_.base_dir())));
- }
return libtextclassifier3::Status::OK;
}
@@ -1211,7 +1215,8 @@
return result_proto;
}
- NativeQueryStats* query_stats = result_proto.mutable_query_stats();
+ QueryStatsProto* query_stats = result_proto.mutable_query_stats();
+ query_stats->set_query_length(search_spec.query().length());
std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
libtextclassifier3::Status status = ValidateResultSpec(result_spec);
@@ -1237,7 +1242,7 @@
// Gets unordered results from query processor
auto query_processor_or = QueryProcessor::Create(
index_.get(), language_segmenter_.get(), normalizer_.get(),
- document_store_.get(), schema_store_.get(), clock_.get());
+ document_store_.get(), schema_store_.get());
if (!query_processor_or.ok()) {
TransformStatus(query_processor_or.status(), result_status);
return result_proto;
@@ -1289,9 +1294,9 @@
component_timer = clock_->GetNewTimer();
// Ranks and paginates results
libtextclassifier3::StatusOr<PageResultState> page_result_state_or =
- result_state_manager_.RankAndPaginate(ResultState(
+ result_state_manager_->RankAndPaginate(ResultState(
std::move(result_document_hits), std::move(query_results.query_terms),
- search_spec, scoring_spec, result_spec));
+ search_spec, scoring_spec, result_spec, *document_store_));
if (!page_result_state_or.ok()) {
TransformStatus(page_result_state_or.status(), result_status);
return result_proto;
@@ -1307,7 +1312,7 @@
ResultRetriever::Create(document_store_.get(), schema_store_.get(),
language_segmenter_.get(), normalizer_.get());
if (!result_retriever_or.ok()) {
- result_state_manager_.InvalidateResultState(
+ result_state_manager_->InvalidateResultState(
page_result_state.next_page_token);
TransformStatus(result_retriever_or.status(), result_status);
return result_proto;
@@ -1318,7 +1323,7 @@
libtextclassifier3::StatusOr<std::vector<SearchResultProto::ResultProto>>
results_or = result_retriever->RetrieveResults(page_result_state);
if (!results_or.ok()) {
- result_state_manager_.InvalidateResultState(
+ result_state_manager_->InvalidateResultState(
page_result_state.next_page_token);
TransformStatus(results_or.status(), result_status);
return result_proto;
@@ -1340,7 +1345,7 @@
query_stats->set_latency_ms(overall_timer->GetElapsedMilliseconds());
query_stats->set_num_results_returned_current_page(
result_proto.results_size());
- query_stats->set_num_results_snippeted(
+ query_stats->set_num_results_with_snippets(
std::min(result_proto.results_size(),
result_spec.snippet_spec().num_to_snippet()));
return result_proto;
@@ -1359,12 +1364,12 @@
return result_proto;
}
- NativeQueryStats* query_stats = result_proto.mutable_query_stats();
+ QueryStatsProto* query_stats = result_proto.mutable_query_stats();
query_stats->set_is_first_page(false);
std::unique_ptr<Timer> overall_timer = clock_->GetNewTimer();
libtextclassifier3::StatusOr<PageResultState> page_result_state_or =
- result_state_manager_.GetNextPage(next_page_token);
+ result_state_manager_->GetNextPage(next_page_token);
if (!page_result_state_or.ok()) {
if (absl_ports::IsNotFound(page_result_state_or.status())) {
@@ -1424,7 +1429,7 @@
std::max(page_result_state.snippet_context.snippet_spec.num_to_snippet() -
page_result_state.num_previously_returned,
0);
- query_stats->set_num_results_snippeted(
+ query_stats->set_num_results_with_snippets(
std::min(result_proto.results_size(), num_left_to_snippet));
return result_proto;
}
@@ -1435,10 +1440,11 @@
ICING_LOG(ERROR) << "IcingSearchEngine has not been initialized!";
return;
}
- result_state_manager_.InvalidateResultState(next_page_token);
+ result_state_manager_->InvalidateResultState(next_page_token);
}
-libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore() {
+libtextclassifier3::Status IcingSearchEngine::OptimizeDocumentStore(
+ OptimizeStatsProto* optimize_stats) {
// Gets the current directory path and an empty tmp directory path for
// document store optimization.
const std::string current_document_dir =
@@ -1455,7 +1461,7 @@
// Copies valid document data to tmp directory
auto optimize_status = document_store_->OptimizeInto(
- temporary_document_dir, language_segmenter_.get());
+ temporary_document_dir, language_segmenter_.get(), optimize_stats);
// Handles error if any
if (!optimize_status.ok()) {
@@ -1465,7 +1471,9 @@
optimize_status.error_message());
}
- // Resets before swapping
+ // result_state_manager_ depends on document_store_. So we need to reset it at
+ // the same time that we reset the document_store_.
+ result_state_manager_.reset();
document_store_.reset();
// When swapping files, always put the current working directory at the
@@ -1502,6 +1510,8 @@
create_result_or.status().error_message());
}
document_store_ = std::move(create_result_or.ValueOrDie().document_store);
+ result_state_manager_ = std::make_unique<ResultStateManager>(
+ performance_configuration_.max_num_total_hits, *document_store_);
// Potential data loss
// TODO(b/147373249): Find a way to detect true data loss error
@@ -1522,6 +1532,8 @@
"instance can't be created");
}
document_store_ = std::move(create_result_or.ValueOrDie().document_store);
+ result_state_manager_ = std::make_unique<ResultStateManager>(
+ performance_configuration_.max_num_total_hits, *document_store_);
// Deletes tmp directory
if (!filesystem_->DeleteDirectoryRecursively(
@@ -1529,23 +1541,23 @@
ICING_LOG(ERROR) << "Document store has been optimized, but it failed to "
"delete temporary file directory";
}
-
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status IcingSearchEngine::RestoreIndexIfNeeded() {
+IcingSearchEngine::IndexRestorationResult
+IcingSearchEngine::RestoreIndexIfNeeded() {
DocumentId last_stored_document_id =
document_store_->last_added_document_id();
DocumentId last_indexed_document_id = index_->last_added_document_id();
if (last_stored_document_id == last_indexed_document_id) {
// No need to recover.
- return libtextclassifier3::Status::OK;
+ return {libtextclassifier3::Status::OK, false};
}
if (last_stored_document_id == kInvalidDocumentId) {
// Document store is empty but index is not. Reset the index.
- return index_->Reset();
+ return {index_->Reset(), false};
}
// TruncateTo ensures that the index does not hold any data that is not
@@ -1554,17 +1566,29 @@
// lost documents. If the index does not contain any hits for documents with
// document id greater than last_stored_document_id, then TruncateTo will have
// no effect.
- ICING_RETURN_IF_ERROR(index_->TruncateTo(last_stored_document_id));
+ auto status = index_->TruncateTo(last_stored_document_id);
+ if (!status.ok()) {
+ return {status, false};
+ }
+ // Last indexed document id may have changed thanks to TruncateTo.
+ last_indexed_document_id = index_->last_added_document_id();
DocumentId first_document_to_reindex =
(last_indexed_document_id != kInvalidDocumentId)
? index_->last_added_document_id() + 1
: kMinDocumentId;
+ if (first_document_to_reindex > last_stored_document_id) {
+ // Nothing to restore. Just return.
+ return {libtextclassifier3::Status::OK, false};
+ }
- ICING_ASSIGN_OR_RETURN(
- std::unique_ptr<IndexProcessor> index_processor,
- IndexProcessor::Create(normalizer_.get(), index_.get(),
- CreateIndexProcessorOptions(options_),
- clock_.get()));
+ auto index_processor_or = IndexProcessor::Create(
+ normalizer_.get(), index_.get(), CreateIndexProcessorOptions(options_),
+ clock_.get());
+ if (!index_processor_or.ok()) {
+ return {index_processor_or.status(), true};
+ }
+ std::unique_ptr<IndexProcessor> index_processor =
+ std::move(index_processor_or).ValueOrDie();
ICING_VLOG(1) << "Restoring index by replaying documents from document id "
<< first_document_to_reindex << " to document id "
@@ -1582,7 +1606,7 @@
continue;
} else {
// Returns other errors
- return document_or.status();
+ return {document_or.status(), true};
}
}
DocumentProto document(std::move(document_or).ValueOrDie());
@@ -1592,7 +1616,7 @@
language_segmenter_.get(),
std::move(document));
if (!tokenized_document_or.ok()) {
- return tokenized_document_or.status();
+ return {tokenized_document_or.status(), true};
}
TokenizedDocument tokenized_document(
std::move(tokenized_document_or).ValueOrDie());
@@ -1602,7 +1626,7 @@
if (!status.ok()) {
if (!absl_ports::IsDataLoss(status)) {
// Real error. Stop recovering and pass it up.
- return status;
+ return {status, true};
}
// Just a data loss. Keep trying to add the remaining docs, but report the
// data loss when we're done.
@@ -1610,7 +1634,7 @@
}
}
- return overall_status;
+ return {overall_status, true};
}
libtextclassifier3::StatusOr<bool> IcingSearchEngine::LostPreviousSchema() {
diff --git a/icing/icing-search-engine.h b/icing/icing-search-engine.h
index a899131..3dc7e29 100644
--- a/icing/icing-search-engine.h
+++ b/icing/icing-search-engine.h
@@ -37,6 +37,7 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/usage.pb.h"
#include "icing/result/result-state-manager.h"
#include "icing/schema/schema-store.h"
@@ -52,16 +53,6 @@
// TODO(cassiewang) Top-level comments and links to design-doc.
class IcingSearchEngine {
public:
- struct Header {
- static constexpr int32_t kMagic = 0x6e650d0a;
-
- // Holds the magic as a quick sanity check against file corruption.
- int32_t magic;
-
- // Checksum of the IcingSearchEngine's sub-component's checksums.
- uint32_t checksum;
- };
-
// Note: It is only required to provide a pointer to a valid instance of
// JniCache if this instance needs to perform reverse-jni calls. Users on
// Linux and iOS should always provide a nullptr.
@@ -328,12 +319,26 @@
// Invalidates the next-page token so that no more results of the related
// query can be returned.
- void InvalidateNextPageToken(uint64_t next_page_token);
+ void InvalidateNextPageToken(uint64_t next_page_token)
+ ICING_LOCKS_EXCLUDED(mutex_);
// Makes sure that every update/delete received till this point is flushed
// to disk. If the app crashes after a call to PersistToDisk(), Icing
// would be able to fully recover all data written up to this point.
//
+ // If persist_type is PersistType::LITE, then only the ground truth will be
+ // synced. This should be relatively lightweight to do (order of microseconds)
+ // and ensures that there will be no data loss. At worst, Icing may need to
+ // recover internal data structures by replaying the document log upon the
+ // next startup. Clients should call PersistToDisk(LITE) after each batch of
+ // mutations.
+ //
+ // If persist_type is PersistType::FULL, then all internal data structures in
+ // Icing will be synced. This is a heavier operation (order of milliseconds).
+ // It ensures that Icing will not need to recover internal data structures
+ // upon the next startup. Clients should call PersistToDisk(FULL) before their
+ // process dies.
+ //
// NOTE: It is not necessary to call PersistToDisk() to read back data
// that was recently written. All read APIs will include the most recent
// updates/deletes regardless of the data being flushed to disk.
@@ -342,7 +347,8 @@
// OK on success
// FAILED_PRECONDITION IcingSearchEngine has not been initialized yet
// INTERNAL on I/O error
- PersistToDiskResultProto PersistToDisk() ICING_LOCKS_EXCLUDED(mutex_);
+ PersistToDiskResultProto PersistToDisk(PersistType::Code persist_type)
+ ICING_LOCKS_EXCLUDED(mutex_);
// Allows Icing to run tasks that are too expensive and/or unnecessary to be
// executed in real-time, but are useful to keep it fast and be
@@ -378,6 +384,12 @@
// INTERNAL_ERROR on IO error
GetOptimizeInfoResultProto GetOptimizeInfo() ICING_LOCKS_EXCLUDED(mutex_);
+ // Calculates the StorageInfo for Icing.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ StorageInfoResultProto GetStorageInfo() ICING_LOCKS_EXCLUDED(mutex_);
+
// Clears all data from Icing and re-initializes. Clients DO NOT need to call
// Initialize again.
//
@@ -416,7 +428,8 @@
// acquired first in order to adhere to the global lock ordering:
// 1. mutex_
// 2. result_state_manager_.lock_
- ResultStateManager result_state_manager_ ICING_GUARDED_BY(mutex_);
+ std::unique_ptr<ResultStateManager> result_state_manager_
+ ICING_GUARDED_BY(mutex_);
// Used to provide reader and writer locks
absl_ports::shared_mutex mutex_;
@@ -442,8 +455,8 @@
// separate method so that other public methods don't need to call
// PersistToDisk(). Public methods calling each other may cause deadlock
// issues.
- libtextclassifier3::Status InternalPersistToDisk()
- ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ libtextclassifier3::Status InternalPersistToDisk(
+ PersistType::Code persist_type) ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Helper method to the actual work to Initialize. We need this separate
// method so that other public methods don't need to call Initialize(). Public
@@ -460,7 +473,7 @@
// NOT_FOUND if some Document's schema type is not in the SchemaStore
// INTERNAL on any I/O errors
libtextclassifier3::Status InitializeMembers(
- NativeInitializeStats* initialize_stats)
+ InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Do any validation/setup required for the given IcingSearchEngineOptions
@@ -479,18 +492,22 @@
// FAILED_PRECONDITION if initialize_stats is null
// INTERNAL on I/O error
libtextclassifier3::Status InitializeSchemaStore(
- NativeInitializeStats* initialize_stats)
+ InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Do any initialization/recovery necessary to create a DocumentStore
// instance.
//
+ // See comments on DocumentStore::Create for explanation of
+ // force_recovery_and_revalidate_documents.
+ //
// Returns:
// OK on success
// FAILED_PRECONDITION if initialize_stats is null
// INTERNAL on I/O error
libtextclassifier3::Status InitializeDocumentStore(
- NativeInitializeStats* initialize_stats)
+ bool force_recovery_and_revalidate_documents,
+ InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Do any initialization/recovery necessary to create a DocumentStore
@@ -503,7 +520,7 @@
// NOT_FOUND if some Document's schema type is not in the SchemaStore
// INTERNAL on I/O error
libtextclassifier3::Status InitializeIndex(
- NativeInitializeStats* initialize_stats)
+ InitializeStatsProto* initialize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Many of the internal components rely on other components' derived data.
@@ -527,7 +544,7 @@
// OK on success
// INTERNAL_ERROR on any IO errors
libtextclassifier3::Status RegenerateDerivedFiles(
- NativeInitializeStats* initialize_stats = nullptr,
+ InitializeStatsProto* initialize_stats = nullptr,
bool log_document_store_stats = false)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
@@ -545,7 +562,8 @@
// document store is still available
// INTERNAL_ERROR on any IO errors or other errors that we can't recover
// from
- libtextclassifier3::Status OptimizeDocumentStore()
+ libtextclassifier3::Status OptimizeDocumentStore(
+ OptimizeStatsProto* optimize_stats)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Helper method to restore missing document data in index_. All documents
@@ -553,29 +571,19 @@
// call Index::Reset first.
//
// Returns:
- // OK on success
+ // On success, OK and a bool indicating whether or not restoration was
+ // needed.
+ // DATA_LOSS, if an error during index merging caused us to lose indexed
+ // data in the main index. Despite the data loss, this is still considered
+ // a successful run and needed_restoration will be set to true.
// RESOURCE_EXHAUSTED if the index fills up before finishing indexing
// NOT_FOUND if some Document's schema type is not in the SchemaStore
// INTERNAL_ERROR on any IO errors
- libtextclassifier3::Status RestoreIndexIfNeeded()
- ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- // Computes the combined checksum of the IcingSearchEngine - includes all its
- // subcomponents
- //
- // Returns:
- // Combined checksum on success
- // INTERNAL_ERROR on compute error
- libtextclassifier3::StatusOr<Crc32> ComputeChecksum()
- ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- // Checks if the header exists already. This does not create the header file
- // if it doesn't exist.
- bool HeaderExists() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
-
- // Update and replace the header file. Creates the header file if it doesn't
- // exist.
- libtextclassifier3::Status UpdateHeader(const Crc32& checksum)
+ struct IndexRestorationResult {
+ libtextclassifier3::Status status;
+ bool needed_restoration;
+ };
+ IndexRestorationResult RestoreIndexIfNeeded()
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// If we lost the schema during a previous failure, it may "look" the same as
diff --git a/icing/icing-search-engine_benchmark.cc b/icing/icing-search-engine_benchmark.cc
index 9d33a82..b437724 100644
--- a/icing/icing-search-engine_benchmark.cc
+++ b/icing/icing-search-engine_benchmark.cc
@@ -39,6 +39,7 @@
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/document-generator.h"
#include "icing/testing/random-string.h"
@@ -462,6 +463,120 @@
->ArgPair(10, 32768)
->ArgPair(10, 131072);
+void BM_SearchNoStackOverflow(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document that has the term "foo"
+ DocumentProto base_document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .AddStringProperty("body", "foo")
+ .Build();
+
+ // Insert a lot of documents with the term "foo"
+ int64_t num_docs = state.range(0);
+ for (int64_t i = 0; i < num_docs; ++i) {
+ DocumentProto document =
+ DocumentBuilder(base_document).SetUri(std::to_string(i)).Build();
+ ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+ }
+
+ // Do a query and exclude documents with the term "foo". The way this is
+ // currently implemented is that we'll iterate over all the documents in the
+ // index, then apply the exclusion check. Since all our documents have "foo",
+ // we'll consider it a "miss". Previously with recursion, we would have
+ // recursed until we got a success, which would never happen causing us to
+ // recurse through all the documents and trigger a stack overflow. With
+ // the iterative implementation, we should avoid this.
+ SearchSpecProto search_spec;
+ search_spec.set_query("-foo");
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+
+ ResultSpecProto result_spec;
+ ScoringSpecProto scoring_spec;
+ for (auto s : state) {
+ icing->Search(search_spec, scoring_spec, result_spec);
+ }
+}
+// For other reasons, we hit a limit when inserting the ~350,000th document. So
+// cap the limit to 1 << 18.
+BENCHMARK(BM_SearchNoStackOverflow)
+ ->Range(/*start=*/1 << 10, /*limit=*/1 << 18);
+
+// Added for b/184373205. Ensure that we can repeatedly put documents even if
+// the underlying mmapped areas grow past a few page sizes.
+void BM_RepeatedPut(benchmark::State& state) {
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(TermMatchType::PREFIX,
+ StringIndexingConfig::TokenizerType::PLAIN)
+ .SetCardinality(PropertyConfigProto::Cardinality::OPTIONAL)))
+ .Build();
+
+ // Create the index.
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ options.set_index_merge_size(kIcingFullIndexSize);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+
+ // Create a document that has the term "foo"
+ DocumentProto base_document = DocumentBuilder()
+ .SetSchema("Message")
+ .SetNamespace("namespace")
+ .AddStringProperty("body", "foo")
+ .Build();
+
+ // Repeatedly put the same document (uri "uri", term "foo")
+ int64_t num_docs = state.range(0);
+ for (auto s : state) {
+ for (int64_t i = 0; i < num_docs; ++i) {
+ DocumentProto document =
+ DocumentBuilder(base_document).SetUri("uri").Build();
+ ASSERT_THAT(icing->Put(document).status(), ProtoIsOk());
+ }
+ }
+}
+// For other reasons, we hit a limit when inserting the ~350,000th document. So
+// cap the limit to 1 << 18.
+BENCHMARK(BM_RepeatedPut)->Range(/*start=*/100, /*limit=*/1 << 18);
+
} // namespace
} // namespace lib
diff --git a/icing/icing-search-engine_flush_benchmark.cc b/icing/icing-search-engine_flush_benchmark.cc
new file mode 100644
index 0000000..de8f550
--- /dev/null
+++ b/icing/icing-search-engine_flush_benchmark.cc
@@ -0,0 +1,200 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <unistd.h>
+
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <ostream>
+#include <random>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <vector>
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/icing-search-engine.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/initialize.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/proto/status.pb.h"
+#include "icing/proto/term.pb.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/document-generator.h"
+#include "icing/testing/random-string.h"
+#include "icing/testing/schema-generator.h"
+#include "icing/testing/tmp-directory.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing:icing-search-engine_flush_benchmark
+//
+// $ blaze-bin/icing/icing-search-engine_flush_benchmark
+// --benchmarks=all --benchmark_memory_usage
+//
+// Run on an Android device:
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing:icing-search-engine_flush_benchmark
+//
+// $ adb push blaze-bin/icing/icing-search-engine_flush_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/icing-search-engine_flush_benchmark
+// --benchmarks=all
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Assume that there will be roughly 10 packages, each using 3 of its own types.
+constexpr int kAvgNumNamespaces = 10;
+constexpr int kAvgNumTypes = 3;
+
+// ASSUME: Types will have at most ten properties. Types will be created with
+// [1, 10] properties.
+constexpr int kMaxNumProperties = 10;
+
+// Based on logs from Icing GMSCore.
+constexpr int kAvgDocumentSize = 300;
+
+// ASSUME: ~70% of the document's size comes from its content.
+constexpr float kContentSizePct = 0.7;
+
+// Average length of word in English is 4.7 characters.
+constexpr int kAvgTokenLen = 5;
+// Made up value. This results in a fairly reasonable language - the majority of
+// generated words are 3-9 characters, ~3% of words are >=20 chars, and the
+// longest ones are 27 chars (roughly consistent with the longest
+// non-contrived English words:
+// https://en.wikipedia.org/wiki/Longest_word_in_English).
+constexpr int kTokenStdDev = 7;
+constexpr int kLanguageSize = 1000;
+
+// The number of documents to index.
+constexpr int kNumDocuments = 1024;
+
+std::vector<std::string> CreateNamespaces(int num_namespaces) {
+ std::vector<std::string> namespaces;
+ while (--num_namespaces >= 0) {
+ namespaces.push_back("comgooglepackage" + std::to_string(num_namespaces));
+ }
+ return namespaces;
+}
+
+// Creates a vector containing num_words randomly-generated words for use by
+// documents.
+template <typename Rand>
+std::vector<std::string> CreateLanguage(int num_words, Rand* r) {
+ std::vector<std::string> language;
+ std::normal_distribution<> norm_dist(kAvgTokenLen, kTokenStdDev);
+ while (--num_words >= 0) {
+ int word_length = 0;
+ while (word_length < 1) {
+ word_length = std::round(norm_dist(*r));
+ }
+ language.push_back(RandomString(kAlNumAlphabet, word_length, r));
+ }
+ return language;
+}
+
+class DestructibleDirectory {
+ public:
+ explicit DestructibleDirectory(const Filesystem& filesystem,
+ const std::string& dir)
+ : filesystem_(filesystem), dir_(dir) {
+ filesystem_.CreateDirectoryRecursively(dir_.c_str());
+ }
+ ~DestructibleDirectory() {
+ filesystem_.DeleteDirectoryRecursively(dir_.c_str());
+ }
+
+ private:
+ Filesystem filesystem_;
+ std::string dir_;
+};
+
+void BM_FlushBenchmark(benchmark::State& state) {
+ PersistType::Code persist_type =
+ (state.range(0)) ? PersistType::LITE : PersistType::FULL;
+ int num_documents_per_persist = state.range(1);
+
+ // Initialize the filesystem
+ std::string test_dir = GetTestTempDir() + "/icing/benchmark/flush";
+ Filesystem filesystem;
+ DestructibleDirectory ddir(filesystem, test_dir);
+
+ // Create the schema.
+ std::default_random_engine random;
+ int num_types = kAvgNumNamespaces * kAvgNumTypes;
+ ExactStringPropertyGenerator property_generator;
+ RandomSchemaGenerator<std::default_random_engine,
+ ExactStringPropertyGenerator>
+ schema_generator(&random, &property_generator);
+ SchemaProto schema =
+ schema_generator.GenerateSchema(num_types, kMaxNumProperties);
+ EvenDistributionTypeSelector type_selector(schema);
+
+ std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces);
+ EvenDistributionNamespaceSelector namespace_selector(namespaces);
+
+ std::vector<std::string> language = CreateLanguage(kLanguageSize, &random);
+ UniformDistributionLanguageTokenGenerator<std::default_random_engine>
+ token_generator(language, &random);
+
+ DocumentGenerator<
+ EvenDistributionNamespaceSelector, EvenDistributionTypeSelector,
+ UniformDistributionLanguageTokenGenerator<std::default_random_engine>>
+ generator(&namespace_selector, &type_selector, &token_generator,
+ kAvgDocumentSize * kContentSizePct);
+
+ IcingSearchEngineOptions options;
+ options.set_base_dir(test_dir);
+ std::unique_ptr<IcingSearchEngine> icing =
+ std::make_unique<IcingSearchEngine>(options);
+
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());
+ for (auto s : state) {
+ for (int i = 0; i < kNumDocuments; ++i) {
+ icing->Put(generator.generateDoc());
+
+ if (i % num_documents_per_persist == num_documents_per_persist - 1) {
+ icing->PersistToDisk(persist_type);
+ }
+ }
+ }
+}
+BENCHMARK(BM_FlushBenchmark)
+ // First argument: true to use PersistType::LITE, false for FULL.
+ // Second argument: number of documents put between PersistToDisk calls.
+ ->ArgPair(true, 1)
+ ->ArgPair(false, 1)
+ ->ArgPair(true, 32)
+ ->ArgPair(false, 32)
+ ->ArgPair(true, 1024)
+ ->ArgPair(false, 1024);
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/icing-search-engine_fuzz_test.cc b/icing/icing-search-engine_fuzz_test.cc
index 1f59c6e..2d07e37 100644
--- a/icing/icing-search-engine_fuzz_test.cc
+++ b/icing/icing-search-engine_fuzz_test.cc
@@ -23,6 +23,7 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
#include "icing/proto/scoring.pb.h"
+#include "icing/schema-builder.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
@@ -30,27 +31,20 @@
namespace lib {
namespace {
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED =
+ PropertyConfigProto_Cardinality_Code_REQUIRED;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
+
IcingSearchEngineOptions Setup() {
IcingSearchEngineOptions icing_options;
icing_options.set_base_dir(GetTestTempDir() + "/icing");
return icing_options;
}
-SchemaProto SetTypes() {
- SchemaProto schema;
- SchemaTypeConfigProto* type = schema.add_types();
- type->set_schema_type("Message");
- PropertyConfigProto* body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- return schema;
-}
-
DocumentProto MakeDocument(const uint8_t* data, size_t size) {
// TODO (sidchhabra): Added more optimized fuzzing techniques.
DocumentProto document;
@@ -83,7 +77,15 @@
// TODO (b/145758378): Deleting directory should not be required.
filesystem_.DeleteDirectoryRecursively(icing_options.base_dir().c_str());
icing.Initialize();
- SchemaProto schema_proto = SetTypes();
+
+ SchemaProto schema_proto =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
icing.SetSchema(schema_proto);
// Index
diff --git a/icing/icing-search-engine_test.cc b/icing/icing-search-engine_test.cc
index 8c64614..c1de0f0 100644
--- a/icing/icing-search-engine_test.cc
+++ b/icing/icing-search-engine_test.cc
@@ -30,18 +30,21 @@
#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/status.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/jni-test-helpers.h"
-#include "icing/testing/platform.h"
#include "icing/testing/random-string.h"
#include "icing/testing/snippet-helpers.h"
#include "icing/testing/test-data.h"
@@ -85,13 +88,28 @@
"vehicula posuere vitae, convallis eu lorem. Donec semper augue eu nibh "
"placerat semper.";
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED =
+ PropertyConfigProto_Cardinality_Code_REQUIRED;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED =
+ PropertyConfigProto_Cardinality_Code_REPEATED;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE =
+ StringIndexingConfig_TokenizerType_Code_NONE;
+
+constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
+constexpr TermMatchType_Code MATCH_NONE = TermMatchType_Code_UNKNOWN;
+
// For mocking purpose, we allow tests to provide a custom Filesystem.
class TestIcingSearchEngine : public IcingSearchEngine {
public:
TestIcingSearchEngine(const IcingSearchEngineOptions& options,
std::unique_ptr<const Filesystem> filesystem,
std::unique_ptr<const IcingFilesystem> icing_filesystem,
- std::unique_ptr<FakeClock> clock,
+ std::unique_ptr<Clock> clock,
std::unique_ptr<JniCache> jni_cache)
: IcingSearchEngine(options, std::move(filesystem),
std::move(icing_filesystem), std::move(clock),
@@ -172,95 +190,61 @@
}
SchemaProto CreateMessageSchema() {
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("Message");
-
- auto body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- return schema;
+ return SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
}
SchemaProto CreateEmailSchema() {
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("Email");
-
- auto* body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* subj = type->add_properties();
- subj->set_property_name("subject");
- subj->set_data_type(PropertyConfigProto::DataType::STRING);
- subj->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- subj->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- subj->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- return schema;
+ return SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
}
SchemaProto CreatePersonAndEmailSchema() {
- SchemaProto schema;
-
- auto* person_type = schema.add_types();
- person_type->set_schema_type("Person");
- auto* name = person_type->add_properties();
- name->set_property_name("name");
- name->set_data_type(PropertyConfigProto::DataType::STRING);
- name->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- name->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- name->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* address = person_type->add_properties();
- address->set_property_name("emailAddress");
- address->set_data_type(PropertyConfigProto::DataType::STRING);
- address->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- address->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- address->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- auto* type = schema.add_types();
- type->set_schema_type("Email");
-
- auto* body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* subj = type->add_properties();
- subj->set_property_name("subject");
- subj->set_data_type(PropertyConfigProto::DataType::STRING);
- subj->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- subj->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- subj->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* sender = type->add_properties();
- sender->set_property_name("sender");
- sender->set_schema_type("Person");
- sender->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- sender->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- sender->mutable_document_indexing_config()->set_index_nested_properties(true);
-
- return schema;
+ return SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
}
ScoringSpecProto GetDefaultScoringSpec() {
@@ -428,23 +412,23 @@
SearchResultProto actual_results =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// The query token is also truncated to length of 1, so "me"->"m" matches "m"
search_spec.set_query("me");
actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// The query token is still truncated to length of 1, so "massage"->"m"
// matches "m"
search_spec.set_query("massage");
actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -480,8 +464,8 @@
SearchResultProto actual_results =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, FailToCreateDocStore) {
@@ -596,7 +580,7 @@
HasSubstr("Unable to open file for write"));
}
-TEST_F(IcingSearchEngineTest, SetSchemaDelete2) {
+TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleFails) {
{
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -639,15 +623,18 @@
property->set_data_type(PropertyConfigProto::DataType::STRING);
property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- EXPECT_THAT(icing.SetSchema(schema, false).status(),
- ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ icing.SetSchema(schema, /*ignore_errors_and_delete_documents=*/false)
+ .status(),
+ ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
- // 4. Try to delete by email type.
+ // 4. Try to delete by email type. This should succeed because email wasn't
+ // deleted in step 3.
EXPECT_THAT(icing.DeleteBySchemaType("Email").status(), ProtoIsOk());
}
}
-TEST_F(IcingSearchEngineTest, SetSchemaDelete) {
+TEST_F(IcingSearchEngineTest, SetSchemaIncompatibleForceOverrideSucceeds) {
{
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
@@ -681,7 +668,8 @@
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- // 3. Set a schema that deletes email. This should fail.
+ // 3. Set a schema that deletes email with force override. This should
+ // succeed and delete the email type.
SchemaProto schema;
SchemaTypeConfigProto* type = schema.add_types();
type->set_schema_type("Message");
@@ -692,7 +680,8 @@
EXPECT_THAT(icing.SetSchema(schema, true).status(), ProtoIsOk());
- // 4. Try to delete by email type.
+ // 4. Try to delete by email type. This should fail because email was
+ // already deleted.
EXPECT_THAT(icing.DeleteBySchemaType("Email").status(),
ProtoStatusIs(StatusProto::NOT_FOUND));
}
@@ -1026,7 +1015,8 @@
SearchResultProto actual_results =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStats(empty_result));
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
SchemaProto schema_with_indexed_property = CreateMessageSchema();
// Index restoration should be triggered here because new schema requires more
@@ -1040,8 +1030,8 @@
CreateMessageDocument("namespace", "uri");
actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SetSchemaRevalidatesDocumentsAndReturnsOk) {
@@ -1500,24 +1490,21 @@
icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(results.status(), ProtoIsOk());
EXPECT_THAT(results.results(), SizeIs(2));
- EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
- EXPECT_THAT(GetMatch(results.results(0).document(),
- results.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message"));
- EXPECT_THAT(
- GetWindow(results.results(0).document(), results.results(0).snippet(),
- "body", /*snippet_index=*/0),
- Eq("message body"));
+
+ const DocumentProto& document = results.results(0).document();
+ EXPECT_THAT(document, EqualsProto(document_two));
+
+ const SnippetProto& snippet = results.results(0).snippet();
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("message"));
+
EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
- EXPECT_THAT(
- GetMatch(results.results(1).document(), results.results(1).snippet(),
- "body", /*snippet_index=*/0),
- IsEmpty());
- EXPECT_THAT(
- GetWindow(results.results(1).document(), results.results(1).snippet(),
- "body", /*snippet_index=*/0),
- IsEmpty());
+ EXPECT_THAT(results.results(1).snippet().entries(), IsEmpty());
search_spec.set_query("foo");
@@ -1526,8 +1513,79 @@
SearchResultProto actual_results =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineTest, SearchReturnsScoresDocumentScore) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ document_one.set_score(93);
+ document_one.set_creation_timestamp_ms(10000);
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ document_two.set_score(15);
+ document_two.set_creation_timestamp_ms(12000);
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+
+ // Rank by DOCUMENT_SCORE; each result's score field should then hold the
+ // document's own score (93 for uri1, 15 for uri2), with uri1 expected first.
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ SearchResultProto results = icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_one));
+ EXPECT_THAT(results.results(0).score(), 93);
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(1).score(), 15);
+}
+
+TEST_F(IcingSearchEngineTest, SearchReturnsScoresCreationTimestamp) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ DocumentProto document_one = CreateMessageDocument("namespace", "uri1");
+ document_one.set_score(93);
+ document_one.set_creation_timestamp_ms(10000);
+ ASSERT_THAT(icing.Put(document_one).status(), ProtoIsOk());
+
+ DocumentProto document_two = CreateMessageDocument("namespace", "uri2");
+ document_two.set_score(15);
+ document_two.set_creation_timestamp_ms(12000);
+ ASSERT_THAT(icing.Put(document_two).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("message");
+
+ // Rank by CREATION_TIMESTAMP; each result's score field should then hold the
+ // document's creation timestamp (12000 for uri2 first, then 10000 for uri1).
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(
+ ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
+
+ SearchResultProto results = icing.Search(search_spec, scoring_spec,
+ ResultSpecProto::default_instance());
+ EXPECT_THAT(results.status(), ProtoIsOk());
+ EXPECT_THAT(results.results(), SizeIs(2));
+
+ EXPECT_THAT(results.results(0).document(), EqualsProto(document_two));
+ EXPECT_THAT(results.results(0).score(), 12000);
+ EXPECT_THAT(results.results(1).document(), EqualsProto(document_one));
+ EXPECT_THAT(results.results(1).score(), 10000);
}
TEST_F(IcingSearchEngineTest, SearchReturnsOneResult) {
@@ -1559,8 +1617,8 @@
// The token is a random number so we don't verify it.
expected_search_result_proto.set_next_page_token(
search_result_proto.next_page_token());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchZeroResultLimitReturnsEmptyResults) {
@@ -1578,8 +1636,8 @@
expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
SearchResultProto actual_results =
icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchNegativeResultLimitReturnsInvalidArgument) {
@@ -1600,8 +1658,8 @@
"ResultSpecProto.num_per_page cannot be negative.");
SearchResultProto actual_results =
icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchWithPersistenceReturnsValidResults) {
@@ -1645,8 +1703,8 @@
SearchResultProto actual_results =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
search_spec.set_query("foo");
@@ -1654,7 +1712,8 @@
empty_result.mutable_status()->set_code(StatusProto::OK);
actual_results = icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(actual_results, EqualsSearchResultIgnoreStats(empty_result));
+ EXPECT_THAT(actual_results,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
}
}
@@ -1675,8 +1734,8 @@
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchShouldReturnMultiplePages) {
@@ -1716,8 +1775,8 @@
uint64_t next_page_token = search_result_proto.next_page_token();
// Since the token is a random number, we don't need to verify
expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// Second page, 2 results
expected_search_result_proto.clear_results();
@@ -1726,8 +1785,8 @@
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// Third page, 1 result
expected_search_result_proto.clear_results();
@@ -1737,14 +1796,14 @@
// token.
expected_search_result_proto.clear_next_page_token();
search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// No more results
expected_search_result_proto.clear_results();
search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchWithNoScoringShouldReturnMultiplePages) {
@@ -1787,8 +1846,8 @@
uint64_t next_page_token = search_result_proto.next_page_token();
// Since the token is a random number, we don't need to verify
expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// Second page, 2 results
expected_search_result_proto.clear_results();
@@ -1797,8 +1856,8 @@
*expected_search_result_proto.mutable_results()->Add()->mutable_document() =
document2;
search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// Third page, 1 result
expected_search_result_proto.clear_results();
@@ -1808,14 +1867,14 @@
// token.
expected_search_result_proto.clear_next_page_token();
search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// No more results
expected_search_result_proto.clear_results();
search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, ShouldReturnMultiplePagesWithSnippets) {
@@ -1852,24 +1911,28 @@
ASSERT_THAT(search_result.results(), SizeIs(2));
ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
- EXPECT_THAT(search_result.results(0).document(), EqualsProto(document5));
- EXPECT_THAT(GetMatch(search_result.results(0).document(),
- search_result.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message"));
- EXPECT_THAT(GetWindow(search_result.results(0).document(),
- search_result.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message body"));
- EXPECT_THAT(search_result.results(1).document(), EqualsProto(document4));
- EXPECT_THAT(GetMatch(search_result.results(1).document(),
- search_result.results(1).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message"));
- EXPECT_THAT(GetWindow(search_result.results(1).document(),
- search_result.results(1).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message body"));
+ const DocumentProto& document_result_1 = search_result.results(0).document();
+ EXPECT_THAT(document_result_1, EqualsProto(document5));
+ const SnippetProto& snippet_result_1 = search_result.results(0).snippet();
+ EXPECT_THAT(snippet_result_1.entries(), SizeIs(1));
+ EXPECT_THAT(snippet_result_1.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &document_result_1, snippet_result_1.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_1.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_1.entries(0)),
+ ElementsAre("message"));
+
+ const DocumentProto& document_result_2 = search_result.results(1).document();
+ EXPECT_THAT(document_result_2, EqualsProto(document4));
+ const SnippetProto& snippet_result_2 = search_result.results(1).snippet();
+ EXPECT_THAT(snippet_result_2.entries(0).property_name(), Eq("body"));
+ content = GetString(&document_result_2,
+ snippet_result_2.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_2.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_2.entries(0)),
+ ElementsAre("message"));
// Second page, 2 result with 1 snippet
search_result = icing.GetNextPage(search_result.next_page_token());
@@ -1877,17 +1940,19 @@
ASSERT_THAT(search_result.results(), SizeIs(2));
ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
- EXPECT_THAT(search_result.results(0).document(), EqualsProto(document3));
- EXPECT_THAT(GetMatch(search_result.results(0).document(),
- search_result.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message"));
- EXPECT_THAT(GetWindow(search_result.results(0).document(),
- search_result.results(0).snippet(), "body",
- /*snippet_index=*/0),
- Eq("message body"));
+ const DocumentProto& document_result_3 = search_result.results(0).document();
+ EXPECT_THAT(document_result_3, EqualsProto(document3));
+ const SnippetProto& snippet_result_3 = search_result.results(0).snippet();
+ EXPECT_THAT(snippet_result_3.entries(0).property_name(), Eq("body"));
+ content = GetString(&document_result_3,
+ snippet_result_3.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet_result_3.entries(0)),
+ ElementsAre("message body"));
+ EXPECT_THAT(GetMatches(content, snippet_result_3.entries(0)),
+ ElementsAre("message"));
+
EXPECT_THAT(search_result.results(1).document(), EqualsProto(document2));
- EXPECT_THAT(search_result.results(1).snippet().entries_size(), Eq(0));
+ EXPECT_THAT(search_result.results(1).snippet().entries(), IsEmpty());
// Third page, 1 result with 0 snippets
search_result = icing.GetNextPage(search_result.next_page_token());
@@ -1896,7 +1961,7 @@
ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
EXPECT_THAT(search_result.results(0).document(), EqualsProto(document1));
- EXPECT_THAT(search_result.results(0).snippet().entries_size(), Eq(0));
+ EXPECT_THAT(search_result.results(0).snippet().entries(), IsEmpty());
}
TEST_F(IcingSearchEngineTest, ShouldInvalidateNextPageToken) {
@@ -1927,8 +1992,8 @@
uint64_t next_page_token = search_result_proto.next_page_token();
// Since the token is a random number, we don't need to verify
expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// Now document1 is still to be fetched.
// Invalidates token
@@ -1938,8 +2003,8 @@
expected_search_result_proto.clear_results();
expected_search_result_proto.clear_next_page_token();
search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -1971,22 +2036,24 @@
uint64_t next_page_token = search_result_proto.next_page_token();
// Since the token is a random number, we don't need to verify
expected_search_result_proto.set_next_page_token(next_page_token);
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// Now document1 is still to be fetched.
OptimizeResultProto optimize_result_proto;
optimize_result_proto.mutable_status()->set_code(StatusProto::OK);
optimize_result_proto.mutable_status()->set_message("");
- ASSERT_THAT(icing.Optimize(), EqualsProto(optimize_result_proto));
+ OptimizeResultProto actual_result = icing.Optimize();
+ actual_result.clear_optimize_stats();
+ ASSERT_THAT(actual_result, EqualsProto(optimize_result_proto));
// Tries to fetch the second page, no results since all tokens have been
// invalidated during Optimize()
expected_search_result_proto.clear_results();
expected_search_result_proto.clear_next_page_token();
search_result_proto = icing.GetNextPage(next_page_token);
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, OptimizationShouldRemoveDeletedDocs) {
@@ -2063,59 +2130,78 @@
.SetTtlMs(500)
.Build();
- auto fake_clock = std::make_unique<FakeClock>();
- fake_clock->SetSystemTimeMilliseconds(1000);
+ {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(1000);
- TestIcingSearchEngine icing(GetDefaultIcingOptions(),
- std::make_unique<Filesystem>(),
- std::make_unique<IcingFilesystem>(),
- std::move(fake_clock), GetTestJniCache());
- ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
- // Just initialized, nothing is optimizable yet.
- GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ // Just initialized, nothing is optimizable yet.
+ GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
- ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
- // Only have active documents, nothing is optimizable yet.
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ // Only have active documents, nothing is optimizable yet.
+ optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
- // Deletes document1
- ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
+ // Deletes document1
+ ASSERT_THAT(icing.Delete("namespace", "uri1").status(), ProtoIsOk());
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0));
- int64_t first_estimated_optimizable_bytes =
- optimize_info.estimated_optimizable_bytes();
+ optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(1));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Gt(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
+ int64_t first_estimated_optimizable_bytes =
+ optimize_info.estimated_optimizable_bytes();
- // Add a second document, but it'll be expired since the time (1000) is
- // greater than the document's creation timestamp (100) + the document's ttl
- // (500)
- ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ // Add a second document, but it'll be expired since the time (1000) is
+ // greater than the document's creation timestamp (100) + the document's ttl
+ // (500)
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(),
- Gt(first_estimated_optimizable_bytes));
+ optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(2));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(),
+ Gt(first_estimated_optimizable_bytes));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(0));
- // Optimize
- ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ // Optimize
+ ASSERT_THAT(icing.Optimize().status(), ProtoIsOk());
+ }
- // Nothing is optimizable now that everything has been optimized away.
- optimize_info = icing.GetOptimizeInfo();
- EXPECT_THAT(optimize_info.status(), ProtoIsOk());
- EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
- EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ {
+ // Recreate with the clock advanced from 1000 ms to 5000 ms.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetSystemTimeMilliseconds(5000);
+
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Nothing is optimizable now that everything has been optimized away.
+ GetOptimizeInfoResultProto optimize_info = icing.GetOptimizeInfo();
+ EXPECT_THAT(optimize_info.status(), ProtoIsOk());
+ EXPECT_THAT(optimize_info.optimizable_docs(), Eq(0));
+ EXPECT_THAT(optimize_info.estimated_optimizable_bytes(), Eq(0));
+ EXPECT_THAT(optimize_info.time_since_last_optimize_ms(), Eq(4000));
+ }
}
TEST_F(IcingSearchEngineTest, GetAndPutShouldWorkAfterOptimization) {
@@ -2351,8 +2437,8 @@
DeleteBySchemaTypeResultProto result_proto =
icing.DeleteBySchemaType("message");
EXPECT_THAT(result_proto.status(), ProtoIsOk());
- NativeDeleteStats exp_stats;
- exp_stats.set_delete_type(NativeDeleteStats::DeleteType::SCHEMA_TYPE);
+ DeleteStatsProto exp_stats;
+ exp_stats.set_delete_type(DeleteStatsProto::DeleteType::SCHEMA_TYPE);
exp_stats.set_latency_ms(7);
exp_stats.set_num_documents_deleted(1);
EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
@@ -2383,8 +2469,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteSchemaTypeByQuery) {
@@ -2458,8 +2544,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteByNamespace) {
@@ -2519,8 +2605,8 @@
DeleteByNamespaceResultProto result_proto =
icing.DeleteByNamespace("namespace1");
EXPECT_THAT(result_proto.status(), ProtoIsOk());
- NativeDeleteStats exp_stats;
- exp_stats.set_delete_type(NativeDeleteStats::DeleteType::NAMESPACE);
+ DeleteStatsProto exp_stats;
+ exp_stats.set_delete_type(DeleteStatsProto::DeleteType::NAMESPACE);
exp_stats.set_latency_ms(7);
exp_stats.set_num_documents_deleted(2);
EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
@@ -2559,8 +2645,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteNamespaceByQuery) {
@@ -2629,8 +2715,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteByQuery) {
@@ -2679,8 +2765,8 @@
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
DeleteByQueryResultProto result_proto = icing.DeleteByQuery(search_spec);
EXPECT_THAT(result_proto.status(), ProtoIsOk());
- NativeDeleteStats exp_stats;
- exp_stats.set_delete_type(NativeDeleteStats::DeleteType::QUERY);
+ DeleteStatsProto exp_stats;
+ exp_stats.set_delete_type(DeleteStatsProto::DeleteType::QUERY);
exp_stats.set_latency_ms(7);
exp_stats.set_num_documents_deleted(1);
EXPECT_THAT(result_proto.delete_stats(), EqualsProto(exp_stats));
@@ -2711,8 +2797,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, DeleteByQueryNotFound) {
@@ -2784,8 +2870,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SetSchemaShouldWorkAfterOptimization) {
@@ -2848,8 +2934,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
} // Destroys IcingSearchEngine to make sure nothing is cached.
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
@@ -2857,8 +2943,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, IcingShouldWorkFineIfOptimizationIsAborted) {
@@ -2913,8 +2999,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -2974,8 +3060,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
search_spec.set_query("n");
@@ -2985,8 +3071,8 @@
// Searching new content returns the new document
search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, OptimizationShouldRecoverIfDataFilesAreMissing) {
@@ -3046,8 +3132,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
search_spec.set_query("n");
@@ -3057,8 +3143,8 @@
// Searching new content returns the new document
search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchIncludesDocumentsBeforeTtl) {
@@ -3110,8 +3196,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchDoesntIncludeDocumentsPastTtl) {
@@ -3161,8 +3247,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchWorksAfterSchemaTypesCompatiblyModified) {
@@ -3200,8 +3286,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// With just the schema type filter, we can search for the message
search_spec.Clear();
@@ -3212,8 +3298,8 @@
search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// Since SchemaTypeIds are assigned based on order in the SchemaProto, this
// will force a change in the DocumentStore's cached SchemaTypeIds
@@ -3244,8 +3330,8 @@
// We can still search for the message document
search_result_proto = icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, RecoverFromMissingHeaderFile) {
@@ -3276,8 +3362,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
EXPECT_TRUE(filesystem()->DeleteFile(GetHeaderFilename().c_str()));
@@ -3295,127 +3381,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
-
- // Checks that Schema is still since it'll be needed to validate the document
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderMagic) {
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- // Change the header's magic value
- int32_t invalid_magic = 1; // Anything that's not the actual kMagic value.
- filesystem()->PWrite(GetHeaderFilename().c_str(),
- offsetof(IcingSearchEngine::Header, magic),
- &invalid_magic, sizeof(invalid_magic));
-
- // We should be able to recover from this and access all our previous data
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Checks that DocumentLog is still ok
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Checks that the index is still ok so we can search over it
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
-
- // Checks that Schema is still since it'll be needed to validate the document
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
-}
-
-TEST_F(IcingSearchEngineTest, RecoverFromInvalidHeaderChecksum) {
- SearchSpecProto search_spec;
- search_spec.set_query("message");
- search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
-
- SearchResultProto expected_search_result_proto;
- expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- GetResultProto expected_get_result_proto;
- expected_get_result_proto.mutable_status()->set_code(StatusProto::OK);
- *expected_get_result_proto.mutable_document() =
- CreateMessageDocument("namespace", "uri");
-
- {
- // Basic initialization/setup
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
- EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
- EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
- ProtoIsOk());
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
- } // This should shut down IcingSearchEngine and persist anything it needs to
-
- // Change the header's checksum value
- uint32_t invalid_checksum =
- 1; // Anything that's not the actual checksum value
- filesystem()->PWrite(GetHeaderFilename().c_str(),
- offsetof(IcingSearchEngine::Header, checksum),
- &invalid_checksum, sizeof(invalid_checksum));
-
- // We should be able to recover from this and access all our previous data
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
- EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
-
- // Checks that DocumentLog is still ok
- EXPECT_THAT(
- icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
- EqualsProto(expected_get_result_proto));
-
- // Checks that the index is still ok so we can search over it
- SearchResultProto search_result_proto =
- icing.Search(search_spec, GetDefaultScoringSpec(),
- ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
// Checks that Schema is still since it'll be needed to validate the document
EXPECT_THAT(icing.Put(CreateMessageDocument("namespace", "uri")).status(),
@@ -3493,9 +3460,10 @@
.SetCreationTimestampMs(kDefaultCreationTimestampMs)
.Build();
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
{
// Initializes folder and schema
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ IcingSearchEngine icing(options, GetTestJniCache());
EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
SchemaProto schema;
@@ -3532,8 +3500,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
{
@@ -3569,6 +3537,13 @@
property->mutable_string_indexing_config()->set_tokenizer_type(
StringIndexingConfig::TokenizerType::PLAIN);
+ // Write the marker file
+ std::string marker_filepath =
+ absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
+ ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+
+ // Write the new schema
FakeClock fake_clock;
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
@@ -3615,8 +3590,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, RecoverFromInconsistentDocumentStore) {
@@ -3684,8 +3659,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, RecoverFromInconsistentIndex) {
@@ -3708,8 +3683,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
// Pretend we lost the entire index
@@ -3723,8 +3698,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, RecoverFromCorruptIndex) {
@@ -3747,8 +3722,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
// Pretend index is corrupted
@@ -3764,8 +3739,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByDocumentScore) {
@@ -3825,8 +3800,8 @@
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
SearchResultProto search_result_proto = icing.Search(
search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchShouldAllowNoScoring) {
@@ -3884,8 +3859,8 @@
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
SearchResultProto search_result_proto = icing.Search(
search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByCreationTimestamp) {
@@ -3940,8 +3915,8 @@
ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
SearchResultProto search_result_proto = icing.Search(
search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageCount) {
@@ -4011,8 +3986,8 @@
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
SearchResultProto search_result_proto = icing.Search(
search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -4069,8 +4044,8 @@
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
SearchResultProto search_result_proto = icing.Search(
search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedByUsageTimestamp) {
@@ -4139,8 +4114,8 @@
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
SearchResultProto search_result_proto = icing.Search(
search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, Bm25fRelevanceScoringOneNamespace) {
@@ -4303,24 +4278,21 @@
SearchSpecProto search_spec;
search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
- search_spec.set_query("body:coffee OR body:food");
+ search_spec.set_query("subject:coffee OR body:food");
ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
SearchResultProto search_result_proto = icing.Search(
search_spec, scoring_spec, ResultSpecProto::default_instance());
- // Result should be in descending score order, section restrict doesn't impact
- // the BM25F score.
+ // Result should be in descending score order
EXPECT_THAT(search_result_proto.status(), ProtoIsOk());
- // Both doc5 and doc7 have "coffee" in name and text sections.
- // However, doc5 has more matches.
+ // The term frequencies of "coffee" and "food" are calculated respectively
+ // from the subject section and the body section.
// Documents with "food" are ranked lower as the term "food" is commonly
// present in this corpus, and thus, has a lower IDF.
EXPECT_THAT(
GetUrisFromSearchResults(search_result_proto),
- ElementsAre("namespace1/uri5", // 'coffee' 2 times in section subject,
- // 1 time in section body
- "namespace1/uri7", // 'coffee' 2 times in section body
+ ElementsAre("namespace1/uri5", // 'coffee' 2 times in section subject
"namespace1/uri1", // 'food' 2 times in section body
"namespace1/uri4", // 'food' 2 times in section body
"namespace1/uri2", // 'food' 1 time in section body
@@ -4583,8 +4555,8 @@
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
SearchResultProto search_result_proto = icing.Search(
search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, OlderUsageTimestampShouldNotOverrideNewerOnes) {
@@ -4652,8 +4624,8 @@
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
SearchResultProto search_result_proto = icing.Search(
search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest, SearchResultShouldBeRankedAscendingly) {
@@ -4714,8 +4686,218 @@
scoring_spec.set_order_by(ScoringSpecProto::Order::ASC);
SearchResultProto search_result_proto = icing.Search(
search_spec, scoring_spec, ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
+}
+
+TEST_F(IcingSearchEngineTest,
+ SearchResultGroupingDuplicateNamespaceShouldReturnError) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents, one per namespace, whose document scores are ordered:
+ // document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // The prefix query "m" will match both documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // List "namespace1" in two different groupings. This should be an error.
+ ResultSpecProto result_spec;
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("namespace1");
+ result_grouping->add_namespaces("namespace2");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("namespace1");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineTest,
+ SearchResultGroupingNonPositiveMaxResultsShouldReturnError) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 2 documents, one per namespace, whose document scores are ordered:
+ // document1 < document2
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+
+ // The prefix query "m" will match both documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ // Specify zero results. This should result in an error.
+ ResultSpecProto result_spec;
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(0);
+ result_grouping->add_namespaces("namespace1");
+ result_grouping->add_namespaces("namespace2");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+ EXPECT_THAT(search_result_proto.status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+
+ // Specify negative results and search again (not the stale zero-case result).
+ result_spec.mutable_result_groupings(0)->set_max_results(-1);
+ EXPECT_THAT(icing.Search(search_spec, scoring_spec, result_spec).status(),
+ ProtoStatusIs(StatusProto::INVALID_ARGUMENT));
+}
+
+TEST_F(IcingSearchEngineTest, SearchResultGroupingMultiNamespaceGrouping) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Creates 6 documents (2 per namespace) whose document scores are ordered:
+ // document1 < document2 < document3 < document4 < document5 <
+ // document6
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(1)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message2")
+ .SetScore(2)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/3")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(3)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document4 =
+ DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(4)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document5 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri/5")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message3")
+ .SetScore(5)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+ DocumentProto document6 =
+ DocumentBuilder()
+ .SetKey("namespace3", "uri/6")
+ .SetSchema("Message")
+ .AddStringProperty("body", "message1")
+ .SetScore(6)
+ .SetCreationTimestampMs(kDefaultCreationTimestampMs)
+ .Build();
+
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document4).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document5).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document6).status(), ProtoIsOk());
+
+ // "m" will match all 6 documents
+ SearchSpecProto search_spec;
+ search_spec.set_term_match_type(TermMatchType::PREFIX);
+ search_spec.set_query("m");
+
+ ScoringSpecProto scoring_spec = GetDefaultScoringSpec();
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+
+ ResultSpecProto result_spec;
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("namespace1");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(2);
+ result_grouping->add_namespaces("namespace2");
+ result_grouping->add_namespaces("namespace3");
+
+ SearchResultProto search_result_proto =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ // "namespace1" is limited to one result, so its lower-scored document1
+ // should not be included. "namespace2" and "namespace3" are grouped together
+ // with a limit of two, so only the two highest scored documents between the
+ // two (both of which are in "namespace3") should be returned.
+ SearchResultProto expected_search_result_proto;
+ expected_search_result_proto.mutable_status()->set_code(StatusProto::OK);
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document6;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document5;
+ *expected_search_result_proto.mutable_results()->Add()->mutable_document() =
+ document2;
+
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
}
TEST_F(IcingSearchEngineTest,
@@ -4797,8 +4979,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto,
- EqualsSearchResultIgnoreStats(expected_search_result_proto));
+ EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStatsAndScores(
+ expected_search_result_proto));
} // This should shut down IcingSearchEngine and persist anything it needs to
ASSERT_TRUE(filesystem()->DeleteDirectoryRecursively(GetSchemaDir().c_str()));
@@ -4824,7 +5006,8 @@
SearchResultProto search_result_proto =
icing.Search(search_spec, GetDefaultScoringSpec(),
ResultSpecProto::default_instance());
- EXPECT_THAT(search_result_proto, EqualsSearchResultIgnoreStats(empty_result));
+ EXPECT_THAT(search_result_proto,
+ EqualsSearchResultIgnoreStatsAndScores(empty_result));
}
TEST_F(IcingSearchEngineTest, PersistToDisk) {
@@ -4841,7 +5024,7 @@
ProtoIsOk());
// Persisting shouldn't affect anything
- EXPECT_THAT(icing.PersistToDisk().status(), ProtoIsOk());
+ EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
EXPECT_THAT(
icing.Get("namespace", "uri", GetResultSpecProto::default_instance()),
@@ -4855,6 +5038,48 @@
EqualsProto(expected_get_result_proto));
}
+TEST_F(IcingSearchEngineTest, NoPersistToDiskLiteDoesntPersistPut) {
+ IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri");
+ EXPECT_THAT(icing1.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(
+ icing1.Get("namespace", "uri", GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(document1));
+
+ IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing2.Initialize().status(), ProtoIsOk());
+ // icing1 never called PersistToDisk(LITE) after its Put, so icing2 (created
+ // with the same default options) shouldn't find the document.
+ EXPECT_THAT(
+ icing2.Get("namespace", "uri", GetResultSpecProto::default_instance())
+ .status(),
+ ProtoStatusIs(StatusProto::NOT_FOUND));
+}
+
+TEST_F(IcingSearchEngineTest, PersistToDiskLitePersistsPut) {
+ IcingSearchEngine icing1(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing1.Initialize().status(), ProtoIsOk());
+ EXPECT_THAT(icing1.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri");
+ EXPECT_THAT(icing1.Put(document1).status(), ProtoIsOk());
+ EXPECT_THAT(icing1.PersistToDisk(PersistType::LITE).status(), ProtoIsOk());
+ EXPECT_THAT(
+ icing1.Get("namespace", "uri", GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(document1));
+
+ IcingSearchEngine icing2(GetDefaultIcingOptions(), GetTestJniCache());
+ EXPECT_THAT(icing2.Initialize().status(), ProtoIsOk());
+ // icing1 called PersistToDisk(LITE) after Put, so icing2 should find the doc.
+ EXPECT_THAT(
+ icing2.Get("namespace", "uri", GetResultSpecProto::default_instance())
+ .document(),
+ EqualsProto(document1));
+}
+
TEST_F(IcingSearchEngineTest, ResetOk) {
SchemaProto message_schema = CreateMessageSchema();
SchemaProto empty_schema = SchemaProto(message_schema);
@@ -4886,7 +5111,7 @@
EXPECT_THAT(icing.SetSchema(empty_schema).status(), ProtoIsOk());
}
-TEST_F(IcingSearchEngineTest, ResetAbortedError) {
+TEST_F(IcingSearchEngineTest, ResetDeleteFailureCausesAbortedError) {
auto mock_filesystem = std::make_unique<MockFilesystem>();
// This fails IcingSearchEngine::Reset(). But since we didn't actually delete
@@ -4920,22 +5145,27 @@
ProtoIsOk());
}
-TEST_F(IcingSearchEngineTest, ResetInternalError) {
+TEST_F(IcingSearchEngineTest, ResetCreateFailureCausesInternalError) {
auto mock_filesystem = std::make_unique<MockFilesystem>();
- // Let all other calls succeed.
- EXPECT_CALL(*mock_filesystem, Write(Matcher<const char*>(_), _, _))
+ // Let all other delete directory calls succeed.
+ EXPECT_CALL(*mock_filesystem,
+ DeleteDirectoryRecursively(Matcher<const char*>(_)))
.WillRepeatedly(Return(true));
- // This prevents IcingSearchEngine from creating a DocumentStore instance on
- // reinitialization
- const std::string document_log_path =
- GetTestBaseDir() + "/document_dir/document_log";
+ // This prevents IcingSearchEngine from deleting our base dir when resetting
+ EXPECT_CALL(*mock_filesystem, DeleteDirectoryRecursively(Matcher<const char*>(
+ StrEq(GetTestBaseDir().c_str()))))
+ .WillOnce(Return(false));
+
+ // The first call will show our base directory had 100 bytes, but after we
+ // failed to delete, we lost those 100 bytes. So this will be reported as an
+ // INTERNAL error since data was lost.
EXPECT_CALL(
*mock_filesystem,
- Write(Matcher<const char*>(StrEq(document_log_path.c_str())), _, _))
- .WillOnce(Return(true))
- .WillOnce(Return(false));
+ GetDiskUsage(Matcher<const char*>(StrEq(GetTestBaseDir().c_str()))))
+ .WillOnce(Return(100))
+ .WillOnce(Return(0));
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
std::move(mock_filesystem),
@@ -4985,34 +5215,28 @@
const DocumentProto& result_document_1 = results.results(0).document();
const SnippetProto& result_snippet_1 = results.results(0).snippet();
EXPECT_THAT(result_document_1, EqualsProto(document_two));
- EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/0),
- Eq("mdi"));
- EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/0),
- Eq("mdi Zürich Team Meeting"));
- EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/1),
- Eq("Zürich"));
- EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/1),
- Eq("mdi Zürich Team Meeting"));
+ EXPECT_THAT(result_snippet_1.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_1, result_snippet_1.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi", "Zürich"));
const DocumentProto& result_document_2 = results.results(1).document();
const SnippetProto& result_snippet_2 = results.results(1).snippet();
EXPECT_THAT(result_document_2, EqualsProto(document_one));
- EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/0),
- Eq("MDI"));
- EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/0),
- Eq("MDI zurich Team Meeting"));
- EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/1),
- Eq("zurich"));
- EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/1),
- Eq("MDI zurich Team Meeting"));
+ EXPECT_THAT(result_snippet_2.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_2,
+ result_snippet_2.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI", "zurich"));
}
TEST_F(IcingSearchEngineTest, SnippetNormalizationPrefix) {
@@ -5054,34 +5278,28 @@
const DocumentProto& result_document_1 = results.results(0).document();
const SnippetProto& result_snippet_1 = results.results(0).snippet();
EXPECT_THAT(result_document_1, EqualsProto(document_two));
- EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/0),
- Eq("mdi"));
- EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/0),
- Eq("mdi Zürich Team Meeting"));
- EXPECT_THAT(GetMatch(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/1),
- Eq("Zürich"));
- EXPECT_THAT(GetWindow(result_document_1, result_snippet_1, "body",
- /*snippet_index=*/1),
- Eq("mdi Zürich Team Meeting"));
+ EXPECT_THAT(result_snippet_1.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_1.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_1, result_snippet_1.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi Zürich Team Meeting", "mdi Zürich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_1.entries(0)),
+ ElementsAre("mdi", "Zürich"));
const DocumentProto& result_document_2 = results.results(1).document();
const SnippetProto& result_snippet_2 = results.results(1).snippet();
EXPECT_THAT(result_document_2, EqualsProto(document_one));
- EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/0),
- Eq("MDI"));
- EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/0),
- Eq("MDI zurich Team Meeting"));
- EXPECT_THAT(GetMatch(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/1),
- Eq("zurich"));
- EXPECT_THAT(GetWindow(result_document_2, result_snippet_2, "body",
- /*snippet_index=*/1),
- Eq("MDI zurich Team Meeting"));
+ EXPECT_THAT(result_snippet_2.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet_2.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_2,
+ result_snippet_2.entries(0).property_name());
+ EXPECT_THAT(
+ GetWindows(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI zurich Team Meeting", "MDI zurich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet_2.entries(0)),
+ ElementsAre("MDI", "zurich"));
}
TEST_F(IcingSearchEngineTest, SnippetSectionRestrict) {
@@ -5112,21 +5330,18 @@
icing.Search(search_spec, GetDefaultScoringSpec(), result_spec);
EXPECT_THAT(results.status(), ProtoIsOk());
ASSERT_THAT(results.results(), SizeIs(1));
+
const DocumentProto& result_document = results.results(0).document();
const SnippetProto& result_snippet = results.results(0).snippet();
EXPECT_THAT(result_document, EqualsProto(document_one));
- EXPECT_THAT(
- GetMatch(result_document, result_snippet, "body", /*snippet_index=*/0),
- Eq("zurich"));
- EXPECT_THAT(
- GetWindow(result_document, result_snippet, "body", /*snippet_index=*/0),
- Eq("MDI zurich Team Meeting"));
- EXPECT_THAT(
- GetMatch(result_document, result_snippet, "subject", /*snippet_index=*/0),
- IsEmpty());
- EXPECT_THAT(GetWindow(result_document, result_snippet, "subject",
- /*snippet_index=*/0),
- IsEmpty());
+ EXPECT_THAT(result_snippet.entries(), SizeIs(1));
+ EXPECT_THAT(result_snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&result_document, result_snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet.entries(0)),
+ ElementsAre("MDI zurich Team Meeting"));
+ EXPECT_THAT(GetMatches(content, result_snippet.entries(0)),
+ ElementsAre("zurich"));
}
TEST_F(IcingSearchEngineTest, UninitializedInstanceFailsSafely) {
@@ -5167,7 +5382,7 @@
ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
icing.InvalidateNextPageToken(kSomePageToken); // Verify this doesn't crash.
- EXPECT_THAT(icing.PersistToDisk().status(),
+ EXPECT_THAT(icing.PersistToDisk(PersistType::FULL).status(),
ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
EXPECT_THAT(icing.Optimize().status(),
ProtoStatusIs(StatusProto::FAILED_PRECONDITION));
@@ -5514,6 +5729,88 @@
}
}
+TEST_F(IcingSearchEngineTest,
+ DocumentWithNoIndexedContentDoesntCauseRestoreIndex) {
+ // 1. Create an index with a single document in it that has no indexed
+ // content.
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Set a schema for a single type that has no indexed properties.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("unindexedField")
+ .SetDataTypeString(MATCH_NONE, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ // Add a document that contains no indexed content.
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("unindexedField",
+ "Don't you dare search over this!")
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ // 2. Initialize a new instance. This should NOT trigger a recovery of any kind.
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ }
+}
+
+TEST_F(IcingSearchEngineTest,
+ DocumentWithNoValidIndexedContentDoesntCauseRestoreIndex) {
+ // 1. Create an index with a single document in it that has no valid indexed
+ // tokens in its content.
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+
+ // Set the shared message schema ("body" is presumably an indexed property).
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Add a document that contains no valid indexed content - just punctuation.
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "fake_type/0")
+ .SetSchema("Message")
+ .AddStringProperty("body", "?...!")
+ .Build();
+ EXPECT_THAT(icing.Put(document).status(), ProtoIsOk());
+ }
+
+ // 2. Initialize a new instance. This should NOT trigger a recovery of any kind.
+ {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ InitializeResultProto init_result = icing.Initialize();
+ EXPECT_THAT(init_result.status(), ProtoIsOk());
+ EXPECT_THAT(init_result.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(init_result.initialize_stats().document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(init_result.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ }
+}
+
TEST_F(IcingSearchEngineTest, IndexingDocMergeFailureResets) {
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/0")
@@ -5596,8 +5893,7 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats().latency_ms(),
- Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().latency_ms(), Eq(10));
}
TEST_F(IcingSearchEngineTest, InitializeShouldLogNumberOfDocuments) {
@@ -5617,9 +5913,8 @@
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.native_initialize_stats().num_documents(),
- Eq(0));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
+ Eq(0));
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
@@ -5629,9 +5924,8 @@
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.native_initialize_stats().num_documents(),
- Eq(1));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
+ Eq(1));
// Put another document.
ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
@@ -5641,9 +5935,8 @@
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.native_initialize_stats().num_documents(),
- Eq(2));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_documents(),
+ Eq(2));
}
}
@@ -5659,25 +5952,25 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
@@ -5721,25 +6014,25 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::PARTIAL_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::PARTIAL_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
@@ -5790,27 +6083,27 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::COMPLETE_LOSS));
- // The complete rewind of ground truth causes the mismatch of total
- // checksum, so index should be restored.
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::COMPLETE_LOSS));
+ // The complete rewind of ground truth causes us to clear the index, but
+ // that's not considered a restoration.
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
- Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
@@ -5848,51 +6141,76 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::INCONSISTENT_WITH_GROUND_TRUTH));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::INCONSISTENT_WITH_GROUND_TRUTH));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
}
TEST_F(IcingSearchEngineTest,
- InitializeShouldLogRecoveryCauseTotalChecksumMismatch) {
+ InitializeShouldLogRecoveryCauseSchemaChangesOutofSync) {
DocumentProto document = DocumentBuilder()
.SetKey("icing", "fake_type/0")
.SetSchema("Message")
.AddStringProperty("body", "message body")
.Build();
+ IcingSearchEngineOptions options = GetDefaultIcingOptions();
{
// Initialize and put one document.
- IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ IcingSearchEngine icing(options, GetTestJniCache());
ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
}
{
- // Change the header's checksum value to a random value.
- uint32_t invalid_checksum = 1;
- filesystem()->PWrite(GetHeaderFilename().c_str(),
- offsetof(IcingSearchEngine::Header, checksum),
- &invalid_checksum, sizeof(invalid_checksum));
+ // Simulate a schema change where power is lost after the schema is written.
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Message")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ // Write the marker file
+ std::string marker_filepath =
+ absl_ports::StrCat(options.base_dir(), "/set_schema_marker");
+ ScopedFd sfd(filesystem()->OpenForWrite(marker_filepath.c_str()));
+ ASSERT_TRUE(sfd.is_valid());
+
+ // Write the new schema
+ FakeClock fake_clock;
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(filesystem(), GetSchemaDir(), &fake_clock));
+ ICING_EXPECT_OK(schema_store->SetSchema(new_schema));
}
{
@@ -5905,25 +6223,58 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::TOTAL_CHECKSUM_MISMATCH));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::SCHEMA_CHANGES_OUT_OF_SYNC));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_latency_ms(),
+ Eq(0));
+ }
+
+ {
+ // No recovery should be needed.
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(10);
+ TestIcingSearchEngine icing(GetDefaultIcingOptions(),
+ std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(),
+ std::move(fake_clock), GetTestJniCache());
+ InitializeResultProto initialize_result_proto = icing.Initialize();
+ EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .document_store_recovery_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
+ .schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
@@ -5970,25 +6321,25 @@
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::IO_ERROR));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_latency_ms(),
- Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(10));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
@@ -6036,25 +6387,25 @@
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::IO_ERROR));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_latency_ms(),
- Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .schema_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_latency_ms(),
+ Eq(0));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().schema_store_recovery_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(0));
}
@@ -6083,25 +6434,25 @@
std::move(fake_clock), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_cause(),
- Eq(NativeInitializeStats::IO_ERROR));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::IO_ERROR));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.schema_store_recovery_latency_ms(),
Eq(10));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.document_store_recovery_latency_ms(),
Eq(0));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .document_store_data_status(),
- Eq(NativeInitializeStats::NO_DATA_LOSS));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
- .index_restoration_cause(),
- Eq(NativeInitializeStats::NONE));
- EXPECT_THAT(initialize_result_proto.native_initialize_stats()
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().document_store_data_status(),
+ Eq(InitializeStatsProto::NO_DATA_LOSS));
+ EXPECT_THAT(
+ initialize_result_proto.initialize_stats().index_restoration_cause(),
+ Eq(InitializeStatsProto::NONE));
+ EXPECT_THAT(initialize_result_proto.initialize_stats()
.index_restoration_latency_ms(),
Eq(0));
}
@@ -6114,9 +6465,8 @@
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
// There should be 0 schema types.
- EXPECT_THAT(
- initialize_result_proto.native_initialize_stats().num_schema_types(),
- Eq(0));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
+ Eq(0));
// Set a schema with one type config.
ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
@@ -6127,9 +6477,8 @@
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
// There should be 1 schema type.
- EXPECT_THAT(
- initialize_result_proto.native_initialize_stats().num_schema_types(),
- Eq(1));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
+ Eq(1));
// Create and set a schema with two type configs: Email and Message.
SchemaProto schema = CreateEmailSchema();
@@ -6152,9 +6501,8 @@
IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
InitializeResultProto initialize_result_proto = icing.Initialize();
EXPECT_THAT(initialize_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- initialize_result_proto.native_initialize_stats().num_schema_types(),
- Eq(2));
+ EXPECT_THAT(initialize_result_proto.initialize_stats().num_schema_types(),
+ Eq(2));
}
}
@@ -6176,8 +6524,7 @@
PutResultProto put_result_proto = icing.Put(document);
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(put_result_proto.native_put_document_stats().latency_ms(),
- Eq(10));
+ EXPECT_THAT(put_result_proto.put_document_stats().latency_ms(), Eq(10));
}
TEST_F(IcingSearchEngineTest, PutDocumentShouldLogDocumentStoreStats) {
@@ -6200,11 +6547,9 @@
PutResultProto put_result_proto = icing.Put(document);
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- put_result_proto.native_put_document_stats().document_store_latency_ms(),
- Eq(10));
- size_t document_size =
- put_result_proto.native_put_document_stats().document_size();
+ EXPECT_THAT(put_result_proto.put_document_stats().document_store_latency_ms(),
+ Eq(10));
+ size_t document_size = put_result_proto.put_document_stats().document_size();
EXPECT_THAT(document_size, Ge(document.ByteSizeLong()));
EXPECT_THAT(document_size, Le(document.ByteSizeLong() +
sizeof(DocumentProto::InternalFields)));
@@ -6228,18 +6573,16 @@
PutResultProto put_result_proto = icing.Put(document);
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(put_result_proto.native_put_document_stats().index_latency_ms(),
- Eq(10));
+ EXPECT_THAT(put_result_proto.put_document_stats().index_latency_ms(), Eq(10));
// No merge should happen.
- EXPECT_THAT(
- put_result_proto.native_put_document_stats().index_merge_latency_ms(),
- Eq(0));
+ EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
+ Eq(0));
// Number of tokens should not exceed.
- EXPECT_FALSE(put_result_proto.native_put_document_stats()
+ EXPECT_FALSE(put_result_proto.put_document_stats()
.tokenization_stats()
.exceeded_max_token_num());
// The input document has 2 tokens.
- EXPECT_THAT(put_result_proto.native_put_document_stats()
+ EXPECT_THAT(put_result_proto.put_document_stats()
.tokenization_stats()
.num_tokens_indexed(),
Eq(2));
@@ -6263,10 +6606,10 @@
PutResultProto put_result_proto = icing.Put(document);
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
// Number of tokens(2) exceeds the max allowed value(1).
- EXPECT_TRUE(put_result_proto.native_put_document_stats()
+ EXPECT_TRUE(put_result_proto.put_document_stats()
.tokenization_stats()
.exceeded_max_token_num());
- EXPECT_THAT(put_result_proto.native_put_document_stats()
+ EXPECT_THAT(put_result_proto.put_document_stats()
.tokenization_stats()
.num_tokens_indexed(),
Eq(1));
@@ -6300,9 +6643,8 @@
// Putting document2 should trigger an index merge.
PutResultProto put_result_proto = icing.Put(document2);
EXPECT_THAT(put_result_proto.status(), ProtoIsOk());
- EXPECT_THAT(
- put_result_proto.native_put_document_stats().index_merge_latency_ms(),
- Eq(10));
+ EXPECT_THAT(put_result_proto.put_document_stats().index_merge_latency_ms(),
+ Eq(10));
}
TEST_F(IcingSearchEngineTest, SearchWithProjectionEmptyFieldPath) {
@@ -6491,7 +6833,7 @@
EqualsProto(projected_document_one));
}
-TEST_F(IcingSearchEngineTest, NativeQueryStatsTest) {
+TEST_F(IcingSearchEngineTest, QueryStatsProtoTest) {
auto fake_clock = std::make_unique<FakeClock>();
fake_clock->SetTimerElapsedMilliseconds(5);
TestIcingSearchEngine icing(GetDefaultIcingOptions(),
@@ -6537,7 +6879,8 @@
ASSERT_THAT(search_result.next_page_token(), Ne(kInvalidNextPageToken));
// Check the stats
- NativeQueryStats exp_stats;
+ QueryStatsProto exp_stats;
+ exp_stats.set_query_length(7);
exp_stats.set_num_terms(1);
exp_stats.set_num_namespaces_filtered(1);
exp_stats.set_num_schema_types_filtered(1);
@@ -6547,7 +6890,7 @@
exp_stats.set_requested_page_size(2);
exp_stats.set_num_results_returned_current_page(2);
exp_stats.set_num_documents_scored(5);
- exp_stats.set_num_results_snippeted(2);
+ exp_stats.set_num_results_with_snippets(2);
exp_stats.set_latency_ms(5);
exp_stats.set_parse_query_latency_ms(5);
exp_stats.set_scoring_latency_ms(5);
@@ -6561,11 +6904,11 @@
ASSERT_THAT(search_result.results(), SizeIs(2));
ASSERT_THAT(search_result.next_page_token(), Gt(kInvalidNextPageToken));
- exp_stats = NativeQueryStats();
+ exp_stats = QueryStatsProto();
exp_stats.set_is_first_page(false);
exp_stats.set_requested_page_size(2);
exp_stats.set_num_results_returned_current_page(2);
- exp_stats.set_num_results_snippeted(1);
+ exp_stats.set_num_results_with_snippets(1);
exp_stats.set_latency_ms(5);
exp_stats.set_document_retrieval_latency_ms(5);
EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
@@ -6576,16 +6919,269 @@
ASSERT_THAT(search_result.results(), SizeIs(1));
ASSERT_THAT(search_result.next_page_token(), Eq(kInvalidNextPageToken));
- exp_stats = NativeQueryStats();
+ exp_stats = QueryStatsProto();
exp_stats.set_is_first_page(false);
exp_stats.set_requested_page_size(2);
exp_stats.set_num_results_returned_current_page(1);
- exp_stats.set_num_results_snippeted(0);
+ exp_stats.set_num_results_with_snippets(0);
exp_stats.set_latency_ms(5);
exp_stats.set_document_retrieval_latency_ms(5);
EXPECT_THAT(search_result.query_stats(), EqualsProto(exp_stats));
}
+TEST_F(IcingSearchEngineTest, OptimizeStatsProtoTest) {
+ auto fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(10000);
+ auto icing = std::make_unique<TestIcingSearchEngine>(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing->SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Create three documents.
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ document2.set_creation_timestamp_ms(9000);
+ document2.set_ttl_ms(500);
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ ASSERT_THAT(icing->Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing->Put(document3).status(), ProtoIsOk());
+
+ // Delete the first document.
+ ASSERT_THAT(icing->Delete(document1.namespace_(), document1.uri()).status(),
+ ProtoIsOk());
+ ASSERT_THAT(icing->PersistToDisk(PersistType::FULL).status(), ProtoIsOk());
+
+ OptimizeStatsProto expected;
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(3);
+ expected.set_num_deleted_documents(1);
+ expected.set_num_expired_documents(1);
+
+ // Run Optimize
+ OptimizeResultProto result = icing->Optimize();
+ // Depending on how many blocks the documents end up spread across, it's
+ // possible that Optimize can remove documents without shrinking storage. The
+ // first Optimize call will also write the OptimizeStatusProto for the first
+ // time which will take up 1 block. So make sure that before_size is no less
+ // than after_size - 1 block.
+ uint32_t page_size = getpagesize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Ge(result.optimize_stats().storage_size_after() - page_size));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+
+ fake_clock = std::make_unique<FakeClock>();
+ fake_clock->SetTimerElapsedMilliseconds(5);
+ fake_clock->SetSystemTimeMilliseconds(20000);
+ icing = std::make_unique<TestIcingSearchEngine>(
+ GetDefaultIcingOptions(), std::make_unique<Filesystem>(),
+ std::make_unique<IcingFilesystem>(), std::move(fake_clock),
+ GetTestJniCache());
+ ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
+
+ expected = OptimizeStatsProto();
+ expected.set_latency_ms(5);
+ expected.set_document_store_optimize_latency_ms(5);
+ expected.set_index_restoration_latency_ms(5);
+ expected.set_num_original_documents(1);
+ expected.set_num_deleted_documents(0);
+ expected.set_num_expired_documents(0);
+ expected.set_time_since_last_optimize_ms(10000);
+
+ // Run Optimize
+ result = icing->Optimize();
+ EXPECT_THAT(result.optimize_stats().storage_size_before(),
+ Eq(result.optimize_stats().storage_size_after()));
+ result.mutable_optimize_stats()->clear_storage_size_before();
+ result.mutable_optimize_stats()->clear_storage_size_after();
+ EXPECT_THAT(result.optimize_stats(), EqualsProto(expected));
+}
+
+TEST_F(IcingSearchEngineTest, StorageInfoTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // Create three documents.
+ DocumentProto document1 = CreateMessageDocument("namespace", "uri1");
+ DocumentProto document2 = CreateMessageDocument("namespace", "uri2");
+ DocumentProto document3 = CreateMessageDocument("namespace", "uri3");
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ // Ensure that total_storage_size is set. All the other stats are covered by
+ // the classes that generate them.
+ StorageInfoResultProto result = icing.GetStorageInfo();
+ EXPECT_THAT(result.status(), ProtoIsOk());
+ EXPECT_THAT(result.storage_info().total_storage_size(), Ge(0));
+}
+
+TEST_F(IcingSearchEngineTest, SnippetErrorTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("Generic").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ASSERT_THAT(icing.SetSchema(schema).status(), ProtoIsOk());
+
+ DocumentProto document1 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetScore(10)
+ .SetSchema("Generic")
+ .AddStringProperty("subject", "I like cats", "I like dogs",
+ "I like birds", "I like fish")
+ .Build();
+ DocumentProto document2 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri2")
+ .SetScore(20)
+ .SetSchema("Generic")
+ .AddStringProperty("subject", "I like red", "I like green",
+ "I like blue", "I like yellow")
+ .Build();
+ DocumentProto document3 =
+ DocumentBuilder()
+ .SetKey("namespace", "uri3")
+ .SetScore(5)
+ .SetSchema("Generic")
+ .AddStringProperty("subject", "I like cupcakes", "I like donuts",
+ "I like eclairs", "I like froyo")
+ .Build();
+ ASSERT_THAT(icing.Put(document1).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document2).status(), ProtoIsOk());
+ ASSERT_THAT(icing.Put(document3).status(), ProtoIsOk());
+
+ SearchSpecProto search_spec;
+ search_spec.add_schema_type_filters("Generic");
+ search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);
+ search_spec.set_query("like");
+ ScoringSpecProto scoring_spec;
+ scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(2);
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(3);
+ result_spec.mutable_snippet_spec()->set_max_window_bytes(4);
+ SearchResultProto search_results =
+ icing.Search(search_spec, scoring_spec, result_spec);
+
+ ASSERT_THAT(search_results.results(), SizeIs(3));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri2"));
+ ASSERT_THAT(result->snippet().entries(), SizeIs(3));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), "subject[0]");
+ std::string_view content = GetString(&result->document(), "subject[0]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(1);
+ EXPECT_THAT(entry->property_name(), "subject[1]");
+ content = GetString(&result->document(), "subject[1]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(2);
+ EXPECT_THAT(entry->property_name(), "subject[2]");
+ content = GetString(&result->document(), "subject[2]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ result = &search_results.results(1);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+ ASSERT_THAT(result->snippet().entries(), SizeIs(3));
+ entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), "subject[0]");
+ content = GetString(&result->document(), "subject[0]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(1);
+ ASSERT_THAT(entry->property_name(), "subject[1]");
+ content = GetString(&result->document(), "subject[1]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ entry = &result->snippet().entries(2);
+ ASSERT_THAT(entry->property_name(), "subject[2]");
+ content = GetString(&result->document(), "subject[2]");
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("like"));
+
+ result = &search_results.results(2);
+ ASSERT_THAT(result->document().uri(), Eq("uri3"));
+ ASSERT_THAT(result->snippet().entries(), IsEmpty());
+}
+
+TEST_F(IcingSearchEngineTest, CJKSnippetTest) {
+ IcingSearchEngine icing(GetDefaultIcingOptions(), GetTestJniCache());
+ ASSERT_THAT(icing.Initialize().status(), ProtoIsOk());
+ ASSERT_THAT(icing.SetSchema(CreateMessageSchema()).status(), ProtoIsOk());
+
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^
+ // UTF8 idx: 0 3 9 15 18
+ // UTF16 idx: 0 1 3 5 6
+ // Breaks into segments: "我", "每天", "走路", "去", "上班"
+ constexpr std::string_view kChinese = "我每天走路去上班。";
+ DocumentProto document = DocumentBuilder()
+ .SetKey("namespace", "uri1")
+ .SetSchema("Message")
+ .AddStringProperty("body", kChinese)
+ .Build();
+ ASSERT_THAT(icing.Put(document).status(), ProtoIsOk());
+
+ // Search and request snippet matching but no windowing.
+ SearchSpecProto search_spec;
+ search_spec.set_query("走");
+ search_spec.set_term_match_type(MATCH_PREFIX);
+
+ ResultSpecProto result_spec;
+ result_spec.mutable_snippet_spec()->set_num_to_snippet(
+ std::numeric_limits<int>::max());
+ result_spec.mutable_snippet_spec()->set_num_matches_per_property(
+ std::numeric_limits<int>::max());
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto search_results = icing.Search(
+ search_spec, ScoringSpecProto::default_instance(), result_spec);
+ ASSERT_THAT(search_results.status(), ProtoIsOk());
+ ASSERT_THAT(search_results.results(), SizeIs(1));
+ const SearchResultProto::ResultProto* result = &search_results.results(0);
+ EXPECT_THAT(result->document().uri(), Eq("uri1"));
+
+ // Ensure that one and only one property was matched and it was "body"
+ ASSERT_THAT(result->snippet().entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &result->snippet().entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("body"));
+
+ // Get the content for "body" and see what the match is.
+ std::string_view content = GetString(&result->document(), "body");
+ ASSERT_THAT(content, Eq(kChinese));
+
+ // Ensure that there is one and only one match within "body"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+
+ EXPECT_THAT(match_proto.exact_match_byte_position(), Eq(9));
+ EXPECT_THAT(match_proto.exact_match_byte_length(), Eq(6));
+ std::string_view match =
+ content.substr(match_proto.exact_match_byte_position(),
+ match_proto.exact_match_byte_length());
+ ASSERT_THAT(match, Eq("走路"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3));
+ EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));
+}
+
} // namespace
} // namespace lib
} // namespace icing
diff --git a/icing/index/hit/hit.cc b/icing/index/hit/hit.cc
index 2a5a0d9..887e6e4 100644
--- a/icing/index/hit/hit.cc
+++ b/icing/index/hit/hit.cc
@@ -67,9 +67,10 @@
&temp_value);
bit_util::BitfieldSet(section_id, kNumFlags, kSectionIdBits, &temp_value);
bit_util::BitfieldSet(term_frequency != kDefaultTermFrequency,
- kHasTermFrequency, 1, &temp_value);
- bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, 1, &temp_value);
- bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection, 1, &temp_value);
+ kHasTermFrequency, /*len=*/1, &temp_value);
+ bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, /*len=*/1, &temp_value);
+ bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection,
+ /*len=*/1, &temp_value);
value_ = temp_value;
}
diff --git a/icing/index/index-processor.cc b/icing/index/index-processor.cc
index d2f9d41..6d8632f 100644
--- a/icing/index/index-processor.cc
+++ b/icing/index/index-processor.cc
@@ -55,7 +55,7 @@
libtextclassifier3::Status IndexProcessor::IndexDocument(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- NativePutDocumentStats* put_document_stats) {
+ PutDocumentStatsProto* put_document_stats) {
std::unique_ptr<Timer> index_timer = clock_.GetNewTimer();
if (index_->last_added_document_id() != kInvalidDocumentId &&
@@ -64,6 +64,7 @@
"DocumentId %d must be greater than last added document_id %d",
document_id, index_->last_added_document_id()));
}
+ index_->set_last_added_document_id(document_id);
uint32_t num_tokens = 0;
libtextclassifier3::Status overall_status;
for (const TokenizedSection& section : tokenized_document.sections()) {
diff --git a/icing/index/index-processor.h b/icing/index/index-processor.h
index 9fc7c46..6b07c98 100644
--- a/icing/index/index-processor.h
+++ b/icing/index/index-processor.h
@@ -81,7 +81,7 @@
// INTERNAL_ERROR if any other errors occur
libtextclassifier3::Status IndexDocument(
const TokenizedDocument& tokenized_document, DocumentId document_id,
- NativePutDocumentStats* put_document_stats = nullptr);
+ PutDocumentStatsProto* put_document_stats = nullptr);
private:
IndexProcessor(const Normalizer* normalizer, Index* index,
diff --git a/icing/index/index-processor_test.cc b/icing/index/index-processor_test.cc
index e6bb615..8a6a9f5 100644
--- a/icing/index/index-processor_test.cc
+++ b/icing/index/index-processor_test.cc
@@ -36,9 +36,11 @@
#include "icing/index/term-property-id.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/schema-util.h"
#include "icing/schema/section-manager.h"
@@ -46,7 +48,6 @@
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
-#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
@@ -103,6 +104,22 @@
using ::testing::IsEmpty;
using ::testing::Test;
+constexpr PropertyConfigProto_DataType_Code TYPE_STRING =
+ PropertyConfigProto_DataType_Code_STRING;
+constexpr PropertyConfigProto_DataType_Code TYPE_BYTES =
+ PropertyConfigProto_DataType_Code_BYTES;
+
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED =
+ PropertyConfigProto_Cardinality_Code_REPEATED;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
+constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
+
class IndexProcessorTest : public Test {
protected:
void SetUp() override {
@@ -131,7 +148,49 @@
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, GetTestTempDir(), &fake_clock_));
- SchemaProto schema = CreateFakeSchema();
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kFakeType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kExactProperty)
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPrefixedProperty)
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kUnindexedProperty1)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kUnindexedProperty2)
+ .SetDataType(TYPE_BYTES)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kRepeatedProperty)
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kSubProperty)
+ .SetDataTypeDocument(
+ kNestedType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kNestedType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kNestedProperty)
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
IndexProcessor::Options processor_options;
@@ -162,72 +221,6 @@
std::unique_ptr<Index> index_;
std::unique_ptr<SchemaStore> schema_store_;
std::unique_ptr<IndexProcessor> index_processor_;
-
- private:
- static void AddStringProperty(std::string_view name, DataType::Code type,
- Cardinality::Code cardinality,
- TermMatchType::Code term_match_type,
- SchemaTypeConfigProto* type_config) {
- auto* prop = type_config->add_properties();
- prop->set_property_name(std::string(name));
- prop->set_data_type(type);
- prop->set_cardinality(cardinality);
- prop->mutable_string_indexing_config()->set_term_match_type(
- term_match_type);
- prop->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- }
-
- static void AddNonIndexedProperty(std::string_view name, DataType::Code type,
- Cardinality::Code cardinality,
- SchemaTypeConfigProto* type_config) {
- auto* prop = type_config->add_properties();
- prop->set_property_name(std::string(name));
- prop->set_data_type(type);
- prop->set_cardinality(cardinality);
- }
-
- static SchemaProto CreateFakeSchema() {
- SchemaProto schema;
-
- // Add top-level type
- auto* type_config = schema.add_types();
- type_config->set_schema_type(std::string(kFakeType));
-
- AddStringProperty(std::string(kExactProperty), DataType::STRING,
- Cardinality::OPTIONAL, TermMatchType::EXACT_ONLY,
- type_config);
-
- AddStringProperty(std::string(kPrefixedProperty), DataType::STRING,
- Cardinality::OPTIONAL, TermMatchType::PREFIX,
- type_config);
-
- AddNonIndexedProperty(std::string(kUnindexedProperty1), DataType::STRING,
- Cardinality::OPTIONAL, type_config);
-
- AddNonIndexedProperty(std::string(kUnindexedProperty2), DataType::BYTES,
- Cardinality::OPTIONAL, type_config);
-
- AddStringProperty(std::string(kRepeatedProperty), DataType::STRING,
- Cardinality::REPEATED, TermMatchType::PREFIX,
- type_config);
-
- auto* prop = type_config->add_properties();
- prop->set_property_name(std::string(kSubProperty));
- prop->set_data_type(DataType::DOCUMENT);
- prop->set_cardinality(Cardinality::OPTIONAL);
- prop->set_schema_type(std::string(kNestedType));
- prop->mutable_document_indexing_config()->set_index_nested_properties(true);
-
- // Add nested type
- type_config = schema.add_types();
- type_config->set_schema_type(std::string(kNestedType));
-
- AddStringProperty(kNestedProperty, DataType::STRING, Cardinality::OPTIONAL,
- TermMatchType::PREFIX, type_config);
-
- return schema;
- }
};
std::vector<DocHitInfo> GetHits(std::unique_ptr<DocHitInfoIterator> iterator) {
@@ -268,7 +261,23 @@
document));
EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
IsOk());
- EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
+}
+
+TEST_F(IndexProcessorTest, NoValidContent) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "fake_type/1")
+ .SetSchema(std::string(kFakeType))
+ .AddStringProperty(std::string(kExactProperty), "?...!")
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ TokenizedDocument tokenized_document,
+ TokenizedDocument::Create(schema_store_.get(), lang_segmenter_.get(),
+ document));
+ EXPECT_THAT(index_processor_->IndexDocument(tokenized_document, kDocumentId0),
+ IsOk());
+ EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexProcessorTest, OneDoc) {
@@ -434,9 +443,8 @@
IndexProcessor::Options::TokenLimitBehavior::kReturnError;
ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(normalizer_.get(), index_.get(), options,
- &fake_clock_));
+ index_processor_, IndexProcessor::Create(normalizer_.get(), index_.get(),
+ options, &fake_clock_));
DocumentProto document =
DocumentBuilder()
@@ -477,9 +485,8 @@
IndexProcessor::Options::TokenLimitBehavior::kSuppressError;
ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(normalizer_.get(), index_.get(), options,
- &fake_clock_));
+ index_processor_, IndexProcessor::Create(normalizer_.get(), index_.get(),
+ options, &fake_clock_));
DocumentProto document =
DocumentBuilder()
@@ -522,9 +529,8 @@
/*max_term_byte_size=*/4));
ICING_ASSERT_OK_AND_ASSIGN(
- index_processor_,
- IndexProcessor::Create(normalizer.get(), index_.get(), options,
- &fake_clock_));
+ index_processor_, IndexProcessor::Create(normalizer.get(), index_.get(),
+ options, &fake_clock_));
DocumentProto document =
DocumentBuilder()
@@ -693,8 +699,8 @@
ICING_ASSERT_OK_AND_ASSIGN(
index_processor_,
- IndexProcessor::Create(normalizer_.get(), index_.get(),
- processor_options, &fake_clock_));
+ IndexProcessor::Create(normalizer_.get(), index_.get(), processor_options,
+ &fake_clock_));
DocumentProto document =
DocumentBuilder()
diff --git a/icing/index/index.cc b/icing/index/index.cc
index bd41b51..db59ad2 100644
--- a/icing/index/index.cc
+++ b/icing/index/index.cc
@@ -164,7 +164,7 @@
icing_filesystem));
return std::unique_ptr<Index>(new Index(options, std::move(term_id_codec),
std::move(lite_index),
- std::move(main_index)));
+ std::move(main_index), filesystem));
}
libtextclassifier3::Status Index::TruncateTo(DocumentId document_id) {
@@ -277,6 +277,18 @@
std::move(main_term_metadata_list), num_to_return);
}
+IndexStorageInfoProto Index::GetStorageInfo() const {
+ IndexStorageInfoProto storage_info;
+ int64_t directory_size = filesystem_->GetDiskUsage(options_.base_dir.c_str());
+ if (directory_size != Filesystem::kBadFileSize) {
+ storage_info.set_index_size(directory_size);
+ } else {
+ storage_info.set_index_size(-1);
+ }
+ storage_info = lite_index_->GetStorageInfo(std::move(storage_info));
+ return main_index_->GetStorageInfo(std::move(storage_info));
+}
+
libtextclassifier3::Status Index::Editor::BufferTerm(const char* term) {
// Step 1: See if this term is already in the lexicon
uint32_t tvi;
diff --git a/icing/index/index.h b/icing/index/index.h
index a4ea719..eab5be8 100644
--- a/icing/index/index.h
+++ b/icing/index/index.h
@@ -32,6 +32,7 @@
#include "icing/index/term-id-codec.h"
#include "icing/index/term-metadata.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -126,6 +127,16 @@
return main_index_->last_added_document_id();
}
+ // Sets last_added_document_id to document_id so long as the lite index has no
+ // last added document id yet or document_id >= that id.
+ void set_last_added_document_id(DocumentId document_id) {
+ DocumentId lite_document_id = lite_index_->last_added_document_id();
+ if (lite_document_id == kInvalidDocumentId ||
+ document_id >= lite_document_id) {
+ lite_index_->set_last_added_document_id(document_id);
+ }
+ }
+
// Returns debug information for the index in out.
// verbosity <= 0, simplest debug information - just the lexicons and lite
// index.
@@ -151,6 +162,12 @@
return lite_index_size + main_index_size;
}
+ // Calculates the StorageInfo for the Index.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ IndexStorageInfoProto GetStorageInfo() const;
+
// Create an iterator to iterate through all doc hit infos in the index that
// match the term. section_id_mask can be set to ignore hits from sections not
// listed in the mask. Eg. section_id_mask = 1U << 3; would only return hits
@@ -242,11 +259,12 @@
private:
Index(const Options& options, std::unique_ptr<TermIdCodec> term_id_codec,
std::unique_ptr<LiteIndex> lite_index,
- std::unique_ptr<MainIndex> main_index)
+ std::unique_ptr<MainIndex> main_index, const Filesystem* filesystem)
: lite_index_(std::move(lite_index)),
main_index_(std::move(main_index)),
options_(options),
- term_id_codec_(std::move(term_id_codec)) {}
+ term_id_codec_(std::move(term_id_codec)),
+ filesystem_(filesystem) {}
libtextclassifier3::StatusOr<std::vector<TermMetadata>> FindLiteTermsByPrefix(
const std::string& prefix, const std::vector<NamespaceId>& namespace_ids,
@@ -256,6 +274,7 @@
std::unique_ptr<MainIndex> main_index_;
const Options options_;
std::unique_ptr<TermIdCodec> term_id_codec_;
+ const Filesystem* filesystem_;
};
} // namespace lib
diff --git a/icing/index/index_test.cc b/icing/index/index_test.cc
index 3479ab1..16593ef 100644
--- a/icing/index/index_test.cc
+++ b/icing/index/index_test.cc
@@ -31,6 +31,7 @@
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/legacy/index/icing-filesystem.h"
#include "icing/legacy/index/icing-mock-filesystem.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -46,6 +47,7 @@
using ::testing::ElementsAre;
using ::testing::Eq;
+using ::testing::Ge;
using ::testing::Gt;
using ::testing::IsEmpty;
using ::testing::IsTrue;
@@ -151,8 +153,6 @@
index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
}
TEST_F(IndexTest, EmptyIndexAfterMerge) {
@@ -170,8 +170,6 @@
index_->GetIterator("foo", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
}
TEST_F(IndexTest, AdvancePastEnd) {
@@ -236,8 +234,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, SingleHitSingleTermIndexAfterMerge) {
@@ -254,8 +250,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, SingleHitMultiTermIndex) {
@@ -271,8 +265,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, SingleHitMultiTermIndexAfterMerge) {
@@ -290,8 +282,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, NoHitMultiTermIndex) {
@@ -306,7 +296,6 @@
index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, NoHitMultiTermIndexAfterMerge) {
@@ -323,7 +312,6 @@
index_->GetIterator("baz", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
EXPECT_THAT(itr->Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, MultiHitMultiTermIndex) {
@@ -350,7 +338,6 @@
ElementsAre(
EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));
}
TEST_F(IndexTest, MultiHitMultiTermIndexAfterMerge) {
@@ -379,7 +366,6 @@
ElementsAre(
EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));
}
TEST_F(IndexTest, MultiHitSectionRestrict) {
@@ -400,8 +386,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
}
TEST_F(IndexTest, MultiHitSectionRestrictAfterMerge) {
@@ -424,8 +408,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
}
TEST_F(IndexTest, SingleHitDedupeIndex) {
@@ -447,8 +429,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, PrefixHit) {
@@ -463,8 +443,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, PrefixHitAfterMerge) {
@@ -481,8 +459,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, MultiPrefixHit) {
@@ -504,8 +480,6 @@
ElementsAre(
EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
}
TEST_F(IndexTest, MultiPrefixHitAfterMerge) {
@@ -529,8 +503,6 @@
ElementsAre(
EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
}
TEST_F(IndexTest, NoExactHitInPrefixQuery) {
@@ -550,7 +522,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId1, std::vector<SectionId>{kSectionId3})));
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
}
TEST_F(IndexTest, NoExactHitInPrefixQueryAfterMerge) {
@@ -572,7 +543,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId1, std::vector<SectionId>{kSectionId3})));
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
}
TEST_F(IndexTest, PrefixHitDedupe) {
@@ -588,7 +558,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, PrefixHitDedupeAfterMerge) {
@@ -606,7 +575,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, PrefixToString) {
@@ -703,9 +671,11 @@
std::default_random_engine random;
std::vector<std::string> query_terms;
+ std::string prefix = "prefix";
for (int i = 0; i < 2600; ++i) {
constexpr int kTokenSize = 5;
- query_terms.push_back(RandomString(kAlNumAlphabet, kTokenSize, &random));
+ query_terms.push_back(prefix +
+ RandomString(kAlNumAlphabet, kTokenSize, &random));
}
DocumentId document_id = 0;
@@ -714,7 +684,7 @@
while (status.ok()) {
for (int i = 0; i < 100; ++i) {
Index::Editor edit =
- index_->Edit(document_id, kSectionId2, TermMatchType::EXACT_ONLY,
+ index_->Edit(document_id, kSectionId2, TermMatchType::PREFIX,
/*namespace_id=*/0);
size_t idx = uniform(random);
status = edit.BufferTerm(query_terms.at(idx).c_str());
@@ -731,11 +701,14 @@
// Adding more hits should fail.
Index::Editor edit =
- index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY,
+ index_->Edit(document_id + 1, kSectionId2, TermMatchType::PREFIX,
/*namespace_id=*/0);
- EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
- EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
- EXPECT_THAT(edit.BufferTerm("baz"), IsOk());
+ std::string term = prefix + "foo";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ term = prefix + "bar";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ term = prefix + "baz";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
EXPECT_THAT(edit.IndexAllBufferedTerms(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -743,12 +716,17 @@
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator(query_terms.at(i).c_str(), kSectionIdMaskAll,
- TermMatchType::EXACT_ONLY));
+ TermMatchType::PREFIX));
// Each query term should contain at least one hit - there may have been
// other hits for this term that were added.
EXPECT_THAT(itr->Advance(), IsOk());
}
- EXPECT_THAT(index_->last_added_document_id(), Eq(document_id - 1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> last_itr,
+ index_->GetIterator(prefix.c_str(), kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(last_itr->Advance(), IsOk());
+ EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1));
}
TEST_F(IndexTest, FullIndexMerge) {
@@ -759,9 +737,11 @@
std::default_random_engine random;
std::vector<std::string> query_terms;
+ std::string prefix = "prefix";
for (int i = 0; i < 2600; ++i) {
constexpr int kTokenSize = 5;
- query_terms.push_back(RandomString(kAlNumAlphabet, kTokenSize, &random));
+ query_terms.push_back(prefix +
+ RandomString(kAlNumAlphabet, kTokenSize, &random));
}
DocumentId document_id = 0;
@@ -770,7 +750,7 @@
while (status.ok()) {
for (int i = 0; i < 100; ++i) {
Index::Editor edit =
- index_->Edit(document_id, kSectionId2, TermMatchType::EXACT_ONLY,
+ index_->Edit(document_id, kSectionId2, TermMatchType::PREFIX,
/*namespace_id=*/0);
size_t idx = uniform(random);
status = edit.BufferTerm(query_terms.at(idx).c_str());
@@ -789,30 +769,45 @@
// Adding more hits should fail.
Index::Editor edit =
- index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY,
+ index_->Edit(document_id + 1, kSectionId2, TermMatchType::PREFIX,
/*namespace_id=*/0);
- EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
- EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
- EXPECT_THAT(edit.BufferTerm("baz"), IsOk());
+ std::string term = prefix + "foo";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ term = prefix + "bar";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ term = prefix + "baz";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
EXPECT_THAT(edit.IndexAllBufferedTerms(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
- EXPECT_THAT(index_->last_added_document_id(), Eq(document_id - 1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<DocHitInfoIterator> last_itr,
+ index_->GetIterator(prefix.c_str(), kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(last_itr->Advance(), IsOk());
+ EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id - 1));
// After merging with the main index. Adding more hits should succeed now.
ICING_ASSERT_OK(index_->Merge());
- edit =
- index_->Edit(document_id + 1, kSectionId2, TermMatchType::EXACT_ONLY, 0);
- EXPECT_THAT(edit.BufferTerm("foo"), IsOk());
- EXPECT_THAT(edit.BufferTerm("bar"), IsOk());
- EXPECT_THAT(edit.BufferTerm("baz"), IsOk());
+ edit = index_->Edit(document_id + 1, kSectionId2, TermMatchType::PREFIX, 0);
+ term = prefix + "foo";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ term = prefix + "bar";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
+ term = prefix + "baz";
+ EXPECT_THAT(edit.BufferTerm(term.c_str()), IsOk());
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<DocHitInfoIterator> itr,
- index_->GetIterator("bar", kSectionIdMaskAll, TermMatchType::EXACT_ONLY));
+ index_->GetIterator(prefix + "bar", kSectionIdMaskAll,
+ TermMatchType::EXACT_ONLY));
// We know that "bar" should have at least one hit because we just added it!
EXPECT_THAT(itr->Advance(), IsOk());
EXPECT_THAT(itr->doc_hit_info().document_id(), Eq(document_id + 1));
- EXPECT_THAT(index_->last_added_document_id(), Eq(document_id + 1));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ last_itr, index_->GetIterator(prefix.c_str(), kSectionIdMaskAll,
+ TermMatchType::PREFIX));
+ EXPECT_THAT(last_itr->Advance(), IsOk());
+ EXPECT_THAT(last_itr->doc_hit_info().document_id(), Eq(document_id + 1));
}
TEST_F(IndexTest, IndexCreateIOFailure) {
@@ -881,8 +876,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, IndexPersistenceAfterMerge) {
@@ -910,8 +903,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, InvalidHitBufferSize) {
@@ -1278,8 +1269,6 @@
ElementsAre(
EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));
}
TEST_F(IndexTest, PrefixResultsFromLiteAndMain) {
@@ -1312,8 +1301,6 @@
EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));
}
TEST_F(IndexTest, GetDebugInfo) {
@@ -1420,8 +1407,6 @@
ElementsAre(
EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId3})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId2));
}
TEST_F(IndexTest, BackfillingNewTermsSucceeds) {
@@ -1476,8 +1461,6 @@
ElementsAre(
EqualsDocHitInfo(kDocumentId2, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId3));
}
TEST_F(IndexTest, TruncateToInvalidDocumentIdHasNoEffect) {
@@ -1525,8 +1508,6 @@
ElementsAre(
EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
}
TEST_F(IndexTest, TruncateToLastAddedDocumentIdHasNoEffect) {
@@ -1542,6 +1523,7 @@
TermMatchType::PREFIX, /*namespace_id=*/0);
ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId0);
ICING_EXPECT_OK(index_->TruncateTo(index_->last_added_document_id()));
// Clipping to invalid should have no effect.
ICING_ASSERT_OK_AND_ASSIGN(
@@ -1563,6 +1545,7 @@
/*namespace_id=*/0);
ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId1);
// Clipping to invalid should still have no effect even if both indices have
// hits.
@@ -1574,8 +1557,6 @@
ElementsAre(
EqualsDocHitInfo(kDocumentId1, std::vector<SectionId>{kSectionId3}),
EqualsDocHitInfo(kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId1));
}
TEST_F(IndexTest, TruncateToThrowsOutLiteIndex) {
@@ -1584,6 +1565,7 @@
TermMatchType::PREFIX, /*namespace_id=*/0);
ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId0);
ICING_ASSERT_OK(index_->Merge());
@@ -1592,6 +1574,7 @@
/*namespace_id=*/0);
ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId1);
EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk());
@@ -1602,8 +1585,6 @@
EXPECT_THAT(GetHits(std::move(itr)),
ElementsAre(EqualsDocHitInfo(
kDocumentId0, std::vector<SectionId>{kSectionId2})));
-
- EXPECT_THAT(index_->last_added_document_id(), Eq(kDocumentId0));
}
TEST_F(IndexTest, TruncateToThrowsOutBothIndices) {
@@ -1612,10 +1593,12 @@
TermMatchType::PREFIX, /*namespace_id=*/0);
ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId0);
edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::PREFIX,
/*namespace_id=*/0);
ASSERT_THAT(edit.BufferTerm("foul"), IsOk());
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId1);
ICING_ASSERT_OK(index_->Merge());
@@ -1624,6 +1607,7 @@
/*namespace_id=*/0);
ASSERT_THAT(edit.BufferTerm("foot"), IsOk());
EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ index_->set_last_added_document_id(kDocumentId2);
EXPECT_THAT(index_->TruncateTo(kDocumentId0), IsOk());
@@ -1632,8 +1616,33 @@
std::unique_ptr<DocHitInfoIterator> itr,
index_->GetIterator("f", kSectionIdMaskAll, TermMatchType::PREFIX));
EXPECT_THAT(GetHits(std::move(itr)), IsEmpty());
+}
- EXPECT_THAT(index_->last_added_document_id(), Eq(kInvalidDocumentId));
+TEST_F(IndexTest, IndexStorageInfoProto) {
+ // Add two documents to the lite index and merge them into main.
+ {
+ Index::Editor edit = index_->Edit(
+ kDocumentId0, kSectionId2, TermMatchType::PREFIX, /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foo"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+ edit = index_->Edit(kDocumentId1, kSectionId2, TermMatchType::PREFIX,
+ /*namespace_id=*/0);
+ ASSERT_THAT(edit.BufferTerm("foul"), IsOk());
+ EXPECT_THAT(edit.IndexAllBufferedTerms(), IsOk());
+
+ ICING_ASSERT_OK(index_->Merge());
+ }
+
+ IndexStorageInfoProto storage_info = index_->GetStorageInfo();
+ EXPECT_THAT(storage_info.index_size(), Ge(0));
+ EXPECT_THAT(storage_info.lite_index_lexicon_size(), Ge(0));
+ EXPECT_THAT(storage_info.lite_index_hit_buffer_size(), Ge(0));
+ EXPECT_THAT(storage_info.main_index_lexicon_size(), Ge(0));
+ EXPECT_THAT(storage_info.main_index_storage_size(), Ge(0));
+ EXPECT_THAT(storage_info.main_index_block_size(), Ge(0));
+ // There should be 1 block for the header and 1 block for two posting lists.
+ EXPECT_THAT(storage_info.num_blocks(), Eq(2));
+ EXPECT_THAT(storage_info.min_free_fraction(), Ge(0));
}
} // namespace
diff --git a/icing/index/iterator/doc-hit-info-iterator-and.h b/icing/index/iterator/doc-hit-info-iterator-and.h
index faca785..8ceff44 100644
--- a/icing/index/iterator/doc-hit-info-iterator-and.h
+++ b/icing/index/iterator/doc-hit-info-iterator-and.h
@@ -47,13 +47,16 @@
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats) const override {
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- short_->PopulateMatchedTermsStats(matched_terms_stats);
- long_->PopulateMatchedTermsStats(matched_terms_stats);
+ short_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
+ long_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
}
private:
@@ -78,13 +81,15 @@
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats) const override {
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
for (size_t i = 0; i < iterators_.size(); ++i) {
- iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats);
+ iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
}
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.cc b/icing/index/iterator/doc-hit-info-iterator-filter.cc
index c6cb86d..933f9b5 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.cc
@@ -31,7 +31,6 @@
#include "icing/store/document-filter-data.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
-#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -39,12 +38,11 @@
DocHitInfoIteratorFilter::DocHitInfoIteratorFilter(
std::unique_ptr<DocHitInfoIterator> delegate,
const DocumentStore* document_store, const SchemaStore* schema_store,
- const Clock* clock, const Options& options)
+ const Options& options)
: delegate_(std::move(delegate)),
document_store_(*document_store),
schema_store_(*schema_store),
- options_(options),
- current_time_milliseconds_(clock->GetSystemTimeMilliseconds()) {
+ options_(options) {
// Precompute all the NamespaceIds
for (std::string_view name_space : options_.namespaces) {
auto namespace_id_or = document_store_.GetNamespaceId(name_space);
@@ -67,61 +65,50 @@
}
libtextclassifier3::Status DocHitInfoIteratorFilter::Advance() {
- if (!delegate_->Advance().ok()) {
- // Didn't find anything on the delegate iterator.
- doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
- return absl_ports::ResourceExhaustedError(
- "No more DocHitInfos in iterator");
+ while (delegate_->Advance().ok()) {
+ if (!document_store_.DoesDocumentExist(
+ delegate_->doc_hit_info().document_id())) {
+ // Document doesn't exist, keep searching. This handles deletions and
+ // expired documents.
+ continue;
+ }
+
+ // Try to get the DocumentFilterData
+ auto document_filter_data_or = document_store_.GetDocumentFilterData(
+ delegate_->doc_hit_info().document_id());
+ if (!document_filter_data_or.ok()) {
+ // Didn't find the DocumentFilterData in the filter cache. This could be
+ // because the DocumentId isn't valid or the filter cache is in some
+ // invalid state. This is bad, but not the query's responsibility to fix,
+ // so just skip this result for now.
+ continue;
+ }
+ // We should be guaranteed that this exists now.
+ DocumentFilterData data = std::move(document_filter_data_or).ValueOrDie();
+
+ if (!options_.namespaces.empty() &&
+ target_namespace_ids_.count(data.namespace_id()) == 0) {
+ // Doesn't match one of the specified namespaces. Keep searching
+ continue;
+ }
+
+ if (!options_.schema_types.empty() &&
+ target_schema_type_ids_.count(data.schema_type_id()) == 0) {
+ // Doesn't match one of the specified schema types. Keep searching
+ continue;
+ }
+
+ // Satisfied all our specified filters
+ doc_hit_info_ = delegate_->doc_hit_info();
+ hit_intersect_section_ids_mask_ =
+ delegate_->hit_intersect_section_ids_mask();
+ return libtextclassifier3::Status::OK;
}
- if (current_time_milliseconds_ < 0) {
- // This shouldn't happen, but we add a sanity check here for any unknown
- // errors.
- return absl_ports::InternalError(
- "Couldn't get current time. Try again in a bit");
- }
-
- if (!document_store_.DoesDocumentExist(
- delegate_->doc_hit_info().document_id())) {
- // Document doesn't exist, keep searching
- return Advance();
- }
-
- // Try to get the DocumentFilterData
- auto document_filter_data_or = document_store_.GetDocumentFilterData(
- delegate_->doc_hit_info().document_id());
- if (!document_filter_data_or.ok()) {
- // Didn't find the DocumentFilterData in the filter cache. This could be
- // because the DocumentId isn't valid or the filter cache is in some invalid
- // state. This is bad, but not the query's responsibility to fix, so just
- // skip this result for now.
- return Advance();
- }
- // We should be guaranteed that this exists now.
- DocumentFilterData data = std::move(document_filter_data_or).ValueOrDie();
-
- if (!options_.namespaces.empty() &&
- target_namespace_ids_.count(data.namespace_id()) == 0) {
- // Doesn't match one of the specified namespaces. Keep searching
- return Advance();
- }
-
- if (!options_.schema_types.empty() &&
- target_schema_type_ids_.count(data.schema_type_id()) == 0) {
- // Doesn't match one of the specified schema types. Keep searching
- return Advance();
- }
-
- if (current_time_milliseconds_ >= data.expiration_timestamp_ms()) {
- // Current time has exceeded the document's expiration time
- return Advance();
- }
-
- // Satisfied all our specified filters
- doc_hit_info_ = delegate_->doc_hit_info();
- hit_intersect_section_ids_mask_ = delegate_->hit_intersect_section_ids_mask();
- return libtextclassifier3::Status::OK;
+ // Didn't find anything on the delegate iterator.
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
int32_t DocHitInfoIteratorFilter::GetNumBlocksInspected() const {
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter.h b/icing/index/iterator/doc-hit-info-iterator-filter.h
index fb60e38..5051607 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter.h
+++ b/icing/index/iterator/doc-hit-info-iterator-filter.h
@@ -27,7 +27,6 @@
#include "icing/schema/schema-store.h"
#include "icing/store/document-store.h"
#include "icing/store/namespace-id.h"
-#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -57,7 +56,7 @@
explicit DocHitInfoIteratorFilter(
std::unique_ptr<DocHitInfoIterator> delegate,
const DocumentStore* document_store, const SchemaStore* schema_store,
- const Clock* clock, const Options& options);
+ const Options& options);
libtextclassifier3::Status Advance() override;
@@ -68,8 +67,10 @@
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
- delegate_->PopulateMatchedTermsStats(matched_terms_stats);
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ delegate_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
}
private:
@@ -79,7 +80,6 @@
const Options options_;
std::unordered_set<NamespaceId> target_namespace_ids_;
std::unordered_set<SchemaTypeId> target_schema_type_ids_;
- const int64_t current_time_milliseconds_;
};
} // namespace lib
diff --git a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
index e0a8cd0..f80d1ea 100644
--- a/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-filter_test.cc
@@ -28,6 +28,7 @@
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/proto/document.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -59,10 +60,10 @@
test_document3_ =
DocumentBuilder().SetKey("icing", "email/3").SetSchema("email").Build();
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
@@ -100,9 +101,9 @@
std::unique_ptr<DocHitInfoIterator> original_iterator_empty =
std::make_unique<DocHitInfoIteratorDummy>();
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -124,9 +125,9 @@
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id3));
@@ -150,9 +151,9 @@
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id2, document_id3));
@@ -163,9 +164,9 @@
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(filtered_iterator.Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -177,9 +178,9 @@
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(filtered_iterator.Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -194,9 +195,9 @@
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(filtered_iterator.Advance(),
StatusIs(libtextclassifier3::StatusCode::RESOURCE_EXHAUSTED));
@@ -226,10 +227,10 @@
.SetSchema("email")
.Build();
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
@@ -270,9 +271,9 @@
std::make_unique<DocHitInfoIteratorDummy>();
options_.namespaces = std::vector<std::string_view>{};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -288,9 +289,9 @@
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.namespaces = std::vector<std::string_view>{"nonexistent_namespace"};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -305,9 +306,9 @@
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.namespaces = std::vector<std::string_view>{};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -329,9 +330,9 @@
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.namespaces = std::vector<std::string_view>{namespace1_};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id2));
@@ -355,9 +356,9 @@
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.namespaces = std::vector<std::string_view>{namespace1_, namespace3_};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id1, document_id2, document_id4));
@@ -379,14 +380,12 @@
document4_schema1_ =
DocumentBuilder().SetKey("namespace", "4").SetSchema(schema1_).Build();
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type(schema1_);
- type_config = schema.add_types();
- type_config->set_schema_type(schema2_);
- type_config = schema.add_types();
- type_config->set_schema_type(schema3_);
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType(schema1_))
+ .AddType(SchemaTypeConfigBuilder().SetType(schema2_))
+ .AddType(SchemaTypeConfigBuilder().SetType(schema3_))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
@@ -427,9 +426,9 @@
std::make_unique<DocHitInfoIteratorDummy>();
options_.schema_types = std::vector<std::string_view>{};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator_empty), document_store_.get(),
- schema_store_.get(), &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator_empty),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -446,9 +445,9 @@
options_.schema_types =
std::vector<std::string_view>{"nonexistent_schema_type"};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
@@ -463,9 +462,9 @@
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.schema_types = std::vector<std::string_view>{};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -484,9 +483,9 @@
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.schema_types = std::vector<std::string_view>{schema1_};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -507,9 +506,9 @@
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
options_.schema_types = std::vector<std::string_view>{schema2_, schema3_};
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator),
ElementsAre(document_id2, document_id3));
@@ -523,10 +522,10 @@
void SetUp() override {
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type(email_schema_);
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType(email_schema_))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
@@ -557,6 +556,16 @@
};
TEST_F(DocHitInfoIteratorExpirationFilterTest, TtlZeroIsntFilteredOut) {
+ // Arbitrary value
+ fake_clock_.SetSystemTimeMilliseconds(100);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
// Insert a document
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "1")
@@ -565,23 +574,30 @@
.SetTtlMs(0)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(document));
+ document_store->Put(document));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- // Arbitrary value
- fake_clock_.SetSystemTimeMilliseconds(100);
-
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
TEST_F(DocHitInfoIteratorExpirationFilterTest, BeforeTtlNotFilteredOut) {
+ // Arbitrary value, but must be less than document's creation_timestamp + ttl
+ fake_clock_.SetSystemTimeMilliseconds(50);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
// Insert a document
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "1")
@@ -590,92 +606,84 @@
.SetTtlMs(100)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(document));
+ document_store->Put(document));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- // Arbitrary value, but must be less than document's creation_timestamp + ttl
- fake_clock_.SetSystemTimeMilliseconds(50);
-
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
TEST_F(DocHitInfoIteratorExpirationFilterTest, EqualTtlFilteredOut) {
+ // Current time is exactly the document's creation_timestamp + ttl
+ fake_clock_.SetSystemTimeMilliseconds(150);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
// Insert a document
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "1")
.SetSchema(email_schema_)
- .SetCreationTimestampMs(0)
+ .SetCreationTimestampMs(50)
.SetTtlMs(100)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(document));
+ document_store->Put(document));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- // Current time is exactly the document's creation_timestamp + ttl
- fake_clock_.SetSystemTimeMilliseconds(100);
-
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
TEST_F(DocHitInfoIteratorExpirationFilterTest, PastTtlFilteredOut) {
+ // Arbitrary value, but must be greater than the document's
+ // creation_timestamp + ttl
+ fake_clock_.SetSystemTimeMilliseconds(151);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
// Insert a document
DocumentProto document = DocumentBuilder()
.SetKey("namespace", "1")
.SetSchema(email_schema_)
- .SetCreationTimestampMs(0)
+ .SetCreationTimestampMs(50)
.SetTtlMs(100)
.Build();
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
- document_store_->Put(document));
+ document_store->Put(document));
std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(document_id1)};
std::unique_ptr<DocHitInfoIterator> original_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
- // Arbitrary value, but must be greater than the document's
- // creation_timestamp + ttl
- fake_clock_.SetSystemTimeMilliseconds(101);
-
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store.get(),
+ schema_store_.get(), options_);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
}
-TEST_F(DocHitInfoIteratorExpirationFilterTest,
- InvalidTimeFiltersReturnsInternalError) {
- // Put something in the original iterator so we don't get a ResourceExhausted
- // error
- std::vector<DocHitInfo> doc_hit_infos = {DocHitInfo(/*document_id_in=*/0)};
- std::unique_ptr<DocHitInfoIterator> original_iterator =
- std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
-
- // -1 is an invalid timestamp
- fake_clock_.SetSystemTimeMilliseconds(-1);
-
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options_);
-
- EXPECT_THAT(filtered_iterator.Advance(),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
-}
-
class DocHitInfoIteratorFilterTest : public ::testing::Test {
protected:
DocHitInfoIteratorFilterTest() : test_dir_(GetTestTempDir() + "/icing") {}
@@ -709,16 +717,15 @@
document5_namespace1_schema1_ = DocumentBuilder()
.SetKey(namespace1_, "5")
.SetSchema(schema1_)
- .SetCreationTimestampMs(0)
+ .SetCreationTimestampMs(1)
.SetTtlMs(100)
.Build();
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type(schema1_);
- type_config = schema.add_types();
- type_config->set_schema_type(schema2_);
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType(schema1_))
+ .AddType(SchemaTypeConfigBuilder().SetType(schema2_))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
@@ -756,26 +763,36 @@
};
TEST_F(DocHitInfoIteratorFilterTest, CombineAllFiltersOk) {
+ // Filters out document5 since it's expired
+ fake_clock_.SetSystemTimeMilliseconds(199);
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, test_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id1,
- document_store_->Put(document1_namespace1_schema1_));
+ document_store->Put(document1_namespace1_schema1_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id2,
- document_store_->Put(document2_namespace1_schema1_));
+ document_store->Put(document2_namespace1_schema1_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id3,
- document_store_->Put(document3_namespace2_schema1_));
+ document_store->Put(document3_namespace2_schema1_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id4,
- document_store_->Put(document4_namespace1_schema2_));
+ document_store->Put(document4_namespace1_schema2_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id5,
- document_store_->Put(document5_namespace1_schema1_));
+ document_store->Put(document5_namespace1_schema1_));
// Deletes document2, causing it to be filtered out
ICING_ASSERT_OK(
- document_store_->Delete(document2_namespace1_schema1_.namespace_(),
- document2_namespace1_schema1_.uri()));
+ document_store->Delete(document2_namespace1_schema1_.namespace_(),
+ document2_namespace1_schema1_.uri()));
std::vector<DocHitInfo> doc_hit_infos = {
DocHitInfo(document_id1), DocHitInfo(document_id2),
@@ -793,13 +810,9 @@
// Filters out document4 by schema type
options.schema_types = std::vector<std::string_view>{schema1_};
- // Filters out document5 since it's expired
- FakeClock fake_clock;
- fake_clock.SetSystemTimeMilliseconds(199);
-
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock, options);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store.get(),
+ schema_store_.get(), options);
EXPECT_THAT(GetDocumentIds(&filtered_iterator), ElementsAre(document_id1));
}
@@ -830,9 +843,9 @@
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
DocHitInfoIteratorFilter::Options options;
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options);
EXPECT_THAT(GetDocHitInfos(&filtered_iterator),
ElementsAre(EqualsDocHitInfo(document_id1, section_ids1),
@@ -845,9 +858,9 @@
original_iterator->SetNumBlocksInspected(5);
DocHitInfoIteratorFilter::Options options;
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options);
EXPECT_THAT(filtered_iterator.GetNumBlocksInspected(), Eq(5));
}
@@ -857,9 +870,9 @@
original_iterator->SetNumLeafAdvanceCalls(6);
DocHitInfoIteratorFilter::Options options;
- DocHitInfoIteratorFilter filtered_iterator(
- std::move(original_iterator), document_store_.get(), schema_store_.get(),
- &fake_clock_, options);
+ DocHitInfoIteratorFilter filtered_iterator(std::move(original_iterator),
+ document_store_.get(),
+ schema_store_.get(), options);
EXPECT_THAT(filtered_iterator.GetNumLeafAdvanceCalls(), Eq(6));
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-not.cc b/icing/index/iterator/doc-hit-info-iterator-not.cc
index e1ece5c..8fb3659 100644
--- a/icing/index/iterator/doc-hit-info-iterator-not.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-not.cc
@@ -35,30 +35,29 @@
DocHitInfoIteratorAllDocumentId(document_id_limit)) {}
libtextclassifier3::Status DocHitInfoIteratorNot::Advance() {
- if (!all_document_id_iterator_.Advance().ok()) {
- doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- return absl_ports::ResourceExhaustedError(
- "No more DocHitInfos in iterator");
+ while (all_document_id_iterator_.Advance().ok()) {
+ if (all_document_id_iterator_.doc_hit_info().document_id() <
+ to_be_excluded_->doc_hit_info().document_id()) {
+ // Since DocumentIds are returned from DocHitInfoIterators in decreasing
+ // order, we have passed the last NOT result if we're smaller than its
+ // DocumentId. Advance the NOT result if so.
+ to_be_excluded_->Advance().IgnoreError();
+ }
+
+ if (all_document_id_iterator_.doc_hit_info().document_id() ==
+ to_be_excluded_->doc_hit_info().document_id()) {
+ // This is a NOT result, skip and Advance to the next result.
+ continue;
+ }
+
+ // No errors, we've found a valid result
+ doc_hit_info_ = all_document_id_iterator_.doc_hit_info();
+ return libtextclassifier3::Status::OK;
}
- if (all_document_id_iterator_.doc_hit_info().document_id() <
- to_be_excluded_->doc_hit_info().document_id()) {
- // Since DocumentIds are returned from DocHitInfoIterators in decreasing
- // order, we have passed the last NOT result if we're smaller than its
- // DocumentId. Advance the NOT result if so.
- to_be_excluded_->Advance().IgnoreError();
- }
-
- if (all_document_id_iterator_.doc_hit_info().document_id() ==
- to_be_excluded_->doc_hit_info().document_id()) {
- // This is a NOT result, skip and Advance to the next result.
- return Advance();
- }
-
- // No errors, we've found a valid result
- doc_hit_info_ = all_document_id_iterator_.doc_hit_info();
-
- return libtextclassifier3::Status::OK;
+ // Didn't find a hit, return with error
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
int32_t DocHitInfoIteratorNot::GetNumBlocksInspected() const {
diff --git a/icing/index/iterator/doc-hit-info-iterator-or.h b/icing/index/iterator/doc-hit-info-iterator-or.h
index 2f49430..2dae68d 100644
--- a/icing/index/iterator/doc-hit-info-iterator-or.h
+++ b/icing/index/iterator/doc-hit-info-iterator-or.h
@@ -43,15 +43,18 @@
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats) const override {
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- current_->PopulateMatchedTermsStats(matched_terms_stats);
+ current_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
// If equal, then current_ == left_. Combine with results from right_.
if (left_document_id_ == right_document_id_) {
- right_->PopulateMatchedTermsStats(matched_terms_stats);
+ right_->PopulateMatchedTermsStats(matched_terms_stats,
+ filtering_section_mask);
}
}
@@ -83,13 +86,15 @@
std::string ToString() const override;
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo> *matched_terms_stats) const override {
+ std::vector<TermMatchInfo> *matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
for (size_t i = 0; i < current_iterators_.size(); i++) {
- current_iterators_.at(i)->PopulateMatchedTermsStats(matched_terms_stats);
+ current_iterators_.at(i)->PopulateMatchedTermsStats(
+ matched_terms_stats, filtering_section_mask);
}
}
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
index 8acb91a..034c8cb 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.cc
@@ -45,57 +45,54 @@
target_section_(target_section) {}
libtextclassifier3::Status DocHitInfoIteratorSectionRestrict::Advance() {
- if (!delegate_->Advance().ok()) {
- // Didn't find anything on the delegate iterator.
- doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
- hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
- return absl_ports::ResourceExhaustedError(
- "No more DocHitInfos in iterator");
- }
+ while (delegate_->Advance().ok()) {
+ DocumentId document_id = delegate_->doc_hit_info().document_id();
- DocumentId document_id = delegate_->doc_hit_info().document_id();
+ SectionIdMask section_id_mask =
+ delegate_->doc_hit_info().hit_section_ids_mask();
- SectionIdMask section_id_mask =
- delegate_->doc_hit_info().hit_section_ids_mask();
-
- auto data_or = document_store_.GetDocumentFilterData(document_id);
- if (!data_or.ok()) {
- // Ran into some error retrieving information on this hit, skip
- return Advance();
- }
-
- // Guaranteed that the DocumentFilterData exists at this point
- DocumentFilterData data = std::move(data_or).ValueOrDie();
- SchemaTypeId schema_type_id = data.schema_type_id();
-
- // A hit can be in multiple sections at once, need to check that at least one
- // of the confirmed section ids match the name of the target section
- while (section_id_mask != 0) {
- // There was a hit in this section id
- SectionId section_id = __builtin_ctz(section_id_mask);
-
- auto section_metadata_or =
- schema_store_.GetSectionMetadata(schema_type_id, section_id);
-
- if (section_metadata_or.ok()) {
- const SectionMetadata* section_metadata =
- section_metadata_or.ValueOrDie();
-
- if (section_metadata->path == target_section_) {
- // The hit was in the target section name, return OK/found
- doc_hit_info_ = delegate_->doc_hit_info();
- hit_intersect_section_ids_mask_ =
- delegate_->hit_intersect_section_ids_mask();
- return libtextclassifier3::Status::OK;
- }
+ auto data_or = document_store_.GetDocumentFilterData(document_id);
+ if (!data_or.ok()) {
+ // Ran into some error retrieving information on this hit, skip
+ continue;
}
- // Mark this section as checked
- section_id_mask &= ~(1U << section_id);
+ // Guaranteed that the DocumentFilterData exists at this point
+ DocumentFilterData data = std::move(data_or).ValueOrDie();
+ SchemaTypeId schema_type_id = data.schema_type_id();
+
+ // A hit can be in multiple sections at once, need to check that at least
+ // one of the confirmed section ids match the name of the target section
+ while (section_id_mask != 0) {
+ // There was a hit in this section id
+ SectionId section_id = __builtin_ctz(section_id_mask);
+
+ auto section_metadata_or =
+ schema_store_.GetSectionMetadata(schema_type_id, section_id);
+
+ if (section_metadata_or.ok()) {
+ const SectionMetadata* section_metadata =
+ section_metadata_or.ValueOrDie();
+
+ if (section_metadata->path == target_section_) {
+ // The hit was in the target section name, return OK/found
+ doc_hit_info_ = delegate_->doc_hit_info();
+ hit_intersect_section_ids_mask_ = 1u << section_id;
+ return libtextclassifier3::Status::OK;
+ }
+ }
+
+ // Mark this section as checked
+ section_id_mask &= ~(1U << section_id);
+ }
+
+ // Didn't find a matching section name for this hit. Continue.
}
- // Didn't find a matching section name for this hit, go to the next hit
- return Advance();
+ // Didn't find anything on the delegate iterator.
+ doc_hit_info_ = DocHitInfo(kInvalidDocumentId);
+ hit_intersect_section_ids_mask_ = kSectionIdMaskNone;
+ return absl_ports::ResourceExhaustedError("No more DocHitInfos in iterator");
}
int32_t DocHitInfoIteratorSectionRestrict::GetNumBlocksInspected() const {
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
index ba74384..52b243a 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict.h
@@ -52,13 +52,21 @@
std::string ToString() const override;
- // NOTE: currently, section restricts does decide which documents to
- // return, but doesn't impact the relevance score of a document.
- // TODO(b/173156803): decide whether we want to filter the matched_terms_stats
- // for the restricted sections.
+ // Note that the DocHitInfoIteratorSectionRestrict is the only iterator that
+ // should set filtering_section_mask, hence the received
+ // filtering_section_mask is ignored and the filtering_section_mask passed to
+ // the delegate will be set to hit_intersect_section_ids_mask_. This allows
+ // the delegate to filter its matching sections.
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
- delegate_->PopulateMatchedTermsStats(matched_terms_stats);
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
+ if (doc_hit_info_.document_id() == kInvalidDocumentId) {
+ // Current hit isn't valid, return.
+ return;
+ }
+ delegate_->PopulateMatchedTermsStats(
+ matched_terms_stats,
+ /*filtering_section_mask=*/hit_intersect_section_ids_mask_);
}
private:
diff --git a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
index 91e0cbe..43a846b 100644
--- a/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
+++ b/icing/index/iterator/doc-hit-info-iterator-section-restrict_test.cc
@@ -29,6 +29,7 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -43,9 +44,18 @@
namespace {
using ::testing::ElementsAre;
+using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::IsEmpty;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
+
class DocHitInfoIteratorSectionRestrictTest : public ::testing::Test {
protected:
DocHitInfoIteratorSectionRestrictTest()
@@ -56,18 +66,18 @@
document_ =
DocumentBuilder().SetKey("namespace", "uri").SetSchema("email").Build();
- auto type_config = schema_.add_types();
- type_config->set_schema_type("email");
-
- // Add an indexed property so we generate section metadata on it
- auto property = type_config->add_properties();
- property->set_property_name(indexed_property_);
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ schema_ = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ // Add an indexed property so we generate section
+ // metadata on it
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(indexed_property_)
+ .SetDataTypeString(MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// First and only indexed property, so it gets the first id of 0
indexed_section_id_ = 0;
@@ -101,6 +111,57 @@
FakeClock fake_clock_;
};
+TEST_F(DocHitInfoIteratorSectionRestrictTest,
+ PopulateMatchedTermsStats_IncludesHitWithMatchingSection) {
+ // Populate the DocumentStore's FilterCache with this document's data
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+ document_store_->Put(document_));
+
+ // Arbitrary section ids for the documents in the DocHitInfoIterators.
+ // Created to test correct section_id_mask behavior.
+ SectionIdMask original_section_id_mask = 0b00000101; // hits in sections 0, 2
+
+ DocHitInfo doc_hit_info1 = DocHitInfo(document_id);
+ doc_hit_info1.UpdateSection(/*section_id=*/0, /*hit_term_frequency=*/1);
+ doc_hit_info1.UpdateSection(/*section_id=*/2, /*hit_term_frequency=*/2);
+
+ // Create a hit that was found in the indexed section
+ std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1};
+
+ auto original_iterator =
+ std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "hi");
+ original_iterator->set_hit_intersect_section_ids_mask(
+ original_section_id_mask);
+
+ // Filtering for the indexed section name (which has a section id of 0) should
+ // get a result.
+ DocHitInfoIteratorSectionRestrict section_restrict_iterator(
+ std::move(original_iterator), document_store_.get(), schema_store_.get(),
+ /*target_section=*/indexed_property_);
+
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
+
+ ICING_EXPECT_OK(section_restrict_iterator.Advance());
+ EXPECT_THAT(section_restrict_iterator.doc_hit_info().document_id(),
+ Eq(document_id));
+ SectionIdMask expected_section_id_mask = 0b00000001; // hits in sections 0
+ EXPECT_EQ(section_restrict_iterator.hit_intersect_section_ids_mask(),
+ expected_section_id_mask);
+
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_EQ(matched_terms_stats.at(0).term, "hi");
+ std::array<Hit::TermFrequency, kMaxSectionId> expected_term_frequencies{
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ EXPECT_THAT(matched_terms_stats.at(0).term_frequencies,
+ ElementsAreArray(expected_term_frequencies));
+ EXPECT_EQ(matched_terms_stats.at(0).section_ids_mask,
+ expected_section_id_mask);
+
+ EXPECT_FALSE(section_restrict_iterator.Advance().ok());
+}
+
TEST_F(DocHitInfoIteratorSectionRestrictTest, EmptyOriginalIterator) {
std::unique_ptr<DocHitInfoIterator> original_iterator_empty =
std::make_unique<DocHitInfoIteratorDummy>();
@@ -110,6 +171,9 @@
schema_store_.get(), /*target_section=*/"");
EXPECT_THAT(GetDocumentIds(&filtered_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ filtered_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest, IncludesHitWithMatchingSection) {
@@ -148,6 +212,9 @@
/*target_section=*/"");
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
@@ -171,6 +238,9 @@
"some_section_name");
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
@@ -192,6 +262,9 @@
indexed_property_);
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest,
@@ -216,6 +289,9 @@
indexed_property_);
EXPECT_THAT(GetDocumentIds(&section_restrict_iterator), IsEmpty());
+ std::vector<TermMatchInfo> matched_terms_stats;
+ section_restrict_iterator.PopulateMatchedTermsStats(&matched_terms_stats);
+ EXPECT_THAT(matched_terms_stats, IsEmpty());
}
TEST_F(DocHitInfoIteratorSectionRestrictTest, GetNumBlocksInspected) {
diff --git a/icing/index/iterator/doc-hit-info-iterator-test-util.h b/icing/index/iterator/doc-hit-info-iterator-test-util.h
index 913696a..45acc8f 100644
--- a/icing/index/iterator/doc-hit-info-iterator-test-util.h
+++ b/icing/index/iterator/doc-hit-info-iterator-test-util.h
@@ -56,23 +56,25 @@
// Imitates behavior of DocHitInfoIteratorTermMain/DocHitInfoIteratorTermLite
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask();
+ SectionIdMask section_mask =
+ doc_hit_info_.hit_section_ids_mask() & filtering_section_mask;
+ SectionIdMask section_mask_copy = section_mask;
std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = {
Hit::kNoTermFrequency};
-
- while (section_mask) {
- SectionId section_id = __builtin_ctz(section_mask);
+ while (section_mask_copy) {
+ SectionId section_id = __builtin_ctz(section_mask_copy);
section_term_frequencies.at(section_id) =
doc_hit_info_.hit_term_frequency(section_id);
- section_mask &= ~(1u << section_id);
+ section_mask_copy &= ~(1u << section_id);
}
- TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(),
- section_term_frequencies);
+ TermMatchInfo term_stats(term_, section_mask,
+ std::move(section_term_frequencies));
for (auto& cur_term_stats : *matched_terms_stats) {
if (cur_term_stats.term == term_stats.term) {
diff --git a/icing/index/iterator/doc-hit-info-iterator.h b/icing/index/iterator/doc-hit-info-iterator.h
index 67bd74f..bf90202 100644
--- a/icing/index/iterator/doc-hit-info-iterator.h
+++ b/icing/index/iterator/doc-hit-info-iterator.h
@@ -66,6 +66,8 @@
// Returns:
// OK if was able to advance to a new document_id.
+ // INVALID_ARGUMENT if there are fewer than 2 iterators for an AND/OR
+ // iterator
// RESOUCE_EXHAUSTED if we've run out of document_ids to iterate over
virtual libtextclassifier3::Status Advance() = 0;
@@ -94,11 +96,14 @@
// For the last hit docid, retrieves all the matched query terms and other
// stats, see TermMatchInfo.
+ // filtering_section_mask filters the matching sections and should be set only
+ // by DocHitInfoIteratorSectionRestrict.
// If Advance() wasn't called after construction, Advance() returned false or
// the concrete HitIterator didn't override this method, the vectors aren't
// populated.
virtual void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const {}
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const {}
protected:
DocHitInfo doc_hit_info_;
diff --git a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
index 90e4888..f975989 100644
--- a/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
+++ b/icing/index/iterator/doc-hit-info-iterator_benchmark.cc
@@ -14,15 +14,15 @@
#include <vector>
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/index/iterator/doc-hit-info-iterator-and.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
-#include "testing/base/public/benchmark.h"
-#include "gmock/gmock.h"
-#include "gtest/gtest.h"
namespace icing {
namespace lib {
diff --git a/icing/index/lite/doc-hit-info-iterator-term-lite.h b/icing/index/lite/doc-hit-info-iterator-term-lite.h
index ac5e97f..8dbe043 100644
--- a/icing/index/lite/doc-hit-info-iterator-term-lite.h
+++ b/icing/index/lite/doc-hit-info-iterator-term-lite.h
@@ -50,21 +50,24 @@
int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask();
+ SectionIdMask section_mask =
+ doc_hit_info_.hit_section_ids_mask() & filtering_section_mask;
+ SectionIdMask section_mask_copy = section_mask;
std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = {
Hit::kNoTermFrequency};
- while (section_mask) {
- SectionId section_id = __builtin_ctz(section_mask);
+ while (section_mask_copy) {
+ SectionId section_id = __builtin_ctz(section_mask_copy);
section_term_frequencies.at(section_id) =
doc_hit_info_.hit_term_frequency(section_id);
- section_mask &= ~(1u << section_id);
+ section_mask_copy &= ~(1u << section_id);
}
- TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(),
+ TermMatchInfo term_stats(term_, section_mask,
std::move(section_term_frequencies));
for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) {
diff --git a/icing/index/lite/lite-index.cc b/icing/index/lite/lite-index.cc
index e0379b8..fb23934 100644
--- a/icing/index/lite/lite-index.cc
+++ b/icing/index/lite/lite-index.cc
@@ -310,8 +310,6 @@
return absl_ports::ResourceExhaustedError("Hit buffer is full!");
}
- header_->set_last_added_docid(hit.document_id());
-
TermIdHitPair term_id_hit_pair(term_id, hit);
uint32_t cur_size = header_->cur_size();
TermIdHitPair::Value* valp =
@@ -394,26 +392,36 @@
}
libtextclassifier3::StatusOr<int64_t> LiteIndex::GetElementsSize() const {
- int64_t header_and_hit_buffer_file_size =
- filesystem_->GetFileSize(hit_buffer_fd_.get());
-
- if (header_and_hit_buffer_file_size == Filesystem::kBadFileSize) {
- return absl_ports::InternalError(
- "Failed to get element size of the LiteIndex's header and hit buffer");
+ IndexStorageInfoProto storage_info = GetStorageInfo(IndexStorageInfoProto());
+ if (storage_info.lite_index_hit_buffer_size() == -1 ||
+ storage_info.lite_index_lexicon_size() == -1) {
+ return absl_ports::AbortedError(
+ "Failed to get size of LiteIndex's members.");
}
-
- int64_t lexicon_disk_usage = lexicon_.GetElementsSize();
- if (lexicon_disk_usage == IcingFilesystem::kBadFileSize) {
- return absl_ports::InternalError(
- "Failed to get element size of LiteIndex's lexicon");
- }
-
// On initialization, we grow the file to a padded size first. So this size
// won't count towards the size taken up by elements
size_t header_padded_size = IcingMMapper::page_aligned_size(header_size());
+ return storage_info.lite_index_hit_buffer_size() - header_padded_size +
+ storage_info.lite_index_lexicon_size();
+}
- return header_and_hit_buffer_file_size - header_padded_size +
- lexicon_disk_usage;
+// Records the lite index's hit-buffer file size and lexicon size in
+// storage_info and returns the updated proto.
+IndexStorageInfoProto LiteIndex::GetStorageInfo(
+ IndexStorageInfoProto storage_info) const {
+ // -1 is stored in place of any size that could not be read.
+ constexpr int64_t kUnknownSize = -1;
+ // Size of the file behind hit_buffer_fd_.
+ const int64_t hit_buffer_file_size =
+ filesystem_->GetFileSize(hit_buffer_fd_.get());
+ storage_info.set_lite_index_hit_buffer_size(
+ hit_buffer_file_size == Filesystem::kBadFileSize ? kUnknownSize
+ : hit_buffer_file_size);
+ // Size reported by the lexicon trie.
+ const int64_t lexicon_size = lexicon_.GetElementsSize();
+ storage_info.set_lite_index_lexicon_size(
+ lexicon_size == Filesystem::kBadFileSize ? kUnknownSize : lexicon_size);
+ return storage_info;
}
uint32_t LiteIndex::Seek(uint32_t term_id) {
diff --git a/icing/index/lite/lite-index.h b/icing/index/lite/lite-index.h
index 7b51aa4..b134aba 100644
--- a/icing/index/lite/lite-index.h
+++ b/icing/index/lite/lite-index.h
@@ -37,6 +37,7 @@
#include "icing/legacy/index/icing-lite-index-header.h"
#include "icing/legacy/index/icing-lite-index-options.h"
#include "icing/legacy/index/icing-mmapper.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -224,6 +225,9 @@
DocumentId last_added_document_id() const {
return header_->last_added_docid();
}
+ void set_last_added_document_id(DocumentId document_id) const {
+ header_->set_last_added_docid(document_id);
+ }
const IcingDynamicTrie& lexicon() const { return lexicon_; }
@@ -240,6 +244,14 @@
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+ // Takes the provided storage_info, populates the fields related to the lite
+ // index and returns that storage_info.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ IndexStorageInfoProto GetStorageInfo(
+ IndexStorageInfoProto storage_info) const;
+
private:
static IcingDynamicTrie::RuntimeOptions MakeTrieRuntimeOptions();
diff --git a/icing/index/main/doc-hit-info-iterator-term-main.h b/icing/index/main/doc-hit-info-iterator-term-main.h
index d626d7a..f3cf701 100644
--- a/icing/index/main/doc-hit-info-iterator-term-main.h
+++ b/icing/index/main/doc-hit-info-iterator-term-main.h
@@ -50,21 +50,24 @@
int32_t GetNumLeafAdvanceCalls() const override { return num_advance_calls_; }
void PopulateMatchedTermsStats(
- std::vector<TermMatchInfo>* matched_terms_stats) const override {
+ std::vector<TermMatchInfo>* matched_terms_stats,
+ SectionIdMask filtering_section_mask = kSectionIdMaskAll) const override {
if (doc_hit_info_.document_id() == kInvalidDocumentId) {
// Current hit isn't valid, return.
return;
}
- SectionIdMask section_mask = doc_hit_info_.hit_section_ids_mask();
+ SectionIdMask section_mask =
+ doc_hit_info_.hit_section_ids_mask() & filtering_section_mask;
+ SectionIdMask section_mask_copy = section_mask;
std::array<Hit::TermFrequency, kMaxSectionId> section_term_frequencies = {
Hit::kNoTermFrequency};
- while (section_mask) {
- SectionId section_id = __builtin_ctz(section_mask);
+ while (section_mask_copy) {
+ SectionId section_id = __builtin_ctz(section_mask_copy);
section_term_frequencies.at(section_id) =
doc_hit_info_.hit_term_frequency(section_id);
- section_mask &= ~(1u << section_id);
+ section_mask_copy &= ~(1u << section_id);
}
- TermMatchInfo term_stats(term_, doc_hit_info_.hit_section_ids_mask(),
+ TermMatchInfo term_stats(term_, section_mask,
std::move(section_term_frequencies));
for (const TermMatchInfo& cur_term_stats : *matched_terms_stats) {
diff --git a/icing/index/main/main-index.cc b/icing/index/main/main-index.cc
index 636f631..8ae6b27 100644
--- a/icing/index/main/main-index.cc
+++ b/icing/index/main/main-index.cc
@@ -121,14 +121,38 @@
}
libtextclassifier3::StatusOr<int64_t> MainIndex::GetElementsSize() const {
- int64_t lexicon_elt_size = main_lexicon_->GetElementsSize();
- int64_t index_elt_size = flash_index_storage_->GetElementsSize();
- if (lexicon_elt_size == IcingFilesystem::kBadFileSize ||
- index_elt_size == IcingFilesystem::kBadFileSize) {
- return absl_ports::InternalError(
- "Failed to get element size of LiteIndex's lexicon");
+ IndexStorageInfoProto storage_info = GetStorageInfo(IndexStorageInfoProto());
+ if (storage_info.main_index_storage_size() == -1 ||
+ storage_info.main_index_lexicon_size() == -1) {
+ return absl_ports::AbortedError(
+ "Failed to get size of MainIndex's members.");
}
- return lexicon_elt_size + index_elt_size;
+ return storage_info.main_index_storage_size() +
+ storage_info.main_index_lexicon_size();
+}
+
+// Fills in the main-index fields of storage_info and returns it. A size that
+// cannot be determined is stored as -1.
+IndexStorageInfoProto MainIndex::GetStorageInfo(
+ IndexStorageInfoProto storage_info) const {
+ int64_t lexicon_elt_size = main_lexicon_->GetElementsSize();
+ if (lexicon_elt_size != IcingFilesystem::kBadFileSize) {
+ storage_info.set_main_index_lexicon_size(lexicon_elt_size);
+ } else {
+ storage_info.set_main_index_lexicon_size(-1);
+ }
+ int64_t index_elt_size = flash_index_storage_->GetElementsSize();
+ // Validate index_elt_size itself (not the lexicon size) so a failed storage
+ // read is reported as -1 instead of the raw kBadFileSize sentinel.
+ if (index_elt_size != IcingFilesystem::kBadFileSize) {
+ storage_info.set_main_index_storage_size(index_elt_size);
+ } else {
+ storage_info.set_main_index_storage_size(-1);
+ }
+ storage_info.set_main_index_block_size(flash_index_storage_->block_size());
+ storage_info.set_num_blocks(flash_index_storage_->num_blocks());
+ storage_info.set_min_free_fraction(flash_index_storage_->min_free_fraction());
+ return storage_info;
}
libtextclassifier3::StatusOr<std::unique_ptr<PostingListAccessor>>
diff --git a/icing/index/main/main-index.h b/icing/index/main/main-index.h
index 7403b8c..43635ca 100644
--- a/icing/index/main/main-index.h
+++ b/icing/index/main/main-index.h
@@ -27,6 +27,7 @@
#include "icing/index/term-metadata.h"
#include "icing/legacy/index/icing-dynamic-trie.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/proto/storage.pb.h"
#include "icing/store/namespace-id.h"
#include "icing/util/status-macros.h"
@@ -172,6 +173,14 @@
// - INTERNAL on IO error
libtextclassifier3::StatusOr<int64_t> GetElementsSize() const;
+ // Takes the provided storage_info, populates the fields related to the main
+ // index and returns that storage_info.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ IndexStorageInfoProto GetStorageInfo(
+ IndexStorageInfoProto storage_info) const;
+
// Returns debug information for the main index in out.
// verbosity <= 0, simplest debug information - just the lexicon
// verbosity > 0, more detailed debug information including raw postings
diff --git a/icing/jni/icing-search-engine-jni.cc b/icing/jni/icing-search-engine-jni.cc
index bf709cd..ea2bcf7 100644
--- a/icing/jni/icing-search-engine-jni.cc
+++ b/icing/jni/icing-search-engine-jni.cc
@@ -27,6 +27,7 @@
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/proto/usage.pb.h"
#include "icing/util/status-macros.h"
@@ -356,12 +357,19 @@
JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativePersistToDisk(
- JNIEnv* env, jclass clazz, jobject object) {
+ JNIEnv* env, jclass clazz, jobject object, jint persist_type_code) {
icing::lib::IcingSearchEngine* icing =
GetIcingSearchEnginePointer(env, object);
+ if (!icing::lib::PersistType::Code_IsValid(persist_type_code)) {
+ ICING_LOG(ERROR) << persist_type_code
+ << " is an invalid value for PersistType::Code";
+ return nullptr;
+ }
+ icing::lib::PersistType::Code persist_type_code_enum =
+ static_cast<icing::lib::PersistType::Code>(persist_type_code);
icing::lib::PersistToDiskResultProto persist_to_disk_result_proto =
- icing->PersistToDisk();
+ icing->PersistToDisk(persist_type_code_enum);
return SerializeProtoToJniByteArray(env, persist_to_disk_result_proto);
}
@@ -390,6 +398,18 @@
}
JNIEXPORT jbyteArray JNICALL
+Java_com_google_android_icing_IcingSearchEngine_nativeGetStorageInfo(
+ JNIEnv* env, jclass clazz, jobject object) {
+ // Look up the native engine for this Java object, ask it for its storage
+ // info and return the result proto serialized as a Java byte array.
+ icing::lib::IcingSearchEngine* engine =
+ GetIcingSearchEnginePointer(env, object);
+ icing::lib::StorageInfoResultProto result = engine->GetStorageInfo();
+
+ return SerializeProtoToJniByteArray(env, result);
+}
+
+JNIEXPORT jbyteArray JNICALL
Java_com_google_android_icing_IcingSearchEngine_nativeReset(
JNIEnv* env, jclass clazz, jobject object) {
icing::lib::IcingSearchEngine* icing =
diff --git a/icing/jni/jni-cache.cc b/icing/jni/jni-cache.cc
index 58eb8bf..9b75db6 100644
--- a/icing/jni/jni-cache.cc
+++ b/icing/jni/jni-cache.cc
@@ -14,6 +14,8 @@
#include "icing/jni/jni-cache.h"
+#ifdef ICING_REVERSE_JNI_SEGMENTATION
+
#include "icing/text_classifier/lib3/utils/java/jni-base.h"
#include "icing/text_classifier/lib3/utils/java/jni-helper.h"
#include "icing/absl_ports/canonical_errors.h"
@@ -214,3 +216,5 @@
} // namespace lib
} // namespace icing
+
+#endif // ICING_REVERSE_JNI_SEGMENTATION
diff --git a/icing/jni/jni-cache.h b/icing/jni/jni-cache.h
index a5f16c7..3faaed6 100644
--- a/icing/jni/jni-cache.h
+++ b/icing/jni/jni-cache.h
@@ -15,6 +15,16 @@
#ifndef ICING_JNI_JNI_CACHE_H_
#define ICING_JNI_JNI_CACHE_H_
+#ifndef ICING_REVERSE_JNI_SEGMENTATION
+namespace icing {
+namespace lib {
+
+class JniCache {}; // Declare an empty class definition for non-Android builds.
+
+} // namespace lib
+} // namespace icing
+#else // ICING_REVERSE_JNI_SEGMENTATION
+
#include <jni.h>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
@@ -75,4 +85,6 @@
} // namespace lib
} // namespace icing
+#endif // !ICING_REVERSE_JNI_SEGMENTATION
+
#endif // ICING_JNI_JNI_CACHE_H_
diff --git a/icing/jni/reverse-jni-break-iterator.cc b/icing/jni/reverse-jni-break-iterator.cc
deleted file mode 100644
index 1a8a799..0000000
--- a/icing/jni/reverse-jni-break-iterator.cc
+++ /dev/null
@@ -1,187 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/jni/reverse-jni-break-iterator.h"
-
-#include <jni.h>
-#include <math.h>
-
-#include <cassert>
-#include <cctype>
-#include <map>
-
-#include "icing/jni/jni-cache.h"
-#include "icing/text_classifier/lib3/utils/base/statusor.h"
-#include "icing/text_classifier/lib3/utils/java/jni-base.h"
-#include "icing/text_classifier/lib3/utils/java/jni-helper.h"
-#include "icing/absl_ports/canonical_errors.h"
-#include "icing/util/status-macros.h"
-
-namespace icing {
-namespace lib {
-
-namespace {
-// Chosen based on results in go/reverse-jni-benchmarks
-static constexpr int kBatchSize = 100;
-} // namespace
-
-// -----------------------------------------------------------------------------
-// Implementations that call out to JVM. Behold the beauty.
-// -----------------------------------------------------------------------------
-libtextclassifier3::StatusOr<std::unique_ptr<ReverseJniBreakIterator>>
-ReverseJniBreakIterator::Create(const JniCache* jni_cache,
- std::string_view text,
- std::string_view locale) {
- if (jni_cache == nullptr) {
- return absl_ports::InvalidArgumentError(
- "Create must be called with a valid JniCache pointer!");
- }
-
- ICING_ASSIGN_OR_RETURN(
- libtextclassifier3::ScopedLocalRef<jstring> java_text,
- jni_cache->ConvertToJavaString(text.data(), text.length()));
- if (java_text.get() == nullptr) {
- return absl_ports::AbortedError("Failed to create Java String from input.");
- }
-
- ICING_ASSIGN_OR_RETURN(
- libtextclassifier3::ScopedLocalRef<jstring> java_locale_string,
- jni_cache->ConvertToJavaString(locale.data(), locale.length()));
- if (java_locale_string.get() == nullptr) {
- return absl_ports::AbortedError(
- "Failed to create Java String from locale.");
- }
-
- JNIEnv* jenv = jni_cache->GetEnv();
- ICING_ASSIGN_OR_RETURN(
- libtextclassifier3::ScopedLocalRef<jobject> java_locale,
- libtextclassifier3::JniHelper::NewObject(
- jenv, jni_cache->locale_class.get(), jni_cache->locale_constructor,
- java_locale_string.get()));
- if (java_locale.get() == nullptr) {
- return absl_ports::AbortedError(
- "Failed to create Java Locale from locale.");
- }
-
- ICING_ASSIGN_OR_RETURN(
- libtextclassifier3::ScopedLocalRef<jobject> local_iterator_batcher,
- libtextclassifier3::JniHelper::NewObject(
- jenv, jni_cache->breakiterator_class.get(),
- jni_cache->breakiterator_constructor, java_locale.get()));
- libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher =
- libtextclassifier3::MakeGlobalRef(local_iterator_batcher.get(), jenv,
- jni_cache->jvm);
- if (iterator_batcher.get() == nullptr) {
- return absl_ports::AbortedError(
- "Failed to create Java BreakIteratorBatcher.");
- }
-
- ICING_RETURN_IF_ERROR(libtextclassifier3::JniHelper::CallVoidMethod(
- jenv, iterator_batcher.get(), jni_cache->breakiterator_settext,
- java_text.get()));
- return std::unique_ptr<ReverseJniBreakIterator>(
- new ReverseJniBreakIterator(jni_cache, std::move(iterator_batcher)));
-}
-
-ReverseJniBreakIterator::ReverseJniBreakIterator(
- const JniCache* jni_cache,
- libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher)
- : jni_cache_(jni_cache),
- iterator_batcher_(std::move(iterator_batcher)),
- is_done_(false),
- is_almost_done_(false) {}
-
-int ReverseJniBreakIterator::Next() {
- if (is_done_) {
- return ReverseJniBreakIterator::kDone;
- }
- if (break_indices_cache_.empty()) {
- if (FetchNextBatch() == ReverseJniBreakIterator::kDone) {
- // Either there were no more results or an error occurred. Either way,
- // mark ourselves as done and return.
- is_done_ = true;
- return ReverseJniBreakIterator::kDone;
- }
- is_almost_done_ = break_indices_cache_.size() < kBatchSize;
- }
- int break_index = break_indices_cache_.front();
- break_indices_cache_.pop();
- is_done_ = is_almost_done_ && break_indices_cache_.empty();
- return break_index;
-}
-
-int ReverseJniBreakIterator::First() {
- const int first_index = jni_cache_->GetEnv()->CallIntMethod(
- iterator_batcher_.get(), jni_cache_->breakiterator_first);
- if (jni_cache_->ExceptionCheckAndClear()) {
- return ReverseJniBreakIterator::kDone;
- }
- ClearCache();
- return first_index;
-}
-
-int ReverseJniBreakIterator::Preceding(int offset) {
- const int preceding_index = jni_cache_->GetEnv()->CallIntMethod(
- iterator_batcher_.get(), jni_cache_->breakiterator_preceding, offset);
- if (jni_cache_->ExceptionCheckAndClear()) {
- return ReverseJniBreakIterator::kDone;
- }
- ClearCache();
- return preceding_index;
-}
-
-int ReverseJniBreakIterator::Following(int offset) {
- const int following_index = jni_cache_->GetEnv()->CallIntMethod(
- iterator_batcher_.get(), jni_cache_->breakiterator_following, offset);
- if (jni_cache_->ExceptionCheckAndClear()) {
- return ReverseJniBreakIterator::kDone;
- }
- ClearCache();
- return following_index;
-}
-
-int ReverseJniBreakIterator::FetchNextBatch() {
- ICING_ASSIGN_OR_RETURN(
- libtextclassifier3::ScopedLocalRef<jintArray> break_indices,
- libtextclassifier3::JniHelper::CallObjectMethod<jintArray>(
- jni_cache_->GetEnv(), iterator_batcher_.get(),
- jni_cache_->breakiterator_next, kBatchSize),
- ReverseJniBreakIterator::kDone);
- if (break_indices == nullptr || jni_cache_->ExceptionCheckAndClear()) {
- return ReverseJniBreakIterator::kDone;
- }
- jint num_indices = jni_cache_->GetEnv()->GetArrayLength(break_indices.get());
- if (num_indices == 0) {
- return ReverseJniBreakIterator::kDone;
- }
- jint* break_indices_arr =
- static_cast<jint*>(jni_cache_->GetEnv()->GetPrimitiveArrayCritical(
- break_indices.get(), nullptr));
- for (int i = 0; i < num_indices; ++i) {
- break_indices_cache_.push(break_indices_arr[i]);
- }
- jni_cache_->GetEnv()->ReleasePrimitiveArrayCritical(break_indices.get(),
- break_indices_arr,
- /*mode=*/0);
- return num_indices;
-}
-
-void ReverseJniBreakIterator::ClearCache() {
- break_indices_cache_ = std::queue<int>();
- is_done_ = false;
- is_almost_done_ = false;
-}
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/jni/reverse-jni-break-iterator.h b/icing/jni/reverse-jni-break-iterator.h
deleted file mode 100644
index c1f05f4..0000000
--- a/icing/jni/reverse-jni-break-iterator.h
+++ /dev/null
@@ -1,124 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_JNI_REVERSE_JNI_BREAK_ITERATOR_H_
-#define ICING_JNI_REVERSE_JNI_BREAK_ITERATOR_H_
-
-#include <jni.h>
-
-#include <queue>
-#include <string>
-
-#include "icing/jni/jni-cache.h"
-#include "icing/text_classifier/lib3/utils/java/jni-base.h"
-
-namespace icing {
-namespace lib {
-
-// A class that handles the cross-JNI interactions with BreakIteratorBatcher and
-// hides the batching element to provide an interface akin to
-// java.text.BreakIterator.
-//
-// Example:
-// std::string text = "我每天走路去上班。";
-// ASSERT_THAT(text, SizeIs(27));
-// std::unique_ptr<ReverseJniBreakIterator> itr =
-// ReverseJniBreakIterator::Create(jni_cache, text, locale);
-// std::vector<int> nexts;
-// int next = itr->Next();
-// while (next != ReverseJniBreakIterator::kDone) {
-// nexts.push_back(next);
-// next = itr->Next();
-// }
-// EXPECT_THAT(nexts, ElementsAre(1, 3, 5, 6, 8));
-class ReverseJniBreakIterator {
- public:
- static constexpr int kDone = -1;
-
- // Creates a ReverseJniBreakiterator with the given text and locale.
- //
- // Returns:
- // A ReverseJniBreakIterator on success
- // INVALID_ARGUMENT if jni_cache isn't a valid JniCache pointer
- // INTERNAL if unable to create any of the required Java objects
- static libtextclassifier3::StatusOr<std::unique_ptr<ReverseJniBreakIterator>>
- Create(const JniCache* jni_cache, std::string_view text,
- std::string_view locale);
-
- // Returns the UTF-16 boundary following the current boundary. If the current
- // boundary is the last text boundary, it returns
- // ReverseJniBreakIterator::kDONE.
- //
- // NOTE: The 'boundary' refers to the UTF-16 boundary - NOT the UTF-8
- // boundary. Callers interested in the UTF-8 boundary are required to maintain
- // whatever state is necessary to translate from UTF-16 to UTF-8 boundaries.
- int Next();
-
- // Returns the first UTF-16 boundary. The iterator's current position is set
- // to the first text boundary and any cached data is cleared.
- int First();
-
- // Returns the position of the first UTF-16 boundary preceding the UTF-16
- // offset. If there is no boundary preceding the specified offset, then
- // ReverseJniBreakIterator::kDone is returned.
- //
- // The iterator's current position is set to the segment whose boundary was
- // returned and any cached data is cleared.
- int Preceding(int offset);
-
- // Returns the position of the first UTF-16 boundary following the UTF-16
- // offset. If there is no boundary following the specified offset, then
- // ReverseJniBreakIterator::kDone is returned.
- //
- // The iterator's current position is set to the segment whose boundary
- // was returned and any cached data is cleared.
- int Following(int offset);
-
- private:
- ReverseJniBreakIterator(
- const JniCache* jni_cache,
- libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher);
-
- // Fetches the results of up to kBatchSize next calls and stores them in
- // break_indices_cache_. Returns the number of results or kDone if no more
- // results could be fetched.
- int FetchNextBatch();
-
- // Empties the cache and sets is_done_ and is_almost_done_ to false.
- void ClearCache();
-
- // Keeps track of references to Java classes and methods. Does NOT own.
- const JniCache* jni_cache_;
-
- // The reference to the actual instance of BreakIteratorBatcher that
- // this class interacts with.
- libtextclassifier3::ScopedGlobalRef<jobject> iterator_batcher_;
-
- // The cache holding the most recent batch of return values from
- // BreakIteratorBatcher#next.
- std::queue<int> break_indices_cache_;
-
- bool is_done_;
-
- // The last batch was incomplete (< kBatchSize results were returned). The
- // next call to BreakIteratorBatcher#next is guaranteed to return an
- // empty array. Once the results from the last batch are evicted from
- // break_indices_cache, ReverseJniBreakIterator will transition to is_done_.
- bool is_almost_done_;
-};
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_JNI_REVERSE_JNI_BREAK_ITERATOR_H_
diff --git a/icing/performance-configuration.cc b/icing/performance-configuration.cc
index aeaa449..45b03d3 100644
--- a/icing/performance-configuration.cc
+++ b/icing/performance-configuration.cc
@@ -15,6 +15,7 @@
#include "icing/performance-configuration.h"
#include "icing/result/result-state.h"
+#include "icing/scoring/scored-document-hit.h"
namespace icing {
namespace lib {
@@ -60,32 +61,14 @@
// value.
constexpr int kSafeMemoryUsage = 16 * 1024 * 1024; // 16MB
-// This number is not determined by benchmarks. We just assume that returning
-// the best 1000 scored document hits of a query is enough. To find the best
-// 1000 scored document hits from a heap, we need roughly 0.7 ms on a Pixel 3 XL
-// according to //icing/scoring:ranker_benchmark.
-constexpr int kMaxNumHitsPerQuery = 1000;
+// The maximum number of hits that can fit below the kSafeMemoryUsage threshold.
+constexpr int kMaxNumTotalHits = kSafeMemoryUsage / sizeof(ScoredDocumentHit);
-// A rough estimation of the size of ResultState if it stores the maximum number
-// of scored document hits.
-constexpr int kMaxMemoryPerResult =
- sizeof(ResultState) + kMaxNumHitsPerQuery * sizeof(ScoredDocumentHit);
-
-// To be safer, we assume that all the Results contain the maximum number of
-// hits and only use half of the memory allowed.
-constexpr int kDefaultNumResultsToCache =
- kSafeMemoryUsage / 2 / kMaxMemoryPerResult;
-
-static_assert(
- kDefaultNumResultsToCache > 500,
- "Default number of results to cache has changed, please update and make "
- "sure it still meets our requirements.");
} // namespace
PerformanceConfiguration::PerformanceConfiguration()
: PerformanceConfiguration(kMaxQueryLength, kDefaultNumToScore,
- kMaxNumHitsPerQuery, kDefaultNumResultsToCache) {
-}
+ kMaxNumTotalHits) {}
} // namespace lib
} // namespace icing
diff --git a/icing/performance-configuration.h b/icing/performance-configuration.h
index fa4050b..b9282ca 100644
--- a/icing/performance-configuration.h
+++ b/icing/performance-configuration.h
@@ -24,12 +24,10 @@
PerformanceConfiguration();
PerformanceConfiguration(int max_query_length_in, int num_to_score_in,
- int max_num_hits_per_query_in,
- int max_num_cache_results_in)
+ int max_num_total_hits)
: max_query_length(max_query_length_in),
num_to_score(num_to_score_in),
- max_num_hits_per_query(max_num_hits_per_query_in),
- max_num_cache_results(max_num_cache_results_in) {}
+ max_num_total_hits(max_num_total_hits) {}
// Search performance
@@ -41,11 +39,9 @@
// Memory
- // Maximum number of ScoredDocumentHits to return per query.
- int max_num_hits_per_query;
-
- // Maximum number of ResultStates to store in ResultStateManager.
- int max_num_cache_results;
+ // Maximum number of ScoredDocumentHits to cache in the ResultStateManager at
+ // one time.
+ int max_num_total_hits;
};
// TODO(b/149040810): Consider creating a class to manage performance
diff --git a/icing/portable/endian.h b/icing/portable/endian.h
new file mode 100644
index 0000000..42f6c02
--- /dev/null
+++ b/icing/portable/endian.h
@@ -0,0 +1,206 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Utility functions that depend on bytesex. We define htonll and ntohll,
+// as well as "Google" versions of all the standards: ghtonl, ghtons, and
+// so on. These functions do exactly the same as their standard variants,
+// but don't require including the dangerous netinet/in.h.
+
+#ifndef ICING_PORTABLE_ENDIAN_H_
+#define ICING_PORTABLE_ENDIAN_H_
+
+#include <cstdint>
+
+// IS_LITTLE_ENDIAN, IS_BIG_ENDIAN
+#if defined OS_LINUX || defined OS_ANDROID || defined(__ANDROID__)
+// _BIG_ENDIAN
+#include <endian.h>
+
+#elif defined(__APPLE__)
+
+// BIG_ENDIAN
+#include <machine/endian.h> // NOLINT(build/include)
+
+/* Let's try and follow the Linux convention */
+#define __BYTE_ORDER BYTE_ORDER
+#define __LITTLE_ENDIAN LITTLE_ENDIAN
+#define __BIG_ENDIAN BIG_ENDIAN
+
+#endif // operating system
+
+// defines __BYTE_ORDER for MSVC
+#ifdef COMPILER_MSVC
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#define IS_LITTLE_ENDIAN
+#else // COMPILER_MSVC
+
+// define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN
+// using the above endian definitions from endian.h if
+// endian.h was included
+#ifdef __BYTE_ORDER
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define IS_LITTLE_ENDIAN
+#endif // __BYTE_ORDER == __LITTLE_ENDIAN
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define IS_BIG_ENDIAN
+#endif // __BYTE_ORDER == __BIG_ENDIAN
+
+#else // __BYTE_ORDER
+
+#if defined(__LITTLE_ENDIAN__)
+#define IS_LITTLE_ENDIAN
+#elif defined(__BIG_ENDIAN__)
+#define IS_BIG_ENDIAN
+#endif // __LITTLE_ENDIAN__ or __BIG_ENDIAN__
+
+#endif // __BYTE_ORDER
+#endif // COMPILER_MSVC
+
+// byte swap functions (bswap_16, bswap_32, bswap_64).
+// byte swap functions reverse the order of bytes, e.g.
+// byteswap of 102030405060708 = 807060504030201
+// byteswap of 1020304 = 4030201
+
+// The following guarantees declaration of the byte swap functions
+#ifdef COMPILER_MSVC
+#include <stdlib.h> // NOLINT(build/include)
+
+#define bswap_16(x) _byteswap_ushort(x)
+#define bswap_32(x) _byteswap_ulong(x)
+#define bswap_64(x) _byteswap_uint64(x)
+
+#elif defined(__APPLE__)
+// Mac OS X / Darwin features
+#include <libkern/OSByteOrder.h>
+
+#define bswap_16(x) OSSwapInt16(x)
+#define bswap_32(x) OSSwapInt32(x)
+#define bswap_64(x) OSSwapInt64(x)
+
+#elif defined(__GLIBC__) || defined(__BIONIC__) || defined(__ASYLO__)
+#include <byteswap.h> // IWYU pragma: export
+
+#else // built-in byteswap functions
+
+// Portable fallbacks used when no platform byte-swap header is available.
+// Each reverses the byte order of its argument. The matching macros let the
+// `defined(bswap_64)` check further down detect these definitions.
+static inline uint16_t bswap_16(uint16_t x) {
+ // Cast back down: the shifts are evaluated on promoted int operands.
+ return static_cast<uint16_t>(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8));
+}
+#define bswap_16(x) bswap_16(x)
+static inline uint32_t bswap_32(uint32_t x) {
+ return (((x & 0xFF) << 24) | ((x & 0xFF00) << 8) | ((x & 0xFF0000) >> 8) |
+ ((x & 0xFF000000) >> 24));
+}
+#define bswap_32(x) bswap_32(x)
+static inline uint64_t bswap_64(uint64_t x) {
+ return (((x & (uint64_t)0xFF) << 56) | ((x & (uint64_t)0xFF00) << 40) |
+ ((x & (uint64_t)0xFF0000) << 24) | ((x & (uint64_t)0xFF000000) << 8) |
+ ((x & (uint64_t)0xFF00000000) >> 8) |
+ ((x & (uint64_t)0xFF0000000000) >> 24) |
+ ((x & (uint64_t)0xFF000000000000) >> 40) |
+ ((x & (uint64_t)0xFF00000000000000) >> 56));
+}
+#define bswap_64(x) bswap_64(x)
+
+#endif // end byteswap functions
+
+// Use compiler byte-swapping intrinsics if they are available. 32-bit
+// and 64-bit versions are available in Clang and GCC as of GCC 4.3.0.
+// The 16-bit version is available in Clang and GCC only as of GCC 4.8.0.
+// For simplicity, we enable them all only for GCC 4.8.0 or later.
+#if defined(__clang__) || \
+ (defined(__GNUC__) && \
+ ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ >= 5))
+
+inline uint64_t gbswap_64(uint64_t host_int) {
+ return __builtin_bswap64(host_int);
+}
+inline uint32_t gbswap_32(uint32_t host_int) {
+ return __builtin_bswap32(host_int);
+}
+inline uint16_t gbswap_16(uint16_t host_int) {
+ return __builtin_bswap16(host_int);
+}
+
+#else // intrinsics available
+
+// Fallback byte swaps for compilers lacking the __builtin_bswap* intrinsics.
+inline uint64_t gbswap_64(uint64_t host_int) {
+#if defined(__GNUC__) && defined(__x86_64__) && \
+ !(defined(__APPLE__) && defined(__MACH__))
+ // Adapted from /usr/include/byteswap.h. Not available on Mac.
+ if (__builtin_constant_p(host_int)) {
+ return __bswap_constant_64(host_int);
+ }
+ uint64_t result;
+ __asm__("bswap %0" : "=r"(result) : "0"(host_int));
+ return result;
+#elif defined(bswap_64)
+ return bswap_64(host_int);
+#else // bswap_64
+ return static_cast<uint64_t>(bswap_32(static_cast<uint32_t>(host_int >> 32))) |
+ (static_cast<uint64_t>(bswap_32(static_cast<uint32_t>(host_int))) << 32);
+#endif // bswap_64
+}
+inline uint32_t gbswap_32(uint32_t host_int) { return bswap_32(host_int); }
+inline uint16_t gbswap_16(uint16_t host_int) { return bswap_16(host_int); }
+
+#endif // intrinsics available
+
+#ifdef IS_LITTLE_ENDIAN
+
+// Definitions for ntohl etc. that don't require us to include
+// netinet/in.h. We wrap gbswap_32 and gbswap_16 in functions rather
+// than just #defining them because in debug mode, gcc doesn't
+// correctly handle the (rather involved) definitions of bswap_32.
+// gcc guarantees that inline functions are as fast as macros, so
+// this isn't a performance hit.
+inline uint16_t ghtons(uint16_t x) { return gbswap_16(x); }
+inline uint32_t ghtonl(uint32_t x) { return gbswap_32(x); }
+inline uint64_t ghtonll(uint64_t x) { return gbswap_64(x); }
+
+#elif defined IS_BIG_ENDIAN
+
+// On big-endian hosts, host order is already network order: return x as-is.
+// These are functions instead of macros to avoid self-assignment warnings
+// on calls such as "i = ghtonl(i);". This also provides type checking.
+inline uint16_t ghtons(uint16_t x) { return x; }
+inline uint32_t ghtonl(uint32_t x) { return x; }
+inline uint64_t ghtonll(uint64_t x) { return x; }
+
+#else // bytesex
+#error \
+ "Unsupported bytesex: Either IS_BIG_ENDIAN or IS_LITTLE_ENDIAN must be defined" // NOLINT
+#endif // bytesex
+
+#ifndef htonll
+// With the rise of 64-bit, some systems are beginning to define this.
+#define htonll(x) ghtonll(x)
+#endif // htonll
+
+// ntoh* and hton* are the same thing for any size and bytesex,
+// since the function is an involution, i.e., its own inverse.
+inline uint16_t gntohs(uint16_t x) { return ghtons(x); }
+inline uint32_t gntohl(uint32_t x) { return ghtonl(x); }
+inline uint64_t gntohll(uint64_t x) { return ghtonll(x); }
+
+#ifndef ntohll
+#define ntohll(x) htonll(x)
+#endif // ntohll
+
+#endif // ICING_PORTABLE_ENDIAN_H_
diff --git a/icing/testing/platform.h b/icing/portable/platform.h
similarity index 74%
rename from icing/testing/platform.h
rename to icing/portable/platform.h
index ad612d5..8712835 100644
--- a/icing/testing/platform.h
+++ b/icing/portable/platform.h
@@ -12,11 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#ifndef ICING_TESTING_PLATFORM_H_
-#define ICING_TESTING_PLATFORM_H_
+#ifndef ICING_PORTABLE_PLATFORM_H_
+#define ICING_PORTABLE_PLATFORM_H_
-// This file is meant to hold util functions for tests that help the test
-// determine which platform-specific configuration it may be running in.
namespace icing {
namespace lib {
@@ -52,7 +50,27 @@
return false;
}
+enum Architecture {
+ UNKNOWN,  // Target matched none of the macros GetArchitecture() checks.
+ BIT_32,  // Built for 32-bit ARM (__arm__) or 32-bit x86 (__i386__).
+ BIT_64,  // Built for 64-bit ARM (__aarch64__) or x86-64 (__x86_64__).
+};
+
+// Returns the bitness of the target this code was compiled for, decided at
+// preprocessing time from the architecture macros tested below.
+// Architecture macros pulled from
+// https://developer.android.com/ndk/guides/cpu-features
+inline Architecture GetArchitecture() {
+#if defined(__arm__) || defined(__i386__)
+ return BIT_32;
+#elif defined(__aarch64__) || defined(__x86_64__)
+ return BIT_64;
+#else
+ return UNKNOWN;
+#endif
+}
+
} // namespace lib
} // namespace icing
-#endif // ICING_TESTING_PLATFORM_H_
+#endif // ICING_PORTABLE_PLATFORM_H_
diff --git a/icing/query/query-processor.cc b/icing/query/query-processor.cc
index 0732ed0..1f937fd 100644
--- a/icing/query/query-processor.cc
+++ b/icing/query/query-processor.cc
@@ -46,7 +46,6 @@
#include "icing/tokenization/tokenizer-factory.h"
#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer.h"
-#include "icing/util/clock.h"
#include "icing/util/status-macros.h"
namespace icing {
@@ -105,31 +104,27 @@
const LanguageSegmenter* language_segmenter,
const Normalizer* normalizer,
const DocumentStore* document_store,
- const SchemaStore* schema_store, const Clock* clock) {
+ const SchemaStore* schema_store) {
ICING_RETURN_ERROR_IF_NULL(index);
ICING_RETURN_ERROR_IF_NULL(language_segmenter);
ICING_RETURN_ERROR_IF_NULL(normalizer);
ICING_RETURN_ERROR_IF_NULL(document_store);
ICING_RETURN_ERROR_IF_NULL(schema_store);
- ICING_RETURN_ERROR_IF_NULL(clock);
- return std::unique_ptr<QueryProcessor>(
- new QueryProcessor(index, language_segmenter, normalizer, document_store,
- schema_store, clock));
+ return std::unique_ptr<QueryProcessor>(new QueryProcessor(
+ index, language_segmenter, normalizer, document_store, schema_store));
}
QueryProcessor::QueryProcessor(Index* index,
const LanguageSegmenter* language_segmenter,
const Normalizer* normalizer,
const DocumentStore* document_store,
- const SchemaStore* schema_store,
- const Clock* clock)
+ const SchemaStore* schema_store)
: index_(*index),
language_segmenter_(*language_segmenter),
normalizer_(*normalizer),
document_store_(*document_store),
- schema_store_(*schema_store),
- clock_(*clock) {}
+ schema_store_(*schema_store) {}
DocHitInfoIteratorFilter::Options QueryProcessor::getFilterOptions(
const SearchSpecProto& search_spec) {
@@ -156,7 +151,7 @@
DocHitInfoIteratorFilter::Options options = getFilterOptions(search_spec);
results.root_iterator = std::make_unique<DocHitInfoIteratorFilter>(
std::move(results.root_iterator), &document_store_, &schema_store_,
- &clock_, options);
+ options);
return results;
}
@@ -279,7 +274,7 @@
results.query_term_iterators[normalized_text] =
std::make_unique<DocHitInfoIteratorFilter>(
std::move(term_iterator), &document_store_, &schema_store_,
- &clock_, options);
+ options);
results.query_terms[frames.top().section_restrict].insert(
std::move(normalized_text));
diff --git a/icing/query/query-processor.h b/icing/query/query-processor.h
index 0932ec5..bdf9ef2 100644
--- a/icing/query/query-processor.h
+++ b/icing/query/query-processor.h
@@ -27,7 +27,6 @@
#include "icing/store/document-store.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/transform/normalizer.h"
-#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -47,7 +46,7 @@
static libtextclassifier3::StatusOr<std::unique_ptr<QueryProcessor>> Create(
Index* index, const LanguageSegmenter* language_segmenter,
const Normalizer* normalizer, const DocumentStore* document_store,
- const SchemaStore* schema_store, const Clock* clock);
+ const SchemaStore* schema_store);
struct QueryResults {
std::unique_ptr<DocHitInfoIterator> root_iterator;
@@ -77,7 +76,7 @@
const LanguageSegmenter* language_segmenter,
const Normalizer* normalizer,
const DocumentStore* document_store,
- const SchemaStore* schema_store, const Clock* clock);
+ const SchemaStore* schema_store);
// Parse the query into a one DocHitInfoIterator that represents the root of a
// query tree.
@@ -103,7 +102,6 @@
const Normalizer& normalizer_;
const DocumentStore& document_store_;
const SchemaStore& schema_store_;
- const Clock& clock_;
};
} // namespace lib
diff --git a/icing/query/query-processor_benchmark.cc b/icing/query/query-processor_benchmark.cc
index eb8b7a4..bdd40aa 100644
--- a/icing/query/query-processor_benchmark.cc
+++ b/icing/query/query-processor_benchmark.cc
@@ -147,7 +147,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index.get(), language_segmenter.get(),
normalizer.get(), document_store.get(),
- schema_store.get(), &clock));
+ schema_store.get()));
SearchSpecProto search_spec;
search_spec.set_query(input_string);
@@ -278,7 +278,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index.get(), language_segmenter.get(),
normalizer.get(), document_store.get(),
- schema_store.get(), &clock));
+ schema_store.get()));
const std::string query_string = absl_ports::StrCat(
input_string_a, " ", input_string_b, " ", input_string_c, " ",
@@ -402,7 +402,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index.get(), language_segmenter.get(),
normalizer.get(), document_store.get(),
- schema_store.get(), &clock));
+ schema_store.get()));
SearchSpecProto search_spec;
search_spec.set_query(input_string);
@@ -522,7 +522,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index.get(), language_segmenter.get(),
normalizer.get(), document_store.get(),
- schema_store.get(), &clock));
+ schema_store.get()));
SearchSpecProto search_spec;
search_spec.set_query(input_string);
diff --git a/icing/query/query-processor_test.cc b/icing/query/query-processor_test.cc
index 6ec0a2a..daeb479 100644
--- a/icing/query/query-processor_test.cc
+++ b/icing/query/query-processor_test.cc
@@ -29,9 +29,11 @@
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/index/iterator/doc-hit-info-iterator.h"
#include "icing/legacy/index/icing-filesystem.h"
+#include "icing/portable/platform.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
@@ -39,7 +41,6 @@
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/jni-test-helpers.h"
-#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
@@ -60,30 +61,16 @@
using ::testing::Test;
using ::testing::UnorderedElementsAre;
-SchemaTypeConfigProto* AddSchemaType(SchemaProto* schema,
- std::string schema_type) {
- SchemaTypeConfigProto* type_config = schema->add_types();
- type_config->set_schema_type(schema_type);
- return type_config;
-}
+constexpr PropertyConfigProto_DataType_Code TYPE_STRING =
+ PropertyConfigProto_DataType_Code_STRING;
-void AddIndexedProperty(SchemaTypeConfigProto* type_config, std::string name) {
- PropertyConfigProto* property_config = type_config->add_properties();
- property_config->set_property_name(name);
- property_config->set_data_type(PropertyConfigProto::DataType::STRING);
- property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property_config->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property_config->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-}
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
-void AddUnindexedProperty(SchemaTypeConfigProto* type_config,
- std::string name) {
- PropertyConfigProto* property_config = type_config->add_properties();
- property_config->set_property_name(name);
- property_config->set_data_type(PropertyConfigProto::DataType::STRING);
-}
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
class QueryProcessorTest : public Test {
protected:
@@ -159,37 +146,33 @@
EXPECT_THAT(
QueryProcessor::Create(/*index=*/nullptr, language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_),
+ schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(
QueryProcessor::Create(index_.get(), /*language_segmenter=*/nullptr,
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_),
+ schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
/*normalizer=*/nullptr, document_store_.get(),
- schema_store_.get(), &fake_clock_),
+ schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(
- QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), /*document_store=*/nullptr,
- schema_store_.get(), &fake_clock_),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
- EXPECT_THAT(QueryProcessor::Create(index_.get(), language_segmenter_.get(),
- normalizer_.get(), document_store_.get(),
- /*schema_store=*/nullptr, &fake_clock_),
+ EXPECT_THAT(QueryProcessor::Create(
+ index_.get(), language_segmenter_.get(), normalizer_.get(),
+ /*document_store=*/nullptr, schema_store_.get()),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
EXPECT_THAT(QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), /*clock=*/nullptr),
+ /*schema_store=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_F(QueryProcessorTest, EmptyGroupMatchAllDocuments) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -221,7 +204,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("()");
@@ -238,8 +221,9 @@
TEST_F(QueryProcessorTest, EmptyQueryMatchAllDocuments) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -271,7 +255,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("");
@@ -288,8 +272,9 @@
TEST_F(QueryProcessorTest, QueryTermNormalized) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -330,7 +315,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("hElLo WORLD");
@@ -363,8 +348,9 @@
TEST_F(QueryProcessorTest, OneTermPrefixMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -402,7 +388,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("he");
@@ -430,8 +416,9 @@
TEST_F(QueryProcessorTest, OneTermExactMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -469,7 +456,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("hello");
@@ -497,8 +484,9 @@
TEST_F(QueryProcessorTest, AndSameTermExactMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -536,7 +524,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("hello hello");
@@ -566,8 +554,9 @@
TEST_F(QueryProcessorTest, AndTwoTermExactMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -608,7 +597,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("hello world");
@@ -640,8 +629,9 @@
TEST_F(QueryProcessorTest, AndSameTermPrefixMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -679,7 +669,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("he he");
@@ -709,8 +699,9 @@
TEST_F(QueryProcessorTest, AndTwoTermPrefixMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -751,7 +742,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("he wo");
@@ -784,8 +775,9 @@
TEST_F(QueryProcessorTest, AndTwoTermPrefixAndExactMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -826,7 +818,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("hello wo");
@@ -859,8 +851,9 @@
TEST_F(QueryProcessorTest, OrTwoTermExactMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -906,7 +899,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("hello OR world");
@@ -947,8 +940,9 @@
TEST_F(QueryProcessorTest, OrTwoTermPrefixMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -994,7 +988,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("he OR wo");
@@ -1034,8 +1028,9 @@
TEST_F(QueryProcessorTest, OrTwoTermPrefixAndExactMatch) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -1080,7 +1075,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("hello OR wo");
@@ -1120,8 +1115,9 @@
TEST_F(QueryProcessorTest, CombinedAndOrTerms) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -1179,7 +1175,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
{
// OR gets precedence over AND, this is parsed as ((puppy OR kitten) AND
@@ -1305,8 +1301,9 @@
TEST_F(QueryProcessorTest, OneGroup) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -1356,7 +1353,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
// Without grouping, this would be parsed as ((puppy OR kitten) AND foo) and
// no documents would match. But with grouping, Document 1 matches puppy
@@ -1380,8 +1377,9 @@
TEST_F(QueryProcessorTest, TwoGroups) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -1430,7 +1428,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
// Without grouping, this would be parsed as (puppy AND (dog OR kitten) AND
// cat) and wouldn't match any documents. But with grouping, Document 1
@@ -1457,8 +1455,9 @@
TEST_F(QueryProcessorTest, ManyLevelNestedGrouping) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -1508,7 +1507,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
// Without grouping, this would be parsed as ((puppy OR kitten) AND foo) and
// no documents would match. But with grouping, Document 1 matches puppy
@@ -1532,8 +1531,9 @@
TEST_F(QueryProcessorTest, OneLevelNestedGrouping) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -1583,7 +1583,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
// Document 1 will match puppy and Document 2 matches (kitten AND (cat))
SearchSpecProto search_spec;
@@ -1608,8 +1608,9 @@
TEST_F(QueryProcessorTest, ExcludeTerm) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -1652,7 +1653,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("-hello");
@@ -1672,8 +1673,9 @@
TEST_F(QueryProcessorTest, ExcludeNonexistentTerm) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -1715,7 +1717,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("-foo");
@@ -1734,8 +1736,9 @@
TEST_F(QueryProcessorTest, ExcludeAnd) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -1785,7 +1788,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
{
SearchSpecProto search_spec;
@@ -1823,8 +1826,9 @@
TEST_F(QueryProcessorTest, ExcludeOr) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -1874,7 +1878,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
{
SearchSpecProto search_spec;
@@ -1918,8 +1922,9 @@
TEST_F(QueryProcessorTest, DeletedFilter) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -1970,7 +1975,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("animal");
@@ -1991,8 +1996,9 @@
TEST_F(QueryProcessorTest, NamespaceFilter) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -2042,7 +2048,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("animal");
@@ -2064,9 +2070,11 @@
TEST_F(QueryProcessorTest, SchemaTypeFilter) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
- AddSchemaType(&schema, "message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -2112,7 +2120,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("animal");
@@ -2134,11 +2142,15 @@
TEST_F(QueryProcessorTest, SectionFilterForOneDocument) {
// Create the schema and document store
- SchemaProto schema;
- SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// First and only indexed property, so it gets a section_id of 0
- AddIndexedProperty(email_type, "subject");
int subject_section_id = 0;
ICING_ASSERT_OK_AND_ASSIGN(
@@ -2174,7 +2186,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
// Create a section filter '<section name>:<query term>'
@@ -2196,18 +2208,31 @@
TEST_F(QueryProcessorTest, SectionFilterAcrossSchemaTypes) {
// Create the schema and document store
- SchemaProto schema;
- SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- // SectionIds are assigned in ascending order per schema type,
- // alphabetically.
- AddIndexedProperty(email_type, "a"); // Section "a" would get sectionId 0
- AddIndexedProperty(email_type, "foo");
- int email_foo_section_id = 1;
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ // Section "a" would get sectionId 0
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("a")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
- SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message");
// SectionIds are assigned in ascending order per schema type,
// alphabetically.
- AddIndexedProperty(message_type, "foo");
+ int email_foo_section_id = 1;
int message_foo_section_id = 0;
ICING_ASSERT_OK_AND_ASSIGN(
@@ -2253,7 +2278,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
// Create a section filter '<section name>:<query term>'
@@ -2277,18 +2302,20 @@
}
TEST_F(QueryProcessorTest, SectionFilterWithinSchemaType) {
- // Create the schema and document store
- SchemaProto schema;
- SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- // SectionIds are assigned in ascending order per schema type,
- // alphabetically.
- AddIndexedProperty(email_type, "foo");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
int email_foo_section_id = 0;
-
- SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message");
- // SectionIds are assigned in ascending order per schema type,
- // alphabetically.
- AddIndexedProperty(message_type, "foo");
int message_foo_section_id = 0;
ICING_ASSERT_OK_AND_ASSIGN(
@@ -2334,7 +2361,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
// Create a section filter '<section name>:<query term>', but only look
@@ -2359,17 +2386,20 @@
TEST_F(QueryProcessorTest, SectionFilterRespectsDifferentSectionIds) {
// Create the schema and document store
- SchemaProto schema;
- SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- // SectionIds are assigned in ascending order per schema type,
- // alphabetically.
- AddIndexedProperty(email_type, "foo");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("bar")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
int email_foo_section_id = 0;
-
- SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message");
- // SectionIds are assigned in ascending order per schema type,
- // alphabetically.
- AddIndexedProperty(message_type, "bar");
int message_foo_section_id = 0;
ICING_ASSERT_OK_AND_ASSIGN(
@@ -2417,7 +2447,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
// Create a section filter '<section name>:<query term>', but only look
@@ -2441,8 +2471,9 @@
TEST_F(QueryProcessorTest, NonexistentSectionFilterReturnsEmptyResults) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -2477,7 +2508,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
// Create a section filter '<section name>:<query term>', but only look
@@ -2499,9 +2530,17 @@
TEST_F(QueryProcessorTest, UnindexedSectionFilterReturnsEmptyResults) {
// Create the schema and document store
- SchemaProto schema;
- SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- AddUnindexedProperty(email_type, "foo");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("email")
+ // Add an unindexed property so we generate section
+ // metadata on it
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -2536,7 +2575,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
// Create a section filter '<section name>:<query term>', but only look
@@ -2557,17 +2596,20 @@
TEST_F(QueryProcessorTest, SectionFilterTermAndUnrestrictedTerm) {
// Create the schema and document store
- SchemaProto schema;
- SchemaTypeConfigProto* email_type = AddSchemaType(&schema, "email");
- // SectionIds are assigned in ascending order per schema type,
- // alphabetically.
- AddIndexedProperty(email_type, "foo");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder().SetType("message").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
int email_foo_section_id = 0;
-
- SchemaTypeConfigProto* message_type = AddSchemaType(&schema, "message");
- // SectionIds are assigned in ascending order per schema type,
- // alphabetically.
- AddIndexedProperty(message_type, "foo");
int message_foo_section_id = 0;
ICING_ASSERT_OK_AND_ASSIGN(
@@ -2615,7 +2657,7 @@
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
// Create a section filter '<section name>:<query term>'
@@ -2641,27 +2683,34 @@
TEST_F(QueryProcessorTest, DocumentBeforeTtlNotFilteredOut) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ // Arbitrary value, just has to be less than the document's creation
+ // timestamp + ttl
+ FakeClock fake_clock;
+ fake_clock.SetSystemTimeMilliseconds(50);
+
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
+ DocumentStore::Create(&filesystem_, store_dir_, &fake_clock,
schema_store_.get()));
document_store_ = std::move(create_result.document_store);
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(DocumentBuilder()
- .SetKey("namespace", "1")
- .SetSchema("email")
- .SetCreationTimestampMs(0)
- .SetTtlMs(100)
- .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(10)
+ .SetTtlMs(100)
+ .Build()));
// Populate the index
int section_id = 0;
@@ -2671,17 +2720,12 @@
AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
IsOk());
- // Arbitrary value, just has to be less than the document's creation
- // timestamp + ttl
- FakeClock fake_clock;
- fake_clock.SetSystemTimeMilliseconds(50);
-
// Perform query
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock_));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("hello");
@@ -2698,27 +2742,34 @@
TEST_F(QueryProcessorTest, DocumentPastTtlFilteredOut) {
// Create the schema and document store
- SchemaProto schema;
- AddSchemaType(&schema, "email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
+ // Arbitrary value, just has to be greater than the document's creation
+ // timestamp + ttl
+ FakeClock fake_clock;
+ fake_clock.SetSystemTimeMilliseconds(200);
+
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, store_dir_, &fake_clock_,
+ DocumentStore::Create(&filesystem_, store_dir_, &fake_clock,
schema_store_.get()));
document_store_ = std::move(create_result.document_store);
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- document_store_->Put(DocumentBuilder()
- .SetKey("namespace", "1")
- .SetSchema("email")
- .SetCreationTimestampMs(0)
- .SetTtlMs(100)
- .Build()));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentId document_id,
+ document_store_->Put(DocumentBuilder()
+ .SetKey("namespace", "1")
+ .SetSchema("email")
+ .SetCreationTimestampMs(50)
+ .SetTtlMs(100)
+ .Build()));
// Populate the index
int section_id = 0;
@@ -2728,17 +2779,12 @@
AddTokenToIndex(document_id, section_id, term_match_type, "hello"),
IsOk());
- // Arbitrary value, just has to be greater than the document's creation
- // timestamp + ttl
- FakeClock fake_clock;
- fake_clock.SetSystemTimeMilliseconds(200);
-
// Perform query
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<QueryProcessor> query_processor,
QueryProcessor::Create(index_.get(), language_segmenter_.get(),
normalizer_.get(), document_store_.get(),
- schema_store_.get(), &fake_clock));
+ schema_store_.get()));
SearchSpecProto search_spec;
search_spec.set_query("hello");
diff --git a/icing/result/result-retriever.cc b/icing/result/result-retriever.cc
index 85e78a8..943350c 100644
--- a/icing/result/result-retriever.cc
+++ b/icing/result/result-retriever.cc
@@ -107,6 +107,7 @@
// Add the document, itself.
*result.mutable_document() = std::move(document);
+ result.set_score(scored_document_hit.score());
search_results.push_back(std::move(result));
}
return search_results;
diff --git a/icing/result/result-retriever_test.cc b/icing/result/result-retriever_test.cc
index 7cb2d62..1c9684d 100644
--- a/icing/result/result-retriever_test.cc
+++ b/icing/result/result-retriever_test.cc
@@ -24,17 +24,18 @@
#include "icing/file/mock-filesystem.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/result/projection-tree.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
-#include "icing/testing/platform.h"
#include "icing/testing/snippet-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
@@ -54,6 +55,15 @@
using ::testing::Return;
using ::testing::SizeIs;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
+constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
+
class ResultRetrieverTest : public testing::Test {
protected:
ResultRetrieverTest() : test_dir_(GetTestTempDir() + "/icing") {
@@ -78,65 +88,47 @@
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
/*max_term_byte_size=*/10000));
- ASSERT_THAT(schema_store_->SetSchema(CreatePersonAndEmailSchema()), IsOk());
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("Email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT,
+ TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ "Person", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("Person")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("emailAddress")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ASSERT_THAT(schema_store_->SetSchema(schema), IsOk());
}
void TearDown() override {
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
- SchemaProto CreatePersonAndEmailSchema() {
- SchemaProto schema;
-
- auto* type = schema.add_types();
- type->set_schema_type("Email");
-
- auto* subj = type->add_properties();
- subj->set_property_name("name");
- subj->set_data_type(PropertyConfigProto::DataType::STRING);
- subj->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- subj->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- subj->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* body = type->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* sender = type->add_properties();
- sender->set_property_name("sender");
- sender->set_schema_type("Person");
- sender->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- sender->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- sender->mutable_document_indexing_config()->set_index_nested_properties(
- true);
-
- auto* person_type = schema.add_types();
- person_type->set_schema_type("Person");
- auto* name = person_type->add_properties();
- name->set_property_name("name");
- name->set_data_type(PropertyConfigProto::DataType::STRING);
- name->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- name->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- name->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- auto* address = person_type->add_properties();
- address->set_property_name("emailAddress");
- address->set_data_type(PropertyConfigProto::DataType::STRING);
- address->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- address->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- address->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- return schema;
- }
-
SectionId GetSectionId(const std::string& type, const std::string& property) {
auto type_id_or = schema_store_->GetSchemaTypeId(type);
if (!type_id_or.ok()) {
@@ -236,9 +228,9 @@
GetSectionId("Email", "body")};
SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0},
- {document_id3, hit_section_id_mask, /*score=*/0}};
+ {document_id1, hit_section_id_mask, /*score=*/19},
+ {document_id2, hit_section_id_mask, /*score=*/5},
+ {document_id3, hit_section_id_mask, /*score=*/1}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
ResultRetriever::Create(doc_store.get(), schema_store_.get(),
@@ -246,10 +238,13 @@
SearchResultProto::ResultProto result1;
*result1.mutable_document() = CreateDocument(/*id=*/1);
+ result1.set_score(19);
SearchResultProto::ResultProto result2;
*result2.mutable_document() = CreateDocument(/*id=*/2);
+ result2.set_score(5);
SearchResultProto::ResultProto result3;
*result3.mutable_document() = CreateDocument(/*id=*/3);
+ result3.set_score(1);
SnippetContext snippet_context(
/*query_terms_in=*/{},
@@ -285,8 +280,8 @@
GetSectionId("Email", "body")};
SectionIdMask hit_section_id_mask = CreateSectionIdMask(hit_section_ids);
std::vector<ScoredDocumentHit> scored_document_hits = {
- {document_id1, hit_section_id_mask, /*score=*/0},
- {document_id2, hit_section_id_mask, /*score=*/0},
+ {document_id1, hit_section_id_mask, /*score=*/12},
+ {document_id2, hit_section_id_mask, /*score=*/4},
{invalid_document_id, hit_section_id_mask, /*score=*/0}};
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ResultRetriever> result_retriever,
@@ -296,8 +291,10 @@
SearchResultProto::ResultProto result1;
*result1.mutable_document() = CreateDocument(/*id=*/1);
+ result1.set_score(12);
SearchResultProto::ResultProto result2;
*result2.mutable_document() = CreateDocument(/*id=*/2);
+ result2.set_score(4);
SnippetContext snippet_context(
/*query_terms_in=*/{},
@@ -495,35 +492,63 @@
std::vector<SearchResultProto::ResultProto> result,
result_retriever->RetrieveResults(page_result_state));
EXPECT_THAT(result, SizeIs(3));
- EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1)));
- EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "name", 0),
- Eq("subject foo 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "name", 0),
- Eq("foo"));
- EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0),
- Eq("body bar 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "body", 0),
- Eq("bar"));
- EXPECT_THAT(result[1].document(), EqualsProto(CreateDocument(/*id=*/2)));
- EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "name", 0),
- Eq("subject foo 2"));
- EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "name", 0),
- Eq("foo"));
- EXPECT_THAT(GetWindow(result[1].document(), result[1].snippet(), "body", 0),
- Eq("body bar 2"));
- EXPECT_THAT(GetMatch(result[1].document(), result[1].snippet(), "body", 0),
- Eq("bar"));
+ const DocumentProto& result_document_one = result.at(0).document();
+ const SnippetProto& result_snippet_one = result.at(0).snippet();
+ EXPECT_THAT(result_document_one, EqualsProto(CreateDocument(/*id=*/1)));
+ EXPECT_THAT(result_snippet_one.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet_one.entries(0).property_name(), Eq("body"));
+ std::string_view content = GetString(
+ &result_document_one, result_snippet_one.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_one.entries(0)),
+ ElementsAre("body bar 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet_one.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet_one.entries(1).property_name(), Eq("name"));
+ content = GetString(&result_document_one,
+ result_snippet_one.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_one.entries(1)),
+ ElementsAre("subject foo 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet_one.entries(1)),
+ ElementsAre("foo"));
- EXPECT_THAT(result[2].document(), EqualsProto(CreateDocument(/*id=*/3)));
- EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "name", 0),
- Eq("subject foo 3"));
- EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "name", 0),
- Eq("foo"));
- EXPECT_THAT(GetWindow(result[2].document(), result[2].snippet(), "body", 0),
- Eq("body bar 3"));
- EXPECT_THAT(GetMatch(result[2].document(), result[2].snippet(), "body", 0),
- Eq("bar"));
+ const DocumentProto& result_document_two = result.at(1).document();
+ const SnippetProto& result_snippet_two = result.at(1).snippet();
+ EXPECT_THAT(result_document_two, EqualsProto(CreateDocument(/*id=*/2)));
+ EXPECT_THAT(result_snippet_two.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet_two.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_two,
+ result_snippet_two.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_two.entries(0)),
+ ElementsAre("body bar 2"));
+ EXPECT_THAT(GetMatches(content, result_snippet_two.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet_two.entries(1).property_name(), Eq("name"));
+ content = GetString(&result_document_two,
+ result_snippet_two.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_two.entries(1)),
+ ElementsAre("subject foo 2"));
+ EXPECT_THAT(GetMatches(content, result_snippet_two.entries(1)),
+ ElementsAre("foo"));
+
+ const DocumentProto& result_document_three = result.at(2).document();
+ const SnippetProto& result_snippet_three = result.at(2).snippet();
+ EXPECT_THAT(result_document_three, EqualsProto(CreateDocument(/*id=*/3)));
+ EXPECT_THAT(result_snippet_three.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet_three.entries(0).property_name(), Eq("body"));
+ content = GetString(&result_document_three,
+ result_snippet_three.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_three.entries(0)),
+ ElementsAre("body bar 3"));
+ EXPECT_THAT(GetMatches(content, result_snippet_three.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet_three.entries(1).property_name(), Eq("name"));
+ content = GetString(&result_document_three,
+ result_snippet_three.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet_three.entries(1)),
+ ElementsAre("subject foo 3"));
+ EXPECT_THAT(GetMatches(content, result_snippet_three.entries(1)),
+ ElementsAre("foo"));
}
TEST_F(ResultRetrieverTest, OnlyOneDocumentSnippeted) {
@@ -568,15 +593,25 @@
std::vector<SearchResultProto::ResultProto> result,
result_retriever->RetrieveResults(page_result_state));
EXPECT_THAT(result, SizeIs(3));
- EXPECT_THAT(result[0].document(), EqualsProto(CreateDocument(/*id=*/1)));
- EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "name", 0),
- Eq("subject foo 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "name", 0),
- Eq("foo"));
- EXPECT_THAT(GetWindow(result[0].document(), result[0].snippet(), "body", 0),
- Eq("body bar 1"));
- EXPECT_THAT(GetMatch(result[0].document(), result[0].snippet(), "body", 0),
- Eq("bar"));
+
+ const DocumentProto& result_document = result.at(0).document();
+ const SnippetProto& result_snippet = result.at(0).snippet();
+ EXPECT_THAT(result_document, EqualsProto(CreateDocument(/*id=*/1)));
+ EXPECT_THAT(result_snippet.entries(), SizeIs(2));
+ EXPECT_THAT(result_snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&result_document, result_snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet.entries(0)),
+ ElementsAre("body bar 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet.entries(0)),
+ ElementsAre("bar"));
+ EXPECT_THAT(result_snippet.entries(1).property_name(), Eq("name"));
+ content =
+ GetString(&result_document, result_snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, result_snippet.entries(1)),
+ ElementsAre("subject foo 1"));
+ EXPECT_THAT(GetMatches(content, result_snippet.entries(1)),
+ ElementsAre("foo"));
EXPECT_THAT(result[1].document(), EqualsProto(CreateDocument(/*id=*/2)));
EXPECT_THAT(result[1].snippet(),
diff --git a/icing/result/result-state-manager.cc b/icing/result/result-state-manager.cc
index 0f27d9e..d606e79 100644
--- a/icing/result/result-state-manager.cc
+++ b/icing/result/result-state-manager.cc
@@ -16,15 +16,17 @@
#include "icing/proto/search.pb.h"
#include "icing/util/clock.h"
+#include "icing/util/logging.h"
#include "icing/util/status-macros.h"
namespace icing {
namespace lib {
-ResultStateManager::ResultStateManager(int max_hits_per_query,
- int max_result_states)
- : max_hits_per_query_(max_hits_per_query),
- max_result_states_(max_result_states),
+ResultStateManager::ResultStateManager(int max_total_hits,
+ const DocumentStore& document_store)
+ : document_store_(document_store),
+ max_total_hits_(max_total_hits),
+ num_total_hits_(0),
random_generator_(GetSteadyTimeNanoseconds()) {}
libtextclassifier3::StatusOr<PageResultState>
@@ -33,16 +35,13 @@
return absl_ports::InvalidArgumentError("ResultState has no results");
}
- // Truncates scored document hits so that they don't take up too much space.
- result_state.TruncateHitsTo(max_hits_per_query_);
-
// Gets the number before calling GetNextPage() because num_returned() may
// change after returning more results.
int num_previously_returned = result_state.num_returned();
int num_per_page = result_state.num_per_page();
std::vector<ScoredDocumentHit> page_result_document_hits =
- result_state.GetNextPage();
+ result_state.GetNextPage(document_store_);
SnippetContext snippet_context_copy = result_state.snippet_context();
@@ -68,10 +67,12 @@
}
uint64_t ResultStateManager::Add(ResultState result_state) {
- RemoveStatesIfNeeded();
+ RemoveStatesIfNeeded(result_state);
+ result_state.TruncateHitsTo(max_total_hits_);
uint64_t new_token = GetUniqueToken();
+ num_total_hits_ += result_state.num_remaining();
result_state_map_.emplace(new_token, std::move(result_state));
// Tracks the insertion order
token_queue_.push(new_token);
@@ -91,7 +92,7 @@
int num_returned = state_iterator->second.num_returned();
int num_per_page = state_iterator->second.num_per_page();
std::vector<ScoredDocumentHit> result_of_page =
- state_iterator->second.GetNextPage();
+ state_iterator->second.GetNextPage(document_store_);
if (result_of_page.empty()) {
// This shouldn't happen, all our active states should contain results, but
// a sanity check here in case of any data inconsistency.
@@ -112,6 +113,7 @@
next_page_token = kInvalidNextPageToken;
}
+ num_total_hits_ -= result_of_page.size();
return PageResultState(
result_of_page, next_page_token, std::move(snippet_context_copy),
std::move(projection_tree_map_copy), num_returned, num_per_page);
@@ -129,10 +131,14 @@
void ResultStateManager::InvalidateAllResultStates() {
absl_ports::unique_lock l(&mutex_);
+ InternalInvalidateAllResultStates();
+}
+void ResultStateManager::InternalInvalidateAllResultStates() {
result_state_map_.clear();
invalidated_token_set_.clear();
- token_queue_ = {};
+ token_queue_ = std::queue<uint64_t>();
+ num_total_hits_ = 0;
}
uint64_t ResultStateManager::GetUniqueToken() {
@@ -148,12 +154,21 @@
return new_token;
}
-void ResultStateManager::RemoveStatesIfNeeded() {
+void ResultStateManager::RemoveStatesIfNeeded(const ResultState& result_state) {
if (result_state_map_.empty() || token_queue_.empty()) {
return;
}
- // Removes any tokens that were previously invalidated.
+ // 1. Check if this new result_state would take up the entire result state
+ // manager budget.
+ if (result_state.num_remaining() > max_total_hits_) {
+ // This single result state will exceed our budget. Drop everything else to
+ // accommodate it.
+ InternalInvalidateAllResultStates();
+ return;
+ }
+
+ // 2. Remove any tokens that were previously invalidated.
while (!token_queue_.empty() &&
invalidated_token_set_.find(token_queue_.front()) !=
invalidated_token_set_.end()) {
@@ -161,11 +176,13 @@
token_queue_.pop();
}
- // Removes the oldest state
- if (result_state_map_.size() >= max_result_states_ && !token_queue_.empty()) {
- result_state_map_.erase(token_queue_.front());
+ // 3. If we're over budget, remove states from oldest to newest until we fit
+ // into our budget.
+ while (result_state.num_remaining() + num_total_hits_ > max_total_hits_) {
+ InternalInvalidateResultState(token_queue_.front());
token_queue_.pop();
}
+ invalidated_token_set_.clear();
}
void ResultStateManager::InternalInvalidateResultState(uint64_t token) {
@@ -173,7 +190,10 @@
// invalidated_token_set_. The entry in token_queue_ can't be easily removed
// right now (may need O(n) time), so we leave it there and later completely
// remove the token in RemoveStatesIfNeeded().
- if (result_state_map_.erase(token) > 0) {
+ auto itr = result_state_map_.find(token);
+ if (itr != result_state_map_.end()) {
+ num_total_hits_ -= itr->second.num_remaining();
+ result_state_map_.erase(itr);
invalidated_token_set_.insert(token);
}
}
diff --git a/icing/result/result-state-manager.h b/icing/result/result-state-manager.h
index eaf9eb5..c04217f 100644
--- a/icing/result/result-state-manager.h
+++ b/icing/result/result-state-manager.h
@@ -37,7 +37,8 @@
// Used to store and manage ResultState.
class ResultStateManager {
public:
- explicit ResultStateManager(int max_hits_per_query, int max_result_states);
+ explicit ResultStateManager(int max_total_hits,
+ const DocumentStore& document_store);
ResultStateManager(const ResultStateManager&) = delete;
ResultStateManager& operator=(const ResultStateManager&) = delete;
@@ -77,13 +78,17 @@
private:
absl_ports::shared_mutex mutex_;
- // The maximum number of scored document hits to return for a query. When we
- // have more than the maximum number, extra hits will be truncated.
- const int max_hits_per_query_;
+ const DocumentStore& document_store_;
- // The maximum number of result states. When we have more than the maximum
- // number, the oldest / firstly added result state will be removed.
- const int max_result_states_;
+ // The maximum number of scored document hits that all result states may
+ // have. When a new result state is added such that num_total_hits_ would
+ // exceed max_total_hits_, the oldest result states are evicted until
+ // the total would no longer exceed max_total_hits_.
+ const int max_total_hits_;
+
+ // The number of scored document hits that all result states currently held by
+ // the result state manager have.
+ int num_total_hits_;
// A hash map of (next-page token -> result state)
std::unordered_map<uint64_t, ResultState> result_state_map_
@@ -112,13 +117,21 @@
uint64_t GetUniqueToken() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Helper method to remove old states to make room for incoming states.
- void RemoveStatesIfNeeded() ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+ void RemoveStatesIfNeeded(const ResultState& result_state)
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
// Helper method to remove a result state from result_state_map_, the token
// will then be temporarily kept in invalidated_token_set_ until it's finally
// removed from token_queue_.
void InternalInvalidateResultState(uint64_t token)
ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
+
+ // Internal method to invalidate all result states / tokens currently in
+ // ResultStateManager. We need this separate method so that other public
+ // methods don't need to call InvalidateAllResultStates(). Public methods
+ // calling each other may cause deadlock issues.
+ void InternalInvalidateAllResultStates()
+ ICING_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
};
} // namespace lib
diff --git a/icing/result/result-state-manager_test.cc b/icing/result/result-state-manager_test.cc
index 6defa6f..32e45aa 100644
--- a/icing/result/result-state-manager_test.cc
+++ b/icing/result/result-state-manager_test.cc
@@ -14,9 +14,15 @@
#include "icing/result/result-state-manager.h"
+#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include "icing/file/filesystem.h"
#include "icing/portable/equals-proto.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -27,10 +33,6 @@
using ::testing::Gt;
using ::testing::IsEmpty;
-ScoredDocumentHit CreateScoredDocumentHit(DocumentId document_id) {
- return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1);
-}
-
ScoringSpecProto CreateScoringSpec() {
ScoringSpecProto scoring_spec;
scoring_spec.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
@@ -43,24 +45,73 @@
return result_spec;
}
-ResultState CreateResultState(
- const std::vector<ScoredDocumentHit>& scored_document_hits,
- int num_per_page) {
- return ResultState(scored_document_hits, /*query_terms=*/{},
- SearchSpecProto::default_instance(), CreateScoringSpec(),
- CreateResultSpec(num_per_page));
+ScoredDocumentHit CreateScoredHit(DocumentId document_id) {
+ return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1);
}
-TEST(ResultStateManagerTest, ShouldRankAndPaginateOnePage) {
+class ResultStateManagerTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ schema_store_base_dir_ = GetTestTempDir() + "/schema_store";
+ filesystem_.CreateDirectoryRecursively(schema_store_base_dir_.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_base_dir_, &clock_));
+ SchemaProto schema;
+ schema.add_types()->set_schema_type("Document");
+ ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));
+
+ doc_store_base_dir_ = GetTestTempDir() + "/document_store";
+ filesystem_.CreateDirectoryRecursively(doc_store_base_dir_.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult result,
+ DocumentStore::Create(&filesystem_, doc_store_base_dir_, &clock_,
+ schema_store_.get()));
+ document_store_ = std::move(result.document_store);
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(doc_store_base_dir_.c_str());
+ filesystem_.DeleteDirectoryRecursively(schema_store_base_dir_.c_str());
+ }
+
+ ResultState CreateResultState(
+ const std::vector<ScoredDocumentHit>& scored_document_hits,
+ int num_per_page) {
+ return ResultState(scored_document_hits, /*query_terms=*/{},
+ SearchSpecProto::default_instance(), CreateScoringSpec(),
+ CreateResultSpec(num_per_page), *document_store_);
+ }
+
+ ScoredDocumentHit AddScoredDocument(DocumentId document_id) {
+ DocumentProto document;
+ document.set_namespace_("namespace");
+ document.set_uri(std::to_string(document_id));
+ document.set_schema("Document");
+ document_store_->Put(std::move(document));
+ return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1);
+ }
+
+ const DocumentStore& document_store() const { return *document_store_; }
+
+ private:
+ Filesystem filesystem_;
+ std::string doc_store_base_dir_;
+ std::string schema_store_base_dir_;
+ Clock clock_;
+ std::unique_ptr<DocumentStore> document_store_;
+ std::unique_ptr<SchemaStore> schema_store_;
+};
+
+TEST_F(ResultStateManagerTest, ShouldRankAndPaginateOnePage) {
ResultState original_result_state =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/3)},
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/2)},
/*num_per_page=*/10);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state,
result_state_manager.RankAndPaginate(std::move(original_result_state)));
@@ -70,24 +121,22 @@
// Should get the original scored document hits
EXPECT_THAT(
page_result_state.scored_document_hits,
- ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/1))));
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/2)),
+ EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/1)),
+ EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/0))));
}
-TEST(ResultStateManagerTest, ShouldRankAndPaginateMultiplePages) {
+TEST_F(ResultStateManagerTest, ShouldRankAndPaginateMultiplePages) {
ResultState original_result_state =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/4),
- CreateScoredDocumentHit(/*document_id=*/5)},
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/3),
+ AddScoredDocument(/*document_id=*/4)},
/*num_per_page=*/2);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
// First page, 2 results
ICING_ASSERT_OK_AND_ASSIGN(
@@ -95,9 +144,8 @@
result_state_manager.RankAndPaginate(std::move(original_result_state)));
EXPECT_THAT(
page_result_state1.scored_document_hits,
- ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4))));
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/4)),
+ EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/3))));
uint64_t next_page_token = page_result_state1.next_page_token;
@@ -106,48 +154,45 @@
result_state_manager.GetNextPage(next_page_token));
EXPECT_THAT(
page_result_state2.scored_document_hits,
- ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2))));
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/2)),
+ EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/1))));
// Third page, 1 result
ICING_ASSERT_OK_AND_ASSIGN(PageResultState page_result_state3,
result_state_manager.GetNextPage(next_page_token));
- EXPECT_THAT(page_result_state3.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(
- CreateScoredDocumentHit(/*document_id=*/1))));
+ EXPECT_THAT(
+ page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/0))));
// No results
EXPECT_THAT(result_state_manager.GetNextPage(next_page_token),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST(ResultStateManagerTest, EmptyStateShouldReturnError) {
+TEST_F(ResultStateManagerTest, EmptyStateShouldReturnError) {
ResultState empty_result_state = CreateResultState({}, /*num_per_page=*/1);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
EXPECT_THAT(
result_state_manager.RankAndPaginate(std::move(empty_result_state)),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST(ResultStateManagerTest, ShouldInvalidateOneToken) {
+TEST_F(ResultStateManagerTest, ShouldInvalidateOneToken) {
ResultState result_state1 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/3)},
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/2)},
/*num_per_page=*/1);
ResultState result_state2 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/4),
- CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/6)},
+ CreateResultState({AddScoredDocument(/*document_id=*/3),
+ AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/5)},
/*num_per_page=*/1);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -167,26 +212,25 @@
ICING_ASSERT_OK_AND_ASSIGN(
page_result_state2,
result_state_manager.GetNextPage(page_result_state2.next_page_token));
- EXPECT_THAT(page_result_state2.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(
- CreateScoredDocumentHit(/*document_id=*/5))));
+ EXPECT_THAT(
+ page_result_state2.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(/*document_id=*/4))));
}
-TEST(ResultStateManagerTest, ShouldInvalidateAllTokens) {
+TEST_F(ResultStateManagerTest, ShouldInvalidateAllTokens) {
ResultState result_state1 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/3)},
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/2)},
/*num_per_page=*/1);
ResultState result_state2 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/4),
- CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/6)},
+ CreateResultState({AddScoredDocument(/*document_id=*/3),
+ AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/5)},
/*num_per_page=*/1);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -207,23 +251,22 @@
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST(ResultStateManagerTest, ShouldRemoveOldestResultState) {
+TEST_F(ResultStateManagerTest, ShouldRemoveOldestResultState) {
ResultState result_state1 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2)},
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1)},
/*num_per_page=*/1);
ResultState result_state2 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/4)},
+ CreateResultState({AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/3)},
/*num_per_page=*/1);
ResultState result_state3 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/6)},
+ CreateResultState({AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/5)},
/*num_per_page=*/1);
- ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/2);
+ ResultStateManager result_state_manager(/*max_total_hits=*/2,
+ document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -243,39 +286,38 @@
page_result_state2,
result_state_manager.GetNextPage(page_result_state2.next_page_token));
EXPECT_THAT(page_result_state2.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
- /*document_id=*/3))));
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/2))));
ICING_ASSERT_OK_AND_ASSIGN(
page_result_state3,
result_state_manager.GetNextPage(page_result_state3.next_page_token));
EXPECT_THAT(page_result_state3.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
- /*document_id=*/5))));
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/4))));
}
-TEST(ResultStateManagerTest,
- PreviouslyInvalidatedResultStateShouldNotBeCounted) {
+TEST_F(ResultStateManagerTest,
+ InvalidatedResultStateShouldDecreaseCurrentHitsCount) {
ResultState result_state1 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2)},
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1)},
/*num_per_page=*/1);
ResultState result_state2 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/4)},
+ CreateResultState({AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/3)},
/*num_per_page=*/1);
ResultState result_state3 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/6)},
- /*num_per_page=*/1);
- ResultState result_state4 =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/7),
- CreateScoredDocumentHit(/*document_id=*/8)},
+ CreateResultState({AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/5)},
/*num_per_page=*/1);
- ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/3);
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from RankAndPaginate).
+ // Each result state has a page size of 1 and a result set of 2 hits. So each
+ // result will take up one hit of our three hit budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3,
+ document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(result_state1)));
@@ -286,11 +328,18 @@
PageResultState page_result_state3,
result_state_manager.RankAndPaginate(std::move(result_state3)));
- // Invalidates state 2, so that the number of valid tokens becomes 2.
+ // Invalidates state 2, so that the number of hits currently cached should be
+ // decremented to 2.
result_state_manager.InvalidateResultState(
page_result_state2.next_page_token);
- // Adding state 4 shouldn't affect rest of the states
+ // If invalidating state 2 correctly decremented the current hit count to 2,
+ // then adding state 4 should still be within our budget and no other result
+ // states should be evicted.
+ ResultState result_state4 =
+ CreateResultState({AddScoredDocument(/*document_id=*/6),
+ AddScoredDocument(/*document_id=*/7)},
+ /*num_per_page=*/1);
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state4,
result_state_manager.RankAndPaginate(std::move(result_state4)));
@@ -299,8 +348,8 @@
page_result_state1,
result_state_manager.GetNextPage(page_result_state1.next_page_token));
EXPECT_THAT(page_result_state1.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
- /*document_id=*/1))));
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/0))));
EXPECT_THAT(
result_state_manager.GetNextPage(page_result_state2.next_page_token),
@@ -310,18 +359,493 @@
page_result_state3,
result_state_manager.GetNextPage(page_result_state3.next_page_token));
EXPECT_THAT(page_result_state3.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
- /*document_id=*/5))));
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/4))));
ICING_ASSERT_OK_AND_ASSIGN(
page_result_state4,
result_state_manager.GetNextPage(page_result_state4.next_page_token));
EXPECT_THAT(page_result_state4.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(CreateScoredDocumentHit(
- /*document_id=*/7))));
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/6))));
}
-TEST(ResultStateManagerTest, ShouldGetSnippetContext) {
+TEST_F(ResultStateManagerTest,
+ InvalidatedAllResultStatesShouldResetCurrentHitCount) {
+ ResultState result_state1 =
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1)},
+ /*num_per_page=*/1);
+ ResultState result_state2 =
+ CreateResultState({AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/3)},
+ /*num_per_page=*/1);
+ ResultState result_state3 =
+ CreateResultState({AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/5)},
+ /*num_per_page=*/1);
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from RankAndPaginate).
+ // Each result state has a page size of 1 and a result set of 2 hits. So each
+ // result will take up one hit of our three hit budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3,
+ document_store());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state3,
+ result_state_manager.RankAndPaginate(std::move(result_state3)));
+
+ // Invalidates all states so that the current hit count will be 0.
+ result_state_manager.InvalidateAllResultStates();
+
+ // If invalidating all states correctly reset the current hit count to 0,
+ // then states 4, 5 and 6 should all fit within our budget and none of them
+ // should be evicted.
+ ResultState result_state4 =
+ CreateResultState({AddScoredDocument(/*document_id=*/6),
+ AddScoredDocument(/*document_id=*/7)},
+ /*num_per_page=*/1);
+ ResultState result_state5 =
+ CreateResultState({AddScoredDocument(/*document_id=*/8),
+ AddScoredDocument(/*document_id=*/9)},
+ /*num_per_page=*/1);
+ ResultState result_state6 =
+ CreateResultState({AddScoredDocument(/*document_id=*/10),
+ AddScoredDocument(/*document_id=*/11)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state4,
+ result_state_manager.RankAndPaginate(std::move(result_state4)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state5,
+ result_state_manager.RankAndPaginate(std::move(result_state5)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state6,
+ result_state_manager.RankAndPaginate(std::move(result_state6)));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state2.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state3.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state4,
+ result_state_manager.GetNextPage(page_result_state4.next_page_token));
+ EXPECT_THAT(page_result_state4.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/6))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state5,
+ result_state_manager.GetNextPage(page_result_state5.next_page_token));
+ EXPECT_THAT(page_result_state5.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/8))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state6,
+ result_state_manager.GetNextPage(page_result_state6.next_page_token));
+ EXPECT_THAT(page_result_state6.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/10))));
+}
+
+TEST_F(
+ ResultStateManagerTest,
+ InvalidatedResultStateShouldDecreaseCurrentHitsCountByExactStateHitCount) {
+ ResultState result_state1 =
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1)},
+ /*num_per_page=*/1);
+ ResultState result_state2 =
+ CreateResultState({AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/3)},
+ /*num_per_page=*/1);
+ ResultState result_state3 =
+ CreateResultState({AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/5)},
+ /*num_per_page=*/1);
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from RankAndPaginate).
+ // Each result state has a page size of 1 and a result set of 2 hits. So each
+ // result will take up one hit of our three hit budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3,
+ document_store());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state3,
+ result_state_manager.RankAndPaginate(std::move(result_state3)));
+
+ // Invalidates state 2, so that the number of hits currently cached should be
+ // decremented to 2.
+ result_state_manager.InvalidateResultState(
+ page_result_state2.next_page_token);
+
+ // If invalidating state 2 correctly decremented the current hit count to 2,
+ // then adding state 4 should still be within our budget and no other result
+ // states should be evicted.
+ ResultState result_state4 =
+ CreateResultState({AddScoredDocument(/*document_id=*/6),
+ AddScoredDocument(/*document_id=*/7)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state4,
+ result_state_manager.RankAndPaginate(std::move(result_state4)));
+
+ // If invalidating result state 2 correctly decremented the current hit count
+ // to 2 and adding state 4 correctly incremented it to 3, then adding this
+ // result state should trigger the eviction of state 1.
+ ResultState result_state5 =
+ CreateResultState({AddScoredDocument(/*document_id=*/8),
+ AddScoredDocument(/*document_id=*/9)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state5,
+ result_state_manager.RankAndPaginate(std::move(result_state5)));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state2.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/4))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state4,
+ result_state_manager.GetNextPage(page_result_state4.next_page_token));
+ EXPECT_THAT(page_result_state4.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/6))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state5,
+ result_state_manager.GetNextPage(page_result_state5.next_page_token));
+ EXPECT_THAT(page_result_state5.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/8))));
+}
+
+TEST_F(ResultStateManagerTest, GetNextPageShouldDecreaseCurrentHitsCount) {
+ ResultState result_state1 =
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1)},
+ /*num_per_page=*/1);
+ ResultState result_state2 =
+ CreateResultState({AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/3)},
+ /*num_per_page=*/1);
+ ResultState result_state3 =
+ CreateResultState({AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/5)},
+ /*num_per_page=*/1);
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from RankAndPaginate).
+ // Each result state has a page size of 1 and a result set of 2 hits. So each
+ // result will take up one hit of our three hit budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3,
+ document_store());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state3,
+ result_state_manager.RankAndPaginate(std::move(result_state3)));
+
+ // GetNextPage for result state 1 should return its result and decrement the
+ // number of cached hits to 2.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state1,
+ result_state_manager.GetNextPage(page_result_state1.next_page_token));
+ EXPECT_THAT(page_result_state1.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/0))));
+
+ // If retrieving the next page for result state 1 correctly decremented the
+ // current hit count to 2, then adding state 4 should still be within our
+ // budget and no other result states should be evicted.
+ ResultState result_state4 =
+ CreateResultState({AddScoredDocument(/*document_id=*/6),
+ AddScoredDocument(/*document_id=*/7)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state4,
+ result_state_manager.RankAndPaginate(std::move(result_state4)));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state2,
+ result_state_manager.GetNextPage(page_result_state2.next_page_token));
+ EXPECT_THAT(page_result_state2.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/2))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/4))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state4,
+ result_state_manager.GetNextPage(page_result_state4.next_page_token));
+ EXPECT_THAT(page_result_state4.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/6))));
+}
+
+TEST_F(ResultStateManagerTest,
+ GetNextPageShouldDecreaseCurrentHitsCountByExactlyOnePage) {
+ ResultState result_state1 =
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1)},
+ /*num_per_page=*/1);
+ ResultState result_state2 =
+ CreateResultState({AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/3)},
+ /*num_per_page=*/1);
+ ResultState result_state3 =
+ CreateResultState({AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/5)},
+ /*num_per_page=*/1);
+
+ // Add the first three states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from RankAndPaginate).
+ // Each result state has a page size of 1 and a result set of 2 hits. So each
+ // result will take up one hit of our three hit budget.
+ ResultStateManager result_state_manager(/*max_total_hits=*/3,
+ document_store());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state3,
+ result_state_manager.RankAndPaginate(std::move(result_state3)));
+
+ // GetNextPage for result state 1 should return its result and decrement the
+ // number of cached hits to 2.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state1,
+ result_state_manager.GetNextPage(page_result_state1.next_page_token));
+ EXPECT_THAT(page_result_state1.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/0))));
+
+ // If retrieving the next page for result state 1 correctly decremented the
+ // current hit count to 2, then adding state 4 should still be within our
+ // budget and no other result states should be evicted.
+ ResultState result_state4 =
+ CreateResultState({AddScoredDocument(/*document_id=*/6),
+ AddScoredDocument(/*document_id=*/7)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state4,
+ result_state_manager.RankAndPaginate(std::move(result_state4)));
+
+ // If retrieving the next page for result state 1 correctly decremented the
+ // current hit count to 2 and adding state 4 correctly incremented it to 3,
+ // then adding this result state should trigger the eviction of state 2.
+ ResultState result_state5 =
+ CreateResultState({AddScoredDocument(/*document_id=*/8),
+ AddScoredDocument(/*document_id=*/9)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state5,
+ result_state_manager.RankAndPaginate(std::move(result_state5)));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state2.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/4))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state4,
+ result_state_manager.GetNextPage(page_result_state4.next_page_token));
+ EXPECT_THAT(page_result_state4.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/6))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state5,
+ result_state_manager.GetNextPage(page_result_state5.next_page_token));
+ EXPECT_THAT(page_result_state5.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/8))));
+}
+
+TEST_F(ResultStateManagerTest,
+ AddingOverBudgetResultStateShouldEvictAllStates) {
+ ResultState result_state1 =
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/2)},
+ /*num_per_page=*/1);
+ ResultState result_state2 =
+ CreateResultState({AddScoredDocument(/*document_id=*/3),
+ AddScoredDocument(/*document_id=*/4)},
+ /*num_per_page=*/1);
+
+ // Add the first two states. Remember, the first page for each result state
+ // won't be cached (since it is returned immediately from RankAndPaginate).
+ // Each result state has a page size of 1. So 3 hits will remain cached.
+ ResultStateManager result_state_manager(/*max_total_hits=*/4,
+ document_store());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+
+ // Add a result state that is larger than the entire budget. This should
+ // result in all previous result states being evicted, the first hit from
+ // result state 3 being returned and the next four hits being cached (the last
+ // hit should be dropped because it exceeds the max).
+ ResultState result_state3 =
+ CreateResultState({AddScoredDocument(/*document_id=*/5),
+ AddScoredDocument(/*document_id=*/6),
+ AddScoredDocument(/*document_id=*/7),
+ AddScoredDocument(/*document_id=*/8),
+ AddScoredDocument(/*document_id=*/9),
+ AddScoredDocument(/*document_id=*/10)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state3,
+ result_state_manager.RankAndPaginate(std::move(result_state3)));
+
+ // GetNextPage for result state 1 and 2 should return NOT_FOUND.
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state2.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ // Only the next four results in state 3 should be retrievable.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/9))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/8))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/7))));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state3,
+ result_state_manager.GetNextPage(page_result_state3.next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/6))));
+
+ // The final result should have been dropped because it exceeded the budget.
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state3.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+}
+
+TEST_F(ResultStateManagerTest,
+ AddingResultStateShouldEvictOverBudgetResultState) {
+ ResultStateManager result_state_manager(/*max_total_hits=*/4,
+ document_store());
+ // Add a result state that is larger than the entire budget. The entire result
+ // state will still be cached.
+ ResultState result_state1 =
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/3),
+ AddScoredDocument(/*document_id=*/4),
+ AddScoredDocument(/*document_id=*/5)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state1,
+ result_state_manager.RankAndPaginate(std::move(result_state1)));
+
+ // Add a result state. Because state2 + state1 is larger than the budget,
+ // state1 should be evicted.
+ ResultState result_state2 =
+ CreateResultState({AddScoredDocument(/*document_id=*/6),
+ AddScoredDocument(/*document_id=*/7)},
+ /*num_per_page=*/1);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ PageResultState page_result_state2,
+ result_state_manager.RankAndPaginate(std::move(result_state2)));
+
+ // state1 should have been evicted and state2 should still be retrievable.
+ EXPECT_THAT(
+ result_state_manager.GetNextPage(page_result_state1.next_page_token),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+
+ ICING_ASSERT_OK_AND_ASSIGN(
+ page_result_state2,
+ result_state_manager.GetNextPage(page_result_state2.next_page_token));
+ EXPECT_THAT(page_result_state2.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(CreateScoredHit(
+ /*document_id=*/6))));
+}
+
+TEST_F(ResultStateManagerTest, ShouldGetSnippetContext) {
ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/1);
result_spec.mutable_snippet_spec()->set_num_to_snippet(5);
result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
@@ -334,13 +858,13 @@
query_terms_map.emplace("term1", std::unordered_set<std::string>());
ResultState original_result_state = ResultState(
- /*scored_document_hits=*/{CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2)},
- query_terms_map, search_spec, CreateScoringSpec(), result_spec);
+ /*scored_document_hits=*/{AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1)},
+ query_terms_map, search_spec, CreateScoringSpec(), result_spec,
+ document_store());
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state,
result_state_manager.RankAndPaginate(std::move(original_result_state)));
@@ -355,7 +879,7 @@
EqualsProto(result_spec.snippet_spec()));
}
-TEST(ResultStateManagerTest, ShouldGetDefaultSnippetContext) {
+TEST_F(ResultStateManagerTest, ShouldGetDefaultSnippetContext) {
ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/1);
// 0 indicates no snippeting
result_spec.mutable_snippet_spec()->set_num_to_snippet(0);
@@ -369,13 +893,13 @@
query_terms_map.emplace("term1", std::unordered_set<std::string>());
ResultState original_result_state = ResultState(
- /*scored_document_hits=*/{CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2)},
- query_terms_map, search_spec, CreateScoringSpec(), result_spec);
+ /*scored_document_hits=*/{AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1)},
+ query_terms_map, search_spec, CreateScoringSpec(), result_spec,
+ document_store());
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state,
result_state_manager.RankAndPaginate(std::move(original_result_state)));
@@ -390,18 +914,17 @@
Eq(TermMatchType::UNKNOWN));
}
-TEST(ResultStateManagerTest, ShouldGetCorrectNumPreviouslyReturned) {
+TEST_F(ResultStateManagerTest, ShouldGetCorrectNumPreviouslyReturned) {
ResultState original_result_state =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/4),
- CreateScoredDocumentHit(/*document_id=*/5)},
+ CreateResultState({AddScoredDocument(/*document_id=*/0),
+ AddScoredDocument(/*document_id=*/1),
+ AddScoredDocument(/*document_id=*/2),
+ AddScoredDocument(/*document_id=*/3),
+ AddScoredDocument(/*document_id=*/4)},
/*num_per_page=*/2);
ResultStateManager result_state_manager(
- /*max_hits_per_query=*/std::numeric_limits<int>::max(),
- /*max_result_states=*/std::numeric_limits<int>::max());
+ /*max_total_hits=*/std::numeric_limits<int>::max(), document_store());
// First page, 2 results
ICING_ASSERT_OK_AND_ASSIGN(
@@ -435,41 +958,48 @@
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST(ResultStateManagerTest, ShouldStoreMaxNumberOfScoredDocumentHits) {
- ResultState original_result_state =
- CreateResultState({CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/4),
- CreateScoredDocumentHit(/*document_id=*/5)},
- /*num_per_page=*/2);
+TEST_F(ResultStateManagerTest, ShouldStoreAllHits) {
+ ScoredDocumentHit scored_hit_1 = AddScoredDocument(/*document_id=*/0);
+ ScoredDocumentHit scored_hit_2 = AddScoredDocument(/*document_id=*/1);
+ ScoredDocumentHit scored_hit_3 = AddScoredDocument(/*document_id=*/2);
+ ScoredDocumentHit scored_hit_4 = AddScoredDocument(/*document_id=*/3);
+ ScoredDocumentHit scored_hit_5 = AddScoredDocument(/*document_id=*/4);
- ResultStateManager result_state_manager(
- /*max_hits_per_query=*/3,
- /*max_result_states=*/std::numeric_limits<int>::max());
+ ResultState original_result_state = CreateResultState(
+ {scored_hit_1, scored_hit_2, scored_hit_3, scored_hit_4, scored_hit_5},
+ /*num_per_page=*/2);
- // The 5 input scored document hits will be truncated to 3.
+ ResultStateManager result_state_manager(/*max_total_hits=*/4,
+ document_store());
+
+ // The 5 input scored document hits will not be truncated. The first page of
+ // two hits will be returned immediately and the other three hits will fit
+ // within our caching budget.
// First page, 2 results
ICING_ASSERT_OK_AND_ASSIGN(
PageResultState page_result_state1,
result_state_manager.RankAndPaginate(std::move(original_result_state)));
- EXPECT_THAT(
- page_result_state1.scored_document_hits,
- ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4))));
+ EXPECT_THAT(page_result_state1.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_5),
+ EqualsScoredDocumentHit(scored_hit_4)));
uint64_t next_page_token = page_result_state1.next_page_token;
- // Second page, 1 results.
+ // Second page, 2 results.
ICING_ASSERT_OK_AND_ASSIGN(PageResultState page_result_state2,
result_state_manager.GetNextPage(next_page_token));
EXPECT_THAT(page_result_state2.scored_document_hits,
- ElementsAre(EqualsScoredDocumentHit(
- CreateScoredDocumentHit(/*document_id=*/3))));
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_3),
+ EqualsScoredDocumentHit(scored_hit_2)));
- // No third page.
+ // Third page, 1 result.
+ ICING_ASSERT_OK_AND_ASSIGN(PageResultState page_result_state3,
+ result_state_manager.GetNextPage(next_page_token));
+ EXPECT_THAT(page_result_state3.scored_document_hits,
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_1)));
+
+ // Fourth page, 0 results.
EXPECT_THAT(result_state_manager.GetNextPage(next_page_token),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
diff --git a/icing/result/result-state.cc b/icing/result/result-state.cc
index 82738a9..fc89185 100644
--- a/icing/result/result-state.cc
+++ b/icing/result/result-state.cc
@@ -16,6 +16,7 @@
#include "icing/result/projection-tree.h"
#include "icing/scoring/ranker.h"
+#include "icing/store/namespace-id.h"
#include "icing/util/logging.h"
namespace icing {
@@ -39,7 +40,8 @@
SectionRestrictQueryTermsMap query_terms,
const SearchSpecProto& search_spec,
const ScoringSpecProto& scoring_spec,
- const ResultSpecProto& result_spec)
+ const ResultSpecProto& result_spec,
+ const DocumentStore& document_store)
: scored_document_hits_(std::move(scored_document_hits)),
snippet_context_(CreateSnippetContext(std::move(query_terms), search_spec,
result_spec)),
@@ -52,14 +54,82 @@
projection_tree_map_.insert(
{type_field_mask.schema_type(), ProjectionTree(type_field_mask)});
}
+
+ for (const ResultSpecProto::ResultGrouping& result_grouping :
+ result_spec.result_groupings()) {
+ int group_id = group_result_limits_.size();
+ group_result_limits_.push_back(result_grouping.max_results());
+ for (const std::string& name_space : result_grouping.namespaces()) {
+ auto namespace_id_or = document_store.GetNamespaceId(name_space);
+ if (!namespace_id_or.ok()) {
+ continue;
+ }
+ namespace_group_id_map_.insert({namespace_id_or.ValueOrDie(), group_id});
+ }
+ }
BuildHeapInPlace(&scored_document_hits_, scored_document_hit_comparator_);
}
-std::vector<ScoredDocumentHit> ResultState::GetNextPage() {
- std::vector<ScoredDocumentHit> scored_document_hits = PopTopResultsFromHeap(
- &scored_document_hits_, num_per_page_, scored_document_hit_comparator_);
- num_returned_ += scored_document_hits.size();
- return scored_document_hits;
+class GroupResultLimiter {
+ public:
+ GroupResultLimiter(
+ const std::unordered_map<NamespaceId, int>& namespace_group_id_map,
+ std::vector<int>& group_result_limits,
+ const DocumentStore& document_store)
+ : namespace_group_id_map_(namespace_group_id_map),
+ group_result_limits_(&group_result_limits),
+ document_store_(document_store) {}
+
+ // Returns true if the scored_document_hit should be removed.
+ bool operator()(const ScoredDocumentHit& scored_document_hit) {
+ auto document_filter_data_or = document_store_.GetDocumentFilterData(
+ scored_document_hit.document_id());
+ if (!document_filter_data_or.ok()) {
+ return true;
+ }
+ NamespaceId namespace_id =
+ document_filter_data_or.ValueOrDie().namespace_id();
+ auto iter = namespace_group_id_map_.find(namespace_id);
+ if (iter == namespace_group_id_map_.end()) {
+ return false;
+ }
+ int& count = group_result_limits_->at(iter->second);
+ if (count <= 0) {
+ return true;
+ }
+ --count;
+ return false;
+ }
+
+ private:
+ const std::unordered_map<NamespaceId, int>& namespace_group_id_map_;
+ std::vector<int>* group_result_limits_;
+ const DocumentStore& document_store_;
+};
+
+std::vector<ScoredDocumentHit> ResultState::GetNextPage(
+ const DocumentStore& document_store) {
+ int num_requested = num_per_page_;
+ bool more_results_available = true;
+ std::vector<ScoredDocumentHit> final_scored_document_hits;
+ while (more_results_available && num_requested > 0) {
+ std::vector<ScoredDocumentHit> scored_document_hits = PopTopResultsFromHeap(
+ &scored_document_hits_, num_requested, scored_document_hit_comparator_);
+ more_results_available = scored_document_hits.size() == num_requested;
+ auto itr = std::remove_if(
+ scored_document_hits.begin(), scored_document_hits.end(),
+ GroupResultLimiter(namespace_group_id_map_, group_result_limits_,
+ document_store));
+ scored_document_hits.erase(itr, scored_document_hits.end());
+ final_scored_document_hits.reserve(final_scored_document_hits.size() +
+ scored_document_hits.size());
+ std::move(scored_document_hits.begin(), scored_document_hits.end(),
+ std::back_inserter(final_scored_document_hits));
+ num_requested = num_per_page_ - final_scored_document_hits.size();
+ }
+
+ num_returned_ += final_scored_document_hits.size();
+ return final_scored_document_hits;
}
void ResultState::TruncateHitsTo(int new_size) {
diff --git a/icing/result/result-state.h b/icing/result/result-state.h
index be92b85..303d610 100644
--- a/icing/result/result-state.h
+++ b/icing/result/result-state.h
@@ -23,6 +23,8 @@
#include "icing/result/projection-tree.h"
#include "icing/result/snippet-context.h"
#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-store.h"
+#include "icing/store/namespace-id.h"
namespace icing {
namespace lib {
@@ -31,17 +33,19 @@
// same query. Stored in ResultStateManager.
class ResultState {
public:
- explicit ResultState(std::vector<ScoredDocumentHit> scored_document_hits,
- SectionRestrictQueryTermsMap query_terms,
- const SearchSpecProto& search_spec,
- const ScoringSpecProto& scoring_spec,
- const ResultSpecProto& result_spec);
+ ResultState(std::vector<ScoredDocumentHit> scored_document_hits,
+ SectionRestrictQueryTermsMap query_terms,
+ const SearchSpecProto& search_spec,
+ const ScoringSpecProto& scoring_spec,
+ const ResultSpecProto& result_spec,
+ const DocumentStore& document_store);
// Returns the next page of results. The size of page is passed in from
// ResultSpecProto in constructor. Calling this method could increase the
// value of num_returned(), so be careful of the order of calling these
// methods.
- std::vector<ScoredDocumentHit> GetNextPage();
+ std::vector<ScoredDocumentHit> GetNextPage(
+ const DocumentStore& document_store);
// Truncates the vector of ScoredDocumentHits to the given size. The best
// ScoredDocumentHits are kept.
@@ -67,6 +71,10 @@
// increased when GetNextPage() is called.
int num_returned() const { return num_returned_; }
+ // The number of results yet to be returned. This number is decreased when
+ // GetNextPage is called.
+ int num_remaining() const { return scored_document_hits_.size(); }
+
private:
// The scored document hits. It represents a heap data structure when ranking
// is required so that we can get top K hits in O(KlgN) time. If no ranking is
@@ -79,6 +87,13 @@
// Information needed for projection.
std::unordered_map<std::string, ProjectionTree> projection_tree_map_;
+ // A map between namespace id and the id of the group that it appears in.
+ std::unordered_map<NamespaceId, int> namespace_group_id_map_;
+
+ // The count of remaining results to return for a group where group id is the
+ // index.
+ std::vector<int> group_result_limits_;
+
// Number of results to return in each page.
int num_per_page_;
diff --git a/icing/result/result-state_test.cc b/icing/result/result-state_test.cc
index 85cb242..f2121a5 100644
--- a/icing/result/result-state_test.cc
+++ b/icing/result/result-state_test.cc
@@ -15,9 +15,15 @@
#include "icing/result/result-state.h"
#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
#include "icing/portable/equals-proto.h"
+#include "icing/schema/schema-store.h"
#include "icing/scoring/scored-document-hit.h"
+#include "icing/store/document-store.h"
#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/clock.h"
namespace icing {
namespace lib {
@@ -50,42 +56,90 @@
return result_spec;
}
+class ResultStateTest : public testing::Test {
+ protected:
+ void SetUp() override {
+ schema_store_base_dir_ = GetTestTempDir() + "/schema_store";
+ filesystem_.CreateDirectoryRecursively(schema_store_base_dir_.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ schema_store_,
+ SchemaStore::Create(&filesystem_, schema_store_base_dir_, &clock_));
+ SchemaProto schema;
+ schema.add_types()->set_schema_type("Document");
+ ICING_ASSERT_OK(schema_store_->SetSchema(std::move(schema)));
+
+ doc_store_base_dir_ = GetTestTempDir() + "/document_store";
+ filesystem_.CreateDirectoryRecursively(doc_store_base_dir_.c_str());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult result,
+ DocumentStore::Create(&filesystem_, doc_store_base_dir_, &clock_,
+ schema_store_.get()));
+ document_store_ = std::move(result.document_store);
+ }
+
+ void TearDown() override {
+ filesystem_.DeleteDirectoryRecursively(doc_store_base_dir_.c_str());
+ filesystem_.DeleteDirectoryRecursively(schema_store_base_dir_.c_str());
+ }
+
+ ScoredDocumentHit AddScoredDocument(DocumentId document_id) {
+ DocumentProto document;
+ document.set_namespace_("namespace");
+ document.set_uri(std::to_string(document_id));
+ document.set_schema("Document");
+ document_store_->Put(std::move(document));
+ return ScoredDocumentHit(document_id, kSectionIdMaskNone, /*score=*/1);
+ }
+
+ DocumentStore& document_store() { return *document_store_; }
+
+ private:
+ Filesystem filesystem_;
+ std::string doc_store_base_dir_;
+ std::string schema_store_base_dir_;
+ Clock clock_;
+ std::unique_ptr<DocumentStore> document_store_;
+ std::unique_ptr<SchemaStore> schema_store_;
+};
+
// ResultState::ResultState() and ResultState::GetNextPage() are calling
// Ranker::BuildHeapInPlace() and Ranker::PopTopResultsFromHeap() directly, so
// we don't need to test much on what order is returned as that is tested in
// Ranker's tests. Here we just need one sanity test to make sure that the
// correct functions are called.
-TEST(ResultStateTest, ShouldReturnNextPage) {
+TEST_F(ResultStateTest, ShouldReturnNextPage) {
+ ScoredDocumentHit scored_hit_0 = AddScoredDocument(/*document_id=*/0);
+ ScoredDocumentHit scored_hit_1 = AddScoredDocument(/*document_id=*/1);
+ ScoredDocumentHit scored_hit_2 = AddScoredDocument(/*document_id=*/2);
+ ScoredDocumentHit scored_hit_3 = AddScoredDocument(/*document_id=*/3);
+ ScoredDocumentHit scored_hit_4 = AddScoredDocument(/*document_id=*/4);
std::vector<ScoredDocumentHit> scored_document_hits = {
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/4)};
+ scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3};
ResultState result_state(scored_document_hits, /*query_terms=*/{},
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/true),
- CreateResultSpec(/*num_per_page=*/2));
+ CreateResultSpec(/*num_per_page=*/2),
+ document_store());
EXPECT_THAT(
- result_state.GetNextPage(),
+ result_state.GetNextPage(document_store()),
ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4))));
+ EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4)),
+ EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3))));
EXPECT_THAT(
- result_state.GetNextPage(),
+ result_state.GetNextPage(document_store()),
ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2))));
+ EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2)),
+ EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/1))));
- EXPECT_THAT(result_state.GetNextPage(),
+ EXPECT_THAT(result_state.GetNextPage(document_store()),
ElementsAre(EqualsScoredDocumentHit(
- CreateScoredDocumentHit(/*document_id=*/1))));
+ CreateScoredDocumentHit(/*document_id=*/0))));
}
-TEST(ResultStateTest, ShouldReturnSnippetContextAccordingToSpecs) {
+TEST_F(ResultStateTest, ShouldReturnSnippetContextAccordingToSpecs) {
ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
result_spec.mutable_snippet_spec()->set_num_to_snippet(5);
result_spec.mutable_snippet_spec()->set_num_matches_per_property(5);
@@ -97,7 +151,8 @@
ResultState result_state(
/*scored_document_hits=*/{}, query_terms_map,
CreateSearchSpec(TermMatchType::EXACT_ONLY),
- CreateScoringSpec(/*is_descending_order=*/true), result_spec);
+ CreateScoringSpec(/*is_descending_order=*/true), result_spec,
+ document_store());
const SnippetContext& snippet_context = result_state.snippet_context();
@@ -117,7 +172,7 @@
EXPECT_THAT(snippet_context2.match_type, Eq(TermMatchType::EXACT_ONLY));
}
-TEST(ResultStateTest, NoSnippetingShouldReturnNull) {
+TEST_F(ResultStateTest, NoSnippetingShouldReturnNull) {
ResultSpecProto result_spec = CreateResultSpec(/*num_per_page=*/2);
// Setting num_to_snippet to 0 so that snippeting info won't be
// stored.
@@ -131,7 +186,7 @@
ResultState result_state(/*scored_document_hits=*/{}, query_terms_map,
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/true),
- result_spec);
+ result_spec, document_store());
const SnippetContext& snippet_context = result_state.snippet_context();
EXPECT_THAT(snippet_context.query_terms, IsEmpty());
@@ -141,72 +196,375 @@
EXPECT_THAT(snippet_context.match_type, TermMatchType::UNKNOWN);
}
-TEST(ResultStateTest, ShouldTruncateToNewSize) {
+TEST_F(ResultStateTest, ShouldTruncateToNewSize) {
+ ScoredDocumentHit scored_hit_0 = AddScoredDocument(/*document_id=*/0);
+ ScoredDocumentHit scored_hit_1 = AddScoredDocument(/*document_id=*/1);
+ ScoredDocumentHit scored_hit_2 = AddScoredDocument(/*document_id=*/2);
+ ScoredDocumentHit scored_hit_3 = AddScoredDocument(/*document_id=*/3);
+ ScoredDocumentHit scored_hit_4 = AddScoredDocument(/*document_id=*/4);
std::vector<ScoredDocumentHit> scored_document_hits = {
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/4)};
+ scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3};
// Creates a ResultState with 5 ScoredDocumentHits.
ResultState result_state(scored_document_hits, /*query_terms=*/{},
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/true),
- CreateResultSpec(/*num_per_page=*/5));
+ CreateResultSpec(/*num_per_page=*/5),
+ document_store());
result_state.TruncateHitsTo(/*new_size=*/3);
// The best 3 are left.
EXPECT_THAT(
- result_state.GetNextPage(),
+ result_state.GetNextPage(document_store()),
ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)),
EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3))));
+ EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)),
+ EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2))));
}
-TEST(ResultStateTest, ShouldTruncateToZero) {
+TEST_F(ResultStateTest, ShouldTruncateToZero) {
+ ScoredDocumentHit scored_hit_0 = AddScoredDocument(/*document_id=*/0);
+ ScoredDocumentHit scored_hit_1 = AddScoredDocument(/*document_id=*/1);
+ ScoredDocumentHit scored_hit_2 = AddScoredDocument(/*document_id=*/2);
+ ScoredDocumentHit scored_hit_3 = AddScoredDocument(/*document_id=*/3);
+ ScoredDocumentHit scored_hit_4 = AddScoredDocument(/*document_id=*/4);
std::vector<ScoredDocumentHit> scored_document_hits = {
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/4)};
+ scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3};
// Creates a ResultState with 5 ScoredDocumentHits.
ResultState result_state(scored_document_hits, /*query_terms=*/{},
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/true),
- CreateResultSpec(/*num_per_page=*/5));
+ CreateResultSpec(/*num_per_page=*/5),
+ document_store());
result_state.TruncateHitsTo(/*new_size=*/0);
- EXPECT_THAT(result_state.GetNextPage(), IsEmpty());
+ EXPECT_THAT(result_state.GetNextPage(document_store()), IsEmpty());
}
-TEST(ResultStateTest, ShouldNotTruncateToNegative) {
+TEST_F(ResultStateTest, ShouldNotTruncateToNegative) {
+ ScoredDocumentHit scored_hit_0 = AddScoredDocument(/*document_id=*/0);
+ ScoredDocumentHit scored_hit_1 = AddScoredDocument(/*document_id=*/1);
+ ScoredDocumentHit scored_hit_2 = AddScoredDocument(/*document_id=*/2);
+ ScoredDocumentHit scored_hit_3 = AddScoredDocument(/*document_id=*/3);
+ ScoredDocumentHit scored_hit_4 = AddScoredDocument(/*document_id=*/4);
std::vector<ScoredDocumentHit> scored_document_hits = {
- CreateScoredDocumentHit(/*document_id=*/2),
- CreateScoredDocumentHit(/*document_id=*/1),
- CreateScoredDocumentHit(/*document_id=*/3),
- CreateScoredDocumentHit(/*document_id=*/5),
- CreateScoredDocumentHit(/*document_id=*/4)};
+ scored_hit_1, scored_hit_0, scored_hit_2, scored_hit_4, scored_hit_3};
// Creates a ResultState with 5 ScoredDocumentHits.
ResultState result_state(scored_document_hits, /*query_terms=*/{},
CreateSearchSpec(TermMatchType::EXACT_ONLY),
CreateScoringSpec(/*is_descending_order=*/true),
- CreateResultSpec(/*num_per_page=*/5));
+ CreateResultSpec(/*num_per_page=*/5),
+ document_store());
result_state.TruncateHitsTo(/*new_size=*/-1);
// Results are not affected.
EXPECT_THAT(
- result_state.GetNextPage(),
+ result_state.GetNextPage(document_store()),
ElementsAre(
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/5)),
EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/4)),
EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/3)),
EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/2)),
- EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/1))));
+ EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/1)),
+ EqualsScoredDocumentHit(CreateScoredDocumentHit(/*document_id=*/0))));
+}
+
+TEST_F(ResultStateTest, ResultGroupingShouldLimitResults) {
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store().Put(document1));
+ ScoredDocumentHit scored_hit_1(document_id1, kSectionIdMaskNone,
+ document1.score());
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store().Put(document2));
+ ScoredDocumentHit scored_hit_2(document_id2, kSectionIdMaskNone,
+ document2.score());
+ std::vector<ScoredDocumentHit> scored_document_hits = {scored_hit_2,
+ scored_hit_1};
+
+ // Create a ResultSpec that limits "namespace" to a single result.
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(5);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("namespace");
+
+ // Creates a ResultState with 2 ScoredDocumentHits.
+ ResultState result_state(scored_document_hits, /*query_terms=*/{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ result_spec, document_store());
+
+ // Only the top ranked document in "namespace" (document2) should be
+ // returned.
+ EXPECT_THAT(result_state.GetNextPage(document_store()),
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_2)));
+}
+
+TEST_F(ResultStateTest, ResultGroupingDoesNotLimitOtherNamespaceResults) {
+ // Creates 4 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri/3")
+ .SetSchema("Document")
+ .SetScore(3)
+ .Build();
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Document")
+ .SetScore(4)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store().Put(document1));
+ ScoredDocumentHit scored_hit_1(document_id1, kSectionIdMaskNone,
+ document1.score());
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store().Put(document2));
+ ScoredDocumentHit scored_hit_2(document_id2, kSectionIdMaskNone,
+ document2.score());
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store().Put(document3));
+ ScoredDocumentHit scored_hit_3(document_id3, kSectionIdMaskNone,
+ document3.score());
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store().Put(document4));
+ ScoredDocumentHit scored_hit_4(document_id4, kSectionIdMaskNone,
+ document4.score());
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ scored_hit_4, scored_hit_3, scored_hit_2, scored_hit_1};
+
+ // Create a ResultSpec that limits "namespace1" to a single result, but
+ // doesn't limit "namespace2".
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(5);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("namespace1");
+
+ // Creates a ResultState with 4 ScoredDocumentHits.
+ ResultState result_state(scored_document_hits, /*query_terms=*/{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ result_spec, document_store());
+
+ // Only the top ranked document in "namespace1" (document2) should be
+ // returned. Both "namespace2" documents are unlimited and are returned too.
+ EXPECT_THAT(result_state.GetNextPage(document_store()),
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_4),
+ EqualsScoredDocumentHit(scored_hit_3),
+ EqualsScoredDocumentHit(scored_hit_2)));
+}
+
+TEST_F(ResultStateTest, ResultGroupingNonexistentNamespaceShouldBeIgnored) {
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store().Put(document1));
+ ScoredDocumentHit scored_hit_1(document_id1, kSectionIdMaskNone,
+ document1.score());
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store().Put(document2));
+ ScoredDocumentHit scored_hit_2(document_id2, kSectionIdMaskNone,
+ document2.score());
+ std::vector<ScoredDocumentHit> scored_document_hits = {scored_hit_2,
+ scored_hit_1};
+
+ // Create a ResultSpec that limits "namespace"+"nonexistentNamespace" to a
+ // single result.
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(5);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("namespace");
+ result_grouping->add_namespaces("nonexistentNamespace");
+
+ // Creates a ResultState with 2 ScoredDocumentHits.
+ ResultState result_state(scored_document_hits, /*query_terms=*/{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ result_spec, document_store());
+
+ // Only the top ranked document in "namespace" (document2) should be
+ // returned. The presence of "nonexistentNamespace" in the same result
+ // grouping should have no effect.
+ EXPECT_THAT(result_state.GetNextPage(document_store()),
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_2)));
+}
+
+TEST_F(ResultStateTest, ResultGroupingMultiNamespaceGrouping) {
+ // Creates 6 documents and ensures the relationship in terms of document
+ // score is: document1 < document2 < document3 < document4 < document5 <
+ // document6
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace1", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace1", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .Build();
+ DocumentProto document3 = DocumentBuilder()
+ .SetKey("namespace2", "uri/3")
+ .SetSchema("Document")
+ .SetScore(3)
+ .Build();
+ DocumentProto document4 = DocumentBuilder()
+ .SetKey("namespace2", "uri/4")
+ .SetSchema("Document")
+ .SetScore(4)
+ .Build();
+ DocumentProto document5 = DocumentBuilder()
+ .SetKey("namespace3", "uri/5")
+ .SetSchema("Document")
+ .SetScore(5)
+ .Build();
+ DocumentProto document6 = DocumentBuilder()
+ .SetKey("namespace3", "uri/6")
+ .SetSchema("Document")
+ .SetScore(6)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store().Put(document1));
+ ScoredDocumentHit scored_hit_1(document_id1, kSectionIdMaskNone,
+ document1.score());
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store().Put(document2));
+ ScoredDocumentHit scored_hit_2(document_id2, kSectionIdMaskNone,
+ document2.score());
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
+ document_store().Put(document3));
+ ScoredDocumentHit scored_hit_3(document_id3, kSectionIdMaskNone,
+ document3.score());
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id4,
+ document_store().Put(document4));
+ ScoredDocumentHit scored_hit_4(document_id4, kSectionIdMaskNone,
+ document4.score());
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id5,
+ document_store().Put(document5));
+ ScoredDocumentHit scored_hit_5(document_id5, kSectionIdMaskNone,
+ document5.score());
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id6,
+ document_store().Put(document6));
+ ScoredDocumentHit scored_hit_6(document_id6, kSectionIdMaskNone,
+ document6.score());
+ std::vector<ScoredDocumentHit> scored_document_hits = {
+ scored_hit_6, scored_hit_5, scored_hit_4,
+ scored_hit_3, scored_hit_2, scored_hit_1};
+
+ // Create a ResultSpec that limits "namespace1" to a single result and limits
+ // "namespace2"+"namespace3" to a total of two results.
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(5);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("namespace1");
+ result_grouping = result_spec.add_result_groupings();
+ result_grouping->set_max_results(2);
+ result_grouping->add_namespaces("namespace2");
+ result_grouping->add_namespaces("namespace3");
+
+ // Creates a ResultState with 6 ScoredDocumentHits.
+ ResultState result_state(scored_document_hits, /*query_terms=*/{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ result_spec, document_store());
+
+ // Only the top-ranked result in "namespace1" (document2) should be returned.
+ // Only the top-ranked results across "namespace2" and "namespace3"
+ // (document6, document5) should be returned.
+ EXPECT_THAT(result_state.GetNextPage(document_store()),
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_6),
+ EqualsScoredDocumentHit(scored_hit_5),
+ EqualsScoredDocumentHit(scored_hit_2)));
+}
+
+TEST_F(ResultStateTest, ResultGroupingOnlyNonexistentNamespaces) {
+ // Creates 2 documents and ensures the relationship in terms of document
+ // score is: document1 < document2
+ DocumentProto document1 = DocumentBuilder()
+ .SetKey("namespace", "uri/1")
+ .SetSchema("Document")
+ .SetScore(1)
+ .Build();
+ DocumentProto document2 = DocumentBuilder()
+ .SetKey("namespace", "uri/2")
+ .SetSchema("Document")
+ .SetScore(2)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
+ document_store().Put(document1));
+ ScoredDocumentHit scored_hit_1(document_id1, kSectionIdMaskNone,
+ document1.score());
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
+ document_store().Put(document2));
+ ScoredDocumentHit scored_hit_2(document_id2, kSectionIdMaskNone,
+ document2.score());
+ std::vector<ScoredDocumentHit> scored_document_hits = {scored_hit_2,
+ scored_hit_1};
+
+ // Create a ResultSpec that limits "nonexistentNamespace" to a single result,
+ // but doesn't limit "namespace".
+ ResultSpecProto result_spec;
+ result_spec.set_num_per_page(5);
+ ResultSpecProto::ResultGrouping* result_grouping =
+ result_spec.add_result_groupings();
+ result_grouping->set_max_results(1);
+ result_grouping->add_namespaces("nonexistentNamespace");
+
+ // Creates a ResultState with 2 ScoredDocumentHits.
+ ResultState result_state(scored_document_hits, /*query_terms=*/{},
+ CreateSearchSpec(TermMatchType::EXACT_ONLY),
+ CreateScoringSpec(/*is_descending_order=*/true),
+ result_spec, document_store());
+
+ // All documents in "namespace" should be returned. The presence of
+ // "nonexistentNamespace" should have no effect.
+ EXPECT_THAT(result_state.GetNextPage(document_store()),
+ ElementsAre(EqualsScoredDocumentHit(scored_hit_2),
+ EqualsScoredDocumentHit(scored_hit_1)));
}
} // namespace
diff --git a/icing/result/snippet-retriever.cc b/icing/result/snippet-retriever.cc
index d4a5f79..dc9f8be 100644
--- a/icing/result/snippet-retriever.cc
+++ b/icing/result/snippet-retriever.cc
@@ -15,6 +15,7 @@
#include "icing/result/snippet-retriever.h"
#include <algorithm>
+#include <iterator>
#include <memory>
#include <string>
#include <string_view>
@@ -25,9 +26,12 @@
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/absl_ports/canonical_errors.h"
+#include "icing/absl_ports/str_cat.h"
+#include "icing/absl_ports/str_join.h"
#include "icing/proto/term.pb.h"
#include "icing/query/query-terms.h"
#include "icing/schema/schema-store.h"
+#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/tokenization/language-segmenter.h"
@@ -35,6 +39,7 @@
#include "icing/tokenization/tokenizer-factory.h"
#include "icing/tokenization/tokenizer.h"
#include "icing/transform/normalizer.h"
+#include "icing/util/character-iterator.h"
#include "icing/util/i18n-utils.h"
#include "icing/util/status-macros.h"
@@ -43,6 +48,33 @@
namespace {
+const PropertyProto* GetProperty(const DocumentProto& document,
+ std::string_view property_name) {
+ for (const PropertyProto& property : document.properties()) {
+ if (property.name() == property_name) {
+ return &property;  // First property with a matching name wins.
+ }
+ }
+ return nullptr;  // No top-level property of document has this name.
+}
+
+inline std::string AddPropertyToPath(const std::string& current_path,
+ std::string_view property) {
+ if (current_path.empty()) {
+ return std::string(property);  // Root level: no separator is prepended.
+ }
+ return absl_ports::StrCat(current_path, kPropertySeparator, property);
+}
+
+inline std::string AddIndexToPath(int values_size, int index,
+ const std::string& property_path) {
+ if (values_size == 1) {
+ return property_path;  // A lone value is addressed without an index.
+ }
+ return absl_ports::StrCat(property_path, kLBracket, std::to_string(index),
+ kRBracket);
+}
+
class TokenMatcher {
public:
virtual ~TokenMatcher() = default;
@@ -124,20 +156,17 @@
}
}
-// Returns true if token matches any of the terms in query terms according to
-// the provided match type.
+// Finds the start position of a valid token that is after
+// window_start_min_exclusive
//
// Returns:
// the position of the window start if successful
// INTERNAL_ERROR - if a tokenizer error is encountered
libtextclassifier3::StatusOr<int> DetermineWindowStart(
const ResultSpecProto::SnippetSpecProto& snippet_spec,
- std::string_view value, int match_mid, Tokenizer::Iterator* iterator) {
- int window_start_min = (match_mid - snippet_spec.max_window_bytes() / 2) - 1;
- if (window_start_min < 0) {
- return 0;
- }
- if (!iterator->ResetToTokenAfter(window_start_min)) {
+ std::string_view value, int window_start_min_exclusive,
+ Tokenizer::Iterator* iterator) {
+ if (!iterator->ResetToTokenAfter(window_start_min_exclusive)) {
return absl_ports::InternalError(
"Couldn't reset tokenizer to determine snippet window!");
}
@@ -165,17 +194,16 @@
return window_end_exclusive;
}
+// Finds the end position of a valid token that is before the
+// window_end_max_exclusive.
+//
// Returns:
// the position of the window end if successful
// INTERNAL_ERROR - if a tokenizer error is encountered
libtextclassifier3::StatusOr<int> DetermineWindowEnd(
const ResultSpecProto::SnippetSpecProto& snippet_spec,
- std::string_view value, int match_mid, Tokenizer::Iterator* iterator) {
- int window_end_max_exclusive =
- match_mid + snippet_spec.max_window_bytes() / 2;
- if (window_end_max_exclusive >= value.length()) {
- return value.length();
- }
+ std::string_view value, int window_end_max_exclusive,
+ Tokenizer::Iterator* iterator) {
if (!iterator->ResetToTokenBefore(window_end_max_exclusive)) {
return absl_ports::InternalError(
"Couldn't reset tokenizer to determine snippet window!");
@@ -189,41 +217,111 @@
struct SectionData {
std::string_view section_name;
std::string_view section_subcontent;
- // Identifies which subsection of the section content, section_subcontent has
- // come from.
- // Ex. "recipient.address" :
- // ["foo@google.com", "bar@google.com", "baz@google.com"]
- // The subcontent_index of "bar@google.com" is 1.
- int subcontent_index;
};
+// Creates a snippet match proto for the match pointed to by the iterator and
+// char_iterator.
+//
+// Returns:
+// the SnippetMatchProto describing the match if successful
+// INTERNAL_ERROR - if a tokenizer error is encountered and iterator is left
+// in an invalid state
+// ABORTED_ERROR - if an invalid utf-8 sequence is encountered
libtextclassifier3::StatusOr<SnippetMatchProto> RetrieveMatch(
const ResultSpecProto::SnippetSpecProto& snippet_spec,
- const SectionData& value, Tokenizer::Iterator* iterator) {
+ const SectionData& value, Tokenizer::Iterator* iterator,
+ const CharacterIterator& char_iterator) {
SnippetMatchProto snippet_match;
- snippet_match.set_values_index(value.subcontent_index);
-
Token match = iterator->GetToken();
- int match_pos = match.text.data() - value.section_subcontent.data();
+ int match_pos = char_iterator.utf8_index();
+
+ // When finding boundaries, we have a few cases:
+ //
+ // Case 1:
+ // If we have an odd length match with an odd length window, the window
+ // the match perfectly.
+ // match = "bar" in "foo bar baz"
+ // window = |---|
+ //
+ // Case 2:
+ // If we have an even length match with an even length window, the window
+ // surrounds the match perfectly.
+ // match = "baar" in "foo baar baz"
+ // window = |----|
+ //
+ // Case 3:
+ // If we have an odd length match with an even length window, we allocate
+ // that extra window byte to the beginning.
+ // match = "bar" in "foo bar baz"
+ // window = |----|
+ //
+ // Case 4:
+ // If we have an even length match with an odd length window, we allocate
+ // that extra window byte to the end.
+ // match = "baar" in "foo baar baz"
+ // window = |-----|
+ //
+ // We have to do +1/-1 below to get the math to match up.
int match_mid = match_pos + match.text.length() / 2;
+ int window_start_min_exclusive =
+ (match_mid - snippet_spec.max_window_bytes() / 2) - 1;
+ int window_end_max_exclusive =
+ match_mid + (snippet_spec.max_window_bytes() + 1) / 2;
- snippet_match.set_exact_match_position(match_pos);
- snippet_match.set_exact_match_bytes(match.text.length());
+ snippet_match.set_exact_match_byte_position(match_pos);
+ snippet_match.set_exact_match_utf16_position(char_iterator.utf16_index());
- if (snippet_spec.max_window_bytes() > match.text.length()) {
+ // Create character iterators to find the beginning and end of the window.
+ CharacterIterator forward_char_iterator(char_iterator);
+ CharacterIterator backwards_char_iterator(char_iterator);
+
+ if (!backwards_char_iterator.AdvanceToUtf8(match_pos + match.text.length())) {
+ return absl_ports::AbortedError("Could not retrieve valid utf8 character!");
+ }
+ snippet_match.set_exact_match_byte_length(match.text.length());
+ snippet_match.set_exact_match_utf16_length(
+ backwards_char_iterator.utf16_index() - char_iterator.utf16_index());
+
+ // Only include windows if it'll at least include the matched text. Otherwise,
+ // it'll just be an empty string anyways.
+ if (snippet_spec.max_window_bytes() >= match.text.length()) {
// Find the beginning of the window.
- ICING_ASSIGN_OR_RETURN(
- int window_start,
- DetermineWindowStart(snippet_spec, value.section_subcontent, match_mid,
- iterator));
- snippet_match.set_window_position(window_start);
+ int window_start;
+ int window_start_utf16;
+ if (window_start_min_exclusive < 0) {
+ window_start = 0;
+ window_start_utf16 = 0;
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ window_start,
+ DetermineWindowStart(snippet_spec, value.section_subcontent,
+ window_start_min_exclusive, iterator));
+ if (!forward_char_iterator.RewindToUtf8(window_start)) {
+ return absl_ports::AbortedError(
+ "Could not retrieve valid utf8 character!");
+ }
+ window_start_utf16 = forward_char_iterator.utf16_index();
+ }
+ snippet_match.set_window_byte_position(window_start);
+ snippet_match.set_window_utf16_position(window_start_utf16);
// Find the end of the window.
- ICING_ASSIGN_OR_RETURN(
- int window_end_exclusive,
- DetermineWindowEnd(snippet_spec, value.section_subcontent, match_mid,
- iterator));
- snippet_match.set_window_bytes(window_end_exclusive - window_start);
+ int window_end_exclusive;
+ if (window_end_max_exclusive >= value.section_subcontent.length()) {
+ window_end_exclusive = value.section_subcontent.length();
+ } else {
+ ICING_ASSIGN_OR_RETURN(
+ window_end_exclusive,
+ DetermineWindowEnd(snippet_spec, value.section_subcontent,
+ window_end_max_exclusive, iterator));
+ }
+ if (!backwards_char_iterator.AdvanceToUtf8(window_end_exclusive)) {
+ return absl_ports::AbortedError(
+ "Could not retrieve valid utf8 character!");
+ }
+ snippet_match.set_window_byte_length(window_end_exclusive - window_start);
+ snippet_match.set_window_utf16_length(
+ backwards_char_iterator.utf16_index() - window_start_utf16);
// DetermineWindowStart/End may change the position of the iterator. So,
// reset the iterator back to the original position.
@@ -243,33 +341,131 @@
int max_matches_remaining;
};
-libtextclassifier3::StatusOr<SnippetProto::EntryProto> RetrieveMatches(
- const TokenMatcher* matcher, const MatchOptions& match_options,
- const SectionData& value, const Tokenizer* tokenizer) {
- SnippetProto::EntryProto snippet_entry;
- snippet_entry.set_property_name(std::string(value.section_name));
- ICING_ASSIGN_OR_RETURN(std::unique_ptr<Tokenizer::Iterator> iterator,
- tokenizer->Tokenize(value.section_subcontent));
- while (iterator->Advance()) {
- if (snippet_entry.snippet_matches_size() >=
- match_options.max_matches_remaining) {
- break;
+// Retrieves snippets in the string values of current_property.
+// Tokenizer is provided to tokenize string content and matcher is provided to
+// indicate when a token matches content in the query.
+//
+// current_property is the property with the string values to snippet.
+// property_path is the path in the document to current_property.
+//
+// MatchOptions holds the snippet spec and number of desired matches remaining.
+// Each call to GetEntriesFromProperty will decrement max_matches_remaining
+// by the number of snippet matches that it adds to snippet_proto.
+//
+// The SnippetEntries found for matched content will be added to snippet_proto.
+void GetEntriesFromProperty(const PropertyProto* current_property,
+ const std::string& property_path,
+ const TokenMatcher* matcher,
+ const Tokenizer* tokenizer,
+ MatchOptions* match_options,
+ SnippetProto* snippet_proto) {
+ // Snippet every string value. NOTE(review): Tokenize() status is unchecked.
+ for (int i = 0; i < current_property->string_values_size(); ++i) {
+ SnippetProto::EntryProto snippet_entry;
+ snippet_entry.set_property_name(AddIndexToPath(
+ current_property->string_values_size(), /*index=*/i, property_path));
+ std::string_view value = current_property->string_values(i);
+ std::unique_ptr<Tokenizer::Iterator> iterator =
+ tokenizer->Tokenize(value).ValueOrDie();
+ CharacterIterator char_iterator(value);
+ while (iterator->Advance()) {
+ Token token = iterator->GetToken();
+ if (matcher->Matches(token)) {
+ if (!char_iterator.AdvanceToUtf8(token.text.data() - value.data())) {
+ // We can't get the char_iterator to a valid position, so there's no
+ // way for us to provide valid utf-16 indices. There's nothing more we
+ // can do here, so just return whatever we've built up so far.
+ if (!snippet_entry.snippet_matches().empty()) {
+ *snippet_proto->add_entries() = std::move(snippet_entry);
+ }
+ return;
+ }
+ SectionData data = {property_path, value};
+ auto match_or = RetrieveMatch(match_options->snippet_spec, data,
+ iterator.get(), char_iterator);
+ if (!match_or.ok()) {
+ if (absl_ports::IsAborted(match_or.status())) {
+ // Only an aborted error. We can't get this match, but we might be
+ // able to retrieve others. Just continue.
+ continue;
+ } else {
+ // Probably an internal error. The tokenizer iterator is probably in
+ // an invalid state. There's nothing more we can do here, so just
+ // return whatever we've built up so far.
+ if (!snippet_entry.snippet_matches().empty()) {
+ *snippet_proto->add_entries() = std::move(snippet_entry);
+ }
+ return;
+ }
+ }
+ SnippetMatchProto match = std::move(match_or).ValueOrDie();
+ snippet_entry.mutable_snippet_matches()->Add(std::move(match));
+ if (--match_options->max_matches_remaining <= 0) {
+ *snippet_proto->add_entries() = std::move(snippet_entry);
+ return;
+ }
+ }
}
- Token token = iterator->GetToken();
- if (matcher->Matches(token)) {
- // If there was an error while retrieving the match, the tokenizer
- // iterator is probably in an invalid state. There's nothing we can do
- // here, so just return.
- ICING_ASSIGN_OR_RETURN(
- SnippetMatchProto match,
- RetrieveMatch(match_options.snippet_spec, value, iterator.get()));
- snippet_entry.mutable_snippet_matches()->Add(std::move(match));
+ if (!snippet_entry.snippet_matches().empty()) {
+ *snippet_proto->add_entries() = std::move(snippet_entry);
}
}
- if (snippet_entry.snippet_matches().empty()) {
- return absl_ports::NotFoundError("No matches found in value!");
+}
+
+// Retrieves snippets in document from content at section_path.
+// Tokenizer is provided to tokenize string content and matcher is provided to
+// indicate when a token matches content in the query.
+//
+// section_path_index refers to the current property that is held by document.
+// current_path is equivalent to the first section_path_index values in
+// section_path, but with value indices present.
+//
+// For example, suppose that a hit appeared somewhere in the "bcc.emailAddress".
+// The arguments for RetrieveSnippetForSection might be
+// {section_path=["bcc", "emailAddress"], section_path_index=0, current_path=""}
+// on the first call and
+// {section_path=["bcc", "emailAddress"], section_path_index=1,
+// current_path="bcc[1]"} on the second recursive call.
+//
+// MatchOptions holds the snippet spec and number of desired matches remaining.
+// Each call to RetrieveSnippetForSection will decrement max_matches_remaining
+// by the number of snippet matches that it adds to snippet_proto.
+//
+// The SnippetEntries found for matched content will be added to snippet_proto.
+void RetrieveSnippetForSection(
+ const DocumentProto& document, const TokenMatcher* matcher,
+ const Tokenizer* tokenizer,
+ const std::vector<std::string_view>& section_path, int section_path_index,
+ const std::string& current_path, MatchOptions* match_options,
+ SnippetProto* snippet_proto) {
+ std::string_view next_property_name = section_path.at(section_path_index);
+ const PropertyProto* current_property =
+ GetProperty(document, next_property_name);
+ if (current_property == nullptr) {
+ ICING_VLOG(1) << "No property " << next_property_name << " found at path "
+ << current_path;
+ return;
}
- return snippet_entry;
+ std::string property_path =
+ AddPropertyToPath(current_path, next_property_name);
+ if (section_path_index == section_path.size() - 1) {
+ // Last path component: snippet this property's string values.
+ GetEntriesFromProperty(current_property, property_path, matcher, tokenizer,
+ match_options, snippet_proto);
+ } else {
+ // Intermediate component: recurse into each nested document value.
+ std::vector<SnippetProto::EntryProto> entries;  // NOTE(review): unused.
+ for (int i = 0; i < current_property->document_values_size(); ++i) {
+ std::string new_path = AddIndexToPath(
+ current_property->document_values_size(), /*index=*/i, property_path);
+ RetrieveSnippetForSection(current_property->document_values(i), matcher,
+ tokenizer, section_path, section_path_index + 1,
+ new_path, match_options, snippet_proto);
+ if (match_options->max_matches_remaining <= 0) {
+ break;
+ }
+ }
+ }
}
} // namespace
@@ -304,6 +500,10 @@
// Remove this section from the mask.
section_id_mask &= ~(1u << section_id);
+ MatchOptions match_options = {snippet_spec};
+ match_options.max_matches_remaining =
+ snippet_spec.num_matches_per_property();
+
// Determine the section name and match type.
auto section_metadata_or =
schema_store_.GetSectionMetadata(type_id, section_id);
@@ -311,7 +511,9 @@
continue;
}
const SectionMetadata* metadata = section_metadata_or.ValueOrDie();
- MatchOptions match_options = {snippet_spec};
+ std::vector<std::string_view> section_path =
+ absl_ports::StrSplit(metadata->path, kPropertySeparator);
+
// Match type must be as restrictive as possible. Prefix matches for a
// snippet should only be included if both the query is Prefix and the
// section has prefixes enabled.
@@ -330,38 +532,18 @@
if (!matcher_or.ok()) {
continue;
}
- match_options.max_matches_remaining =
- snippet_spec.num_matches_per_property();
+ std::unique_ptr<TokenMatcher> matcher = std::move(matcher_or).ValueOrDie();
- // Retrieve values and snippet them.
- auto values_or =
- schema_store_.GetStringSectionContent(document, metadata->path);
- if (!values_or.ok()) {
- continue;
- }
auto tokenizer_or = tokenizer_factory::CreateIndexingTokenizer(
metadata->tokenizer, &language_segmenter_);
if (!tokenizer_or.ok()) {
// If we couldn't create the tokenizer properly, just skip this section.
continue;
}
- std::vector<std::string_view> values = values_or.ValueOrDie();
- for (int value_index = 0; value_index < values.size(); ++value_index) {
- if (match_options.max_matches_remaining <= 0) {
- break;
- }
- SectionData value = {metadata->path, values.at(value_index), value_index};
- auto entry_or =
- RetrieveMatches(matcher_or.ValueOrDie().get(), match_options, value,
- tokenizer_or.ValueOrDie().get());
-
- // Drop any entries that encountered errors or didn't find any matches.
- if (entry_or.ok()) {
- match_options.max_matches_remaining -=
- entry_or.ValueOrDie().snippet_matches_size();
- snippet_proto.mutable_entries()->Add(std::move(entry_or).ValueOrDie());
- }
- }
+ std::unique_ptr<Tokenizer> tokenizer = std::move(tokenizer_or).ValueOrDie();
+ RetrieveSnippetForSection(
+ document, matcher.get(), tokenizer.get(), section_path,
+ /*section_path_index=*/0, "", &match_options, &snippet_proto);
}
return snippet_proto;
}
diff --git a/icing/result/snippet-retriever_test.cc b/icing/result/snippet-retriever_test.cc
index ecda400..c052a9e 100644
--- a/icing/result/snippet-retriever_test.cc
+++ b/icing/result/snippet-retriever_test.cc
@@ -24,17 +24,19 @@
#include "icing/file/mock-filesystem.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/search.pb.h"
+#include "icing/proto/term.pb.h"
#include "icing/query/query-terms.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section-manager.h"
#include "icing/store/document-id.h"
#include "icing/store/key-mapper.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
-#include "icing/testing/platform.h"
#include "icing/testing/snippet-helpers.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
@@ -49,10 +51,30 @@
namespace {
+using ::testing::ElementsAre;
using ::testing::Eq;
using ::testing::IsEmpty;
using ::testing::SizeIs;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED =
+ PropertyConfigProto_Cardinality_Code_REPEATED;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
+constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
+
+std::vector<std::string_view> GetPropertyPaths(const SnippetProto& snippet) {
+ std::vector<std::string_view> paths;  // One non-owning view per entry.
+ for (const SnippetProto::EntryProto& entry : snippet.entries()) {
+ paths.push_back(entry.property_name());
+ }
+ return paths;
+}
+
class SnippetRetrieverTest : public testing::Test {
protected:
void SetUp() override {
@@ -75,25 +97,22 @@
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- SchemaProto schema;
- SchemaTypeConfigProto* type_config = schema.add_types();
- type_config->set_schema_type("email");
- PropertyConfigProto* prop_config = type_config->add_properties();
- prop_config->set_property_name("subject");
- prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
- prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- prop_config->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
- prop_config = type_config->add_properties();
- prop_config->set_property_name("body");
- prop_config->set_data_type(PropertyConfigProto::DataType::STRING);
- prop_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- prop_config->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- prop_config->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
ICING_ASSERT_OK(schema_store_->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(normalizer_, normalizer_factory::Create(
@@ -156,11 +175,65 @@
// "three". len=4, orig_window= "thre"
snippet_spec_.set_max_window_bytes(4);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq(""));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre(""));
+}
+
+TEST_F(SnippetRetrieverTest,
+ SnippetingWindowMaxWindowSizeEqualToMatch_OddLengthMatch) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"three"}}};
+
+ // Window starts at the beginning of "three" and ends at the exact end of
+ // "three". len=5, orig_window= "three"
+ snippet_spec_.set_max_window_bytes(5);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("three"));
+}
+
+TEST_F(SnippetRetrieverTest,
+ SnippetingWindowMaxWindowSizeEqualToMatch_EvenLengthMatch) {
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "counting")
+ .AddStringProperty("body", "one two three four.... five")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"four"}}};
+
+ // Window starts at the beginning of "four" and ends at the exact end of
+ // "four". len=4, orig_window= "four"
+ snippet_spec_.set_max_window_bytes(4);
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("four"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsInWhitespace) {
@@ -180,11 +253,14 @@
// len=14, orig_window=" two three fou"
snippet_spec_.set_max_window_bytes(14);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("two three"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("two three"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsMidToken) {
@@ -203,11 +279,14 @@
// len=16, orig_window="e two three four"
snippet_spec_.set_max_window_bytes(16);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("two three four"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("two three four"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInPunctuation) {
@@ -226,11 +305,14 @@
// len=20, orig_window="one two three four.."
snippet_spec_.set_max_window_bytes(20);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four.."));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four.."));
}
TEST_F(SnippetRetrieverTest,
@@ -251,11 +333,14 @@
// len=26, orig_window="pside down in Australia\xC2"
snippet_spec_.set_max_window_bytes(24);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("down in Australia"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("down in Australia"));
}
TEST_F(SnippetRetrieverTest,
@@ -276,11 +361,14 @@
// len=26, orig_window="upside down in Australia\xC2\xBF"
snippet_spec_.set_max_window_bytes(26);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("upside down in Australia¿"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("upside down in Australia¿"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowStartsBeforeValueStart) {
@@ -299,11 +387,14 @@
// len=22, orig_window="one two three four..."
snippet_spec_.set_max_window_bytes(22);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four..."));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four..."));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsInWhitespace) {
@@ -322,11 +413,14 @@
// len=26, orig_window="one two three four.... "
snippet_spec_.set_max_window_bytes(26);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four...."));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four...."));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowEndsMidToken) {
@@ -345,11 +439,14 @@
// len=32, orig_window="one two three four.... fiv"
snippet_spec_.set_max_window_bytes(32);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four...."));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four...."));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeEqualToValueSize) {
@@ -368,11 +465,14 @@
// len=34, orig_window="one two three four.... five"
snippet_spec_.set_max_window_bytes(34);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four.... five"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four.... five"));
}
TEST_F(SnippetRetrieverTest, SnippetingWindowMaxWindowSizeLargerThanValueSize) {
@@ -391,11 +491,14 @@
// len=36, orig_window="one two three four.... five"
snippet_spec_.set_max_window_bytes(36);
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", /*snippet_index=*/0),
- Eq("one two three four.... five"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("one two three four.... five"));
}
TEST_F(SnippetRetrieverTest, PrefixSnippeting) {
@@ -409,14 +512,17 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"f"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets. 'f' should match prefix-enabled property 'subject', but
// not exact-only property 'body'
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("subject foo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo"));
}
TEST_F(SnippetRetrieverTest, ExactSnippeting) {
@@ -431,8 +537,7 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"f"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), IsEmpty());
@@ -452,13 +557,15 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"foo"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), IsEmpty());
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre(""));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo"));
}
TEST_F(SnippetRetrieverTest, SnippetingMultipleMatches) {
@@ -474,20 +581,25 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(2));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
EXPECT_THAT(
- GetWindow(document, snippet, "body", 0),
- Eq("Concerning the subject of foo, we need to begin considering"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo"));
- EXPECT_THAT(GetWindow(document, snippet, "body", 1),
- Eq("our options regarding body bar."));
- EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("bar"));
+ GetWindows(content, snippet.entries(0)),
+ ElementsAre("Concerning the subject of foo, we need to begin considering",
+ "our options regarding body bar."));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("foo", "bar"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)),
+ ElementsAre("subject foo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("foo"));
}
TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrict) {
@@ -505,18 +617,19 @@
SectionIdMask section_mask = 0b00000001;
SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(1));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
EXPECT_THAT(
- GetWindow(document, snippet, "body", 0),
- Eq("Concerning the subject of foo, we need to begin considering"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo"));
- EXPECT_THAT(GetWindow(document, snippet, "body", 1),
- Eq("our options regarding body bar."));
- EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("bar"));
+ GetWindows(content, snippet.entries(0)),
+ ElementsAre("Concerning the subject of foo, we need to begin considering",
+ "our options regarding body bar."));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("foo", "bar"));
}
TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesSectionRestrictedTerm) {
@@ -536,25 +649,26 @@
SectionRestrictQueryTermsMap query_terms{{"", {"subject"}},
{"body", {"foo"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(2));
- // 'subject' section should only have the one match for "subject".
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("subject"));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 1), IsEmpty());
- EXPECT_THAT(GetMatch(document, snippet, "subject", 1), IsEmpty());
-
- // 'body' section should have matches for "subject" and "foo".
- EXPECT_THAT(GetWindow(document, snippet, "body", 0),
- Eq("Concerning the subject of foo, we need to begin"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("subject"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
EXPECT_THAT(
- GetWindow(document, snippet, "body", 1),
- Eq("Concerning the subject of foo, we need to begin considering"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 1), Eq("foo"));
+ GetWindows(content, snippet.entries(0)),
+ ElementsAre(
+ "Concerning the subject of foo, we need to begin",
+ "Concerning the subject of foo, we need to begin considering"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)),
+ ElementsAre("subject", "foo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)),
+ ElementsAre("subject foo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("subject"));
}
TEST_F(SnippetRetrieverTest, SnippetingMultipleMatchesOneMatchPerProperty) {
@@ -573,19 +687,24 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"foo", "bar"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
// Check the snippets
EXPECT_THAT(snippet.entries(), SizeIs(2));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("subject foo"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("foo"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
EXPECT_THAT(
- GetWindow(document, snippet, "body", 0),
- Eq("Concerning the subject of foo, we need to begin considering"));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("foo"));
- EXPECT_THAT(GetWindow(document, snippet, "body", 1), IsEmpty());
- EXPECT_THAT(GetMatch(document, snippet, "body", 1), IsEmpty());
+ GetWindows(content, snippet.entries(0)),
+ ElementsAre(
+ "Concerning the subject of foo, we need to begin considering"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("foo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("subject"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)),
+ ElementsAre("subject foo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("foo"));
}
TEST_F(SnippetRetrieverTest, PrefixSnippetingNormalization) {
@@ -599,12 +718,14 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"md"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::PREFIX, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "subject", 0), Eq("MDI team"));
- EXPECT_THAT(GetMatch(document, snippet, "subject", 0), Eq("MDI"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("MDI team"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("MDI"));
}
TEST_F(SnippetRetrieverTest, ExactSnippetingNormalization) {
@@ -619,13 +740,593 @@
SectionIdMask section_mask = 0b00000011;
SectionRestrictQueryTermsMap query_terms{{"", {"zurich"}}};
SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
- query_terms, TermMatchType::EXACT_ONLY, snippet_spec_, document,
- section_mask);
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
EXPECT_THAT(snippet.entries(), SizeIs(1));
- EXPECT_THAT(GetWindow(document, snippet, "body", 0),
- Eq("Some members are in Zürich."));
- EXPECT_THAT(GetMatch(document, snippet, "body", 0), Eq("Zürich"));
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("body"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)),
+ ElementsAre("Some members are in Zürich."));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("Zürich"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingTestOneLevel) {  // Flat document with three repeated string properties.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("SingleLevelType")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("X")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Y")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Z")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true));
+ ICING_ASSERT_OK_AND_ASSIGN(  // Re-create the retriever after replacing the schema.
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"};
+ DocumentProto document;
+ document.set_schema("SingleLevelType");
+ PropertyProto* prop = document.add_properties();
+ prop->set_name("X");
+ for (const std::string& s : string_values) {  // X, Y and Z all receive the same four values.
+ prop->add_string_values(s);
+ }
+ prop = document.add_properties();
+ prop->set_name("Y");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = document.add_properties();
+ prop->set_name("Z");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+
+ SectionIdMask section_mask = 0b00000111;  // Three low bits set; presumably one per property (X, Y, Z) - confirm.
+ SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};  // "polo" sits at value indices 1 and 3.
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(6));  // 3 properties x 2 "polo" values each.
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("X[1]"));  // Path = property name + [value index].
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("X[3]"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
+
+ EXPECT_THAT(GetPropertyPaths(snippet),  // Expected order: by property (X, Y, Z), then by value index.
+ ElementsAre("X[1]", "X[3]", "Y[1]", "Y[3]", "Z[1]", "Z[3]"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevel) {  // Document with three optional nested-document properties.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("SingleLevelType")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("X")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Y")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Z")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("MultiLevelType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("A")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("B")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("C")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true));
+ ICING_ASSERT_OK_AND_ASSIGN(  // Re-create the retriever after replacing the schema.
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"};
+ DocumentProto subdocument;  // Shared template copied into A, B and C below.
+ PropertyProto* prop = subdocument.add_properties();
+ prop->set_name("X");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = subdocument.add_properties();
+ prop->set_name("Y");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = subdocument.add_properties();
+ prop->set_name("Z");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+
+ DocumentProto document;
+ document.set_schema("MultiLevelType");
+ prop = document.add_properties();
+ prop->set_name("A");
+ *prop->add_document_values() = subdocument;  // One nested document per property.
+
+ prop = document.add_properties();
+ prop->set_name("B");
+ *prop->add_document_values() = subdocument;
+
+ prop = document.add_properties();
+ prop->set_name("C");
+ *prop->add_document_values() = subdocument;
+
+ SectionIdMask section_mask = 0b111111111;  // Nine low bits set; presumably one per nested string property (3 x 3) - confirm.
+ SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};  // "polo" sits at value indices 1 and 3.
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(18));  // 3 nested documents x 3 properties x 2 "polo" values.
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("A.X[1]"));  // Path = <document property>.<string property>[value index].
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("A.X[3]"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
+
+ EXPECT_THAT(
+ GetPropertyPaths(snippet),  // Expected order: A, B, C; within each X, Y, Z; then value index.
+ ElementsAre("A.X[1]", "A.X[3]", "A.Y[1]", "A.Y[3]", "A.Z[1]", "A.Z[3]",
+ "B.X[1]", "B.X[3]", "B.Y[1]", "B.Y[3]", "B.Z[1]", "B.Z[3]",
+ "C.X[1]", "C.X[3]", "C.Y[1]", "C.Y[3]", "C.Z[1]", "C.Z[3]"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelRepeated) {  // Repeated nested documents holding repeated strings.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("SingleLevelType")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("X")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Y")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Z")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("MultiLevelType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("A")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("B")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("C")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true));
+ ICING_ASSERT_OK_AND_ASSIGN(  // Re-create the retriever after replacing the schema.
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ std::vector<std::string> string_values = {"marco", "polo", "marco", "polo"};
+ DocumentProto subdocument;  // Shared template copied twice into each of A, B and C below.
+ PropertyProto* prop = subdocument.add_properties();
+ prop->set_name("X");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = subdocument.add_properties();
+ prop->set_name("Y");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+ prop = subdocument.add_properties();
+ prop->set_name("Z");
+ for (const std::string& s : string_values) {
+ prop->add_string_values(s);
+ }
+
+ DocumentProto document;
+ document.set_schema("MultiLevelType");
+ prop = document.add_properties();
+ prop->set_name("A");
+ *prop->add_document_values() = subdocument;  // A[0]
+ *prop->add_document_values() = subdocument;  // A[1]
+
+ prop = document.add_properties();
+ prop->set_name("B");
+ *prop->add_document_values() = subdocument;  // B[0]
+ *prop->add_document_values() = subdocument;  // B[1]
+
+ prop = document.add_properties();
+ prop->set_name("C");
+ *prop->add_document_values() = subdocument;  // C[0]
+ *prop->add_document_values() = subdocument;  // C[1]
+
+ SectionIdMask section_mask = 0b111111111;  // Nine low bits set; presumably one per nested string property (3 x 3) - confirm.
+ SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};  // "polo" sits at value indices 1 and 3.
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(36));  // 3 document properties x 2 documents x 3 string properties x 2 "polo" values.
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("A[0].X[1]"));  // Both path levels carry an index here.
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("A[0].X[3]"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
+
+ EXPECT_THAT(GetPropertyPaths(snippet),  // Expected order: A, B, C; then X, Y, Z; then document index; then value index.
+ ElementsAre("A[0].X[1]", "A[0].X[3]", "A[1].X[1]", "A[1].X[3]",
+ "A[0].Y[1]", "A[0].Y[3]", "A[1].Y[1]", "A[1].Y[3]",
+ "A[0].Z[1]", "A[0].Z[3]", "A[1].Z[1]", "A[1].Z[3]",
+ "B[0].X[1]", "B[0].X[3]", "B[1].X[1]", "B[1].X[3]",
+ "B[0].Y[1]", "B[0].Y[3]", "B[1].Y[1]", "B[1].Y[3]",
+ "B[0].Z[1]", "B[0].Z[3]", "B[1].Z[1]", "B[1].Z[3]",
+ "C[0].X[1]", "C[0].X[3]", "C[1].X[1]", "C[1].X[3]",
+ "C[0].Y[1]", "C[0].Y[3]", "C[1].Y[1]", "C[1].Y[3]",
+ "C[0].Z[1]", "C[0].Z[3]", "C[1].Z[1]", "C[1].Z[3]"));
+}
+
+TEST_F(SnippetRetrieverTest, SnippetingTestMultiLevelSingleValue) {  // Repeated nested documents holding single-valued strings.
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("SingleLevelType")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("X")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Y")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Z")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("MultiLevelType")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("A")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("B")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("C")
+ .SetDataTypeDocument(
+ "SingleLevelType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ICING_ASSERT_OK(schema_store_->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true));
+ ICING_ASSERT_OK_AND_ASSIGN(  // Re-create the retriever after replacing the schema.
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ DocumentProto subdocument;  // Shared template copied twice into each of A, B and C below.
+ PropertyProto* prop = subdocument.add_properties();
+ prop->set_name("X");
+ prop->add_string_values("polo");
+ prop = subdocument.add_properties();
+ prop->set_name("Y");
+ prop->add_string_values("marco");  // Y is the only property that does not contain the query term.
+ prop = subdocument.add_properties();
+ prop->set_name("Z");
+ prop->add_string_values("polo");
+
+ DocumentProto document;
+ document.set_schema("MultiLevelType");
+ prop = document.add_properties();
+ prop->set_name("A");
+ *prop->add_document_values() = subdocument;  // A[0]
+ *prop->add_document_values() = subdocument;  // A[1]
+
+ prop = document.add_properties();
+ prop->set_name("B");
+ *prop->add_document_values() = subdocument;  // B[0]
+ *prop->add_document_values() = subdocument;  // B[1]
+
+ prop = document.add_properties();
+ prop->set_name("C");
+ *prop->add_document_values() = subdocument;  // C[0]
+ *prop->add_document_values() = subdocument;  // C[1]
+
+ SectionIdMask section_mask = 0b111111111;  // Nine low bits set; presumably one per nested string property (3 x 3) - confirm.
+ SectionRestrictQueryTermsMap query_terms{{"", {"polo"}}};
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_EXACT, snippet_spec_, document, section_mask);
+
+ EXPECT_THAT(snippet.entries(), SizeIs(12));  // 3 document properties x 2 documents x 2 matching properties (X, Z).
+ EXPECT_THAT(snippet.entries(0).property_name(), Eq("A[0].X"));  // No value index is expected on X/Z in this test.
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(0)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(0)), ElementsAre("polo"));
+
+ EXPECT_THAT(snippet.entries(1).property_name(), Eq("A[1].X"));
+ content = GetString(&document, snippet.entries(1).property_name());
+ EXPECT_THAT(GetWindows(content, snippet.entries(1)), ElementsAre("polo"));
+ EXPECT_THAT(GetMatches(content, snippet.entries(1)), ElementsAre("polo"));
+
+ EXPECT_THAT(
+ GetPropertyPaths(snippet),  // Expected order: A, B, C; then X, Z; then document index. Y never appears.
+ ElementsAre("A[0].X", "A[1].X", "A[0].Z", "A[1].Z", "B[0].X", "B[1].X",
+ "B[0].Z", "B[1].Z", "C[0].X", "C[1].X", "C[0].Z", "C[1].Z"));
+}
+
+TEST_F(SnippetRetrieverTest, CJKSnippetMatchTest) {
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^
+ // UTF8 idx: 0 3 9 15 18
+ // UTF16 idx: 0 1 3 5 6
+ // Breaks into segments: "我", "每天", "走路", "去", "上班"
+ constexpr std::string_view kChinese = "我每天走路去上班。";
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", kChinese)
+ .AddStringProperty("body",
+ "Concerning the subject of foo, we need to begin "
+ "considering our options regarding body bar.")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"走"}}};  // Prefix of the segment "走路".
+
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ // Ensure that one and only one property was matched and it was "subject"
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &snippet.entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+
+ // Ensure that there is one and only one match within "subject"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+
+ // Ensure that the match is the whole segment "走路", not just the query "走".
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("走路"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(3));  // "走路" starts at UTF-16 index 3.
+ EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(2));  // Two characters, one UTF-16 code unit each.
+}
+
+TEST_F(SnippetRetrieverTest, CJKSnippetWindowTest) {
+ language_segmenter_factory::SegmenterOptions options(ULOC_SIMPLIFIED_CHINESE);
+ ICING_ASSERT_OK_AND_ASSIGN(
+ language_segmenter_,
+ language_segmenter_factory::Create(std::move(options)));
+ ICING_ASSERT_OK_AND_ASSIGN(  // Rebuild the retriever so it uses the segmenter created above.
+ snippet_retriever_,
+ SnippetRetriever::Create(schema_store_.get(), language_segmenter_.get(),
+ normalizer_.get()));
+
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^
+ // UTF8 idx: 0 3 9 15 18
+ // UTF16 idx: 0 1 3 5 6
+ // Breaks into segments: "我", "每天", "走路", "去", "上班"
+ constexpr std::string_view kChinese = "我每天走路去上班。";
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", kChinese)
+ .AddStringProperty("body",
+ "Concerning the subject of foo, we need to begin "
+ "considering our options regarding body bar.")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"走"}}};  // Prefix of the segment "走路".
+
+ // Set a twenty byte window. This will produce a window like this:
+ // String: "我每天走路去上班。"
+ // ^ ^
+ // UTF8 idx: 3 18
+ // UTF16 idx: 1 6
+ snippet_spec_.set_max_window_bytes(20);
+
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ // Ensure that one and only one property was matched and it was "subject"
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &snippet.entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+
+ // Ensure that there is one and only one match within "subject"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+
+ // Ensure that the window is correct.
+ EXPECT_THAT(GetWindows(content, *entry), ElementsAre("每天走路去"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.window_utf16_position(), Eq(1));  // Window starts at "每" (UTF-16 index 1).
+ EXPECT_THAT(match_proto.window_utf16_length(), Eq(5));  // Five characters, one UTF-16 code unit each.
+}
+
+TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitSnippetMatchTest) {
+ // The following string has four-byte UTF-8 characters. Most importantly, each
+ // of those characters is also two code units in UTF-16.
+ // String: "𐀀𐀁 𐀂𐀃 𐀄"
+ // ^ ^ ^
+ // UTF8 idx: 0 9 18
+ // UTF16 idx: 0 5 10
+ // Breaks into segments: "𐀀𐀁", "𐀂𐀃", "𐀄"
+ constexpr std::string_view kText = "𐀀𐀁 𐀂𐀃 𐀄";
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", kText)
+ .AddStringProperty("body",
+ "Concerning the subject of foo, we need to begin "
+ "considering our options regarding body bar.")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"𐀂"}}};  // Prefix of the segment "𐀂𐀃".
+
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ // Ensure that one and only one property was matched and it was "subject"
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &snippet.entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+
+ // Ensure that there is one and only one match within "subject"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+
+ // Ensure that the match is the whole segment "𐀂𐀃", not just the query "𐀂".
+ EXPECT_THAT(GetMatches(content, *entry), ElementsAre("𐀂𐀃"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.exact_match_utf16_position(), Eq(5));  // "𐀂𐀃" starts at UTF-16 index 5.
+ EXPECT_THAT(match_proto.exact_match_utf16_length(), Eq(4));  // Two characters, two UTF-16 code units each.
+}
+
+TEST_F(SnippetRetrieverTest, Utf16MultiCodeUnitWindowTest) {
+ // The following string has four-byte UTF-8 characters. Most importantly, each
+ // of those characters is also two code units in UTF-16.
+ // String: "𐀀𐀁 𐀂𐀃 𐀄"
+ // ^ ^ ^
+ // UTF8 idx: 0 9 18
+ // UTF16 idx: 0 5 10
+ // Breaks into segments: "𐀀𐀁", "𐀂𐀃", "𐀄"
+ constexpr std::string_view kText = "𐀀𐀁 𐀂𐀃 𐀄";
+ DocumentProto document =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", kText)
+ .AddStringProperty("body",
+ "Concerning the subject of foo, we need to begin "
+ "considering our options regarding body bar.")
+ .Build();
+
+ SectionIdMask section_mask = 0b00000011;
+ SectionRestrictQueryTermsMap query_terms{{"", {"𐀂"}}};  // Prefix of the segment "𐀂𐀃".
+
+ // Set a twenty byte window. This will produce a window like this:
+ // String: "𐀀𐀁 𐀂𐀃 𐀄"
+ // ^ ^
+ // UTF8 idx: 9 22
+ // UTF16 idx: 5 12
+ snippet_spec_.set_max_window_bytes(20);
+
+ SnippetProto snippet = snippet_retriever_->RetrieveSnippet(
+ query_terms, MATCH_PREFIX, snippet_spec_, document, section_mask);
+
+ // Ensure that one and only one property was matched and it was "subject"
+ ASSERT_THAT(snippet.entries(), SizeIs(1));
+ const SnippetProto::EntryProto* entry = &snippet.entries(0);
+ EXPECT_THAT(entry->property_name(), Eq("subject"));
+ std::string_view content =
+ GetString(&document, snippet.entries(0).property_name());
+
+ // Ensure that there is one and only one match within "subject"
+ ASSERT_THAT(entry->snippet_matches(), SizeIs(1));
+ const SnippetMatchProto& match_proto = entry->snippet_matches(0);
+
+ // Ensure that the window is correct.
+ EXPECT_THAT(GetWindows(content, *entry), ElementsAre("𐀂𐀃 𐀄"));
+
+ // Ensure that the utf-16 values are also as expected
+ EXPECT_THAT(match_proto.window_utf16_position(), Eq(5));  // Window starts at "𐀂" (UTF-16 index 5).
+ EXPECT_THAT(match_proto.window_utf16_length(), Eq(7));  // 2 + 2 + 1 (space) + 2 UTF-16 code units.
}
} // namespace
diff --git a/icing/schema-builder.h b/icing/schema-builder.h
new file mode 100644
index 0000000..59ed7c5
--- /dev/null
+++ b/icing/schema-builder.h
@@ -0,0 +1,130 @@
+// Copyright (C) 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ICING_SCHEMA_BUILDER_H_
+#define ICING_SCHEMA_BUILDER_H_
+
+#include <cstdint>
+#include <initializer_list>
+#include <string>
+#include <string_view>
+#include <utility>
+
+#include "icing/proto/schema.pb.h"
+
+namespace icing {
+namespace lib {
+
+class PropertyConfigBuilder {  // Chainable PropertyConfigProto builder.
+ public:
+ PropertyConfigBuilder() = default;
+ explicit PropertyConfigBuilder(PropertyConfigProto property)
+ : property_(std::move(property)) {}
+
+ PropertyConfigBuilder& SetName(std::string_view name) {
+ property_.set_property_name(std::string(name));
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetDataType(
+ PropertyConfigProto::DataType::Code data_type) {
+ property_.set_data_type(data_type);
+ return *this;
+ }
+ // Sets the data type to STRING along with its string indexing config.
+ PropertyConfigBuilder& SetDataTypeString(
+ TermMatchType::Code match_type,
+ StringIndexingConfig::TokenizerType::Code tokenizer) {
+ property_.set_data_type(PropertyConfigProto::DataType::STRING);
+ property_.mutable_string_indexing_config()->set_term_match_type(match_type);
+ property_.mutable_string_indexing_config()->set_tokenizer_type(tokenizer);
+ return *this;
+ }
+ // Sets the data type to DOCUMENT, its schema type and nested indexing flag.
+ PropertyConfigBuilder& SetDataTypeDocument(std::string_view schema_type,
+ bool index_nested_properties) {
+ property_.set_data_type(PropertyConfigProto::DataType::DOCUMENT);
+ property_.set_schema_type(std::string(schema_type));
+ property_.mutable_document_indexing_config()->set_index_nested_properties(
+ index_nested_properties);
+ return *this;
+ }
+
+ PropertyConfigBuilder& SetCardinality(
+ PropertyConfigProto::Cardinality::Code cardinality) {
+ property_.set_cardinality(cardinality);
+ return *this;
+ }
+ // Returns a copy: Build() is const, so property_ cannot be moved out.
+ PropertyConfigProto Build() const { return property_; }
+
+ private:
+ PropertyConfigProto property_;
+};
+
+class SchemaTypeConfigBuilder {  // Chainable SchemaTypeConfigProto builder.
+ public:
+ SchemaTypeConfigBuilder() = default;
+ SchemaTypeConfigBuilder(SchemaTypeConfigProto type_config)
+ : type_config_(std::move(type_config)) {}
+
+ SchemaTypeConfigBuilder& SetType(std::string_view type) {
+ type_config_.set_schema_type(std::string(type));
+ return *this;
+ }
+
+ SchemaTypeConfigBuilder& SetVersion(int version) {
+ type_config_.set_version(version);
+ return *this;
+ }
+
+ SchemaTypeConfigBuilder& AddProperty(PropertyConfigProto property) {
+ *type_config_.add_properties() = std::move(property);
+ return *this;
+ }
+ SchemaTypeConfigBuilder& AddProperty(PropertyConfigBuilder property_builder) {
+ *type_config_.add_properties() = property_builder.Build();
+ return *this;
+ }
+ // Moves the accumulated proto out of the builder.
+ SchemaTypeConfigProto Build() { return std::move(type_config_); }
+
+ private:
+ SchemaTypeConfigProto type_config_;
+};
+
+class SchemaBuilder {  // Chainable SchemaProto builder.
+ public:
+ SchemaBuilder() = default;
+ SchemaBuilder(SchemaProto schema) : schema_(std::move(schema)) {}
+
+ SchemaBuilder& AddType(SchemaTypeConfigProto type) {
+ *schema_.add_types() = std::move(type);
+ return *this;
+ }
+ SchemaBuilder& AddType(SchemaTypeConfigBuilder type_builder) {
+ *schema_.add_types() = type_builder.Build();
+ return *this;
+ }
+ // Moves the accumulated proto out of the builder.
+ SchemaProto Build() { return std::move(schema_); }
+
+ private:
+ SchemaProto schema_;
+};
+
+} // namespace lib
+} // namespace icing
+
+#endif // ICING_SCHEMA_BUILDER_H_
diff --git a/icing/schema/schema-store.cc b/icing/schema/schema-store.cc
index b43d2a4..7040a31 100644
--- a/icing/schema/schema-store.cc
+++ b/icing/schema/schema-store.cc
@@ -104,7 +104,7 @@
libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> SchemaStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
- const Clock* clock, NativeInitializeStats* initialize_stats) {
+ const Clock* clock, InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
@@ -122,7 +122,7 @@
schema_file_(*filesystem, MakeSchemaFilename(base_dir_)) {}
SchemaStore::~SchemaStore() {
- if (initialized_) {
+ if (has_schema_successfully_set_) {
if (!PersistToDisk().ok()) {
ICING_LOG(ERROR) << "Error persisting to disk in SchemaStore destructor";
}
@@ -130,7 +130,7 @@
}
libtextclassifier3::Status SchemaStore::Initialize(
- NativeInitializeStats* initialize_stats) {
+ InitializeStatsProto* initialize_stats) {
auto schema_proto_or = GetSchema();
if (absl_ports::IsNotFound(schema_proto_or.status())) {
// Don't have an existing schema proto, that's fine
@@ -139,6 +139,7 @@
// Real error when trying to read the existing schema
return schema_proto_or.status();
}
+ has_schema_successfully_set_ = true;
if (!InitializeDerivedFiles().ok()) {
ICING_VLOG(3)
@@ -147,7 +148,7 @@
std::unique_ptr<Timer> regenerate_timer = clock_.GetNewTimer();
if (initialize_stats != nullptr) {
initialize_stats->set_schema_store_recovery_cause(
- NativeInitializeStats::IO_ERROR);
+ InitializeStatsProto::IO_ERROR);
}
ICING_RETURN_IF_ERROR(RegenerateDerivedFiles());
if (initialize_stats != nullptr) {
@@ -156,7 +157,6 @@
}
}
- initialized_ = true;
if (initialize_stats != nullptr) {
initialize_stats->set_num_schema_types(type_config_map_.size());
}
@@ -253,9 +253,12 @@
header.magic = SchemaStore::Header::kMagic;
header.checksum = checksum.Get();
+ ScopedFd scoped_fd(
+ filesystem_.OpenForWrite(MakeHeaderFilename(base_dir_).c_str()));
// This should overwrite the header.
- if (!filesystem_.Write(MakeHeaderFilename(base_dir_).c_str(), &header,
- sizeof(header))) {
+ if (!scoped_fd.is_valid() ||
+ !filesystem_.Write(scoped_fd.get(), &header, sizeof(header)) ||
+ !filesystem_.DataSync(scoped_fd.get())) {
return absl_ports::InternalError(absl_ports::StrCat(
"Failed to write SchemaStore header: ", MakeHeaderFilename(base_dir_)));
}
@@ -285,18 +288,11 @@
libtextclassifier3::StatusOr<Crc32> SchemaStore::ComputeChecksum() const {
Crc32 total_checksum;
-
- auto schema_proto_or = GetSchema();
- if (absl_ports::IsNotFound(schema_proto_or.status())) {
+ if (!has_schema_successfully_set_) {
// Nothing to checksum
return total_checksum;
- } else if (!schema_proto_or.ok()) {
- // Some real error. Pass it up
- return schema_proto_or.status();
}
-
- // Guaranteed to have a schema proto now
- const SchemaProto* schema_proto = schema_proto_or.ValueOrDie();
+ ICING_ASSIGN_OR_RETURN(const SchemaProto* schema_proto, GetSchema());
Crc32 schema_checksum;
schema_checksum.Append(schema_proto->SerializeAsString());
@@ -390,6 +386,7 @@
// Write the schema (and potentially overwrite a previous schema)
ICING_RETURN_IF_ERROR(
schema_file_.Write(std::make_unique<SchemaProto>(new_schema)));
+ has_schema_successfully_set_ = true;
ICING_RETURN_IF_ERROR(RegenerateDerivedFiles());
}
@@ -399,14 +396,7 @@
libtextclassifier3::StatusOr<const SchemaTypeConfigProto*>
SchemaStore::GetSchemaTypeConfig(std::string_view schema_type) const {
- auto schema_proto_or = GetSchema();
- if (absl_ports::IsNotFound(schema_proto_or.status())) {
- return absl_ports::FailedPreconditionError("Schema not set yet.");
- } else if (!schema_proto_or.ok()) {
- // Some other real error, pass it up
- return schema_proto_or.status();
- }
-
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());
const auto& type_config_iter =
type_config_map_.find(std::string(schema_type));
if (type_config_iter == type_config_map_.end()) {
@@ -418,39 +408,42 @@
libtextclassifier3::StatusOr<SchemaTypeId> SchemaStore::GetSchemaTypeId(
std::string_view schema_type) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());  // No schema means a null mapper.
return schema_type_mapper_->Get(schema_type);
}
libtextclassifier3::StatusOr<std::vector<std::string_view>>
SchemaStore::GetStringSectionContent(const DocumentProto& document,
std::string_view section_path) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());  // Fails fast if no schema is set.
return section_manager_->GetStringSectionContent(document, section_path);
}
libtextclassifier3::StatusOr<std::vector<std::string_view>>
SchemaStore::GetStringSectionContent(const DocumentProto& document,
SectionId section_id) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());  // Fails fast if no schema is set.
return section_manager_->GetStringSectionContent(document, section_id);
}
libtextclassifier3::StatusOr<const SectionMetadata*>
SchemaStore::GetSectionMetadata(SchemaTypeId schema_type_id,
SectionId section_id) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());  // Fails fast if no schema is set.
return section_manager_->GetSectionMetadata(schema_type_id, section_id);
}
libtextclassifier3::StatusOr<std::vector<Section>> SchemaStore::ExtractSections(
const DocumentProto& document) const {
+ ICING_RETURN_IF_ERROR(CheckSchemaSet());  // Fails fast if no schema is set.
return section_manager_->ExtractSections(document);
}
libtextclassifier3::Status SchemaStore::PersistToDisk() {
- if (schema_type_mapper_ != nullptr) {
- // It's possible we haven't had a schema set yet, so SchemaTypeMapper hasn't
- // been initialized and is still a nullptr
- ICING_RETURN_IF_ERROR(schema_type_mapper_->PersistToDisk());
+ if (!has_schema_successfully_set_) {
+ return libtextclassifier3::Status::OK;
}
-
+ ICING_RETURN_IF_ERROR(schema_type_mapper_->PersistToDisk());
// Write the header
ICING_ASSIGN_OR_RETURN(Crc32 checksum, ComputeChecksum());
ICING_RETURN_IF_ERROR(UpdateHeader(checksum));
@@ -458,5 +451,35 @@
return libtextclassifier3::Status::OK;
}
+SchemaStoreStorageInfoProto SchemaStore::GetStorageInfo() const {
+ // Report the on-disk size of the schema store directory, or -1 when the
+ // filesystem reports kBadFileSize for it.
+ SchemaStoreStorageInfoProto storage_info;
+ const int64_t disk_usage = filesystem_.GetDiskUsage(base_dir_.c_str());
+ storage_info.set_schema_store_size(
+ disk_usage == Filesystem::kBadFileSize ? -1 : disk_usage);
+
+ // Presumably returns storage_info as-is when GetSchema() fails.
+ ICING_ASSIGN_OR_RETURN(const SchemaProto* schema, GetSchema(), storage_info);
+ storage_info.set_num_schema_types(schema->types_size());
+ int section_count = 0;
+ int exhausted_type_count = 0;
+ for (const SchemaTypeConfigProto& type_config : schema->types()) {
+ auto metadata_list_or =
+ section_manager_->GetMetadataList(type_config.schema_type());
+ if (metadata_list_or.ok()) {
+ const auto num_sections = metadata_list_or.ValueOrDie()->size();
+ section_count += num_sections;
+ // kMaxSectionId + 1 sections means every section id is in use.
+ if (num_sections == kMaxSectionId + 1) {
+ ++exhausted_type_count;
+ }
+ }
+ }
+ storage_info.set_num_total_sections(section_count);
+ storage_info.set_num_schema_types_sections_exhausted(exhausted_type_count);
+ return storage_info;
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/schema/schema-store.h b/icing/schema/schema-store.h
index 3854704..dd1edb8 100644
--- a/icing/schema/schema-store.h
+++ b/icing/schema/schema-store.h
@@ -29,6 +29,7 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/logging.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/schema/schema-util.h"
#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
@@ -115,7 +116,7 @@
// INTERNAL_ERROR on any IO errors
static libtextclassifier3::StatusOr<std::unique_ptr<SchemaStore>> Create(
const Filesystem* filesystem, const std::string& base_dir,
- const Clock* clock, NativeInitializeStats* initialize_stats = nullptr);
+ const Clock* clock, InitializeStatsProto* initialize_stats = nullptr);
// Not copyable
SchemaStore(const SchemaStore&) = delete;
@@ -167,6 +168,7 @@
//
// Returns:
// SchemaTypeId on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
// NOT_FOUND_ERROR if we don't know about the schema type
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<SchemaTypeId> GetSchemaTypeId(
@@ -176,6 +178,7 @@
//
// Returns:
// A string of content on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
// NOT_FOUND if:
// 1. Property is optional and not found in the document
// 2. section_path is invalid
@@ -188,6 +191,7 @@
//
// Returns:
// A string of content on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
// INVALID_ARGUMENT if section id is invalid
// NOT_FOUND if type config name of document not found
libtextclassifier3::StatusOr<std::vector<std::string_view>>
@@ -199,6 +203,7 @@
//
// Returns:
// pointer to SectionMetadata on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
// INVALID_ARGUMENT if schema type id or section is invalid
libtextclassifier3::StatusOr<const SectionMetadata*> GetSectionMetadata(
SchemaTypeId schema_type_id, SectionId section_id) const;
@@ -209,6 +214,7 @@
//
// Returns:
// A list of sections on success
+ // FAILED_PRECONDITION if schema hasn't been set yet
// NOT_FOUND if type config name of document not found
libtextclassifier3::StatusOr<std::vector<Section>> ExtractSections(
const DocumentProto& document) const;
@@ -228,6 +234,12 @@
// INTERNAL_ERROR on compute error
libtextclassifier3::StatusOr<Crc32> ComputeChecksum() const;
+ // Calculates the StorageInfo for the Schema Store.
+ //
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ SchemaStoreStorageInfoProto GetStorageInfo() const;
+
private:
// Use SchemaStore::Create instead.
explicit SchemaStore(const Filesystem* filesystem, std::string base_dir,
@@ -238,8 +250,7 @@
// Returns:
// OK on success
// INTERNAL_ERROR on IO error
- libtextclassifier3::Status Initialize(
- NativeInitializeStats* initialize_stats);
+ libtextclassifier3::Status Initialize(InitializeStatsProto* initialize_stats);
// Creates sub-components and verifies the integrity of each sub-component.
//
@@ -275,16 +286,20 @@
// Returns any IO errors.
libtextclassifier3::Status ResetSchemaTypeMapper();
+ // OK once a schema has been set; FAILED_PRECONDITION until then.
+ libtextclassifier3::Status CheckSchemaSet() const {
+ if (has_schema_successfully_set_) return libtextclassifier3::Status::OK;
+ return absl_ports::FailedPreconditionError("Schema not set yet.");
+ }
+
const Filesystem& filesystem_;
const std::string base_dir_;
const Clock& clock_;
- // Used internally to indicate whether the class has been initialized. This is
- // to guard against cases where the object has been created, but Initialize
- // fails in the constructor. If we have successfully exited the constructor,
- // then this field can be ignored. Clients of SchemaStore should not need to
- // worry about this field.
- bool initialized_ = false;
+ // Used internally to indicate whether the class has been successfully
+ // initialized with a valid schema. Will be false if Initialize failed or no
+ // schema has ever been set.
+ bool has_schema_successfully_set_ = false;
// Cached schema
FileBackedProto<SchemaProto> schema_file_;
diff --git a/icing/schema/schema-store_test.cc b/icing/schema/schema-store_test.cc
index 7df3dd9..69663b5 100644
--- a/icing/schema/schema-store_test.cc
+++ b/icing/schema/schema-store_test.cc
@@ -25,13 +25,15 @@
#include "icing/portable/equals-proto.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-util.h"
#include "icing/schema/section-manager.h"
#include "icing/schema/section.h"
#include "icing/store/document-filter-data.h"
#include "icing/testing/common-matchers.h"
-#include "icing/testing/tmp-directory.h"
#include "icing/testing/fake-clock.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/crc32.h"
namespace icing {
namespace lib {
@@ -41,26 +43,37 @@
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::ElementsAre;
using ::testing::Eq;
+using ::testing::Ge;
using ::testing::Not;
using ::testing::Pointee;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
+
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
+
+constexpr PropertyConfigProto_DataType_Code TYPE_STRING =
+ PropertyConfigProto_DataType_Code_STRING;
+constexpr PropertyConfigProto_DataType_Code TYPE_DOUBLE =
+ PropertyConfigProto_DataType_Code_DOUBLE;
+
class SchemaStoreTest : public ::testing::Test {
protected:
SchemaStoreTest() : test_dir_(GetTestTempDir() + "/icing") {
filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
- auto type = schema_.add_types();
- type->set_schema_type("email");
-
- // Add an indexed property so we generate section metadata on it
- auto property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ schema_ =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ // Add an indexed property so we generate section metadata on it
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
}
void TearDown() override {
@@ -74,8 +87,9 @@
};
TEST_F(SchemaStoreTest, CreationWithNullPointerShouldFail) {
- EXPECT_THAT(SchemaStore::Create(/*filesystem=*/nullptr, test_dir_, &fake_clock_),
- StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(
+ SchemaStore::Create(/*filesystem=*/nullptr, test_dir_, &fake_clock_),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_F(SchemaStoreTest, CorruptSchemaError) {
@@ -97,9 +111,10 @@
// "Corrupt" the ground truth schema by adding new data to it. This will mess
// up the checksum of the schema store
- SchemaProto corrupt_schema;
- auto type = corrupt_schema.add_types();
- type->set_schema_type("corrupted");
+ SchemaProto corrupt_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("corrupted"))
+ .Build();
const std::string schema_file = absl_ports::StrCat(test_dir_, "/schema.pb");
const std::string serialized_schema = corrupt_schema.SerializeAsString();
@@ -190,7 +205,36 @@
}
TEST_F(SchemaStoreTest, CreateNoPreviousSchemaOk) {
- EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_), IsOk());
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+
+ // The APIs to retrieve information about the schema should fail gracefully.
+ EXPECT_THAT(store->GetSchema(),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(store->GetSchemaTypeConfig("foo"),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->GetSchemaTypeId("foo"),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->GetSectionMetadata(/*schema_type_id=*/0, /*section_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ // The APIs to extract content from a document should fail gracefully.
+ DocumentProto doc;
+ PropertyProto* prop = doc.add_properties();
+ prop->set_name("name");
+ prop->add_string_values("foo bar baz");
+
+ EXPECT_THAT(store->GetStringSectionContent(doc, /*section_id=*/0),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->GetStringSectionContent(doc, "name"),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+ EXPECT_THAT(store->ExtractSections(doc),
+ StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
+
+ // The APIs to persist and checksum data should succeed with no schema set.
+ EXPECT_THAT(store->ComputeChecksum(), IsOkAndHolds(Crc32()));
+ EXPECT_THAT(store->PersistToDisk(), IsOk());
}
TEST_F(SchemaStoreTest, CreateWithPreviousSchemaOk) {
@@ -204,7 +248,8 @@
IsOkAndHolds(EqualsSetSchemaResult(result)));
schema_store.reset();
- EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_), IsOk());
+ EXPECT_THAT(SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_),
+ IsOk());
}
TEST_F(SchemaStoreTest, MultipleCreateOk) {
@@ -314,9 +359,9 @@
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("email");
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
// Set it for the first time
SchemaStore::SetSchemaResult result;
@@ -328,8 +373,9 @@
EXPECT_THAT(*actual_schema, EqualsProto(schema));
// Add a type, shouldn't affect the index or cached SchemaTypeIds
- type = schema.add_types();
- type->set_schema_type("new_type");
+ schema = SchemaBuilder(schema)
+ .AddType(SchemaTypeConfigBuilder().SetType("new_type"))
+ .Build();
// Set the compatible schema
EXPECT_THAT(schema_store->SetSchema(schema),
@@ -343,11 +389,11 @@
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("email");
- type = schema.add_types();
- type->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
// Set it for the first time
SchemaStore::SetSchemaResult result;
@@ -364,9 +410,9 @@
schema_store->GetSchemaTypeId("message"));
// Remove "email" type, this also changes previous SchemaTypeIds
- schema.Clear();
- type = schema.add_types();
- type->set_schema_type("message");
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
SchemaStore::SetSchemaResult incompatible_result;
incompatible_result.success = false;
@@ -399,11 +445,11 @@
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("email");
- type = schema.add_types();
- type->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
// Set it for the first time
SchemaStore::SetSchemaResult result;
@@ -415,11 +461,10 @@
EXPECT_THAT(*actual_schema, EqualsProto(schema));
// Reorder the types
- schema.clear_types();
- type = schema.add_types();
- type->set_schema_type("message");
- type = schema.add_types();
- type->set_schema_type("email");
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
// Since we assign SchemaTypeIds based on order in the SchemaProto, this will
// cause SchemaTypeIds to change
@@ -439,15 +484,15 @@
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("email");
-
- // Add an unindexed property
- auto property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ // Add an unindexed property
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// Set it for the first time
SchemaStore::SetSchemaResult result;
@@ -459,11 +504,13 @@
EXPECT_THAT(*actual_schema, EqualsProto(schema));
// Make a previously unindexed property indexed
- property = schema.mutable_types(0)->mutable_properties(0);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// With a new indexed property, we'll need to reindex
result.index_incompatible = true;
@@ -480,15 +527,15 @@
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- SchemaProto schema;
- auto type = schema.add_types();
- type->set_schema_type("email");
-
- // Add a STRING property
- auto property = type->add_properties();
- property->set_property_name("subject");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ // Add a STRING property
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// Set it for the first time
SchemaStore::SetSchemaResult result;
@@ -503,8 +550,14 @@
schema_store->GetSchemaTypeId("email"));
// Make a previously STRING property into DOUBLE
- property = schema.mutable_types(0)->mutable_properties(0);
- property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ // Same property, now with data type DOUBLE
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
SchemaStore::SetSchemaResult incompatible_result;
incompatible_result.success = false;
@@ -570,9 +623,8 @@
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- SchemaProto foo_schema;
- auto type_config = foo_schema.add_types();
- type_config->set_schema_type("foo");
+ SchemaProto foo_schema =
+ SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
@@ -587,9 +639,8 @@
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- SchemaProto foo_schema;
- auto type_config = foo_schema.add_types();
- type_config->set_schema_type("foo");
+ SchemaProto foo_schema =
+ SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
@@ -608,20 +659,19 @@
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- SchemaProto foo_schema;
- auto type_config = foo_schema.add_types();
- type_config->set_schema_type("foo");
+ SchemaProto foo_schema =
+ SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
ICING_EXPECT_OK(schema_store->SetSchema(foo_schema));
ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, schema_store->ComputeChecksum());
// Modifying the SchemaStore changes the checksum
- SchemaProto foo_bar_schema;
- type_config = foo_bar_schema.add_types();
- type_config->set_schema_type("foo");
- type_config = foo_bar_schema.add_types();
- type_config->set_schema_type("bar");
+ SchemaProto foo_bar_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("foo"))
+ .AddType(SchemaTypeConfigBuilder().SetType("bar"))
+ .Build();
ICING_EXPECT_OK(schema_store->SetSchema(foo_bar_schema));
@@ -642,9 +692,8 @@
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("foo");
+ SchemaProto schema =
+ SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("foo")).Build();
ICING_EXPECT_OK(schema_store->SetSchema(schema));
@@ -656,8 +705,9 @@
EXPECT_THAT(*actual_schema, EqualsProto(schema));
// Modify the schema so that something different is persisted next time
- type_config = schema.add_types();
- type_config->set_schema_type("bar");
+ schema = SchemaBuilder(schema)
+ .AddType(SchemaTypeConfigBuilder().SetType("bar"))
+ .Build();
ICING_EXPECT_OK(schema_store->SetSchema(schema));
// Should also persist on destruction
@@ -670,6 +720,56 @@
EXPECT_THAT(*actual_schema, EqualsProto(schema));
}
+TEST_F(SchemaStoreTest, SchemaStoreStorageInfoProto) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
+
+ // Create a schema with two types: one simple type and one type that uses all
+ // 16 sections.
+ PropertyConfigProto prop =
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)
+ .Build();
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder(prop)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("fullSectionsType")
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop0"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop1"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop2"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop3"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop4"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop5"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop6"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop7"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop8"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop9"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop10"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop11"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop12"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop13"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop14"))
+ .AddProperty(PropertyConfigBuilder(prop).SetName("prop15")))
+ .Build();
+
+ SchemaStore::SetSchemaResult result;
+ result.success = true;
+ EXPECT_THAT(schema_store->SetSchema(schema),
+ IsOkAndHolds(EqualsSetSchemaResult(result)));
+
+ SchemaStoreStorageInfoProto storage_info = schema_store->GetStorageInfo();
+ EXPECT_THAT(storage_info.schema_store_size(), Ge(0));  // -1 means IO error
+ EXPECT_THAT(storage_info.num_schema_types(), Eq(2));
+ EXPECT_THAT(storage_info.num_total_sections(), Eq(17));  // 1 + 16 sections
+ EXPECT_THAT(storage_info.num_schema_types_sections_exhausted(), Eq(1));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/schema/schema-util_test.cc b/icing/schema/schema-util_test.cc
index 61a861c..a2fc8d9 100644
--- a/icing/schema/schema-util_test.cc
+++ b/icing/schema/schema-util_test.cc
@@ -22,6 +22,7 @@
#include "gtest/gtest.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
+#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
namespace icing {
@@ -35,285 +36,310 @@
constexpr char kEmailType[] = "EmailMessage";
constexpr char kPersonType[] = "Person";
-class SchemaUtilTest : public ::testing::Test {
- protected:
- SchemaProto schema_proto_;
+constexpr PropertyConfigProto_DataType_Code TYPE_DOCUMENT =
+ PropertyConfigProto_DataType_Code_DOCUMENT;
+constexpr PropertyConfigProto_DataType_Code TYPE_STRING =
+ PropertyConfigProto_DataType_Code_STRING;
+constexpr PropertyConfigProto_DataType_Code TYPE_INT =
+ PropertyConfigProto_DataType_Code_INT64;
+constexpr PropertyConfigProto_DataType_Code TYPE_DOUBLE =
+ PropertyConfigProto_DataType_Code_DOUBLE;
- static SchemaTypeConfigProto CreateSchemaTypeConfig(
- const std::string_view schema_type,
- const std::string_view nested_schema_type = "") {
- SchemaTypeConfigProto type;
- type.set_schema_type(std::string(schema_type));
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_UNKNOWN =
+ PropertyConfigProto_Cardinality_Code_UNKNOWN;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED =
+ PropertyConfigProto_Cardinality_Code_REQUIRED;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED =
+ PropertyConfigProto_Cardinality_Code_REPEATED;
- auto string_property = type.add_properties();
- string_property->set_property_name("string");
- string_property->set_data_type(PropertyConfigProto::DataType::STRING);
- string_property->set_cardinality(
- PropertyConfigProto::Cardinality::REQUIRED);
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_NONE =
+ StringIndexingConfig_TokenizerType_Code_NONE;
+constexpr StringIndexingConfig_TokenizerType_Code TOKENIZER_PLAIN =
+ StringIndexingConfig_TokenizerType_Code_PLAIN;
- auto int_property = type.add_properties();
- int_property->set_property_name("int");
- int_property->set_data_type(PropertyConfigProto::DataType::INT64);
- int_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+constexpr TermMatchType_Code MATCH_UNKNOWN = TermMatchType_Code_UNKNOWN;
+constexpr TermMatchType_Code MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
+constexpr TermMatchType_Code MATCH_PREFIX = TermMatchType_Code_PREFIX;
- auto double_property = type.add_properties();
- double_property->set_property_name("double");
- double_property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
- double_property->set_cardinality(
- PropertyConfigProto::Cardinality::REPEATED);
-
- auto bool_property = type.add_properties();
- bool_property->set_property_name("boolean");
- bool_property->set_data_type(PropertyConfigProto::DataType::BOOLEAN);
- bool_property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
-
- auto bytes_property = type.add_properties();
- bytes_property->set_property_name("bytes");
- bytes_property->set_data_type(PropertyConfigProto::DataType::BYTES);
- bytes_property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
-
- if (!nested_schema_type.empty()) {
- auto document_property = type.add_properties();
- document_property->set_property_name("document");
- document_property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- document_property->set_cardinality(
- PropertyConfigProto::Cardinality::REPEATED);
- document_property->set_schema_type(std::string(nested_schema_type));
- }
-
- return type;
- }
-};
-
-TEST_F(SchemaUtilTest, EmptySchemaProtoIsValid) {
- ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
+TEST(SchemaUtilTest, EmptySchemaProtoIsValid) {
+ SchemaProto schema;
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema));
}
-TEST_F(SchemaUtilTest, Valid_Nested) {
- auto email_type = schema_proto_.add_types();
- *email_type = CreateSchemaTypeConfig(kEmailType, kPersonType);
+TEST(SchemaUtilTest, Valid_Nested) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("sender")
+ .SetDataTypeDocument(
+ kPersonType,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
- auto person_type = schema_proto_.add_types();
- *person_type = CreateSchemaTypeConfig(kPersonType);
-
- ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema));
}
-TEST_F(SchemaUtilTest, ClearedPropertyConfigsIsValid) {
+TEST(SchemaUtilTest, ClearedPropertyConfigsIsValid) {
// No property fields is technically ok, but probably not realistic.
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
- type->clear_properties();
-
- ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType(kEmailType))
+ .Build();
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema));
}
-TEST_F(SchemaUtilTest, ClearedSchemaTypeIsInvalid) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
- type->clear_schema_type();
-
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST(SchemaUtilTest, ClearedSchemaTypeIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder().AddType(SchemaTypeConfigBuilder()).Build();
+ ASSERT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, EmptySchemaTypeIsInvalid) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
- type->set_schema_type("");
+TEST(SchemaUtilTest, EmptySchemaTypeIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder().AddType(SchemaTypeConfigBuilder().SetType("")).Build();
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+ ASSERT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, AnySchemaTypeOk) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
- type->set_schema_type("abc123!@#$%^&*()_-+=[{]}|\\;:'\",<.>?你好");
+TEST(SchemaUtilTest, AnySchemaTypeOk) {
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType(
+ "abc123!@#$%^&*()_-+=[{]}|\\;:'\",<.>?你好"))
+ .Build();
- ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema));
}
-TEST_F(SchemaUtilTest, ClearedPropertyNameIsInvalid) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->clear_property_name();
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST(SchemaUtilTest, ClearedPropertyNameIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("foo")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ schema.mutable_types(0)->mutable_properties(0)->clear_property_name();
+ ASSERT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, EmptyPropertyNameIsInvalid) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST(SchemaUtilTest, EmptyPropertyNameIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
- auto property = type->add_properties();
- property->set_property_name("");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+ ASSERT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST(SchemaUtilTest, NonAlphanumericPropertyNameIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("a_b")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
- auto property = type->add_properties();
- property->set_property_name("_");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+ ASSERT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, AlphanumericPropertyNameOk) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST(SchemaUtilTest, AlphanumericPropertyNameOk) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("abc123")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
- auto property = type->add_properties();
- property->set_property_name("abc123");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- ICING_ASSERT_OK(SchemaUtil::Validate(schema_proto_));
+ ICING_ASSERT_OK(SchemaUtil::Validate(schema));
}
-TEST_F(SchemaUtilTest, DuplicatePropertyNameIsInvalid) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto first_property = type->add_properties();
- first_property->set_property_name("DuplicatedProperty");
- first_property->set_data_type(PropertyConfigProto::DataType::STRING);
- first_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- auto second_property = type->add_properties();
- second_property->set_property_name("DuplicatedProperty");
- second_property->set_data_type(PropertyConfigProto::DataType::STRING);
- second_property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST(SchemaUtilTest, DuplicatePropertyNameIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("DuplicatedProperty")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("DuplicatedProperty")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::ALREADY_EXISTS));
}
-TEST_F(SchemaUtilTest, ClearedDataTypeIsInvalid) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->clear_data_type();
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST(SchemaUtilTest, ClearedDataTypeIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ schema.mutable_types(0)->mutable_properties(0)->clear_data_type();
+ ASSERT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, UnknownDataTypeIsInvalid) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->set_data_type(PropertyConfigProto::DataType::UNKNOWN);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST(SchemaUtilTest, UnknownDataTypeIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataType(PropertyConfigProto::DataType::UNKNOWN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ ASSERT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, ClearedCardinalityIsInvalid) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->clear_cardinality();
-
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST(SchemaUtilTest, ClearedCardinalityIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
+ schema.mutable_types(0)->mutable_properties(0)->clear_cardinality();
+ ASSERT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, UnknownCardinalityIsInvalid) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::UNKNOWN);
-
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST(SchemaUtilTest, UnknownCardinalityIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_UNKNOWN)))
+ .Build();
+ ASSERT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- property->clear_schema_type();
-
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+TEST(SchemaUtilTest, ClearedPropertySchemaTypeIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataType(TYPE_DOCUMENT)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
+ ASSERT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, Invalid_EmptyPropertySchemaType) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST(SchemaUtilTest, Invalid_EmptyPropertySchemaType) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataTypeDocument(
+ /*schema_type=*/"",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- property->set_schema_type("");
-
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+ ASSERT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
-TEST_F(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) {
- auto type = schema_proto_.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+TEST(SchemaUtilTest, NoMatchingSchemaTypeIsInvalid) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewProperty")
+ .SetDataTypeDocument(
+ /*schema_type=*/"NewSchemaType",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
- auto property = type->add_properties();
- property->set_property_name("NewProperty");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- property->set_schema_type("NewSchemaType");
-
- ASSERT_THAT(SchemaUtil::Validate(schema_proto_),
+ ASSERT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::UNKNOWN,
HasSubstr("Undefined 'schema_type'")));
}
-TEST_F(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
+TEST(SchemaUtilTest, NewOptionalPropertyIsCompatible) {
// Configure old schema
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
// Configure new schema with an optional field, not considered incompatible
// since it's fine if old data doesn't have this optional field
- SchemaProto new_schema_with_optional;
- type = new_schema_with_optional.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("NewOptional");
- property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaProto new_schema_with_optional =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewOptional")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema,
@@ -321,22 +347,33 @@
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, NewRequiredPropertyIsIncompatible) {
+TEST(SchemaUtilTest, NewRequiredPropertyIsIncompatible) {
// Configure old schema
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
// Configure new schema with a required field, considered incompatible since
// old data won't have this required field
- SchemaProto new_schema_with_required;
- type = new_schema_with_required.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("NewRequired");
- property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
- property->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ SchemaProto new_schema_with_required =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NewRequired")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
@@ -345,22 +382,33 @@
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) {
+TEST(SchemaUtilTest, NewSchemaMissingPropertyIsIncompatible) {
// Configure old schema
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("OldOptional");
- property->set_data_type(PropertyConfigProto::DataType::INT64);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("OldOptional")
+ .SetDataType(TYPE_INT)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// Configure new schema, new schema needs to at least have all the
// previously defined properties
- SchemaProto new_schema;
- type = new_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop1")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
@@ -368,28 +416,30 @@
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
+TEST(SchemaUtilTest, CompatibilityOfDifferentCardinalityOk) {
// Configure less restrictive schema based on cardinality
- SchemaProto less_restrictive_schema;
- auto type = less_restrictive_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("Property");
- property->set_data_type(PropertyConfigProto::DataType::INT64);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+ SchemaProto less_restrictive_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataType(TYPE_INT)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
// Configure more restrictive schema based on cardinality
- SchemaProto more_restrictive_schema;
- type = more_restrictive_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+ SchemaProto more_restrictive_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataType(TYPE_INT)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
- property = type->add_properties();
- property->set_property_name("Property");
- property->set_data_type(PropertyConfigProto::DataType::INT64);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- // We can't have a new schema be less restrictive, REQUIRED->OPTIONAL
+ // We can't have a new schema be more restrictive, REPEATED->OPTIONAL
SchemaUtil::SchemaDelta incompatible_schema_delta;
incompatible_schema_delta.schema_types_incompatible.emplace(kEmailType);
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
@@ -397,7 +447,7 @@
/*new_schema=*/more_restrictive_schema),
Eq(incompatible_schema_delta));
- // We can have the new schema be more restrictive, OPTIONAL->REPEATED;
+ // We can have the new schema be less restrictive, OPTIONAL->REPEATED.
SchemaUtil::SchemaDelta compatible_schema_delta;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
/*old_schema=*/more_restrictive_schema,
@@ -405,26 +455,28 @@
Eq(compatible_schema_delta));
}
-TEST_F(SchemaUtilTest, DifferentDataTypeIsIncompatible) {
+TEST(SchemaUtilTest, DifferentDataTypeIsIncompatible) {
// Configure old schema, with an int64_t property
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- auto property = type->add_properties();
- property->set_property_name("Property");
- property->set_data_type(PropertyConfigProto::DataType::INT64);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataType(TYPE_INT)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
// Configure new schema, with a double property
- SchemaProto new_schema;
- type = new_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
-
- property = type->add_properties();
- property->set_property_name("Property");
- property->set_data_type(PropertyConfigProto::DataType::DOUBLE);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataType(TYPE_DOUBLE)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
@@ -432,30 +484,44 @@
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
+TEST(SchemaUtilTest, DifferentSchemaTypeIsIncompatible) {
// Configure old schema, where Property is supposed to be a Person type
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kPersonType);
-
- *type = CreateSchemaTypeConfig(kEmailType);
- auto property = type->add_properties();
- property->set_property_name("Property");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- property->set_schema_type(kPersonType);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop")
+ .SetDataType(TYPE_INT)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeDocument(
+ kPersonType,
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
// Configure new schema, where Property is supposed to be an Email type
- SchemaProto new_schema;
- type = new_schema.add_types();
- *type = CreateSchemaTypeConfig(kPersonType);
-
- *type = CreateSchemaTypeConfig(kEmailType);
- property = type->add_properties();
- property->set_property_name("Property");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- property->set_schema_type(kEmailType);
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("prop")
+ .SetDataType(TYPE_INT)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeDocument(
+ kEmailType, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_incompatible.emplace(kEmailType);
@@ -463,74 +529,74 @@
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) {
+TEST(SchemaUtilTest, ChangingIndexedPropertiesMakesIndexIncompatible) {
// Configure old schema
- SchemaProto old_schema;
- auto old_type = old_schema.add_types();
- *old_type = CreateSchemaTypeConfig(kEmailType, kPersonType);
-
- auto old_property = old_type->add_properties();
- old_property->set_property_name("Property");
- old_property->set_data_type(PropertyConfigProto::DataType::STRING);
- old_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaProto schema_with_indexed_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// Configure new schema
- SchemaProto new_schema;
- auto new_type = new_schema.add_types();
- *new_type = CreateSchemaTypeConfig(kEmailType, kPersonType);
-
- auto new_property = new_type->add_properties();
- new_property->set_property_name("Property");
- new_property->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaProto schema_with_unindexed_property =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(MATCH_UNKNOWN, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
schema_delta.index_incompatible = true;
// New schema gained a new indexed property.
- old_property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::UNKNOWN);
- new_property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_unindexed_property, schema_with_indexed_property),
Eq(schema_delta));
// New schema lost an indexed property.
- old_property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- new_property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::UNKNOWN);
- EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
+ EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(
+ schema_with_indexed_property, schema_with_unindexed_property),
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) {
+TEST(SchemaUtilTest, AddingNewIndexedPropertyMakesIndexIncompatible) {
// Configure old schema
- SchemaProto old_schema;
- auto old_type = old_schema.add_types();
- *old_type = CreateSchemaTypeConfig(kEmailType, kPersonType);
-
- auto old_property = old_type->add_properties();
- old_property->set_property_name("Property");
- old_property->set_data_type(PropertyConfigProto::DataType::STRING);
- old_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
// Configure new schema
- SchemaProto new_schema;
- auto new_type = new_schema.add_types();
- *new_type = CreateSchemaTypeConfig(kEmailType, kPersonType);
-
- auto new_property = new_type->add_properties();
- new_property->set_property_name("Property");
- new_property->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- new_property = new_type->add_properties();
- new_property->set_property_name("NewIndexedProperty");
- new_property->set_data_type(PropertyConfigProto::DataType::STRING);
- new_property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- new_property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("NewIndexedProperty")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
schema_delta.index_incompatible = true;
@@ -538,37 +604,75 @@
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, AddingTypeIsCompatible) {
+TEST(SchemaUtilTest, AddingTypeIsCompatible) {
// Can add a new type, existing data isn't incompatible, since none of them
// are of this new schema type
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
- SchemaProto new_schema;
- type = new_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
- type = new_schema.add_types();
- *type = CreateSchemaTypeConfig(kPersonType);
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
EXPECT_THAT(SchemaUtil::ComputeCompatibilityDelta(old_schema, new_schema),
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, DeletingTypeIsNoted) {
+TEST(SchemaUtilTest, DeletingTypeIsNoted) {
// Can't remove an old type, new schema needs to at least have all the
// previously defined schema otherwise the Documents of the missing schema
// are invalid
- SchemaProto old_schema;
- auto type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
- type = old_schema.add_types();
- *type = CreateSchemaTypeConfig(kPersonType);
+ SchemaProto old_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kPersonType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
- SchemaProto new_schema;
- type = new_schema.add_types();
- *type = CreateSchemaTypeConfig(kEmailType);
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kEmailType)
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Property")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
SchemaUtil::SchemaDelta schema_delta;
schema_delta.schema_types_deleted.emplace(kPersonType);
@@ -576,148 +680,147 @@
Eq(schema_delta));
}
-TEST_F(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("MyType");
-
- auto* prop = type->add_properties();
- prop->set_property_name("Foo");
- prop->set_data_type(PropertyConfigProto::DataType::STRING);
- prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- prop->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTermMatchType) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataTypeString(MATCH_UNKNOWN, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
// Error if we don't set a term match type
EXPECT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Passes once we set a term match type
- prop->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
}
-TEST_F(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
- SchemaProto schema;
- auto* type = schema.add_types();
- type->set_schema_type("MyType");
-
- auto* prop = type->add_properties();
- prop->set_property_name("Foo");
- prop->set_data_type(PropertyConfigProto::DataType::STRING);
- prop->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
- prop->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
+TEST(SchemaUtilTest, ValidateStringIndexingConfigShouldHaveTokenizer) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_NONE)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
// Error if we don't set a tokenizer type
EXPECT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
// Passes once we set a tokenizer type
- prop->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("MyType").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("Foo")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
}
-TEST_F(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) {
- SchemaProto schema;
-
- // Create a parent schema
- auto type = schema.add_types();
- type->set_schema_type("ParentSchema");
-
- // Create multiple references to the same child schema
- auto property = type->add_properties();
- property->set_property_name("ChildProperty1");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_schema_type("ChildSchema");
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
-
- property = type->add_properties();
- property->set_property_name("ChildProperty2");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_schema_type("ChildSchema");
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
-
- // Create a child schema
- type = schema.add_types();
- type->set_schema_type("ChildSchema");
+TEST(SchemaUtilTest, MultipleReferencesToSameNestedSchemaOk) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("ChildSchema"))
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("ParentSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("ChildProperty1")
+ .SetDataTypeDocument(
+ "ChildSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("ChildProperty2")
+ .SetDataTypeDocument(
+ "ChildSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
EXPECT_THAT(SchemaUtil::Validate(schema), IsOk());
}
-TEST_F(SchemaUtilTest, InvalidSelfReference) {
- SchemaProto schema;
-
+TEST(SchemaUtilTest, InvalidSelfReference) {
// Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema
- auto type = schema.add_types();
- type->set_schema_type("OwnSchema");
-
- // Reference a child schema, so far so good
- auto property = type->add_properties();
- property->set_property_name("NestedDocument");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_schema_type("OwnSchema");
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("OwnSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "OwnSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
EXPECT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
HasSubstr("Infinite loop")));
}
-TEST_F(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) {
- SchemaProto schema;
-
+TEST(SchemaUtilTest, InvalidSelfReferenceEvenWithOtherProperties) {
// Create a schema with a self-reference cycle in it: OwnSchema -> OwnSchema
- auto type = schema.add_types();
- type->set_schema_type("OwnSchema");
-
- // Reference a child schema, so far so good
- auto property = type->add_properties();
- property->set_property_name("NestedDocument");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_schema_type("OwnSchema");
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- property = type->add_properties();
- property->set_property_name("SomeString");
- property->set_data_type(PropertyConfigProto::DataType::STRING);
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::PREFIX);
- property->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("OwnSchema")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "OwnSchema",
+ /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("SomeString")
+ .SetDataTypeString(MATCH_PREFIX, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
EXPECT_THAT(SchemaUtil::Validate(schema),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT,
HasSubstr("Infinite loop")));
}
-TEST_F(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
- SchemaProto schema;
-
+TEST(SchemaUtilTest, InvalidInfiniteLoopTwoDegrees) {
// Create a schema for the parent schema
- auto type = schema.add_types();
- type->set_schema_type("A");
-
- // Reference schema B, so far so good
- auto property = type->add_properties();
- property->set_property_name("NestedDocument");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_schema_type("B");
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- // Create the child schema
- type = schema.add_types();
- type->set_schema_type("B");
-
- // Reference the schema A, causing an infinite loop of references.
- property = type->add_properties();
- property->set_property_name("NestedDocument");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_schema_type("A");
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ // Reference schema B, so far so good
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "B", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ // Create the child schema
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ // Reference the schema A, causing an infinite loop of
+ // references.
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "A", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
// Two degrees of referencing: A -> B -> A
EXPECT_THAT(SchemaUtil::Validate(schema),
@@ -725,41 +828,40 @@
HasSubstr("Infinite loop")));
}
-TEST_F(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
- SchemaProto schema;
-
- // Create a schema for the parent schema
- auto type = schema.add_types();
- type->set_schema_type("A");
-
- // Reference schema B , so far so good
- auto property = type->add_properties();
- property->set_property_name("NestedDocument");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_schema_type("B");
- property->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- // Create the child schema
- type = schema.add_types();
- type->set_schema_type("B");
-
- // Reference schema C, so far so good
- property = type->add_properties();
- property->set_property_name("NestedDocument");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_schema_type("C");
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
-
- // Create the child schema
- type = schema.add_types();
- type->set_schema_type("C");
-
- // Reference schema A, no good
- property = type->add_properties();
- property->set_property_name("NestedDocument");
- property->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- property->set_schema_type("A");
- property->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
+TEST(SchemaUtilTest, InvalidInfiniteLoopThreeDegrees) {
+ SchemaProto schema =
+ SchemaBuilder()
+ // Create a schema for the parent schema
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("A")
+ // Reference schema B, so far so good
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "B", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ // Create the child schema
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("B")
+ // Reference schema C, so far so good
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "C", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ // Create the child schema
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("C")
+                  // Reference schema A, no good (closes the loop)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("NestedDocument")
+ .SetDataTypeDocument(
+ "A", /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
// Three degrees of referencing: A -> B -> C -> A
EXPECT_THAT(SchemaUtil::Validate(schema),
diff --git a/icing/schema/section-manager.cc b/icing/schema/section-manager.cc
index a10e9b9..a0893e6 100644
--- a/icing/schema/section-manager.cc
+++ b/icing/schema/section-manager.cc
@@ -165,16 +165,6 @@
return values;
}
-// Helper function to get metadata list of a type config
-libtextclassifier3::StatusOr<std::vector<SectionMetadata>> GetMetadataList(
- const KeyMapper<SchemaTypeId>& schema_type_mapper,
- const std::vector<std::vector<SectionMetadata>>& section_metadata_cache,
- const std::string& type_config_name) {
- ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
- schema_type_mapper.Get(type_config_name));
- return section_metadata_cache.at(schema_type_id);
-}
-
} // namespace
SectionManager::SectionManager(
@@ -263,18 +253,16 @@
"Section id %d is greater than the max value %d", section_id,
kMaxSectionId));
}
- ICING_ASSIGN_OR_RETURN(
- const std::vector<SectionMetadata>& metadata_list,
- GetMetadataList(schema_type_mapper_, section_metadata_cache_,
- document.schema()));
- if (section_id >= metadata_list.size()) {
+ ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
+ GetMetadataList(document.schema()));
+ if (section_id >= metadata_list->size()) {
return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
"Section with id %d doesn't exist in type config %s", section_id,
document.schema().c_str()));
}
// The index of metadata list is the same as the section id, so we can use
// section id as the index.
- return GetStringSectionContent(document, metadata_list[section_id].path);
+ return GetStringSectionContent(document, metadata_list->at(section_id).path);
}
libtextclassifier3::StatusOr<const SectionMetadata*>
@@ -300,12 +288,10 @@
libtextclassifier3::StatusOr<std::vector<Section>>
SectionManager::ExtractSections(const DocumentProto& document) const {
- ICING_ASSIGN_OR_RETURN(
- const std::vector<SectionMetadata>& metadata_list,
- GetMetadataList(schema_type_mapper_, section_metadata_cache_,
- document.schema()));
+ ICING_ASSIGN_OR_RETURN(const std::vector<SectionMetadata>* metadata_list,
+ GetMetadataList(document.schema()));
std::vector<Section> sections;
- for (const auto& section_metadata : metadata_list) {
+ for (const auto& section_metadata : *metadata_list) {
auto section_content_or =
GetStringSectionContent(document, section_metadata.path);
// Adds to result vector if section is found in document
@@ -317,5 +303,12 @@
return sections;
}
+// Resolves `type_config_name` to its SchemaTypeId and returns a non-owning
+// pointer to that type's entry in section_metadata_cache_. A failed lookup in
+// schema_type_mapper_ is returned to the caller via ICING_ASSIGN_OR_RETURN.
+libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
+SectionManager::GetMetadataList(const std::string& type_config_name) const {
+  ICING_ASSIGN_OR_RETURN(SchemaTypeId schema_type_id,
+                         schema_type_mapper_.Get(type_config_name));
+  // Return the address of the cached vector (not a copy). The address-of
+  // operator is required here: the declared result type is a pointer.
+  return &section_metadata_cache_.at(schema_type_id);
+}
+
} // namespace lib
} // namespace icing
diff --git a/icing/schema/section-manager.h b/icing/schema/section-manager.h
index 191a169..51eb133 100644
--- a/icing/schema/section-manager.h
+++ b/icing/schema/section-manager.h
@@ -30,7 +30,9 @@
namespace icing {
namespace lib {
-inline constexpr char kPropertySeparator[] = ".";
+inline constexpr std::string_view kPropertySeparator = ".";
+inline constexpr std::string_view kLBracket = "[";
+inline constexpr std::string_view kRBracket = "]";
// This class provides section-related operations. It assigns sections according
// to type configs and extracts section / sections from documents.
@@ -94,6 +96,12 @@
libtextclassifier3::StatusOr<std::vector<Section>> ExtractSections(
const DocumentProto& document) const;
+ // Returns:
+ // - On success, the section metadatas for the specified type
+ // - NOT_FOUND if the type config name is not present in the schema
+ libtextclassifier3::StatusOr<const std::vector<SectionMetadata>*>
+ GetMetadataList(const std::string& type_config_name) const;
+
private:
// Use SectionManager::Create() to instantiate
explicit SectionManager(
diff --git a/icing/schema/section-manager_test.cc b/icing/schema/section-manager_test.cc
index 15d9a19..3dcc5a9 100644
--- a/icing/schema/section-manager_test.cc
+++ b/icing/schema/section-manager_test.cc
@@ -20,7 +20,6 @@
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
-#include "icing/proto/schema.proto.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/schema/schema-util.h"
diff --git a/icing/scoring/bm25f-calculator.cc b/icing/scoring/bm25f-calculator.cc
index 7495e98..4822d7f 100644
--- a/icing/scoring/bm25f-calculator.cc
+++ b/icing/scoring/bm25f-calculator.cc
@@ -42,24 +42,25 @@
constexpr float b_ = 0.7f;
// TODO(b/158603900): add tests for Bm25fCalculator
-Bm25fCalculator::Bm25fCalculator(const DocumentStore *document_store)
+Bm25fCalculator::Bm25fCalculator(const DocumentStore* document_store)
: document_store_(document_store) {}
// During initialization, Bm25fCalculator iterates through
// hit-iterators for each query term to pre-compute n(q_i) for each corpus under
// consideration.
void Bm25fCalculator::PrepareToScore(
- std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
- *query_term_iterators) {
+ std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>*
+ query_term_iterators) {
Clear();
TermId term_id = 0;
- for (auto &iter : *query_term_iterators) {
- const std::string &term = iter.first;
+ for (auto& iter : *query_term_iterators) {
+ const std::string& term = iter.first;
if (term_id_map_.find(term) != term_id_map_.end()) {
continue;
}
term_id_map_[term] = ++term_id;
- DocHitInfoIterator *term_it = iter.second.get();
+ DocHitInfoIterator* term_it = iter.second.get();
+
while (term_it->Advance().ok()) {
auto status_or = document_store_->GetDocumentAssociatedScoreData(
term_it->doc_hit_info().document_id());
@@ -89,8 +90,8 @@
// where IDF(q_i) is the Inverse Document Frequency (IDF) weight of the query
// term q_i in the corpus with document D, and tf(q_i, D) is the weighted and
// normalized term frequency of query term q_i in the document D.
-float Bm25fCalculator::ComputeScore(const DocHitInfoIterator *query_it,
- const DocHitInfo &hit_info,
+float Bm25fCalculator::ComputeScore(const DocHitInfoIterator* query_it,
+ const DocHitInfo& hit_info,
double default_score) {
auto status_or =
document_store_->GetDocumentAssociatedScoreData(hit_info.document_id());
@@ -103,7 +104,7 @@
query_it->PopulateMatchedTermsStats(&matched_terms_stats);
float score = 0;
- for (const TermMatchInfo &term_match_info : matched_terms_stats) {
+ for (const TermMatchInfo& term_match_info : matched_terms_stats) {
float idf_weight =
GetCorpusIdfWeightForTerm(term_match_info.term, data.corpus_id());
float normalized_tf =
@@ -186,8 +187,8 @@
// |D| is the #tokens in D, avgdl is the average document length in the corpus,
// k1 and b are smoothing parameters.
float Bm25fCalculator::ComputedNormalizedTermFrequency(
- const TermMatchInfo &term_match_info, const DocHitInfo &hit_info,
- const DocumentAssociatedScoreData &data) {
+ const TermMatchInfo& term_match_info, const DocHitInfo& hit_info,
+ const DocumentAssociatedScoreData& data) {
uint32_t dl = data.length_in_tokens();
float avgdl = GetCorpusAvgDocLength(data.corpus_id());
float f_q =
@@ -204,7 +205,7 @@
// Note: once we support section weights, we should update this function to
// compute the weighted term frequency.
float Bm25fCalculator::ComputeTermFrequencyForMatchedSections(
- CorpusId corpus_id, const TermMatchInfo &term_match_info) const {
+ CorpusId corpus_id, const TermMatchInfo& term_match_info) const {
float sum = 0.0f;
SectionIdMask sections = term_match_info.section_ids_mask;
while (sections != 0) {
diff --git a/icing/scoring/scorer.cc b/icing/scoring/scorer.cc
index b7e1b92..a4734b4 100644
--- a/icing/scoring/scorer.cc
+++ b/icing/scoring/scorer.cc
@@ -89,6 +89,7 @@
if (!query_it) {
return default_score_;
}
+
return static_cast<double>(
bm25f_calculator_->ComputeScore(query_it, hit_info, default_score_));
}
@@ -122,11 +123,11 @@
case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_COUNT:
return usage_scores.usage_type3_count;
case ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP:
- return usage_scores.usage_type1_last_used_timestamp_s;
+ return usage_scores.usage_type1_last_used_timestamp_s * 1000.0;
case ScoringSpecProto::RankingStrategy::USAGE_TYPE2_LAST_USED_TIMESTAMP:
- return usage_scores.usage_type2_last_used_timestamp_s;
+ return usage_scores.usage_type2_last_used_timestamp_s * 1000.0;
case ScoringSpecProto::RankingStrategy::USAGE_TYPE3_LAST_USED_TIMESTAMP:
- return usage_scores.usage_type3_last_used_timestamp_s;
+ return usage_scores.usage_type3_last_used_timestamp_s * 1000.0;
default:
// This shouldn't happen if this scorer is used correctly.
return default_score_;
diff --git a/icing/scoring/scorer_test.cc b/icing/scoring/scorer_test.cc
index b114515..8b89514 100644
--- a/icing/scoring/scorer_test.cc
+++ b/icing/scoring/scorer_test.cc
@@ -25,6 +25,7 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/store/document-id.h"
#include "icing/store/document-store.h"
@@ -38,6 +39,12 @@
namespace {
using ::testing::Eq;
+constexpr PropertyConfigProto_DataType_Code TYPE_STRING =
+ PropertyConfigProto_DataType_Code_STRING;
+
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED =
+ PropertyConfigProto_Cardinality_Code_REQUIRED;
+
class ScorerTest : public testing::Test {
protected:
ScorerTest()
@@ -64,13 +71,14 @@
document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
- SchemaProto test_email_schema;
- auto type_config = test_email_schema.add_types();
- type_config->set_schema_type("email");
- auto subject = type_config->add_properties();
- subject->set_property_name("subject");
- subject->set_data_type(PropertyConfigProto::DataType::STRING);
- subject->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
+ SchemaProto test_email_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED)))
+ .Build();
ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
}
@@ -87,6 +95,10 @@
const FakeClock& fake_clock2() { return fake_clock2_; }
+ // Test helper: forwards `new_time` to fake_clock1_'s
+ // SetSystemTimeMilliseconds(), letting a test move that clock (e.g. past a
+ // document's creation time + TTL).
+ void SetFakeClock1Time(int64_t new_time) {
+ fake_clock1_.SetSystemTimeMilliseconds(new_time);
+ }
+
private:
const std::string test_dir_;
const std::string doc_store_dir_;
@@ -115,7 +127,7 @@
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
-TEST_F(ScorerTest, ShouldGetDefaultScore) {
+TEST_F(ScorerTest, ShouldGetDefaultScoreIfDocumentDoesntExist) {
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<Scorer> scorer,
Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
@@ -127,6 +139,66 @@
EXPECT_THAT(scorer->GetScore(docHitInfo), Eq(10));
}
+TEST_F(ScorerTest, ShouldGetDefaultScoreIfDocumentIsDeleted) {
+  // Score stored on the document vs. the fallback handed to the scorer.
+  constexpr int kDocumentScore = 42;
+  constexpr int kDefaultScore = 10;
+
+  // Store an email document that carries an explicit score.
+  DocumentProto email = DocumentBuilder()
+                            .SetKey("icing", "email/1")
+                            .SetSchema("email")
+                            .AddStringProperty("subject", "subject foo")
+                            .SetScore(kDocumentScore)
+                            .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_id,
+                             document_store()->Put(email));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<Scorer> document_scorer,
+      Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
+                     /*default_score=*/kDefaultScore, document_store()));
+
+  DocHitInfo hit = DocHitInfo(email_id);
+
+  // While the document is alive its own score is reported.
+  EXPECT_THAT(document_scorer->GetScore(hit), Eq(kDocumentScore));
+
+  // After deletion the scorer must fall back to the caller-provided default.
+  EXPECT_THAT(document_store()->Delete(email_id), IsOk());
+  EXPECT_THAT(document_scorer->GetScore(hit), Eq(kDefaultScore));
+}
+
+TEST_F(ScorerTest, ShouldGetDefaultScoreIfDocumentIsExpired) {
+  // Score stored on the document vs. the fallback handed to the scorer.
+  constexpr int kDocumentScore = 42;
+  constexpr int kDefaultScore = 10;
+  constexpr int64_t kTtlMs = 100;
+
+  // Store an email document that is created "now" (per fake clock 1) and
+  // lives for kTtlMs.
+  const int64_t creation_time_ms = fake_clock1().GetSystemTimeMilliseconds();
+  DocumentProto email = DocumentBuilder()
+                            .SetKey("icing", "email/1")
+                            .SetSchema("email")
+                            .AddStringProperty("subject", "subject foo")
+                            .SetScore(kDocumentScore)
+                            .SetCreationTimestampMs(creation_time_ms)
+                            .SetTtlMs(kTtlMs)
+                            .Build();
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_id,
+                             document_store()->Put(email));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<Scorer> document_scorer,
+      Scorer::Create(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE,
+                     /*default_score=*/kDefaultScore, document_store()));
+
+  DocHitInfo hit = DocHitInfo(email_id);
+
+  // Before the TTL elapses the document's own score is reported.
+  EXPECT_THAT(document_scorer->GetScore(hit), Eq(kDocumentScore));
+
+  // Advance the clock beyond creation + TTL; the scorer must now fall back to
+  // the caller-provided default.
+  const int64_t past_expiry_ms = creation_time_ms + kTtlMs + 10;
+  SetFakeClock1Time(past_expiry_ms);
+  EXPECT_THAT(document_scorer->GetScore(hit), Eq(kDefaultScore));
+}
+
TEST_F(ScorerTest, ShouldGetDefaultDocumentScore) {
// Creates a test document with the default document score 0
DocumentProto test_document =
@@ -389,7 +461,7 @@
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
UsageReport::USAGE_TYPE1);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time1));
- EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(1));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(1000));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
@@ -398,7 +470,7 @@
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/5000,
UsageReport::USAGE_TYPE1);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time5));
- EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5000));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
@@ -407,7 +479,7 @@
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/3000,
UsageReport::USAGE_TYPE1);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1_time3));
- EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5));
+ EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(5000));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
}
@@ -450,7 +522,7 @@
UsageReport::USAGE_TYPE2);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time1));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
- EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(1));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(1000));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
// Report usage with timestamp = 5000ms, score should be updated.
@@ -459,7 +531,7 @@
UsageReport::USAGE_TYPE2);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time5));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
- EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5000));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
// Report usage with timestamp = 3000ms, score should not be updated.
@@ -468,7 +540,7 @@
UsageReport::USAGE_TYPE2);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type2_time3));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
- EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5));
+ EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(5000));
EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(0));
}
@@ -511,7 +583,7 @@
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time1));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
- EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(1));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(1000));
// Report usage with timestamp = 5000ms, score should be updated.
UsageReport usage_report_type3_time5 = CreateUsageReport(
@@ -520,7 +592,7 @@
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time5));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
- EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5000));
// Report usage with timestamp = 3000ms, score should not be updated.
UsageReport usage_report_type3_time3 = CreateUsageReport(
@@ -529,7 +601,7 @@
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type3_time3));
EXPECT_THAT(scorer1->GetScore(docHitInfo), Eq(0));
EXPECT_THAT(scorer2->GetScore(docHitInfo), Eq(0));
- EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5));
+ EXPECT_THAT(scorer3->GetScore(docHitInfo), Eq(5000));
}
TEST_F(ScorerTest, NoScorerShouldAlwaysReturnDefaultScore) {
@@ -557,6 +629,37 @@
EXPECT_THAT(scorer->GetScore(docHitInfo3), Eq(111));
}
+TEST_F(ScorerTest, ShouldScaleUsageTimestampScoreForMaxTimestamp) {
+  // Largest usage timestamp exercised by this test, in milliseconds. It is
+  // computed in 64-bit integer arithmetic (the cast happens before the
+  // multiplication, so nothing overflows) instead of as a double, so the
+  // value passed as timestamp_ms is an integer like the literals used by the
+  // other CreateUsageReport calls in this file.
+  // NOTE(review): assumes CreateUsageReport takes an integer timestamp_ms --
+  // confirm against its declaration.
+  constexpr int64_t kMaxUsageTimestampMs =
+      static_cast<int64_t>(std::numeric_limits<uint32_t>::max()) * 1000;
+
+  DocumentProto test_document =
+      DocumentBuilder()
+          .SetKey("icing", "email/1")
+          .SetSchema("email")
+          .AddStringProperty("subject", "subject foo")
+          .SetCreationTimestampMs(fake_clock1().GetSystemTimeMilliseconds())
+          .Build();
+
+  ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
+                             document_store()->Put(test_document));
+
+  ICING_ASSERT_OK_AND_ASSIGN(
+      std::unique_ptr<Scorer> scorer1,
+      Scorer::Create(
+          ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP,
+          /*default_score=*/0, document_store()));
+  DocHitInfo docHitInfo = DocHitInfo(document_id);
+
+  // Create usage report for the maximum allowable timestamp.
+  UsageReport usage_report_type1 = CreateUsageReport(
+      /*name_space=*/"icing", /*uri=*/"email/1",
+      /*timestamp_ms=*/kMaxUsageTimestampMs, UsageReport::USAGE_TYPE1);
+  ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_type1));
+
+  // The expected score is the reported timestamp itself, expressed as a
+  // double. The value is far below 2^53, so the conversion is exact.
+  EXPECT_THAT(scorer1->GetScore(docHitInfo),
+              Eq(static_cast<double>(kMaxUsageTimestampMs)));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/scoring/scoring-processor_test.cc b/icing/scoring/scoring-processor_test.cc
index 65eecd1..125e2a7 100644
--- a/icing/scoring/scoring-processor_test.cc
+++ b/icing/scoring/scoring-processor_test.cc
@@ -24,6 +24,7 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
+#include "icing/schema-builder.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/tmp-directory.h"
@@ -36,6 +37,12 @@
using ::testing::IsEmpty;
using ::testing::SizeIs;
+constexpr PropertyConfigProto_DataType_Code TYPE_STRING =
+ PropertyConfigProto_DataType_Code_STRING;
+
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
+
class ScoringProcessorTest : public testing::Test {
protected:
ScoringProcessorTest()
@@ -60,14 +67,14 @@
document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
- SchemaProto test_email_schema;
- auto type_config = test_email_schema.add_types();
- type_config->set_schema_type("email");
- auto subject = type_config->add_properties();
- subject->set_property_name("subject");
- subject->set_data_type(PropertyConfigProto::DataType::STRING);
- subject->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
+ SchemaProto test_email_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
}
@@ -603,9 +610,9 @@
DocHitInfo doc_hit_info2(document_id2);
DocHitInfo doc_hit_info3(document_id3);
ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
- /*score=*/1);
+ /*score=*/1000);
ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
- /*score=*/5);
+ /*score=*/5000);
ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
/*score=*/0);
diff --git a/icing/store/document-store.cc b/icing/store/document-store.cc
index 72bf736..5f478fa 100644
--- a/icing/store/document-store.cc
+++ b/icing/store/document-store.cc
@@ -19,6 +19,7 @@
#include <memory>
#include <string>
#include <string_view>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -36,6 +37,7 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/logging.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/store/corpus-associated-scoring-data.h"
#include "icing/store/corpus-id.h"
@@ -44,6 +46,7 @@
#include "icing/store/document-id.h"
#include "icing/store/key-mapper.h"
#include "icing/store/namespace-id.h"
+#include "icing/store/usage-store.h"
#include "icing/tokenization/language-segmenter.h"
#include "icing/util/clock.h"
#include "icing/util/crc32.h"
@@ -82,33 +85,6 @@
return document_wrapper;
}
-DocumentWrapper CreateDocumentTombstone(std::string_view document_namespace,
- std::string_view document_uri) {
- DocumentWrapper document_wrapper;
- document_wrapper.set_deleted(true);
- DocumentProto* document = document_wrapper.mutable_document();
- document->set_namespace_(std::string(document_namespace));
- document->set_uri(std::string(document_uri));
- return document_wrapper;
-}
-
-DocumentWrapper CreateNamespaceTombstone(std::string_view document_namespace) {
- DocumentWrapper document_wrapper;
- document_wrapper.set_deleted(true);
- DocumentProto* document = document_wrapper.mutable_document();
- document->set_namespace_(std::string(document_namespace));
- return document_wrapper;
-}
-
-DocumentWrapper CreateSchemaTypeTombstone(
- std::string_view document_schema_type) {
- DocumentWrapper document_wrapper;
- document_wrapper.set_deleted(true);
- DocumentProto* document = document_wrapper.mutable_document();
- document->set_schema(std::string(document_schema_type));
- return document_wrapper;
-}
-
std::string MakeHeaderFilename(const std::string& base_dir) {
return absl_ports::StrCat(base_dir, "/", kDocumentStoreHeaderFilename);
}
@@ -203,20 +179,20 @@
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
const DocumentProto& document, int32_t num_tokens,
- NativePutDocumentStats* put_document_stats) {
+ PutDocumentStatsProto* put_document_stats) {
return Put(DocumentProto(document), num_tokens, put_document_stats);
}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::Put(
DocumentProto&& document, int32_t num_tokens,
- NativePutDocumentStats* put_document_stats) {
+ PutDocumentStatsProto* put_document_stats) {
document.mutable_internal_fields()->set_length_in_tokens(num_tokens);
return InternalPut(document, put_document_stats);
}
DocumentStore::~DocumentStore() {
if (initialized_) {
- if (!PersistToDisk().ok()) {
+ if (!PersistToDisk(PersistType::FULL).ok()) {
ICING_LOG(ERROR)
<< "Error persisting to disk in DocumentStore destructor";
}
@@ -226,15 +202,18 @@
libtextclassifier3::StatusOr<DocumentStore::CreateResult> DocumentStore::Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
- NativeInitializeStats* initialize_stats) {
+ bool force_recovery_and_revalidate_documents,
+ InitializeStatsProto* initialize_stats) {
ICING_RETURN_ERROR_IF_NULL(filesystem);
ICING_RETURN_ERROR_IF_NULL(clock);
ICING_RETURN_ERROR_IF_NULL(schema_store);
auto document_store = std::unique_ptr<DocumentStore>(
new DocumentStore(filesystem, base_dir, clock, schema_store));
- ICING_ASSIGN_OR_RETURN(DataLoss data_loss,
- document_store->Initialize(initialize_stats));
+ ICING_ASSIGN_OR_RETURN(
+ DataLoss data_loss,
+ document_store->Initialize(force_recovery_and_revalidate_documents,
+ initialize_stats));
CreateResult create_result;
create_result.document_store = std::move(document_store);
@@ -243,7 +222,8 @@
}
libtextclassifier3::StatusOr<DataLoss> DocumentStore::Initialize(
- NativeInitializeStats* initialize_stats) {
+ bool force_recovery_and_revalidate_documents,
+ InitializeStatsProto* initialize_stats) {
auto create_result_or = FileBackedProtoLog<DocumentWrapper>::Create(
filesystem_, MakeDocumentLogFilename(base_dir_),
FileBackedProtoLog<DocumentWrapper>::Options(
@@ -259,25 +239,27 @@
std::move(create_result_or).ValueOrDie();
document_log_ = std::move(create_result.proto_log);
- if (create_result.has_data_loss()) {
- ICING_LOG(WARNING)
- << "Data loss in document log, regenerating derived files.";
- if (initialize_stats != nullptr) {
+ if (force_recovery_and_revalidate_documents ||
+ create_result.has_data_loss()) {
+ if (create_result.has_data_loss() && initialize_stats != nullptr) {
+ ICING_LOG(WARNING)
+ << "Data loss in document log, regenerating derived files.";
initialize_stats->set_document_store_recovery_cause(
- NativeInitializeStats::DATA_LOSS);
+ InitializeStatsProto::DATA_LOSS);
if (create_result.data_loss == DataLoss::PARTIAL) {
// Ground truth is partially lost.
initialize_stats->set_document_store_data_status(
- NativeInitializeStats::PARTIAL_LOSS);
+ InitializeStatsProto::PARTIAL_LOSS);
} else {
// Ground truth is completely lost.
initialize_stats->set_document_store_data_status(
- NativeInitializeStats::COMPLETE_LOSS);
+ InitializeStatsProto::COMPLETE_LOSS);
}
}
std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
- libtextclassifier3::Status status = RegenerateDerivedFiles();
+ libtextclassifier3::Status status =
+ RegenerateDerivedFiles(force_recovery_and_revalidate_documents);
if (initialize_stats != nullptr) {
initialize_stats->set_document_store_recovery_latency_ms(
document_recovery_timer->GetElapsedMilliseconds());
@@ -292,13 +274,12 @@
ICING_VLOG(1)
<< "Couldn't find derived files or failed to initialize them, "
"regenerating derived files for DocumentStore.";
- if (initialize_stats != nullptr) {
- initialize_stats->set_document_store_recovery_cause(
- NativeInitializeStats::IO_ERROR);
- }
std::unique_ptr<Timer> document_recovery_timer = clock_.GetNewTimer();
- libtextclassifier3::Status status = RegenerateDerivedFiles();
- if (initialize_stats != nullptr) {
+ libtextclassifier3::Status status = RegenerateDerivedFiles(
+ /*revalidate_documents=*/false);
+ if (initialize_stats != nullptr && num_documents() > 0) {
+ initialize_stats->set_document_store_recovery_cause(
+ InitializeStatsProto::IO_ERROR);
initialize_stats->set_document_store_recovery_latency_ms(
document_recovery_timer->GetElapsedMilliseconds());
}
@@ -404,7 +385,8 @@
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles() {
+libtextclassifier3::Status DocumentStore::RegenerateDerivedFiles(
+ bool revalidate_documents) {
ICING_RETURN_IF_ERROR(ResetDocumentKeyMapper());
ICING_RETURN_IF_ERROR(ResetDocumentIdMapper());
ICING_RETURN_IF_ERROR(ResetDocumentAssociatedScoreCache());
@@ -438,148 +420,80 @@
DocumentWrapper document_wrapper =
std::move(document_wrapper_or).ValueOrDie();
- if (document_wrapper.deleted()) {
- if (!document_wrapper.document().uri().empty()) {
- // Individual document deletion.
- auto document_id_or =
- GetDocumentId(document_wrapper.document().namespace_(),
- document_wrapper.document().uri());
- // Updates document_id mapper with deletion
- if (document_id_or.ok()) {
- ICING_RETURN_IF_ERROR(document_id_mapper_->Set(
- document_id_or.ValueOrDie(), kDocDeletedFlag));
- } else if (!absl_ports::IsNotFound(document_id_or.status())) {
- // Real error
- return absl_ports::Annotate(
- document_id_or.status(),
- absl_ports::StrCat("Failed to find document id. namespace: ",
- document_wrapper.document().namespace_(),
- ", uri: ", document_wrapper.document().uri()));
- }
- } else if (!document_wrapper.document().namespace_().empty()) {
- // Namespace deletion.
- ICING_ASSIGN_OR_RETURN(
- NamespaceId namespace_id,
- namespace_mapper_->Get(document_wrapper.document().namespace_()));
- // Tombstone indicates it's a soft delete.
- ICING_RETURN_IF_ERROR(BatchDelete(namespace_id, kInvalidSchemaTypeId,
- /*soft_delete=*/true));
- } else if (!document_wrapper.document().schema().empty()) {
- // SchemaType deletion.
- auto schema_type_id_or = schema_store_->GetSchemaTypeId(
- document_wrapper.document().schema());
-
- if (schema_type_id_or.ok()) {
- // Tombstone indicates it's a soft delete.
- ICING_RETURN_IF_ERROR(BatchDelete(kInvalidNamespaceId,
- schema_type_id_or.ValueOrDie(),
- /*soft_delete=*/true));
- } else {
- // The deleted schema type doesn't have a SchemaTypeId we can refer
- // to in the FilterCache.
- //
- // TODO(cassiewang): We could avoid reading out all the documents.
- // When we see a schema type doesn't have a SchemaTypeId, assign the
- // unknown schema type a unique, temporary SchemaTypeId and store
- // that in the FilterCache. Then, when we see the schema type
- // tombstone here, we can look up its temporary SchemaTypeId and
- // just iterate through the FilterCache to mark those documents as
- // deleted.
- int size = document_id_mapper_->num_elements();
- for (DocumentId document_id = 0; document_id < size; document_id++) {
- auto document_or = Get(document_id);
- if (absl_ports::IsNotFound(document_or.status())) {
- // Skip nonexistent documents
- continue;
- } else if (!document_or.ok()) {
- // Real error, pass up
- return absl_ports::Annotate(
- document_or.status(),
- IcingStringUtil::StringPrintf(
- "Failed to retrieve Document for DocumentId %d",
- document_id));
- }
-
- // Guaranteed to have a document now.
- DocumentProto document = document_or.ValueOrDie();
-
- if (document.schema() == document_wrapper.document().schema()) {
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(document_id, kDocDeletedFlag));
- }
- }
- }
- } else {
- return absl_ports::InternalError(
- "Encountered an invalid tombstone during recovery!");
+ // Revalidate that this document is still compatible if requested.
+ if (revalidate_documents) {
+ if (!document_validator_.Validate(document_wrapper.document()).ok()) {
+ // Document is no longer valid with the current schema. Mark as deleted.
+ // NOTE(review): `continue` skips the Advance() at loop end; confirm.
+ DocumentId new_document_id = document_id_mapper_->num_elements();
+ ICING_RETURN_IF_ERROR(document_log_->EraseProto(iterator.GetOffset()));
+ ICING_RETURN_IF_ERROR(ClearDerivedData(new_document_id));
+ continue;
}
- } else {
- // Updates key mapper and document_id mapper with the new document
- DocumentId new_document_id = document_id_mapper_->num_elements();
- ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
- MakeFingerprint(document_wrapper.document().namespace_(),
- document_wrapper.document().uri()),
- new_document_id));
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(new_document_id, iterator.GetOffset()));
-
- SchemaTypeId schema_type_id;
- auto schema_type_id_or =
- schema_store_->GetSchemaTypeId(document_wrapper.document().schema());
- if (absl_ports::IsNotFound(schema_type_id_or.status())) {
- // Didn't find a SchemaTypeId. This means that the DocumentStore and
- // the SchemaStore are out of sync. But DocumentStore can't do
- // anything about it so just ignore this for now. This should be
- // detected/handled by the owner of DocumentStore. Set it to some
- // arbitrary invalid value for now, it'll get updated to the correct
- // ID later.
- schema_type_id = -1;
- } else if (!schema_type_id_or.ok()) {
- // Real error. Pass it up
- return schema_type_id_or.status();
- } else {
- // We're guaranteed that SchemaTypeId is valid now
- schema_type_id = schema_type_id_or.ValueOrDie();
- }
-
- ICING_ASSIGN_OR_RETURN(
- NamespaceId namespace_id,
- namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(),
- namespace_mapper_->num_keys()));
-
- // Update corpus maps
- std::string corpus =
- MakeFingerprint(document_wrapper.document().namespace_(),
- document_wrapper.document().schema());
- ICING_ASSIGN_OR_RETURN(
- CorpusId corpusId,
- corpus_mapper_->GetOrPut(corpus, corpus_mapper_->num_keys()));
-
- ICING_ASSIGN_OR_RETURN(CorpusAssociatedScoreData scoring_data,
- GetCorpusAssociatedScoreDataToUpdate(corpusId));
- scoring_data.AddDocument(
- document_wrapper.document().internal_fields().length_in_tokens());
-
- ICING_RETURN_IF_ERROR(
- UpdateCorpusAssociatedScoreCache(corpusId, scoring_data));
-
- ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
- new_document_id,
- DocumentAssociatedScoreData(
- corpusId, document_wrapper.document().score(),
- document_wrapper.document().creation_timestamp_ms(),
- document_wrapper.document()
- .internal_fields()
- .length_in_tokens())));
-
- int64_t expiration_timestamp_ms = CalculateExpirationTimestampMs(
- document_wrapper.document().creation_timestamp_ms(),
- document_wrapper.document().ttl_ms());
-
- ICING_RETURN_IF_ERROR(UpdateFilterCache(
- new_document_id, DocumentFilterData(namespace_id, schema_type_id,
- expiration_timestamp_ms)));
}
+ // Updates key mapper and document_id mapper with the new document
+ DocumentId new_document_id = document_id_mapper_->num_elements();
+ ICING_RETURN_IF_ERROR(document_key_mapper_->Put(
+ MakeFingerprint(document_wrapper.document().namespace_(),
+ document_wrapper.document().uri()),
+ new_document_id));
+ ICING_RETURN_IF_ERROR(
+ document_id_mapper_->Set(new_document_id, iterator.GetOffset()));
+
+ SchemaTypeId schema_type_id;
+ auto schema_type_id_or =
+ schema_store_->GetSchemaTypeId(document_wrapper.document().schema());
+ if (absl_ports::IsNotFound(schema_type_id_or.status())) {
+ // Didn't find a SchemaTypeId. This means that the DocumentStore and
+ // the SchemaStore are out of sync. But DocumentStore can't do
+ // anything about it so just ignore this for now. This should be
+ // detected/handled by the owner of DocumentStore. Set it to some
+ // arbitrary invalid value for now, it'll get updated to the correct
+ // ID later.
+ schema_type_id = -1;
+ } else if (!schema_type_id_or.ok()) {
+ // Real error. Pass it up
+ return schema_type_id_or.status();
+ } else {
+ // We're guaranteed that SchemaTypeId is valid now
+ schema_type_id = schema_type_id_or.ValueOrDie();
+ }
+
+ ICING_ASSIGN_OR_RETURN(
+ NamespaceId namespace_id,
+ namespace_mapper_->GetOrPut(document_wrapper.document().namespace_(),
+ namespace_mapper_->num_keys()));
+
+ // Update corpus maps
+ std::string corpus =
+ MakeFingerprint(document_wrapper.document().namespace_(),
+ document_wrapper.document().schema());
+ ICING_ASSIGN_OR_RETURN(
+ CorpusId corpusId,
+ corpus_mapper_->GetOrPut(corpus, corpus_mapper_->num_keys()));
+
+ ICING_ASSIGN_OR_RETURN(CorpusAssociatedScoreData scoring_data,
+ GetCorpusAssociatedScoreDataToUpdate(corpusId));
+ scoring_data.AddDocument(
+ document_wrapper.document().internal_fields().length_in_tokens());
+
+ ICING_RETURN_IF_ERROR(
+ UpdateCorpusAssociatedScoreCache(corpusId, scoring_data));
+
+ ICING_RETURN_IF_ERROR(UpdateDocumentAssociatedScoreCache(
+ new_document_id,
+ DocumentAssociatedScoreData(
+ corpusId, document_wrapper.document().score(),
+ document_wrapper.document().creation_timestamp_ms(),
+ document_wrapper.document().internal_fields().length_in_tokens())));
+
+ int64_t expiration_timestamp_ms = CalculateExpirationTimestampMs(
+ document_wrapper.document().creation_timestamp_ms(),
+ document_wrapper.document().ttl_ms());
+
+ ICING_RETURN_IF_ERROR(UpdateFilterCache(
+ new_document_id, DocumentFilterData(namespace_id, schema_type_id,
+ expiration_timestamp_ms)));
iterator_status = iterator.Advance();
}
@@ -788,6 +702,11 @@
}
Crc32 corpus_score_cache_checksum = std::move(checksum_or).ValueOrDie();
+ // NOTE: We purposely don't include usage_store checksum here because we can't
+ // regenerate it from ground truth documents. If it gets corrupted, we'll just
+ // clear all usage reports, but we shouldn't throw everything else in the
+ // document store out.
+
total_checksum.Append(std::to_string(document_log_checksum.Get()));
total_checksum.Append(std::to_string(document_key_mapper_checksum.Get()));
total_checksum.Append(std::to_string(document_id_mapper_checksum.Get()));
@@ -819,8 +738,11 @@
header.checksum = checksum.Get();
// This should overwrite the header.
- if (!filesystem_->Write(MakeHeaderFilename(base_dir_).c_str(), &header,
- sizeof(header))) {
+ ScopedFd sfd(
+ filesystem_->OpenForWrite(MakeHeaderFilename(base_dir_).c_str()));
+ if (!sfd.is_valid() ||
+ !filesystem_->Write(sfd.get(), &header, sizeof(header)) ||
+ !filesystem_->DataSync(sfd.get())) {
return absl_ports::InternalError(absl_ports::StrCat(
"Failed to write DocStore header: ", MakeHeaderFilename(base_dir_)));
}
@@ -828,7 +750,7 @@
}
libtextclassifier3::StatusOr<DocumentId> DocumentStore::InternalPut(
- DocumentProto& document, NativePutDocumentStats* put_document_stats) {
+ DocumentProto& document, PutDocumentStatsProto* put_document_stats) {
std::unique_ptr<Timer> put_timer = clock_.GetNewTimer();
ICING_RETURN_IF_ERROR(document_validator_.Validate(document));
@@ -909,18 +831,20 @@
expiration_timestamp_ms)));
if (old_document_id_or.ok()) {
+ // The old document exists, copy over the usage scores and delete the old
+ // document.
DocumentId old_document_id = old_document_id_or.ValueOrDie();
- auto offset_or = DoesDocumentExistAndGetFileOffset(old_document_id);
- if (offset_or.ok()) {
- // The old document exists, copy over the usage scores.
- ICING_RETURN_IF_ERROR(
- usage_store_->CloneUsageScores(/*from_document_id=*/old_document_id,
- /*to_document_id=*/new_document_id));
+ ICING_RETURN_IF_ERROR(
+ usage_store_->CloneUsageScores(/*from_document_id=*/old_document_id,
+ /*to_document_id=*/new_document_id));
- // Hard delete the old document.
- ICING_RETURN_IF_ERROR(
- HardDelete(old_document_id, offset_or.ValueOrDie()));
+ // Delete the old document. It's fine if it's not found since it might have
+ // been deleted previously.
+ auto delete_status = Delete(old_document_id);
+ if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
+ // Real error, pass it up.
+ return delete_status;
}
}
@@ -939,7 +863,7 @@
// existing Status.
auto document_id_or = GetDocumentId(name_space, uri);
if (absl_ports::IsNotFound(document_id_or.status())) {
- ICING_LOG(ERROR) << document_id_or.status().error_message();
+ ICING_VLOG(1) << document_id_or.status().error_message();
return libtextclassifier3::Status(
document_id_or.status().CanonicalCode(),
IcingStringUtil::StringPrintf("Document (%s, %s) not found.",
@@ -962,8 +886,16 @@
libtextclassifier3::StatusOr<DocumentProto> DocumentStore::Get(
DocumentId document_id, bool clear_internal_fields) const {
- ICING_ASSIGN_OR_RETURN(int64_t document_log_offset,
- DoesDocumentExistAndGetFileOffset(document_id));
+ ICING_RETURN_IF_ERROR(DoesDocumentExistWithStatus(document_id));
+
+ auto document_log_offset_or = document_id_mapper_->Get(document_id);
+ if (!document_log_offset_or.ok()) {
+ // Since we've just checked that our document_id is valid a few lines
+ // above, there's no reason this should fail and an error should never
+ // happen.
+ return absl_ports::InternalError("Failed to find document offset.");
+ }
+ int64_t document_log_offset = *document_log_offset_or.ValueOrDie();
// TODO(b/144458732): Implement a more robust version of TC_ASSIGN_OR_RETURN
// that can support error logging.
@@ -1014,7 +946,7 @@
}
const DocumentFilterData* data = status_or_data.ValueOrDie();
- if (DoesDocumentExist(document_id)) {
+ if (InternalDoesDocumentExist(document_id)) {
existing_namespace_ids.insert(data->namespace_id());
}
}
@@ -1027,45 +959,78 @@
return existing_namespaces;
}
-libtextclassifier3::StatusOr<int64_t>
-DocumentStore::DoesDocumentExistAndGetFileOffset(DocumentId document_id) const {
+bool DocumentStore::DoesDocumentExist(DocumentId document_id) const {
if (!IsDocumentIdValid(document_id)) {
- return absl_ports::InvalidArgumentError(
- IcingStringUtil::StringPrintf("DocumentId %d is invalid", document_id));
+ return false;
}
- auto file_offset_or = document_id_mapper_->Get(document_id);
-
- bool deleted =
- file_offset_or.ok() && *file_offset_or.ValueOrDie() == kDocDeletedFlag;
- if (deleted || absl_ports::IsOutOfRange(file_offset_or.status())) {
- // Document has been deleted or doesn't exist
- return absl_ports::NotFoundError(
- IcingStringUtil::StringPrintf("Document %d not found", document_id));
+ if (document_id >= document_id_mapper_->num_elements()) {
+ // Somehow got a validly constructed document_id that the document store
+ // doesn't know about.
+ return false;
}
- ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
- filter_cache_->Get(document_id));
- if (clock_.GetSystemTimeMilliseconds() >=
- filter_data->expiration_timestamp_ms()) {
- // Past the expiration time, so also return NOT FOUND since it *shouldn't*
- // exist anymore.
- return absl_ports::NotFoundError(
- IcingStringUtil::StringPrintf("Document %d not found", document_id));
- }
-
- ICING_RETURN_IF_ERROR(file_offset_or.status());
- return *file_offset_or.ValueOrDie();
+ return InternalDoesDocumentExist(document_id);
}
-bool DocumentStore::DoesDocumentExist(DocumentId document_id) const {
- // If we can successfully get the document log offset, the document exists.
- return DoesDocumentExistAndGetFileOffset(document_id).ok();
+libtextclassifier3::Status DocumentStore::DoesDocumentExistWithStatus(
+ DocumentId document_id) const {
+ // Ids rejected by IsDocumentIdValid() are reported as an invalid argument.
+ if (!IsDocumentIdValid(document_id)) {
+ return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
+ "Document id '%d' invalid.", document_id));
+ }
+ if (document_id >= document_id_mapper_->num_elements()) {
+ // Somehow got a validly constructed document_id that the document store
+ // doesn't know about.
+ return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
+ "Unknown document id '%d'.", document_id));
+ }
+ // The id is known, but the document may have been deleted or have expired.
+ if (!InternalDoesDocumentExist(document_id)) {
+ return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
+ "Document id '%d' doesn't exist", document_id));
+ }
+ return libtextclassifier3::Status::OK;
+}
+
+bool DocumentStore::InternalDoesDocumentExist(DocumentId document_id) const {
+ return !IsDeleted(document_id) && !IsExpired(document_id);
+}
+
+bool DocumentStore::IsDeleted(DocumentId document_id) const {
+ auto file_offset_or = document_id_mapper_->Get(document_id);
+ if (!file_offset_or.ok()) {
+ // This would only happen if document_id is out of range of the
+ // document_id_mapper, meaning we got some invalid document_id. Callers
+ // should already have checked that their document_id is valid or used
+ // DoesDocumentExist(WithStatus). Regardless, return true since the
+ // document doesn't exist.
+ return true;
+ }
+ int64_t file_offset = *file_offset_or.ValueOrDie();
+ return file_offset == kDocDeletedFlag;
+}
+
+bool DocumentStore::IsExpired(DocumentId document_id) const {
+ auto filter_data_or = filter_cache_->Get(document_id);
+ if (!filter_data_or.ok()) {
+ // This would only happen if document_id is out of range of the
+ // filter_cache, meaning we got some invalid document_id. Callers should
+ // already have checked that their document_id is valid or used
+ // DoesDocumentExist(WithStatus). Regardless, return true since the
+ // document doesn't exist.
+ return true;
+ }
+ const DocumentFilterData* filter_data = filter_data_or.ValueOrDie();
+
+ // Check if it's past the expiration time
+ return clock_.GetSystemTimeMilliseconds() >=
+ filter_data->expiration_timestamp_ms();
}
libtextclassifier3::Status DocumentStore::Delete(
- const std::string_view name_space, const std::string_view uri,
- bool soft_delete) {
+ const std::string_view name_space, const std::string_view uri) {
// Try to get the DocumentId first
auto document_id_or = GetDocumentId(name_space, uri);
if (!document_id_or.ok()) {
@@ -1074,69 +1039,18 @@
absl_ports::StrCat("Failed to delete Document. namespace: ", name_space,
", uri: ", uri));
}
-
- // Check if the DocumentId's Document still exists.
- DocumentId document_id = document_id_or.ValueOrDie();
- auto file_offset_or = DoesDocumentExistAndGetFileOffset(document_id);
- if (!file_offset_or.ok()) {
- return absl_ports::Annotate(
- file_offset_or.status(),
- absl_ports::StrCat("Failed to delete Document. namespace: ", name_space,
- ", uri: ", uri));
- }
-
- if (soft_delete) {
- return SoftDelete(name_space, uri, document_id);
- } else {
- return HardDelete(document_id, file_offset_or.ValueOrDie());
- }
+ return Delete(document_id_or.ValueOrDie());
}
-libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id,
- bool soft_delete) {
- // Copy out the document to get namespace and uri.
- ICING_ASSIGN_OR_RETURN(int64_t document_log_offset,
- DoesDocumentExistAndGetFileOffset(document_id));
+libtextclassifier3::Status DocumentStore::Delete(DocumentId document_id) {
+ ICING_RETURN_IF_ERROR(DoesDocumentExistWithStatus(document_id));
- if (soft_delete) {
- auto document_wrapper_or = document_log_->ReadProto(document_log_offset);
- if (!document_wrapper_or.ok()) {
- ICING_LOG(ERROR) << document_wrapper_or.status().error_message()
- << "Failed to read from document log";
- return document_wrapper_or.status();
- }
- DocumentWrapper document_wrapper =
- std::move(document_wrapper_or).ValueOrDie();
-
- return SoftDelete(document_wrapper.document().namespace_(),
- document_wrapper.document().uri(), document_id);
- } else {
- return HardDelete(document_id, document_log_offset);
+ auto document_log_offset_or = document_id_mapper_->Get(document_id);
+ if (!document_log_offset_or.ok()) {
+ return absl_ports::InternalError("Failed to find document offset.");
}
-}
+ int64_t document_log_offset = *document_log_offset_or.ValueOrDie();
-// TODO(b/169969469): Consider removing SoftDelete().
-libtextclassifier3::Status DocumentStore::SoftDelete(
- std::string_view name_space, std::string_view uri, DocumentId document_id) {
- // Update ground truth first.
- // Mark the document as deleted by appending a tombstone of it and actually
- // remove it from file later in Optimize()
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- document_log_->WriteProto(CreateDocumentTombstone(name_space, uri))
- .status();
- if (!status.ok()) {
- return absl_ports::Annotate(
- status, absl_ports::StrCat("Failed to delete Document. namespace:",
- name_space, ", uri: ", uri));
- }
-
- return document_id_mapper_->Set(document_id, kDocDeletedFlag);
-}
-
-libtextclassifier3::Status DocumentStore::HardDelete(
- DocumentId document_id, int64_t document_log_offset) {
// Erases document proto.
ICING_RETURN_IF_ERROR(document_log_->EraseProto(document_log_offset));
return ClearDerivedData(document_id);
@@ -1154,7 +1068,12 @@
libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
DocumentStore::GetDocumentAssociatedScoreData(DocumentId document_id) const {
- auto score_data_or = score_cache_->Get(document_id);
+ // Score data is only returned for documents that currently exist.
+ if (!DoesDocumentExist(document_id)) {
+ return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
+ "Can't get score data, document id '%d' doesn't exist", document_id));
+ }
+ auto score_data_or = score_cache_->GetCopy(document_id);
if (!score_data_or.ok()) {
ICING_LOG(ERROR) << " while trying to access DocumentId " << document_id
<< " from score_cache_";
@@ -1162,7 +1081,7 @@
}
DocumentAssociatedScoreData document_associated_score_data =
- *std::move(score_data_or).ValueOrDie();
+ std::move(score_data_or).ValueOrDie();
if (document_associated_score_data.document_score() < 0) {
// An negative / invalid score means that the score data has been deleted.
return absl_ports::NotFoundError("Document score data not found.");
@@ -1172,13 +1091,13 @@
libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
DocumentStore::GetCorpusAssociatedScoreData(CorpusId corpus_id) const {
- auto score_data_or = corpus_score_cache_->Get(corpus_id);
+ auto score_data_or = corpus_score_cache_->GetCopy(corpus_id);
if (!score_data_or.ok()) {
return score_data_or.status();
}
CorpusAssociatedScoreData corpus_associated_score_data =
- *std::move(score_data_or).ValueOrDie();
+ std::move(score_data_or).ValueOrDie();
return corpus_associated_score_data;
}
@@ -1200,14 +1119,14 @@
libtextclassifier3::StatusOr<DocumentFilterData>
DocumentStore::GetDocumentFilterData(DocumentId document_id) const {
- auto filter_data_or = filter_cache_->Get(document_id);
+ auto filter_data_or = filter_cache_->GetCopy(document_id);
if (!filter_data_or.ok()) {
ICING_LOG(ERROR) << " while trying to access DocumentId " << document_id
<< " from filter_cache_";
return filter_data_or.status();
}
DocumentFilterData document_filter_data =
- *std::move(filter_data_or).ValueOrDie();
+ std::move(filter_data_or).ValueOrDie();
if (document_filter_data.namespace_id() == kInvalidNamespaceId) {
// An invalid namespace id means that the filter data has been deleted.
return absl_ports::NotFoundError("Document filter data not found.");
@@ -1217,6 +1136,10 @@
libtextclassifier3::StatusOr<UsageStore::UsageScores>
DocumentStore::GetUsageScores(DocumentId document_id) const {
+ if (!DoesDocumentExist(document_id)) {
+ return absl_ports::NotFoundError(IcingStringUtil::StringPrintf(
+ "Can't get usage scores, document id '%d' doesn't exist", document_id));
+ }
return usage_store_->GetUsageScores(document_id);
}
@@ -1225,11 +1148,22 @@
ICING_ASSIGN_OR_RETURN(DocumentId document_id,
GetDocumentId(usage_report.document_namespace(),
usage_report.document_uri()));
+ // We can use the internal version here because we got our document_id from
+ // our internal data structures. We would have thrown some error if the
+ // namespace and/or uri were incorrect.
+ if (!InternalDoesDocumentExist(document_id)) {
+ // Document was probably deleted or expired.
+ return absl_ports::NotFoundError(absl_ports::StrCat(
+ "Couldn't report usage on a nonexistent document: (namespace: '",
+ usage_report.document_namespace(), "', uri: '",
+ usage_report.document_uri(), "')"));
+ }
+
return usage_store_->AddUsageReport(usage_report, document_id);
}
DocumentStore::DeleteByGroupResult DocumentStore::DeleteByNamespace(
- std::string_view name_space, bool soft_delete) {
+ std::string_view name_space) {
DeleteByGroupResult result;
auto namespace_id_or = namespace_mapper_->Get(name_space);
if (!namespace_id_or.ok()) {
@@ -1239,26 +1173,7 @@
return result;
}
NamespaceId namespace_id = namespace_id_or.ValueOrDie();
-
- if (soft_delete) {
- // To delete an entire namespace, we append a tombstone that only contains
- // the deleted bit and the name of the deleted namespace.
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- document_log_->WriteProto(CreateNamespaceTombstone(name_space))
- .status();
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
- << "Failed to delete namespace. namespace = "
- << name_space;
- result.status = std::move(status);
- return result;
- }
- }
-
- auto num_deleted_or =
- BatchDelete(namespace_id, kInvalidSchemaTypeId, soft_delete);
+ auto num_deleted_or = BatchDelete(namespace_id, kInvalidSchemaTypeId);
if (!num_deleted_or.ok()) {
result.status = std::move(num_deleted_or).status();
return result;
@@ -1277,7 +1192,7 @@
}
DocumentStore::DeleteByGroupResult DocumentStore::DeleteBySchemaType(
- std::string_view schema_type, bool soft_delete) {
+ std::string_view schema_type) {
DeleteByGroupResult result;
auto schema_type_id_or = schema_store_->GetSchemaTypeId(schema_type);
if (!schema_type_id_or.ok()) {
@@ -1288,26 +1203,7 @@
return result;
}
SchemaTypeId schema_type_id = schema_type_id_or.ValueOrDie();
-
- if (soft_delete) {
- // To soft-delete an entire schema type, we append a tombstone that only
- // contains the deleted bit and the name of the deleted schema type.
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- document_log_->WriteProto(CreateSchemaTypeTombstone(schema_type))
- .status();
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
- << "Failed to delete schema_type. schema_type = "
- << schema_type;
- result.status = std::move(status);
- return result;
- }
- }
-
- auto num_deleted_or =
- BatchDelete(kInvalidNamespaceId, schema_type_id, soft_delete);
+ auto num_deleted_or = BatchDelete(kInvalidNamespaceId, schema_type_id);
if (!num_deleted_or.ok()) {
result.status = std::move(num_deleted_or).status();
return result;
@@ -1324,7 +1220,7 @@
}
libtextclassifier3::StatusOr<int> DocumentStore::BatchDelete(
- NamespaceId namespace_id, SchemaTypeId schema_type_id, bool soft_delete) {
+ NamespaceId namespace_id, SchemaTypeId schema_type_id) {
// Tracks if there were any existing documents with this namespace that we
// will mark as deleted.
int num_updated_documents = 0;
@@ -1356,37 +1252,27 @@
continue;
}
- // The document has the desired namespace and schema type, it either exists
- // or has been soft-deleted / expired.
- if (soft_delete) {
- if (DoesDocumentExist(document_id)) {
- ++num_updated_documents;
- }
-
- // docid_mapper_->Set can only fail if document_id is < 0
- // or >= docid_mapper_->num_elements. So the only possible way to get an
- // error here would be if filter_cache_->num_elements >
- // docid_mapper_->num_elements, which SHOULD NEVER HAPPEN.
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(document_id, kDocDeletedFlag));
- } else {
- // Hard delete.
- libtextclassifier3::Status delete_status =
- Delete(document_id, /*soft_delete=*/false);
- if (absl_ports::IsNotFound(delete_status)) {
- continue;
- } else if (!delete_status.ok()) {
- // Real error, pass up.
- return delete_status;
- }
- ++num_updated_documents;
+ // The document has the desired namespace and schema type, it either
+ // exists or has expired.
+ libtextclassifier3::Status delete_status = Delete(document_id);
+ if (absl_ports::IsNotFound(delete_status)) {
+ continue;
+ } else if (!delete_status.ok()) {
+ // Real error, pass up.
+ return delete_status;
}
+ ++num_updated_documents;
}
return num_updated_documents;
}
-libtextclassifier3::Status DocumentStore::PersistToDisk() {
+libtextclassifier3::Status DocumentStore::PersistToDisk(
+ PersistType::Code persist_type) {
+ if (persist_type == PersistType::LITE) {
+ // only persist the document log.
+ return document_log_->PersistToDisk();
+ }
ICING_RETURN_IF_ERROR(document_log_->PersistToDisk());
ICING_RETURN_IF_ERROR(document_key_mapper_->PersistToDisk());
ICING_RETURN_IF_ERROR(document_id_mapper_->PersistToDisk());
@@ -1404,30 +1290,139 @@
return libtextclassifier3::Status::OK;
}
-libtextclassifier3::StatusOr<int64_t> DocumentStore::GetDiskUsage() const {
- ICING_ASSIGN_OR_RETURN(const int64_t document_log_disk_usage,
- document_log_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t document_key_mapper_disk_usage,
- document_key_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t document_id_mapper_disk_usage,
- document_id_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t score_cache_disk_usage,
- score_cache_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t filter_cache_disk_usage,
- filter_cache_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t namespace_mapper_disk_usage,
- namespace_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t corpus_mapper_disk_usage,
- corpus_mapper_->GetDiskUsage());
- ICING_ASSIGN_OR_RETURN(const int64_t corpus_score_cache_disk_usage,
- corpus_score_cache_->GetDiskUsage());
+int64_t GetValueOrDefault(const libtextclassifier3::StatusOr<int64_t>& value_or,
+ int64_t default_value) {
+ return (value_or.ok()) ? value_or.ValueOrDie() : default_value;
+}
- int64_t disk_usage = document_log_disk_usage +
- document_key_mapper_disk_usage +
- document_id_mapper_disk_usage + score_cache_disk_usage +
- filter_cache_disk_usage + namespace_mapper_disk_usage +
- corpus_mapper_disk_usage + corpus_score_cache_disk_usage;
- return disk_usage;
+DocumentStorageInfoProto DocumentStore::GetMemberStorageInfo() const {
+ DocumentStorageInfoProto storage_info;
+ storage_info.set_document_log_size(
+ GetValueOrDefault(document_log_->GetDiskUsage(), -1));
+ storage_info.set_key_mapper_size(
+ GetValueOrDefault(document_key_mapper_->GetDiskUsage(), -1));
+ storage_info.set_document_id_mapper_size(
+ GetValueOrDefault(document_id_mapper_->GetDiskUsage(), -1));
+ storage_info.set_score_cache_size(
+ GetValueOrDefault(score_cache_->GetDiskUsage(), -1));
+ storage_info.set_filter_cache_size(
+ GetValueOrDefault(filter_cache_->GetDiskUsage(), -1));
+ storage_info.set_namespace_id_mapper_size(
+ GetValueOrDefault(namespace_mapper_->GetDiskUsage(), -1));
+ storage_info.set_corpus_mapper_size(
+ GetValueOrDefault(corpus_mapper_->GetDiskUsage(), -1));
+ storage_info.set_corpus_score_cache_size(
+ GetValueOrDefault(corpus_score_cache_->GetDiskUsage(), -1));
+ return storage_info;
+}
+
+DocumentStorageInfoProto DocumentStore::CalculateDocumentStatusCounts(
+ DocumentStorageInfoProto storage_info) const {
+ int total_num_alive = 0;
+ int total_num_expired = 0;
+ int total_num_deleted = 0;
+ std::unordered_map<NamespaceId, std::string> namespace_id_to_namespace =
+ namespace_mapper_->GetValuesToKeys();
+ std::unordered_map<std::string, NamespaceStorageInfoProto>
+ namespace_to_storage_info;
+
+ for (DocumentId document_id = 0;
+ document_id < document_id_mapper_->num_elements(); ++document_id) {
+ // Check if it's deleted first.
+ if (IsDeleted(document_id)) {
+ // We don't have the namespace id of hard deleted documents anymore, so
+ // we can't add to our namespace storage info.
+ ++total_num_deleted;
+ continue;
+ }
+
+ // At this point, the document is either alive or expired, we can get
+ // namespace info for it.
+ auto filter_data_or = filter_cache_->Get(document_id);
+ if (!filter_data_or.ok()) {
+ ICING_VLOG(1) << "Error trying to get filter data for document store "
+ "storage info counts.";
+ continue;
+ }
+ const DocumentFilterData* filter_data = filter_data_or.ValueOrDie();
+ auto itr = namespace_id_to_namespace.find(filter_data->namespace_id());
+ if (itr == namespace_id_to_namespace.end()) {
+ ICING_VLOG(1) << "Error trying to find namespace for document store "
+ "storage info counts.";
+ continue;
+ }
+ const std::string& name_space = itr->second;
+
+ // Always set the namespace. If the NamespaceStorageInfoProto didn't exist
+ // before, we'll get back a default instance of it.
+ NamespaceStorageInfoProto& namespace_storage_info =
+ namespace_to_storage_info[name_space];
+ namespace_storage_info.set_namespace_(name_space);
+
+ // Get usage scores
+ auto usage_scores_or = usage_store_->GetUsageScores(document_id);
+ if (!usage_scores_or.ok()) {
+ ICING_VLOG(1) << "Error trying to get usage scores for document store "
+ "storage info counts.";
+ continue;
+ }
+ UsageStore::UsageScores usage_scores = usage_scores_or.ValueOrDie();
+
+ // Update our stats
+ if (IsExpired(document_id)) {
+ ++total_num_expired;
+ namespace_storage_info.set_num_expired_documents(
+ namespace_storage_info.num_expired_documents() + 1);
+ if (usage_scores.usage_type1_count > 0) {
+ namespace_storage_info.set_num_expired_documents_usage_type1(
+ namespace_storage_info.num_expired_documents_usage_type1() + 1);
+ }
+ if (usage_scores.usage_type2_count > 0) {
+ namespace_storage_info.set_num_expired_documents_usage_type2(
+ namespace_storage_info.num_expired_documents_usage_type2() + 1);
+ }
+ if (usage_scores.usage_type3_count > 0) {
+ namespace_storage_info.set_num_expired_documents_usage_type3(
+ namespace_storage_info.num_expired_documents_usage_type3() + 1);
+ }
+ } else {
+ ++total_num_alive;
+ namespace_storage_info.set_num_alive_documents(
+ namespace_storage_info.num_alive_documents() + 1);
+ if (usage_scores.usage_type1_count > 0) {
+ namespace_storage_info.set_num_alive_documents_usage_type1(
+ namespace_storage_info.num_alive_documents_usage_type1() + 1);
+ }
+ if (usage_scores.usage_type2_count > 0) {
+ namespace_storage_info.set_num_alive_documents_usage_type2(
+ namespace_storage_info.num_alive_documents_usage_type2() + 1);
+ }
+ if (usage_scores.usage_type3_count > 0) {
+ namespace_storage_info.set_num_alive_documents_usage_type3(
+ namespace_storage_info.num_alive_documents_usage_type3() + 1);
+ }
+ }
+ }
+
+ for (auto& itr : namespace_to_storage_info) {
+ storage_info.mutable_namespace_storage_info()->Add(std::move(itr.second));
+ }
+ storage_info.set_num_alive_documents(total_num_alive);
+ storage_info.set_num_deleted_documents(total_num_deleted);
+ storage_info.set_num_expired_documents(total_num_expired);
+ return storage_info;
+}
+
+DocumentStorageInfoProto DocumentStore::GetStorageInfo() const {
+ DocumentStorageInfoProto storage_info = GetMemberStorageInfo();
+ int64_t directory_size = filesystem_->GetDiskUsage(base_dir_.c_str());
+ if (directory_size != Filesystem::kBadFileSize) {
+ storage_info.set_document_store_size(directory_size);
+ } else {
+ storage_info.set_document_store_size(-1);
+ }
+ storage_info.set_num_namespaces(namespace_mapper_->num_keys());
+ return CalculateDocumentStatusCounts(std::move(storage_info));
}
libtextclassifier3::Status DocumentStore::UpdateSchemaStore(
@@ -1486,50 +1481,19 @@
schema_store_ = schema_store;
document_validator_.UpdateSchemaStore(schema_store);
- // Append a tombstone for each deleted schema type. This way, we don't have
- // to read out each document, check if the schema type has been deleted, and
- // append a tombstone per-document.
- for (const auto& schema_type :
- set_schema_result.schema_types_deleted_by_name) {
- // TODO(b/144458732): Implement a more robust version of
- // ICING_RETURN_IF_ERROR that can support error logging.
- libtextclassifier3::Status status =
- document_log_->WriteProto(CreateSchemaTypeTombstone(schema_type))
- .status();
- if (!status.ok()) {
- ICING_LOG(ERROR) << status.error_message()
- << "Failed to delete schema_type. schema_type = "
- << schema_type;
- return status;
- }
- }
-
int size = document_id_mapper_->num_elements();
for (DocumentId document_id = 0; document_id < size; document_id++) {
- auto exists_or = DoesDocumentExistAndGetFileOffset(document_id);
- if (absl_ports::IsNotFound(exists_or.status())) {
+ if (!InternalDoesDocumentExist(document_id)) {
// Skip nonexistent documents
continue;
- } else if (!exists_or.ok()) {
- // Real error, pass up
- return absl_ports::Annotate(
- exists_or.status(),
- IcingStringUtil::StringPrintf("Failed to retrieve DocumentId %d",
- document_id));
}
// Guaranteed that the document exists now.
ICING_ASSIGN_OR_RETURN(const DocumentFilterData* filter_data,
filter_cache_->Get(document_id));
- if (set_schema_result.schema_types_deleted_by_id.count(
- filter_data->schema_type_id()) != 0) {
- // We already created a tombstone for this deleted type. Just update the
- // derived files now.
- ICING_RETURN_IF_ERROR(
- document_id_mapper_->Set(document_id, kDocDeletedFlag));
- continue;
- }
+ bool delete_document = set_schema_result.schema_types_deleted_by_id.count(
+ filter_data->schema_type_id()) != 0;
// Check if we need to update the FilterCache entry for this document. It
// may have been assigned a different SchemaTypeId in the new SchemaStore.
@@ -1553,17 +1517,17 @@
filter_cache_->mutable_array()[document_id].set_schema_type_id(
schema_type_id);
}
-
if (revalidate_document) {
- if (!document_validator_.Validate(document).ok()) {
- // Document is no longer valid with the new SchemaStore. Mark as
- // deleted
- auto delete_status = Delete(document.namespace_(), document.uri());
- if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
- // Real error, pass up
- return delete_status;
- }
- }
+ delete_document = !document_validator_.Validate(document).ok();
+ }
+ }
+
+ if (delete_document) {
+ // Schema type was deleted or the document failed revalidation. Delete it.
+ auto delete_status = Delete(document_id);
+ if (!delete_status.ok() && !absl_ports::IsNotFound(delete_status)) {
+ // Real error, pass up
+ return delete_status;
}
}
}
@@ -1577,7 +1541,8 @@
}
libtextclassifier3::Status DocumentStore::OptimizeInto(
- const std::string& new_directory, const LanguageSegmenter* lang_segmenter) {
+ const std::string& new_directory, const LanguageSegmenter* lang_segmenter,
+ OptimizeStatsProto* stats) {
// Validates directory
if (new_directory == base_dir_) {
return absl_ports::InvalidArgumentError(
@@ -1592,10 +1557,16 @@
// Writes all valid docs into new document store (new directory)
int size = document_id_mapper_->num_elements();
+ int num_deleted = 0;
+ int num_expired = 0;
for (DocumentId document_id = 0; document_id < size; document_id++) {
auto document_or = Get(document_id, /*clear_internal_fields=*/false);
if (absl_ports::IsNotFound(document_or.status())) {
- // Skip nonexistent documents
+ if (IsDeleted(document_id)) {
+ ++num_deleted;
+ } else if (IsExpired(document_id)) {
+ ++num_expired;
+ }
continue;
} else if (!document_or.ok()) {
// Real error, pass up
@@ -1636,12 +1607,17 @@
// Copy over usage scores.
ICING_ASSIGN_OR_RETURN(UsageStore::UsageScores usage_scores,
usage_store_->GetUsageScores(document_id));
+
DocumentId new_document_id = new_document_id_or.ValueOrDie();
ICING_RETURN_IF_ERROR(
new_doc_store->SetUsageScores(new_document_id, usage_scores));
}
-
- ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk());
+ if (stats != nullptr) {
+ stats->set_num_original_documents(size);
+ stats->set_num_deleted_documents(num_deleted);
+ stats->set_num_expired_documents(num_expired);
+ }
+ ICING_RETURN_IF_ERROR(new_doc_store->PersistToDisk(PersistType::FULL));
return libtextclassifier3::Status::OK;
}
@@ -1653,7 +1629,7 @@
int32_t num_documents = document_id_mapper_->num_elements();
for (DocumentId document_id = kMinDocumentId; document_id < num_documents;
++document_id) {
- if (!DoesDocumentExist(document_id)) {
+ if (!InternalDoesDocumentExist(document_id)) {
++optimize_info.optimizable_docs;
}
@@ -1691,10 +1667,10 @@
ICING_ASSIGN_OR_RETURN(const int64_t document_key_mapper_size,
document_key_mapper_->GetElementsSize());
- // We don't include the namespace_mapper or the corpus_mapper because it's not
- // clear if we could recover any space even if Optimize were called. Deleting
- // 100s of documents could still leave a few documents of a namespace, and
- // then there would be no change.
+ // We don't include the namespace_mapper or the corpus_mapper because it's
+ // not clear if we could recover any space even if Optimize were called.
+ // Deleting 100s of documents could still leave a few documents of a
+ // namespace, and then there would be no change.
int64_t total_size = document_log_file_size + document_key_mapper_size +
document_id_mapper_file_size + score_cache_file_size +
@@ -1724,8 +1700,8 @@
libtextclassifier3::Status DocumentStore::ClearDerivedData(
DocumentId document_id) {
// We intentionally leave the data in key_mapper_ because locating that data
- // requires fetching namespace and uri. Leaving data in key_mapper_ should be
- // fine because the data is hashed.
+ // requires fetching namespace and uri. Leaving data in key_mapper_ should
+ // be fine because the data is hashed.
ICING_RETURN_IF_ERROR(document_id_mapper_->Set(document_id, kDocDeletedFlag));
diff --git a/icing/store/document-store.h b/icing/store/document-store.h
index b2908f0..9e1b3ec 100644
--- a/icing/store/document-store.h
+++ b/icing/store/document-store.h
@@ -29,6 +29,9 @@
#include "icing/proto/document.pb.h"
#include "icing/proto/document_wrapper.pb.h"
#include "icing/proto/logging.pb.h"
+#include "icing/proto/optimize.pb.h"
+#include "icing/proto/persist.pb.h"
+#include "icing/proto/storage.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/store/corpus-associated-scoring-data.h"
#include "icing/store/corpus-id.h"
@@ -106,6 +109,11 @@
// previously initialized with this directory, it will reload the files saved
// by the last instance.
//
+ // force_recovery_and_revalidate_documents=true will pre-emptively throw out
+ // the derived files and validate each document while recreating them. This
+ // can be used to indicate that the schema (and type ids) may have changed and
+ // those changes might not have been applied to the document store.
+ //
// If initialize_stats is present, the fields related to DocumentStore will be
// populated.
//
@@ -122,7 +130,8 @@
static libtextclassifier3::StatusOr<DocumentStore::CreateResult> Create(
const Filesystem* filesystem, const std::string& base_dir,
const Clock* clock, const SchemaStore* schema_store,
- NativeInitializeStats* initialize_stats = nullptr);
+ bool force_recovery_and_revalidate_documents = false,
+ InitializeStatsProto* initialize_stats = nullptr);
// Returns the maximum DocumentId that the DocumentStore has assigned. If
// there has not been any DocumentIds assigned, i.e. the DocumentStore is
@@ -152,10 +161,10 @@
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<DocumentId> Put(
const DocumentProto& document, int32_t num_tokens = 0,
- NativePutDocumentStats* put_document_stats = nullptr);
+ PutDocumentStatsProto* put_document_stats = nullptr);
libtextclassifier3::StatusOr<DocumentId> Put(
DocumentProto&& document, int32_t num_tokens = 0,
- NativePutDocumentStats* put_document_stats = nullptr);
+ PutDocumentStatsProto* put_document_stats = nullptr);
// Finds and returns the document identified by the given key (namespace +
// uri). If 'clear_internal_fields' is true, document level data that's
@@ -189,18 +198,21 @@
// Check if a document exists. Existence means it hasn't been deleted and it
// hasn't expired yet.
//
+ // NOTE: This should be used when callers don't care about error messages,
+ // expect documents to be deleted/not found, or in frequently called code
+ // paths that could cause performance issues. A significant amount of CPU
+ // cycles can be saved if we don't construct strings and create new Status
+ // objects on the heap. See b/185822483.
+ //
// Returns:
// boolean whether a document exists or not
bool DoesDocumentExist(DocumentId document_id) const;
// Deletes the document identified by the given namespace and uri. The
- // document proto will be marked as deleted if 'soft_delete' is true,
- // otherwise the document proto will be erased immediately.
+ // document proto will be erased immediately.
//
// NOTE:
- // 1. The soft deletion uses less CPU power, it can be applied on
- // non-sensitive data.
- // 2. Space is not reclaimed for deleted documents until Optimize() is
+ // Space is not reclaimed for deleted documents until Optimize() is
// called.
//
// Returns:
@@ -208,26 +220,20 @@
// NOT_FOUND if no document exists with namespace, uri
// INTERNAL_ERROR on IO error
libtextclassifier3::Status Delete(std::string_view name_space,
- std::string_view uri,
- bool soft_delete = false);
+ std::string_view uri);
- // Deletes the document identified by the given document_id. The
- // document proto will be marked as deleted if 'soft_delete' is true,
- // otherwise the document proto will be erased immediately.
+ // Deletes the document identified by the given document_id. The document
+ // proto will be erased immediately.
//
// NOTE:
- // 1. If possible, please use the other method Delete(name_space, uri,
- // soft_delete) for soft deletes because we need namespace and uri to
- // perform soft deletes.
- // 2. Space is not reclaimed for deleted documents until Optimize() is
+ // Space is not reclaimed for deleted documents until Optimize() is
// called.
//
// Returns:
// OK on success
// INTERNAL_ERROR on IO error
// INVALID_ARGUMENT if document_id is invalid.
- libtextclassifier3::Status Delete(DocumentId document_id,
- bool soft_delete = false);
+ libtextclassifier3::Status Delete(DocumentId document_id);
// Returns the NamespaceId of the string namespace
//
@@ -250,16 +256,9 @@
// Returns the DocumentAssociatedScoreData of the document specified by the
// DocumentId.
//
- // NOTE: This does not check if the document exists and will return the
- // DocumentFilterData of the document even if it has been deleted. Users
- // should check DoesDocumentExist(document_id) if they only want existing
- // documents' DocumentFilterData.
- //
// Returns:
// DocumentAssociatedScoreData on success
- // OUT_OF_RANGE if document_id is negative or exceeds previously seen
- // DocumentIds
- // NOT_FOUND if no score data is found
+ // NOT_FOUND if the document or the score data is not found
libtextclassifier3::StatusOr<DocumentAssociatedScoreData>
GetDocumentAssociatedScoreData(DocumentId document_id) const;
@@ -296,8 +295,8 @@
//
// Returns:
// UsageScores on success
+ // NOT_FOUND if document_id no longer exists.
// INVALID_ARGUMENT if document_id is invalid
- // INTERNAL_ERROR on I/O errors
libtextclassifier3::StatusOr<UsageStore::UsageScores> GetUsageScores(
DocumentId document_id) const;
@@ -311,56 +310,43 @@
libtextclassifier3::Status ReportUsage(const UsageReport& usage_report);
// Deletes all documents belonging to the given namespace. The documents will
- // be marked as deleted if 'soft_delete' is true, otherwise they will be
- // erased immediately.
+ // be erased immediately.
//
// NOTE:
- // 1. The soft deletion uses less CPU power, it can be applied on
- // non-sensitive data.
- // 2. Space is not reclaimed for deleted documents until Optimize() is
+ // Space is not reclaimed for deleted documents until Optimize() is
// called.
//
// Returns:
// OK on success
// NOT_FOUND if namespace doesn't exist
// INTERNAL_ERROR on IO error
- DeleteByGroupResult DeleteByNamespace(std::string_view name_space,
- bool soft_delete = false);
+ DeleteByGroupResult DeleteByNamespace(std::string_view name_space);
// Deletes all documents belonging to the given schema type. The documents
- // will be marked as deleted if 'soft_delete' is true, otherwise they will be
- // erased immediately.
+ // will be erased immediately.
//
// NOTE:
- // 1. The soft deletion uses less CPU power, it can be applied on
- // non-sensitive data.
- // 2. Space is not reclaimed for deleted documents until Optimize() is
+ // Space is not reclaimed for deleted documents until Optimize() is
// called.
//
// Returns:
// OK on success
// NOT_FOUND if schema_type doesn't exist
// INTERNAL_ERROR on IO error
- DeleteByGroupResult DeleteBySchemaType(std::string_view schema_type,
- bool soft_delete = false);
+ DeleteByGroupResult DeleteBySchemaType(std::string_view schema_type);
// Syncs all the data and metadata changes to disk.
//
// Returns:
// OK on success
// INTERNAL on I/O error
- libtextclassifier3::Status PersistToDisk();
+ libtextclassifier3::Status PersistToDisk(PersistType::Code persist_type);
- // Calculates and returns the disk usage in bytes. Rounds up to the nearest
- // block size.
+ // Calculates the StorageInfo for the Document Store.
//
- // Returns:
- // Disk usage on success
- // INTERNAL_ERROR on IO error
- //
- // TODO(tjbarron): consider returning a struct which has the breakdown of each
- // component.
- libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+ // If an IO error occurs while trying to calculate the value for a field, then
+ // that field will be set to -1.
+ DocumentStorageInfoProto GetStorageInfo() const;
// Update any derived data off of the SchemaStore with the new SchemaStore.
// This may include pointers, SchemaTypeIds, etc.
@@ -407,6 +393,8 @@
// reassigned so any files / classes that are based on old document ids may be
// outdated.
//
+ // stats will be set if non-null.
+ //
// NOTE: The tasks in this method are too expensive to be executed in
// real-time. The caller should decide how frequently and when to call this
// method based on device usage.
@@ -416,8 +404,8 @@
// INVALID_ARGUMENT if new_directory is same as current base directory
// INTERNAL_ERROR on IO error
libtextclassifier3::Status OptimizeInto(
- const std::string& new_directory,
- const LanguageSegmenter* lang_segmenter);
+ const std::string& new_directory, const LanguageSegmenter* lang_segmenter,
+ OptimizeStatsProto* stats = nullptr);
// Calculates status for a potential Optimize call. Includes how many docs
// there are vs how many would be optimized away. And also includes an
@@ -508,7 +496,8 @@
bool initialized_ = false;
libtextclassifier3::StatusOr<DataLoss> Initialize(
- NativeInitializeStats* initialize_stats);
+ bool force_recovery_and_revalidate_documents,
+ InitializeStatsProto* initialize_stats);
// Creates sub-components and verifies the integrity of each sub-component.
//
@@ -518,6 +507,9 @@
// Re-generates all files derived from the ground truth: the document log.
//
+ // revalidate_documents=true will also cause each document to be revalidated
+ // against the schema as it is read out of the document log.
+ //
// NOTE: if this function fails, the only thing we can do is to retry it until
// it succeeds or prevent the initialization of a DocumentStore. The
// DocumentStore object wouldn't work reliably if this fails.
@@ -528,7 +520,7 @@
// document_id
// mapper.
// 3. Create header and store the updated combined checksum
- libtextclassifier3::Status RegenerateDerivedFiles();
+ libtextclassifier3::Status RegenerateDerivedFiles(bool revalidate_documents);
// Resets the unique_ptr to the document_key_mapper, deletes the underlying
// file, and re-creates a new instance of the document_key_mapper .
@@ -576,8 +568,8 @@
// if it doesn't exist.
bool HeaderExists();
- // Update and replace the header file. Creates the header file if it doesn't
- // exist.
+ // Update, replace and persist the header file. Creates the header file if it
+ // doesn't exist.
//
// Returns:
// OK on success
@@ -586,14 +578,13 @@
libtextclassifier3::StatusOr<DocumentId> InternalPut(
DocumentProto& document,
- NativePutDocumentStats* put_document_stats = nullptr);
+ PutDocumentStatsProto* put_document_stats = nullptr);
// Helper function to do batch deletes. Documents with the given
// "namespace_id" and "schema_type_id" will be deleted. If callers don't need
// to specify the namespace or schema type, pass in kInvalidNamespaceId or
- // kInvalidSchemaTypeId. The document protos will be marked as deleted if
- // 'soft_delete' is true, otherwise the document protos with their derived
- // data will be erased / cleared immediately.
+ // kInvalidSchemaTypeId. The document protos with their derived data will be
+ // erased / cleared immediately.
//
// NOTE: Space is not reclaimed in the derived files until Optimize() is
// called.
@@ -602,28 +593,7 @@
// Number of documents that were actually updated to be deleted
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<int> BatchDelete(NamespaceId namespace_id,
- SchemaTypeId schema_type_id,
- bool soft_delete);
-
- // Marks the document identified by the given name_space, uri and document_id
- // as deleted, to be removed later during Optimize().
- //
- // Returns:
- // OK on success
- // INTERNAL_ERROR on IO error
- libtextclassifier3::Status SoftDelete(std::string_view name_space,
- std::string_view uri,
- DocumentId document_id);
-
- // Erases the document at the given document_log_offset from the document_log
- // and clears the derived data identified by the given document_id. The space
- // will be reclaimed later during Optimize().
- //
- // Returns:
- // OK on success
- // INTERNAL_ERROR on IO error
- libtextclassifier3::Status HardDelete(DocumentId document_id,
- int64_t document_log_offset);
+ SchemaTypeId schema_type_id);
// Helper method to find a DocumentId that is associated with the given
// namespace and uri.
@@ -654,22 +624,46 @@
libtextclassifier3::StatusOr<CorpusAssociatedScoreData>
GetCorpusAssociatedScoreDataToUpdate(CorpusId corpus_id) const;
- // Helper method to validate the document id and return the file offset of the
- // associated document in document_log_.
- //
- // This can be a more informative call than just DoesDocumentExist because it
- // can return more status errors on whether the Document actually doesn't
- // exist or if there was an internal error while accessing files.
+ // Check if a document exists. Existence means it hasn't been deleted and it
+ // hasn't expired yet.
//
// Returns:
- // The file offset on success
+ // OK if the document exists
// INVALID_ARGUMENT if document_id is less than 0 or greater than the
// maximum value
// NOT_FOUND if the document doesn't exist (i.e. deleted or expired)
// INTERNAL_ERROR on IO error
- libtextclassifier3::StatusOr<int64_t> DoesDocumentExistAndGetFileOffset(
+ libtextclassifier3::Status DoesDocumentExistWithStatus(
DocumentId document_id) const;
+ // Check if a document exists. Existence means it hasn't been deleted and it
+ // hasn't expired yet.
+ //
+ // This is for internal-use only because we assume that the document_id is
+ // already valid. If you're unsure if the document_id is valid, use
+ // DoesDocumentExist(document_id) instead, which will perform those additional
+ // checks.
+ //
+ // Returns:
+ // boolean whether a document exists or not
+ bool InternalDoesDocumentExist(DocumentId document_id) const;
+
+ // Checks if a document has been deleted
+ //
+ // This is for internal-use only because we assume that the document_id is
+ // already valid. If you're unsure if the document_id is valid, use
+ // DoesDocumentExist(document_id) instead, which will perform those additional
+ // checks.
+ bool IsDeleted(DocumentId document_id) const;
+
+ // Checks if a document has expired.
+ //
+ // This is for internal-use only because we assume that the document_id is
+ // already valid. If you're unsure if the document_id is valid, use
+ // DoesDocumentExist(document_id) instead, which will perform those additional
+ // checks.
+ bool IsExpired(DocumentId document_id) const;
+
// Updates the entry in the score cache for document_id.
libtextclassifier3::Status UpdateDocumentAssociatedScoreCache(
DocumentId document_id, const DocumentAssociatedScoreData& score_data);
@@ -688,6 +682,20 @@
// Sets usage scores for the given document.
libtextclassifier3::Status SetUsageScores(
DocumentId document_id, const UsageStore::UsageScores& usage_scores);
+
+ // Builds a DocumentStorageInfoProto describing the size of the Document
+ // Store's member variables.
+ //
+ // Returns:
+ // - on success, a DocumentStorageInfoProto with the fields relating to the
+ // size of Document Store member variables populated.
+ // - INTERNAL on failure to get file size
+ //
+ // NOTE(review): the return type is a plain DocumentStorageInfoProto rather
+ // than a StatusOr, so an INTERNAL status cannot be returned through this
+ // signature. Confirm how a file-size failure is actually reported and update
+ // the list above to match.
+ DocumentStorageInfoProto GetMemberStorageInfo() const;
+
+ // Takes a DocumentStorageInfoProto by value and additionally sets the number
+ // of alive, deleted and expired documents on it.
+ //
+ // Returns:
+ // - on success, the storage_info that was passed in but with the number of
+ // alive, deleted and expired documents also set.
+ // - OUT_OF_RANGE, this should never happen. This could only be returned if
+ // the document_id_mapper somehow became larger than the filter cache.
+ //
+ // NOTE(review): the return type is a plain DocumentStorageInfoProto rather
+ // than a StatusOr, so an OUT_OF_RANGE status cannot be returned through this
+ // signature. Confirm what is returned in that case and update the list above
+ // to match.
+ DocumentStorageInfoProto CalculateDocumentStatusCounts(
+ DocumentStorageInfoProto storage_info) const;
};
} // namespace lib
diff --git a/icing/store/document-store_benchmark.cc b/icing/store/document-store_benchmark.cc
new file mode 100644
index 0000000..f68e115
--- /dev/null
+++ b/icing/store/document-store_benchmark.cc
@@ -0,0 +1,174 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <unistd.h>
+
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <ostream>
+#include <random>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+#include <unordered_set>
+#include <vector>
+
+#include "testing/base/public/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include "icing/document-builder.h"
+#include "icing/file/filesystem.h"
+#include "icing/proto/document.pb.h"
+#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
+#include "icing/schema/schema-store.h"
+#include "icing/store/document-store.h"
+#include "icing/testing/common-matchers.h"
+#include "icing/testing/tmp-directory.h"
+#include "icing/util/clock.h"
+
+// Run on a Linux workstation:
+// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/store:document-store_benchmark
+//
+// $ blaze-bin/icing/store/document-store_benchmark
+// --benchmarks=all --benchmark_memory_usage
+//
+// Run on an Android device:
+// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
+// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
+// //icing/store:document-store_benchmark
+//
+// $ adb push blaze-bin/icing/store/document-store_benchmark
+// /data/local/tmp/
+//
+// $ adb shell /data/local/tmp/document-store_benchmark
+// --benchmarks=all
+
+namespace icing {
+namespace lib {
+
+namespace {
+
+// Short aliases for the generated proto enum values used to build the
+// benchmark schema. Each alias has the enum type of its initializer
+// (PropertyConfigProto_Cardinality_Code, StringIndexingConfig_TokenizerType_Code
+// and TermMatchType_Code respectively).
+constexpr auto CARDINALITY_OPTIONAL =
+    PropertyConfigProto_Cardinality_Code_OPTIONAL;
+
+constexpr auto TOKENIZER_PLAIN = StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr auto MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
+
+// Scoped directory: recursively creates `dir` on construction and recursively
+// deletes it on destruction.
+//
+// The object stores its own copy of the Filesystem that is passed in. The
+// results of the create/delete calls are not checked.
+class DestructibleDirectory {
+ public:
+  explicit DestructibleDirectory(const Filesystem& filesystem,
+                                 const std::string& dir)
+      : filesystem_(filesystem), dir_(dir) {
+    filesystem_.CreateDirectoryRecursively(dir_.c_str());
+  }
+  ~DestructibleDirectory() {
+    filesystem_.DeleteDirectoryRecursively(dir_.c_str());
+  }
+
+  // Not copyable: the destructor of a copy would delete the directory while
+  // the original object is still relying on it.
+  DestructibleDirectory(const DestructibleDirectory&) = delete;
+  DestructibleDirectory& operator=(const DestructibleDirectory&) = delete;
+
+ private:
+  Filesystem filesystem_;
+  std::string dir_;
+};
+
+// Builds an "email" document keyed by (`namespace_`, `uri`) with fixed
+// "subject" and "body" string properties.
+//
+// The arguments are taken by const reference so that callers do not pay for
+// an extra string copy on every call.
+DocumentProto CreateDocument(const std::string& namespace_,
+                             const std::string& uri) {
+  return DocumentBuilder()
+      .SetKey(namespace_, uri)
+      .SetSchema("email")
+      .AddStringProperty("subject", "subject foo")
+      .AddStringProperty("body", "body bar")
+      .Build();
+}
+
+// Builds the schema used by this benchmark: a single "email" type with two
+// properties, "subject" and "body". Both are string properties configured
+// with MATCH_EXACT / TOKENIZER_PLAIN and CARDINALITY_OPTIONAL.
+SchemaProto CreateSchema() {
+ return SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
+}
+
+// Creates a SchemaStore under "<directory>/schema" and sets the schema
+// produced by CreateSchema() on it.
+//
+// `filesystem` is taken by reference (not by value) because its address is
+// handed to SchemaStore::Create(): with a by-value parameter the store would
+// be given a pointer to a copy that is destroyed when this function returns.
+// NOTE(review): presumably the store retains `filesystem` and `clock`, so
+// callers should keep both alive for as long as the returned store is used;
+// confirm against SchemaStore's documented lifetime contract.
+//
+// Returns:
+//   The created SchemaStore. A failure of SetSchema() is only logged; the
+//   store is still returned in that case.
+std::unique_ptr<SchemaStore> CreateSchemaStore(const Filesystem& filesystem,
+                                               const std::string& directory,
+                                               const Clock* clock) {
+  const std::string schema_store_dir = directory + "/schema";
+  filesystem.CreateDirectoryRecursively(schema_store_dir.data());
+  std::unique_ptr<SchemaStore> schema_store =
+      SchemaStore::Create(&filesystem, schema_store_dir, clock).ValueOrDie();
+
+  auto set_schema_status = schema_store->SetSchema(CreateSchema());
+  if (!set_schema_status.ok()) {
+    ICING_LOG(ERROR) << set_schema_status.status().error_message();
+  }
+
+  return schema_store;
+}
+
+// Benchmarks DocumentStore::DoesDocumentExist() on randomly chosen document
+// ids, after first putting and then deleting a large number of documents.
+void BM_DoesDocumentExistBenchmark(benchmark::State& state) {
+  Filesystem filesystem;
+  Clock clock;
+
+  // `ddir` is declared before the stores below so that it is destroyed after
+  // them, i.e. the directory is removed only once the stores are gone.
+  std::string directory = GetTestTempDir() + "/icing";
+  DestructibleDirectory ddir(filesystem, directory);
+
+  std::string document_store_dir = directory + "/store";
+  std::unique_ptr<SchemaStore> schema_store =
+      CreateSchemaStore(filesystem, directory, &clock);
+
+  filesystem.CreateDirectoryRecursively(document_store_dir.data());
+  ICING_ASSERT_OK_AND_ASSIGN(
+      DocumentStore::CreateResult create_result,
+      DocumentStore::Create(&filesystem, document_store_dir, &clock,
+                            schema_store.get()));
+  std::unique_ptr<DocumentStore> document_store =
+      std::move(create_result.document_store);
+
+  constexpr int kNumDocuments = 300000;
+  for (int i = 0; i < kNumDocuments; ++i) {
+    // Put and delete a lot of documents to fill up our derived files with
+    // stuff. Both calls are checked so that a failing setup is not silently
+    // benchmarked.
+    const std::string uri = std::to_string(i);
+    ICING_ASSERT_OK(document_store->Put(CreateDocument("namespace", uri)));
+    ICING_ASSERT_OK(document_store->Delete("namespace", uri));
+  }
+
+  std::default_random_engine random;
+  // uniform_int_distribution is inclusive on both ends, so sample from
+  // [0, kNumDocuments - 1] to cover exactly the documents created above.
+  // NOTE(review): assumes Put() hands out document ids sequentially starting
+  // at 0 - confirm against DocumentStore.
+  std::uniform_int_distribution<> dist(0, kNumDocuments - 1);
+  for (auto s : state) {
+    // Check random document ids to see if they exist. Hopefully to simulate
+    // page faulting in different sections of our mmapped derived files.
+    int document_id = dist(random);
+    benchmark::DoNotOptimize(document_store->DoesDocumentExist(document_id));
+  }
+}
+BENCHMARK(BM_DoesDocumentExistBenchmark);
+
+} // namespace
+
+} // namespace lib
+} // namespace icing
diff --git a/icing/store/document-store_test.cc b/icing/store/document-store_test.cc
index 7754373..b37c6de 100644
--- a/icing/store/document-store_test.cc
+++ b/icing/store/document-store_test.cc
@@ -19,6 +19,7 @@
#include <memory>
#include <string>
+#include "icing/text_classifier/lib3/utils/base/status.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/absl_ports/str_cat.h"
@@ -29,8 +30,11 @@
#include "icing/file/mock-filesystem.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
#include "icing/portable/equals-proto.h"
+#include "icing/portable/platform.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
+#include "icing/proto/storage.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/store/corpus-associated-scoring-data.h"
#include "icing/store/corpus-id.h"
@@ -39,7 +43,6 @@
#include "icing/store/namespace-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
-#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/tokenization/language-segmenter-factory.h"
@@ -55,6 +58,7 @@
using ::icing::lib::portable_equals_proto::EqualsProto;
using ::testing::_;
using ::testing::Eq;
+using ::testing::Ge;
using ::testing::Gt;
using ::testing::HasSubstr;
using ::testing::IsEmpty;
@@ -64,6 +68,32 @@
using ::testing::Return;
using ::testing::UnorderedElementsAre;
+// Finds the NamespaceStorageInfoProto for `name_space` inside `storage_info`.
+//
+// Returns:
+//   A reference to the matching entry of `storage_info` (valid for as long as
+//   `storage_info` is). If there is no matching entry, a test failure is
+//   recorded and a reference to the default (empty) instance is returned.
+const NamespaceStorageInfoProto& GetNamespaceStorageInfo(
+    const DocumentStorageInfoProto& storage_info,
+    const std::string& name_space) {
+  for (const NamespaceStorageInfoProto& namespace_storage_info :
+       storage_info.namespace_storage_info()) {
+    if (namespace_storage_info.namespace_() == name_space) {
+      return namespace_storage_info;
+    }
+  }
+  // Didn't find our namespace, fail the test.
+  EXPECT_TRUE(false) << "Failed to find namespace '" << name_space
+                     << "' in DocumentStorageInfoProto.";
+  // This function returns a reference, so returning a temporary here would
+  // leave the caller with a dangling reference. Hand back the generated
+  // default instance instead, which is not destroyed when this call returns.
+  return NamespaceStorageInfoProto::default_instance();
+}
+
+// Short aliases for the generated proto enum values used by the schemas in
+// these tests. Each alias has the enum type of its initializer
+// (PropertyConfigProto_Cardinality_Code, StringIndexingConfig_TokenizerType_Code,
+// TermMatchType_Code and PropertyConfigProto_DataType_Code respectively).
+constexpr auto CARDINALITY_OPTIONAL =
+    PropertyConfigProto_Cardinality_Code_OPTIONAL;
+
+constexpr auto TOKENIZER_PLAIN = StringIndexingConfig_TokenizerType_Code_PLAIN;
+
+constexpr auto MATCH_EXACT = TermMatchType_Code_EXACT_ONLY;
+
+constexpr auto TYPE_INT = PropertyConfigProto_DataType_Code_INT64;
+
UsageReport CreateUsageReport(std::string name_space, std::string uri,
int64 timestamp_ms,
UsageReport::UsageType usage_type) {
@@ -124,28 +154,22 @@
filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
-
- auto subject = type_config->add_properties();
- subject->set_property_name("subject");
- subject->set_data_type(PropertyConfigProto::DataType::STRING);
- subject->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- subject->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- subject->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
- auto body = type_config->add_properties();
- body->set_property_name("body");
- body->set_data_type(PropertyConfigProto::DataType::STRING);
- body->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- body->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- body->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
-
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
@@ -161,6 +185,19 @@
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
+ void CorruptDocStoreHeaderChecksumFile() {
+    // Replace the DocStore's header file with one that carries the right magic
+    // but a checksum that won't match the recalculated checksum on
+    // initialization. This will force a regeneration of derived files from
+    // ground truth.
+    DocumentStore::Header bogus_header;
+    bogus_header.magic = DocumentStore::Header::kMagic;
+    bogus_header.checksum = 10;  // Arbitrary garbage checksum
+
+    const std::string header_path =
+        absl_ports::StrCat(document_store_dir_, "/document_store_header");
+    const char* path = header_path.c_str();
+    filesystem_.DeleteFile(path);
+    filesystem_.Write(path, &bogus_header, sizeof(bogus_header));
+ }
+
const Filesystem filesystem_;
const std::string test_dir_;
FakeClock fake_clock_;
@@ -290,7 +327,7 @@
EXPECT_THAT(doc_store->Put(document3), IsOkAndHolds(Not(document_id1)));
}
-TEST_F(DocumentStoreTest, IsDocumentExisting) {
+TEST_F(DocumentStoreTest, IsDocumentExistingWithoutStatus) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -322,7 +359,7 @@
IsFalse());
}
-TEST_F(DocumentStoreTest, GetSoftDeletedDocumentNotFound) {
+TEST_F(DocumentStoreTest, GetDeletedDocumentNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -336,29 +373,7 @@
IsOkAndHolds(EqualsProto(test_document1_)));
ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri(),
- /*soft_delete=*/true));
- EXPECT_THAT(
- document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(DocumentStoreTest, GetHardDeletedDocumentNotFound) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- ICING_EXPECT_OK(document_store->Put(DocumentProto(test_document1_)));
- EXPECT_THAT(
- document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
- IsOkAndHolds(EqualsProto(test_document1_)));
-
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri(),
- /*soft_delete=*/false));
+ test_document1_.uri()));
EXPECT_THAT(
document_store->Get(test_document1_.namespace_(), test_document1_.uri()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -436,16 +451,16 @@
// Validates that deleting something non-existing won't append anything to
// ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
EXPECT_THAT(
document_store->Delete("nonexistent_namespace", "nonexistent_uri"),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
TEST_F(DocumentStoreTest, DeleteAlreadyDeletedDocumentNotFound) {
@@ -468,7 +483,7 @@
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, SoftDeleteByNamespaceOk) {
+TEST_F(DocumentStoreTest, DeleteByNamespaceOk) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -499,7 +514,7 @@
// DELETE namespace.1. document1 and document 4 should be deleted. document2
// and document3 should still be retrievable.
DocumentStore::DeleteByGroupResult group_result =
- doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/true);
+ doc_store->DeleteByNamespace("namespace.1");
EXPECT_THAT(group_result.status, IsOk());
EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
@@ -512,51 +527,7 @@
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, HardDeleteByNamespaceOk) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- DocumentProto document1 = test_document1_;
- document1.set_namespace_("namespace.1");
- document1.set_uri("uri1");
- ICING_ASSERT_OK(doc_store->Put(document1));
-
- DocumentProto document2 = test_document1_;
- document2.set_namespace_("namespace.2");
- document2.set_uri("uri1");
- ICING_ASSERT_OK(doc_store->Put(document2));
-
- DocumentProto document3 = test_document1_;
- document3.set_namespace_("namespace.3");
- document3.set_uri("uri1");
- ICING_ASSERT_OK(doc_store->Put(document3));
-
- DocumentProto document4 = test_document1_;
- document4.set_namespace_("namespace.1");
- document4.set_uri("uri2");
- ICING_ASSERT_OK(doc_store->Put(document4));
-
- // DELETE namespace.1. document1 and document 4 should be deleted. document2
- // and document3 should still be retrievable.
- DocumentStore::DeleteByGroupResult group_result =
- doc_store->DeleteByNamespace("namespace.1", /*soft_delete=*/false);
- EXPECT_THAT(group_result.status, IsOk());
- EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
- EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(doc_store->Get(document2.namespace_(), document2.uri()),
- IsOkAndHolds(EqualsProto(document2)));
- EXPECT_THAT(doc_store->Get(document3.namespace_(), document3.uri()),
- IsOkAndHolds(EqualsProto(document3)));
- EXPECT_THAT(doc_store->Get(document4.namespace_(), document4.uri()),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNonexistentNamespaceNotFound) {
+TEST_F(DocumentStoreTest, DeleteByNamespaceNonexistentNamespaceNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -566,45 +537,18 @@
// Validates that deleting something non-existing won't append anything to
// ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(doc_store
- ->DeleteByNamespace("nonexistent_namespace",
- /*soft_delete=*/true)
- .status,
+ EXPECT_THAT(doc_store->DeleteByNamespace("nonexistent_namespace").status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, HardDeleteByNamespaceNonexistentNamespaceNotFound) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- // Validates that deleting something non-existing won't append anything to
- // ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
-
- EXPECT_THAT(doc_store
- ->DeleteByNamespace("nonexistent_namespace",
- /*soft_delete=*/false)
- .status,
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
-}
-
-TEST_F(DocumentStoreTest, SoftDeleteByNamespaceNoExistingDocumentsNotFound) {
+TEST_F(DocumentStoreTest, DeleteByNamespaceNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -619,33 +563,9 @@
// At this point, there are no existing documents with the namespace, even
// though Icing's derived files know about this namespace. We should still
// return NOT_FOUND since nothing existing has this namespace.
- EXPECT_THAT(document_store
- ->DeleteByNamespace(test_document1_.namespace_(),
- /*soft_delete=*/true)
- .status,
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(DocumentStoreTest, HardDeleteByNamespaceNoExistingDocumentsNotFound) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- ICING_EXPECT_OK(document_store->Put(test_document1_));
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
-
- // At this point, there are no existing documents with the namespace, even
- // though Icing's derived files know about this namespace. We should still
- // return NOT_FOUND since nothing existing has this namespace.
- EXPECT_THAT(document_store
- ->DeleteByNamespace(test_document1_.namespace_(),
- /*soft_delete=*/false)
- .status,
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(
+ document_store->DeleteByNamespace(test_document1_.namespace_()).status,
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
TEST_F(DocumentStoreTest, DeleteByNamespaceRecoversOk) {
@@ -665,7 +585,7 @@
document4.set_namespace_("namespace.1");
document4.set_uri("uri2");
- int64_t ground_truth_size_before;
+ int64_t document_log_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -686,21 +606,11 @@
EXPECT_THAT(group_result.status, IsOk());
EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
- ground_truth_size_before = filesystem_.GetFileSize(
+ document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
} // Destructors should update checksum and persist all data to file.
- // Change the DocStore's header combined checksum so that it won't match the
- // recalculated checksum on initialization. This will force a regeneration of
- // derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
-
+ CorruptDocStoreHeaderChecksumFile();
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -710,9 +620,9 @@
std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+ EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(doc_store->Get(document1.namespace_(), document1.uri()),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -724,14 +634,13 @@
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeOk) {
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
- type_config = schema.add_types();
- type_config->set_schema_type("person");
+TEST_F(DocumentStoreTest, DeleteBySchemaTypeOk) {
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .AddType(SchemaTypeConfigBuilder().SetType("person"))
+ .Build();
std::string schema_store_dir = schema_store_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
@@ -784,7 +693,7 @@
// Delete the "email" type and ensure that it works across both
// email_document's namespaces. And that other documents aren't affected.
DocumentStore::DeleteByGroupResult group_result =
- document_store->DeleteBySchemaType("email", /*soft_delete=*/true);
+ document_store->DeleteBySchemaType("email");
EXPECT_THAT(group_result.status, IsOk());
EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
EXPECT_THAT(document_store->Get(email_1_document_id),
@@ -797,8 +706,7 @@
IsOkAndHolds(EqualsProto(person_document)));
// Delete the "message" type and check that other documents aren't affected
- group_result =
- document_store->DeleteBySchemaType("message", /*soft_delete=*/true);
+ group_result = document_store->DeleteBySchemaType("message");
EXPECT_THAT(group_result.status, IsOk());
EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
EXPECT_THAT(document_store->Get(email_1_document_id),
@@ -811,94 +719,7 @@
IsOkAndHolds(EqualsProto(person_document)));
}
-TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeOk) {
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
- type_config = schema.add_types();
- type_config->set_schema_type("person");
-
- std::string schema_store_dir = schema_store_dir_ + "_custom";
- filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
- filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
- ICING_ASSERT_OK_AND_ASSIGN(
- std::unique_ptr<SchemaStore> schema_store,
- SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
-
- ICING_ASSERT_OK(schema_store->SetSchema(schema));
-
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- DocumentProto email_document_1 = DocumentBuilder()
- .SetKey("namespace1", "1")
- .SetSchema("email")
- .SetCreationTimestampMs(1)
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_1_document_id,
- document_store->Put(email_document_1));
-
- DocumentProto email_document_2 = DocumentBuilder()
- .SetKey("namespace2", "2")
- .SetSchema("email")
- .SetCreationTimestampMs(1)
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId email_2_document_id,
- document_store->Put(email_document_2));
-
- DocumentProto message_document = DocumentBuilder()
- .SetKey("namespace", "3")
- .SetSchema("message")
- .SetCreationTimestampMs(1)
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId message_document_id,
- document_store->Put(message_document));
-
- DocumentProto person_document = DocumentBuilder()
- .SetKey("namespace", "4")
- .SetSchema("person")
- .SetCreationTimestampMs(1)
- .Build();
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId person_document_id,
- document_store->Put(person_document));
-
- // Delete the "email" type and ensure that it works across both
- // email_document's namespaces. And that other documents aren't affected.
- DocumentStore::DeleteByGroupResult group_result =
- document_store->DeleteBySchemaType("email", /*soft_delete=*/true);
- EXPECT_THAT(group_result.status, IsOk());
- EXPECT_THAT(group_result.num_docs_deleted, Eq(2));
- EXPECT_THAT(document_store->Get(email_1_document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(document_store->Get(email_2_document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(document_store->Get(message_document_id),
- IsOkAndHolds(EqualsProto(message_document)));
- EXPECT_THAT(document_store->Get(person_document_id),
- IsOkAndHolds(EqualsProto(person_document)));
-
- // Delete the "message" type and check that other documents aren't affected
- group_result =
- document_store->DeleteBySchemaType("message", /*soft_delete=*/true);
- EXPECT_THAT(group_result.status, IsOk());
- EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
- EXPECT_THAT(document_store->Get(email_1_document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(document_store->Get(email_2_document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(document_store->Get(message_document_id),
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- EXPECT_THAT(document_store->Get(person_document_id),
- IsOkAndHolds(EqualsProto(person_document)));
-}
-
-TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
+TEST_F(DocumentStoreTest, DeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -908,47 +729,19 @@
// Validates that deleting something non-existing won't append anything to
// ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
+ int64_t document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(document_store
- ->DeleteBySchemaType("nonexistent_type",
- /*soft_delete=*/true)
- .status,
+ EXPECT_THAT(document_store->DeleteBySchemaType("nonexistent_type").status,
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
+ EXPECT_THAT(document_log_size_before, Eq(document_log_size_after));
}
-TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNonexistentSchemaTypeNotFound) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- // Validates that deleting something non-existing won't append anything to
- // ground truth
- int64_t ground_truth_size_before = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
-
- EXPECT_THAT(document_store
- ->DeleteBySchemaType("nonexistent_type",
- /*soft_delete=*/false)
- .status,
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
- absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
-
- EXPECT_THAT(ground_truth_size_before, Eq(ground_truth_size_after));
-}
-
-TEST_F(DocumentStoreTest, SoftDeleteBySchemaTypeNoExistingDocumentsNotFound) {
+TEST_F(DocumentStoreTest, DeleteBySchemaTypeNoExistingDocumentsNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -960,38 +753,17 @@
ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
test_document1_.uri()));
- EXPECT_THAT(document_store
- ->DeleteBySchemaType(test_document1_.schema(),
- /*soft_delete=*/true)
- .status,
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
-}
-
-TEST_F(DocumentStoreTest, HardDeleteBySchemaTypeNoExistingDocumentsNotFound) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- ICING_EXPECT_OK(document_store->Put(test_document1_));
- ICING_EXPECT_OK(document_store->Delete(test_document1_.namespace_(),
- test_document1_.uri()));
-
- EXPECT_THAT(document_store
- ->DeleteBySchemaType(test_document1_.schema(),
- /*soft_delete=*/false)
- .status,
- StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ EXPECT_THAT(
+ document_store->DeleteBySchemaType(test_document1_.schema()).status,
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
TEST_F(DocumentStoreTest, DeleteBySchemaTypeRecoversOk) {
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
std::string schema_store_dir = schema_store_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
@@ -1016,7 +788,7 @@
.SetSchema("message")
.SetCreationTimestampMs(1)
.Build();
- int64_t ground_truth_size_before;
+ int64_t document_log_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -1036,21 +808,11 @@
EXPECT_THAT(group_result.status, IsOk());
EXPECT_THAT(group_result.num_docs_deleted, Eq(1));
- ground_truth_size_before = filesystem_.GetFileSize(
+ document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
} // Destructors should update checksum and persist all data to file.
- // Change the DocumentStore's header combined checksum so that it won't match
- // the recalculated checksum on initialization. This will force a regeneration
- // of derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
-
+ CorruptDocStoreHeaderChecksumFile();
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -1060,9 +822,9 @@
std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+ EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(document_store->Get(email_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -1070,12 +832,25 @@
IsOkAndHolds(EqualsProto(message_document)));
}
+TEST_F(DocumentStoreTest, PutDeleteThenPut) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+ ICING_EXPECT_OK(doc_store->Put(test_document1_));
+ ICING_EXPECT_OK(
+ doc_store->Delete(test_document1_.namespace_(), test_document1_.uri()));
+ ICING_EXPECT_OK(doc_store->Put(test_document1_));
+}
+
TEST_F(DocumentStoreTest, DeletedSchemaTypeFromSchemaStoreRecoversOk) {
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
std::string schema_store_dir = schema_store_dir_ + "_custom";
filesystem_.DeleteDirectoryRecursively(schema_store_dir.c_str());
@@ -1100,7 +875,7 @@
.SetSchema("message")
.SetCreationTimestampMs(1)
.Build();
- int64_t ground_truth_size_before;
+ int64_t document_log_size_before;
{
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -1125,25 +900,16 @@
EXPECT_THAT(document_store->Get(message_document_id),
IsOkAndHolds(EqualsProto(message_document)));
- ground_truth_size_before = filesystem_.GetFileSize(
+ document_log_size_before = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
} // Destructors should update checksum and persist all data to file.
- // Change the DocumentStore's header combined checksum so that it won't match
- // the recalculated checksum on initialization. This will force a regeneration
- // of derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+ CorruptDocStoreHeaderChecksumFile();
- SchemaProto new_schema;
- type_config = new_schema.add_types();
- type_config->set_schema_type("message");
-
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_EXPECT_OK(schema_store->SetSchema(
new_schema, /*ignore_errors_and_delete_documents=*/true));
@@ -1156,9 +922,9 @@
std::move(create_result.document_store);
// Make sure we didn't add anything to the ground truth after we recovered.
- int64_t ground_truth_size_after = filesystem_.GetFileSize(
+ int64_t document_log_size_after = filesystem_.GetFileSize(
absl_ports::StrCat(document_store_dir_, "/document_log").c_str());
- EXPECT_EQ(ground_truth_size_before, ground_truth_size_after);
+ EXPECT_EQ(document_log_size_before, document_log_size_after);
EXPECT_THAT(document_store->Get(email_document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
@@ -1467,17 +1233,7 @@
IsOkAndHolds(EqualsProto(test_document2_)));
}
- // Change the DocStore's header combined checksum so that it won't match the
- // recalculated checksum on initialization. This will force a regeneration of
- // derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
-
+ CorruptDocStoreHeaderChecksumFile();
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -1507,7 +1263,7 @@
/*num_docs=*/1, /*sum_length_in_tokens=*/4)));
}
-TEST_F(DocumentStoreTest, GetDiskUsage) {
+TEST_F(DocumentStoreTest, GetStorageInfo) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1515,8 +1271,8 @@
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_doc_store_size,
- doc_store->GetDiskUsage());
+ DocumentStorageInfoProto doc_store_storage_info = doc_store->GetStorageInfo();
+ int64_t empty_doc_store_size = doc_store_storage_info.document_store_size();
EXPECT_THAT(empty_doc_store_size, Gt(0));
DocumentProto document = DocumentBuilder()
@@ -1525,15 +1281,16 @@
.AddStringProperty("subject", "foo")
.Build();
- // Since our GetDiskUsage can only get sizes in increments of block_size, we
+ // Since GetStorageInfo can only get sizes in increments of block_size, we
// need to insert enough documents so the disk usage will increase by at least
// 1 block size. The number 100 is a bit arbitrary, gotten from manually
// testing.
for (int i = 0; i < 100; ++i) {
ICING_ASSERT_OK(doc_store->Put(document));
}
- EXPECT_THAT(doc_store->GetDiskUsage(),
- IsOkAndHolds(Gt(empty_doc_store_size)));
+ doc_store_storage_info = doc_store->GetStorageInfo();
+ EXPECT_THAT(doc_store_storage_info.document_store_size(),
+ Gt(empty_doc_store_size));
// Bad file system
MockFilesystem mock_filesystem;
@@ -1546,8 +1303,8 @@
std::unique_ptr<DocumentStore> doc_store_with_mock_filesystem =
std::move(create_result.document_store);
- EXPECT_THAT(doc_store_with_mock_filesystem->GetDiskUsage(),
- StatusIs(libtextclassifier3::StatusCode::INTERNAL));
+ doc_store_storage_info = doc_store_with_mock_filesystem->GetStorageInfo();
+ EXPECT_THAT(doc_store_storage_info.document_store_size(), Eq(-1));
}
TEST_F(DocumentStoreTest, MaxDocumentId) {
@@ -1882,7 +1639,7 @@
/*length_in_tokens=*/7)));
}
-TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataOutOfRange) {
+TEST_F(DocumentStoreTest, NonexistentDocumentAssociatedScoreDataNotFound) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1891,10 +1648,10 @@
std::move(create_result.document_store);
EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(/*document_id=*/0),
- StatusIs(libtextclassifier3::StatusCode::OUT_OF_RANGE));
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, SoftDeletionDoesNotClearFilterCache) {
+TEST_F(DocumentStoreTest, DeleteClearsFilterCache) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1912,59 +1669,13 @@
/*schema_type_id=*/0,
/*expiration_timestamp_ms=*/document1_expiration_timestamp_)));
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/true));
- // Associated entry of the deleted document is removed.
- EXPECT_THAT(doc_store->GetDocumentFilterData(document_id).status(), IsOk());
-}
-
-TEST_F(DocumentStoreTest, HardDeleteClearsFilterCache) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- doc_store->Put(test_document1_));
-
- EXPECT_THAT(
- doc_store->GetDocumentFilterData(document_id),
- IsOkAndHolds(DocumentFilterData(
- /*namespace_id=*/0,
- /*schema_type_id=*/0,
- /*expiration_timestamp_ms=*/document1_expiration_timestamp_)));
-
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/false));
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
// Associated entry of the deleted document is removed.
EXPECT_THAT(doc_store->GetDocumentFilterData(document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, SoftDeletionDoesNotClearScoreCache) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> doc_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- doc_store->Put(test_document1_, /*num_tokens=*/4));
-
- EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id),
- IsOkAndHolds(DocumentAssociatedScoreData(
- /*corpus_id=*/0, /*document_score=*/document1_score_,
- /*creation_timestamp_ms=*/document1_creation_timestamp_,
- /*length_in_tokens=*/4)));
-
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/true));
- // Associated entry of the deleted document is removed.
- EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id).status(),
- IsOk());
-}
-
-TEST_F(DocumentStoreTest, HardDeleteClearsScoreCache) {
+TEST_F(DocumentStoreTest, DeleteClearsScoreCache) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -1982,13 +1693,13 @@
/*creation_timestamp_ms=*/document1_creation_timestamp_,
/*length_in_tokens=*/4)));
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/false));
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
// Associated entry of the deleted document is removed.
EXPECT_THAT(doc_store->GetDocumentAssociatedScoreData(document_id),
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(DocumentStoreTest, SoftDeleteDoesNotClearUsageScores) {
+TEST_F(DocumentStoreTest, DeleteShouldPreventUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2010,15 +1721,21 @@
ASSERT_THAT(doc_store->GetUsageScores(document_id),
IsOkAndHolds(expected_scores));
- // Soft delete the document.
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/true));
+ // Delete the document.
+ ICING_ASSERT_OK(doc_store->Delete("icing", "email/1"));
- // The scores should be the same.
+ // Can't report or get usage scores on the deleted document
+ ASSERT_THAT(
+ doc_store->ReportUsage(usage_report_type1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+ HasSubstr("Couldn't report usage on a nonexistent document")));
+
ASSERT_THAT(doc_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+ HasSubstr("Can't get usage scores")));
}
-TEST_F(DocumentStoreTest, HardDeleteShouldClearUsageScores) {
+TEST_F(DocumentStoreTest, ExpirationShouldPreventUsageScores) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2026,8 +1743,20 @@
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
- ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id,
- doc_store->Put(test_document1_));
+ DocumentProto document = DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(10)
+ .SetTtlMs(100)
+ .Build();
+
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id, doc_store->Put(document));
+
+ // Some arbitrary time before the document's creation time (10) + ttl (100)
+ fake_clock_.SetSystemTimeMilliseconds(109);
// Report usage with type 1.
UsageReport usage_report_type1 = CreateUsageReport(
@@ -2040,13 +1769,18 @@
ASSERT_THAT(doc_store->GetUsageScores(document_id),
IsOkAndHolds(expected_scores));
- // Hard delete the document.
- ICING_ASSERT_OK(doc_store->Delete("icing", "email/1", /*soft_delete=*/false));
+ // Some arbitrary time past the document's creation time (10) + ttl (100)
+ fake_clock_.SetSystemTimeMilliseconds(200);
- // The scores should be cleared.
- expected_scores.usage_type1_count = 0;
+ // Can't report or get usage scores on the expired document
+ ASSERT_THAT(
+ doc_store->ReportUsage(usage_report_type1),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+ HasSubstr("Couldn't report usage on a nonexistent document")));
+
ASSERT_THAT(doc_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND,
+ HasSubstr("Can't get usage scores")));
}
TEST_F(DocumentStoreTest,
@@ -2231,7 +1965,7 @@
EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(checksum));
}
-TEST_F(DocumentStoreTest, ComputeChecksumChangesOnModification) {
+TEST_F(DocumentStoreTest, ComputeChecksumChangesOnNewDocument) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
@@ -2247,6 +1981,24 @@
IsOkAndHolds(Not(Eq(checksum))));
}
+TEST_F(DocumentStoreTest, ComputeChecksumDoesntChangeOnNewUsage) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> document_store =
+ std::move(create_result.document_store);
+
+ ICING_EXPECT_OK(document_store->Put(test_document1_));
+ ICING_ASSERT_OK_AND_ASSIGN(Crc32 checksum, document_store->ComputeChecksum());
+
+ UsageReport usage_report =
+ CreateUsageReport(test_document1_.namespace_(), test_document1_.uri(),
+ /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
+ ICING_EXPECT_OK(document_store->ReportUsage(usage_report));
+ EXPECT_THAT(document_store->ComputeChecksum(), IsOkAndHolds(Eq(checksum)));
+}
+
TEST_F(DocumentStoreTest, RegenerateDerivedFilesSkipsUnknownSchemaTypeIds) {
const std::string schema_store_dir = schema_store_dir_ + "_custom";
@@ -2275,11 +2027,11 @@
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_EXPECT_OK(schema_store->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
@@ -2320,16 +2072,7 @@
message_expiration_timestamp = message_data.expiration_timestamp_ms();
} // Everything destructs and commits changes to file
- // Change the DocumentStore's header combined checksum so that it won't match
- // the recalculated checksum on initialization. This will force a regeneration
- // of derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
+ CorruptDocStoreHeaderChecksumFile();
// Change the schema so that we don't know of the Document's type anymore.
// Since we can't set backwards incompatible changes, we do some file-level
@@ -2340,9 +2083,10 @@
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
SchemaStore::Create(&filesystem_, schema_store_dir, &fake_clock_));
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
+
+ SchemaProto schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_EXPECT_OK(schema_store->SetSchema(schema));
ICING_ASSERT_OK_AND_ASSIGN(SchemaTypeId email_schema_type_id,
@@ -2388,11 +2132,11 @@
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
// Set a schema
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
@@ -2440,11 +2184,10 @@
// Rearrange the schema types. Since SchemaTypeId is assigned based on order,
// this should change the SchemaTypeIds.
- schema.clear_types();
- type_config = schema.add_types();
- type_config->set_schema_type("message");
- type_config = schema.add_types();
- type_config->set_schema_type("email");
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_EXPECT_OK(schema_store->SetSchema(schema));
@@ -2475,18 +2218,14 @@
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
// Set a schema
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
-
- auto property_config = type_config->add_properties();
- property_config->set_property_name("subject");
- property_config->set_data_type(PropertyConfigProto::DataType::STRING);
- property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property_config->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property_config->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
@@ -2553,11 +2292,11 @@
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
// Set a schema
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
@@ -2597,9 +2336,10 @@
EXPECT_THAT(document_store->Get(message_document_id),
IsOkAndHolds(EqualsProto(message_document)));
- SchemaProto new_schema;
- type_config = new_schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_EXPECT_OK(
schema_store->SetSchema(new_schema,
@@ -2622,11 +2362,11 @@
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
// Set a schema
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
@@ -2674,11 +2414,10 @@
// Rearrange the schema types. Since SchemaTypeId is assigned based on order,
// this should change the SchemaTypeIds.
- schema.clear_types();
- type_config = schema.add_types();
- type_config->set_schema_type("message");
- type_config = schema.add_types();
- type_config->set_schema_type("email");
+ schema = SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(SchemaStore::SetSchemaResult set_schema_result,
schema_store->SetSchema(schema));
@@ -2711,18 +2450,14 @@
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
// Set a schema
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
-
- auto property_config = type_config->add_properties();
- property_config->set_property_name("subject");
- property_config->set_data_type(PropertyConfigProto::DataType::STRING);
- property_config->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
- property_config->mutable_string_indexing_config()->set_term_match_type(
- TermMatchType::EXACT_ONLY);
- property_config->mutable_string_indexing_config()->set_tokenizer_type(
- StringIndexingConfig::TokenizerType::PLAIN);
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email").AddProperty(
+ PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
@@ -2792,11 +2527,11 @@
filesystem_.CreateDirectoryRecursively(schema_store_dir.c_str());
// Set a schema
- SchemaProto schema;
- auto type_config = schema.add_types();
- type_config->set_schema_type("email");
- type_config = schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("email"))
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<SchemaStore> schema_store,
@@ -2836,9 +2571,10 @@
EXPECT_THAT(document_store->Get(message_document_id),
IsOkAndHolds(EqualsProto(message_document)));
- SchemaProto new_schema;
- type_config = new_schema.add_types();
- type_config->set_schema_type("message");
+ SchemaProto new_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder().SetType("message"))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
SchemaStore::SetSchemaResult set_schema_result,
@@ -3126,17 +2862,7 @@
IsOkAndHolds(expected_scores));
}
- // Change the DocStore's header combined checksum so that it won't match the
- // recalculated checksum on initialization. This will force a regeneration of
- // derived files from ground truth.
- const std::string header_file =
- absl_ports::StrCat(document_store_dir_, "/document_store_header");
- DocumentStore::Header header;
- header.magic = DocumentStore::Header::kMagic;
- header.checksum = 10; // Arbitrary garbage checksum
- filesystem_.DeleteFile(header_file.c_str());
- filesystem_.Write(header_file.c_str(), &header, sizeof(header));
-
+ CorruptDocStoreHeaderChecksumFile();
// Successfully recover from a corrupt derived file issue.
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -3235,45 +2961,6 @@
IsOkAndHolds(expected_scores));
}
-TEST_F(DocumentStoreTest,
- UsageScoresShouldNotBeCopiedOverFromOldSoftDeletedDocs) {
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentStore::CreateResult create_result,
- DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get()));
- std::unique_ptr<DocumentStore> document_store =
- std::move(create_result.document_store);
-
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentId document_id,
- document_store->Put(DocumentProto(test_document1_)));
-
- // Report usage with type 1.
- UsageReport usage_report_type1 = CreateUsageReport(
- /*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
- UsageReport::USAGE_TYPE1);
- ICING_ASSERT_OK(document_store->ReportUsage(usage_report_type1));
-
- UsageStore::UsageScores expected_scores;
- ++expected_scores.usage_type1_count;
- ASSERT_THAT(document_store->GetUsageScores(document_id),
- IsOkAndHolds(expected_scores));
-
- // Soft delete the doc.
- ICING_ASSERT_OK(document_store->Delete(document_id, /*soft_delete=*/true));
-
- // Put the same document.
- ICING_ASSERT_OK_AND_ASSIGN(
- DocumentId updated_document_id,
- document_store->Put(DocumentProto(test_document1_)));
- // We should get a different document id.
- ASSERT_THAT(updated_document_id, Not(Eq(document_id)));
-
- // Usage scores should be cleared.
- EXPECT_THAT(document_store->GetUsageScores(updated_document_id),
- IsOkAndHolds(UsageStore::UsageScores()));
-}
-
TEST_F(DocumentStoreTest, UsageScoresShouldPersistOnOptimize) {
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
@@ -3402,6 +3089,15 @@
ASSERT_THAT(create_result.data_loss, Eq(DataLoss::COMPLETE));
}
+// TODO(b/185845269) Re-enable this test by copying over a full valid set of
+// document store files. Right now this test only includes the score_cache and
+// the document store header.
+//
+// This causes a problem now because this cl changes behavior to not consider an
+// InitializeDerivedFiles failure to be a recovery if there is nothing to
+// recover because the document store is empty.
+#define DISABLE_BACKWARDS_COMPAT_TEST
+#ifndef DISABLE_BACKWARDS_COMPAT_TEST
TEST_F(DocumentStoreTest, LoadScoreCacheAndInitializeSuccessfully) {
// The directory testdata/v0/document_store contains only the scoring_cache
// and the document_store_header (holding the crc for the scoring_cache). If
@@ -3438,17 +3134,522 @@
ASSERT_THAT(filesystem_.CopyFile(src.c_str(), dst.c_str()), true);
}
- NativeInitializeStats initializeStats;
+ InitializeStatsProto initialize_stats;
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
- schema_store_.get(), &initializeStats));
+ schema_store_.get(),
+ /*force_recovery_and_revalidate_documents=*/false,
+ &initialize_stats));
std::unique_ptr<DocumentStore> doc_store =
std::move(create_result.document_store);
// The store_cache trigger regeneration because its element size is
// inconsistent: expected 20 (current new size), actual 12 (as per the v0
// score_cache).
- EXPECT_TRUE(initializeStats.has_document_store_recovery_cause());
+ EXPECT_TRUE(initialize_stats.has_document_store_recovery_cause());
+}
+#endif // DISABLE_BACKWARDS_COMPAT_TEST
+
+TEST_F(DocumentStoreTest, DocumentStoreStorageInfo) {
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store_.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // Add four documents: three in namespace.1 and one in namespace.2.
+ DocumentProto document1 = test_document1_;
+ document1.set_namespace_("namespace.1");
+ document1.set_uri("uri1");
+ ICING_ASSERT_OK(doc_store->Put(document1));
+
+ DocumentProto document2 = test_document1_;
+ document2.set_namespace_("namespace.1");
+ document2.set_uri("uri2");
+ document2.set_creation_timestamp_ms(fake_clock_.GetSystemTimeMilliseconds());
+ document2.set_ttl_ms(100);
+ ICING_ASSERT_OK(doc_store->Put(document2));
+
+ DocumentProto document3 = test_document1_;
+ document3.set_namespace_("namespace.1");
+ document3.set_uri("uri3");
+ ICING_ASSERT_OK(doc_store->Put(document3));
+
+ DocumentProto document4 = test_document1_;
+ document4.set_namespace_("namespace.2");
+ document4.set_uri("uri1");
+ ICING_ASSERT_OK(doc_store->Put(document4));
+
+ // Report usage with type 1 on document1
+ UsageReport usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"namespace.1", /*uri=*/"uri1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1));
+
+ // Report usage with type 2 on document2
+ UsageReport usage_report_type2 = CreateUsageReport(
+ /*name_space=*/"namespace.1", /*uri=*/"uri2", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE2);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type2));
+
+ // Report usage with type 3 on document3
+ UsageReport usage_report_type3 = CreateUsageReport(
+ /*name_space=*/"namespace.1", /*uri=*/"uri3", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE3);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type3));
+
+ // Report usage with type 1 on document4
+ usage_report_type1 = CreateUsageReport(
+ /*name_space=*/"namespace.2", /*uri=*/"uri1", /*timestamp_ms=*/1000,
+ UsageReport::USAGE_TYPE1);
+ ICING_ASSERT_OK(doc_store->ReportUsage(usage_report_type1));
+
+ // Delete the first doc.
+ ICING_ASSERT_OK(doc_store->Delete(document1.namespace_(), document1.uri()));
+
+ // Expire the second doc.
+ fake_clock_.SetSystemTimeMilliseconds(document2.creation_timestamp_ms() +
+ document2.ttl_ms() + 1);
+
+ // Check high level info
+ DocumentStorageInfoProto storage_info = doc_store->GetStorageInfo();
+ EXPECT_THAT(storage_info.num_alive_documents(), Eq(2));
+ EXPECT_THAT(storage_info.num_deleted_documents(), Eq(1));
+ EXPECT_THAT(storage_info.num_expired_documents(), Eq(1));
+ EXPECT_THAT(storage_info.document_store_size(), Ge(0));
+ EXPECT_THAT(storage_info.document_log_size(), Ge(0));
+ EXPECT_THAT(storage_info.key_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.document_id_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.score_cache_size(), Ge(0));
+ EXPECT_THAT(storage_info.filter_cache_size(), Ge(0));
+ EXPECT_THAT(storage_info.corpus_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.corpus_score_cache_size(), Ge(0));
+ EXPECT_THAT(storage_info.namespace_id_mapper_size(), Ge(0));
+ EXPECT_THAT(storage_info.num_namespaces(), Eq(2));
+
+ // Check per-namespace info
+ EXPECT_THAT(storage_info.namespace_storage_info_size(), Eq(2));
+
+ NamespaceStorageInfoProto namespace_storage_info =
+ GetNamespaceStorageInfo(storage_info, "namespace.1");
+ EXPECT_THAT(namespace_storage_info.num_alive_documents(), Eq(1));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents(), Eq(1));
+ EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type1(), Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type2(), Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type3(), Eq(1));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type1(),
+ Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type2(),
+ Eq(1));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type3(),
+ Eq(0));
+
+ namespace_storage_info = GetNamespaceStorageInfo(storage_info, "namespace.2");
+ EXPECT_THAT(namespace_storage_info.num_alive_documents(), Eq(1));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents(), Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type1(), Eq(1));
+ EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type2(), Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_alive_documents_usage_type3(), Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type1(),
+ Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type2(),
+ Eq(0));
+ EXPECT_THAT(namespace_storage_info.num_expired_documents_usage_type3(),
+ Eq(0));
+}
+
+TEST_F(DocumentStoreTest, InitializeForceRecoveryUpdatesTypeIds) {
+ // Start fresh and set the schema with one type.
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ // The typeid for "email" should be 0.
+ ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+
+ DocumentId docid = kInvalidDocumentId;
+ {
+ // Create the document store the first time and add an email document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto doc =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data,
+ doc_store->GetDocumentFilterData(docid));
+
+ ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
+ }
+
+ // Add another type to the schema before the email type.
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("alarm")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("time")
+ .SetDataType(TYPE_INT)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_type_config)
+ .Build();
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ // Adding a new type should cause ids to be reassigned. Ids are assigned in
+ // order of appearance so 'alarm' should be 0 and 'email' should be 1.
+ ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0));
+ ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(1));
+
+ {
+ // Create the document store the second time and force recovery
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/true));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // Ensure that the type id of the email document has been correctly updated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data,
+ doc_store->GetDocumentFilterData(docid));
+ ASSERT_THAT(filter_data.schema_type_id(), Eq(1));
+ }
+}
+
+TEST_F(DocumentStoreTest, InitializeDontForceRecoveryDoesntUpdateTypeIds) {
+ // Start fresh and set the schema with one type.
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ // The typeid for "email" should be 0.
+ ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(0));
+
+ DocumentId docid = kInvalidDocumentId;
+ {
+ // Create the document store the first time and add an email document.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ DocumentProto doc =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(doc));
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data,
+ doc_store->GetDocumentFilterData(docid));
+
+ ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
+ }
+
+ // Add another type to the schema.
+ schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType("alarm")
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName("name")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("time")
+ .SetDataType(TYPE_INT)
+ .SetCardinality(CARDINALITY_OPTIONAL)))
+ .AddType(email_type_config)
+ .Build();
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+ // Adding a new type should cause ids to be reassigned. Ids are assigned in
+ // order of appearance so 'alarm' should be 0 and 'email' should be 1.
+ ASSERT_THAT(schema_store->GetSchemaTypeId("alarm"), IsOkAndHolds(0));
+ ASSERT_THAT(schema_store->GetSchemaTypeId("email"), IsOkAndHolds(1));
+
+ {
+ // Create the document store the second time. Don't force recovery.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ // Check that the type id of the email document has not been updated.
+ ICING_ASSERT_OK_AND_ASSIGN(DocumentFilterData filter_data,
+ doc_store->GetDocumentFilterData(docid));
+ ASSERT_THAT(filter_data.schema_type_id(), Eq(0));
+ }
+}
+
+TEST_F(DocumentStoreTest, InitializeForceRecoveryDeletesInvalidDocument) {
+ // Start fresh and set the schema with one type.
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+
+ DocumentId docid = kInvalidDocumentId;
+ DocumentProto docWithBody =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+ DocumentProto docWithoutBody =
+ DocumentBuilder()
+ .SetKey("icing", "email/2")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+
+ {
+ // Create the document store the first time and add two email documents: one
+ // that has the 'body' section and one that doesn't.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody));
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody));
+
+ ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithBody)));
+ ASSERT_THAT(
+ doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithoutBody)));
+ }
+
+ // Delete the 'body' property from the 'email' type, making all pre-existing
+ // documents with the 'body' property invalid.
+ email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ schema = SchemaBuilder().AddType(email_type_config).Build();
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true),
+ IsOk());
+
+ {
+ // Create the document store the second time and force recovery
+ CorruptDocStoreHeaderChecksumFile();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/true));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
+ StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
+ ASSERT_THAT(
+ doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithoutBody)));
+ }
+}
+
+TEST_F(DocumentStoreTest, InitializeDontForceRecoveryKeepsInvalidDocument) {
+ // Start fresh and set the schema with one type.
+ filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(test_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(document_store_dir_.c_str());
+ filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
+
+ SchemaTypeConfigProto email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("body")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ SchemaProto schema = SchemaBuilder().AddType(email_type_config).Build();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ std::unique_ptr<SchemaStore> schema_store,
+ SchemaStore::Create(&filesystem_, schema_store_dir_, &fake_clock_));
+ ASSERT_THAT(schema_store->SetSchema(schema), IsOk());
+
+ DocumentId docid = kInvalidDocumentId;
+ DocumentProto docWithBody =
+ DocumentBuilder()
+ .SetKey("icing", "email/1")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .AddStringProperty("body", "body bar")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+ DocumentProto docWithoutBody =
+ DocumentBuilder()
+ .SetKey("icing", "email/2")
+ .SetSchema("email")
+ .AddStringProperty("subject", "subject foo")
+ .SetScore(document1_score_)
+ .SetCreationTimestampMs(
+ document1_creation_timestamp_) // A random timestamp
+ .SetTtlMs(document1_ttl_)
+ .Build();
+
+ {
+ // Create the document store the first time and add two email documents: one
+ // that has the 'body' section and one that doesn't.
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(&filesystem_, document_store_dir_, &fake_clock_,
+ schema_store.get()));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithBody));
+ ICING_ASSERT_OK_AND_ASSIGN(docid, doc_store->Put(docWithoutBody));
+
+ ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithBody)));
+ ASSERT_THAT(
+ doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithoutBody)));
+ }
+
+ // Delete the 'body' property from the 'email' type, making all pre-existing
+ // documents with the 'body' property invalid.
+ email_type_config =
+ SchemaTypeConfigBuilder()
+ .SetType("email")
+ .AddProperty(PropertyConfigBuilder()
+ .SetName("subject")
+ .SetDataTypeString(MATCH_EXACT, TOKENIZER_PLAIN)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .Build();
+ schema = SchemaBuilder().AddType(email_type_config).Build();
+ ASSERT_THAT(schema_store->SetSchema(
+ schema, /*ignore_errors_and_delete_documents=*/true),
+ IsOk());
+
+ {
+ // Corrupt the document store header checksum so that we will perform
+ // recovery, but without revalidation.
+ CorruptDocStoreHeaderChecksumFile();
+ ICING_ASSERT_OK_AND_ASSIGN(
+ DocumentStore::CreateResult create_result,
+ DocumentStore::Create(
+ &filesystem_, document_store_dir_, &fake_clock_, schema_store.get(),
+ /*force_recovery_and_revalidate_documents=*/false));
+ std::unique_ptr<DocumentStore> doc_store =
+ std::move(create_result.document_store);
+
+ ASSERT_THAT(doc_store->Get(docWithBody.namespace_(), docWithBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithBody)));
+ ASSERT_THAT(
+ doc_store->Get(docWithoutBody.namespace_(), docWithoutBody.uri()),
+ IsOkAndHolds(EqualsProto(docWithoutBody)));
+ }
}
} // namespace
diff --git a/icing/store/usage-store.cc b/icing/store/usage-store.cc
index 54896dc..546067d 100644
--- a/icing/store/usage-store.cc
+++ b/icing/store/usage-store.cc
@@ -74,6 +74,9 @@
"Document id %d is invalid.", document_id));
}
+ // We don't need a copy here because we'll set the value at the same index.
+ // This won't unintentionally grow the underlying file since we already have
+ // enough space for the current index.
auto usage_scores_or = usage_score_cache_->Get(document_id);
// OutOfRange means that the mapper hasn't seen this document id before, it's
@@ -159,7 +162,7 @@
"Document id %d is invalid.", document_id));
}
- auto usage_scores_or = usage_score_cache_->Get(document_id);
+ auto usage_scores_or = usage_score_cache_->GetCopy(document_id);
if (absl_ports::IsOutOfRange(usage_scores_or.status())) {
// No usage scores found. Return the default scores.
return UsageScores();
@@ -168,7 +171,7 @@
return usage_scores_or.status();
}
- return *std::move(usage_scores_or).ValueOrDie();
+ return std::move(usage_scores_or).ValueOrDie();
}
libtextclassifier3::Status UsageStore::SetUsageScores(
@@ -193,10 +196,10 @@
"to_document_id %d is invalid.", to_document_id));
}
- auto usage_scores_or = usage_score_cache_->Get(from_document_id);
+ auto usage_scores_or = usage_score_cache_->GetCopy(from_document_id);
if (usage_scores_or.ok()) {
return usage_score_cache_->Set(to_document_id,
- *std::move(usage_scores_or).ValueOrDie());
+ std::move(usage_scores_or).ValueOrDie());
} else if (absl_ports::IsOutOfRange(usage_scores_or.status())) {
// No usage scores found. Set default scores to to_document_id.
return usage_score_cache_->Set(to_document_id, UsageScores());
@@ -218,6 +221,10 @@
return usage_score_cache_->GetElementsFileSize();
}
+libtextclassifier3::StatusOr<int64_t> UsageStore::GetDiskUsage() const {
+ return usage_score_cache_->GetDiskUsage();
+}
+
libtextclassifier3::Status UsageStore::TruncateTo(DocumentId num_documents) {
if (num_documents >= usage_score_cache_->num_elements()) {
// No need to truncate
diff --git a/icing/store/usage-store.h b/icing/store/usage-store.h
index b7de970..fd77df4 100644
--- a/icing/store/usage-store.h
+++ b/icing/store/usage-store.h
@@ -157,6 +157,14 @@
// INTERNAL_ERROR on IO error
libtextclassifier3::StatusOr<int64_t> GetElementsFileSize() const;
+ // Calculates and returns the disk usage in bytes. Rounds up to the nearest
+ // block size.
+ //
+ // Returns:
+ // Disk usage on success
+ // INTERNAL_ERROR on IO error
+ libtextclassifier3::StatusOr<int64_t> GetDiskUsage() const;
+
// Resizes the storage so that only the usage scores of and before
// last_document_id are stored.
//
diff --git a/icing/store/usage-store_test.cc b/icing/store/usage-store_test.cc
index 220c226..b2dbe4b 100644
--- a/icing/store/usage-store_test.cc
+++ b/icing/store/usage-store_test.cc
@@ -577,6 +577,41 @@
IsOkAndHolds(Gt(empty_file_size)));
}
+TEST_F(UsageStoreTest, GetDiskUsageEmpty) {
+ ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+ UsageStore::Create(&filesystem_, test_dir_));
+
+ // There's some internal metadata, so our disk usage will round up to 1 block.
+ ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_disk_usage,
+ usage_store->GetDiskUsage());
+ EXPECT_THAT(empty_disk_usage, Gt(0));
+}
+
+TEST_F(UsageStoreTest, GetDiskUsageNonEmpty) {
+  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<UsageStore> usage_store,
+                             UsageStore::Create(&filesystem_, test_dir_));
+
+  // Baseline to compare against after usage reports have been written.
+  ICING_ASSERT_OK_AND_ASSIGN(int64_t empty_disk_usage,
+                             usage_store->GetDiskUsage());
+
+  // Since our GetDiskUsage can only get sizes in increments of block_size, we
+  // need to insert enough usage reports so the disk usage will increase by at
+  // least 1 block size. The number 200 is a bit arbitrary, gotten from manually
+  // testing. Every write is asserted so that a failed write is reported here.
+  UsageReport usage_report = CreateUsageReport(
+      "namespace", "uri", /*timestamp_ms=*/1000, UsageReport::USAGE_TYPE1);
+  for (DocumentId document_id = 0; document_id < 200; ++document_id) {
+    ICING_ASSERT_OK(usage_store->AddUsageReport(usage_report, document_id));
+  }
+
+  // We need to persist since iOS won't see the new disk allocations until after
+  // everything gets written.
+  ICING_ASSERT_OK(usage_store->PersistToDisk());
+
+  EXPECT_THAT(usage_store->GetDiskUsage(), IsOkAndHolds(Gt(empty_disk_usage)));
+}
+
} // namespace
} // namespace lib
diff --git a/icing/testing/common-matchers.h b/icing/testing/common-matchers.h
index b7f54ba..8d8bdf2 100644
--- a/icing/testing/common-matchers.h
+++ b/icing/testing/common-matchers.h
@@ -25,7 +25,6 @@
#include "icing/absl_ports/str_join.h"
#include "icing/index/hit/doc-hit-info.h"
#include "icing/legacy/core/icing-string-util.h"
-#include "icing/proto/search.proto.h"
#include "icing/proto/search.pb.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
@@ -267,7 +266,7 @@
}
}
-string ProtoStatusCodeToString(StatusProto::Code code) {
+std::string ProtoStatusCodeToString(StatusProto::Code code) {
switch (code) {
case StatusProto::OK:
return "OK";
@@ -376,14 +375,22 @@
return ExplainMatchResult(error_matcher, arg.message(), result_listener);
}
-MATCHER_P(EqualsSearchResultIgnoreStats, expected, "") {
+MATCHER_P(EqualsSearchResultIgnoreStatsAndScores, expected, "") {
SearchResultProto actual_copy = arg;
actual_copy.clear_query_stats();
actual_copy.clear_debug_info();
+ for (SearchResultProto::ResultProto& result :
+ *actual_copy.mutable_results()) {
+ result.clear_score();
+ }
SearchResultProto expected_copy = expected;
expected_copy.clear_query_stats();
expected_copy.clear_debug_info();
+ for (SearchResultProto::ResultProto& result :
+ *expected_copy.mutable_results()) {
+ result.clear_score();
+ }
return ExplainMatchResult(testing::EqualsProto(expected_copy), actual_copy,
result_listener);
}
diff --git a/icing/testing/jni-test-helpers.h b/icing/testing/jni-test-helpers.h
index adc469a..67a98c3 100644
--- a/icing/testing/jni-test-helpers.h
+++ b/icing/testing/jni-test-helpers.h
@@ -15,6 +15,8 @@
#ifndef ICING_TESTING_JNI_TEST_HELPERS_H_
#define ICING_TESTING_JNI_TEST_HELPERS_H_
+#include <memory>
+
#include "icing/jni/jni-cache.h"
#ifdef ICING_REVERSE_JNI_SEGMENTATION
diff --git a/icing/testing/schema-generator.h b/icing/testing/schema-generator.h
index 78430cc..12133f5 100644
--- a/icing/testing/schema-generator.h
+++ b/icing/testing/schema-generator.h
@@ -18,7 +18,6 @@
#include <random>
#include <string>
-#include "icing/proto/schema.proto.h"
#include "icing/proto/schema.pb.h"
namespace icing {
diff --git a/icing/testing/snippet-helpers.cc b/icing/testing/snippet-helpers.cc
index fde0004..cfd20c2 100644
--- a/icing/testing/snippet-helpers.cc
+++ b/icing/testing/snippet-helpers.cc
@@ -17,28 +17,37 @@
#include <algorithm>
#include <string_view>
+#include "icing/absl_ports/str_join.h"
#include "icing/proto/search.pb.h"
+#include "icing/schema/section-manager.h"
namespace icing {
namespace lib {
-const SnippetMatchProto* GetSnippetMatch(const SnippetProto& snippet_proto,
- const std::string& property_name,
- int snippet_index) {
- auto iterator = std::find_if(
- snippet_proto.entries().begin(), snippet_proto.entries().end(),
- [&property_name](const SnippetProto::EntryProto& entry) {
- return entry.property_name() == property_name;
- });
- if (iterator == snippet_proto.entries().end() ||
- iterator->snippet_matches_size() <= snippet_index) {
- return nullptr;
+namespace {
+
+// Returns the property name with the index removed, paired with the index.
+// Examples:
+// GetPropertyIndex("foo") will return ["foo", 0]
+// GetPropertyIndex("foo[5]") will return ["foo", 5]
+std::pair<std::string_view, int> GetPropertyIndex(std::string_view property) {
+ size_t l_bracket = property.find(kLBracket);
+ if (l_bracket == std::string_view::npos || l_bracket >= property.length()) {
+ return {property, 0};
}
- return &iterator->snippet_matches(snippet_index);
+ size_t r_bracket = property.find(kRBracket, l_bracket);
+ if (r_bracket == std::string_view::npos || r_bracket - l_bracket < 2) {
+ return {property, 0};
+ }
+ std::string index_string =
+ std::string(property.substr(l_bracket + 1, r_bracket - l_bracket - 1));
+ return {property.substr(0, l_bracket), std::stoi(index_string)};
}
+} // namespace
+
const PropertyProto* GetProperty(const DocumentProto& document,
- const std::string& property_name) {
+ std::string_view property_name) {
const PropertyProto* property = nullptr;
for (const PropertyProto& prop : document.properties()) {
if (prop.name() == property_name) {
@@ -48,32 +57,55 @@
return property;
}
-std::string GetWindow(const DocumentProto& document,
- const SnippetProto& snippet_proto,
- const std::string& property_name, int snippet_index) {
- const SnippetMatchProto* match =
- GetSnippetMatch(snippet_proto, property_name, snippet_index);
- const PropertyProto* property = GetProperty(document, property_name);
- if (match == nullptr || property == nullptr) {
- return "";
+std::vector<std::string_view> GetWindows(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto) {
+ std::vector<std::string_view> windows;
+ for (const SnippetMatchProto& match : snippet_proto.snippet_matches()) {
+ windows.push_back(content.substr(match.window_byte_position(),
+ match.window_byte_length()));
}
- std::string_view value = property->string_values(match->values_index());
- return std::string(
- value.substr(match->window_position(), match->window_bytes()));
+ return windows;
}
-std::string GetMatch(const DocumentProto& document,
- const SnippetProto& snippet_proto,
- const std::string& property_name, int snippet_index) {
- const SnippetMatchProto* match =
- GetSnippetMatch(snippet_proto, property_name, snippet_index);
- const PropertyProto* property = GetProperty(document, property_name);
- if (match == nullptr || property == nullptr) {
- return "";
+std::vector<std::string_view> GetMatches(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto) {
+ std::vector<std::string_view> matches;
+ for (const SnippetMatchProto& match : snippet_proto.snippet_matches()) {
+ matches.push_back(content.substr(match.exact_match_byte_position(),
+ match.exact_match_byte_length()));
}
- std::string_view value = property->string_values(match->values_index());
- return std::string(
- value.substr(match->exact_match_position(), match->exact_match_bytes()));
+ return matches;
+}
+
+// Returns the string value addressed by property_path (e.g. "foo[1].bar[2]"),
+// or "" if document is null or any path element or index doesn't exist.
+std::string_view GetString(const DocumentProto* document,
+                           std::string_view property_path) {
+  std::vector<std::string_view> properties =
+      absl_ports::StrSplit(property_path, kPropertySeparator);
+  for (size_t i = 0; document != nullptr && i < properties.size(); ++i) {
+    std::string_view property = properties.at(i);
+    int property_index;
+    std::tie(property, property_index) = GetPropertyIndex(property);
+    const PropertyProto* prop = GetProperty(*document, property);
+    if (prop == nullptr || property_index < 0) {
+      // No such property, or a negative index such as "foo[-1]" that would
+      // otherwise slip past the upper-bound checks below.
+      return "";
+    }
+    if (i == properties.size() - 1) {
+      // The last property. Get the string_value if the index is in range.
+      if (property_index >= prop->string_values_size()) return "";
+      return prop->string_values(property_index);
+    }
+    if (property_index >= prop->document_values_size()) {
+      // The requested subproperty doesn't exist. Return an empty string.
+      return "";
+    }
+    // Go to the next subproperty.
+    document = &prop->document_values(property_index);
+  }
+  return "";
}
} // namespace lib
diff --git a/icing/testing/snippet-helpers.h b/icing/testing/snippet-helpers.h
index 124e421..defadeb 100644
--- a/icing/testing/snippet-helpers.h
+++ b/icing/testing/snippet-helpers.h
@@ -23,36 +23,32 @@
namespace icing {
namespace lib {
-// Retrieve pointer to the snippet_index'th SnippetMatchProto within the
-// EntryProto identified by property_name within snippet_proto.
-// Returns nullptr
-// - if there is no EntryProto within snippet_proto corresponding to
-// property_name.
-// - if there is no SnippetMatchProto at snippet_index within the EntryProto
-const SnippetMatchProto* GetSnippetMatch(const SnippetProto& snippet_proto,
- const std::string& property_name,
- int snippet_index);
-
// Retrieve pointer to the PropertyProto identified by property_name.
// Returns nullptr if no such property exists.
+//
+// NOTE: This function does not handle nesting or indexes. "foo.bar" will return
+// a nullptr even if document contains a property called "foo" that contains a
+// subproperty called "bar".
const PropertyProto* GetProperty(const DocumentProto& document,
-                                 const std::string& property_name);
+                                 std::string_view property_name);
-// Retrieves the window defined by the SnippetMatchProto returned by
-// GetSnippetMatch(snippet_proto, property_name, snippet_index) for the property
-// returned by GetProperty(document, property_name).
-// Returns "" if no such property, snippet or window exists.
-std::string GetWindow(const DocumentProto& document,
- const SnippetProto& snippet_proto,
- const std::string& property_name, int snippet_index);
+// Retrieves all windows defined by the snippet_proto for the content.
+std::vector<std::string_view> GetWindows(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto);
-// Retrieves the match defined by the SnippetMatchProto returned by
-// GetSnippetMatch(snippet_proto, property_name, snippet_index) for the property
-// returned by GetProperty(document, property_name).
-// Returns "" if no such property or snippet exists.
-std::string GetMatch(const DocumentProto& document,
- const SnippetProto& snippet_proto,
- const std::string& property_name, int snippet_index);
+// Retrieves all matches defined by the snippet_proto for the content.
+std::vector<std::string_view> GetMatches(
+ std::string_view content, const SnippetProto::EntryProto& snippet_proto);
+
+// Retrieves the string value held in the document corresponding to the
+// property_path.
+// Example:
+// - GetString(doc, "foo") will retrieve the first string value in the
+// property "foo" in document or an empty string if it doesn't exist.
+// - GetString(doc, "foo[1].bar[2]") will retrieve the third string value in
+// the subproperty "bar" of the second document value in the property "foo".
+std::string_view GetString(const DocumentProto* document,
+ std::string_view property_path);
} // namespace lib
} // namespace icing
diff --git a/icing/tokenization/language-segmenter-factory.h b/icing/tokenization/language-segmenter-factory.h
index e60c168..cae3eee 100644
--- a/icing/tokenization/language-segmenter-factory.h
+++ b/icing/tokenization/language-segmenter-factory.h
@@ -18,11 +18,7 @@
#include <memory>
#include <string_view>
-#ifdef __ANDROID__
#include "icing/jni/jni-cache.h"
-#else // __ANDROID__
-class JniCache; // forward declaration to let non-Android builds work.
-#endif // __ANDROID__
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "icing/tokenization/language-segmenter.h"
diff --git a/icing/tokenization/language-segmenter-iterator_test.cc b/icing/tokenization/language-segmenter-iterator_test.cc
index 2b1911e..317da04 100644
--- a/icing/tokenization/language-segmenter-iterator_test.cc
+++ b/icing/tokenization/language-segmenter-iterator_test.cc
@@ -16,8 +16,8 @@
#include "gtest/gtest.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/portable/platform.h"
#include "icing/testing/common-matchers.h"
-#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
@@ -143,8 +143,7 @@
StatusIs(libtextclassifier3::StatusCode::NOT_FOUND));
}
-TEST_F(LanguageSegmenterIteratorTest,
- ResetToTermEndingBeforeWithZeroNotFound) {
+TEST_F(LanguageSegmenterIteratorTest, ResetToTermEndingBeforeWithZeroNotFound) {
language_segmenter_factory::SegmenterOptions options(ULOC_US);
ICING_ASSERT_OK_AND_ASSIGN(
auto language_segmenter,
diff --git a/icing/tokenization/plain-tokenizer_test.cc b/icing/tokenization/plain-tokenizer_test.cc
index f578567..2fb9750 100644
--- a/icing/tokenization/plain-tokenizer_test.cc
+++ b/icing/tokenization/plain-tokenizer_test.cc
@@ -19,9 +19,9 @@
#include "gmock/gmock.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/portable/platform.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/icu-i18n-test-utils.h"
-#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/tokenizer-factory.h"
diff --git a/icing/tokenization/raw-query-tokenizer_test.cc b/icing/tokenization/raw-query-tokenizer_test.cc
index e1a666b..500efa0 100644
--- a/icing/tokenization/raw-query-tokenizer_test.cc
+++ b/icing/tokenization/raw-query-tokenizer_test.cc
@@ -17,8 +17,8 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/helpers/icu/icu-data-file-helper.h"
+#include "icing/portable/platform.h"
#include "icing/testing/common-matchers.h"
-#include "icing/testing/platform.h"
#include "icing/testing/test-data.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/tokenizer-factory.h"
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni-layer.cc
similarity index 85%
rename from icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni.cc
rename to icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni-layer.cc
index 8392363..5f5202c 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test-jni-layer.cc
@@ -21,12 +21,12 @@
JNIEnv* g_jenv = nullptr;
extern "C" JNIEXPORT jboolean JNICALL
-Java_icing_tokenization_reverse_1jni_ReverseJniLanguageSegmenterTest_testsMain(
- JNIEnv* env, jclass ignored) {
+Java_icing_jni_ReverseJniLanguageSegmenterJniTest_testsMain(JNIEnv* env,
+ jclass ignored) {
g_jenv = env;
std::vector<char*> my_argv;
- char arg[] = "reverse-jni-language-segmenter-test-lib";
+ char arg[] = "jni-test-lib";
my_argv.push_back(arg);
int argc = 1;
char** argv = &(my_argv[0]);
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.h b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.h
deleted file mode 100644
index 64b68ec..0000000
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.h
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_LANGUAGE_SEGMENTER_TEST_H_
-#define ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_LANGUAGE_SEGMENTER_TEST_H_
-
-#include <jni.h>
-
-#include "icing/jni/jni-cache.h"
-#include "gtest/gtest.h"
-
-extern JNIEnv* g_jenv;
-
-namespace icing {
-namespace lib {
-
-namespace test_internal {
-
-class ReverseJniLanguageSegmenterTest
- : public testing::TestWithParam<const char*> {
- protected:
- ReverseJniLanguageSegmenterTest()
- : jni_cache_(std::move(JniCache::Create(g_jenv)).ValueOrDie()) {}
-
- static std::string GetLocale() { return GetParam(); }
-
- std::unique_ptr<JniCache> jni_cache_;
-};
-
-} // namespace test_internal
-
-} // namespace lib
-} // namespace icing
-
-#endif // ICING_TOKENIZATION_REVERSE_JNI_REVERSE_JNI_LANGUAGE_SEGMENTER_TEST_H_
diff --git a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc
similarity index 99%
rename from icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc
rename to icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc
index 2c268ff..72c3180 100644
--- a/icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.cc
+++ b/icing/tokenization/reverse_jni/reverse-jni-language-segmenter_test.cc
@@ -12,17 +12,19 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "icing/tokenization/reverse_jni/reverse-jni-language-segmenter-test.h"
+#include <jni.h>
#include <memory>
#include <string_view>
+#include "icing/jni/jni-cache.h"
#include "icing/text_classifier/lib3/utils/base/status.h"
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "icing/absl_ports/str_cat.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/icu-i18n-test-utils.h"
+#include "icing/testing/jni-test-helpers.h"
#include "icing/tokenization/language-segmenter-factory.h"
#include "icing/tokenization/language-segmenter.h"
#include "unicode/uloc.h"
@@ -120,6 +122,14 @@
return terms;
}
+class ReverseJniLanguageSegmenterTest
+ : public testing::TestWithParam<const char*> {
+ protected:
+ static std::string GetLocale() { return GetParam(); }
+
+ std::unique_ptr<const JniCache> jni_cache_ = GetTestJniCache();
+};
+
} // namespace
TEST_P(ReverseJniLanguageSegmenterTest, EmptyText) {
diff --git a/icing/tools/document-store-dump.cc b/icing/tools/document-store-dump.cc
deleted file mode 100644
index 45c9bf5..0000000
--- a/icing/tools/document-store-dump.cc
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "icing/tools/document-store-dump.h"
-
-#include <cinttypes>
-
-#include "icing/absl_ports/str_cat.h"
-#include "icing/legacy/core/icing-string-util.h"
-#include "icing/util/logging.h"
-
-namespace icing {
-namespace lib {
-namespace {
-
-void AppendDocumentProto(DocId document_id, const Document& doc,
- std::string* output) {
- absl_ports::StrAppend(
- output, IcingStringUtil::StringPrintf(
- "Document {\n document_id: %d\n corpus_id: %d\n uri: "
- "'%s'\n score: %d\n created_timestamp_ms: %" PRIu64 "\n",
- static_cast<int>(document_id), doc.corpus_id(),
- doc.uri().c_str(), static_cast<int>(doc.score()),
- static_cast<int64_t>(doc.created_timestamp_ms())));
- for (const auto& section : doc.sections()) {
- absl_ports::StrAppend(
- output, IcingStringUtil::StringPrintf(
- " section {\n id: %d\n indexed_length: "
- "%d\n content: '%s'\n snippet: '%s'\n",
- static_cast<int>(section.id()),
- static_cast<int>(section.indexed_length()),
- section.content().c_str(), section.snippet().c_str()));
- for (int64_t extracted_number : section.extracted_numbers()) {
- absl_ports::StrAppend(output, IcingStringUtil::StringPrintf(
- " extracted_numbers: %" PRId64 "\n",
- extracted_number));
- }
- for (const std::string& annotation_token : section.annotation_tokens()) {
- absl_ports::StrAppend(
- output, IcingStringUtil::StringPrintf(" annotation_tokens: '%s'\n",
- annotation_token.c_str()));
- }
- std::string indexed = (section.config().indexed()) ? "true" : "false";
- std::string index_prefixes =
- (section.config().index_prefixes()) ? "true" : "false";
- absl_ports::StrAppend(
- output,
- IcingStringUtil::StringPrintf(
- " config {\n name: '%s'\n indexed: %s\n "
- "tokenizer: %d\n weight: %d\n index_prefixes: %s\n "
- "subsection_separator: '%s'\n",
- section.config().name().c_str(), indexed.c_str(),
- section.config().tokenizer(),
- static_cast<int>(section.config().weight()), index_prefixes.c_str(),
- section.config().subsection_separator().c_str()));
- for (const auto& variant_generator :
- section.config().variant_generators()) {
- absl_ports::StrAppend(
- output, IcingStringUtil::StringPrintf(
- " variant_generators: %d\n", variant_generator));
- }
- absl_ports::StrAppend(
- output,
- IcingStringUtil::StringPrintf(
- " common_term_legacy_hit_score: %d\n "
- "rfc822_host_name_term_legacy_hit_score: %d\n "
- "semantic_property: '%s'\n universal_section_id: %d\n "
- "omnibox_section_type: %d\n st_section_type: %d\n }\n }\n",
- section.config().common_term_legacy_hit_score(),
- section.config().rfc822_host_name_term_legacy_hit_score(),
- section.config().semantic_property().c_str(),
- section.config().universal_section_id(),
- section.config().omnibox_section_type(),
- section.config().st_section_type()));
- }
- for (const auto& language : doc.languages()) {
- std::string used_classifier =
- (language.used_classifier()) ? "true" : "false";
- absl_ports::StrAppend(
- output, IcingStringUtil::StringPrintf(
- " languages {\n language: %d\n score: %d\n "
- "used_classifier: %s\n }\n",
- language.language(), static_cast<int>(language.score()),
- used_classifier.c_str()));
- }
- absl_ports::StrAppend(
- output, IcingStringUtil::StringPrintf(
- " ANNOTATIONS PRINTING NOT IMPLEMENTED YET IN ICING-TOOL\n"));
-}
-
-} // namespace
-
-std::string GetDocumentStoreDump(const DocumentStore& document_store) {
- std::string output;
- for (DocId document_id = 0; document_id < document_store.num_documents();
- document_id++) {
- Document doc;
- if (!document_store.ReadDocument(document_id, &doc)) {
- ICING_LOG(FATAL) << "Failed to read document";
- }
-
- AppendDocumentProto(document_id, doc, &output);
- }
- return output;
-}
-
-} // namespace lib
-} // namespace icing
diff --git a/icing/tools/document-store-dump.h b/icing/tools/document-store-dump.h
deleted file mode 100644
index 023b301..0000000
--- a/icing/tools/document-store-dump.h
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef ICING_TOOLS_DOCUMENT_STORE_DUMP_H_
-#define ICING_TOOLS_DOCUMENT_STORE_DUMP_H_
-
-#include <string>
-
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/document-store.h"
-
-namespace icing {
-namespace lib {
-
-// Utility function for dumping the complete document store content.
-// This provides a human-readable representation of the document store, mainly
-// provided for easier understandability for developers.
-// The output of this class should only be available on cmdline-tool-level
-// (with root access), or unit tests. In other words it should not be possible
-// to trigger this on a release key device, for data protection reasons.
-std::string GetDocumentStoreDump(const DocumentStore& document_store);
-
-} // namespace lib
-} // namespace icing
-#endif // ICING_TOOLS_DOCUMENT_STORE_DUMP_H_
diff --git a/icing/tools/icing-tool.cc b/icing/tools/icing-tool.cc
deleted file mode 100644
index 72a11e9..0000000
--- a/icing/tools/icing-tool.cc
+++ /dev/null
@@ -1,306 +0,0 @@
-// Copyright (C) 2019 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Copyright 2012 Google Inc. All Rights Reserved.
-// Author: ulas@google.com (Ulas Kirazci)
-//
-// A tool to debug the native index.
-
-#include <getopt.h>
-#include <unistd.h>
-
-#include <string>
-
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/core/string-util.h"
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/doc-property-filter.h"
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/document-store.h"
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/dynamic-trie.h"
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/filesystem.h"
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/mobstore.h"
-#include "java/com/google/android/gmscore/integ/modules/icing/jni/index/native-index-impl.h"
-#include "icing/absl_ports/str_cat.h"
-#include "icing/legacy/core/icing-string-util.h"
-#include "icing/tools/document-store-dump.h"
-#include "icing/util/logging.h"
-
-using std::vector;
-using ::wireless_android_play_playlog::icing::IndexRestorationStats;
-
-namespace icing {
-namespace lib {
-
-// 256KB for debugging.
-const size_t kMaxDocumentSizeForDebugging = 1u << 18;
-// Dump dynamic trie stats and contents.
-void ProcessDynamicTrie(const char* filename) {
- Filesystem filesystem;
- DynamicTrie trie(filename, DynamicTrie::RuntimeOptions(), &filesystem);
- if (!trie.Init()) {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Opening trie %s failed",
- filename);
- return;
- }
-
- std::string out;
- trie.GetDebugInfo(true, &out);
- printf("Stats:\n%s", out.c_str());
-
- std::ostringstream contents;
- vector<std::string> keys;
- trie.DumpTrie(&contents, &keys);
- printf("Contents:\n%s", contents.str().c_str());
-}
-
-NativeIndexImpl* MakeIndex(const char* root_dir) {
- NativeConfig native_config;
- native_config.set_max_document_size(kMaxDocumentSizeForDebugging);
- FlashIndexOptions flash_index_options(
- NativeIndexImpl::GetNativeIndexDir(root_dir));
- NativeIndexImpl* ni =
- new NativeIndexImpl(root_dir, native_config, flash_index_options);
- InitStatus init_status;
- if (!ni->Init(&init_status)) {
- ICING_LOG(FATAL) << "Failed to initialize legacy native index impl";
- }
-
- IndexRestorationStats unused;
- ni->RestoreIndex(IndexRequestSpec::default_instance(), &unused);
- return ni;
-}
-
-void RunQuery(NativeIndexImpl* ni, const std::string& query, int start,
- int num_results) {
- // Pull out corpusids and uris.
- QueryRequestSpec spec;
- spec.set_no_corpus_filter(true);
- spec.set_want_uris(true);
- spec.set_scoring_verbosity_level(1);
- spec.set_prefix_match(true);
-
- QueryResponse response;
- ni->ExecuteQuery(query, spec, 10000, start, num_results, &response);
-
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Query [%s] num results %u", query.c_str(), response.num_results());
-
- for (int i = 0, uri_offset = 0; i < response.num_results(); i++) {
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "%d: (cid=%u) uri %.*s", i, response.corpus_ids(i),
- response.uri_lengths(i), response.uri_buffer().data() + uri_offset);
- uri_offset += response.uri_lengths(i);
- }
-}
-
-void RunSuggest(NativeIndexImpl* ni, const std::string& prefix,
- int num_results) {
- SuggestionResponse results;
- ni->Suggest(prefix, num_results, vector<CorpusId>(), &results);
-
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Query [%s] num results %zu", prefix.c_str(),
- static_cast<size_t>(results.suggestions_size()));
-
- for (size_t i = 0; i < results.suggestions_size(); i++) {
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Sugg: [%s] display text [%s]", results.suggestions(i).query().c_str(),
- results.suggestions(i).display_text().c_str());
- }
-}
-
-int IcingTool(int argc, char** argv) {
- auto file_storage = CreatePosixFileStorage();
- enum Options {
- OPT_FILENAME,
- OPT_OP,
- OPT_QUERY,
- NUM_OPT,
- };
- static const option kOptions[NUM_OPT + 1] = {
- {"filename", 1, nullptr, 0},
- {"op", 1, nullptr, 0},
- {"query", 1, nullptr, 0},
- {nullptr, 0, nullptr, 0},
- };
- const char* opt_values[NUM_OPT];
- memset(opt_values, 0, sizeof(opt_values));
-
- while (true) {
- int opt_idx = -1;
- int ret = getopt_long(argc, argv, "", kOptions, &opt_idx);
- if (ret != 0) break;
-
- if (opt_idx >= 0 && opt_idx < NUM_OPT) {
- opt_values[opt_idx] = optarg;
- }
- }
-
- if (!opt_values[OPT_OP]) {
- ICING_LOG(ERROR) << "No op specified";
- return -1;
- }
-
- if (!opt_values[OPT_FILENAME]) {
- ICING_LOG(ERROR) << "No filename specified";
- return -1;
- }
- if (!strncmp(
- opt_values[OPT_FILENAME],
- "/data/data/com.google.android.gms/files/AppDataSearch",
- strlen("/data/data/com.google.android.gms/files/AppDataSearch"))) {
- ICING_LOG(ERROR)
- << "Should not read directly from the file in gmscore - "
- "icing-tool also commits writes as side-effects which corrupts "
- "the index on concurrent modification";
- return -1;
- }
-
- const char* op = opt_values[OPT_OP];
- DocumentStore::Options options(file_storage.get(),
- kMaxDocumentSizeForDebugging);
- if (!strcmp(op, "dyntrie")) {
- std::string full_file_path =
- absl_ports::StrCat(opt_values[OPT_FILENAME], "/idx.lexicon");
- ProcessDynamicTrie(full_file_path.c_str());
- } else if (!strcmp(op, "verify")) {
- std::unique_ptr<NativeIndexImpl> ni(MakeIndex(opt_values[OPT_FILENAME]));
- ni->CheckVerify();
- } else if (!strcmp(op, "query")) {
- if (opt_values[OPT_QUERY] == nullptr) {
- ICING_LOG(FATAL) << "Opt value is null";
- }
-
- std::unique_ptr<NativeIndexImpl> ni(MakeIndex(opt_values[OPT_FILENAME]));
- RunQuery(ni.get(), opt_values[OPT_QUERY], 0, 100);
- } else if (!strcmp(op, "suggest")) {
- if (opt_values[OPT_QUERY] == nullptr) {
- ICING_LOG(FATAL) << "Opt value is null";
- }
-
- std::unique_ptr<NativeIndexImpl> ni(MakeIndex(opt_values[OPT_FILENAME]));
- RunSuggest(ni.get(), opt_values[OPT_QUERY], 100);
- } else if (!strcmp(op, "dump-all-docs")) {
- DocumentStore ds(opt_values[OPT_FILENAME], options);
- if (!ds.Init()) {
- ICING_LOG(FATAL) << "Legacy document store failed to initialize";
- }
-
- printf(
- "------ Document Store Dump Start ------\n"
- "%s\n"
- "------ Document Store Dump End ------\n",
- GetDocumentStoreDump(ds).c_str());
- } else if (!strcmp(op, "dump-uris")) {
- CorpusId corpus_id = kInvalidCorpusId;
- if (opt_values[OPT_QUERY]) {
- // Query is corpus id.
- corpus_id = atoi(opt_values[OPT_QUERY]); // NOLINT
- }
- DocumentStore ds(opt_values[OPT_FILENAME], options);
- if (!ds.Init()) {
- ICING_LOG(FATAL) << "Legacy document store failed to initialize";
- }
-
- DocPropertyFilter dpf;
- ds.AddDeletedTagFilter(&dpf);
-
- // Dump with format "<corpusid> <uri> <tagname>*".
- int filtered = 0;
- vector<std::string> tagnames;
- for (DocId document_id = 0; document_id < ds.num_documents();
- document_id++) {
- Document doc;
- if (!ds.ReadDocument(document_id, &doc)) {
- ICING_LOG(FATAL) << "Failed to read document.";
- }
-
- if (corpus_id != kInvalidCorpusId && corpus_id != doc.corpus_id()) {
- filtered++;
- continue;
- }
- if (dpf.Match(0, document_id)) {
- filtered++;
- continue;
- }
-
- tagnames.clear();
- ds.GetAllSetUserTagNames(document_id, &tagnames);
-
- printf("%d %s %s\n", doc.corpus_id(), doc.uri().c_str(),
- StringUtil::JoinStrings("/", tagnames).c_str());
- }
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Processed %u filtered %d", ds.num_documents(), filtered);
- } else if (!strcmp(op, "dump-docs")) {
- std::string out_filename = opt_values[OPT_FILENAME];
- out_filename.append("/docs-dump");
- CorpusId corpus_id = kInvalidCorpusId;
- if (opt_values[OPT_QUERY]) {
- // Query is corpus id.
- corpus_id = atoi(opt_values[OPT_QUERY]); // NOLINT
- out_filename.push_back('.');
- out_filename.append(opt_values[OPT_QUERY]);
- }
- DocumentStore ds(opt_values[OPT_FILENAME], options);
- if (!ds.Init()) {
- ICING_LOG(FATAL) << "Legacy document store failed to initialize";
- }
-
- DocPropertyFilter dpf;
- ds.AddDeletedTagFilter(&dpf);
-
- // Dump with format (<32-bit length><serialized content>)*.
- FILE* fp = fopen(out_filename.c_str(), "w");
- int filtered = 0;
- for (DocId document_id = 0; document_id < ds.num_documents();
- document_id++) {
- Document doc;
- if (!ds.ReadDocument(document_id, &doc)) {
- ICING_LOG(FATAL) << "Failed to read document.";
- }
-
- if (corpus_id != kInvalidCorpusId && corpus_id != doc.corpus_id()) {
- filtered++;
- continue;
- }
- if (dpf.Match(0, document_id)) {
- filtered++;
- continue;
- }
-
- std::string serialized = doc.SerializeAsString();
- uint32_t length = serialized.size();
- if (fwrite(&length, 1, sizeof(length), fp) != sizeof(length)) {
- ICING_LOG(FATAL) << "Failed to write length information to file";
- }
-
- if (fwrite(serialized.data(), 1, serialized.size(), fp) !=
- serialized.size()) {
- ICING_LOG(FATAL) << "Failed to write document to file";
- }
- }
- ICING_VLOG(1) << IcingStringUtil::StringPrintf(
- "Processed %u filtered %d", ds.num_documents(), filtered);
- fclose(fp);
- } else {
- ICING_LOG(ERROR) << IcingStringUtil::StringPrintf("Unknown op %s", op);
- return -1;
- }
-
- return 0;
-}
-
-} // namespace lib
-} // namespace icing
-
-int main(int argc, char** argv) { return icing::lib::IcingTool(argc, argv); }
diff --git a/icing/util/bit-util.h b/icing/util/bit-util.h
index e2bb817..7ca20b4 100644
--- a/icing/util/bit-util.h
+++ b/icing/util/bit-util.h
@@ -24,19 +24,18 @@
// Manipulating bit fields.
//
-// x value containing the bit field(s)
-// offset offset of bit field in x
-// len len of bit field in x
+// value value containing the bit field(s)
+// lsb_offset offset of bit field in value, starting from the least significant
+// bit. For example, the '1' in '0100' has an lsb_offset of 2.
+// len len of bit field in value
//
// REQUIREMENTS
//
-// - x an unsigned integer <= 64 bits
-// - offset + len <= sizeof(x) * 8
+// - value is an unsigned integer <= 64 bits
+// - lsb_offset + len <= sizeof(value) * 8
//
// There is no error checking so you will get garbage if you don't
// ensure the above.
-//
-// To set a value, use BITFIELD_CLEAR then BITFIELD_OR.
// Shifting by more than the word length is undefined (on ARM it has the
// intended effect, but on Intel it shifts by % word length), so check the
@@ -44,20 +43,65 @@
inline uint64_t BitfieldMask(uint32_t len) {
return ((len == 0) ? 0U : ((~uint64_t{0}) >> (64 - (len))));
}
-inline uint64_t BitfieldGet(uint64_t mask, uint32_t lsb_offset, uint32_t len) {
- return ((mask) >> (lsb_offset)) & BitfieldMask(len);
+
+inline void BitfieldClear(uint32_t lsb_offset, uint32_t len,
+ uint8_t* value_out) {
+ *value_out &= ~(BitfieldMask(len) << lsb_offset);
}
-inline void BitfieldSet(uint32_t value, uint32_t lsb_offset, uint32_t len,
- uint32_t* mask) {
- // We conservatively mask val at len so x won't be corrupted if val >=
- // 1 << len.
- *mask |= (uint64_t{value} & BitfieldMask(len)) << (lsb_offset);
+
+inline void BitfieldClear(uint32_t lsb_offset, uint32_t len,
+ uint16_t* value_out) {
+ *value_out &= ~(BitfieldMask(len) << lsb_offset);
}
-inline void BitfieldSet(uint64_t value, uint32_t lsb_offset, uint32_t len,
- uint64_t* mask) {
- // We conservatively mask val at len so x won't be corrupted if val >=
- // 1 << len.
- *mask |= (value & BitfieldMask(len)) << (lsb_offset);
+
+inline void BitfieldClear(uint32_t lsb_offset, uint32_t len,
+ uint32_t* value_out) {
+ *value_out &= ~(BitfieldMask(len) << lsb_offset);
+}
+
+inline void BitfieldClear(uint32_t lsb_offset, uint32_t len,
+ uint64_t* value_out) {
+ *value_out &= ~(BitfieldMask(len) << lsb_offset);
+}
+
+inline uint64_t BitfieldGet(uint64_t value, uint32_t lsb_offset, uint32_t len) {
+ return ((value) >> (lsb_offset)) & BitfieldMask(len);
+}
+
+inline void BitfieldSet(uint8_t new_value, uint32_t lsb_offset, uint32_t len,
+ uint8_t* value_out) {
+ BitfieldClear(lsb_offset, len, value_out);
+
+ // We conservatively mask new_value at len so value won't be corrupted if
+ // new_value >= (1 << len).
+ *value_out |= (new_value & BitfieldMask(len)) << (lsb_offset);
+}
+
+inline void BitfieldSet(uint16_t new_value, uint32_t lsb_offset, uint32_t len,
+ uint16_t* value_out) {
+ BitfieldClear(lsb_offset, len, value_out);
+
+ // We conservatively mask new_value at len so value won't be corrupted if
+ // new_value >= (1 << len).
+ *value_out |= (new_value & BitfieldMask(len)) << (lsb_offset);
+}
+
+inline void BitfieldSet(uint32_t new_value, uint32_t lsb_offset, uint32_t len,
+ uint32_t* value_out) {
+ BitfieldClear(lsb_offset, len, value_out);
+
+ // We conservatively mask new_value at len so value won't be corrupted if
+ // new_value >= (1 << len).
+ *value_out |= (new_value & BitfieldMask(len)) << (lsb_offset);
+}
+
+inline void BitfieldSet(uint64_t new_value, uint32_t lsb_offset, uint32_t len,
+ uint64_t* value_out) {
+ BitfieldClear(lsb_offset, len, value_out);
+
+ // We conservatively mask new_value at len so value won't be corrupted if
+ // new_value >= (1 << len).
+ *value_out |= (new_value & BitfieldMask(len)) << (lsb_offset);
}
} // namespace bit_util
diff --git a/icing/util/bit-util_test.cc b/icing/util/bit-util_test.cc
new file mode 100644
index 0000000..3b86a21
--- /dev/null
+++ b/icing/util/bit-util_test.cc
@@ -0,0 +1,145 @@
+// Copyright (C) 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "icing/util/bit-util.h"
+
+#include <memory>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace icing {
+namespace lib {
+namespace {
+
+using ::testing::Eq;
+
+TEST(BitUtilTest, BitfieldMask) {
+ // Check that we can handle up to uint8_t's
+ EXPECT_THAT(bit_util::BitfieldMask(/*len=*/0), Eq(0b0));
+ EXPECT_THAT(bit_util::BitfieldMask(/*len=*/1), Eq(0b01));
+
+ // Check that we can handle up to uint32_t's
+ EXPECT_THAT(bit_util::BitfieldMask(/*len=*/16), Eq(0b01111111111111111));
+
+ // Check that we can handle up to uint64_t's
+ EXPECT_THAT(
+ bit_util::BitfieldMask(/*len=*/63),
+ Eq(0b0111111111111111111111111111111111111111111111111111111111111111));
+}
+
+TEST(BitUtilTest, BitfieldClear) {
+ // Check that we can handle up to uint8_t's
+ uint8_t value_8 = 0b0;
+ bit_util::BitfieldClear(/*lsb_offset=*/0, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b0));
+
+ value_8 = 0b01;
+ bit_util::BitfieldClear(/*lsb_offset=*/0, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b00));
+
+ value_8 = 0b011;
+ bit_util::BitfieldClear(/*lsb_offset=*/1, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b001));
+
+ value_8 = 0b011;
+ bit_util::BitfieldClear(/*lsb_offset=*/0, /*len=*/2, &value_8);
+ EXPECT_THAT(value_8, Eq(0b000));
+
+ value_8 = 0b0110;
+ bit_util::BitfieldClear(/*lsb_offset=*/1, /*len=*/2, &value_8);
+ EXPECT_THAT(value_8, Eq(0b0000));
+
+ // Check that we can handle up to uint32_t's
+ uint32_t value_32 = 0b010000000000000000000000;
+ bit_util::BitfieldClear(/*lsb_offset=*/22, /*len=*/1, &value_32);
+ EXPECT_THAT(value_32, Eq(0b0));
+
+ // Check that we can handle up to uint64_t's
+ uint64_t value_64 = 0b0100000000000000000000000000000000000;
+ bit_util::BitfieldClear(/*lsb_offset=*/35, /*len=*/1, &value_64);
+ EXPECT_THAT(value_64, Eq(0b0));
+}
+
+TEST(BitUtilTest, BitfieldGet) {
+ // Get something in the uint8_t range
+ EXPECT_THAT(bit_util::BitfieldGet(0b0, /*lsb_offset=*/0, /*len=*/1), Eq(0b0));
+ EXPECT_THAT(bit_util::BitfieldGet(0b01, /*lsb_offset=*/0, /*len=*/1),
+ Eq(0b01));
+ EXPECT_THAT(bit_util::BitfieldGet(0b010, /*lsb_offset=*/1, /*len=*/1),
+ Eq(0b01));
+ EXPECT_THAT(bit_util::BitfieldGet(0b001, /*lsb_offset=*/1, /*len=*/1),
+ Eq(0b0));
+ EXPECT_THAT(bit_util::BitfieldGet(0b011, /*lsb_offset=*/0, /*len=*/2),
+ Eq(0b011));
+ EXPECT_THAT(bit_util::BitfieldGet(0b0110, /*lsb_offset=*/1, /*len=*/2),
+ Eq(0b011));
+ EXPECT_THAT(bit_util::BitfieldGet(0b0101, /*lsb_offset=*/0, /*len=*/3),
+ Eq(0b0101));
+
+ // Get something in the uint32_t range
+ EXPECT_THAT(
+ bit_util::BitfieldGet(0b01000000000000, /*lsb_offset=*/12, /*len=*/1),
+ Eq(0b01));
+
+ // Get something in the uint64_t range
+ EXPECT_THAT(bit_util::BitfieldGet(0b010000000000000000000000000000000000,
+ /*lsb_offset=*/34, /*len=*/1),
+ Eq(0b01));
+}
+
+TEST(BitUtilTest, BitfieldSet) {
+ // Set something in the uint8_t range
+ uint8_t value_8 = 0b0;
+ bit_util::BitfieldSet(0b0, /*lsb_offset=*/0, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b0));
+
+ value_8 = 0b01;
+ bit_util::BitfieldSet(0b01, /*lsb_offset=*/0, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b01));
+
+ value_8 = 0b00;
+ bit_util::BitfieldSet(0b01, /*lsb_offset=*/0, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b01));
+
+ value_8 = 0b00;
+ bit_util::BitfieldSet(0b011, /*lsb_offset=*/0, /*len=*/2, &value_8);
+ EXPECT_THAT(value_8, Eq(0b011));
+
+ value_8 = 0b01;
+ bit_util::BitfieldSet(0b011, /*lsb_offset=*/0, /*len=*/2, &value_8);
+ EXPECT_THAT(value_8, Eq(0b011));
+
+ value_8 = 0b01;
+ bit_util::BitfieldSet(0b01, /*lsb_offset=*/1, /*len=*/1, &value_8);
+ EXPECT_THAT(value_8, Eq(0b011));
+
+ value_8 = 0b0001;
+ bit_util::BitfieldSet(0b011, /*lsb_offset=*/1, /*len=*/2, &value_8);
+ EXPECT_THAT(value_8, Eq(0b0111));
+
+ // Set something in the uint32_t range
+ uint32_t value_32 = 0b0;
+ bit_util::BitfieldSet(0b01, /*lsb_offset=*/16, /*len=*/1, &value_32);
+ EXPECT_THAT(value_32, Eq(0b010000000000000000));
+
+ // Set something in the uint64_t range
+ uint64_t value_64 = 0b0;
+ bit_util::BitfieldSet(0b01, /*lsb_offset=*/34, /*len=*/1, &value_64);
+ EXPECT_THAT(value_64, Eq(0b010000000000000000000000000000000000));
+}
+
+} // namespace
+} // namespace lib
+} // namespace icing
diff --git a/icing/util/document-validator_test.cc b/icing/util/document-validator_test.cc
index f05e8a6..cb013d7 100644
--- a/icing/util/document-validator_test.cc
+++ b/icing/util/document-validator_test.cc
@@ -21,6 +21,7 @@
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/proto/schema.pb.h"
+#include "icing/schema-builder.h"
#include "icing/schema/schema-store.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
@@ -45,17 +46,52 @@
constexpr char kDefaultNamespace[] = "icing";
constexpr char kDefaultString[] = "This is a string.";
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_OPTIONAL =
+ PropertyConfigProto_Cardinality_Code_OPTIONAL;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REQUIRED =
+ PropertyConfigProto_Cardinality_Code_REQUIRED;
+constexpr PropertyConfigProto_Cardinality_Code CARDINALITY_REPEATED =
+ PropertyConfigProto_Cardinality_Code_REPEATED;
+
+constexpr PropertyConfigProto_DataType_Code TYPE_STRING =
+ PropertyConfigProto_DataType_Code_STRING;
+
class DocumentValidatorTest : public ::testing::Test {
protected:
DocumentValidatorTest() {}
void SetUp() override {
- SchemaProto schema;
- auto type_config = schema.add_types();
- CreateEmailTypeConfig(type_config);
-
- type_config = schema.add_types();
- CreateConversationTypeConfig(type_config);
+ SchemaProto schema =
+ SchemaBuilder()
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kTypeEmail)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyRecipients)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .AddType(
+ SchemaTypeConfigBuilder()
+ .SetType(kTypeConversation)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(
+ kTypeEmail, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
@@ -66,25 +102,6 @@
std::make_unique<DocumentValidator>(schema_store_.get());
}
- static void CreateEmailTypeConfig(SchemaTypeConfigProto* type_config) {
- type_config->set_schema_type(kTypeEmail);
-
- auto subject = type_config->add_properties();
- subject->set_property_name(kPropertySubject);
- subject->set_data_type(PropertyConfigProto::DataType::STRING);
- subject->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- auto text = type_config->add_properties();
- text->set_property_name(kPropertyText);
- text->set_data_type(PropertyConfigProto::DataType::STRING);
- text->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
-
- auto recipients = type_config->add_properties();
- recipients->set_property_name(kPropertyRecipients);
- recipients->set_data_type(PropertyConfigProto::DataType::STRING);
- recipients->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- }
-
static DocumentBuilder SimpleEmailBuilder() {
return DocumentBuilder()
.SetKey(kDefaultNamespace, "email/1")
@@ -95,21 +112,6 @@
kDefaultString);
}
- static void CreateConversationTypeConfig(SchemaTypeConfigProto* type_config) {
- type_config->set_schema_type(kTypeConversation);
-
- auto name = type_config->add_properties();
- name->set_property_name(kPropertyName);
- name->set_data_type(PropertyConfigProto::DataType::STRING);
- name->set_cardinality(PropertyConfigProto::Cardinality::REQUIRED);
-
- auto emails = type_config->add_properties();
- emails->set_property_name(kPropertyEmails);
- emails->set_data_type(PropertyConfigProto::DataType::DOCUMENT);
- emails->set_cardinality(PropertyConfigProto::Cardinality::REPEATED);
- emails->set_schema_type(kTypeEmail);
- }
-
static DocumentBuilder SimpleConversationBuilder() {
return DocumentBuilder()
.SetKey(kDefaultNamespace, "conversation/1")
@@ -326,12 +328,26 @@
}
TEST_F(DocumentValidatorTest, HandleTypeConfigMapChangesOk) {
- SchemaProto email_schema;
- auto type_config = email_schema.add_types();
- CreateEmailTypeConfig(type_config);
+ SchemaProto email_schema =
+ SchemaBuilder()
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kTypeEmail)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertySubject)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyText)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_OPTIONAL))
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyRecipients)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
- // Create a custom directory so we don't collide with the test's preset schema
- // in SetUp
+ // Create a custom directory so we don't collide
+ // with the test's preset schema in SetUp
const std::string custom_schema_dir = GetTestTempDir() + "/custom_schema";
filesystem_.DeleteDirectoryRecursively(custom_schema_dir.c_str());
filesystem_.CreateDirectoryRecursively(custom_schema_dir.c_str());
@@ -352,9 +368,21 @@
HasSubstr("'Conversation' not found")));
// Add the 'Conversation' type
- SchemaProto email_and_conversation_schema = email_schema;
- type_config = email_and_conversation_schema.add_types();
- CreateConversationTypeConfig(type_config);
+ SchemaProto email_and_conversation_schema =
+ SchemaBuilder(email_schema)
+ .AddType(SchemaTypeConfigBuilder()
+ .SetType(kTypeConversation)
+ .AddProperty(PropertyConfigBuilder()
+ .SetName(kPropertyName)
+ .SetDataType(TYPE_STRING)
+ .SetCardinality(CARDINALITY_REQUIRED))
+ .AddProperty(
+ PropertyConfigBuilder()
+ .SetName(kPropertyEmails)
+ .SetDataTypeDocument(
+ kTypeEmail, /*index_nested_properties=*/true)
+ .SetCardinality(CARDINALITY_REPEATED)))
+ .Build();
// DocumentValidator should be able to handle the SchemaStore getting updated
// separately
diff --git a/icing/util/math-util.h b/icing/util/math-util.h
index fc11a09..3f2a69d 100644
--- a/icing/util/math-util.h
+++ b/icing/util/math-util.h
@@ -37,7 +37,7 @@
template <typename IntType>
static IntType RoundDownTo(IntType input_value, IntType rounding_value) {
static_assert(std::numeric_limits<IntType>::is_integer,
- "RoundUpTo() operation type is not integer");
+ "RoundDownTo() operation type is not integer");
if (input_value <= 0) {
return 0;
diff --git a/java/Android.bp b/java/Android.bp
index 7daeb0a..ef417ba 100644
--- a/java/Android.bp
+++ b/java/Android.bp
@@ -25,9 +25,12 @@
name: "libicing-java",
srcs: ["src/**/*.java"],
static_libs: [
- "androidx.annotation_annotation",
"icing-java-proto-lite",
"libprotobuf-java-lite",
],
+ libs: [
+ "androidx.annotation_annotation",
+ ],
+ sdk_version: "current",
apex_available: ["com.android.appsearch"],
}
diff --git a/java/src/com/google/android/icing/BreakIteratorBatcher.java b/java/src/com/google/android/icing/BreakIteratorBatcher.java
index 58efbfc..2b87327 100644
--- a/java/src/com/google/android/icing/BreakIteratorBatcher.java
+++ b/java/src/com/google/android/icing/BreakIteratorBatcher.java
@@ -14,9 +14,6 @@
package com.google.android.icing;
-import androidx.annotation.NonNull;
-import androidx.annotation.RestrictTo;
-
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.List;
@@ -38,20 +35,17 @@
* utf16Boundaries = brkItrBatcher.next(5);
* assertThat(utf16Boundaries).asList().containsExactly(9);
* }</pre>
- *
- * @hide
*/
-@RestrictTo(RestrictTo.Scope.LIBRARY_GROUP)
public class BreakIteratorBatcher {
private final BreakIterator iterator;
- public BreakIteratorBatcher(@NonNull Locale locale) {
+ public BreakIteratorBatcher(Locale locale) {
this.iterator = BreakIterator.getWordInstance(locale);
}
/* Direct calls to BreakIterator */
- public void setText(@NonNull String text) {
+ public void setText(String text) {
iterator.setText(text);
}
@@ -73,7 +67,6 @@
* the end of the text (returns BreakIterator#DONE), then only the results of the previous calls
* in that batch will be returned.
*/
- @NonNull
public int[] next(int batchSize) {
List<Integer> breakIndices = new ArrayList<>(batchSize);
for (int i = 0; i < batchSize; ++i) {
diff --git a/java/src/com/google/android/icing/IcingSearchEngine.java b/java/src/com/google/android/icing/IcingSearchEngine.java
index 88d0578..1f5fb51 100644
--- a/java/src/com/google/android/icing/IcingSearchEngine.java
+++ b/java/src/com/google/android/icing/IcingSearchEngine.java
@@ -31,6 +31,7 @@
import com.google.android.icing.proto.InitializeResultProto;
import com.google.android.icing.proto.OptimizeResultProto;
import com.google.android.icing.proto.PersistToDiskResultProto;
+import com.google.android.icing.proto.PersistType;
import com.google.android.icing.proto.PutResultProto;
import com.google.android.icing.proto.ReportUsageResultProto;
import com.google.android.icing.proto.ResetResultProto;
@@ -41,6 +42,7 @@
import com.google.android.icing.proto.SearchSpecProto;
import com.google.android.icing.proto.SetSchemaResultProto;
import com.google.android.icing.proto.StatusProto;
+import com.google.android.icing.proto.StorageInfoResultProto;
import com.google.android.icing.proto.UsageReport;
import com.google.protobuf.ExtensionRegistryLite;
import com.google.protobuf.InvalidProtocolBufferException;
@@ -51,9 +53,11 @@
*
* <p>If this instance has been closed, the instance is no longer usable.
*
+ * <p>Keep this class non-final so that it can be mocked in AppSearch.
+ *
* <p>NOTE: This class is NOT thread-safe.
*/
-public final class IcingSearchEngine implements Closeable {
+public class IcingSearchEngine implements Closeable {
private static final String TAG = "IcingSearchEngine";
private static final ExtensionRegistryLite EXTENSION_REGISTRY_LITE =
@@ -434,10 +438,10 @@
}
@NonNull
- public PersistToDiskResultProto persistToDisk() {
+ public PersistToDiskResultProto persistToDisk(@NonNull PersistType.Code persistTypeCode) {
throwIfClosed();
- byte[] persistToDiskResultBytes = nativePersistToDisk(this);
+ byte[] persistToDiskResultBytes = nativePersistToDisk(this, persistTypeCode.getNumber());
if (persistToDiskResultBytes == null) {
Log.e(TAG, "Received null PersistToDiskResultProto from native.");
return PersistToDiskResultProto.newBuilder()
@@ -501,6 +505,29 @@
}
@NonNull
+ public StorageInfoResultProto getStorageInfo() {
+ throwIfClosed();
+
+ byte[] storageInfoResultProtoBytes = nativeGetStorageInfo(this);
+ if (storageInfoResultProtoBytes == null) {
+ Log.e(TAG, "Received null StorageInfoResultProto from native.");
+ return StorageInfoResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+
+ try {
+ return StorageInfoResultProto.parseFrom(
+ storageInfoResultProtoBytes, EXTENSION_REGISTRY_LITE);
+ } catch (InvalidProtocolBufferException e) {
+ Log.e(TAG, "Error parsing GetOptimizeInfoResultProto.", e);
+ return StorageInfoResultProto.newBuilder()
+ .setStatus(StatusProto.newBuilder().setCode(StatusProto.Code.INTERNAL))
+ .build();
+ }
+ }
+
+ @NonNull
public ResetResultProto reset() {
throwIfClosed();
@@ -568,11 +595,13 @@
private static native byte[] nativeDeleteByQuery(
IcingSearchEngine instance, byte[] searchSpecBytes);
- private static native byte[] nativePersistToDisk(IcingSearchEngine instance);
+ private static native byte[] nativePersistToDisk(IcingSearchEngine instance, int persistType);
private static native byte[] nativeOptimize(IcingSearchEngine instance);
private static native byte[] nativeGetOptimizeInfo(IcingSearchEngine instance);
+ private static native byte[] nativeGetStorageInfo(IcingSearchEngine instance);
+
private static native byte[] nativeReset(IcingSearchEngine instance);
}
diff --git a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
index 56edaf1..2019033 100644
--- a/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
+++ b/java/tests/instrumentation/src/com/google/android/icing/IcingSearchEngineTest.java
@@ -32,6 +32,7 @@
import com.google.android.icing.proto.InitializeResultProto;
import com.google.android.icing.proto.OptimizeResultProto;
import com.google.android.icing.proto.PersistToDiskResultProto;
+import com.google.android.icing.proto.PersistType;
import com.google.android.icing.proto.PropertyConfigProto;
import com.google.android.icing.proto.PropertyProto;
import com.google.android.icing.proto.PutResultProto;
@@ -44,7 +45,10 @@
import com.google.android.icing.proto.SearchResultProto;
import com.google.android.icing.proto.SearchSpecProto;
import com.google.android.icing.proto.SetSchemaResultProto;
+import com.google.android.icing.proto.SnippetMatchProto;
+import com.google.android.icing.proto.SnippetProto;
import com.google.android.icing.proto.StatusProto;
+import com.google.android.icing.proto.StorageInfoResultProto;
import com.google.android.icing.proto.StringIndexingConfig;
import com.google.android.icing.proto.StringIndexingConfig.TokenizerType;
import com.google.android.icing.proto.TermMatchType;
@@ -394,7 +398,8 @@
public void testPersistToDisk() throws Exception {
assertStatusOk(icingSearchEngine.initialize().getStatus());
- PersistToDiskResultProto persistToDiskResultProto = icingSearchEngine.persistToDisk();
+ PersistToDiskResultProto persistToDiskResultProto =
+ icingSearchEngine.persistToDisk(PersistType.Code.LITE);
assertStatusOk(persistToDiskResultProto.getStatus());
}
@@ -417,6 +422,14 @@
}
@Test
+ public void testGetStorageInfo() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ StorageInfoResultProto storageInfoResultProto = icingSearchEngine.getStorageInfo();
+ assertStatusOk(storageInfoResultProto.getStatus());
+ }
+
+ @Test
public void testGetAllNamespaces() throws Exception {
assertStatusOk(icingSearchEngine.initialize().getStatus());
@@ -475,6 +488,140 @@
assertStatusOk(reportUsageResultProto.getStatus());
}
+ @Test
+ public void testCJKTSnippets() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(createEmailTypeConfig()).build();
+ assertStatusOk(
+ icingSearchEngine.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false).getStatus());
+
+ // String: "我每天走路去上班。"
+ // ^ ^ ^ ^^
+ // UTF16 idx: 0 1 3 5 6
+ // Breaks into segments: "我", "每天", "走路", "去", "上班"
+ String chinese = "我每天走路去上班。";
+ assertThat(chinese.length()).isEqualTo(9);
+ DocumentProto emailDocument1 =
+ createEmailDocument("namespace", "uri1").toBuilder()
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues(chinese))
+ .build();
+ assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus());
+
+ // Search and request snippet matching but no windowing.
+ SearchSpecProto searchSpec =
+ SearchSpecProto.newBuilder()
+ .setQuery("每")
+ .setTermMatchType(TermMatchType.Code.PREFIX)
+ .build();
+ ResultSpecProto resultSpecProto =
+ ResultSpecProto.newBuilder()
+ .setSnippetSpec(
+ ResultSpecProto.SnippetSpecProto.newBuilder()
+ .setNumToSnippet(Integer.MAX_VALUE)
+ .setNumMatchesPerProperty(Integer.MAX_VALUE))
+ .build();
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto searchResultProto =
+ icingSearchEngine.search(
+ searchSpec, ScoringSpecProto.getDefaultInstance(), resultSpecProto);
+ assertStatusOk(searchResultProto.getStatus());
+ assertThat(searchResultProto.getResultsCount()).isEqualTo(1);
+
+ // Ensure that one and only one property was matched and it was "subject"
+ SnippetProto snippetProto = searchResultProto.getResults(0).getSnippet();
+ assertThat(snippetProto.getEntriesList()).hasSize(1);
+ SnippetProto.EntryProto entryProto = snippetProto.getEntries(0);
+ assertThat(entryProto.getPropertyName()).isEqualTo("subject");
+
+ // Get the content for "subject" and see what the match is.
+ DocumentProto resultDocument = searchResultProto.getResults(0).getDocument();
+ assertThat(resultDocument.getPropertiesList()).hasSize(1);
+ PropertyProto subjectProperty = resultDocument.getProperties(0);
+ assertThat(subjectProperty.getName()).isEqualTo("subject");
+ assertThat(subjectProperty.getStringValuesList()).hasSize(1);
+ String content = subjectProperty.getStringValues(0);
+
+ // Ensure that there is one and only one match within "subject"
+ assertThat(entryProto.getSnippetMatchesList()).hasSize(1);
+ SnippetMatchProto matchProto = entryProto.getSnippetMatches(0);
+
+ int matchStart = matchProto.getExactMatchUtf16Position();
+ int matchEnd = matchStart + matchProto.getExactMatchUtf16Length();
+ assertThat(matchStart).isEqualTo(1);
+ assertThat(matchEnd).isEqualTo(3);
+ String match = content.substring(matchStart, matchEnd);
+ assertThat(match).isEqualTo("每天");
+ }
+
+ @Test
+ public void testUtf16MultiByteSnippets() throws Exception {
+ assertStatusOk(icingSearchEngine.initialize().getStatus());
+
+ SchemaProto schema = SchemaProto.newBuilder().addTypes(createEmailTypeConfig()).build();
+ assertStatusOk(
+ icingSearchEngine.setSchema(schema, /*ignoreErrorsAndDeleteDocuments=*/ false).getStatus());
+
+ // String: "𐀀𐀁 𐀂𐀃 𐀄"
+ // ^ ^ ^
+ // UTF16 idx: 0 5 10
+ // Breaks into segments: "𐀀𐀁", "𐀂𐀃", "𐀄"
+ String text = "𐀀𐀁 𐀂𐀃 𐀄";
+ assertThat(text.length()).isEqualTo(12);
+ DocumentProto emailDocument1 =
+ createEmailDocument("namespace", "uri1").toBuilder()
+ .addProperties(PropertyProto.newBuilder().setName("subject").addStringValues(text))
+ .build();
+ assertStatusOk(icingSearchEngine.put(emailDocument1).getStatus());
+
+ // Search and request snippet matching but no windowing.
+ SearchSpecProto searchSpec =
+ SearchSpecProto.newBuilder()
+ .setQuery("𐀂")
+ .setTermMatchType(TermMatchType.Code.PREFIX)
+ .build();
+ ResultSpecProto resultSpecProto =
+ ResultSpecProto.newBuilder()
+ .setSnippetSpec(
+ ResultSpecProto.SnippetSpecProto.newBuilder()
+ .setNumToSnippet(Integer.MAX_VALUE)
+ .setNumMatchesPerProperty(Integer.MAX_VALUE))
+ .build();
+
+ // Search and make sure that we got a single successful result
+ SearchResultProto searchResultProto =
+ icingSearchEngine.search(
+ searchSpec, ScoringSpecProto.getDefaultInstance(), resultSpecProto);
+ assertStatusOk(searchResultProto.getStatus());
+ assertThat(searchResultProto.getResultsCount()).isEqualTo(1);
+
+ // Ensure that one and only one property was matched and it was "subject"
+ SnippetProto snippetProto = searchResultProto.getResults(0).getSnippet();
+ assertThat(snippetProto.getEntriesList()).hasSize(1);
+ SnippetProto.EntryProto entryProto = snippetProto.getEntries(0);
+ assertThat(entryProto.getPropertyName()).isEqualTo("subject");
+
+ // Get the content for "subject" and see what the match is.
+ DocumentProto resultDocument = searchResultProto.getResults(0).getDocument();
+ assertThat(resultDocument.getPropertiesList()).hasSize(1);
+ PropertyProto subjectProperty = resultDocument.getProperties(0);
+ assertThat(subjectProperty.getName()).isEqualTo("subject");
+ assertThat(subjectProperty.getStringValuesList()).hasSize(1);
+ String content = subjectProperty.getStringValues(0);
+
+ // Ensure that there is one and only one match within "subject"
+ assertThat(entryProto.getSnippetMatchesList()).hasSize(1);
+ SnippetMatchProto matchProto = entryProto.getSnippetMatches(0);
+
+ int matchStart = matchProto.getExactMatchUtf16Position();
+ int matchEnd = matchStart + matchProto.getExactMatchUtf16Length();
+ assertThat(matchStart).isEqualTo(5);
+ assertThat(matchEnd).isEqualTo(9);
+ String match = content.substring(matchStart, matchEnd);
+ assertThat(match).isEqualTo("𐀂𐀃");
+ }
+
private static void assertStatusOk(StatusProto status) {
assertWithMessage(status.getMessage()).that(status.getCode()).isEqualTo(StatusProto.Code.OK);
}
diff --git a/proto/icing/proto/document.proto b/proto/icing/proto/document.proto
index d55b7e2..9a4e5b9 100644
--- a/proto/icing/proto/document.proto
+++ b/proto/icing/proto/document.proto
@@ -110,11 +110,11 @@
// go/icing-library-apis.
optional StatusProto status = 1;
- // Stats of the function call. Inside NativePutDocumentStats, the function
+ // Stats of the function call. Inside PutDocumentStatsProto, the function
// call latency 'latency_ms' will always be populated. The other fields will
// be accurate only when the status above is OK. See logging.proto for
// details.
- optional NativePutDocumentStats native_put_document_stats = 2;
+ optional PutDocumentStatsProto put_document_stats = 2;
}
// Result of a call to IcingSearchEngine.Get
@@ -167,7 +167,7 @@
optional StatusProto status = 1;
// Stats for delete execution performance.
- optional NativeDeleteStats delete_stats = 2;
+ optional DeleteStatsProto delete_stats = 2;
}
// Result of a call to IcingSearchEngine.DeleteByNamespace
@@ -186,7 +186,7 @@
optional StatusProto status = 1;
// Stats for delete execution performance.
- optional NativeDeleteStats delete_stats = 2;
+ optional DeleteStatsProto delete_stats = 2;
}
// Result of a call to IcingSearchEngine.DeleteBySchemaType
@@ -205,7 +205,7 @@
optional StatusProto status = 1;
// Stats for delete execution performance.
- optional NativeDeleteStats delete_stats = 2;
+ optional DeleteStatsProto delete_stats = 2;
}
// Result of a call to IcingSearchEngine.DeleteByQuery
@@ -224,5 +224,5 @@
optional StatusProto status = 1;
// Stats for delete execution performance.
- optional NativeDeleteStats delete_stats = 2;
+ optional DeleteStatsProto delete_stats = 2;
}
diff --git a/proto/icing/proto/document_wrapper.proto b/proto/icing/proto/document_wrapper.proto
index e8eb992..929ee33 100644
--- a/proto/icing/proto/document_wrapper.proto
+++ b/proto/icing/proto/document_wrapper.proto
@@ -20,7 +20,6 @@
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
-
option objc_class_prefix = "ICNG";
// DocumentWrapper as a wrapper of the user-facing DocumentProto is meant to
@@ -30,6 +29,5 @@
message DocumentWrapper {
optional DocumentProto document = 1;
- // Indicates if the document is marked as deleted
- optional bool deleted = 2;
+ reserved 2;
}
diff --git a/proto/icing/proto/initialize.proto b/proto/icing/proto/initialize.proto
index ae2944c..ab2556d 100644
--- a/proto/icing/proto/initialize.proto
+++ b/proto/icing/proto/initialize.proto
@@ -16,12 +16,11 @@
package icing.lib;
-import "icing/proto/status.proto";
import "icing/proto/logging.proto";
+import "icing/proto/status.proto";
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
-
option objc_class_prefix = "ICNG";
// Next tag: 5
@@ -89,11 +88,11 @@
// go/icing-library-apis.
optional StatusProto status = 1;
- // Stats of the function call. Inside NativeInitializeStats, the function call
+ // Stats of the function call. Inside InitializeStatsProto, the function call
// latency 'latency_ms' will always be populated. The other fields will be
// accurate only when the status above is OK or WARNING_DATA_LOSS. See
// logging.proto for details.
- optional NativeInitializeStats native_initialize_stats = 2;
+ optional InitializeStatsProto initialize_stats = 2;
// TODO(b/147699081): Add a field to indicate lost_schema and lost_documents.
// go/icing-library-apis.
diff --git a/proto/icing/proto/internal/optimize.proto b/proto/icing/proto/internal/optimize.proto
new file mode 100644
index 0000000..4ed3d73
--- /dev/null
+++ b/proto/icing/proto/internal/optimize.proto
@@ -0,0 +1,29 @@
+// Copyright 2019 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package icing.lib;
+
+option java_package = "com.google.android.icing.internal.proto";
+option java_multiple_files = true;
+option objc_class_prefix = "ICNG";
+
+// A status that is saved internally in Icing to track information about how
+// often Optimize runs.
+// Next tag: 2
+message OptimizeStatusProto {
+ // The Epoch time at which the last successful optimize ran.
+ optional int64 last_successful_optimize_run_time_ms = 1;
+}
diff --git a/proto/icing/proto/logging.proto b/proto/icing/proto/logging.proto
index 09ec756..29f7f80 100644
--- a/proto/icing/proto/logging.proto
+++ b/proto/icing/proto/logging.proto
@@ -24,7 +24,7 @@
// Stats of the top-level function IcingSearchEngine::Initialize().
// Next tag: 11
-message NativeInitializeStats {
+message InitializeStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -40,8 +40,9 @@
// Data in index is inconsistent with ground truth.
INCONSISTENT_WITH_GROUND_TRUTH = 2;
- // Total checksum of all the components does not match.
- TOTAL_CHECKSUM_MISMATCH = 3;
+ // Changes were made to the schema, but possibly not fully applied to the
+ // document store and the index - requiring a recovery.
+ SCHEMA_CHANGES_OUT_OF_SYNC = 3;
// Random I/O errors.
IO_ERROR = 4;
@@ -49,13 +50,13 @@
// Possible recovery causes for document store:
// - DATA_LOSS
- // - TOTAL_CHECKSUM_MISMATCH
+ // - SCHEMA_CHANGES_OUT_OF_SYNC
// - IO_ERROR
optional RecoveryCause document_store_recovery_cause = 2;
// Possible recovery causes for index:
// - INCONSISTENT_WITH_GROUND_TRUTH
- // - TOTAL_CHECKSUM_MISMATCH
+ // - SCHEMA_CHANGES_OUT_OF_SYNC
// - IO_ERROR
optional RecoveryCause index_restoration_cause = 3;
@@ -95,7 +96,7 @@
// Stats of the top-level function IcingSearchEngine::Put().
// Next tag: 7
-message NativePutDocumentStats {
+message PutDocumentStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -125,8 +126,11 @@
// Stats of the top-level function IcingSearchEngine::Search() and
// IcingSearchEngine::GetNextPage().
-// Next tag: 15
-message NativeQueryStats {
+// Next tag: 17
+message QueryStatsProto {
+ // The UTF-8 length of the query string
+ optional int32 query_length = 16;
+
// Number of terms in the query string.
optional int32 num_terms = 1;
@@ -154,7 +158,7 @@
optional int32 num_documents_scored = 8;
// How many of the results in the page returned were snippeted.
- optional bool num_results_snippeted = 9;
+ optional int32 num_results_with_snippets = 15;
// Overall time used for the function call.
optional int32 latency_ms = 10;
@@ -172,13 +176,15 @@
// Time used to fetch the document protos. Note that it includes the
// time to snippet if ‘has_snippets’ is true.
optional int32 document_retrieval_latency_ms = 14;
+
+ reserved 9;
}
// Stats of the top-level functions IcingSearchEngine::Delete,
// IcingSearchEngine::DeleteByNamespace, IcingSearchEngine::DeleteBySchemaType,
// IcingSearchEngine::DeleteByQuery.
// Next tag: 4
-message NativeDeleteStats {
+message DeleteStatsProto {
// Overall time used for the function call.
optional int32 latency_ms = 1;
@@ -204,4 +210,4 @@
// Number of documents deleted by this call.
optional int32 num_documents_deleted = 3;
-}
\ No newline at end of file
+}
diff --git a/proto/icing/proto/optimize.proto b/proto/icing/proto/optimize.proto
index 1baa64c..42290f3 100644
--- a/proto/icing/proto/optimize.proto
+++ b/proto/icing/proto/optimize.proto
@@ -23,7 +23,7 @@
option objc_class_prefix = "ICNG";
// Result of a call to IcingSearchEngine.Optimize
-// Next tag: 2
+// Next tag: 3
message OptimizeResultProto {
// Status code can be one of:
// OK
@@ -35,12 +35,13 @@
// See status.proto for more details.
optional StatusProto status = 1;
+ optional OptimizeStatsProto optimize_stats = 2;
// TODO(b/147699081): Add a field to indicate lost_schema and lost_documents.
// go/icing-library-apis.
}
// Result of a call to IcingSearchEngine.GetOptimizeInfo
-// Next tag: 4
+// Next tag: 5
message GetOptimizeInfoResultProto {
// Status code can be one of:
// OK
@@ -57,4 +58,37 @@
// Estimated bytes that could be recovered. The exact size per document isn't
// tracked, so this is based off an average document size.
optional int64 estimated_optimizable_bytes = 3;
+
+ // The amount of time since the last optimize ran.
+ optional int64 time_since_last_optimize_ms = 4;
+}
+
+// Next tag: 10
+message OptimizeStatsProto {
+ // Overall time used for the function call.
+ optional int32 latency_ms = 1;
+
+ // Time used to optimize the document store.
+ optional int32 document_store_optimize_latency_ms = 2;
+
+ // Time used to restore the index.
+ optional int32 index_restoration_latency_ms = 3;
+
+ // Number of documents before the optimization.
+ optional int32 num_original_documents = 4;
+
+ // Number of documents deleted.
+ optional int32 num_deleted_documents = 5;
+
+ // Number of documents expired.
+ optional int32 num_expired_documents = 6;
+
+ // Size of storage before the optimize.
+ optional int64 storage_size_before = 7;
+
+ // Size of storage after the optimize.
+ optional int64 storage_size_after = 8;
+
+ // The amount of time since the last optimize ran.
+ optional int64 time_since_last_optimize_ms = 9;
}
diff --git a/proto/icing/proto/persist.proto b/proto/icing/proto/persist.proto
index 77cf987..8d6b372 100644
--- a/proto/icing/proto/persist.proto
+++ b/proto/icing/proto/persist.proto
@@ -22,6 +22,28 @@
option java_multiple_files = true;
option objc_class_prefix = "ICNG";
+// The type of persistence guarantee that PersistToDisk should provide.
+// Next tag: 2
+message PersistType {
+ enum Code {
+ // Default. Should never be used.
+ UNKNOWN = 0;
+
+ // Only persist the ground truth. A successful PersistToDisk(LITE) should
+ // ensure that no data is lost the next time Icing initializes. This
+ // should be called after each batch of mutations.
+ LITE = 1;
+
+ // Persists all data in internal Icing components. A successful
+ // PersistToDisk(FULL) should not only ensure no data loss like
+ // PersistToDisk(LITE), but also prevent the need to recover internal data
+ // structures the next time Icing initializes. This should be called at
+ // some point before the app terminates.
+ FULL = 2;
+ }
+ optional Code code = 1;
+}
+
// Result of a call to IcingSearchEngine.Persist
// Next tag: 2
message PersistToDiskResultProto {
diff --git a/proto/icing/proto/search.proto b/proto/icing/proto/search.proto
index 6c4e3c9..66fdbe6 100644
--- a/proto/icing/proto/search.proto
+++ b/proto/icing/proto/search.proto
@@ -65,7 +65,7 @@
// Client-supplied specifications on what to include/how to format the search
// results.
-// Next tag: 5
+// Next tag: 6
message ResultSpecProto {
// The results will be returned in pages, and num_per_page specifies the
// number of documents in one page.
@@ -102,34 +102,65 @@
// has been specified for a schema type, then *all* properties of that schema
// type will be retrieved.
repeated TypePropertyMask type_property_masks = 4;
+
+ // Groupings of namespaces whose total returned results should be
+ // limited together.
+ // Next tag: 3
+ message ResultGrouping {
+ // The namespaces in this grouping.
+ repeated string namespaces = 1;
+
+ // The maximum number of results in this grouping that should be returned.
+ optional int32 max_results = 2;
+ }
+
+ // How to limit the number of results returned per set of namespaces. If
+ // results match for a namespace that is not present in any result groupings,
+ // then those results will be returned without limit.
+ //
+ // Non-existent namespaces will be ignored.
+ //
+ // Example : Suppose that there are four namespaces each with three results
+ // matching the query for "foo". Without any result groupings, Icing would
+ // return the following results:
+ // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns0doc2", "ns3doc1",
+ // "ns2doc1", "ns3doc2", "ns2doc0", "ns1doc1", "ns2doc2", "ns1doc2"].
+ //
+ // With the following result groupings:
+ // [ { ["namespace0"], 2 }, { ["namespace1", "namespace2"], 2} ]
+ //
+ // the following results will be returned instead:
+ // ["ns0doc0", "ns0doc1", "ns1doc0", "ns3doc0", "ns3doc1", "ns2doc1",
+ // "ns3doc2"].
+ repeated ResultGrouping result_groupings = 5;
}
// The representation of a single match within a DocumentProto property.
-// Next tag: 6
+// Next tag: 10
message SnippetMatchProto {
- // Properties may have multiple values. values_index indicates which of these
- // multiple string values the match occurred in. For properties with only one
- // value, the values_index will always be 0.
- // Ex. "Recipients" [
- // { { "Name" : "Daffy Duck" }
- // { "EmailAddress" : "daffduck@gmail.com" } },
- // { { "Name" : "Donald Duck" }
- // { "EmailAddress" : "donduck@gmail.com" } }
- // "Daffy Duck" is the string value with a value_index of 0 for property
- // "Recipients.Name". "Donald Duck" is the string value with a value_index of
- // 1 for property "Recipients.Name".
- optional int32 values_index = 1;
+ // The index of the byte in the string at which the match begins and the
+ // length in bytes of the match.
+ optional int32 exact_match_byte_position = 2;
+ optional int32 exact_match_byte_length = 3;
- // The position and length within the matched string at which the exact
- // match begins.
- optional int32 exact_match_position = 2;
+ // The index of the UTF-16 code unit in the string at which the match begins
+ // and the length in UTF-16 code units of the match. This is for use with
+ // UTF-16 encoded strings like java.lang.String.
+ optional int32 exact_match_utf16_position = 6;
+ optional int32 exact_match_utf16_length = 7;
- optional int32 exact_match_bytes = 3;
+ // The index of the byte in the string at which the suggested snippet window
+ // begins and the length in bytes of the window.
+ optional int32 window_byte_position = 4;
+ optional int32 window_byte_length = 5;
- // The position and length of the suggested snippet window.
- optional int32 window_position = 4;
+ // The index of the UTF-16 code unit in the string at which the suggested
+ // snippet window begins and the length in UTF-16 code units of the window.
+ // This is for use with UTF-16 encoded strings like java.lang.String.
+ optional int32 window_utf16_position = 8;
+ optional int32 window_utf16_length = 9;
- optional int32 window_bytes = 5;
+ reserved 1;
}
// A Proto representing all snippets for a single DocumentProto.
@@ -139,9 +170,29 @@
// property values in the corresponding DocumentProto.
// Next tag: 3
message EntryProto {
- // A '.'-delimited sequence of property names indicating which property in
- // the DocumentProto these snippets correspond to.
- // Example properties: 'body', 'sender.name', 'sender.emailaddress', etc.
+ // A property path indicating which property in the DocumentProto these
+ // snippets correspond to. Property paths will contain 1) property names,
+ // 2) the property separator character '.' used to represent nested
+ // properties, and 3) indices surrounded by brackets to represent a specific
+ // value in that property.
+ //
+ // Example properties:
+ // - 'body' : the first and only string value of a top-level
+ // property called 'body'.
+ // - 'sender.name' : the first and only string value of a property
+ // called 'name' that is a subproperty of a
+ // property called 'sender'.
+ // - 'bcc[1].emailaddress': the first and only string value of a property
+ // called 'emailaddress' that is a subproperty of
+ // the second document value of a property called
+ // 'bcc'.
+ // - 'attachments[0]' : the first (of more than one) string value of a
+ // property called 'attachments'.
+ // NOTE: If there is only a single value for a property (like
+ // 'sender.name'), then no value index will be added to the property path.
+ // An index of [0] is implied. If there is more than one value for a
+ // property, then the value index will be added to the property path (like
+ // 'attachments[0]').
optional string property_name = 1;
repeated SnippetMatchProto snippet_matches = 2;
@@ -167,7 +218,7 @@
optional StatusProto status = 1;
// The Results that matched the query. Empty if there was an error.
- // Next tag: 3
+ // Next tag: 4
message ResultProto {
// Document that matches the SearchSpecProto.
optional DocumentProto document = 1;
@@ -175,6 +226,10 @@
// Snippeting information for the document if requested in the
// ResultSpecProto. A default instance, if not requested.
optional SnippetProto snippet = 2;
+
+ // The score that the document was ranked by. The meaning of this score is
+ // determined by ScoringSpecProto.rank_by.
+ optional double score = 3;
}
repeated ResultProto results = 2;
@@ -198,7 +253,7 @@
// LINT.ThenChange(//depot/google3/icing/result/result-state-manager.h:kInvalidNextPageToken)
// Stats for query execution performance.
- optional NativeQueryStats query_stats = 5;
+ optional QueryStatsProto query_stats = 5;
}
// Next tag: 3
diff --git a/proto/icing/proto/storage.proto b/proto/icing/proto/storage.proto
new file mode 100644
index 0000000..39dab6b
--- /dev/null
+++ b/proto/icing/proto/storage.proto
@@ -0,0 +1,187 @@
+// Copyright 2021 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto2";
+
+package icing.lib;
+
+import "icing/proto/status.proto";
+
+option java_package = "com.google.android.icing.proto";
+option java_multiple_files = true;
+option objc_class_prefix = "ICNG";
+
+// Next tag: 10
+message NamespaceStorageInfoProto {
+ // Name of the namespace
+ optional string namespace = 1;
+
+ // Number of alive documents in this namespace.
+ optional int32 num_alive_documents = 2;
+
+ // NOTE: We don't have stats on number of deleted documents in a namespace
+ // since we completely erase all data on a document when it's deleted, so we
+ // can't figure out which namespace it belonged to.
+
+ // Number of expired documents in this namespace.
+ optional int32 num_expired_documents = 3;
+
+ // LINT.IfChange(namespace_storage_info_usage_types)
+ // Number of alive documents that have a UsageReport.usage_type reported
+ optional int32 num_alive_documents_usage_type1 = 4;
+ optional int32 num_alive_documents_usage_type2 = 5;
+ optional int32 num_alive_documents_usage_type3 = 6;
+
+ // Number of expired documents that have a UsageReport.usage_type reported
+ optional int32 num_expired_documents_usage_type1 = 7;
+ optional int32 num_expired_documents_usage_type2 = 8;
+ optional int32 num_expired_documents_usage_type3 = 9;
+ // LINT.ThenChange()
+}
+
+// Next tag: 15
+message DocumentStorageInfoProto {
+ // Total number of alive documents.
+ optional int32 num_alive_documents = 1;
+
+ // Total number of deleted documents.
+ optional int32 num_deleted_documents = 2;
+
+ // Total number of expired documents.
+ optional int32 num_expired_documents = 3;
+
+ // Total size of the document store in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 document_store_size = 4;
+
+ // Total size of the ground truth in bytes. The ground truth may
+ // include deleted or expired documents. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 document_log_size = 5;
+
+ // Size of the key mapper in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 key_mapper_size = 6;
+
+ // Size of the document id mapper in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 document_id_mapper_size = 7;
+
+ // Size of the score cache in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 score_cache_size = 8;
+
+ // Size of the filter cache in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 filter_cache_size = 9;
+
+ // Size of the corpus mapper in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 corpus_mapper_size = 10;
+
+ // Size of the corpus score cache in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 corpus_score_cache_size = 11;
+
+ // Size of the namespace id mapper in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 namespace_id_mapper_size = 12;
+
+ // Number of namespaces seen from the current documents.
+ //
+ // TODO(cassiewang): This isn't technically needed anymore since clients can
+ // get this number from namespace_storage_info. Consider removing this.
+ optional int32 num_namespaces = 13;
+
+ // Storage information of each namespace.
+ repeated NamespaceStorageInfoProto namespace_storage_info = 14;
+}
+
+// Next tag: 5
+message SchemaStoreStorageInfoProto {
+ // Size of the schema store in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 schema_store_size = 1;
+
+ // Total number of schema types.
+ optional int32 num_schema_types = 2;
+
+ // Total number of all sections across all types
+ optional int32 num_total_sections = 3;
+
+ // Total number of types at the current section limit.
+ optional int32 num_schema_types_sections_exhausted = 4;
+}
+
+// Next tag: 9
+message IndexStorageInfoProto {
+ // Total size of the index in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 index_size = 1;
+
+ // Size of the lite index lexicon in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 lite_index_lexicon_size = 2;
+
+ // Size of the lite index hit buffer in bytes. Will be set to -1 if an IO
+ // error is encountered while calculating this field.
+ optional int64 lite_index_hit_buffer_size = 3;
+
+ // Size of the main index lexicon in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 main_index_lexicon_size = 4;
+
+ // Size of the main index storage in bytes. Will be set to -1 if an IO error
+ // is encountered while calculating this field.
+ optional int64 main_index_storage_size = 5;
+
+ // Size of one main index block in bytes.
+ optional int64 main_index_block_size = 6;
+
+ // Number of main index blocks.
+ optional int32 num_blocks = 7;
+
+ // Minimum fraction of the main index blocks that are free, i.e. assuming
+ // allocated blocks are fully used.
+ optional float min_free_fraction = 8;
+}
+
+// Next tag: 5
+message StorageInfoProto {
+ // Total size of Icing's storage in bytes. Will be set to -1 if an IO error is
+ // encountered while calculating this field.
+ optional int64 total_storage_size = 1;
+
+ // Storage information of the document store.
+ optional DocumentStorageInfoProto document_storage_info = 2;
+
+ // Storage information of the schema store.
+ optional SchemaStoreStorageInfoProto schema_store_storage_info = 3;
+
+ // Storage information of the index.
+ optional IndexStorageInfoProto index_storage_info = 4;
+}
+
+// Next tag: 3
+message StorageInfoResultProto {
+ // Status code can be one of:
+ // OK
+ // FAILED_PRECONDITION
+ //
+ // See status.proto for more details.
+ optional StatusProto status = 1;
+
+ // Storage information of Icing.
+ optional StorageInfoProto storage_info = 2;
+}
diff --git a/proto/icing/proto/usage.proto b/proto/icing/proto/usage.proto
index 7f31a2b..eaa2671 100644
--- a/proto/icing/proto/usage.proto
+++ b/proto/icing/proto/usage.proto
@@ -20,13 +20,11 @@
option java_package = "com.google.android.icing.proto";
option java_multiple_files = true;
-
option objc_class_prefix = "ICNG";
// Representation of a usage report that is generated from the client and sent
// to Icing.
// Next tag: 5
-// LINT.IfChange
message UsageReport {
// Namespace of the document.
optional string document_namespace = 1;
@@ -37,6 +35,7 @@
// Timestamp in milliseconds of when the usage happens.
optional int64 usage_timestamp_ms = 3;
+ // LINT.IfChange
// Next tag: 3
enum UsageType {
// A custom usage type that clients can assign a meaning to. UsageReports of
@@ -50,9 +49,12 @@
// Same as above.
USAGE_TYPE3 = 2;
}
+ // LINT.ThenChange(
+ // //depot/google3/icing/store/usage-store.h:UsageScores,
+ // //depot/google3/icing/proto/\
+ // storage.proto:namespace_storage_info_usage_types)
optional UsageType usage_type = 4;
}
-// LINT.ThenChange(//depot/google3/icing/store/usage-store.h:UsageScores)
// Result of a call to IcingSearchEngine.ReportUsage
// Next tag: 2
@@ -64,4 +66,4 @@
//
// See status.proto for more details.
optional StatusProto status = 1;
-}
\ No newline at end of file
+}
diff --git a/synced_AOSP_CL_number.txt b/synced_AOSP_CL_number.txt
index af8248d..4069810 100644
--- a/synced_AOSP_CL_number.txt
+++ b/synced_AOSP_CL_number.txt
@@ -1 +1 @@
-set(synced_AOSP_CL_number=351841227)
+set(synced_AOSP_CL_number=375495869)