blob: a539fe43cea862d168ab64c5dbc086363ed48c5f [file] [log] [blame]
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "icing/index/main/posting-list-accessor.h"
#include <cstdint>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/file/filesystem.h"
#include "icing/index/hit/hit.h"
#include "icing/index/main/flash-index-storage.h"
#include "icing/index/main/index-block.h"
#include "icing/index/main/posting-list-identifier.h"
#include "icing/index/main/posting-list-used.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/hit-test-utils.h"
#include "icing/testing/tmp-directory.h"
namespace icing {
namespace lib {
namespace {
using ::testing::ElementsAre;
using ::testing::ElementsAreArray;
using ::testing::Eq;
using ::testing::Lt;
using ::testing::SizeIs;
TEST(PostingListAccessorStorageTest, HitsAddAndRetrieveProperly) {
std::string test_dir = GetTestTempDir() + "/test_dir";
std::string file_name = test_dir + "/test_file.idx.index";
Filesystem filesystem;
ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(test_dir.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(FlashIndexStorage flash_index_storage,
FlashIndexStorage::Create(file_name, &filesystem));
// Add some hits! Any hits!
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(pl_accessor.PrependHit(hit));
}
PostingListAccessor::FinalizeResult result =
PostingListAccessor::Finalize(std::move(pl_accessor));
ICING_EXPECT_OK(result.status);
EXPECT_THAT(result.id.block_index(), Eq(1));
EXPECT_THAT(result.id.posting_list_index(), Eq(0));
// Retrieve some hits.
ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
flash_index_storage.GetPostingList(result.id));
EXPECT_THAT(pl_holder.posting_list.GetHits(),
IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
EXPECT_THAT(pl_holder.block.next_block_index(), Eq(kInvalidBlockIndex));
}
TEST(PostingListAccessorStorageTest, PreexistingPLKeepOnSameBlock) {
std::string test_dir = GetTestTempDir() + "/test_dir";
std::string file_name = test_dir + "/test_file.idx.index";
Filesystem filesystem;
ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(test_dir.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(FlashIndexStorage flash_index_storage,
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
// Add a single hit. This will fit in a min-sized posting list.
Hit hit1(/*section_id=*/1, /*document_id=*/0, Hit::kDefaultTermFrequency);
ICING_ASSERT_OK(pl_accessor.PrependHit(hit1));
PostingListAccessor::FinalizeResult result1 =
PostingListAccessor::Finalize(std::move(pl_accessor));
ICING_EXPECT_OK(result1.status);
// Should have been allocated to the first block.
EXPECT_THAT(result1.id.block_index(), Eq(1));
EXPECT_THAT(result1.id.posting_list_index(), Eq(0));
// Add one more hit. The minimum size for a posting list must be able to fit
// at least two hits, so this should NOT cause the previous pl to be
// reallocated.
ICING_ASSERT_OK_AND_ASSIGN(
pl_accessor, PostingListAccessor::CreateFromExisting(&flash_index_storage,
result1.id));
Hit hit2 = CreateHit(hit1, /*desired_byte_length=*/1);
ICING_ASSERT_OK(pl_accessor.PrependHit(hit2));
PostingListAccessor::FinalizeResult result2 =
PostingListAccessor::Finalize(std::move(pl_accessor));
ICING_EXPECT_OK(result2.status);
// Should have been allocated to the same posting list as the first hit.
EXPECT_THAT(result2.id, Eq(result1.id));
// The posting list at result2.id should hold all of the hits that have been
// added.
ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
flash_index_storage.GetPostingList(result2.id));
EXPECT_THAT(pl_holder.posting_list.GetHits(),
IsOkAndHolds(ElementsAre(hit2, hit1)));
}
TEST(PostingListAccessorStorageTest, PreexistingPLReallocateToLargerPL) {
std::string test_dir = GetTestTempDir() + "/test_dir";
std::string file_name = test_dir + "/test_file.idx.index";
Filesystem filesystem;
ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(test_dir.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(FlashIndexStorage flash_index_storage,
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
// The smallest posting list size is 15 bytes. The first four hits will be
// compressed to one byte each and will be able to fit in the 5 byte padded
// region. The last hit will fit in one of the special hits. The posting list
// will be ALMOST_FULL and can fit at most 2 more hits.
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5, /*desired_byte_length=*/1);
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(pl_accessor.PrependHit(hit));
}
PostingListAccessor::FinalizeResult result1 =
PostingListAccessor::Finalize(std::move(pl_accessor));
ICING_EXPECT_OK(result1.status);
// Should have been allocated to the first block.
EXPECT_THAT(result1.id.block_index(), Eq(1));
EXPECT_THAT(result1.id.posting_list_index(), Eq(0));
// Now let's add some more hits!
ICING_ASSERT_OK_AND_ASSIGN(
pl_accessor, PostingListAccessor::CreateFromExisting(&flash_index_storage,
result1.id));
// The current posting list can fit at most 2 more hits. Adding 12 more hits
// should result in these hits being moved to a larger posting list.
std::vector<Hit> hits2 = CreateHits(
/*start_docid=*/hits1.back().document_id() + 1, /*num_hits=*/12,
/*desired_byte_length=*/1);
for (const Hit& hit : hits2) {
ICING_ASSERT_OK(pl_accessor.PrependHit(hit));
}
PostingListAccessor::FinalizeResult result2 =
PostingListAccessor::Finalize(std::move(pl_accessor));
ICING_EXPECT_OK(result2.status);
// Should have been allocated to the second (new) block because the posting
// list should have grown beyond the size that the first block maintains.
EXPECT_THAT(result2.id.block_index(), Eq(2));
EXPECT_THAT(result2.id.posting_list_index(), Eq(0));
// The posting list at result2.id should hold all of the hits that have been
// added.
for (const Hit& hit : hits2) {
hits1.push_back(hit);
}
ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
flash_index_storage.GetPostingList(result2.id));
EXPECT_THAT(pl_holder.posting_list.GetHits(),
IsOkAndHolds(ElementsAreArray(hits1.rbegin(), hits1.rend())));
}
TEST(PostingListAccessorStorageTest, MultiBlockChainsBlocksProperly) {
std::string test_dir = GetTestTempDir() + "/test_dir";
std::string file_name = test_dir + "/test_file.idx.index";
Filesystem filesystem;
ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(test_dir.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(FlashIndexStorage flash_index_storage,
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
// Add some hits! Any hits!
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5000, /*desired_byte_length=*/1);
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(pl_accessor.PrependHit(hit));
}
PostingListAccessor::FinalizeResult result1 =
PostingListAccessor::Finalize(std::move(pl_accessor));
ICING_EXPECT_OK(result1.status);
PostingListIdentifier second_block_id = result1.id;
// Should have been allocated to the second block, which holds a max-sized
// posting list.
EXPECT_THAT(second_block_id, Eq(PostingListIdentifier(
/*block_index=*/2, /*posting_list_index=*/0,
/*posting_list_index_bits=*/0)));
// Now let's retrieve them!
ICING_ASSERT_OK_AND_ASSIGN(
PostingListHolder pl_holder,
flash_index_storage.GetPostingList(second_block_id));
// This pl_holder will only hold a posting list with the hits that didn't fit
// on the first block.
ICING_ASSERT_OK_AND_ASSIGN(std::vector<Hit> second_block_hits,
pl_holder.posting_list.GetHits());
ASSERT_THAT(second_block_hits, SizeIs(Lt(hits1.size())));
auto first_block_hits_start = hits1.rbegin() + second_block_hits.size();
EXPECT_THAT(second_block_hits,
ElementsAreArray(hits1.rbegin(), first_block_hits_start));
// Now retrieve all of the hits that were on the first block.
uint32_t first_block_id = pl_holder.block.next_block_index();
EXPECT_THAT(first_block_id, Eq(1));
PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
/*posting_list_index_bits=*/0);
ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
flash_index_storage.GetPostingList(pl_id));
EXPECT_THAT(
pl_holder.posting_list.GetHits(),
IsOkAndHolds(ElementsAreArray(first_block_hits_start, hits1.rend())));
}
TEST(PostingListAccessorStorageTest,
PreexistingMultiBlockReusesBlocksProperly) {
std::string test_dir = GetTestTempDir() + "/test_dir";
std::string file_name = test_dir + "/test_file.idx.index";
Filesystem filesystem;
ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(test_dir.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(FlashIndexStorage flash_index_storage,
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
// Add some hits! Any hits!
std::vector<Hit> hits1 =
CreateHits(/*num_hits=*/5000, /*desired_byte_length=*/1);
for (const Hit& hit : hits1) {
ICING_ASSERT_OK(pl_accessor.PrependHit(hit));
}
PostingListAccessor::FinalizeResult result1 =
PostingListAccessor::Finalize(std::move(pl_accessor));
ICING_EXPECT_OK(result1.status);
PostingListIdentifier first_add_id = result1.id;
EXPECT_THAT(first_add_id, Eq(PostingListIdentifier(
/*block_index=*/2, /*posting_list_index=*/0,
/*posting_list_index_bits=*/0)));
// Now add a couple more hits. These should fit on the existing, not full
// second block.
ICING_ASSERT_OK_AND_ASSIGN(
pl_accessor, PostingListAccessor::CreateFromExisting(&flash_index_storage,
first_add_id));
std::vector<Hit> hits2 = CreateHits(
/*start_docid=*/hits1.back().document_id() + 1, /*num_hits=*/50,
/*desired_byte_length=*/1);
for (const Hit& hit : hits2) {
ICING_ASSERT_OK(pl_accessor.PrependHit(hit));
}
PostingListAccessor::FinalizeResult result2 =
PostingListAccessor::Finalize(std::move(pl_accessor));
ICING_EXPECT_OK(result2.status);
PostingListIdentifier second_add_id = result2.id;
EXPECT_THAT(second_add_id, Eq(first_add_id));
// We should be able to retrieve all 5050 hits.
for (const Hit& hit : hits2) {
hits1.push_back(hit);
}
ICING_ASSERT_OK_AND_ASSIGN(PostingListHolder pl_holder,
flash_index_storage.GetPostingList(second_add_id));
// This pl_holder will only hold a posting list with the hits that didn't fit
// on the first block.
ICING_ASSERT_OK_AND_ASSIGN(std::vector<Hit> second_block_hits,
pl_holder.posting_list.GetHits());
ASSERT_THAT(second_block_hits, SizeIs(Lt(hits1.size())));
auto first_block_hits_start = hits1.rbegin() + second_block_hits.size();
EXPECT_THAT(second_block_hits,
ElementsAreArray(hits1.rbegin(), first_block_hits_start));
// Now retrieve all of the hits that were on the first block.
uint32_t first_block_id = pl_holder.block.next_block_index();
EXPECT_THAT(first_block_id, Eq(1));
PostingListIdentifier pl_id(first_block_id, /*posting_list_index=*/0,
/*posting_list_index_bits=*/0);
ICING_ASSERT_OK_AND_ASSIGN(pl_holder,
flash_index_storage.GetPostingList(pl_id));
EXPECT_THAT(
pl_holder.posting_list.GetHits(),
IsOkAndHolds(ElementsAreArray(first_block_hits_start, hits1.rend())));
}
TEST(PostingListAccessorStorageTest, InvalidHitReturnsInvalidArgument) {
std::string test_dir = GetTestTempDir() + "/test_dir";
std::string file_name = test_dir + "/test_file.idx.index";
Filesystem filesystem;
ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(test_dir.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(FlashIndexStorage flash_index_storage,
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
Hit invalid_hit;
EXPECT_THAT(pl_accessor.PrependHit(invalid_hit),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(PostingListAccessorStorageTest, HitsNotDecreasingReturnsInvalidArgument) {
std::string test_dir = GetTestTempDir() + "/test_dir";
std::string file_name = test_dir + "/test_file.idx.index";
Filesystem filesystem;
ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(test_dir.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(FlashIndexStorage flash_index_storage,
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency);
ICING_ASSERT_OK(pl_accessor.PrependHit(hit1));
Hit hit2(/*section_id=*/6, /*document_id=*/1, Hit::kDefaultTermFrequency);
EXPECT_THAT(pl_accessor.PrependHit(hit2),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
Hit hit3(/*section_id=*/2, /*document_id=*/0, Hit::kDefaultTermFrequency);
EXPECT_THAT(pl_accessor.PrependHit(hit3),
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(PostingListAccessorStorageTest, NewPostingListNoHitsAdded) {
std::string test_dir = GetTestTempDir() + "/test_dir";
std::string file_name = test_dir + "/test_file.idx.index";
Filesystem filesystem;
ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(test_dir.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(FlashIndexStorage flash_index_storage,
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
PostingListAccessor::FinalizeResult result1 =
PostingListAccessor::Finalize(std::move(pl_accessor));
EXPECT_THAT(result1.status,
StatusIs(libtextclassifier3::StatusCode::INVALID_ARGUMENT));
}
TEST(PostingListAccessorStorageTest, PreexistingPostingListNoHitsAdded) {
std::string test_dir = GetTestTempDir() + "/test_dir";
std::string file_name = test_dir + "/test_file.idx.index";
Filesystem filesystem;
ASSERT_TRUE(filesystem.DeleteDirectoryRecursively(test_dir.c_str()));
ASSERT_TRUE(filesystem.CreateDirectoryRecursively(test_dir.c_str()));
ICING_ASSERT_OK_AND_ASSIGN(FlashIndexStorage flash_index_storage,
FlashIndexStorage::Create(file_name, &filesystem));
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor,
PostingListAccessor::Create(&flash_index_storage));
Hit hit1(/*section_id=*/3, /*document_id=*/1, Hit::kDefaultTermFrequency);
ICING_ASSERT_OK(pl_accessor.PrependHit(hit1));
PostingListAccessor::FinalizeResult result1 =
PostingListAccessor::Finalize(std::move(pl_accessor));
ICING_ASSERT_OK(result1.status);
ICING_ASSERT_OK_AND_ASSIGN(PostingListAccessor pl_accessor2,
PostingListAccessor::CreateFromExisting(
&flash_index_storage, result1.id));
PostingListAccessor::FinalizeResult result2 =
PostingListAccessor::Finalize(std::move(pl_accessor2));
ICING_ASSERT_OK(result2.status);
}
} // namespace
} // namespace lib
} // namespace icing