blob: 65eecd1533c1f561033bfd573da57c8ddb27054b [file] [log] [blame]
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "icing/scoring/scoring-processor.h"
#include <cstdint>
#include "icing/text_classifier/lib3/utils/base/statusor.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/index/iterator/doc-hit-info-iterator-test-util.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/scoring.pb.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/tmp-directory.h"
namespace icing {
namespace lib {
namespace {
using ::testing::ElementsAre;
using ::testing::IsEmpty;
using ::testing::SizeIs;
class ScoringProcessorTest : public testing::Test {
protected:
ScoringProcessorTest()
: test_dir_(GetTestTempDir() + "/icing"),
doc_store_dir_(test_dir_ + "/doc_store"),
schema_store_dir_(test_dir_ + "/schema_store") {}
void SetUp() override {
// Creates file directories
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
filesystem_.CreateDirectoryRecursively(doc_store_dir_.c_str());
filesystem_.CreateDirectoryRecursively(schema_store_dir_.c_str());
ICING_ASSERT_OK_AND_ASSIGN(
schema_store_,
SchemaStore::Create(&filesystem_, test_dir_, &fake_clock_));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentStore::CreateResult create_result,
DocumentStore::Create(&filesystem_, doc_store_dir_, &fake_clock_,
schema_store_.get()));
document_store_ = std::move(create_result.document_store);
// Creates a simple email schema
SchemaProto test_email_schema;
auto type_config = test_email_schema.add_types();
type_config->set_schema_type("email");
auto subject = type_config->add_properties();
subject->set_property_name("subject");
subject->set_data_type(PropertyConfigProto::DataType::STRING);
subject->set_cardinality(PropertyConfigProto::Cardinality::OPTIONAL);
ICING_ASSERT_OK(schema_store_->SetSchema(test_email_schema));
}
void TearDown() override {
document_store_.reset();
schema_store_.reset();
filesystem_.DeleteDirectoryRecursively(test_dir_.c_str());
}
DocumentStore* document_store() { return document_store_.get(); }
private:
const std::string test_dir_;
const std::string doc_store_dir_;
const std::string schema_store_dir_;
Filesystem filesystem_;
FakeClock fake_clock_;
std::unique_ptr<DocumentStore> document_store_;
std::unique_ptr<SchemaStore> schema_store_;
};
constexpr int kDefaultScore = 0;
constexpr int64_t kDefaultCreationTimestampMs = 1571100001111;
DocumentProto CreateDocument(const std::string& name_space,
const std::string& uri, int score,
int64_t creation_timestamp_ms) {
return DocumentBuilder()
.SetKey(name_space, uri)
.SetSchema("email")
.SetScore(score)
.SetCreationTimestampMs(creation_timestamp_ms)
.Build();
}
libtextclassifier3::StatusOr<
std::pair<std::vector<DocHitInfo>, std::vector<ScoredDocumentHit>>>
CreateAndInsertsDocumentsWithScores(DocumentStore* document_store,
const std::vector<int>& scores) {
std::vector<DocHitInfo> doc_hit_infos;
std::vector<ScoredDocumentHit> scored_document_hits;
for (int i = 0; i < scores.size(); i++) {
ICING_ASSIGN_OR_RETURN(DocumentId document_id,
document_store->Put(CreateDocument(
"icing", "email/" + std::to_string(i),
scores.at(i), kDefaultCreationTimestampMs)));
doc_hit_infos.emplace_back(document_id);
scored_document_hits.emplace_back(document_id, kSectionIdMaskNone,
scores.at(i));
}
return std::pair(doc_hit_infos, scored_document_hits);
}
UsageReport CreateUsageReport(std::string name_space, std::string uri,
int64 timestamp_ms,
UsageReport::UsageType usage_type) {
UsageReport usage_report;
usage_report.set_document_namespace(name_space);
usage_report.set_document_uri(uri);
usage_report.set_usage_timestamp_ms(timestamp_ms);
usage_report.set_usage_type(usage_type);
return usage_report;
}
TEST_F(ScoringProcessorTest, CreationWithNullPointerShouldFail) {
ScoringSpecProto spec_proto;
EXPECT_THAT(ScoringProcessor::Create(spec_proto, /*document_store=*/nullptr),
StatusIs(libtextclassifier3::StatusCode::FAILED_PRECONDITION));
}
TEST_F(ScoringProcessorTest, ShouldCreateInstance) {
ScoringSpecProto spec_proto;
spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
ICING_EXPECT_OK(ScoringProcessor::Create(spec_proto, document_store()));
}
TEST_F(ScoringProcessorTest, ShouldHandleEmptyDocHitIterator) {
// Creates an empty DocHitInfoIterator
std::vector<DocHitInfo> doc_hit_infos = {};
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
ScoringSpecProto spec_proto;
spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(spec_proto, document_store()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/5),
IsEmpty());
}
TEST_F(ScoringProcessorTest, ShouldHandleNonPositiveNumToScore) {
// Sets up documents
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id1,
document_store()->Put(CreateDocument("icing", "email/1", /*score=*/1,
kDefaultCreationTimestampMs)));
DocHitInfo doc_hit_info1(document_id1);
// Creates a dummy DocHitInfoIterator
std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1};
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
ScoringSpecProto spec_proto;
spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(spec_proto, document_store()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/-1),
IsEmpty());
doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/0),
IsEmpty());
}
TEST_F(ScoringProcessorTest, ShouldRespectNumToScore) {
// Sets up documents
ICING_ASSERT_OK_AND_ASSIGN(
auto doc_hit_result_pair,
CreateAndInsertsDocumentsWithScores(document_store(), {1, 2, 3}));
std::vector<DocHitInfo> doc_hit_infos = std::move(doc_hit_result_pair.first);
// Creates a dummy DocHitInfoIterator with 3 results
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
ScoringSpecProto spec_proto;
spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(spec_proto, document_store()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/2),
SizeIs(2));
doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/4),
SizeIs(3));
}
TEST_F(ScoringProcessorTest, ShouldScoreByDocumentScore) {
// Creates input doc_hit_infos and expected output scored_document_hits
ICING_ASSERT_OK_AND_ASSIGN(
auto doc_hit_result_pair,
CreateAndInsertsDocumentsWithScores(document_store(), {1, 3, 2}));
std::vector<DocHitInfo> doc_hit_infos = std::move(doc_hit_result_pair.first);
std::vector<ScoredDocumentHit> scored_document_hits =
std::move(doc_hit_result_pair.second);
// Creates a dummy DocHitInfoIterator with 3 results
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
ScoringSpecProto spec_proto;
spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(spec_proto, document_store()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3),
ElementsAre(EqualsScoredDocumentHit(scored_document_hits.at(0)),
EqualsScoredDocumentHit(scored_document_hits.at(1)),
EqualsScoredDocumentHit(scored_document_hits.at(2))));
}
TEST_F(ScoringProcessorTest,
ShouldScoreByRelevanceScore_DocumentsWithDifferentLength) {
DocumentProto document1 =
CreateDocument("icing", "email/1", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
DocumentProto document2 =
CreateDocument("icing", "email/2", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
DocumentProto document3 =
CreateDocument("icing", "email/3", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id1,
document_store()->Put(document1, /*num_tokens=*/10));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id2,
document_store()->Put(document2, /*num_tokens=*/100));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id3,
document_store()->Put(document3, /*num_tokens=*/50));
DocHitInfo doc_hit_info1(document_id1);
doc_hit_info1.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
DocHitInfo doc_hit_info2(document_id2);
doc_hit_info2.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
DocHitInfo doc_hit_info3(document_id3);
doc_hit_info3.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
SectionId section_id = 0;
SectionIdMask section_id_mask = 1U << section_id;
// Creates input doc_hit_infos and expected output scored_document_hits
std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
doc_hit_info3};
// Creates a dummy DocHitInfoIterator with 3 results for the query "foo"
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
ScoringSpecProto spec_proto;
spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(spec_proto, document_store()));
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
query_term_iterators;
query_term_iterators["foo"] =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
// Since the three documents all contain the query term "foo" exactly once,
// the document's length determines the final score. Document shorter than the
// average corpus length are slightly boosted.
ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask,
/*score=*/0.255482);
ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask,
/*score=*/0.115927);
ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask,
/*score=*/0.166435);
EXPECT_THAT(
scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3, &query_term_iterators),
ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
EqualsScoredDocumentHit(expected_scored_doc_hit2),
EqualsScoredDocumentHit(expected_scored_doc_hit3)));
}
TEST_F(ScoringProcessorTest,
ShouldScoreByRelevanceScore_DocumentsWithSameLength) {
DocumentProto document1 =
CreateDocument("icing", "email/1", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
DocumentProto document2 =
CreateDocument("icing", "email/2", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
DocumentProto document3 =
CreateDocument("icing", "email/3", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id1,
document_store()->Put(document1, /*num_tokens=*/10));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id2,
document_store()->Put(document2, /*num_tokens=*/10));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id3,
document_store()->Put(document3, /*num_tokens=*/10));
DocHitInfo doc_hit_info1(document_id1);
doc_hit_info1.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
DocHitInfo doc_hit_info2(document_id2);
doc_hit_info2.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
DocHitInfo doc_hit_info3(document_id3);
doc_hit_info3.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
SectionId section_id = 0;
SectionIdMask section_id_mask = 1U << section_id;
// Creates input doc_hit_infos and expected output scored_document_hits
std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
doc_hit_info3};
// Creates a dummy DocHitInfoIterator with 3 results for the query "foo"
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
ScoringSpecProto spec_proto;
spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(spec_proto, document_store()));
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
query_term_iterators;
query_term_iterators["foo"] =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
// Since the three documents all contain the query term "foo" exactly once
// and they have the same length, they will have the same BM25F scoret.
ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask,
/*score=*/0.16173716);
ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask,
/*score=*/0.16173716);
ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask,
/*score=*/0.16173716);
EXPECT_THAT(
scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3, &query_term_iterators),
ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
EqualsScoredDocumentHit(expected_scored_doc_hit2),
EqualsScoredDocumentHit(expected_scored_doc_hit3)));
}
TEST_F(ScoringProcessorTest,
ShouldScoreByRelevanceScore_DocumentsWithDifferentQueryFrequency) {
DocumentProto document1 =
CreateDocument("icing", "email/1", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
DocumentProto document2 =
CreateDocument("icing", "email/2", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
DocumentProto document3 =
CreateDocument("icing", "email/3", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id1,
document_store()->Put(document1, /*num_tokens=*/10));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id2,
document_store()->Put(document2, /*num_tokens=*/10));
ICING_ASSERT_OK_AND_ASSIGN(
DocumentId document_id3,
document_store()->Put(document3, /*num_tokens=*/10));
DocHitInfo doc_hit_info1(document_id1);
// Document 1 contains the query term "foo" 5 times
doc_hit_info1.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/5);
DocHitInfo doc_hit_info2(document_id2);
// Document 1 contains the query term "foo" 1 time
doc_hit_info2.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
DocHitInfo doc_hit_info3(document_id3);
// Document 1 contains the query term "foo" 3 times
doc_hit_info3.UpdateSection(/*section_id*/ 0, /*hit_term_frequency=*/1);
doc_hit_info3.UpdateSection(/*section_id*/ 1, /*hit_term_frequency=*/2);
SectionIdMask section_id_mask1 = 0b00000001;
SectionIdMask section_id_mask2 = 0b00000001;
SectionIdMask section_id_mask3 = 0b00000011;
// Creates input doc_hit_infos and expected output scored_document_hits
std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
doc_hit_info3};
// Creates a dummy DocHitInfoIterator with 3 results for the query "foo"
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
ScoringSpecProto spec_proto;
spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::RELEVANCE_SCORE);
// Creates a ScoringProcessor
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(spec_proto, document_store()));
std::unordered_map<std::string, std::unique_ptr<DocHitInfoIterator>>
query_term_iterators;
query_term_iterators["foo"] =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos, "foo");
// Since the three documents all have the same length, the score is decided by
// the frequency of the query term "foo".
ScoredDocumentHit expected_scored_doc_hit1(document_id1, section_id_mask1,
/*score=*/0.309497);
ScoredDocumentHit expected_scored_doc_hit2(document_id2, section_id_mask2,
/*score=*/0.16173716);
ScoredDocumentHit expected_scored_doc_hit3(document_id3, section_id_mask3,
/*score=*/0.268599);
EXPECT_THAT(
scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3, &query_term_iterators),
ElementsAre(EqualsScoredDocumentHit(expected_scored_doc_hit1),
EqualsScoredDocumentHit(expected_scored_doc_hit2),
EqualsScoredDocumentHit(expected_scored_doc_hit3)));
}
TEST_F(ScoringProcessorTest, ShouldScoreByCreationTimestamp) {
DocumentProto document1 =
CreateDocument("icing", "email/1", kDefaultScore,
/*creation_timestamp_ms=*/1571100001111);
DocumentProto document2 =
CreateDocument("icing", "email/2", kDefaultScore,
/*creation_timestamp_ms=*/1571100002222);
DocumentProto document3 =
CreateDocument("icing", "email/3", kDefaultScore,
/*creation_timestamp_ms=*/1571100003333);
// Intentionally inserts documents in a different order
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store()->Put(document1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
document_store()->Put(document3));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
document_store()->Put(document2));
DocHitInfo doc_hit_info1(document_id1);
DocHitInfo doc_hit_info2(document_id2);
DocHitInfo doc_hit_info3(document_id3);
ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
document1.creation_timestamp_ms());
ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
document2.creation_timestamp_ms());
ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
document3.creation_timestamp_ms());
// Creates a dummy DocHitInfoIterator with 3 results
std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info2, doc_hit_info3,
doc_hit_info1};
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
ScoringSpecProto spec_proto;
spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::CREATION_TIMESTAMP);
// Creates a ScoringProcessor which ranks in descending order
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(spec_proto, document_store()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3),
ElementsAre(EqualsScoredDocumentHit(scored_document_hit2),
EqualsScoredDocumentHit(scored_document_hit3),
EqualsScoredDocumentHit(scored_document_hit1)));
}
TEST_F(ScoringProcessorTest, ShouldScoreByUsageCount) {
DocumentProto document1 =
CreateDocument("icing", "email/1", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
DocumentProto document2 =
CreateDocument("icing", "email/2", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
DocumentProto document3 =
CreateDocument("icing", "email/3", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store()->Put(document1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
document_store()->Put(document2));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
document_store()->Put(document3));
// Report usage for doc1 once and doc2 twice.
UsageReport usage_report_doc1 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/0,
UsageReport::USAGE_TYPE1);
UsageReport usage_report_doc2 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/0,
UsageReport::USAGE_TYPE1);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc1));
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc2));
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc2));
DocHitInfo doc_hit_info1(document_id1);
DocHitInfo doc_hit_info2(document_id2);
DocHitInfo doc_hit_info3(document_id3);
ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
/*score=*/1);
ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
/*score=*/2);
ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
/*score=*/0);
// Creates a dummy DocHitInfoIterator with 3 results
std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
doc_hit_info3};
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
ScoringSpecProto spec_proto;
spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::USAGE_TYPE1_COUNT);
// Creates a ScoringProcessor which ranks in descending order
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(spec_proto, document_store()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3),
ElementsAre(EqualsScoredDocumentHit(scored_document_hit1),
EqualsScoredDocumentHit(scored_document_hit2),
EqualsScoredDocumentHit(scored_document_hit3)));
}
TEST_F(ScoringProcessorTest, ShouldScoreByUsageTimestamp) {
DocumentProto document1 =
CreateDocument("icing", "email/1", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
DocumentProto document2 =
CreateDocument("icing", "email/2", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
DocumentProto document3 =
CreateDocument("icing", "email/3", kDefaultScore,
/*creation_timestamp_ms=*/kDefaultCreationTimestampMs);
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store()->Put(document1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
document_store()->Put(document2));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
document_store()->Put(document3));
// Report usage for doc1 and doc2.
UsageReport usage_report_doc1 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/1", /*timestamp_ms=*/1000,
UsageReport::USAGE_TYPE1);
UsageReport usage_report_doc2 = CreateUsageReport(
/*name_space=*/"icing", /*uri=*/"email/2", /*timestamp_ms=*/5000,
UsageReport::USAGE_TYPE1);
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc1));
ICING_ASSERT_OK(document_store()->ReportUsage(usage_report_doc2));
DocHitInfo doc_hit_info1(document_id1);
DocHitInfo doc_hit_info2(document_id2);
DocHitInfo doc_hit_info3(document_id3);
ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
/*score=*/1);
ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
/*score=*/5);
ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
/*score=*/0);
// Creates a dummy DocHitInfoIterator with 3 results
std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info1, doc_hit_info2,
doc_hit_info3};
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
ScoringSpecProto spec_proto;
spec_proto.set_rank_by(
ScoringSpecProto::RankingStrategy::USAGE_TYPE1_LAST_USED_TIMESTAMP);
// Creates a ScoringProcessor which ranks in descending order
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(spec_proto, document_store()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3),
ElementsAre(EqualsScoredDocumentHit(scored_document_hit1),
EqualsScoredDocumentHit(scored_document_hit2),
EqualsScoredDocumentHit(scored_document_hit3)));
}
TEST_F(ScoringProcessorTest, ShouldHandleNoScores) {
// Creates input doc_hit_infos and corresponding scored_document_hits
ICING_ASSERT_OK_AND_ASSIGN(
auto doc_hit_result_pair,
CreateAndInsertsDocumentsWithScores(document_store(), {1, 2, 3}));
std::vector<DocHitInfo> doc_hit_infos = std::move(doc_hit_result_pair.first);
std::vector<ScoredDocumentHit> scored_document_hits =
std::move(doc_hit_result_pair.second);
// Creates a dummy DocHitInfoIterator with 4 results one of which doesn't have
// a score.
doc_hit_infos.emplace(doc_hit_infos.begin(), /*document_id_in=*/4,
kSectionIdMaskNone);
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
// The document hit without a score will be be assigned the default score 0 in
// a descending order.
ScoredDocumentHit scored_document_hit_default =
ScoredDocumentHit(4, kSectionIdMaskNone, /*score=*/0.0);
ScoringSpecProto spec_proto;
spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::DOCUMENT_SCORE);
// Creates a ScoringProcessor which ranks in descending order
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(spec_proto, document_store()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/4),
ElementsAre(EqualsScoredDocumentHit(scored_document_hit_default),
EqualsScoredDocumentHit(scored_document_hits.at(0)),
EqualsScoredDocumentHit(scored_document_hits.at(1)),
EqualsScoredDocumentHit(scored_document_hits.at(2))));
}
TEST_F(ScoringProcessorTest, ShouldWrapResultsWhenNoScoring) {
DocumentProto document1 = CreateDocument("icing", "email/1", /*score=*/1,
kDefaultCreationTimestampMs);
DocumentProto document2 = CreateDocument("icing", "email/2", /*score=*/2,
kDefaultCreationTimestampMs);
DocumentProto document3 = CreateDocument("icing", "email/3", /*score=*/3,
kDefaultCreationTimestampMs);
// Intentionally inserts documents in a different order
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id1,
document_store()->Put(document1));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id3,
document_store()->Put(document3));
ICING_ASSERT_OK_AND_ASSIGN(DocumentId document_id2,
document_store()->Put(document2));
DocHitInfo doc_hit_info1(document_id1);
DocHitInfo doc_hit_info2(document_id2);
DocHitInfo doc_hit_info3(document_id3);
// The expected results should all have the default score 0.
ScoredDocumentHit scored_document_hit1(document_id1, kSectionIdMaskNone,
kDefaultScore);
ScoredDocumentHit scored_document_hit2(document_id2, kSectionIdMaskNone,
kDefaultScore);
ScoredDocumentHit scored_document_hit3(document_id3, kSectionIdMaskNone,
kDefaultScore);
// Creates a dummy DocHitInfoIterator with 3 results
std::vector<DocHitInfo> doc_hit_infos = {doc_hit_info2, doc_hit_info3,
doc_hit_info1};
std::unique_ptr<DocHitInfoIterator> doc_hit_info_iterator =
std::make_unique<DocHitInfoIteratorDummy>(doc_hit_infos);
// A ScoringSpecProto with no scoring strategy
ScoringSpecProto spec_proto;
spec_proto.set_rank_by(ScoringSpecProto::RankingStrategy::NONE);
// Creates a ScoringProcessor which ranks in descending order
ICING_ASSERT_OK_AND_ASSIGN(
std::unique_ptr<ScoringProcessor> scoring_processor,
ScoringProcessor::Create(spec_proto, document_store()));
EXPECT_THAT(scoring_processor->Score(std::move(doc_hit_info_iterator),
/*num_to_score=*/3),
ElementsAre(EqualsScoredDocumentHit(scored_document_hit2),
EqualsScoredDocumentHit(scored_document_hit3),
EqualsScoredDocumentHit(scored_document_hit1)));
}
} // namespace
} // namespace lib
} // namespace icing