blob: 40df46221340ad3c5effe793d1cf7171eb015b1d [file] [log] [blame]
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "testing/base/public/benchmark.h"
#include "gmock/gmock.h"
#include "third_party/absl/flags/flag.h"
#include "icing/document-builder.h"
#include "icing/index/index.h"
#include "icing/proto/term.pb.h"
#include "icing/query/query-processor.h"
#include "icing/schema/schema-store.h"
#include "icing/schema/section.h"
#include "icing/store/document-id.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/fake-clock.h"
#include "icing/testing/test-data.h"
#include "icing/testing/tmp-directory.h"
#include "icing/util/logging.h"
// Run on a Linux workstation:
// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
// //icing/query:query-processor_benchmark
//
// $ blaze-bin/icing/query/query-processor_benchmark
// --benchmarks=all
//
// Run on an Android device:
// Make target //icing/tokenization:language-segmenter depend on
// //third_party/icu
//
// Make target //icing/transform:normalizer depend on
// //third_party/icu
//
// Download LangId model file from
// //nlp/saft/components/lang_id/mobile/fb_model:models/latest_model.smfb and
// put it into your device:
// $ adb push [your model path] /data/local/tmp/
//
// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
// //icing/query:query-processor_benchmark
//
// $ adb push blaze-bin/icing/query/query-processor_benchmark
// /data/local/tmp/
//
// $ adb shell /data/local/tmp/query-processor_benchmark --benchmarks=all
// --adb
// Tells the benchmark that it is being run on an Android device via adb.
// When set, the LangId model is loaded from /data/local/tmp/ and the ICU data
// file setup step is skipped.
ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device");
namespace icing {
namespace lib {
namespace {
// Adds one hit for `token` to `index`, attributed to `document_id` and
// `section_id` and edited with `term_match_type`. Asserts that the hit was
// accepted.
void AddTokenToIndex(Index* index, DocumentId document_id, SectionId section_id,
                     TermMatchType::Code term_match_type,
                     const std::string& token) {
  Index::Editor hit_editor =
      index->Edit(document_id, section_id, term_match_type);
  const char* const term = token.c_str();
  ICING_ASSERT_OK(hit_editor.AddHit(term));
}
std::unique_ptr<Index> CreateIndex(const IcingFilesystem& filesystem,
const std::string& index_dir) {
Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10);
return Index::Create(options, &filesystem).ValueOrDie();
}
// Creates the LanguageSegmenter used by the benchmarks. The LangId model is
// read from the on-device path when the --adb flag is set and from
// GetLangIdModelPath() otherwise.
std::unique_ptr<LanguageSegmenter> CreateLanguageSegmenter() {
  // Where the run instructions at the top of this file push the model to.
  constexpr char kOnDeviceModelPath[] = "/data/local/tmp/latest_model.smfb";

  if (!absl::GetFlag(FLAGS_adb)) {
    return LanguageSegmenter::Create(GetLangIdModelPath()).ValueOrDie();
  }
  return LanguageSegmenter::Create(kOnDeviceModelPath).ValueOrDie();
}
// Creates a Normalizer whose max_term_byte_size is the largest int value, so
// the benchmark terms are not limited by that setting.
std::unique_ptr<Normalizer> CreateNormalizer() {
  constexpr int kMaxTermByteSize = std::numeric_limits<int>::max();
  return Normalizer::Create(kMaxTermByteSize).ValueOrDie();
}
// Recursively deletes `base_dir` through `filesystem`. The result of the
// deletion is intentionally not inspected.
void CleanUp(const Filesystem& filesystem, const std::string& base_dir) {
  const char* const dir_path = base_dir.c_str();
  filesystem.DeleteDirectoryRecursively(dir_path);
}
// Benchmarks parsing and fully iterating a query that consists of a single
// exact-match term made of state.range(0) 'A' characters. The index contains
// one document with that same term in section 0.
void BM_QueryOneTerm(benchmark::State& state) {
  // The ICU data file is only set up when not running on a device via adb.
  if (!absl::GetFlag(FLAGS_adb)) {
    ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
  }

  IcingFilesystem icing_filesystem;
  Filesystem filesystem;
  const std::string base_dir = GetTestTempDir() + "/query_test";
  const std::string index_dir = base_dir + "/index";
  const std::string schema_dir = base_dir + "/schema";
  const std::string doc_store_dir = base_dir + "/store";

  // Remove anything left behind by a previous run before recreating the
  // directory layout.
  CleanUp(filesystem, base_dir);
  if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
      !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
      !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
    // Everything below needs these directories, so report the failure through
    // the benchmark framework instead of continuing with a broken setup.
    ICING_LOG(ERROR) << "Failed to create test directories";
    state.SkipWithError("Failed to create test directories");
    return;
  }

  std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
  std::unique_ptr<LanguageSegmenter> language_segmenter =
      CreateLanguageSegmenter();
  std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
  FakeClock fake_clock;

  SchemaProto schema;
  auto type_config = schema.add_types();
  type_config->set_schema_type("type1");
  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
                             SchemaStore::Create(&filesystem, schema_dir));
  ICING_ASSERT_OK(schema_store->SetSchema(schema));

  std::unique_ptr<DocumentStore> document_store =
      DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
                            schema_store.get())
          .ValueOrDie();

  DocumentId document_id = document_store
                               ->Put(DocumentBuilder()
                                         .SetKey("icing", "type1")
                                         .SetSchema("type1")
                                         .Build())
                               .ValueOrDie();

  // The same string is both indexed and used as the query.
  const std::string input_string(
      static_cast<std::string::size_type>(state.range(0)), 'A');
  AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
                  TermMatchType::EXACT_ONLY, input_string);

  QueryProcessor query_processor(index.get(), language_segmenter.get(),
                                 normalizer.get(), document_store.get(),
                                 schema_store.get(), &fake_clock);

  SearchSpecProto search_spec;
  search_spec.set_query(input_string);
  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);

  for (auto _ : state) {
    QueryProcessor::QueryResults results =
        query_processor.ParseSearch(search_spec).ValueOrDie();
    while (results.root_iterator->Advance().ok()) {
      // Keep the otherwise unused hit lookup from being optimized away.
      benchmark::DoNotOptimize(results.root_iterator->doc_hit_info());
    }
  }

  // Destroy document store before the whole directory is removed because it
  // persists data in destructor.
  document_store.reset();
  CleanUp(filesystem, base_dir);
}
// Each argument is read through state.range(0) and is the number of
// characters in the single indexed/queried term.
BENCHMARK(BM_QueryOneTerm)
->Arg(1000)
->Arg(2000)
->Arg(4000)
->Arg(8000)
->Arg(16000)
->Arg(32000)
->Arg(64000)
->Arg(128000)
->Arg(256000)
->Arg(384000)
->Arg(512000)
->Arg(1024000)
->Arg(2048000)
->Arg(4096000);
// Benchmarks parsing and fully iterating a query made of five space-separated
// exact-match terms. Each term is state.range(0) / 5 repetitions of one
// letter ('A' through 'E') and is indexed in its own section (0 through 4) of
// a single document.
void BM_QueryFiveTerms(benchmark::State& state) {
  // The ICU data file is only set up when not running on a device via adb.
  if (!absl::GetFlag(FLAGS_adb)) {
    ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
  }

  IcingFilesystem icing_filesystem;
  Filesystem filesystem;
  const std::string base_dir = GetTestTempDir() + "/query_test";
  const std::string index_dir = base_dir + "/index";
  const std::string schema_dir = base_dir + "/schema";
  const std::string doc_store_dir = base_dir + "/store";

  // Remove anything left behind by a previous run before recreating the
  // directory layout.
  CleanUp(filesystem, base_dir);
  if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
      !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
      !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
    // Everything below needs these directories, so report the failure through
    // the benchmark framework instead of continuing with a broken setup.
    ICING_LOG(ERROR) << "Failed to create test directories";
    state.SkipWithError("Failed to create test directories");
    return;
  }

  std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
  std::unique_ptr<LanguageSegmenter> language_segmenter =
      CreateLanguageSegmenter();
  std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
  FakeClock fake_clock;

  SchemaProto schema;
  auto type_config = schema.add_types();
  type_config->set_schema_type("type1");
  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
                             SchemaStore::Create(&filesystem, schema_dir));
  ICING_ASSERT_OK(schema_store->SetSchema(schema));

  std::unique_ptr<DocumentStore> document_store =
      DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
                            schema_store.get())
          .ValueOrDie();

  DocumentId document_id = document_store
                               ->Put(DocumentBuilder()
                                         .SetKey("icing", "type1")
                                         .SetSchema("type1")
                                         .Build())
                               .ValueOrDie();

  // The benchmark argument is split evenly across the five terms. The
  // conversion to the string size type is made explicit rather than going
  // through an implicitly narrowed int.
  const auto term_length =
      static_cast<std::string::size_type>(state.range(0) / 5);

  const std::string input_string_a(term_length, 'A');
  const std::string input_string_b(term_length, 'B');
  const std::string input_string_c(term_length, 'C');
  const std::string input_string_d(term_length, 'D');
  const std::string input_string_e(term_length, 'E');
  AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
                  TermMatchType::EXACT_ONLY, input_string_a);
  AddTokenToIndex(index.get(), document_id, /*section_id=*/1,
                  TermMatchType::EXACT_ONLY, input_string_b);
  AddTokenToIndex(index.get(), document_id, /*section_id=*/2,
                  TermMatchType::EXACT_ONLY, input_string_c);
  AddTokenToIndex(index.get(), document_id, /*section_id=*/3,
                  TermMatchType::EXACT_ONLY, input_string_d);
  AddTokenToIndex(index.get(), document_id, /*section_id=*/4,
                  TermMatchType::EXACT_ONLY, input_string_e);

  QueryProcessor query_processor(index.get(), language_segmenter.get(),
                                 normalizer.get(), document_store.get(),
                                 schema_store.get(), &fake_clock);

  const std::string query_string = absl_ports::StrCat(
      input_string_a, " ", input_string_b, " ", input_string_c, " ",
      input_string_d, " ", input_string_e);

  SearchSpecProto search_spec;
  search_spec.set_query(query_string);
  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);

  for (auto _ : state) {
    QueryProcessor::QueryResults results =
        query_processor.ParseSearch(search_spec).ValueOrDie();
    while (results.root_iterator->Advance().ok()) {
      // Keep the otherwise unused hit lookup from being optimized away.
      benchmark::DoNotOptimize(results.root_iterator->doc_hit_info());
    }
  }

  // Destroy document store before the whole directory is removed because it
  // persists data in destructor.
  document_store.reset();
  CleanUp(filesystem, base_dir);
}
// Each argument is read through state.range(0); the benchmark divides it by
// five to get the length of each of the five query terms.
BENCHMARK(BM_QueryFiveTerms)
->Arg(1000)
->Arg(2000)
->Arg(4000)
->Arg(8000)
->Arg(16000)
->Arg(32000)
->Arg(64000)
->Arg(128000)
->Arg(256000)
->Arg(384000)
->Arg(512000)
->Arg(1024000)
->Arg(2048000)
->Arg(4096000);
// Benchmarks parsing and fully iterating a query that consists of a single
// exact-match term built by repeating "àáâãā" until the string's length()
// reaches at least state.range(0). The index contains one document with that
// same term in section 0.
void BM_QueryDiacriticTerm(benchmark::State& state) {
  // The ICU data file is only set up when not running on a device via adb.
  if (!absl::GetFlag(FLAGS_adb)) {
    ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
  }

  IcingFilesystem icing_filesystem;
  Filesystem filesystem;
  const std::string base_dir = GetTestTempDir() + "/query_test";
  const std::string index_dir = base_dir + "/index";
  const std::string schema_dir = base_dir + "/schema";
  const std::string doc_store_dir = base_dir + "/store";

  // Remove anything left behind by a previous run before recreating the
  // directory layout.
  CleanUp(filesystem, base_dir);
  if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
      !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
      !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
    // Everything below needs these directories, so report the failure through
    // the benchmark framework instead of continuing with a broken setup.
    ICING_LOG(ERROR) << "Failed to create test directories";
    state.SkipWithError("Failed to create test directories");
    return;
  }

  std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
  std::unique_ptr<LanguageSegmenter> language_segmenter =
      CreateLanguageSegmenter();
  std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
  FakeClock fake_clock;

  SchemaProto schema;
  auto type_config = schema.add_types();
  type_config->set_schema_type("type1");
  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
                             SchemaStore::Create(&filesystem, schema_dir));
  ICING_ASSERT_OK(schema_store->SetSchema(schema));

  std::unique_ptr<DocumentStore> document_store =
      DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
                            schema_store.get())
          .ValueOrDie();

  DocumentId document_id = document_store
                               ->Put(DocumentBuilder()
                                         .SetKey("icing", "type1")
                                         .SetSchema("type1")
                                         .Build())
                               .ValueOrDie();

  // Convert once so the loop compares two unsigned values instead of mixing
  // the signed benchmark argument with the unsigned string length.
  const auto min_length =
      static_cast<std::string::size_type>(state.range(0));
  std::string input_string;
  // Whole repetitions are appended, so the final length may overshoot
  // min_length.
  while (input_string.length() < min_length) {
    input_string.append("àáâãā");
  }
  AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
                  TermMatchType::EXACT_ONLY, input_string);

  QueryProcessor query_processor(index.get(), language_segmenter.get(),
                                 normalizer.get(), document_store.get(),
                                 schema_store.get(), &fake_clock);

  SearchSpecProto search_spec;
  search_spec.set_query(input_string);
  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);

  for (auto _ : state) {
    QueryProcessor::QueryResults results =
        query_processor.ParseSearch(search_spec).ValueOrDie();
    while (results.root_iterator->Advance().ok()) {
      // Keep the otherwise unused hit lookup from being optimized away.
      benchmark::DoNotOptimize(results.root_iterator->doc_hit_info());
    }
  }

  // Destroy document store before the whole directory is removed because it
  // persists data in destructor.
  document_store.reset();
  CleanUp(filesystem, base_dir);
}
// Each argument is read through state.range(0) and is the minimum
// std::string length of the diacritic term that is indexed and queried.
BENCHMARK(BM_QueryDiacriticTerm)
->Arg(1000)
->Arg(2000)
->Arg(4000)
->Arg(8000)
->Arg(16000)
->Arg(32000)
->Arg(64000)
->Arg(128000)
->Arg(256000)
->Arg(384000)
->Arg(512000)
->Arg(1024000)
->Arg(2048000)
->Arg(4096000);
// Benchmarks parsing and fully iterating a query built by repeating
// "あいうえお" until the string's length() reaches at least state.range(0).
// The same string is added to the index as a token in section 0 of a single
// document.
void BM_QueryHiragana(benchmark::State& state) {
  // The ICU data file is only set up when not running on a device via adb.
  if (!absl::GetFlag(FLAGS_adb)) {
    ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat"));
  }

  IcingFilesystem icing_filesystem;
  Filesystem filesystem;
  const std::string base_dir = GetTestTempDir() + "/query_test";
  const std::string index_dir = base_dir + "/index";
  const std::string schema_dir = base_dir + "/schema";
  const std::string doc_store_dir = base_dir + "/store";

  // Remove anything left behind by a previous run before recreating the
  // directory layout.
  CleanUp(filesystem, base_dir);
  if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) ||
      !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) ||
      !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) {
    // Everything below needs these directories, so report the failure through
    // the benchmark framework instead of continuing with a broken setup.
    ICING_LOG(ERROR) << "Failed to create test directories";
    state.SkipWithError("Failed to create test directories");
    return;
  }

  std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir);
  std::unique_ptr<LanguageSegmenter> language_segmenter =
      CreateLanguageSegmenter();
  std::unique_ptr<Normalizer> normalizer = CreateNormalizer();
  FakeClock fake_clock;

  SchemaProto schema;
  auto type_config = schema.add_types();
  type_config->set_schema_type("type1");
  ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store,
                             SchemaStore::Create(&filesystem, schema_dir));
  ICING_ASSERT_OK(schema_store->SetSchema(schema));

  std::unique_ptr<DocumentStore> document_store =
      DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock,
                            schema_store.get())
          .ValueOrDie();

  DocumentId document_id = document_store
                               ->Put(DocumentBuilder()
                                         .SetKey("icing", "type1")
                                         .SetSchema("type1")
                                         .Build())
                               .ValueOrDie();

  // Convert once so the loop compares two unsigned values instead of mixing
  // the signed benchmark argument with the unsigned string length.
  const auto min_length =
      static_cast<std::string::size_type>(state.range(0));
  std::string input_string;
  // Whole repetitions are appended, so the final length may overshoot
  // min_length.
  while (input_string.length() < min_length) {
    input_string.append("あいうえお");
  }
  AddTokenToIndex(index.get(), document_id, /*section_id=*/0,
                  TermMatchType::EXACT_ONLY, input_string);

  QueryProcessor query_processor(index.get(), language_segmenter.get(),
                                 normalizer.get(), document_store.get(),
                                 schema_store.get(), &fake_clock);

  SearchSpecProto search_spec;
  search_spec.set_query(input_string);
  search_spec.set_term_match_type(TermMatchType::EXACT_ONLY);

  for (auto _ : state) {
    QueryProcessor::QueryResults results =
        query_processor.ParseSearch(search_spec).ValueOrDie();
    while (results.root_iterator->Advance().ok()) {
      // Keep the otherwise unused hit lookup from being optimized away.
      benchmark::DoNotOptimize(results.root_iterator->doc_hit_info());
    }
  }

  // Destroy document store before the whole directory is removed because it
  // persists data in destructor.
  document_store.reset();
  CleanUp(filesystem, base_dir);
}
// Each argument is read through state.range(0) and is the minimum
// std::string length of the hiragana string that is indexed and queried.
BENCHMARK(BM_QueryHiragana)
->Arg(1000)
->Arg(2000)
->Arg(4000)
->Arg(8000)
->Arg(16000)
->Arg(32000)
->Arg(64000)
->Arg(128000)
->Arg(256000)
->Arg(384000)
->Arg(512000)
->Arg(1024000)
->Arg(2048000)
->Arg(4096000);
} // namespace
} // namespace lib
} // namespace icing