| // Copyright (C) 2019 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "testing/base/public/benchmark.h" |
| #include "gmock/gmock.h" |
| #include "third_party/absl/flags/flag.h" |
| #include "icing/document-builder.h" |
| #include "icing/index/index.h" |
| #include "icing/proto/term.pb.h" |
| #include "icing/query/query-processor.h" |
| #include "icing/schema/schema-store.h" |
| #include "icing/schema/section.h" |
| #include "icing/store/document-id.h" |
| #include "icing/testing/common-matchers.h" |
| #include "icing/testing/fake-clock.h" |
| #include "icing/testing/test-data.h" |
| #include "icing/testing/tmp-directory.h" |
| #include "icing/util/logging.h" |
| |
| // Run on a Linux workstation: |
| // $ blaze build -c opt --dynamic_mode=off --copt=-gmlt |
| // //icing/query:query-processor_benchmark |
| // |
| // $ blaze-bin/icing/query/query-processor_benchmark |
| // --benchmarks=all |
| // |
| // Run on an Android device: |
| // Make target //icing/tokenization:language-segmenter depend on |
| // //third_party/icu |
| // |
| // Make target //icing/transform:normalizer depend on |
| // //third_party/icu |
| // |
| // Download LangId model file from |
| // //nlp/saft/components/lang_id/mobile/fb_model:models/latest_model.smfb and |
| // put it into your device: |
| // $ adb push [your model path] /data/local/tmp/ |
| // |
| // $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1" |
| // --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt |
| // //icing/query:query-processor_benchmark |
| // |
| // $ adb push blaze-bin/icing/query/query-processor_benchmark |
| // /data/local/tmp/ |
| // |
| // $ adb shell /data/local/tmp/query-processor_benchmark --benchmarks=all |
| // --adb |
| |
| // Flag to tell the benchmark that it'll be run on an Android device via adb, |
| // the benchmark will set up data files accordingly. |
| ABSL_FLAG(bool, adb, false, "run benchmark via ADB on an Android device"); |
| |
| namespace icing { |
| namespace lib { |
| |
| namespace { |
| |
| void AddTokenToIndex(Index* index, DocumentId document_id, SectionId section_id, |
| TermMatchType::Code term_match_type, |
| const std::string& token) { |
| Index::Editor editor = index->Edit(document_id, section_id, term_match_type); |
| ICING_ASSERT_OK(editor.AddHit(token.c_str())); |
| } |
| |
| std::unique_ptr<Index> CreateIndex(const IcingFilesystem& filesystem, |
| const std::string& index_dir) { |
| Index::Options options(index_dir, /*index_merge_size=*/1024 * 1024 * 10); |
| return Index::Create(options, &filesystem).ValueOrDie(); |
| } |
| |
| std::unique_ptr<LanguageSegmenter> CreateLanguageSegmenter() { |
| if (absl::GetFlag(FLAGS_adb)) { |
| return LanguageSegmenter::Create("/data/local/tmp/latest_model.smfb") |
| .ValueOrDie(); |
| } else { |
| return LanguageSegmenter::Create(GetLangIdModelPath()).ValueOrDie(); |
| } |
| } |
| |
| std::unique_ptr<Normalizer> CreateNormalizer() { |
| return Normalizer::Create( |
| /*max_term_byte_size=*/std::numeric_limits<int>::max()) |
| .ValueOrDie(); |
| } |
| |
| void CleanUp(const Filesystem& filesystem, const std::string& base_dir) { |
| filesystem.DeleteDirectoryRecursively(base_dir.c_str()); |
| } |
| |
| void BM_QueryOneTerm(benchmark::State& state) { |
| bool run_via_adb = absl::GetFlag(FLAGS_adb); |
| if (!run_via_adb) { |
| ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat")); |
| } |
| |
| IcingFilesystem icing_filesystem; |
| Filesystem filesystem; |
| const std::string base_dir = GetTestTempDir() + "/query_test"; |
| const std::string index_dir = base_dir + "/index"; |
| const std::string schema_dir = base_dir + "/schema"; |
| const std::string doc_store_dir = base_dir + "/store"; |
| |
| CleanUp(filesystem, base_dir); |
| if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) || |
| !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) || |
| !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) { |
| ICING_LOG(ERROR) << "Failed to create test directories"; |
| } |
| |
| std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir); |
| std::unique_ptr<LanguageSegmenter> language_segmenter = |
| CreateLanguageSegmenter(); |
| std::unique_ptr<Normalizer> normalizer = CreateNormalizer(); |
| FakeClock fake_clock; |
| |
| SchemaProto schema; |
| auto type_config = schema.add_types(); |
| type_config->set_schema_type("type1"); |
| ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store, |
| SchemaStore::Create(&filesystem, schema_dir)); |
| ICING_ASSERT_OK(schema_store->SetSchema(schema)); |
| |
| std::unique_ptr<DocumentStore> document_store = |
| DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock, |
| schema_store.get()) |
| .ValueOrDie(); |
| |
| DocumentId document_id = document_store |
| ->Put(DocumentBuilder() |
| .SetKey("icing", "type1") |
| .SetSchema("type1") |
| .Build()) |
| .ValueOrDie(); |
| |
| const std::string input_string(state.range(0), 'A'); |
| AddTokenToIndex(index.get(), document_id, /*section_id=*/0, |
| TermMatchType::EXACT_ONLY, input_string); |
| |
| QueryProcessor query_processor(index.get(), language_segmenter.get(), |
| normalizer.get(), document_store.get(), |
| schema_store.get(), &fake_clock); |
| SearchSpecProto search_spec; |
| search_spec.set_query(input_string); |
| search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); |
| |
| for (auto _ : state) { |
| QueryProcessor::QueryResults results = |
| query_processor.ParseSearch(search_spec).ValueOrDie(); |
| while (results.root_iterator->Advance().ok()) { |
| results.root_iterator->doc_hit_info(); |
| } |
| } |
| |
| // Destroy document store before the whole directory is removed because it |
| // persists data in destructor. |
| document_store.reset(); |
| CleanUp(filesystem, base_dir); |
| } |
| BENCHMARK(BM_QueryOneTerm) |
| ->Arg(1000) |
| ->Arg(2000) |
| ->Arg(4000) |
| ->Arg(8000) |
| ->Arg(16000) |
| ->Arg(32000) |
| ->Arg(64000) |
| ->Arg(128000) |
| ->Arg(256000) |
| ->Arg(384000) |
| ->Arg(512000) |
| ->Arg(1024000) |
| ->Arg(2048000) |
| ->Arg(4096000); |
| |
| void BM_QueryFiveTerms(benchmark::State& state) { |
| bool run_via_adb = absl::GetFlag(FLAGS_adb); |
| if (!run_via_adb) { |
| ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat")); |
| } |
| |
| IcingFilesystem icing_filesystem; |
| Filesystem filesystem; |
| const std::string base_dir = GetTestTempDir() + "/query_test"; |
| const std::string index_dir = base_dir + "/index"; |
| const std::string schema_dir = base_dir + "/schema"; |
| const std::string doc_store_dir = base_dir + "/store"; |
| |
| CleanUp(filesystem, base_dir); |
| if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) || |
| !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) || |
| !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) { |
| ICING_LOG(ERROR) << "Failed to create test directories"; |
| } |
| |
| std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir); |
| std::unique_ptr<LanguageSegmenter> language_segmenter = |
| CreateLanguageSegmenter(); |
| std::unique_ptr<Normalizer> normalizer = CreateNormalizer(); |
| FakeClock fake_clock; |
| |
| SchemaProto schema; |
| auto type_config = schema.add_types(); |
| type_config->set_schema_type("type1"); |
| ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store, |
| SchemaStore::Create(&filesystem, schema_dir)); |
| ICING_ASSERT_OK(schema_store->SetSchema(schema)); |
| |
| std::unique_ptr<DocumentStore> document_store = |
| DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock, |
| schema_store.get()) |
| .ValueOrDie(); |
| |
| DocumentId document_id = document_store |
| ->Put(DocumentBuilder() |
| .SetKey("icing", "type1") |
| .SetSchema("type1") |
| .Build()) |
| .ValueOrDie(); |
| |
| int term_length = state.range(0) / 5; |
| |
| const std::string input_string_a(term_length, 'A'); |
| const std::string input_string_b(term_length, 'B'); |
| const std::string input_string_c(term_length, 'C'); |
| const std::string input_string_d(term_length, 'D'); |
| const std::string input_string_e(term_length, 'E'); |
| AddTokenToIndex(index.get(), document_id, /*section_id=*/0, |
| TermMatchType::EXACT_ONLY, input_string_a); |
| AddTokenToIndex(index.get(), document_id, /*section_id=*/1, |
| TermMatchType::EXACT_ONLY, input_string_b); |
| AddTokenToIndex(index.get(), document_id, /*section_id=*/2, |
| TermMatchType::EXACT_ONLY, input_string_c); |
| AddTokenToIndex(index.get(), document_id, /*section_id=*/3, |
| TermMatchType::EXACT_ONLY, input_string_d); |
| AddTokenToIndex(index.get(), document_id, /*section_id=*/4, |
| TermMatchType::EXACT_ONLY, input_string_e); |
| |
| QueryProcessor query_processor(index.get(), language_segmenter.get(), |
| normalizer.get(), document_store.get(), |
| schema_store.get(), &fake_clock); |
| |
| const std::string query_string = absl_ports::StrCat( |
| input_string_a, " ", input_string_b, " ", input_string_c, " ", |
| input_string_d, " ", input_string_e); |
| |
| SearchSpecProto search_spec; |
| search_spec.set_query(query_string); |
| search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); |
| |
| for (auto _ : state) { |
| QueryProcessor::QueryResults results = |
| query_processor.ParseSearch(search_spec).ValueOrDie(); |
| while (results.root_iterator->Advance().ok()) { |
| results.root_iterator->doc_hit_info(); |
| } |
| } |
| |
| // Destroy document store before the whole directory is removed because it |
| // persists data in destructor. |
| document_store.reset(); |
| CleanUp(filesystem, base_dir); |
| } |
| BENCHMARK(BM_QueryFiveTerms) |
| ->Arg(1000) |
| ->Arg(2000) |
| ->Arg(4000) |
| ->Arg(8000) |
| ->Arg(16000) |
| ->Arg(32000) |
| ->Arg(64000) |
| ->Arg(128000) |
| ->Arg(256000) |
| ->Arg(384000) |
| ->Arg(512000) |
| ->Arg(1024000) |
| ->Arg(2048000) |
| ->Arg(4096000); |
| |
| void BM_QueryDiacriticTerm(benchmark::State& state) { |
| bool run_via_adb = absl::GetFlag(FLAGS_adb); |
| if (!run_via_adb) { |
| ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat")); |
| } |
| |
| IcingFilesystem icing_filesystem; |
| Filesystem filesystem; |
| const std::string base_dir = GetTestTempDir() + "/query_test"; |
| const std::string index_dir = base_dir + "/index"; |
| const std::string schema_dir = base_dir + "/schema"; |
| const std::string doc_store_dir = base_dir + "/store"; |
| |
| CleanUp(filesystem, base_dir); |
| if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) || |
| !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) || |
| !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) { |
| ICING_LOG(ERROR) << "Failed to create test directories"; |
| } |
| |
| std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir); |
| std::unique_ptr<LanguageSegmenter> language_segmenter = |
| CreateLanguageSegmenter(); |
| std::unique_ptr<Normalizer> normalizer = CreateNormalizer(); |
| FakeClock fake_clock; |
| |
| SchemaProto schema; |
| auto type_config = schema.add_types(); |
| type_config->set_schema_type("type1"); |
| ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store, |
| SchemaStore::Create(&filesystem, schema_dir)); |
| ICING_ASSERT_OK(schema_store->SetSchema(schema)); |
| |
| std::unique_ptr<DocumentStore> document_store = |
| DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock, |
| schema_store.get()) |
| .ValueOrDie(); |
| |
| DocumentId document_id = document_store |
| ->Put(DocumentBuilder() |
| .SetKey("icing", "type1") |
| .SetSchema("type1") |
| .Build()) |
| .ValueOrDie(); |
| |
| std::string input_string; |
| while (input_string.length() < state.range(0)) { |
| input_string.append("àáâãā"); |
| } |
| AddTokenToIndex(index.get(), document_id, /*section_id=*/0, |
| TermMatchType::EXACT_ONLY, input_string); |
| |
| QueryProcessor query_processor(index.get(), language_segmenter.get(), |
| normalizer.get(), document_store.get(), |
| schema_store.get(), &fake_clock); |
| SearchSpecProto search_spec; |
| search_spec.set_query(input_string); |
| search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); |
| |
| for (auto _ : state) { |
| QueryProcessor::QueryResults results = |
| query_processor.ParseSearch(search_spec).ValueOrDie(); |
| while (results.root_iterator->Advance().ok()) { |
| results.root_iterator->doc_hit_info(); |
| } |
| } |
| |
| // Destroy document store before the whole directory is removed because it |
| // persists data in destructor. |
| document_store.reset(); |
| CleanUp(filesystem, base_dir); |
| } |
| BENCHMARK(BM_QueryDiacriticTerm) |
| ->Arg(1000) |
| ->Arg(2000) |
| ->Arg(4000) |
| ->Arg(8000) |
| ->Arg(16000) |
| ->Arg(32000) |
| ->Arg(64000) |
| ->Arg(128000) |
| ->Arg(256000) |
| ->Arg(384000) |
| ->Arg(512000) |
| ->Arg(1024000) |
| ->Arg(2048000) |
| ->Arg(4096000); |
| |
| void BM_QueryHiragana(benchmark::State& state) { |
| bool run_via_adb = absl::GetFlag(FLAGS_adb); |
| if (!run_via_adb) { |
| ICING_ASSERT_OK(SetUpICUDataFile("icing/icu.dat")); |
| } |
| |
| IcingFilesystem icing_filesystem; |
| Filesystem filesystem; |
| const std::string base_dir = GetTestTempDir() + "/query_test"; |
| const std::string index_dir = base_dir + "/index"; |
| const std::string schema_dir = base_dir + "/schema"; |
| const std::string doc_store_dir = base_dir + "/store"; |
| |
| CleanUp(filesystem, base_dir); |
| if (!filesystem.CreateDirectoryRecursively(index_dir.c_str()) || |
| !filesystem.CreateDirectoryRecursively(schema_dir.c_str()) || |
| !filesystem.CreateDirectoryRecursively(doc_store_dir.c_str())) { |
| ICING_LOG(ERROR) << "Failed to create test directories"; |
| } |
| |
| std::unique_ptr<Index> index = CreateIndex(icing_filesystem, index_dir); |
| std::unique_ptr<LanguageSegmenter> language_segmenter = |
| CreateLanguageSegmenter(); |
| std::unique_ptr<Normalizer> normalizer = CreateNormalizer(); |
| FakeClock fake_clock; |
| |
| SchemaProto schema; |
| auto type_config = schema.add_types(); |
| type_config->set_schema_type("type1"); |
| ICING_ASSERT_OK_AND_ASSIGN(std::unique_ptr<SchemaStore> schema_store, |
| SchemaStore::Create(&filesystem, schema_dir)); |
| ICING_ASSERT_OK(schema_store->SetSchema(schema)); |
| |
| std::unique_ptr<DocumentStore> document_store = |
| DocumentStore::Create(&filesystem, doc_store_dir, &fake_clock, |
| schema_store.get()) |
| .ValueOrDie(); |
| |
| DocumentId document_id = document_store |
| ->Put(DocumentBuilder() |
| .SetKey("icing", "type1") |
| .SetSchema("type1") |
| .Build()) |
| .ValueOrDie(); |
| |
| std::string input_string; |
| while (input_string.length() < state.range(0)) { |
| input_string.append("あいうえお"); |
| } |
| AddTokenToIndex(index.get(), document_id, /*section_id=*/0, |
| TermMatchType::EXACT_ONLY, input_string); |
| |
| QueryProcessor query_processor(index.get(), language_segmenter.get(), |
| normalizer.get(), document_store.get(), |
| schema_store.get(), &fake_clock); |
| |
| SearchSpecProto search_spec; |
| search_spec.set_query(input_string); |
| search_spec.set_term_match_type(TermMatchType::EXACT_ONLY); |
| |
| for (auto _ : state) { |
| QueryProcessor::QueryResults results = |
| query_processor.ParseSearch(search_spec).ValueOrDie(); |
| while (results.root_iterator->Advance().ok()) { |
| results.root_iterator->doc_hit_info(); |
| } |
| } |
| |
| // Destroy document store before the whole directory is removed because it |
| // persists data in destructor. |
| document_store.reset(); |
| CleanUp(filesystem, base_dir); |
| } |
| BENCHMARK(BM_QueryHiragana) |
| ->Arg(1000) |
| ->Arg(2000) |
| ->Arg(4000) |
| ->Arg(8000) |
| ->Arg(16000) |
| ->Arg(32000) |
| ->Arg(64000) |
| ->Arg(128000) |
| ->Arg(256000) |
| ->Arg(384000) |
| ->Arg(512000) |
| ->Arg(1024000) |
| ->Arg(2048000) |
| ->Arg(4096000); |
| } // namespace |
| |
| } // namespace lib |
| } // namespace icing |