blob: de8f550fa41ecb85b700cace0b818e8645307c59 [file] [log] [blame]
// Copyright (C) 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <unistd.h>

#include <cmath>
#include <fstream>
#include <iostream>
#include <memory>
#include <ostream>
#include <random>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <unordered_set>
#include <vector>

#include "testing/base/public/benchmark.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "icing/document-builder.h"
#include "icing/file/filesystem.h"
#include "icing/icing-search-engine.h"
#include "icing/proto/document.pb.h"
#include "icing/proto/initialize.pb.h"
#include "icing/proto/schema.pb.h"
#include "icing/proto/status.pb.h"
#include "icing/proto/term.pb.h"
#include "icing/testing/common-matchers.h"
#include "icing/testing/document-generator.h"
#include "icing/testing/random-string.h"
#include "icing/testing/schema-generator.h"
#include "icing/testing/tmp-directory.h"
// Run on a Linux workstation:
// $ blaze build -c opt --dynamic_mode=off --copt=-gmlt
// //icing:icing-search-engine_flush_benchmark
//
// $ blaze-bin/icing/icing-search-engine_flush_benchmark
// --benchmarks=all --benchmark_memory_usage
//
// Run on an Android device:
// $ blaze build --copt="-DGOOGLE_COMMANDLINEFLAGS_FULL_API=1"
// --config=android_arm64 -c opt --dynamic_mode=off --copt=-gmlt
// //icing:icing-search-engine_flush_benchmark
//
// $ adb push blaze-bin/icing/icing-search-engine_flush_benchmark
// /data/local/tmp/
//
// $ adb shell /data/local/tmp/icing-search-engine_flush_benchmark
// --benchmarks=all
namespace icing {
namespace lib {
namespace {
// Workload shape: assume that there will be roughly 10 packages (one
// namespace is generated per package), each using 3 of its own types. The
// benchmark generates a schema with kAvgNumNamespaces * kAvgNumTypes types.
constexpr int kAvgNumNamespaces = 10;
constexpr int kAvgNumTypes = 3;
// ASSUME: Types will have at most ten properties. Types will be created with
// [1, 10] properties.
constexpr int kMaxNumProperties = 10;
// Average document size, based on logs from Icing GMSCore.
// NOTE(review): the unit is presumably bytes - confirm.
constexpr int kAvgDocumentSize = 300;
// ASSUME: ~70% of the document's size comes from its content. Expressed as a
// fraction; kAvgDocumentSize * kContentSizePct is the content size handed to
// the document generator.
constexpr float kContentSizePct = 0.7;
// Average length of a word in English is 4.7 characters. Used as the mean of
// the normal distribution from which generated word lengths are drawn.
constexpr int kAvgTokenLen = 5;
// Standard deviation of the word-length distribution. Made up value. This
// results in a fairly reasonable language - the majority of generated words
// are 3-9 characters, ~3% of words are >=20 chars, and the longest ones are 27
// chars (roughly consistent with the longest non-contrived English words:
// https://en.wikipedia.org/wiki/Longest_word_in_English).
constexpr int kTokenStdDev = 7;
// The number of distinct random words in the generated language that document
// content is drawn from.
constexpr int kLanguageSize = 1000;
// The number of documents to index in each benchmark iteration.
constexpr int kNumDocuments = 1024;
// Returns `num_namespaces` namespace names of the form "comgooglepackage<N>",
// ordered from the highest suffix (num_namespaces - 1) down to 0. A
// non-positive `num_namespaces` yields an empty vector.
std::vector<std::string> CreateNamespaces(int num_namespaces) {
  std::vector<std::string> result;
  for (int suffix = num_namespaces - 1; suffix >= 0; --suffix) {
    result.emplace_back("comgooglepackage" + std::to_string(suffix));
  }
  return result;
}
// Creates a vector containing num_words randomly-generated words for use by
// documents.
//
// Each word's length is drawn from a normal distribution with mean
// kAvgTokenLen and standard deviation kTokenStdDev, rounded to the nearest
// integer; draws that round to less than 1 are rejected and redrawn so that
// every word has at least one character. The characters of each word are
// produced by RandomString over kAlNumAlphabet.
//
// `r` is the random engine used for both the length draws and the word
// contents; it must not be null. A non-positive `num_words` yields an empty
// vector.
template <typename Rand>
std::vector<std::string> CreateLanguage(int num_words, Rand* r) {
  std::vector<std::string> language;
  if (num_words > 0) {
    // The final size is known up front; avoid repeated reallocation.
    language.reserve(
        static_cast<std::vector<std::string>::size_type>(num_words));
  }
  std::normal_distribution<> norm_dist(kAvgTokenLen, kTokenStdDev);
  for (int i = 0; i < num_words; ++i) {
    int word_length = 0;
    do {
      // std::lround rounds half away from zero (like std::round) but returns
      // an integer, so the only conversion left is an explicit long -> int.
      word_length = static_cast<int>(std::lround(norm_dist(*r)));
    } while (word_length < 1);
    language.push_back(RandomString(kAlNumAlphabet, word_length, r));
  }
  return language;
}
// Scoped helper that asks the filesystem to create `dir` on construction and
// to recursively delete it again on destruction, so that whatever the
// benchmark writes under the directory is removed when the object goes out of
// scope.
//
// The results of the create/delete calls are not checked; cleanup is
// best-effort. The object keeps its own copy of the Filesystem and of the
// directory path.
class DestructibleDirectory {
 public:
  // Creates `dir` via filesystem.CreateDirectoryRecursively().
  explicit DestructibleDirectory(const Filesystem& filesystem,
                                 const std::string& dir)
      : filesystem_(filesystem), dir_(dir) {
    filesystem_.CreateDirectoryRecursively(dir_.c_str());
  }

  // Not copyable: a copy would delete the same directory a second time when it
  // is destroyed, possibly while the original is still using it.
  DestructibleDirectory(const DestructibleDirectory&) = delete;
  DestructibleDirectory& operator=(const DestructibleDirectory&) = delete;

  // Recursively deletes the directory passed to the constructor.
  ~DestructibleDirectory() {
    filesystem_.DeleteDirectoryRecursively(dir_.c_str());
  }

 private:
  Filesystem filesystem_;
  std::string dir_;
};
// Benchmarks document indexing combined with periodic PersistToDisk calls.
//
// Benchmark arguments:
//   state.range(0): non-zero selects PersistType::LITE, zero selects
//                   PersistType::FULL.
//   state.range(1): number of documents put between PersistToDisk calls.
//
// Setup (untimed): creates a scratch directory under the test temp dir,
// generates a random schema, a set of namespaces and a random language, then
// initializes an IcingSearchEngine on the scratch directory and sets the
// schema. Each benchmark iteration puts kNumDocuments generated documents and
// persists after every `state.range(1)`-th one. The results of Put and
// PersistToDisk are not checked.
void BM_FlushBenchmark(benchmark::State& state) {
  using TokenGenerator =
      UniformDistributionLanguageTokenGenerator<std::default_random_engine>;
  using DocGenerator =
      DocumentGenerator<EvenDistributionNamespaceSelector,
                        EvenDistributionTypeSelector, TokenGenerator>;

  const bool use_lite_persist = state.range(0) != 0;
  PersistType::Code persist_type =
      use_lite_persist ? PersistType::LITE : PersistType::FULL;
  int persist_interval = state.range(1);

  // Scratch directory; removed again when `scoped_dir` is destroyed, which
  // happens after the engine declared further down has been destroyed.
  std::string test_dir = GetTestTempDir() + "/icing/benchmark/flush";
  Filesystem filesystem;
  DestructibleDirectory scoped_dir(filesystem, test_dir);

  // Schema: kAvgNumNamespaces * kAvgNumTypes types with up to
  // kMaxNumProperties properties each.
  std::default_random_engine rng;
  int type_count = kAvgNumNamespaces * kAvgNumTypes;
  ExactStringPropertyGenerator property_generator;
  RandomSchemaGenerator<std::default_random_engine,
                        ExactStringPropertyGenerator>
      schema_generator(&rng, &property_generator);
  SchemaProto schema =
      schema_generator.GenerateSchema(type_count, kMaxNumProperties);
  EvenDistributionTypeSelector type_selector(schema);

  // Namespaces and the vocabulary that document content is drawn from.
  std::vector<std::string> namespaces = CreateNamespaces(kAvgNumNamespaces);
  EvenDistributionNamespaceSelector namespace_selector(namespaces);
  std::vector<std::string> language = CreateLanguage(kLanguageSize, &rng);
  TokenGenerator token_generator(language, &rng);
  DocGenerator generator(&namespace_selector, &type_selector, &token_generator,
                         kAvgDocumentSize * kContentSizePct);

  // Engine under test.
  IcingSearchEngineOptions options;
  options.set_base_dir(test_dir);
  auto icing = std::make_unique<IcingSearchEngine>(options);
  ASSERT_THAT(icing->Initialize().status(), ProtoIsOk());
  ASSERT_THAT(icing->SetSchema(schema).status(), ProtoIsOk());

  for (auto _ : state) {
    for (int doc_index = 0; doc_index < kNumDocuments; ++doc_index) {
      icing->Put(generator.generateDoc());
      // Persist once every `persist_interval` documents, on the last document
      // of each group.
      const bool persist_due =
          doc_index % persist_interval == persist_interval - 1;
      if (persist_due) {
        icing->PersistToDisk(persist_type);
      }
    }
  }
}
BENCHMARK(BM_FlushBenchmark)
// Each ArgPair is (lite_flush, num_documents_per_persist):
// First argument: true to persist with PersistType::LITE, false to persist
// with PersistType::FULL.
// Second argument: number of documents put between PersistToDisk calls. This
// applies to both persist types; 1024 equals kNumDocuments, i.e. a single
// persist at the end of each benchmark iteration.
->ArgPair(true, 1)
->ArgPair(false, 1)
->ArgPair(true, 32)
->ArgPair(false, 32)
->ArgPair(true, 1024)
->ArgPair(false, 1024);
} // namespace
} // namespace lib
} // namespace icing