Export libtextclassifier to Android (generated by the export script)
Test: Compile and boot
Change-Id: I0433e6fb549ba0b32bc55933b3c11562e61a0b4d
diff --git a/annotator/annotator.cc b/annotator/annotator.cc
index 3c3f16b..562d58e 100644
--- a/annotator/annotator.cc
+++ b/annotator/annotator.cc
@@ -39,11 +39,9 @@
namespace {
const Model* LoadAndVerifyModel(const void* addr, int size) {
- const Model* model = GetModel(addr);
-
flatbuffers::Verifier verifier(reinterpret_cast<const uint8_t*>(addr), size);
- if (model->Verify(verifier)) {
- return model;
+ if (VerifyModelBuffer(verifier)) {
+ return GetModel(addr);
} else {
return nullptr;
}
diff --git a/annotator/annotator_test.cc b/annotator/annotator_test.cc
index 8598ea4..b6290d5 100644
--- a/annotator/annotator_test.cc
+++ b/annotator/annotator_test.cc
@@ -123,7 +123,7 @@
unpacked_model->triggering_options->enabled_modes = ModeFlag_SELECTION;
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -142,7 +142,7 @@
ModeFlag_ANNOTATION_AND_SELECTION;
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -172,7 +172,7 @@
"phone");
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -227,7 +227,7 @@
unpacked_model->regex_model->patterns.push_back(std::move(verified_pattern));
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -291,7 +291,7 @@
unpacked_model->regex_model->patterns.push_back(std::move(verified_pattern));
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -325,7 +325,7 @@
unpacked_model->regex_model->patterns.back()->priority_score = 0.5;
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -356,7 +356,7 @@
unpacked_model->regex_model->patterns.back()->priority_score = 1.1;
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -393,7 +393,7 @@
verified_pattern->verification_options->verify_luhn_checksum = true;
unpacked_model->regex_model->patterns.push_back(std::move(verified_pattern));
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -479,7 +479,7 @@
unpacked_model->triggering_options->enabled_modes = ModeFlag_ANNOTATION;
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -499,7 +499,7 @@
unpacked_model->enabled_modes = ModeFlag_CLASSIFICATION;
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -538,7 +538,7 @@
unpacked_model->selection_options->always_classify_suggested_selection = true;
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -746,7 +746,7 @@
// Set the batch size.
unpacked_model->selection_options->batch_size = 4;
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -778,7 +778,7 @@
unpacked_model->triggering_options->min_annotate_confidence =
2.f; // Discards all results.
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -803,7 +803,7 @@
0.f; // Keeps all results.
unpacked_model->triggering_options->enabled_modes = ModeFlag_ALL;
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -823,7 +823,7 @@
// Disable the model for annotation.
unpacked_model->enabled_modes = ModeFlag_CLASSIFICATION_AND_SELECTION;
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -860,7 +860,7 @@
"phone");
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -905,7 +905,7 @@
/*enabled_for_selection=*/false, /*enabled_for_annotation=*/true, 2.0));
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -1014,7 +1014,7 @@
ModeFlag_ANNOTATION_AND_CLASSIFICATION;
}
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
std::unique_ptr<Annotator> classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
@@ -1186,7 +1186,7 @@
unpacked_model->classification_options->max_num_tokens = -1;
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
builder.GetSize(), &unilib_, &calendarlib_);
@@ -1200,7 +1200,7 @@
unpacked_model->classification_options->max_num_tokens = 3;
flatbuffers::FlatBufferBuilder builder2;
- builder2.Finish(Model::Pack(builder2, unpacked_model.get()));
+ FinishModelBuffer(builder2, Model::Pack(builder2, unpacked_model.get()));
classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder2.GetBufferPointer()),
builder2.GetSize(), &unilib_, &calendarlib_);
@@ -1223,7 +1223,7 @@
unpacked_model->classification_options->address_min_num_tokens = 0;
flatbuffers::FlatBufferBuilder builder;
- builder.Finish(Model::Pack(builder, unpacked_model.get()));
+ FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder.GetBufferPointer()),
builder.GetSize(), &unilib_, &calendarlib_);
@@ -1237,7 +1237,7 @@
unpacked_model->classification_options->address_min_num_tokens = 5;
flatbuffers::FlatBufferBuilder builder2;
- builder2.Finish(Model::Pack(builder2, unpacked_model.get()));
+ FinishModelBuffer(builder2, Model::Pack(builder2, unpacked_model.get()));
classifier = Annotator::FromUnownedBuffer(
reinterpret_cast<const char*>(builder2.GetBufferPointer()),
builder2.GetSize(), &unilib_, &calendarlib_);
diff --git a/annotator/datetime/parser_test.cc b/annotator/datetime/parser_test.cc
index 6bd6d10..efe7306 100644
--- a/annotator/datetime/parser_test.cc
+++ b/annotator/datetime/parser_test.cc
@@ -124,7 +124,7 @@
{{expected_start_index, expected_end_index},
{expected_ms_utc, expected_granularity},
/*target_classification_score=*/1.0,
- /*priority_score=*/0.0}};
+ /*priority_score=*/0.1}};
const bool matches =
testing::Matches(ElementsAreArray(expected))(filtered_results);
if (!matches) {
diff --git a/annotator/feature-processor.h b/annotator/feature-processor.h
index ce44372..2d04253 100644
--- a/annotator/feature-processor.h
+++ b/annotator/feature-processor.h
@@ -88,10 +88,7 @@
// identical.
typedef std::map<CodepointSpan, std::vector<float>> EmbeddingCache;
- // If unilib is nullptr, will create and own an instance of a UniLib,
- // otherwise will use what's passed in.
- explicit FeatureProcessor(const FeatureProcessorOptions* options,
- const UniLib* unilib)
+ FeatureProcessor(const FeatureProcessorOptions* options, const UniLib* unilib)
: unilib_(unilib),
feature_extractor_(internal::BuildTokenFeatureExtractorOptions(options),
*unilib_),
diff --git a/annotator/test_data/test_model.fb b/annotator/test_data/test_model.fb
index ca6d9bf..f25b950 100644
--- a/annotator/test_data/test_model.fb
+++ b/annotator/test_data/test_model.fb
Binary files differ
diff --git a/annotator/test_data/test_model_cc.fb b/annotator/test_data/test_model_cc.fb
index a1b73fe..cfe10cf 100644
--- a/annotator/test_data/test_model_cc.fb
+++ b/annotator/test_data/test_model_cc.fb
Binary files differ
diff --git a/annotator/test_data/wrong_embeddings.fb b/annotator/test_data/wrong_embeddings.fb
index 38b6969..7e990ed 100644
--- a/annotator/test_data/wrong_embeddings.fb
+++ b/annotator/test_data/wrong_embeddings.fb
Binary files differ
diff --git a/annotator/token-feature-extractor.cc b/annotator/token-feature-extractor.cc
index 86ab03a..77ad7a4 100644
--- a/annotator/token-feature-extractor.cc
+++ b/annotator/token-feature-extractor.cc
@@ -58,11 +58,11 @@
remapped->clear();
for (auto it = word.begin(); it != word.end(); ++it) {
if (options.remap_digits && unilib.IsDigit(*it)) {
- remapped->AppendCodepoint('0');
+ remapped->push_back('0');
} else if (options.lowercase_tokens) {
- remapped->AppendCodepoint(unilib.ToLower(*it));
+ remapped->push_back(unilib.ToLower(*it));
} else {
- remapped->AppendCodepoint(*it);
+ remapped->push_back(*it);
}
}
}
@@ -160,7 +160,7 @@
int TokenFeatureExtractor::HashToken(StringPiece token) const {
if (options_.allowed_chargrams.empty()) {
- return tc2farmhash::Fingerprint64(token) % options_.num_buckets;
+ return tc3farmhash::Fingerprint64(token) % options_.num_buckets;
} else {
// Padding and out-of-vocabulary tokens have extra buckets reserved because
// they are special and important tokens, and we don't want them to share
@@ -174,7 +174,7 @@
options_.allowed_chargrams.end()) {
return 0; // Out-of-vocabulary.
} else {
- return (tc2farmhash::Fingerprint64(token) %
+ return (tc3farmhash::Fingerprint64(token) %
(options_.num_buckets - kNumExtraBuckets)) +
kNumExtraBuckets;
}
diff --git a/annotator/zlib-utils.cc b/annotator/zlib-utils.cc
index f1de08a..d0fb0d0 100644
--- a/annotator/zlib-utils.cc
+++ b/annotator/zlib-utils.cc
@@ -156,6 +156,9 @@
bool DecompressBuffer(const CompressedBufferT* compressed_pattern,
ZlibDecompressor* zlib_decompressor,
std::string* uncompressed_pattern) {
+ if (!compressed_pattern) {
+ return true;
+ }
std::string packed_pattern =
PackFlatbuffer<CompressedBuffer>(compressed_pattern);
if (!zlib_decompressor->Decompress(