Fixes utf8 handling, datetime model and flight number model, makes
models smaller by compressing the regex rules, and adds i18n models.
(sync from google3)
Test: bit FrameworksCoreTests:android.view.textclassifier.TextClassificationManagerTest
Test: bit CtsViewTestCases:android.view.textclassifier.cts.TextClassificationManagerTest
Bug: 64929062
Bug: 77223425
Change-Id: I04472e6b247e824bf2b745077c50fcde4269aefc
diff --git a/zlib-utils_test.cc b/zlib-utils_test.cc
new file mode 100644
index 0000000..d3b5a19
--- /dev/null
+++ b/zlib-utils_test.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "zlib-utils.h"
+
+#include <memory>
+
+#include "model_generated.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier2 {
+
+TEST(ZlibUtilsTest, CompressModel) {
+ ModelT model;
+ model.regex_model.reset(new RegexModelT);
+ model.regex_model->patterns.emplace_back(new RegexModel_::PatternT);
+ model.regex_model->patterns.back()->pattern = "this is a test pattern";
+ model.regex_model->patterns.emplace_back(new RegexModel_::PatternT);
+ model.regex_model->patterns.back()->pattern = "this is a second test pattern";
+
+ model.datetime_model.reset(new DatetimeModelT);
+ model.datetime_model->patterns.emplace_back(new DatetimeModelPatternT);
+ model.datetime_model->patterns.back()->regexes.emplace_back(
+ new DatetimeModelPattern_::RegexT);
+ model.datetime_model->patterns.back()->regexes.back()->pattern =
+ "an example datetime pattern";
+ model.datetime_model->extractors.emplace_back(new DatetimeModelExtractorT);
+ model.datetime_model->extractors.back()->pattern =
+ "an example datetime extractor";
+
+ // Compress the model.
+ EXPECT_TRUE(CompressModel(&model));
+
+ // Sanity check that uncompressed field is removed.
+ EXPECT_TRUE(model.regex_model->patterns[0]->pattern.empty());
+ EXPECT_TRUE(model.regex_model->patterns[1]->pattern.empty());
+ EXPECT_TRUE(model.datetime_model->patterns[0]->regexes[0]->pattern.empty());
+ EXPECT_TRUE(model.datetime_model->extractors[0]->pattern.empty());
+
+ // Pack and load the model.
+ flatbuffers::FlatBufferBuilder builder;
+ builder.Finish(Model::Pack(builder, &model));
+ const Model* compressed_model =
+ GetModel(reinterpret_cast<const char*>(builder.GetBufferPointer()));
+ ASSERT_TRUE(compressed_model != nullptr);
+
+ // Decompress the fields again and check that they match the original.
+ std::unique_ptr<ZlibDecompressor> decompressor = ZlibDecompressor::Instance();
+ ASSERT_TRUE(decompressor != nullptr);
+ std::string uncompressed_pattern;
+ EXPECT_TRUE(decompressor->Decompress(
+ compressed_model->regex_model()->patterns()->Get(0)->compressed_pattern(),
+ &uncompressed_pattern));
+ EXPECT_EQ(uncompressed_pattern, "this is a test pattern");
+ EXPECT_TRUE(decompressor->Decompress(
+ compressed_model->regex_model()->patterns()->Get(1)->compressed_pattern(),
+ &uncompressed_pattern));
+ EXPECT_EQ(uncompressed_pattern, "this is a second test pattern");
+ EXPECT_TRUE(decompressor->Decompress(compressed_model->datetime_model()
+ ->patterns()
+ ->Get(0)
+ ->regexes()
+ ->Get(0)
+ ->compressed_pattern(),
+ &uncompressed_pattern));
+ EXPECT_EQ(uncompressed_pattern, "an example datetime pattern");
+ EXPECT_TRUE(decompressor->Decompress(compressed_model->datetime_model()
+ ->extractors()
+ ->Get(0)
+ ->compressed_pattern(),
+ &uncompressed_pattern));
+ EXPECT_EQ(uncompressed_pattern, "an example datetime extractor");
+}
+
+} // namespace libtextclassifier2