Export lib3 to AOSP (external/libtextclassifier part) 1. Include both annotator (existing one) and actions(new one for smart reply and actions) 2. One more model file. actions_suggestions.model is dropped to /etc/textclassifier./ It is around 7.5mb for now, we will slim down it later. 3. The Java counterpart of the JNI is now moved from frameworks/base to here. Test: atest android.view.textclassifier.TextClassificationManagerTest Change-Id: Icb2458967ef51efa2952b3eaddefbf1f7b359930

commit: 6c4cc67c9849339d4e4dfffcfa3eb2342f767890 [log] [tgz]
author: Tony Mak <tonymak@google.com> Mon Sep 17 11:48:50 2018 +0100
committer: Tony Mak <tonymak@google.com> Tue Sep 25 18:36:59 2018 +0100
tree: 1694602c9fd5abe64a26d6363c82b59baf9fa2b0
parent: 30f477bb6871cfebf3136c71da5c14ef3aa69c97 [diff] [blame]
diff --git a/annotator/datetime/parser_test.cc b/annotator/datetime/parser_test.cc
new file mode 100644
index 0000000..6bd6d10
--- /dev/null
+++ b/annotator/datetime/parser_test.cc

@@ -0,0 +1,413 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <time.h>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "annotator/annotator.h"
+#include "annotator/datetime/parser.h"
+#include "annotator/model_generated.h"
+#include "annotator/types-test-util.h"
+
+using testing::ElementsAreArray;
+
+namespace libtextclassifier3 {
+namespace {
+
+std::string GetModelPath() {
+  return LIBTEXTCLASSIFIER_TEST_DATA_DIR;
+}
+
+std::string ReadFile(const std::string& file_name) {
+  std::ifstream file_stream(file_name);
+  return std::string(std::istreambuf_iterator<char>(file_stream), {});
+}
+
+std::string FormatMillis(int64 time_ms_utc) {
+  long time_seconds = time_ms_utc / 1000;  // NOLINT
+  // Format time, "ddd yyyy-mm-dd hh:mm:ss zzz"
+  char buffer[512];
+  strftime(buffer, sizeof(buffer), "%a %Y-%m-%d %H:%M:%S %Z",
+           localtime(&time_seconds));
+  return std::string(buffer);
+}
+
+class ParserTest : public testing::Test {
+ public:
+  void SetUp() override {
+    model_buffer_ = ReadFile(GetModelPath() + "test_model.fb");
+    classifier_ = Annotator::FromUnownedBuffer(model_buffer_.data(),
+                                               model_buffer_.size(), &unilib_);
+    TC3_CHECK(classifier_);
+    parser_ = classifier_->DatetimeParserForTests();
+  }
+
+  bool HasNoResult(const std::string& text, bool anchor_start_end = false,
+                   const std::string& timezone = "Europe/Zurich") {
+    std::vector<DatetimeParseResultSpan> results;
+    if (!parser_->Parse(text, 0, timezone, /*locales=*/"", ModeFlag_ANNOTATION,
+                        anchor_start_end, &results)) {
+      TC3_LOG(ERROR) << text;
+      TC3_CHECK(false);
+    }
+    return results.empty();
+  }
+
+  bool ParsesCorrectly(const std::string& marked_text,
+                       const int64 expected_ms_utc,
+                       DatetimeGranularity expected_granularity,
+                       bool anchor_start_end = false,
+                       const std::string& timezone = "Europe/Zurich",
+                       const std::string& locales = "en-US") {
+    const UnicodeText marked_text_unicode =
+        UTF8ToUnicodeText(marked_text, /*do_copy=*/false);
+    auto brace_open_it =
+        std::find(marked_text_unicode.begin(), marked_text_unicode.end(), '{');
+    auto brace_end_it =
+        std::find(marked_text_unicode.begin(), marked_text_unicode.end(), '}');
+    TC3_CHECK(brace_open_it != marked_text_unicode.end());
+    TC3_CHECK(brace_end_it != marked_text_unicode.end());
+
+    std::string text;
+    text +=
+        UnicodeText::UTF8Substring(marked_text_unicode.begin(), brace_open_it);
+    text += UnicodeText::UTF8Substring(std::next(brace_open_it), brace_end_it);
+    text += UnicodeText::UTF8Substring(std::next(brace_end_it),
+                                       marked_text_unicode.end());
+
+    std::vector<DatetimeParseResultSpan> results;
+
+    if (!parser_->Parse(text, 0, timezone, locales, ModeFlag_ANNOTATION,
+                        anchor_start_end, &results)) {
+      TC3_LOG(ERROR) << text;
+      TC3_CHECK(false);
+    }
+    if (results.empty()) {
+      TC3_LOG(ERROR) << "No results.";
+      return false;
+    }
+
+    const int expected_start_index =
+        std::distance(marked_text_unicode.begin(), brace_open_it);
+    // The -1 bellow is to account for the opening bracket character.
+    const int expected_end_index =
+        std::distance(marked_text_unicode.begin(), brace_end_it) - 1;
+
+    std::vector<DatetimeParseResultSpan> filtered_results;
+    for (const DatetimeParseResultSpan& result : results) {
+      if (SpansOverlap(result.span,
+                       {expected_start_index, expected_end_index})) {
+        filtered_results.push_back(result);
+      }
+    }
+
+    const std::vector<DatetimeParseResultSpan> expected{
+        {{expected_start_index, expected_end_index},
+         {expected_ms_utc, expected_granularity},
+         /*target_classification_score=*/1.0,
+         /*priority_score=*/0.0}};
+    const bool matches =
+        testing::Matches(ElementsAreArray(expected))(filtered_results);
+    if (!matches) {
+      TC3_LOG(ERROR) << "Expected: " << expected[0] << " which corresponds to: "
+                     << FormatMillis(expected[0].data.time_ms_utc);
+      for (int i = 0; i < filtered_results.size(); ++i) {
+        TC3_LOG(ERROR) << "Actual[" << i << "]: " << filtered_results[i]
+                       << " which corresponds to: "
+                       << FormatMillis(filtered_results[i].data.time_ms_utc);
+      }
+    }
+    return matches;
+  }
+
+  bool ParsesCorrectlyGerman(const std::string& marked_text,
+                             const int64 expected_ms_utc,
+                             DatetimeGranularity expected_granularity) {
+    return ParsesCorrectly(marked_text, expected_ms_utc, expected_granularity,
+                           /*anchor_start_end=*/false,
+                           /*timezone=*/"Europe/Zurich", /*locales=*/"de");
+  }
+
+ protected:
+  std::string model_buffer_;
+  std::unique_ptr<Annotator> classifier_;
+  const DatetimeParser* parser_;
+  UniLib unilib_;
+};
+
+// Test with just a few cases to make debugging of general failures easier.
+TEST_F(ParserTest, ParseShort) {
+  EXPECT_TRUE(
+      ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY));
+}
+
+TEST_F(ParserTest, Parse) {
+  EXPECT_TRUE(
+      ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY));
+  EXPECT_TRUE(
+      ParsesCorrectly("{january 31 2018}", 1517353200000, GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectly("lorem {1 january 2018} ipsum", 1514761200000,
+                              GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectly("{09/Mar/2004 22:02:40}", 1078866160000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{Dec 2, 2010 2:39:58 AM}", 1291253998000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{Jun 09 2011 15:28:14}", 1307626094000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(
+      ParsesCorrectly("{Mar 16 08:12:04}", 6419524000, GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{2010-06-26 02:31:29}", 1277512289000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{2006/01/22 04:11:05}", 1137899465000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{11:42:35}", 38555000, GRANULARITY_SECOND));
+  EXPECT_TRUE(
+      ParsesCorrectly("{23/Apr 11:42:35}", 9715355000, GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{23/Apr/2015 11:42:35}", 1429782155000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{23-Apr-2015 11:42:35}", 1429782155000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{23 Apr 2015 11:42:35}", 1429782155000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{04/23/15 11:42:35}", 1429782155000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{04/23/2015 11:42:35}", 1429782155000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{9/28/2011 2:23:15 PM}", 1317212595000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly(
+      "Are sentiments apartments decisively the especially alteration. "
+      "Thrown shy denote ten ladies though ask saw. Or by to he going "
+      "think order event music. Incommode so intention defective at "
+      "convinced. Led income months itself and houses you. After nor "
+      "you leave might share court balls. {19/apr/2010 06:36:15} Are "
+      "sentiments apartments decisively the especially alteration. "
+      "Thrown shy denote ten ladies though ask saw. Or by to he going "
+      "think order event music. Incommode so intention defective at "
+      "convinced. Led income months itself and houses you. After nor "
+      "you leave might share court balls. ",
+      1271651775000, GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30}", 1514777400000,
+                              GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30 am}", 1514777400000,
+                              GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4pm}", 1514818800000,
+                              GRANULARITY_HOUR));
+
+  EXPECT_TRUE(ParsesCorrectly("{today at 0:00}", -3600000, GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectly("{today at 0:00}", -57600000, GRANULARITY_MINUTE,
+                              /*anchor_start_end=*/false,
+                              "America/Los_Angeles"));
+  EXPECT_TRUE(
+      ParsesCorrectly("{tomorrow at 4:00}", 97200000, GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectly("{tomorrow at 4am}", 97200000, GRANULARITY_HOUR));
+  EXPECT_TRUE(
+      ParsesCorrectly("{wednesday at 4am}", 529200000, GRANULARITY_HOUR));
+  EXPECT_TRUE(ParsesCorrectly("last seen {today at 9:01 PM}", 72060000,
+                              GRANULARITY_MINUTE));
+}
+
+TEST_F(ParserTest, ParseWithAnchor) {
+  EXPECT_TRUE(ParsesCorrectly("{January 1, 1988}", 567990000000,
+                              GRANULARITY_DAY, /*anchor_start_end=*/false));
+  EXPECT_TRUE(ParsesCorrectly("{January 1, 1988}", 567990000000,
+                              GRANULARITY_DAY, /*anchor_start_end=*/true));
+  EXPECT_TRUE(ParsesCorrectly("lorem {1 january 2018} ipsum", 1514761200000,
+                              GRANULARITY_DAY, /*anchor_start_end=*/false));
+  EXPECT_TRUE(HasNoResult("lorem 1 january 2018 ipsum",
+                          /*anchor_start_end=*/true));
+}
+
+TEST_F(ParserTest, ParseGerman) {
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{Januar 1 2018}", 1514761200000, GRANULARITY_DAY));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{1 2 2018}", 1517439600000, GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectlyGerman("lorem {1 Januar 2018} ipsum",
+                                    1514761200000, GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{19/Apr/2010:06:36:15}", 1271651775000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{09/März/2004 22:02:40}", 1078866160000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{Dez 2, 2010 2:39:58}", 1291253998000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{Juni 09 2011 15:28:14}", 1307626094000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{März 16 08:12:04}", 6419524000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{2010-06-26 02:31:29}", 1277512289000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{2006/01/22 04:11:05}", 1137899465000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{11:42:35}", 38555000, GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr 11:42:35}", 9715355000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr/2015:11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr/2015 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23-Apr-2015 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23 Apr 2015 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/15 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/2015 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{19/apr/2010:06:36:15}", 1271651775000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4:30}", 1514777400000,
+                                    GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4:30 nachm}",
+                                    1514820600000, GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4 nachm}", 1514818800000,
+                                    GRANULARITY_HOUR));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{14.03.2017}", 1489446000000, GRANULARITY_DAY));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{morgen 0:00}", 82800000, GRANULARITY_MINUTE));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{morgen um 4:00}", 97200000, GRANULARITY_MINUTE));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{morgen um 4 vorm}", 97200000, GRANULARITY_HOUR));
+}
+
+TEST_F(ParserTest, ParseNonUs) {
+  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1430431200000, GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich",
+                              /*locales=*/"en-GB"));
+  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1430431200000, GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich", /*locales=*/"en"));
+}
+
+TEST_F(ParserTest, ParseUs) {
+  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1420412400000, GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich",
+                              /*locales=*/"en-US"));
+  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1420412400000, GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich",
+                              /*locales=*/"es-US"));
+}
+
+TEST_F(ParserTest, ParseUnknownLanguage) {
+  EXPECT_TRUE(ParsesCorrectly("bylo to {31. 12. 2015} v 6 hodin", 1451516400000,
+                              GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich", /*locales=*/"xx"));
+}
+
+class ParserLocaleTest : public testing::Test {
+ public:
+  void SetUp() override;
+  bool HasResult(const std::string& input, const std::string& locales);
+
+ protected:
+  UniLib unilib_;
+  CalendarLib calendarlib_;
+  flatbuffers::FlatBufferBuilder builder_;
+  std::unique_ptr<DatetimeParser> parser_;
+};
+
+void AddPattern(const std::string& regex, int locale,
+                std::vector<std::unique_ptr<DatetimeModelPatternT>>* patterns) {
+  patterns->emplace_back(new DatetimeModelPatternT);
+  patterns->back()->regexes.emplace_back(new DatetimeModelPattern_::RegexT);
+  patterns->back()->regexes.back()->pattern = regex;
+  patterns->back()->regexes.back()->groups.push_back(
+      DatetimeGroupType_GROUP_UNUSED);
+  patterns->back()->locales.push_back(locale);
+}
+
+void ParserLocaleTest::SetUp() {
+  DatetimeModelT model;
+  model.use_extractors_for_locating = false;
+  model.locales.clear();
+  model.locales.push_back("en-US");
+  model.locales.push_back("en-CH");
+  model.locales.push_back("zh-Hant");
+  model.locales.push_back("en-*");
+  model.locales.push_back("zh-Hant-*");
+  model.locales.push_back("*-CH");
+  model.locales.push_back("default");
+  model.default_locales.push_back(6);
+
+  AddPattern(/*regex=*/"en-US", /*locale=*/0, &model.patterns);
+  AddPattern(/*regex=*/"en-CH", /*locale=*/1, &model.patterns);
+  AddPattern(/*regex=*/"zh-Hant", /*locale=*/2, &model.patterns);
+  AddPattern(/*regex=*/"en-all", /*locale=*/3, &model.patterns);
+  AddPattern(/*regex=*/"zh-Hant-all", /*locale=*/4, &model.patterns);
+  AddPattern(/*regex=*/"all-CH", /*locale=*/5, &model.patterns);
+  AddPattern(/*regex=*/"default", /*locale=*/6, &model.patterns);
+
+  builder_.Finish(DatetimeModel::Pack(builder_, &model));
+  const DatetimeModel* model_fb =
+      flatbuffers::GetRoot<DatetimeModel>(builder_.GetBufferPointer());
+  ASSERT_TRUE(model_fb);
+
+  parser_ = DatetimeParser::Instance(model_fb, unilib_, calendarlib_,
+                                     /*decompressor=*/nullptr);
+  ASSERT_TRUE(parser_);
+}
+
+bool ParserLocaleTest::HasResult(const std::string& input,
+                                 const std::string& locales) {
+  std::vector<DatetimeParseResultSpan> results;
+  EXPECT_TRUE(parser_->Parse(input, /*reference_time_ms_utc=*/0,
+                             /*reference_timezone=*/"", locales,
+                             ModeFlag_ANNOTATION, false, &results));
+  return results.size() == 1;
+}
+
+TEST_F(ParserLocaleTest, English) {
+  EXPECT_TRUE(HasResult("en-US", /*locales=*/"en-US"));
+  EXPECT_FALSE(HasResult("en-CH", /*locales=*/"en-US"));
+  EXPECT_FALSE(HasResult("en-US", /*locales=*/"en-CH"));
+  EXPECT_TRUE(HasResult("en-CH", /*locales=*/"en-CH"));
+  EXPECT_TRUE(HasResult("default", /*locales=*/"en-CH"));
+}
+
+TEST_F(ParserLocaleTest, TraditionalChinese) {
+  EXPECT_TRUE(HasResult("zh-Hant-all", /*locales=*/"zh-Hant"));
+  EXPECT_TRUE(HasResult("zh-Hant-all", /*locales=*/"zh-Hant-TW"));
+  EXPECT_TRUE(HasResult("zh-Hant-all", /*locales=*/"zh-Hant-SG"));
+  EXPECT_FALSE(HasResult("zh-Hant-all", /*locales=*/"zh-SG"));
+  EXPECT_FALSE(HasResult("zh-Hant-all", /*locales=*/"zh"));
+  EXPECT_TRUE(HasResult("default", /*locales=*/"zh"));
+  EXPECT_TRUE(HasResult("default", /*locales=*/"zh-Hant-SG"));
+}
+
+TEST_F(ParserLocaleTest, SwissEnglish) {
+  EXPECT_TRUE(HasResult("all-CH", /*locales=*/"de-CH"));
+  EXPECT_TRUE(HasResult("all-CH", /*locales=*/"en-CH"));
+  EXPECT_TRUE(HasResult("en-all", /*locales=*/"en-CH"));
+  EXPECT_FALSE(HasResult("all-CH", /*locales=*/"de-DE"));
+  EXPECT_TRUE(HasResult("default", /*locales=*/"de-CH"));
+  EXPECT_TRUE(HasResult("default", /*locales=*/"en-CH"));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
commit	6c4cc67c9849339d4e4dfffcfa3eb2342f767890	[log] [tgz]
author	Tony Mak <tonymak@google.com>	Mon Sep 17 11:48:50 2018 +0100
committer	Tony Mak <tonymak@google.com>	Tue Sep 25 18:36:59 2018 +0100
tree	1694602c9fd5abe64a26d6363c82b59baf9fa2b0
parent	30f477bb6871cfebf3136c71da5c14ef3aa69c97 [diff] [blame]