Fixes datetime model overtriggering, SMS shortcodes being linkified to maps, certain phone number formats not being recognized, incorrect date jumps when the granularity of the date is less than a day, and incorrect start of week for non-US locales. (Developed in Google3, synced using the export_to_aosp.sh script.) Test: bit FrameworksCoreTests:android.view.textclassifier.TextClassificationManagerTest Test: bit CtsViewTestCases:android.view.textclassifier.cts.TextClassificationManagerTest Bug: 77799341 Bug: 76199334 Bug: 77800082 Change-Id: Ib23bc27c63ff69673532851254f2800108e74dda

commit: 434442da2a906db90779c376d95aa91f5428694b [log] [tgz]
author: Lukas Zilka <zilka@google.com> Wed Apr 25 11:38:51 2018 +0200
committer: Lukas Zilka <zilka@google.com> Mon Apr 30 09:56:13 2018 +0200
tree: 76accc92cb0998050532ebccd3055acfc30d5499
parent: 0494cf0007eb41d1729ecd1407a4ed6bf85066c3 [diff] [blame]
diff --git a/text-classifier_test.cc b/text-classifier_test.cc
index 440cedf..c8ced76 100644
--- a/text-classifier_test.cc
+++ b/text-classifier_test.cc

@@ -1010,7 +1010,7 @@
   result.clear();
   options.reference_timezone = "Europe/Zurich";
   options.locales = "en-US";
-  result = classifier->ClassifyText("03/05", {0, 5}, options);
+  result = classifier->ClassifyText("03.05.1970", {0, 10}, options);
 
   ASSERT_EQ(result.size(), 1);
   EXPECT_THAT(result[0].collection, "date");
@@ -1020,8 +1020,8 @@
 
   result.clear();
   options.reference_timezone = "Europe/Zurich";
-  options.locales = "en-GB,en-US";
-  result = classifier->ClassifyText("03/05", {0, 5}, options);
+  options.locales = "de";
+  result = classifier->ClassifyText("03.05.1970", {0, 10}, options);
 
   ASSERT_EQ(result.size(), 1);
   EXPECT_THAT(result[0].collection, "date");
@@ -1212,6 +1212,44 @@
 #endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
 
 #ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
+TEST_P(TextClassifierTest, MaxTokenLength) {
+  CREATE_UNILIB_FOR_TESTING;
+  const std::string test_model = ReadFile(GetModelPath() + GetParam());
+  std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
+
+  std::unique_ptr<TextClassifier> classifier;
+
+  // With an unrestricted number of tokens, classification should behave normally.
+  unpacked_model->classification_options->max_num_tokens = -1;
+
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(Model::Pack(builder, unpacked_model.get()));
+  classifier = TextClassifier::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder.GetBufferPointer()),
+      builder.GetSize(), &unilib);
+  ASSERT_TRUE(classifier);
+
+  EXPECT_EQ(FirstResult(classifier->ClassifyText(
+                "I live at 350 Third Street, Cambridge.", {10, 37})),
+            "address");
+
+  // Restrict the maximum number of tokens to 3 to suppress the classification.
+  unpacked_model->classification_options->max_num_tokens = 3;
+
+  flatbuffers::FlatBufferBuilder builder2;
+  builder2.Finish(Model::Pack(builder2, unpacked_model.get()));
+  classifier = TextClassifier::FromUnownedBuffer(
+      reinterpret_cast<const char*>(builder2.GetBufferPointer()),
+      builder2.GetSize(), &unilib);
+  ASSERT_TRUE(classifier);
+
+  EXPECT_EQ(FirstResult(classifier->ClassifyText(
+                "I live at 350 Third Street, Cambridge.", {10, 37})),
+            "other");
+}
+#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
+
+#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
 TEST_P(TextClassifierTest, MinAddressTokenLength) {
   CREATE_UNILIB_FOR_TESTING;
   const std::string test_model = ReadFile(GetModelPath() + GetParam());
commit	434442da2a906db90779c376d95aa91f5428694b	[log] [tgz]
author	Lukas Zilka <zilka@google.com>	Wed Apr 25 11:38:51 2018 +0200
committer	Lukas Zilka <zilka@google.com>	Mon Apr 30 09:56:13 2018 +0200
tree	76accc92cb0998050532ebccd3055acfc30d5499
parent	0494cf0007eb41d1729ecd1407a4ed6bf85066c3 [diff] [blame]