Fixes datetime model overtriggering, sms shortcodes being linkified to
maps, not recognizing certain phone number formats, and incorrect date
jumps when the granularity of the date is less than a day, and incorrect
start of week for non-US locales.

(Developed in Google3, synced using export_to_aosp.sh script)

Test: bit FrameworksCoreTests:android.view.textclassifier.TextClassificationManagerTest
Test: bit CtsViewTestCases:android.view.textclassifier.cts.TextClassificationManagerTest

Bug: 77799341
Bug: 76199334
Bug: 77800082
Change-Id: Ib23bc27c63ff69673532851254f2800108e74dda
diff --git a/datetime/parser_test.cc b/datetime/parser_test.cc
index 36525e2..e61ed12 100644
--- a/datetime/parser_test.cc
+++ b/datetime/parser_test.cc
@@ -76,28 +76,41 @@
                        const int64 expected_ms_utc,
                        DatetimeGranularity expected_granularity,
                        bool anchor_start_end = false,
-                       const std::string& timezone = "Europe/Zurich") {
-    auto expected_start_index = marked_text.find("{");
-    EXPECT_TRUE(expected_start_index != std::string::npos);
-    auto expected_end_index = marked_text.find("}");
-    EXPECT_TRUE(expected_end_index != std::string::npos);
+                       const std::string& timezone = "Europe/Zurich",
+                       const std::string& locales = "en-US") {
+    const UnicodeText marked_text_unicode =
+        UTF8ToUnicodeText(marked_text, /*do_copy=*/false);
+    auto brace_open_it =
+        std::find(marked_text_unicode.begin(), marked_text_unicode.end(), '{');
+    auto brace_end_it =
+        std::find(marked_text_unicode.begin(), marked_text_unicode.end(), '}');
+    TC_CHECK(brace_open_it != marked_text_unicode.end());
+    TC_CHECK(brace_end_it != marked_text_unicode.end());
 
     std::string text;
-    text += std::string(marked_text.begin(),
-                        marked_text.begin() + expected_start_index);
-    text += std::string(marked_text.begin() + expected_start_index + 1,
-                        marked_text.begin() + expected_end_index);
-    text += std::string(marked_text.begin() + expected_end_index + 1,
-                        marked_text.end());
+    text +=
+        UnicodeText::UTF8Substring(marked_text_unicode.begin(), brace_open_it);
+    text += UnicodeText::UTF8Substring(std::next(brace_open_it), brace_end_it);
+    text += UnicodeText::UTF8Substring(std::next(brace_end_it),
+                                       marked_text_unicode.end());
 
     std::vector<DatetimeParseResultSpan> results;
 
-    if (!parser_->Parse(text, 0, timezone, /*locales=*/"", ModeFlag_ANNOTATION,
+    if (!parser_->Parse(text, 0, timezone, locales, ModeFlag_ANNOTATION,
                         anchor_start_end, &results)) {
       TC_LOG(ERROR) << text;
       TC_CHECK(false);
     }
-    EXPECT_TRUE(!results.empty());
+    if (results.empty()) {
+      TC_LOG(ERROR) << "No results.";
+      return false;
+    }
+
+    const int expected_start_index =
+        std::distance(marked_text_unicode.begin(), brace_open_it);
+    // The -1 bellow is to account for the opening bracket character.
+    const int expected_end_index =
+        std::distance(marked_text_unicode.begin(), brace_end_it) - 1;
 
     std::vector<DatetimeParseResultSpan> filtered_results;
     for (const DatetimeParseResultSpan& result : results) {
@@ -108,7 +121,7 @@
     }
 
     const std::vector<DatetimeParseResultSpan> expected{
-        {{expected_start_index, expected_end_index - 1},
+        {{expected_start_index, expected_end_index},
          {expected_ms_utc, expected_granularity},
          /*target_classification_score=*/1.0,
          /*priority_score=*/0.0}};
@@ -126,6 +139,14 @@
     return matches;
   }
 
+  bool ParsesCorrectlyGerman(const std::string& marked_text,
+                             const int64 expected_ms_utc,
+                             DatetimeGranularity expected_granularity) {
+    return ParsesCorrectly(marked_text, expected_ms_utc, expected_granularity,
+                           /*anchor_start_end=*/false,
+                           /*timezone=*/"Europe/Zurich", /*locales=*/"de");
+  }
+
  protected:
   std::string model_buffer_;
   std::unique_ptr<TextClassifier> classifier_;
@@ -143,39 +164,26 @@
 TEST_F(ParserTest, Parse) {
   EXPECT_TRUE(
       ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY));
-  EXPECT_TRUE(ParsesCorrectly("{1 2 2018}", 1514847600000, GRANULARITY_DAY));
   EXPECT_TRUE(
       ParsesCorrectly("{january 31 2018}", 1517353200000, GRANULARITY_DAY));
   EXPECT_TRUE(ParsesCorrectly("lorem {1 january 2018} ipsum", 1514761200000,
                               GRANULARITY_DAY));
-  EXPECT_TRUE(ParsesCorrectly("{19/apr/2010:06:36:15}", 1271651775000,
-                              GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{09/Mar/2004 22:02:40}", 1078866160000,
                               GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{Dec 2, 2010 2:39:58 AM}", 1291253998000,
                               GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{Jun 09 2011 15:28:14}", 1307626094000,
                               GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectly("{Apr 20 00:00:35 2010}", 1271714435000,
-                              GRANULARITY_SECOND));
   EXPECT_TRUE(
       ParsesCorrectly("{Mar 16 08:12:04}", 6419524000, GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectly("{2012-10-14T22:11:20}", 1350245480000,
-                              GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectly("{2014-07-01T14:59:55}.711Z", 1404219595000,
-                              GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{2010-06-26 02:31:29},573", 1277512289000,
                               GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{2006/01/22 04:11:05}", 1137899465000,
                               GRANULARITY_SECOND));
-  EXPECT_TRUE(
-      ParsesCorrectly("{150423 11:42:35}", 1429782155000, GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{11:42:35}", 38555000, GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{11:42:35}.173", 38555000, GRANULARITY_SECOND));
   EXPECT_TRUE(
       ParsesCorrectly("{23/Apr 11:42:35},173", 9715355000, GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectly("{23/Apr/2015:11:42:35}", 1429782155000,
-                              GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{23/Apr/2015 11:42:35}", 1429782155000,
                               GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{23-Apr-2015 11:42:35}", 1429782155000,
@@ -192,18 +200,14 @@
                               GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{04/23/2015 11:42:35}.883", 1429782155000,
                               GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectly("{8/5/2011 3:31:18 AM}:234}", 1312507878000,
-                              GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{9/28/2011 2:23:15 PM}", 1317212595000,
                               GRANULARITY_SECOND));
-  EXPECT_TRUE(ParsesCorrectly("{19/apr/2010:06:36:15}", 1271651775000,
-                              GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly(
       "Are sentiments apartments decisively the especially alteration. "
       "Thrown shy denote ten ladies though ask saw. Or by to he going "
       "think order event music. Incommode so intention defective at "
       "convinced. Led income months itself and houses you. After nor "
-      "you leave might share court balls. {19/apr/2010:06:36:15} Are "
+      "you leave might share court balls. {19/apr/2010 06:36:15} Are "
       "sentiments apartments decisively the especially alteration. "
       "Thrown shy denote ten ladies though ask saw. Or by to he going "
       "think order event music. Incommode so intention defective at "
@@ -212,8 +216,8 @@
       1271651775000, GRANULARITY_SECOND));
   EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30}", 1514777400000,
                               GRANULARITY_MINUTE));
-  EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4}", 1514775600000,
-                              GRANULARITY_HOUR));
+  EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30 am}", 1514777400000,
+                              GRANULARITY_MINUTE));
   EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4pm}", 1514818800000,
                               GRANULARITY_HOUR));
 
@@ -250,6 +254,117 @@
                           /*anchor_start_end=*/true));
 }
 
+TEST_F(ParserTest, ParseGerman) {
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{Januar 1 2018}", 1514761200000, GRANULARITY_DAY));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{1 2 2018}", 1517439600000, GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectlyGerman("lorem {1 Januar 2018} ipsum",
+                                    1514761200000, GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{19/Apr/2010:06:36:15}", 1271651775000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{09/März/2004 22:02:40}", 1078866160000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{Dez 2, 2010 2:39:58}", 1291253998000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{Juni 09 2011 15:28:14}", 1307626094000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{März 16 08:12:04}", 6419524000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{2010-06-26 02:31:29},573", 1277512289000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{2006/01/22 04:11:05}", 1137899465000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{11:42:35}", 38555000, GRANULARITY_SECOND));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{11:42:35}.173", 38555000, GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr 11:42:35},173", 9715355000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr/2015:11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr/2015 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23-Apr-2015 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23-Apr-2015 11:42:35}.883", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23 Apr 2015 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23 Apr 2015 11:42:35}.883", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/15 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/2015 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/2015 11:42:35}.883", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{19/apr/2010:06:36:15}", 1271651775000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4:30}", 1514777400000,
+                                    GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4:30 nachm}",
+                                    1514820600000, GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4 nachm}", 1514818800000,
+                                    GRANULARITY_HOUR));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{14.03.2017}", 1489446000000, GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{heute}", -3600000, GRANULARITY_DAY));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{nächste Woche}", 342000000, GRANULARITY_WEEK));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{nächsten Tag}", 82800000, GRANULARITY_DAY));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{in drei Tagen}", 255600000, GRANULARITY_DAY));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{in drei Wochen}", 1551600000, GRANULARITY_WEEK));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{vor drei Tagen}", -262800000, GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{morgen}", 82800000, GRANULARITY_DAY));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{morgen um 4:00}", 97200000, GRANULARITY_MINUTE));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{morgen um 4}", 97200000, GRANULARITY_HOUR));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{nächsten Mittwoch}", 514800000, GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{nächsten Mittwoch um 4}", 529200000,
+                                    GRANULARITY_HOUR));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{Vor drei Tagen}", -262800000, GRANULARITY_DAY));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{in einer woche}", 342000000, GRANULARITY_WEEK));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{in einer tag}", 82800000, GRANULARITY_DAY));
+}
+
+TEST_F(ParserTest, ParseNonUs) {
+  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1430431200000, GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich",
+                              /*locales=*/"en-GB"));
+  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1430431200000, GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich", /*locales=*/"en"));
+}
+
+TEST_F(ParserTest, ParseUs) {
+  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1420412400000, GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich",
+                              /*locales=*/"en-US"));
+  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1420412400000, GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich",
+                              /*locales=*/"es-US"));
+}
+
+TEST_F(ParserTest, ParseUnknownLanguage) {
+  EXPECT_TRUE(ParsesCorrectly("bylo to {31. 12. 2015} v 6 hodin", 1451516400000,
+                              GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich", /*locales=*/"xx"));
+}
+
 class ParserLocaleTest : public testing::Test {
  public:
   void SetUp() override;
@@ -281,7 +396,8 @@
   model.locales.push_back("en-*");
   model.locales.push_back("zh-Hant-*");
   model.locales.push_back("*-CH");
-  model.locales.push_back("");
+  model.locales.push_back("default");
+  model.default_locales.push_back(6);
 
   AddPattern(/*regex=*/"en-US", /*locale=*/0, &model.patterns);
   AddPattern(/*regex=*/"en-CH", /*locale=*/1, &model.patterns);