Export lib3 to AOSP (external/libtextclassifier part) 1. Include both annotator (the existing one) and actions (the new one, for smart reply and actions). 2. One more model file: actions_suggestions.model is dropped into /etc/textclassifier/. It is around 7.5 MB for now; we will slim it down later. 3. The Java counterpart of the JNI is now moved from frameworks/base to here. Test: atest android.view.textclassifier.TextClassificationManagerTest Change-Id: Icb2458967ef51efa2952b3eaddefbf1f7b359930

commit: 6c4cc67c9849339d4e4dfffcfa3eb2342f767890 [log] [tgz]
author: Tony Mak <tonymak@google.com> Mon Sep 17 11:48:50 2018 +0100
committer: Tony Mak <tonymak@google.com> Tue Sep 25 18:36:59 2018 +0100
tree: 1694602c9fd5abe64a26d6363c82b59baf9fa2b0
parent: 30f477bb6871cfebf3136c71da5c14ef3aa69c97 [diff]
diff --git a/annotator/datetime/extractor.cc b/annotator/datetime/extractor.cc
new file mode 100644
index 0000000..31229dd
--- /dev/null
+++ b/annotator/datetime/extractor.cc

@@ -0,0 +1,469 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/datetime/extractor.h"
+
+#include "utils/base/logging.h"
+
+namespace libtextclassifier3 {
+
+// Extracts structured date/time fields from the capture groups of the current
+// match.
+//
+// For every group of the rule that has a type other than GROUP_UNUSED and that
+// captured non-empty text, the text is parsed into the corresponding field of
+// `result`, the field's bit is set in `result->field_set_mask`, and
+// `result_span` is widened to cover the group.
+//
+// Returns false if the rule carries no group information, a group's text
+// cannot be retrieved, a typed group cannot be parsed, or the span cannot be
+// updated. `result->field_set_mask` and `*result_span` are reset on entry, so
+// they are overwritten even when the function fails.
+bool DatetimeExtractor::Extract(DateParseData* result,
+                                CodepointSpan* result_span) const {
+  result->field_set_mask = 0;
+  *result_span = {kInvalidIndex, kInvalidIndex};
+
+  // Without the per-group type table there is nothing to interpret.
+  if (rule_.regex->groups() == nullptr) {
+    return false;
+  }
+
+  for (int group_id = 0; group_id < rule_.regex->groups()->size(); group_id++) {
+    UnicodeText group_text;
+    const int group_type = rule_.regex->groups()->Get(group_id);
+    if (group_type == DatetimeGroupType_GROUP_UNUSED) {
+      continue;
+    }
+    if (!GroupTextFromMatch(group_id, &group_text)) {
+      TC3_LOG(ERROR) << "Couldn't retrieve group.";
+      return false;
+    }
+    // The pattern can have a group defined in a part that was not matched,
+    // e.g. an optional part. In this case we'll get an empty content here.
+    if (group_text.empty()) {
+      continue;
+    }
+    // Every case that ends in `break` falls through to UpdateMatchSpan()
+    // below; the `default` case uses `continue` and therefore does not
+    // contribute to the span.
+    switch (group_type) {
+      case DatetimeGroupType_GROUP_YEAR: {
+        if (!ParseYear(group_text, &(result->year))) {
+          TC3_LOG(ERROR) << "Couldn't extract YEAR.";
+          return false;
+        }
+        result->field_set_mask |= DateParseData::YEAR_FIELD;
+        break;
+      }
+      case DatetimeGroupType_GROUP_MONTH: {
+        if (!ParseMonth(group_text, &(result->month))) {
+          TC3_LOG(ERROR) << "Couldn't extract MONTH.";
+          return false;
+        }
+        result->field_set_mask |= DateParseData::MONTH_FIELD;
+        break;
+      }
+      case DatetimeGroupType_GROUP_DAY: {
+        if (!ParseDigits(group_text, &(result->day_of_month))) {
+          TC3_LOG(ERROR) << "Couldn't extract DAY.";
+          return false;
+        }
+        result->field_set_mask |= DateParseData::DAY_FIELD;
+        break;
+      }
+      case DatetimeGroupType_GROUP_HOUR: {
+        if (!ParseDigits(group_text, &(result->hour))) {
+          TC3_LOG(ERROR) << "Couldn't extract HOUR.";
+          return false;
+        }
+        result->field_set_mask |= DateParseData::HOUR_FIELD;
+        break;
+      }
+      case DatetimeGroupType_GROUP_MINUTE: {
+        if (!ParseDigits(group_text, &(result->minute))) {
+          TC3_LOG(ERROR) << "Couldn't extract MINUTE.";
+          return false;
+        }
+        result->field_set_mask |= DateParseData::MINUTE_FIELD;
+        break;
+      }
+      case DatetimeGroupType_GROUP_SECOND: {
+        if (!ParseDigits(group_text, &(result->second))) {
+          TC3_LOG(ERROR) << "Couldn't extract SECOND.";
+          return false;
+        }
+        result->field_set_mask |= DateParseData::SECOND_FIELD;
+        break;
+      }
+      case DatetimeGroupType_GROUP_AMPM: {
+        if (!ParseAMPM(group_text, &(result->ampm))) {
+          TC3_LOG(ERROR) << "Couldn't extract AMPM.";
+          return false;
+        }
+        result->field_set_mask |= DateParseData::AMPM_FIELD;
+        break;
+      }
+      case DatetimeGroupType_GROUP_RELATIONDISTANCE: {
+        if (!ParseRelationDistance(group_text, &(result->relation_distance))) {
+          TC3_LOG(ERROR) << "Couldn't extract RELATION_DISTANCE_FIELD.";
+          return false;
+        }
+        result->field_set_mask |= DateParseData::RELATION_DISTANCE_FIELD;
+        break;
+      }
+      case DatetimeGroupType_GROUP_RELATION: {
+        if (!ParseRelation(group_text, &(result->relation))) {
+          TC3_LOG(ERROR) << "Couldn't extract RELATION_FIELD.";
+          return false;
+        }
+        result->field_set_mask |= DateParseData::RELATION_FIELD;
+        break;
+      }
+      case DatetimeGroupType_GROUP_RELATIONTYPE: {
+        if (!ParseRelationType(group_text, &(result->relation_type))) {
+          TC3_LOG(ERROR) << "Couldn't extract RELATION_TYPE_FIELD.";
+          return false;
+        }
+        result->field_set_mask |= DateParseData::RELATION_TYPE_FIELD;
+        break;
+      }
+      // Dummy groups set no field but still extend the result span.
+      case DatetimeGroupType_GROUP_DUMMY1:
+      case DatetimeGroupType_GROUP_DUMMY2:
+        break;
+      // Unknown group types are logged and ignored; they neither set a field
+      // nor extend the span.
+      default:
+        TC3_LOG(INFO) << "Unknown group type.";
+        continue;
+    }
+    if (!UpdateMatchSpan(group_id, result_span)) {
+      TC3_LOG(ERROR) << "Couldn't update span.";
+      return false;
+    }
+  }
+
+  // Never hand back a half-initialised span: if either end is still invalid,
+  // mark the whole span invalid.
+  if (result_span->first == kInvalidIndex ||
+      result_span->second == kInvalidIndex) {
+    *result_span = {kInvalidIndex, kInvalidIndex};
+  }
+
+  return true;
+}
+
+// Looks up the extractor rule registered for `type` under this extractor's
+// locale. On success writes the rule id to `rule_id` and returns true; returns
+// false (leaving `rule_id` untouched) when no rule exists for the type or for
+// the locale.
+bool DatetimeExtractor::RuleIdForType(DatetimeExtractorType type,
+                                      int* rule_id) const {
+  const auto rules_for_type = type_and_locale_to_rule_.find(type);
+  if (rules_for_type != type_and_locale_to_rule_.end()) {
+    const auto& locale_to_rule = rules_for_type->second;
+    const auto rule_for_locale = locale_to_rule.find(locale_id_);
+    if (rule_for_locale != locale_to_rule.end()) {
+      *rule_id = rule_for_locale->second;
+      return true;
+    }
+  }
+  return false;
+}
+
+// Runs the extractor rule for `extractor_type` (in the current locale) over
+// `input`. Returns true when the rule finds a match. When `match_result` is
+// non-null it additionally receives the matched group, and a failure to read
+// that group turns the result into false.
+bool DatetimeExtractor::ExtractType(const UnicodeText& input,
+                                    DatetimeExtractorType extractor_type,
+                                    UnicodeText* match_result) const {
+  int extractor_rule_id;
+  if (!RuleIdForType(extractor_type, &extractor_rule_id)) {
+    return false;
+  }
+
+  const std::unique_ptr<UniLib::RegexMatcher> type_matcher =
+      rules_[extractor_rule_id]->Matcher(input);
+  if (type_matcher == nullptr) {
+    return false;
+  }
+
+  int regex_status;
+  if (!type_matcher->Find(&regex_status)) {
+    return false;
+  }
+
+  // The caller only wanted to know whether the rule matches.
+  if (match_result == nullptr) {
+    return true;
+  }
+
+  *match_result = type_matcher->Group(&regex_status);
+  return regex_status == UniLib::RegexMatcher::kNoError;
+}
+
+// Copies the text captured by group `group_id` of the current match into
+// `result`. Returns false if the matcher reports an error.
+bool DatetimeExtractor::GroupTextFromMatch(int group_id,
+                                           UnicodeText* result) const {
+  int regex_status;
+  *result = matcher_.Group(group_id, &regex_status);
+  return regex_status == UniLib::RegexMatcher::kNoError;
+}
+
+// Grows `span` so that it also covers capture group `group_id` of the current
+// match. An end of the span that is still kInvalidIndex is simply set to the
+// group's boundary. Returns false (without touching `span`) if the group's
+// start or end cannot be read from the matcher.
+bool DatetimeExtractor::UpdateMatchSpan(int group_id,
+                                        CodepointSpan* span) const {
+  int regex_status;
+  const int group_begin = matcher_.Start(group_id, &regex_status);
+  if (regex_status != UniLib::RegexMatcher::kNoError) {
+    return false;
+  }
+  const int group_end = matcher_.End(group_id, &regex_status);
+  if (regex_status != UniLib::RegexMatcher::kNoError) {
+    return false;
+  }
+
+  const bool extends_left =
+      span->first == kInvalidIndex || group_begin < span->first;
+  if (extends_left) {
+    span->first = group_begin;
+  }
+
+  const bool extends_right =
+      span->second == kInvalidIndex || group_end > span->second;
+  if (extends_right) {
+    span->second = group_end;
+  }
+
+  return true;
+}
+
+// Tries the extractor types of `mapping` in order and stores in `result` the
+// value paired with the first type whose rule matches `input`. Returns false,
+// leaving `result` untouched, when none of them matches.
+template <typename T>
+bool DatetimeExtractor::MapInput(
+    const UnicodeText& input,
+    const std::vector<std::pair<DatetimeExtractorType, T>>& mapping,
+    T* result) const {
+  for (auto entry = mapping.begin(); entry != mapping.end(); ++entry) {
+    if (!ExtractType(input, entry->first)) {
+      continue;
+    }
+    *result = entry->second;
+    return true;
+  }
+  return false;
+}
+
+// Parses a number that is written out in words (e.g. "twenty one") from
+// `input`.
+//
+// Every written-number extractor rule (ZERO..NINETY, HUNDRED, THOUSAND) of
+// the current locale is run over `input`; the values of all matches are then
+// combined in the order in which they occur in the text.
+//
+// Returns false when a number rule is missing for the locale, a matcher cannot
+// be created, a match position cannot be read, no number word is found at
+// all, or the combined value grows beyond the supported range. On success the
+// value is written to `parsed_number`.
+bool DatetimeExtractor::ParseWrittenNumber(const UnicodeText& input,
+                                           int* parsed_number) const {
+  // (start position in `input`, numeric value) of every number word found.
+  std::vector<std::pair<int, int>> found_numbers;
+  for (const auto& type_value_pair :
+       std::vector<std::pair<DatetimeExtractorType, int>>{
+           {DatetimeExtractorType_ZERO, 0},
+           {DatetimeExtractorType_ONE, 1},
+           {DatetimeExtractorType_TWO, 2},
+           {DatetimeExtractorType_THREE, 3},
+           {DatetimeExtractorType_FOUR, 4},
+           {DatetimeExtractorType_FIVE, 5},
+           {DatetimeExtractorType_SIX, 6},
+           {DatetimeExtractorType_SEVEN, 7},
+           {DatetimeExtractorType_EIGHT, 8},
+           {DatetimeExtractorType_NINE, 9},
+           {DatetimeExtractorType_TEN, 10},
+           {DatetimeExtractorType_ELEVEN, 11},
+           {DatetimeExtractorType_TWELVE, 12},
+           {DatetimeExtractorType_THIRTEEN, 13},
+           {DatetimeExtractorType_FOURTEEN, 14},
+           {DatetimeExtractorType_FIFTEEN, 15},
+           {DatetimeExtractorType_SIXTEEN, 16},
+           {DatetimeExtractorType_SEVENTEEN, 17},
+           {DatetimeExtractorType_EIGHTEEN, 18},
+           {DatetimeExtractorType_NINETEEN, 19},
+           {DatetimeExtractorType_TWENTY, 20},
+           {DatetimeExtractorType_THIRTY, 30},
+           {DatetimeExtractorType_FORTY, 40},
+           {DatetimeExtractorType_FIFTY, 50},
+           {DatetimeExtractorType_SIXTY, 60},
+           {DatetimeExtractorType_SEVENTY, 70},
+           {DatetimeExtractorType_EIGHTY, 80},
+           {DatetimeExtractorType_NINETY, 90},
+           {DatetimeExtractorType_HUNDRED, 100},
+           {DatetimeExtractorType_THOUSAND, 1000},
+       }) {
+    int rule_id;
+    if (!RuleIdForType(type_value_pair.first, &rule_id)) {
+      return false;
+    }
+
+    std::unique_ptr<UniLib::RegexMatcher> matcher =
+        rules_[rule_id]->Matcher(input);
+    if (!matcher) {
+      return false;
+    }
+
+    int status;
+    while (matcher->Find(&status) && status == UniLib::RegexMatcher::kNoError) {
+      int span_start = matcher->Start(&status);
+      if (status != UniLib::RegexMatcher::kNoError) {
+        return false;
+      }
+      found_numbers.push_back({span_start, type_value_pair.second});
+    }
+  }
+
+  // No number word in the input: report failure instead of inventing a value.
+  // (Previously this case returned true with *parsed_number == -1.)
+  if (found_numbers.empty()) {
+    return false;
+  }
+
+  // Process the number words in reading order.
+  std::sort(found_numbers.begin(), found_numbers.end(),
+            [](const std::pair<int, int>& a, const std::pair<int, int>& b) {
+              return a.first < b.first;
+            });
+
+  // Combine the words so that "fifty one thousand and one" maps to 51001 and
+  // not to 50 1 1000 1:
+  //  * plain values (0..90) are added to the group currently being built,
+  //  * "hundred" multiplies the current group (a bare "hundred" means 100),
+  //  * "thousand" multiplies the current group, moves it to the total and
+  //    starts a new group (a bare "thousand" means 1000).
+  // Wider accumulators plus an explicit bound keep nonsensical input such as
+  // a long run of "hundred" from overflowing.
+  constexpr long long kMaxSupportedValue = 1000000000;
+  long long total = 0;
+  long long current_group = 0;
+  bool group_started = false;
+  for (const std::pair<int, int>& position_number_pair : found_numbers) {
+    const int value = position_number_pair.second;
+    if (value == 1000) {
+      total += (group_started ? current_group : 1) * 1000;
+      current_group = 0;
+      group_started = false;
+    } else if (value == 100) {
+      current_group = (group_started ? current_group : 1) * 100;
+      group_started = true;
+    } else {
+      current_group += value;
+      group_started = true;
+    }
+    if (total > kMaxSupportedValue || current_group > kMaxSupportedValue) {
+      return false;
+    }
+  }
+
+  // Both parts are at most kMaxSupportedValue, so the sum fits into an int.
+  *parsed_number = static_cast<int>(total + current_group);
+  return true;
+}
+
+// Extracts the text matched by the DIGITS rule from `input` and converts it to
+// an integer stored in `parsed_digits`. Returns false if the rule does not
+// match or the conversion fails.
+bool DatetimeExtractor::ParseDigits(const UnicodeText& input,
+                                    int* parsed_digits) const {
+  UnicodeText digits_text;
+  return ExtractType(input, DatetimeExtractorType_DIGITS, &digits_text) &&
+         unilib_.ParseInt32(digits_text, parsed_digits);
+}
+
+// Parses a year given in digits. Values below 100 are treated as abbreviated
+// years: values below 50 are placed in the 2000s, the rest in the 1900s.
+// Returns false if `input` contains no parsable digits.
+bool DatetimeExtractor::ParseYear(const UnicodeText& input,
+                                  int* parsed_year) const {
+  if (!ParseDigits(input, parsed_year)) {
+    return false;
+  }
+
+  const int year = *parsed_year;
+  if (year < 50) {
+    *parsed_year = year + 2000;
+  } else if (year < 100) {
+    *parsed_year = year + 1900;
+  }
+  return true;
+}
+
+// Parses a month that is given either in digits or by name. Month names map to
+// 1 (January) through 12 (December). Returns false if neither form matches.
+bool DatetimeExtractor::ParseMonth(const UnicodeText& input,
+                                   int* parsed_month) const {
+  // Numeric months take precedence over month names.
+  if (ParseDigits(input, parsed_month)) {
+    return true;
+  }
+
+  const std::vector<std::pair<DatetimeExtractorType, int>> month_names = {
+      {DatetimeExtractorType_JANUARY, 1},
+      {DatetimeExtractorType_FEBRUARY, 2},
+      {DatetimeExtractorType_MARCH, 3},
+      {DatetimeExtractorType_APRIL, 4},
+      {DatetimeExtractorType_MAY, 5},
+      {DatetimeExtractorType_JUNE, 6},
+      {DatetimeExtractorType_JULY, 7},
+      {DatetimeExtractorType_AUGUST, 8},
+      {DatetimeExtractorType_SEPTEMBER, 9},
+      {DatetimeExtractorType_OCTOBER, 10},
+      {DatetimeExtractorType_NOVEMBER, 11},
+      {DatetimeExtractorType_DECEMBER, 12},
+  };
+  return MapInput(input, month_names, parsed_month);
+}
+
+// Maps an AM/PM marker in `input` to the corresponding DateParseData::AMPM
+// value. Returns false if neither the AM nor the PM rule matches.
+bool DatetimeExtractor::ParseAMPM(const UnicodeText& input,
+                                  int* parsed_ampm) const {
+  const std::vector<std::pair<DatetimeExtractorType, int>> ampm_markers = {
+      {DatetimeExtractorType_AM, DateParseData::AMPM::AM},
+      {DatetimeExtractorType_PM, DateParseData::AMPM::PM},
+  };
+  return MapInput(input, ampm_markers, parsed_ampm);
+}
+
+// Parses the distance of a relative expression, accepting digits first and a
+// number written in words second. Returns false if neither parser succeeds.
+bool DatetimeExtractor::ParseRelationDistance(const UnicodeText& input,
+                                              int* parsed_distance) const {
+  return ParseDigits(input, parsed_distance) ||
+         ParseWrittenNumber(input, parsed_distance);
+}
+
+// Maps a relation word in `input` to a DateParseData::Relation value. The
+// candidates are tried in the order listed below and the first matching one
+// wins. Returns false if none matches.
+bool DatetimeExtractor::ParseRelation(
+    const UnicodeText& input, DateParseData::Relation* parsed_relation) const {
+  const std::vector<std::pair<DatetimeExtractorType, DateParseData::Relation>>
+      relations = {
+          {DatetimeExtractorType_NOW, DateParseData::Relation::NOW},
+          {DatetimeExtractorType_YESTERDAY, DateParseData::Relation::YESTERDAY},
+          {DatetimeExtractorType_TOMORROW, DateParseData::Relation::TOMORROW},
+          {DatetimeExtractorType_NEXT, DateParseData::Relation::NEXT},
+          {DatetimeExtractorType_NEXT_OR_SAME,
+           DateParseData::Relation::NEXT_OR_SAME},
+          {DatetimeExtractorType_LAST, DateParseData::Relation::LAST},
+          {DatetimeExtractorType_PAST, DateParseData::Relation::PAST},
+          {DatetimeExtractorType_FUTURE, DateParseData::Relation::FUTURE},
+      };
+  return MapInput(input, relations, parsed_relation);
+}
+
+// Maps the unit of a relative expression (a weekday name, or day / week /
+// month / year) in `input` to a DateParseData::RelationType value. The
+// candidates are tried in the order listed below. Returns false if none
+// matches.
+bool DatetimeExtractor::ParseRelationType(
+    const UnicodeText& input,
+    DateParseData::RelationType* parsed_relation_type) const {
+  const std::vector<
+      std::pair<DatetimeExtractorType, DateParseData::RelationType>>
+      relation_types = {
+          {DatetimeExtractorType_MONDAY, DateParseData::MONDAY},
+          {DatetimeExtractorType_TUESDAY, DateParseData::TUESDAY},
+          {DatetimeExtractorType_WEDNESDAY, DateParseData::WEDNESDAY},
+          {DatetimeExtractorType_THURSDAY, DateParseData::THURSDAY},
+          {DatetimeExtractorType_FRIDAY, DateParseData::FRIDAY},
+          {DatetimeExtractorType_SATURDAY, DateParseData::SATURDAY},
+          {DatetimeExtractorType_SUNDAY, DateParseData::SUNDAY},
+          {DatetimeExtractorType_DAY, DateParseData::DAY},
+          {DatetimeExtractorType_WEEK, DateParseData::WEEK},
+          {DatetimeExtractorType_MONTH, DateParseData::MONTH},
+          {DatetimeExtractorType_YEAR, DateParseData::YEAR},
+      };
+  return MapInput(input, relation_types, parsed_relation_type);
+}
+
+// Maps a plural time unit in `input` (days, weeks, months, hours, minutes,
+// seconds, years) to the corresponding DateParseData constant. The candidates
+// are tried in the order listed below. Returns false if none matches.
+bool DatetimeExtractor::ParseTimeUnit(const UnicodeText& input,
+                                      int* parsed_time_unit) const {
+  const std::vector<std::pair<DatetimeExtractorType, int>> time_units = {
+      {DatetimeExtractorType_DAYS, DateParseData::DAYS},
+      {DatetimeExtractorType_WEEKS, DateParseData::WEEKS},
+      {DatetimeExtractorType_MONTHS, DateParseData::MONTHS},
+      {DatetimeExtractorType_HOURS, DateParseData::HOURS},
+      {DatetimeExtractorType_MINUTES, DateParseData::MINUTES},
+      {DatetimeExtractorType_SECONDS, DateParseData::SECONDS},
+      {DatetimeExtractorType_YEARS, DateParseData::YEARS},
+  };
+  return MapInput(input, time_units, parsed_time_unit);
+}
+
+// Maps a weekday name in `input` to the corresponding DateParseData weekday
+// constant, trying Monday through Sunday in that order. Returns false if no
+// weekday rule matches.
+bool DatetimeExtractor::ParseWeekday(const UnicodeText& input,
+                                     int* parsed_weekday) const {
+  const std::vector<std::pair<DatetimeExtractorType, int>> weekdays = {
+      {DatetimeExtractorType_MONDAY, DateParseData::MONDAY},
+      {DatetimeExtractorType_TUESDAY, DateParseData::TUESDAY},
+      {DatetimeExtractorType_WEDNESDAY, DateParseData::WEDNESDAY},
+      {DatetimeExtractorType_THURSDAY, DateParseData::THURSDAY},
+      {DatetimeExtractorType_FRIDAY, DateParseData::FRIDAY},
+      {DatetimeExtractorType_SATURDAY, DateParseData::SATURDAY},
+      {DatetimeExtractorType_SUNDAY, DateParseData::SUNDAY},
+  };
+  return MapInput(input, weekdays, parsed_weekday);
+}
+
+}  // namespace libtextclassifier3

diff --git a/annotator/datetime/extractor.h b/annotator/datetime/extractor.h
new file mode 100644
index 0000000..4c17aa7
--- /dev/null
+++ b/annotator/datetime/extractor.h

@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_EXTRACTOR_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_EXTRACTOR_H_
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "annotator/model_generated.h"
+#include "annotator/types.h"
+#include "utils/strings/stringpiece.h"
+#include "utils/utf8/unicodetext.h"
+#include "utils/utf8/unilib.h"
+
+namespace libtextclassifier3 {
+
+// A datetime rule in ready-to-run form: the compiled regular expression
+// together with pointers to the model entries it was built from.
+struct CompiledRule {
+  // The compiled regular expression. Owned by this struct.
+  std::unique_ptr<const UniLib::RegexPattern> compiled_regex;
+
+  // The uncompiled pattern and information about the pattern groups.
+  // Raw pointer; this struct does not manage its lifetime.
+  const DatetimeModelPattern_::Regex* regex;
+
+  // DatetimeModelPattern which 'regex' is part of and comes from.
+  // Raw pointer; this struct does not manage its lifetime.
+  const DatetimeModelPattern* pattern;
+};
+
+// A helper class for DatetimeParser that extracts structured data
+// (DateParseData) from the current match of the passed RegexMatcher.
+//
+// All constructor arguments except `locale_id` are stored as references, so
+// the referenced objects must outlive the extractor.
+class DatetimeExtractor {
+ public:
+  // `rule` and `matcher` describe the match to extract from; `locale_id`
+  // selects which locale's extractor rules are used; `extractor_rules` and
+  // `type_and_locale_to_extractor_rule` provide the extractor patterns and
+  // the (extractor type, locale id) -> rule id lookup into them.
+  DatetimeExtractor(
+      const CompiledRule& rule, const UniLib::RegexMatcher& matcher,
+      int locale_id, const UniLib& unilib,
+      const std::vector<std::unique_ptr<const UniLib::RegexPattern>>&
+          extractor_rules,
+      const std::unordered_map<DatetimeExtractorType,
+                               std::unordered_map<int, int>>&
+          type_and_locale_to_extractor_rule)
+      : rule_(rule),
+        matcher_(matcher),
+        locale_id_(locale_id),
+        unilib_(unilib),
+        rules_(extractor_rules),
+        type_and_locale_to_rule_(type_and_locale_to_extractor_rule) {}
+
+  // Fills `result` with the fields parsed from the capture groups of the
+  // current match and sets `result_span` to the span covered by those groups.
+  // Returns false if extraction fails.
+  bool Extract(DateParseData* result, CodepointSpan* result_span) const;
+
+ private:
+  // Looks up the id of the extractor rule for `type` in the current locale.
+  // Returns false if there is none.
+  bool RuleIdForType(DatetimeExtractorType type, int* rule_id) const;
+
+  // Returns true if the rule for given extractor matched. If it matched,
+  // match_result will contain the first group of the rule (if match_result not
+  // nullptr).
+  bool ExtractType(const UnicodeText& input,
+                   DatetimeExtractorType extractor_type,
+                   UnicodeText* match_result = nullptr) const;
+
+  // Copies the text of capture group `group_id` of the current match into
+  // `result`.
+  bool GroupTextFromMatch(int group_id, UnicodeText* result) const;
+
+  // Updates the span to include the current match for the given group.
+  bool UpdateMatchSpan(int group_id, CodepointSpan* span) const;
+
+  // Returns true if any of the extractors from 'mapping' matched. If it did,
+  // will fill 'result' with the associated value from 'mapping'.
+  template <typename T>
+  bool MapInput(const UnicodeText& input,
+                const std::vector<std::pair<DatetimeExtractorType, T>>& mapping,
+                T* result) const;
+
+  // Field parsers. Each one returns true and fills its output argument when
+  // `input` could be interpreted, and returns false otherwise.
+  bool ParseDigits(const UnicodeText& input, int* parsed_digits) const;
+  bool ParseWrittenNumber(const UnicodeText& input, int* parsed_number) const;
+  bool ParseYear(const UnicodeText& input, int* parsed_year) const;
+  bool ParseMonth(const UnicodeText& input, int* parsed_month) const;
+  bool ParseAMPM(const UnicodeText& input, int* parsed_ampm) const;
+  bool ParseRelation(const UnicodeText& input,
+                     DateParseData::Relation* parsed_relation) const;
+  bool ParseRelationDistance(const UnicodeText& input,
+                             int* parsed_distance) const;
+  bool ParseTimeUnit(const UnicodeText& input, int* parsed_time_unit) const;
+  bool ParseRelationType(
+      const UnicodeText& input,
+      DateParseData::RelationType* parsed_relation_type) const;
+  bool ParseWeekday(const UnicodeText& input, int* parsed_weekday) const;
+
+  // The rule that produced the current match.
+  const CompiledRule& rule_;
+  // The matcher positioned at the match to extract from.
+  const UniLib::RegexMatcher& matcher_;
+  // Locale used to select extractor rules.
+  int locale_id_;
+  const UniLib& unilib_;
+  // Compiled extractor patterns, indexed by rule id.
+  const std::vector<std::unique_ptr<const UniLib::RegexPattern>>& rules_;
+  // Extractor type -> (locale id -> rule id, i.e. index into rules_).
+  const std::unordered_map<DatetimeExtractorType, std::unordered_map<int, int>>&
+      type_and_locale_to_rule_;
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_EXTRACTOR_H_

diff --git a/annotator/datetime/parser.cc b/annotator/datetime/parser.cc
new file mode 100644
index 0000000..ac3a62d
--- /dev/null
+++ b/annotator/datetime/parser.cc

@@ -0,0 +1,406 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/datetime/parser.h"
+
+#include <set>
+#include <unordered_set>
+
+#include "annotator/datetime/extractor.h"
+#include "utils/calendar/calendar.h"
+#include "utils/i18n/locale.h"
+#include "utils/strings/split.h"
+
+namespace libtextclassifier3 {
+// Creates a parser for `model`. Returns nullptr if the parser could not be
+// initialized.
+std::unique_ptr<DatetimeParser> DatetimeParser::Instance(
+    const DatetimeModel* model, const UniLib& unilib,
+    const CalendarLib& calendarlib, ZlibDecompressor* decompressor) {
+  std::unique_ptr<DatetimeParser> parser(
+      new DatetimeParser(model, unilib, calendarlib, decompressor));
+  if (parser->initialized_) {
+    return parser;
+  }
+  return nullptr;
+}
+
+// Builds the parser's lookup structures from `model`:
+//  * compiles every pattern regex into rules_ and indexes the rules by locale,
+//  * compiles every extractor pattern into extractor_rules_ and indexes them
+//    by (extractor type, locale),
+//  * records the locale-string -> locale-id mapping and the default locales.
+//
+// initialized_ is set to true only when all of this succeeded. On a null model
+// or a pattern that fails to compile the constructor returns early with
+// initialized_ == false; Instance() checks that flag.
+DatetimeParser::DatetimeParser(const DatetimeModel* model, const UniLib& unilib,
+                               const CalendarLib& calendarlib,
+                               ZlibDecompressor* decompressor)
+    : unilib_(unilib), calendarlib_(calendarlib) {
+  initialized_ = false;
+
+  if (model == nullptr) {
+    return;
+  }
+
+  if (model->patterns() != nullptr) {
+    for (const DatetimeModelPattern* pattern : *model->patterns()) {
+      if (pattern->regexes()) {
+        for (const DatetimeModelPattern_::Regex* regex : *pattern->regexes()) {
+          std::unique_ptr<UniLib::RegexPattern> regex_pattern =
+              UncompressMakeRegexPattern(unilib, regex->pattern(),
+                                         regex->compressed_pattern(),
+                                         decompressor);
+          if (!regex_pattern) {
+            TC3_LOG(ERROR) << "Couldn't create rule pattern.";
+            return;
+          }
+          rules_.push_back({std::move(regex_pattern), regex, pattern});
+          // Register the rule just added (the last element of rules_) under
+          // every locale the pattern applies to.
+          if (pattern->locales()) {
+            for (int locale : *pattern->locales()) {
+              locale_to_rules_[locale].push_back(rules_.size() - 1);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (model->extractors() != nullptr) {
+    for (const DatetimeModelExtractor* extractor : *model->extractors()) {
+      std::unique_ptr<UniLib::RegexPattern> regex_pattern =
+          UncompressMakeRegexPattern(unilib, extractor->pattern(),
+                                     extractor->compressed_pattern(),
+                                     decompressor);
+      if (!regex_pattern) {
+        TC3_LOG(ERROR) << "Couldn't create extractor pattern";
+        return;
+      }
+      extractor_rules_.push_back(std::move(regex_pattern));
+
+      // Map (extractor type, locale) to the index of the extractor rule just
+      // added. A later extractor with the same type and locale overwrites an
+      // earlier one.
+      if (extractor->locales()) {
+        for (int locale : *extractor->locales()) {
+          type_and_locale_to_extractor_rule_[extractor->extractor()][locale] =
+              extractor_rules_.size() - 1;
+        }
+      }
+    }
+  }
+
+  // A locale's id is its position in the model's locale list.
+  if (model->locales() != nullptr) {
+    for (int i = 0; i < model->locales()->Length(); ++i) {
+      locale_string_to_id_[model->locales()->Get(i)->str()] = i;
+    }
+  }
+
+  if (model->default_locales() != nullptr) {
+    for (const int locale : *model->default_locales()) {
+      default_locale_ids_.push_back(locale);
+    }
+  }
+
+  use_extractors_for_locating_ = model->use_extractors_for_locating();
+
+  initialized_ = true;
+}
+
+// Convenience overload for UTF-8 input: wraps `input` in a UnicodeText without
+// copying the bytes and forwards to the UnicodeText overload.
+bool DatetimeParser::Parse(
+    const std::string& input, const int64 reference_time_ms_utc,
+    const std::string& reference_timezone, const std::string& locales,
+    ModeFlag mode, bool anchor_start_end,
+    std::vector<DatetimeParseResultSpan>* results) const {
+  const UnicodeText unicode_input =
+      UTF8ToUnicodeText(input, /*do_copy=*/false);
+  return Parse(unicode_input, reference_time_ms_utc, reference_timezone,
+               locales, mode, anchor_start_end, results);
+}
+
+// Runs the rules registered for each of `locale_ids`, in the given order, over
+// `input` and appends every match to `found_spans`.
+//
+// `executed_rules` holds the ids of rules that have already run; such rules
+// are skipped and every rule run here is added to the set. Rules that are not
+// enabled for `mode` are skipped without being recorded. Returns false as soon
+// as a rule fails to parse.
+bool DatetimeParser::FindSpansUsingLocales(
+    const std::vector<int>& locale_ids, const UnicodeText& input,
+    const int64 reference_time_ms_utc, const std::string& reference_timezone,
+    ModeFlag mode, bool anchor_start_end, const std::string& reference_locale,
+    std::unordered_set<int>* executed_rules,
+    std::vector<DatetimeParseResultSpan>* found_spans) const {
+  for (const int locale_id : locale_ids) {
+    const auto locale_rules = locale_to_rules_.find(locale_id);
+    if (locale_rules == locale_to_rules_.end()) {
+      continue;
+    }
+
+    for (const int rule_id : locale_rules->second) {
+      // A rule shared by several locales must only run once.
+      const bool already_executed = executed_rules->count(rule_id) > 0;
+      if (already_executed) {
+        continue;
+      }
+
+      const CompiledRule& rule = rules_[rule_id];
+      if (!(rule.pattern->enabled_modes() & mode)) {
+        continue;
+      }
+
+      executed_rules->insert(rule_id);
+
+      const bool parsed = ParseWithRule(
+          rule, input, reference_time_ms_utc, reference_timezone,
+          reference_locale, locale_id, anchor_start_end, found_spans);
+      if (!parsed) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+// Finds all datetime expressions in `input` using the rules of the requested
+// `locales` and appends the non-conflicting ones to `results`.
+//
+// Candidates are ranked by span length (longest first); candidates of equal
+// length keep the order in which they were found. A candidate is accepted
+// only if it does not conflict with one accepted earlier. Returns false if
+// running the rules fails.
+bool DatetimeParser::Parse(
+    const UnicodeText& input, const int64 reference_time_ms_utc,
+    const std::string& reference_timezone, const std::string& locales,
+    ModeFlag mode, bool anchor_start_end,
+    std::vector<DatetimeParseResultSpan>* results) const {
+  std::string reference_locale;
+  const std::vector<int> requested_locales =
+      ParseAndExpandLocales(locales, &reference_locale);
+
+  std::vector<DatetimeParseResultSpan> found_spans;
+  std::unordered_set<int> executed_rules;
+  const bool spans_found = FindSpansUsingLocales(
+      requested_locales, input, reference_time_ms_utc, reference_timezone,
+      mode, anchor_start_end, reference_locale, &executed_rules, &found_spans);
+  if (!spans_found) {
+    return false;
+  }
+
+  // Remember each span's discovery position so that it can break ties below.
+  std::vector<std::pair<DatetimeParseResultSpan, int>> indexed_found_spans;
+  for (int index = 0; index < found_spans.size(); ++index) {
+    indexed_found_spans.push_back({found_spans[index], index});
+  }
+
+  // Resolve conflicts by always picking the longer span and breaking ties by
+  // selecting the earlier entry in the list for a given locale.
+  std::sort(indexed_found_spans.begin(), indexed_found_spans.end(),
+            [](const std::pair<DatetimeParseResultSpan, int>& lhs,
+               const std::pair<DatetimeParseResultSpan, int>& rhs) {
+              const auto lhs_length =
+                  lhs.first.span.second - lhs.first.span.first;
+              const auto rhs_length =
+                  rhs.first.span.second - rhs.first.span.first;
+              if (lhs_length != rhs_length) {
+                return lhs_length > rhs_length;
+              }
+              return lhs.second < rhs.second;
+            });
+
+  // Replace the candidate list with its ranked version.
+  found_spans.clear();
+  for (const auto& span_and_index : indexed_found_spans) {
+    found_spans.push_back(span_and_index.first);
+  }
+
+  // Indices of the accepted candidates, ordered by where their span starts.
+  std::set<int, std::function<bool(int, int)>> chosen_indices_set(
+      [&found_spans](int a, int b) {
+        return found_spans[a].span.first < found_spans[b].span.first;
+      });
+  for (int i = 0; i < found_spans.size(); ++i) {
+    if (DoesCandidateConflict(i, found_spans, chosen_indices_set)) {
+      continue;
+    }
+    chosen_indices_set.insert(i);
+    results->push_back(found_spans[i]);
+  }
+
+  return true;
+}
+
+// Converts the current match of `matcher` for `rule` into a
+// DatetimeParseResultSpan and appends it to `result`.
+//
+// The span is the one computed by ExtractDatetime() when
+// use_extractors_for_locating_ is set, and the full regex match otherwise. A
+// result whose span is invalid is silently dropped. Returns false if the match
+// boundaries cannot be read or the datetime cannot be extracted.
+bool DatetimeParser::HandleParseMatch(
+    const CompiledRule& rule, const UniLib::RegexMatcher& matcher,
+    int64 reference_time_ms_utc, const std::string& reference_timezone,
+    const std::string& reference_locale, int locale_id,
+    std::vector<DatetimeParseResultSpan>* result) const {
+  int status = UniLib::RegexMatcher::kNoError;
+
+  const int match_begin = matcher.Start(&status);
+  if (status != UniLib::RegexMatcher::kNoError) {
+    return false;
+  }
+  const int match_end = matcher.End(&status);
+  if (status != UniLib::RegexMatcher::kNoError) {
+    return false;
+  }
+
+  DatetimeParseResultSpan parse_result;
+  const bool extracted = ExtractDatetime(
+      rule, matcher, reference_time_ms_utc, reference_timezone,
+      reference_locale, locale_id, &(parse_result.data), &parse_result.span);
+  if (!extracted) {
+    return false;
+  }
+
+  if (!use_extractors_for_locating_) {
+    parse_result.span = {match_begin, match_end};
+  }
+
+  const bool span_is_valid = parse_result.span.first != kInvalidIndex &&
+                             parse_result.span.second != kInvalidIndex;
+  if (!span_is_valid) {
+    return true;
+  }
+
+  parse_result.target_classification_score =
+      rule.pattern->target_classification_score();
+  parse_result.priority_score = rule.pattern->priority_score();
+  result->push_back(parse_result);
+  return true;
+}
+
+// Applies `rule` to `input` and appends a result for every match to `result`.
+//
+// With `anchor_start_end` set, the rule must match the whole input and at most
+// one result is produced; otherwise every occurrence found in the input is
+// handled. Returns false if no matcher can be created for the input or if
+// handling a match fails. Finding no match at all is not an error.
+bool DatetimeParser::ParseWithRule(
+    const CompiledRule& rule, const UnicodeText& input,
+    const int64 reference_time_ms_utc, const std::string& reference_timezone,
+    const std::string& reference_locale, const int locale_id,
+    bool anchor_start_end, std::vector<DatetimeParseResultSpan>* result) const {
+  std::unique_ptr<UniLib::RegexMatcher> matcher =
+      rule.compiled_regex->Matcher(input);
+  // Matcher creation can fail (the extractor code guards against this as
+  // well); bail out instead of dereferencing a null matcher.
+  if (!matcher) {
+    return false;
+  }
+
+  int status = UniLib::RegexMatcher::kNoError;
+  if (anchor_start_end) {
+    if (matcher->Matches(&status) && status == UniLib::RegexMatcher::kNoError) {
+      if (!HandleParseMatch(rule, *matcher, reference_time_ms_utc,
+                            reference_timezone, reference_locale, locale_id,
+                            result)) {
+        return false;
+      }
+    }
+  } else {
+    while (matcher->Find(&status) && status == UniLib::RegexMatcher::kNoError) {
+      if (!HandleParseMatch(rule, *matcher, reference_time_ms_utc,
+                            reference_timezone, reference_locale, locale_id,
+                            result)) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+// Translates a comma-separated locale spec into the ids of all model locales
+// that apply to it.
+//
+// For every entry the exact locale string is looked up first; if the entry is
+// a valid BCP47 locale, the wildcard forms "*-<region>",
+// "<language>-<script>-*" and "<language>-*" are looked up as well, in that
+// order. Default locale ids that were not collected this way are appended at
+// the end. 'reference_locale' receives the first entry of the spec, or the
+// empty string when the split yields no entries.
+std::vector<int> DatetimeParser::ParseAndExpandLocales(
+    const std::string& locales, std::string* reference_locale) const {
+  const std::vector<StringPiece> locale_specs = strings::Split(locales, ',');
+  *reference_locale =
+      locale_specs.empty() ? std::string() : locale_specs[0].ToString();
+
+  std::vector<int> locale_ids;
+  // Appends the id registered for 'spec', if the model knows that spec.
+  const auto append_if_known = [this, &locale_ids](const std::string& spec) {
+    const auto found = locale_string_to_id_.find(spec);
+    if (found != locale_string_to_id_.end()) {
+      locale_ids.push_back(found->second);
+    }
+  };
+
+  for (const StringPiece& spec : locale_specs) {
+    const std::string spec_string = spec.ToString();
+
+    // Exact match on the locale as written.
+    append_if_known(spec_string);
+
+    const Locale parsed_locale = Locale::FromBCP47(spec_string);
+    if (!parsed_locale.IsValid()) {
+      continue;
+    }
+
+    const std::string language = parsed_locale.Language();
+    const std::string script = parsed_locale.Script();
+    const std::string region = parsed_locale.Region();
+
+    // Wildcard forms: region first, then language-script, then language.
+    if (!region.empty()) {
+      append_if_known("*-" + region);
+    }
+    if (!script.empty()) {
+      append_if_known(language + "-" + script + "-*");
+    }
+    if (!language.empty()) {
+      append_if_known(language + "-*");
+    }
+  }
+
+  // Append the default locales that were not picked up above. The snapshot is
+  // taken once, before any default is appended.
+  const std::unordered_set<int> collected(locale_ids.begin(),
+                                          locale_ids.end());
+  for (const int default_id : default_locale_ids_) {
+    if (collected.count(default_id) == 0) {
+      locale_ids.push_back(default_id);
+    }
+  }
+
+  return locale_ids;
+}
+
+namespace {
+
+// Returns the finest granularity implied by the fields set in 'data'.
+//
+// The checks run from the finest unit (second) to the coarsest, and the first
+// one that applies wins. GRANULARITY_YEAR is returned when nothing finer
+// applies.
+DatetimeGranularity GetGranularity(const DateParseData& data) {
+  if (data.field_set_mask & DateParseData::SECOND_FIELD) {
+    return DatetimeGranularity::GRANULARITY_SECOND;
+  }
+  if (data.field_set_mask & DateParseData::MINUTE_FIELD) {
+    return DatetimeGranularity::GRANULARITY_MINUTE;
+  }
+  if (data.field_set_mask & DateParseData::HOUR_FIELD) {
+    return DatetimeGranularity::GRANULARITY_HOUR;
+  }
+
+  // 'relation' and 'relation_type' are only consulted when their field bit
+  // is set.
+  const bool has_relation_type =
+      (data.field_set_mask & DateParseData::RELATION_TYPE_FIELD) != 0;
+  const bool has_day_relation =
+      (data.field_set_mask & DateParseData::RELATION_FIELD) &&
+      (data.relation == DateParseData::Relation::NOW ||
+       data.relation == DateParseData::Relation::TOMORROW ||
+       data.relation == DateParseData::Relation::YESTERDAY);
+  const bool has_day_relation_type =
+      has_relation_type &&
+      (data.relation_type == DateParseData::RelationType::MONDAY ||
+       data.relation_type == DateParseData::RelationType::TUESDAY ||
+       data.relation_type == DateParseData::RelationType::WEDNESDAY ||
+       data.relation_type == DateParseData::RelationType::THURSDAY ||
+       data.relation_type == DateParseData::RelationType::FRIDAY ||
+       data.relation_type == DateParseData::RelationType::SATURDAY ||
+       data.relation_type == DateParseData::RelationType::SUNDAY ||
+       data.relation_type == DateParseData::RelationType::DAY);
+  if ((data.field_set_mask & DateParseData::DAY_FIELD) || has_day_relation ||
+      has_day_relation_type) {
+    return DatetimeGranularity::GRANULARITY_DAY;
+  }
+
+  if (has_relation_type &&
+      data.relation_type == DateParseData::RelationType::WEEK) {
+    return DatetimeGranularity::GRANULARITY_WEEK;
+  }
+
+  if ((data.field_set_mask & DateParseData::MONTH_FIELD) ||
+      (has_relation_type &&
+       data.relation_type == DateParseData::RelationType::MONTH)) {
+    return DatetimeGranularity::GRANULARITY_MONTH;
+  }
+
+  // A year field, a year relation, or nothing recognised at all.
+  return DatetimeGranularity::GRANULARITY_YEAR;
+}
+
+}  // namespace
+
+// Turns the current match of 'matcher' into a DatetimeParseResult.
+//
+// The matched groups are first extracted into a DateParseData (which also
+// yields 'result_span'), the granularity is derived from the fields that were
+// set, and the calendar library then resolves the data to a time in
+// 'result->time_ms_utc'. Returns false if extraction or interpretation fails.
+bool DatetimeParser::ExtractDatetime(const CompiledRule& rule,
+                                     const UniLib::RegexMatcher& matcher,
+                                     const int64 reference_time_ms_utc,
+                                     const std::string& reference_timezone,
+                                     const std::string& reference_locale,
+                                     int locale_id, DatetimeParseResult* result,
+                                     CodepointSpan* result_span) const {
+  DatetimeExtractor extractor(rule, matcher, locale_id, unilib_,
+                              extractor_rules_,
+                              type_and_locale_to_extractor_rule_);
+  DateParseData parsed_fields;
+  if (!extractor.Extract(&parsed_fields, result_span)) {
+    return false;
+  }
+
+  // The granularity must be known before the data can be interpreted.
+  result->granularity = GetGranularity(parsed_fields);
+
+  const bool interpreted = calendarlib_.InterpretParseData(
+      parsed_fields, reference_time_ms_utc, reference_timezone,
+      reference_locale, result->granularity, &(result->time_ms_utc));
+  return interpreted;
+}
+
+}  // namespace libtextclassifier3

diff --git a/annotator/datetime/parser.h b/annotator/datetime/parser.h
new file mode 100644
index 0000000..9b91833
--- /dev/null
+++ b/annotator/datetime/parser.h

@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_PARSER_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_PARSER_H_
+
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "annotator/datetime/extractor.h"
+#include "annotator/model_generated.h"
+#include "annotator/types.h"
+#include "annotator/zlib-utils.h"
+#include "utils/base/integral_types.h"
+#include "utils/calendar/calendar.h"
+#include "utils/utf8/unilib.h"
+
+namespace libtextclassifier3 {
+
+// Parses datetime expressions in the input and resolves them to actual absolute
+// time.
+class DatetimeParser {
+ public:
+  // Creates a parser for 'model'.
+  // NOTE(review): presumably returns nullptr when the model cannot be used
+  // (the tests assert that the result is non-null) - confirm in parser.cc.
+  static std::unique_ptr<DatetimeParser> Instance(
+      const DatetimeModel* model, const UniLib& unilib,
+      const CalendarLib& calendarlib, ZlibDecompressor* decompressor);
+
+  // Parses the dates in 'input' and fills result. Makes sure that the results
+  // do not overlap.
+  // If 'anchor_start_end' is true the extracted results need to start at the
+  // beginning of 'input' and end at the end of it.
+  // 'locales' is a comma-separated list of locale names.
+  bool Parse(const std::string& input, int64 reference_time_ms_utc,
+             const std::string& reference_timezone, const std::string& locales,
+             ModeFlag mode, bool anchor_start_end,
+             std::vector<DatetimeParseResultSpan>* results) const;
+
+  // Same as above but takes UnicodeText.
+  bool Parse(const UnicodeText& input, int64 reference_time_ms_utc,
+             const std::string& reference_timezone, const std::string& locales,
+             ModeFlag mode, bool anchor_start_end,
+             std::vector<DatetimeParseResultSpan>* results) const;
+
+ protected:
+  DatetimeParser(const DatetimeModel* model, const UniLib& unilib,
+                 const CalendarLib& calendarlib,
+                 ZlibDecompressor* decompressor);
+
+  // Returns a list of locale ids for given locale spec string (comma-separated
+  // locale names). Assigns the first parsed locale to reference_locale.
+  // Besides the exact locale, the wildcard forms "*-<region>",
+  // "<language>-<script>-*" and "<language>-*" are looked up, and default
+  // locale ids not already present are appended at the end.
+  std::vector<int> ParseAndExpandLocales(const std::string& locales,
+                                         std::string* reference_locale) const;
+
+  // Helper function that finds datetime spans, only using the rules associated
+  // with the given locales.
+  bool FindSpansUsingLocales(
+      const std::vector<int>& locale_ids, const UnicodeText& input,
+      const int64 reference_time_ms_utc, const std::string& reference_timezone,
+      ModeFlag mode, bool anchor_start_end, const std::string& reference_locale,
+      std::unordered_set<int>* executed_rules,
+      std::vector<DatetimeParseResultSpan>* found_spans) const;
+
+  // Applies a single rule to 'input' and appends the interpreted matches to
+  // 'result'. With 'anchor_start_end' the rule is matched once against the
+  // input, otherwise all matches found in the input are handled. Returns false
+  // if handling one of the matches fails.
+  bool ParseWithRule(const CompiledRule& rule, const UnicodeText& input,
+                     int64 reference_time_ms_utc,
+                     const std::string& reference_timezone,
+                     const std::string& reference_locale, const int locale_id,
+                     bool anchor_start_end,
+                     std::vector<DatetimeParseResultSpan>* result) const;
+
+  // Converts the current match in 'matcher' into DatetimeParseResult.
+  // 'result_span' receives the span reported by the extractor.
+  bool ExtractDatetime(const CompiledRule& rule,
+                       const UniLib::RegexMatcher& matcher,
+                       int64 reference_time_ms_utc,
+                       const std::string& reference_timezone,
+                       const std::string& reference_locale, int locale_id,
+                       DatetimeParseResult* result,
+                       CodepointSpan* result_span) const;
+
+  // Parse and extract information from current match in 'matcher'.
+  // Appends a result only if its span is valid; returns false if the match
+  // boundaries cannot be read or the datetime cannot be extracted.
+  bool HandleParseMatch(const CompiledRule& rule,
+                        const UniLib::RegexMatcher& matcher,
+                        int64 reference_time_ms_utc,
+                        const std::string& reference_timezone,
+                        const std::string& reference_locale, int locale_id,
+                        std::vector<DatetimeParseResultSpan>* result) const;
+
+ private:
+  bool initialized_;
+  // Held by reference; both libraries must outlive the parser.
+  const UniLib& unilib_;
+  const CalendarLib& calendarlib_;
+  std::vector<CompiledRule> rules_;
+  std::unordered_map<int, std::vector<int>> locale_to_rules_;
+  // Passed to every DatetimeExtractor together with the lookup table below.
+  std::vector<std::unique_ptr<const UniLib::RegexPattern>> extractor_rules_;
+  std::unordered_map<DatetimeExtractorType, std::unordered_map<int, int>>
+      type_and_locale_to_extractor_rule_;
+  // Maps locale spec strings, including wildcard forms such as "en-*" or
+  // "*-CH", to locale ids.
+  std::unordered_map<std::string, int> locale_string_to_id_;
+  // Locale ids that are always added to an expanded locale list.
+  std::vector<int> default_locale_ids_;
+  // If false, the span of a result is the span of the whole regex match
+  // instead of the span reported by the extractor.
+  bool use_extractors_for_locating_;
+};
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_PARSER_H_

diff --git a/annotator/datetime/parser_test.cc b/annotator/datetime/parser_test.cc
new file mode 100644
index 0000000..6bd6d10
--- /dev/null
+++ b/annotator/datetime/parser_test.cc

@@ -0,0 +1,413 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <time.h>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "annotator/annotator.h"
+#include "annotator/datetime/parser.h"
+#include "annotator/model_generated.h"
+#include "annotator/types-test-util.h"
+
+using testing::ElementsAreArray;
+
+namespace libtextclassifier3 {
+namespace {
+
+// Returns the path prefix under which the test data files are looked up, as
+// configured through LIBTEXTCLASSIFIER_TEST_DATA_DIR.
+std::string GetModelPath() {
+  return std::string(LIBTEXTCLASSIFIER_TEST_DATA_DIR);
+}
+
+// Reads the whole content of the file 'file_name' into a string.
+std::string ReadFile(const std::string& file_name) {
+  std::ifstream input(file_name);
+  const std::istreambuf_iterator<char> content_begin(input);
+  const std::istreambuf_iterator<char> content_end;
+  return std::string(content_begin, content_end);
+}
+
+// Formats 'time_ms_utc' (milliseconds since the epoch) as local time in the
+// form "ddd yyyy-mm-dd hh:mm:ss zzz". Used to make test log output readable.
+// Returns a placeholder if the time cannot be converted or formatted.
+std::string FormatMillis(int64 time_ms_utc) {
+  // localtime() takes a time_t, which is not 'long' on every platform.
+  const time_t time_seconds = static_cast<time_t>(time_ms_utc / 1000);
+  const struct tm* local_time = localtime(&time_seconds);
+  if (local_time == nullptr) {
+    return "<invalid time>";
+  }
+
+  char buffer[512];
+  // strftime() returns 0 and leaves the buffer unspecified when it fails.
+  if (strftime(buffer, sizeof(buffer), "%a %Y-%m-%d %H:%M:%S %Z",
+               local_time) == 0) {
+    return "<invalid time>";
+  }
+  return std::string(buffer);
+}
+
+// Fixture that loads the test model and exposes its datetime parser.
+//
+// In the marked-text helpers, '{' and '}' delimit the span the parser is
+// expected to report; the braces are removed before the text is parsed.
+class ParserTest : public testing::Test {
+ public:
+  void SetUp() override {
+    model_buffer_ = ReadFile(GetModelPath() + "test_model.fb");
+    classifier_ = Annotator::FromUnownedBuffer(model_buffer_.data(),
+                                               model_buffer_.size(), &unilib_);
+    TC3_CHECK(classifier_);
+    parser_ = classifier_->DatetimeParserForTests();
+  }
+
+  // Returns true if parsing 'text' (reference time 0, no locales) yields no
+  // results at all. Aborts the test if parsing itself fails.
+  bool HasNoResult(const std::string& text, bool anchor_start_end = false,
+                   const std::string& timezone = "Europe/Zurich") {
+    std::vector<DatetimeParseResultSpan> results;
+    if (!parser_->Parse(text, 0, timezone, /*locales=*/"", ModeFlag_ANNOTATION,
+                        anchor_start_end, &results)) {
+      TC3_LOG(ERROR) << text;
+      TC3_CHECK(false);
+    }
+    return results.empty();
+  }
+
+  // Parses 'marked_text' with its braces removed (reference time 0) and
+  // returns true if the results overlapping the marked span consist of exactly
+  // one result with that span, 'expected_ms_utc', 'expected_granularity',
+  // target classification score 1.0 and priority score 0.0. Mismatches are
+  // logged. Aborts the test if the braces are missing or parsing fails.
+  bool ParsesCorrectly(const std::string& marked_text,
+                       const int64 expected_ms_utc,
+                       DatetimeGranularity expected_granularity,
+                       bool anchor_start_end = false,
+                       const std::string& timezone = "Europe/Zurich",
+                       const std::string& locales = "en-US") {
+    const UnicodeText marked_text_unicode =
+        UTF8ToUnicodeText(marked_text, /*do_copy=*/false);
+    auto brace_open_it =
+        std::find(marked_text_unicode.begin(), marked_text_unicode.end(), '{');
+    auto brace_end_it =
+        std::find(marked_text_unicode.begin(), marked_text_unicode.end(), '}');
+    TC3_CHECK(brace_open_it != marked_text_unicode.end());
+    TC3_CHECK(brace_end_it != marked_text_unicode.end());
+
+    // Rebuild the text without the two brace characters.
+    std::string text;
+    text +=
+        UnicodeText::UTF8Substring(marked_text_unicode.begin(), brace_open_it);
+    text += UnicodeText::UTF8Substring(std::next(brace_open_it), brace_end_it);
+    text += UnicodeText::UTF8Substring(std::next(brace_end_it),
+                                       marked_text_unicode.end());
+
+    std::vector<DatetimeParseResultSpan> results;
+
+    if (!parser_->Parse(text, 0, timezone, locales, ModeFlag_ANNOTATION,
+                        anchor_start_end, &results)) {
+      TC3_LOG(ERROR) << text;
+      TC3_CHECK(false);
+    }
+    if (results.empty()) {
+      TC3_LOG(ERROR) << "No results.";
+      return false;
+    }
+
+    const int expected_start_index =
+        std::distance(marked_text_unicode.begin(), brace_open_it);
+    // The -1 below is to account for the opening bracket character.
+    const int expected_end_index =
+        std::distance(marked_text_unicode.begin(), brace_end_it) - 1;
+
+    // Results elsewhere in the text are not part of the expectation.
+    std::vector<DatetimeParseResultSpan> filtered_results;
+    for (const DatetimeParseResultSpan& result : results) {
+      if (SpansOverlap(result.span,
+                       {expected_start_index, expected_end_index})) {
+        filtered_results.push_back(result);
+      }
+    }
+
+    const std::vector<DatetimeParseResultSpan> expected{
+        {{expected_start_index, expected_end_index},
+         {expected_ms_utc, expected_granularity},
+         /*target_classification_score=*/1.0,
+         /*priority_score=*/0.0}};
+    const bool matches =
+        testing::Matches(ElementsAreArray(expected))(filtered_results);
+    if (!matches) {
+      TC3_LOG(ERROR) << "Expected: " << expected[0] << " which corresponds to: "
+                     << FormatMillis(expected[0].data.time_ms_utc);
+      const int num_filtered = static_cast<int>(filtered_results.size());
+      for (int i = 0; i < num_filtered; ++i) {
+        TC3_LOG(ERROR) << "Actual[" << i << "]: " << filtered_results[i]
+                       << " which corresponds to: "
+                       << FormatMillis(filtered_results[i].data.time_ms_utc);
+      }
+    }
+    return matches;
+  }
+
+  // ParsesCorrectly() with the locale set to "de".
+  bool ParsesCorrectlyGerman(const std::string& marked_text,
+                             const int64 expected_ms_utc,
+                             DatetimeGranularity expected_granularity) {
+    return ParsesCorrectly(marked_text, expected_ms_utc, expected_granularity,
+                           /*anchor_start_end=*/false,
+                           /*timezone=*/"Europe/Zurich", /*locales=*/"de");
+  }
+
+ protected:
+  // Declaration order matters: members are destroyed in reverse order, and
+  // 'classifier_' is created from a pointer into 'model_buffer_' and a pointer
+  // to 'unilib_', so both are declared before it and outlive it.
+  std::string model_buffer_;
+  UniLib unilib_;
+  std::unique_ptr<Annotator> classifier_;
+  // Obtained from 'classifier_' in SetUp().
+  const DatetimeParser* parser_ = nullptr;
+};
+
+// Test with just a few cases to make debugging of general failures easier.
+// Uses the ParsesCorrectly() defaults: reference time 0, timezone
+// "Europe/Zurich" and locale "en-US".
+TEST_F(ParserTest, ParseShort) {
+  EXPECT_TRUE(
+      ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY));
+}
+
+// English ("en-US") expressions. Braces mark the span that must be detected;
+// the expected value is the resolved time in milliseconds since the epoch,
+// relative to reference time 0 and, unless stated otherwise, the timezone
+// "Europe/Zurich".
+TEST_F(ParserTest, Parse) {
+  // Absolute dates and times in various formats.
+  EXPECT_TRUE(
+      ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY));
+  EXPECT_TRUE(
+      ParsesCorrectly("{january 31 2018}", 1517353200000, GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectly("lorem {1 january 2018} ipsum", 1514761200000,
+                              GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectly("{09/Mar/2004 22:02:40}", 1078866160000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{Dec 2, 2010 2:39:58 AM}", 1291253998000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{Jun 09 2011 15:28:14}", 1307626094000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(
+      ParsesCorrectly("{Mar 16 08:12:04}", 6419524000, GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{2010-06-26 02:31:29}", 1277512289000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{2006/01/22 04:11:05}", 1137899465000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{11:42:35}", 38555000, GRANULARITY_SECOND));
+  EXPECT_TRUE(
+      ParsesCorrectly("{23/Apr 11:42:35}", 9715355000, GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{23/Apr/2015 11:42:35}", 1429782155000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{23-Apr-2015 11:42:35}", 1429782155000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{23 Apr 2015 11:42:35}", 1429782155000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{04/23/15 11:42:35}", 1429782155000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{04/23/2015 11:42:35}", 1429782155000,
+                              GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{9/28/2011 2:23:15 PM}", 1317212595000,
+                              GRANULARITY_SECOND));
+  // A datetime in the middle of a long text.
+  EXPECT_TRUE(ParsesCorrectly(
+      "Are sentiments apartments decisively the especially alteration. "
+      "Thrown shy denote ten ladies though ask saw. Or by to he going "
+      "think order event music. Incommode so intention defective at "
+      "convinced. Led income months itself and houses you. After nor "
+      "you leave might share court balls. {19/apr/2010 06:36:15} Are "
+      "sentiments apartments decisively the especially alteration. "
+      "Thrown shy denote ten ladies though ask saw. Or by to he going "
+      "think order event music. Incommode so intention defective at "
+      "convinced. Led income months itself and houses you. After nor "
+      "you leave might share court balls. ",
+      1271651775000, GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30}", 1514777400000,
+                              GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30 am}", 1514777400000,
+                              GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4pm}", 1514818800000,
+                              GRANULARITY_HOUR));
+
+  // Expressions relative to the reference time (0).
+  EXPECT_TRUE(ParsesCorrectly("{today at 0:00}", -3600000, GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectly("{today at 0:00}", -57600000, GRANULARITY_MINUTE,
+                              /*anchor_start_end=*/false,
+                              "America/Los_Angeles"));
+  EXPECT_TRUE(
+      ParsesCorrectly("{tomorrow at 4:00}", 97200000, GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectly("{tomorrow at 4am}", 97200000, GRANULARITY_HOUR));
+  EXPECT_TRUE(
+      ParsesCorrectly("{wednesday at 4am}", 529200000, GRANULARITY_HOUR));
+  EXPECT_TRUE(ParsesCorrectly("last seen {today at 9:01 PM}", 72060000,
+                              GRANULARITY_MINUTE));
+}
+
+// With 'anchor_start_end' set, a result has to span the whole input: a date
+// that makes up the entire text is still found, while a date surrounded by
+// other text yields no result.
+TEST_F(ParserTest, ParseWithAnchor) {
+  EXPECT_TRUE(ParsesCorrectly("{January 1, 1988}", 567990000000,
+                              GRANULARITY_DAY, /*anchor_start_end=*/false));
+  EXPECT_TRUE(ParsesCorrectly("{January 1, 1988}", 567990000000,
+                              GRANULARITY_DAY, /*anchor_start_end=*/true));
+  EXPECT_TRUE(ParsesCorrectly("lorem {1 january 2018} ipsum", 1514761200000,
+                              GRANULARITY_DAY, /*anchor_start_end=*/false));
+  EXPECT_TRUE(HasNoResult("lorem 1 january 2018 ipsum",
+                          /*anchor_start_end=*/true));
+}
+
+// German expressions, parsed with locale "de" against reference time 0 in
+// the timezone "Europe/Zurich".
+TEST_F(ParserTest, ParseGerman) {
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{Januar 1 2018}", 1514761200000, GRANULARITY_DAY));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{1 2 2018}", 1517439600000, GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectlyGerman("lorem {1 Januar 2018} ipsum",
+                                    1514761200000, GRANULARITY_DAY));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{19/Apr/2010:06:36:15}", 1271651775000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{09/März/2004 22:02:40}", 1078866160000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{Dez 2, 2010 2:39:58}", 1291253998000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{Juni 09 2011 15:28:14}", 1307626094000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{März 16 08:12:04}", 6419524000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{2010-06-26 02:31:29}", 1277512289000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{2006/01/22 04:11:05}", 1137899465000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{11:42:35}", 38555000, GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr 11:42:35}", 9715355000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr/2015:11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr/2015 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23-Apr-2015 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{23 Apr 2015 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/15 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/2015 11:42:35}", 1429782155000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{19/apr/2010:06:36:15}", 1271651775000,
+                                    GRANULARITY_SECOND));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4:30}", 1514777400000,
+                                    GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4:30 nachm}",
+                                    1514820600000, GRANULARITY_MINUTE));
+  EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4 nachm}", 1514818800000,
+                                    GRANULARITY_HOUR));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{14.03.2017}", 1489446000000, GRANULARITY_DAY));
+  // Expressions relative to the reference time (0).
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{morgen 0:00}", 82800000, GRANULARITY_MINUTE));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{morgen um 4:00}", 97200000, GRANULARITY_MINUTE));
+  EXPECT_TRUE(
+      ParsesCorrectlyGerman("{morgen um 4 vorm}", 97200000, GRANULARITY_HOUR));
+}
+
+// For "en-GB" and plain "en", "1/5/15" is expected to be read day-first: the
+// expected timestamp is midnight of 1 May 2015 in Europe/Zurich.
+TEST_F(ParserTest, ParseNonUs) {
+  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1430431200000, GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich",
+                              /*locales=*/"en-GB"));
+  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1430431200000, GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich", /*locales=*/"en"));
+}
+
+// For locales with the US region ("en-US", "es-US"), "1/5/15" is expected to
+// be read month-first: the expected timestamp is midnight of 5 January 2015
+// in Europe/Zurich.
+TEST_F(ParserTest, ParseUs) {
+  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1420412400000, GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich",
+                              /*locales=*/"en-US"));
+  EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1420412400000, GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich",
+                              /*locales=*/"es-US"));
+}
+
+// A date in a text is still expected to be found when the locale ("xx") is
+// not a known one.
+// NOTE(review): presumably this works through the model's default locales -
+// confirm against the test model.
+TEST_F(ParserTest, ParseUnknownLanguage) {
+  EXPECT_TRUE(ParsesCorrectly("bylo to {31. 12. 2015} v 6 hodin", 1451516400000,
+                              GRANULARITY_DAY,
+                              /*anchor_start_end=*/false,
+                              /*timezone=*/"Europe/Zurich", /*locales=*/"xx"));
+}
+
+// Fixture for testing locale selection. It builds a small datetime model in
+// memory instead of loading the test model file.
+class ParserLocaleTest : public testing::Test {
+ public:
+  // Builds the model and creates 'parser_' from it.
+  void SetUp() override;
+  // Returns true if parsing 'input' with 'locales' yields exactly one result.
+  bool HasResult(const std::string& input, const std::string& locales);
+
+ protected:
+  UniLib unilib_;
+  CalendarLib calendarlib_;
+  // Holds the serialized model that 'parser_' is created from.
+  flatbuffers::FlatBufferBuilder builder_;
+  std::unique_ptr<DatetimeParser> parser_;
+};
+
+// Appends a new pattern to 'patterns'. The pattern consists of the single
+// regular expression 'regex' with one unused group and is associated with the
+// locale id 'locale'.
+void AddPattern(const std::string& regex, int locale,
+                std::vector<std::unique_ptr<DatetimeModelPatternT>>* patterns) {
+  patterns->emplace_back(new DatetimeModelPatternT);
+  auto& new_pattern = *patterns->back();
+
+  new_pattern.regexes.emplace_back(new DatetimeModelPattern_::RegexT);
+  auto& new_regex = *new_pattern.regexes.back();
+  new_regex.pattern = regex;
+  new_regex.groups.push_back(DatetimeGroupType_GROUP_UNUSED);
+
+  new_pattern.locales.push_back(locale);
+}
+
+// Builds a model with seven locales and one pattern per locale, then creates
+// the parser from it. The regex of each pattern is a literal naming its
+// locale, so the input text selects which pattern can match.
+void ParserLocaleTest::SetUp() {
+  // The position of a locale in this table is the locale id used below.
+  static const char* const kLocales[] = {
+      "en-US", "en-CH", "zh-Hant", "en-*", "zh-Hant-*", "*-CH", "default"};
+  // kRegexes[i] is the pattern registered for the locale id i.
+  static const char* const kRegexes[] = {
+      "en-US",       "en-CH",  "zh-Hant", "en-all",
+      "zh-Hant-all", "all-CH", "default"};
+
+  DatetimeModelT model;
+  model.use_extractors_for_locating = false;
+  model.locales.clear();
+  for (const char* locale_name : kLocales) {
+    model.locales.push_back(locale_name);
+  }
+  // Id 6 is the "default" entry of kLocales.
+  model.default_locales.push_back(6);
+
+  int locale_id = 0;
+  for (const char* regex : kRegexes) {
+    AddPattern(regex, locale_id, &model.patterns);
+    ++locale_id;
+  }
+
+  builder_.Finish(DatetimeModel::Pack(builder_, &model));
+  const DatetimeModel* model_fb =
+      flatbuffers::GetRoot<DatetimeModel>(builder_.GetBufferPointer());
+  ASSERT_TRUE(model_fb);
+
+  parser_ = DatetimeParser::Instance(model_fb, unilib_, calendarlib_,
+                                     /*decompressor=*/nullptr);
+  ASSERT_TRUE(parser_);
+}
+
+// Parses 'input' for the comma-separated 'locales' (reference time 0, empty
+// reference timezone, not anchored) and returns true if exactly one result
+// was produced. A failing Parse() call is reported as a test failure.
+bool ParserLocaleTest::HasResult(const std::string& input,
+                                 const std::string& locales) {
+  std::vector<DatetimeParseResultSpan> results;
+  EXPECT_TRUE(parser_->Parse(input, /*reference_time_ms_utc=*/0,
+                             /*reference_timezone=*/"", locales,
+                             ModeFlag_ANNOTATION, false, &results));
+  return results.size() == 1;
+}
+
+// The input text names the pattern that has to match (see SetUp()). Exact
+// locales only enable their own pattern, and the default pattern is enabled
+// as well.
+TEST_F(ParserLocaleTest, English) {
+  EXPECT_TRUE(HasResult("en-US", /*locales=*/"en-US"));
+  EXPECT_FALSE(HasResult("en-CH", /*locales=*/"en-US"));
+  EXPECT_FALSE(HasResult("en-US", /*locales=*/"en-CH"));
+  EXPECT_TRUE(HasResult("en-CH", /*locales=*/"en-CH"));
+  EXPECT_TRUE(HasResult("default", /*locales=*/"en-CH"));
+}
+
+// The "zh-Hant-*" pattern is enabled for every locale with language "zh" and
+// script "Hant", whatever the region, but not for "zh" locales without that
+// script. The default pattern is enabled in all cases.
+TEST_F(ParserLocaleTest, TraditionalChinese) {
+  EXPECT_TRUE(HasResult("zh-Hant-all", /*locales=*/"zh-Hant"));
+  EXPECT_TRUE(HasResult("zh-Hant-all", /*locales=*/"zh-Hant-TW"));
+  EXPECT_TRUE(HasResult("zh-Hant-all", /*locales=*/"zh-Hant-SG"));
+  EXPECT_FALSE(HasResult("zh-Hant-all", /*locales=*/"zh-SG"));
+  EXPECT_FALSE(HasResult("zh-Hant-all", /*locales=*/"zh"));
+  EXPECT_TRUE(HasResult("default", /*locales=*/"zh"));
+  EXPECT_TRUE(HasResult("default", /*locales=*/"zh-Hant-SG"));
+}
+
+// The "*-CH" pattern is enabled for every locale with region "CH" whatever
+// the language, and the "en-*" pattern for English locales. The default
+// pattern is enabled in all cases.
+TEST_F(ParserLocaleTest, SwissEnglish) {
+  EXPECT_TRUE(HasResult("all-CH", /*locales=*/"de-CH"));
+  EXPECT_TRUE(HasResult("all-CH", /*locales=*/"en-CH"));
+  EXPECT_TRUE(HasResult("en-all", /*locales=*/"en-CH"));
+  EXPECT_FALSE(HasResult("all-CH", /*locales=*/"de-DE"));
+  EXPECT_TRUE(HasResult("default", /*locales=*/"de-CH"));
+  EXPECT_TRUE(HasResult("default", /*locales=*/"en-CH"));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
commit	6c4cc67c9849339d4e4dfffcfa3eb2342f767890	[log] [tgz]
author	Tony Mak <tonymak@google.com>	Mon Sep 17 11:48:50 2018 +0100
committer	Tony Mak <tonymak@google.com>	Tue Sep 25 18:36:59 2018 +0100
tree	1694602c9fd5abe64a26d6363c82b59baf9fa2b0
parent	30f477bb6871cfebf3136c71da5c14ef3aa69c97 [diff]