Export libtextclassifier Test: atest TextClassifierServiceTest BUG: 151413366 Change-Id: Ic93a18c7c30a7978313245c7b28845a47ed03570

commit: d0ae7c6c171d6d136ec84d68fdb3bc5853ae0272 [log] [tgz]
author: Tony Mak <tonymak@google.com> Fri Mar 27 13:58:00 2020 +0000
committer: Tony Mak <tonymak@google.com> Fri Mar 27 14:09:13 2020 +0000
tree: 9bda147cd2f5f7a1eea190f1b07ca63c6a50b1c1
parent: d99d58c4636e138c0647af6eb4de45210f1efda6 [diff]
diff --git a/native/annotator/annotator.cc b/native/annotator/annotator.cc
index ce46786..eca8ab8 100644
--- a/native/annotator/annotator.cc
+++ b/native/annotator/annotator.cc

@@ -137,8 +137,6 @@
         fb_annotation_options->enable_date_range();
     result_annotation_options.include_preposition =
         fb_annotation_options->include_preposition();
-    result_annotation_options.expand_date_series =
-        fb_annotation_options->expand_date_series();
     if (fb_annotation_options->extra_requested_dates() != nullptr) {
       for (const auto& extra_requested_date :
            *fb_annotation_options->extra_requested_dates()) {
@@ -1065,6 +1063,12 @@
         return false;
       }
 
+      // A PERSON_NAME entity does not conflict with anything.
+      if ((source_mask &
+           (1 << static_cast<int>(AnnotatedSpan::Source::PERSON_NAME)))) {
+        return false;
+      }
+
       // Entities from other sources can conflict.
       return true;
   }
@@ -1761,6 +1765,7 @@
       person_name_engine_->ClassifyText(context, selection_indices,
                                         &person_name_result)) {
     candidates.push_back({selection_indices, {person_name_result}});
+    candidates.back().source = AnnotatedSpan::Source::PERSON_NAME;
   }
 
   // Try the installed app engine.

diff --git a/native/annotator/grammar/dates/annotations/annotation-options.h b/native/annotator/grammar/dates/annotations/annotation-options.h
index d5445fe..29e9939 100755
--- a/native/annotator/grammar/dates/annotations/annotation-options.h
+++ b/native/annotator/grammar/dates/annotations/annotation-options.h

@@ -59,16 +59,6 @@
   //     instance: "Monday" and "6pm".
   bool enable_date_range;
 
-  // If enabled, expand a date series. Must have date_range enabled to be used.
-  // The date range cannot exceed 30 days.
-  //   input: April 4-6, 6:30pm
-  //     If the flag is true, the extracted annotation will contaly 3 instance
-  //     which are April 4 at 6:30pm, April 5 at 6:30pm and April 6 at 6:30pm
-  //     all have the same begin and end annotation
-  //     If the flag is false, the extracted annotation contains one time range
-  //     instance and one date instance
-  bool expand_date_series;
-
   // Timezone in which the input text was written
   std::string reference_timezone;
   // Localization params.
@@ -98,7 +88,6 @@
         include_preposition(false),
         base_timestamp_millis(0),
         enable_date_range(false),
-        expand_date_series(false),
         use_rule_priority_score(false),
         generate_alternative_interpretations_when_ambiguous(false) {}
 };

diff --git a/native/annotator/grammar/dates/parser.cc b/native/annotator/grammar/dates/parser.cc
index 7587b0b..566827e 100644
--- a/native/annotator/grammar/dates/parser.cc
+++ b/native/annotator/grammar/dates/parser.cc

@@ -234,7 +234,7 @@
 
 // Copies the field from one DateMatch to another whose field is null. for
 // example: if the from is "May 1, 8pm", and the to is "9pm", "May 1" will be
-// copied to "to". Now we only copy fields for date range requirement.
+// copied to "to". Now we only copy fields for date range requirement.
 void CopyFieldsForDateMatch(const DateMatch& from, DateMatch* to) {
   if (from.time_span_match != nullptr && to->time_span_match == nullptr) {
     to->time_span_match = from.time_span_match;
@@ -743,101 +743,6 @@
   return number_of_days > kMaximumExpansion;
 }
 
-// Expands a date range and merges it with a time.
-// e.g. April 4-6, 2:00pm will be expanded into April 4 at 2pm, April 5 at 2pm
-// and April 6 at 2:00pm
-//  - Only supports a range of days with a time
-//  - Does not expand a date range without time
-void ExpandDateRangeAndMergeWithTime(
-    const UniLib& unilib, const std::vector<UnicodeText::const_iterator>& text,
-    const std::vector<std::string>& ignored_spans,
-    std::vector<DateMatch>* times, std::vector<DateRangeMatch>* date_ranges) {
-  auto next_time = times->begin();
-  auto next_range = date_ranges->begin();
-  while (next_range != date_ranges->end() && next_time != times->end()) {
-    const DateRangeMatch& range = *next_range;
-    if (range.from.HasHour() || !IsPrecedent(range.from, range.to)) {
-      ++next_range;
-      continue;
-    }
-
-    while (next_time != times->end()) {
-      const DateMatch& time = *next_time;
-      if (!time.IsStandaloneTime()) {
-        ++next_time;
-        continue;
-      }
-
-      // The range is before the time
-      if (range.end <= time.begin) {
-        if (AreDateMatchesAdjacentAndMergeable(unilib, text, ignored_spans,
-                                               range.to, time) &&
-            !IsDateRangeTooLong(range)) {
-          std::vector<DateMatch> expanded_dates;
-          ExpandDateRange(range, &expanded_dates);
-
-          // Merge the expaneded date and with time
-          std::vector<DateMatch> merged_times;
-          for (const auto& expanded_date : expanded_dates) {
-            DateMatch merged_time = time;
-            MergeDateMatch(expanded_date, &merged_time, true);
-            merged_times.push_back(merged_time);
-          }
-          // Insert the expanded time before next_time and move next_time point
-          // to previous time.
-          next_time = times->insert(next_time, merged_times.begin(),
-                                    merged_times.end());
-          next_time += merged_times.size();
-
-          // Remove merged time. now next_time point to the time after the
-          // merged time.
-          next_time = times->erase(next_time);
-          // Remove merged range, now next_range point to the range after the
-          // merged range.
-          next_range = date_ranges->erase(next_range);
-        } else {
-          // range is behind time, check next range.
-          ++next_range;
-        }
-        break;
-      } else if (range.end > time.end && range.begin > time.begin) {
-        // The range is after the time
-        if (AreDateMatchesAdjacentAndMergeable(unilib, text, ignored_spans,
-                                               time, range.from) &&
-            !IsDateRangeTooLong(range)) {
-          std::vector<DateMatch> expanded_dates;
-          ExpandDateRange(range, &expanded_dates);
-
-          // Merge the expaneded dates with time
-          for (auto& expanded_date : expanded_dates) {
-            MergeDateMatch(time, &expanded_date, true);
-          }
-          // Insert expanded time before next_time and move next_time point to
-          // previous time.
-          next_time = times->insert(next_time, expanded_dates.begin(),
-                                    expanded_dates.end());
-          next_time += expanded_dates.size();
-
-          // Remove merged time. Now next_time point to the time after the
-          // merged time.
-          next_time = times->erase(next_time);
-          // Remove merged range. Now next_range point to the range after the
-          // merged range.
-          next_range = date_ranges->erase(next_range);
-          break;
-        } else {
-          // Since the range is after the time, we need to check the next time
-          // first
-          ++next_time;
-        }
-      } else {
-        // Range fully overlaps with time In this case, we move to the next time
-        ++next_time;
-      }
-    }
-  }
-}
-
 // Fills `DateTimes` proto from matched `DateMatch` and `DateRangeMatch`
 // instances.
 std::vector<Annotation> GetOutputAsAnnotationList(
@@ -877,25 +782,12 @@
       MergeDateRangeAndDate(unilib, text, options.ignored_spans, date_matches,
                             &date_range_matches);
       RemoveOverlappedDateByRange(date_range_matches, &date_matches);
-
-      if (options.expand_date_series) {
-        ExpandDateRangeAndMergeWithTime(unilib, text, options.ignored_spans,
-                                        &date_matches, &date_range_matches);
-      }
     }
     FillDateRangeInstances(date_range_matches, &date_annotations);
   }
 
   if (!date_matches.empty()) {
     FillDateInstances(unilib, text, options, &date_matches, &date_annotations);
-
-    int64 timestamp_ms = options.base_timestamp_millis;
-    if (timestamp_ms > 0) {
-      // The timestamp in options is milliseconds, the time_t is seconds from
-      // 00:00 Jan 1 1970 UTC.
-      time_t base_timestamp = timestamp_ms / 1000;
-      NormalizeDateTimes(base_timestamp, &date_annotations);
-    }
   }
   return date_annotations;
 }

diff --git a/native/annotator/grammar/dates/utils/date-utils.cc b/native/annotator/grammar/dates/utils/date-utils.cc
index 02f4873..5a68838 100644
--- a/native/annotator/grammar/dates/utils/date-utils.cc
+++ b/native/annotator/grammar/dates/utils/date-utils.cc

@@ -360,404 +360,6 @@
 }
 
 namespace {
-int NormalizeField(int base, int zero, int* valp, int carry_in) {
-  int carry_out = 0;
-  int val = *valp;
-  if (zero != 0 && val < 0) {
-    val += base;
-    carry_out -= 1;
-  }
-  val -= zero;
-  carry_out += val / base;
-  int rem = val % base;
-  if (carry_in != 0) {
-    carry_out += carry_in / base;
-    rem += carry_in % base;
-    if (rem < 0) {
-      carry_out -= 1;
-      rem += base;
-    } else if (rem >= base) {
-      carry_out += 1;
-      rem -= base;
-    }
-  }
-  if (rem < 0) {
-    carry_out -= 1;
-    rem += base;
-  }
-  *valp = rem + zero;
-  return carry_out;
-}
-
-int DaysPerYear(int year) {
-  if (IsLeapYear(year)) {
-    return DAYSPERLYEAR;
-  }
-  return DAYSPERNYEAR;
-}
-
-const int8 kDaysPer100Years[401] = {
-    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-};
-
-int DaysPer100Years(int eyear) { return 36524 + kDaysPer100Years[eyear]; }
-
-const int8 kDaysPer4Years[401] = {
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-};
-
-int DaysPer4Years(int eyear) { return 1460 + kDaysPer4Years[eyear]; }
-
-#define DAYORDYEARMAX (25252734927766553LL)
-#define DAYORDYEARMIN (-25252734927764584LL)
-
-// Normalize year, month, day, hour, minute and second to valid value. For
-// example:  1hour 15minute 62second is normalized as 1hour 16 minute 2second.
-bool NormalizeDateFields(int* year, int* month, int* day, int* hour,
-                         int* minute, int* second) {
-  int min_carry = NormalizeField(SECSPERMIN, 0, second, 0);
-  int hour_carry = NormalizeField(MINSPERHOUR, 0, minute, min_carry);
-  int day_carry = NormalizeField(HOURSPERDAY, 0, hour, hour_carry);
-  int year_carry = NormalizeField(MONSPERYEAR, 1, month, 0);
-  bool normalized = min_carry || hour_carry || day_carry || year_carry;
-
-  // Normalize the number of days within a 400-year (146097-day) period.
-  if (int c4_carry = NormalizeField(146097, 1, day, day_carry)) {
-    year_carry += c4_carry * 400;
-    normalized = true;
-  }
-
-  // Extract a [0:399] year calendrically equivalent to (year + year_carry)
-  // from that sum in order to simplify year/day normalization and to defer
-  // the possibility of int64 overflow until the final stage.
-  int eyear = *year % 400;
-  if (year_carry != 0) {
-    eyear += year_carry;
-    eyear %= 400;
-  }
-  if (eyear < 0) eyear += 400;
-  year_carry -= eyear;
-
-  int orig_day = *day;
-  if (*day > DAYSPERNYEAR) {
-    eyear += (*month > 2 ? 1 : 0);
-    if (*day > 146097 - DAYSPERNYEAR) {
-      // We often hit the 400th year when stepping a civil time backwards,
-      // so special case it to avoid counting up by 100/4/1 year chunks.
-      *day = DaysPerYear(eyear += 400 - 1) - (146097 - *day);
-    } else {
-      // Handle days in chunks of 100/4/1 years.
-      for (int ydays = DaysPer100Years(eyear); *day > ydays;
-           *day -= ydays, ydays = DaysPer100Years(eyear)) {
-        if ((eyear += 100) > 400) {
-          eyear -= 400;
-          year_carry += 400;
-        }
-      }
-      for (int ydays = DaysPer4Years(eyear); *day > ydays;
-           *day -= ydays, ydays = DaysPer4Years(eyear)) {
-        if ((eyear += 4) > 400) {
-          eyear -= 400;
-          year_carry += 400;
-        }
-      }
-      for (int ydays = DaysPerYear(eyear); *day > ydays;
-           *day -= ydays, ydays = DaysPerYear(eyear)) {
-        eyear += 1;
-      }
-    }
-    eyear -= (*month > 2 ? 1 : 0);
-  }
-  // Handle days within one year.
-  bool leap_year = IsLeapYear(eyear);
-  for (int mdays = kDaysPerMonth[leap_year][*month]; *day > mdays;
-       *day -= mdays, mdays = kDaysPerMonth[leap_year][*month]) {
-    if (++*month > MONSPERYEAR) {
-      *month = 1;
-      leap_year = IsLeapYear(++eyear);
-    }
-  }
-  if (*day != orig_day) normalized = true;
-
-  // Add the updated eyear back into (year + year_carry).
-  year_carry += eyear;
-  // Overflow.
-  if (*year > DAYORDYEARMAX - year_carry) {
-    return false;
-  } else if (*year < DAYORDYEARMIN - year_carry) {
-    return false;
-  }
-  *year += year_carry;
-  return true;
-}
-
-// Compute the day difference between the day of week in relative date and wday.
-// If the relative date is in future, return positive days. otherwise return the
-// negative future. For example:
-// if day of week in relative date is Mon this week and wday is Wed this week,
-// then return -2.
-// if day of week in relative date is Wed this week and wday is Mon this week,
-// then return 2.
-int32 RelativeDOWToDays(const Property& rd, const int wday) {
-  int days = -1;
-  int multiplier = 1;
-  for (int i = 9; i < rd.int_values.size(); ++i) {
-    int inter = rd.int_values.at(i);
-    int dow = rd.int_values.at(8) - 1;
-    int interval = 0;
-    int cur_multiplier = 1;
-    if (inter == RelativeParameter_::Interpretation_NEAREST_LAST ||
-        inter == RelativeParameter_::Interpretation_PREVIOUS) {
-      // Represent the DOW in the last week.
-      cur_multiplier = -1;
-      if (dow <= wday) {
-        interval = 7 + (wday - dow);
-      } else {
-        interval = 7 - (dow - wday);
-      }
-    } else if (inter == RelativeParameter_::Interpretation_SECOND_LAST) {
-      // Represent the DOW in the week before last week.
-      cur_multiplier = -1;
-      if (dow <= wday) {
-        interval = 14 + (wday - dow);
-      } else {
-        interval = 14 - (dow - wday);
-      }
-    } else if (inter == RelativeParameter_::Interpretation_NEAREST_NEXT ||
-               inter == RelativeParameter_::Interpretation_COMING) {
-      // Represent the DOW in the next week.
-      cur_multiplier = 1;
-      if (dow <= wday) {
-        interval = 7 - (wday - dow);
-      } else {
-        interval = 7 + (dow - wday);
-      }
-      // Represent the DOW in the week of next week.
-    } else if (inter == RelativeParameter_::Interpretation_SECOND_NEXT) {
-      cur_multiplier = 1;
-      if (dow <= wday) {
-        interval = 14 - (wday - dow);
-      } else {
-        interval = 14 + (dow - wday);
-      }
-      // Represent the DOW in the same week regardless of it's past of future.
-    } else if (inter == RelativeParameter_::Interpretation_CURRENT ||
-               inter == RelativeParameter_::Interpretation_NEAREST ||
-               inter == RelativeParameter_::Interpretation_SOME) {
-      interval = abs(wday - dow);
-      cur_multiplier = dow < wday ? -1 : 1;
-    }
-    if (days == -1 || interval < days) {
-      days = interval;
-      multiplier = cur_multiplier;
-    }
-  }
-  return days * multiplier;
-}
-
-// Compute the absolute date and time based on timestamp and relative date and
-// fill the fields year, month, day, hour, minute and second.
-bool RelativeDateToAbsoluteDate(struct tm ts, AnnotationData* date) {
-  int idx = GetPropertyIndex(kDateTimeRelative, *date);
-  if (idx < 0) {
-    return false;
-  }
-  Property* datetime = FindOrCreateDefaultDateTime(date);
-  Property* relative = &date->properties[idx];
-  int year = ts.tm_year + 1900;  // The year in struct tm is since 1900
-  int month = ts.tm_mon + 1;     // Convert to [1, 12]
-  int day = ts.tm_mday;
-  int hour = ts.tm_hour;
-  int minute = ts.tm_min;
-  int second = ts.tm_sec;
-  // If the instance has time, it doesn't make sense to update time based on
-  // relative time. so we simply clear the time in relative date.
-  // For example: 2 days 1 hours ago at 10:00am, the 1 hours will be removed.
-  if (datetime->int_values[3] > 0) {
-    relative->int_values[5] = -1;
-    relative->int_values[6] = -1;
-    relative->int_values[7] = -1;
-  }
-
-  // Get the relative year, month, day, hour, minute and second.
-  if (relative->int_values[8] > 0) {
-    day += RelativeDOWToDays(*relative, ts.tm_wday);
-  } else {
-    int multipler = (relative->int_values[0] > 0) ? 1 : -1;
-    if (relative->int_values[1] > 0) {
-      year += relative->int_values[1] * multipler;
-    }
-    if (relative->int_values[2] > 0) {
-      month += relative->int_values[2] * multipler;
-    }
-    if (relative->int_values[3] > 0) {
-      day += relative->int_values[3] * multipler;
-    }
-    if (relative->int_values[5] > 0) {
-      hour += relative->int_values[5] * multipler;
-    }
-    if (relative->int_values[6] > 0) {
-      minute += relative->int_values[6] * multipler;
-    }
-    if (relative->int_values[7] > 0) {
-      second += relative->int_values[7] * multipler;
-    }
-  }
-
-  if (!NormalizeDateFields(&year, &month, &day, &hour, &minute, &second)) {
-    TC3_VLOG(1) << "Can not normalize date " << year << "-" << month << "-"
-                << day << " " << hour << ":" << minute << ":" << second;
-    return false;
-  }
-
-  // Update year, month, day, hour, minute and second of date instance. We only
-  // update the time unit if the relative date has it. For example:
-  // if the relative date is "1 hour ago", then we don't set minite and second
-  // in data intance, but we set hour and the time unit which is larger than
-  // hour like day, month and year.
-  // if the relative date is "1 year ago", we only update year in date instance
-  // and ignore others.
-  bool set = false;
-  if (relative->int_values[7] >= 0) {
-    set = true;
-    datetime->int_values[5] = second;
-  }
-  if (set || relative->int_values[6] >= 0) {
-    set = true;
-    datetime->int_values[4] = minute;
-  }
-  if (set || relative->int_values[5] >= 0) {
-    set = true;
-    datetime->int_values[3] = hour;
-  }
-  if (set || relative->int_values[3] >= 0 || relative->int_values[8] >= 0) {
-    set = true;
-    datetime->int_values[2] = day;
-  }
-  if (set || relative->int_values[2] >= 0) {
-    set = true;
-    datetime->int_values[1] = month;
-  }
-  if (set || relative->int_values[1] >= 0) {
-    set = true;
-    datetime->int_values[0] = year;
-  }
-  return true;
-}
-
-// If the year is less than 100 and has no bc/ad, it should be normalized.
-static constexpr int kMinYearForNormalization = 100;
-
-// Normalize date instance.
-void NormalizeDateInstance(time_t timestamp, AnnotationData* inst) {
-  struct tm ts;
-  localtime_r(&timestamp, &ts);
-
-  int idx = GetPropertyIndex(kDateTime, *inst);
-  if (idx >= 0) {
-    Property* datetime = &inst->properties[idx];
-    int bc_ad = -1;
-    idx = GetPropertyIndex(kDateTimeSupplementary, *inst);
-    if (idx >= 0) {
-      bc_ad = inst->properties[idx].int_values[0];
-    }
-
-    int year = datetime->int_values[0];
-    if (bc_ad < 0 && year > 0 && year < kMinYearForNormalization) {
-      if (2000 + year <= ts.tm_year + 1900) {
-        datetime->int_values[0] = 2000 + year;
-      } else {
-        datetime->int_values[0] = 1900 + year;
-      }
-    }
-    // Day-of-week never only appear in date instance, it must be in both
-    // relative date and non-relative date. If the date instance already has day
-    // like "Monday, March 19", it doesn't make sense to convert the dow to
-    // absolute date again.
-    if (datetime->int_values[7] > 0 && datetime->int_values[2] > 0) {
-      return;
-    }
-  }
-  RelativeDateToAbsoluteDate(ts, inst);
-}
-
-// Convert normalized date instance to unix time.
-time_t DateInstanceToUnixTimeInternal(time_t timestamp,
-                                      const AnnotationData& inst) {
-  int idx = GetPropertyIndex(kDateTime, inst);
-  if (idx < 0) {
-    return -1;
-  }
-  const Property& prop = inst.properties[idx];
-
-  struct tm ts;
-  localtime_r(&timestamp, &ts);
-
-  if (prop.int_values[0] > 0) {
-    ts.tm_year = prop.int_values[0] - 1900;
-  }
-  if (prop.int_values[1] > 0) {
-    ts.tm_mon = prop.int_values[1] - 1;
-  }
-  if (prop.int_values[2] > 0) {
-    ts.tm_mday = prop.int_values[2];
-  }
-  if (prop.int_values[3] > 0) {
-    ts.tm_hour = prop.int_values[3];
-  }
-  if (prop.int_values[4] > 0) {
-    ts.tm_min = prop.int_values[4];
-  }
-  if (prop.int_values[5] > 0) {
-    ts.tm_sec = prop.int_values[5];
-  }
-  ts.tm_wday = -1;
-  ts.tm_yday = -1;
-  return mktime(&ts);
-}
-}  // namespace
-
-void NormalizeDateTimes(time_t timestamp, std::vector<Annotation>* dates) {
-  for (int i = 0; i < dates->size(); ++i) {
-    if ((*dates)[i].data.type == kDateTimeType) {
-      NormalizeDateInstance(timestamp, &(*dates)[i].data);
-    }
-  }
-}
-
-namespace {
 bool AnyOverlappedField(const DateMatch& prev, const DateMatch& next) {
 #define Field(f) \
   if (prev.f && next.f) return true

diff --git a/native/annotator/grammar/dates/utils/date-utils.h b/native/annotator/grammar/dates/utils/date-utils.h
index de459ea..5d4fdca 100644
--- a/native/annotator/grammar/dates/utils/date-utils.h
+++ b/native/annotator/grammar/dates/utils/date-utils.h

@@ -62,12 +62,6 @@
 // from matched rule.
 void FillDateRangeInstance(const DateRangeMatch& range, Annotation* instance);
 
-// Normalize DateTimes based on timestamp.
-// Currently it does two things:
-//   -- Convert relative date to absolute date
-//   -- Normalize year if year is two digit
-void NormalizeDateTimes(time_t timestamp, std::vector<Annotation>* dates);
-
 // Merge the fields in DateMatch prev to next if there is no overlapped field.
 // If update_span is true, the span of next is also updated.
 // e.g.: prev is 11am, next is: May 1, then the merged next is May 1, 11am

diff --git a/native/annotator/model.fbs b/native/annotator/model.fbs
index f5a241f..31fac49 100755
--- a/native/annotator/model.fbs
+++ b/native/annotator/model.fbs

@@ -396,16 +396,6 @@
   // instance: "Monday" and "6pm".
   enable_date_range:bool = true;
 
-  // If enabled, expand a date series. Must have date_range enabled to be
-  // used. The date range cannot exceed 30 days.
-  // input: April 4-6, 6:30pm
-  // If the flag is true, the extracted annotation will contain 3 instance
-  // which are April 4 at 6:30pm, April 5 at 6:30pm and April 6 at 6:30pm
-  // all have the same begin and end annotation
-  // If the flag is false, the extracted annotation contains one time
-  // range instance and one date instance
-  expand_date_series:bool = true;
-
   // If enabled, the rule priority score is used to set the priority score of
   // the annotation.
   // In case of false the annotation priority score is set from
@@ -834,7 +824,7 @@
 
 namespace libtextclassifier3;
 table NumberAnnotatorOptions {
-  // If true, number annotations will be produced.
+  // If true, number and percentage annotations will be produced.
   enabled:bool = false;
 
   // Score to assign to the annotated numbers and percentages in the annotator.
@@ -843,32 +833,34 @@
   // Number priority score used for conflict resolution with the other models.
   priority_score:float = 0;
 
-  // The modes in which to enable number annotations.
+  // The modes in which to enable number and percentage annotations.
   enabled_modes:ModeFlag = ALL;
 
   // The annotation usecases for which to produce number annotations.
   // This is a flag field for values of AnnotationUsecase.
   enabled_annotation_usecases:uint = 4294967295;
 
-  // A list of codepoints that can form a prefix of a valid number.
+  // [Deprecated] A list of codepoints that can form a prefix of a valid number.
   allowed_prefix_codepoints:[int];
 
-  // A list of codepoints that can form a suffix of a valid number.
+  // [Deprecated] A list of codepoints that can form a suffix of a valid number.
   allowed_suffix_codepoints:[int];
 
-  // List of codepoints that will be stripped from beginning of predicted spans.
+  // [Deprecated] List of codepoints that will be stripped from beginning of
+  // predicted spans.
   ignored_prefix_span_boundary_codepoints:[int];
 
-  // List of codepoints that will be stripped from end of predicted spans.
+  // [Deprecated] List of codepoints that will be stripped from end of predicted
+  // spans.
   ignored_suffix_span_boundary_codepoints:[int];
 
-  // If true, percent annotations will be produced.
+  // [Deprecated] If true, percent annotations will be produced.
   enable_percentage:bool = false;
 
   // Zero separated and ordered list of suffixes that mark a percent.
   percentage_pieces_string:string (shared);
 
-  // List of suffixes offsets in the percent_pieces_string string.
+  // [Deprecated] List of suffix offsets in the percentage_pieces_string string.
   percentage_pieces_offsets:[int];
 
   // Priority score for the percentage annotation.
@@ -881,6 +873,10 @@
   // The maximum number of digits an annotated number can have. Requirement:
   // the value should be less or equal to 20.
   max_number_of_digits:int = 20;
+
+  // The annotation usecases for which to produce percentage annotations.
+  // This is a flag field for values of AnnotationUsecase.
+  percentage_annotation_usecases:uint = 2;
 }
 
 // DurationAnnotator is so far tailored for English and Japanese only.

diff --git a/native/annotator/number/number.cc b/native/annotator/number/number.cc
index fe986ae..3be6ad8 100644
--- a/native/annotator/number/number.cc
+++ b/native/annotator/number/number.cc

@@ -23,6 +23,7 @@
 #include "annotator/collections.h"
 #include "annotator/types.h"
 #include "utils/base/logging.h"
+#include "utils/strings/split.h"
 #include "utils/utf8/unicodetext.h"
 
 namespace libtextclassifier3 {
@@ -149,9 +150,8 @@
       UTF8ToUnicodeText(tokens[suffix_start_index].value, /*do_copy=*/false)
           .begin();
 
-  if (GetPercentSuffixLength(UTF8ToUnicodeText(tokens[suffix_start_index].value,
-                                               /*do_copy=*/false),
-                             0) > 0 &&
+  if (percent_suffixes_.find(tokens[suffix_start_index].value) !=
+          percent_suffixes_.end() &&
       TokensAreValidEnding(tokens, suffix_start_index + 1)) {
     return true;
   }
@@ -175,6 +175,25 @@
   return false;
 }
 
+int NumberAnnotator::FindPercentSuffixEndCodepoint(
+    const std::vector<Token>& tokens,
+    const int suffix_token_start_index) const {
+  if (suffix_token_start_index >= tokens.size()) {
+    return -1;
+  }
+
+  if (percent_suffixes_.find(tokens[suffix_token_start_index].value) !=
+          percent_suffixes_.end() &&
+      TokensAreValidEnding(tokens, suffix_token_start_index + 1)) {
+    return tokens[suffix_token_start_index].end;
+  }
+  if (tokens[suffix_token_start_index].is_whitespace) {
+    return FindPercentSuffixEndCodepoint(tokens, suffix_token_start_index + 1);
+  }
+
+  return -1;
+}
+
 bool NumberAnnotator::TryParseNumber(const UnicodeText& token_text,
                                      const bool is_negative,
                                      int64* parsed_int_value,
@@ -198,8 +217,7 @@
 bool NumberAnnotator::FindAll(const UnicodeText& context,
                               AnnotationUsecase annotation_usecase,
                               std::vector<AnnotatedSpan>* result) const {
-  if (!options_->enabled() || ((1 << annotation_usecase) &
-                               options_->enabled_annotation_usecases()) == 0) {
+  if (!options_->enabled()) {
     return true;
   }
 
@@ -230,80 +248,67 @@
     }
 
     const bool has_decimal = !(parsed_int_value == parsed_double_value);
+    const int new_start_codepoint = is_negative ? token.start - 1 : token.start;
 
-    ClassificationResult classification{Collections::Number(),
-                                        options_->score()};
-    classification.numeric_value = parsed_int_value;
-    classification.numeric_double_value = parsed_double_value;
-    classification.priority_score =
-        has_decimal ? options_->float_number_priority_score()
-                    : options_->priority_score();
+    if (((1 << annotation_usecase) & options_->enabled_annotation_usecases()) !=
+        0) {
+      result->push_back(CreateAnnotatedSpan(
+          new_start_codepoint, token.end, parsed_int_value, parsed_double_value,
+          Collections::Number(), options_->score(),
+          /*priority_score=*/
+          has_decimal ? options_->float_number_priority_score()
+                      : options_->priority_score()));
+    }
 
-    AnnotatedSpan annotated_span;
-    annotated_span.span = {is_negative ? token.start - 1 : token.start,
-                           token.end};
-    annotated_span.classification.push_back(classification);
-    result->push_back(annotated_span);
-  }
-
-  if (options_->enable_percentage()) {
-    FindPercentages(context, result);
+    const int percent_end_codepoint =
+        FindPercentSuffixEndCodepoint(tokens, i + 1);
+    if (percent_end_codepoint != -1 &&
+        ((1 << annotation_usecase) &
+         options_->percentage_annotation_usecases()) != 0) {
+      result->push_back(CreateAnnotatedSpan(
+          new_start_codepoint, percent_end_codepoint, parsed_int_value,
+          parsed_double_value, Collections::Percentage(), options_->score(),
+          options_->percentage_priority_score()));
+    }
   }
 
   return true;
 }
 
-std::vector<uint32> NumberAnnotator::FlatbuffersIntVectorToStdVector(
-    const flatbuffers::Vector<int32_t>* ints) {
-  if (ints == nullptr) {
-    return {};
-  }
-  return {ints->begin(), ints->end()};
+// Builds the AnnotatedSpan for a number or percentage match: the span is
+// {start, end} and it carries a single ClassificationResult for `collection`
+// with the given score, priority score and parsed numeric values.
+// NOTE(review): FindAll passes the integer filled in by TryParseNumber (an
+// int64) as `int_value`, which is declared as plain int -- confirm that large
+// values are not narrowed.
+AnnotatedSpan NumberAnnotator::CreateAnnotatedSpan(
+    const int start, const int end, const int int_value,
+    const double double_value, const std::string collection, const float score,
+    const float priority_score) const {
+  AnnotatedSpan annotated_span;
+  annotated_span.span = {start, end};
+
+  // Store the classification first, then fill in its fields in place.
+  annotated_span.classification.push_back(
+      ClassificationResult{collection, score});
+  ClassificationResult& stored = annotated_span.classification.back();
+  stored.numeric_value = int_value;
+  stored.numeric_double_value = double_value;
+  stored.priority_score = priority_score;
+  return annotated_span;
 }
 
-int NumberAnnotator::GetPercentSuffixLength(const UnicodeText& context,
-                                            int index_codepoints) const {
-  if (index_codepoints >= context.size_codepoints()) {
-    return -1;
+// Builds the set of percent suffixes from a flatbuffer string that holds the
+// suffixes separated by '\0'. Every whitespace character (per std::isspace) is
+// removed from a suffix before it is inserted, and the result is inserted even
+// if nothing is left of it. Returns an empty set when the string is null.
+// NOTE(review): uses std::remove_if and std::isspace -- confirm <algorithm> and
+// <cctype> are included by this file.
+std::unordered_set<std::string>
+NumberAnnotator::FromFlatbufferStringToUnordredSet(
+    const flatbuffers::String* flatbuffer_percent_strings) {
+  std::unordered_set<std::string> strings_set;
+  if (flatbuffer_percent_strings == nullptr) {
+    return strings_set;
   }
-  auto context_it = context.begin();
-  std::advance(context_it, index_codepoints);
-  const StringPiece suffix_context(
-      context_it.utf8_data(),
-      std::distance(context_it.utf8_data(), context.end().utf8_data()));
-  StringSet::Match match;
-  percentage_suffixes_trie_.LongestPrefixMatch(suffix_context, &match);
 
-  if (match.match_length == -1) {
-    return match.match_length;
-  } else {
-    return UTF8ToUnicodeText(context_it.utf8_data(), match.match_length,
-                             /*do_copy=*/false)
-        .size_codepoints();
+  // Split on the '\0' separators; each piece is copied so it can be edited.
+  const std::string percent_strings = flatbuffer_percent_strings->str();
+  for (StringPiece suffix : strings::Split(percent_strings, '\0')) {
+    std::string percent_suffix = suffix.ToString();
+    // Erase-remove: drop every whitespace character from the suffix. The
+    // lambda takes unsigned char so std::isspace never sees a negative value.
+    percent_suffix.erase(
+        std::remove_if(percent_suffix.begin(), percent_suffix.end(),
+                       [](unsigned char x) { return std::isspace(x); }),
+        percent_suffix.end());
+    strings_set.insert(percent_suffix);
   }
-}
 
-void NumberAnnotator::FindPercentages(
-    const UnicodeText& context, std::vector<AnnotatedSpan>* result) const {
-  const int initial_result_size = result->size();
-  for (int i = 0; i < initial_result_size; ++i) {
-    AnnotatedSpan annotated_span = (*result)[i];
-    if (annotated_span.classification.empty() ||
-        annotated_span.classification[0].collection != Collections::Number()) {
-      continue;
-    }
-
-    const int match_length =
-        GetPercentSuffixLength(context, annotated_span.span.second);
-    if (match_length > 0) {
-      annotated_span.span = {annotated_span.span.first,
-                             annotated_span.span.second + match_length};
-      annotated_span.classification[0].collection = Collections::Percentage();
-      annotated_span.classification[0].priority_score =
-          options_->percentage_priority_score();
-      result->push_back(annotated_span);
-    }
-  }
+  return strings_set;
 }
 
 }  // namespace libtextclassifier3

diff --git a/native/annotator/number/number.h b/native/annotator/number/number.h
index 6022063..d83bea0 100644
--- a/native/annotator/number/number.h
+++ b/native/annotator/number/number.h

@@ -46,17 +46,8 @@
                              /*internal_tokenizer_codepoint_ranges=*/{},
                              /*split_on_script_change=*/false,
                              /*icu_preserve_whitespace_tokens=*/true)),
-        percentage_pieces_string_(
-            (options->percentage_pieces_string() == nullptr)
-                ? StringPiece()
-                : StringPiece(options->percentage_pieces_string()->data(),
-                              options->percentage_pieces_string()->size())),
-        percentage_pieces_offsets_(FlatbuffersIntVectorToStdVector(
-            options->percentage_pieces_offsets())),
-        percentage_suffixes_trie_(
-            SortedStringsTable(/*num_pieces=*/percentage_pieces_offsets_.size(),
-                               /*offsets=*/percentage_pieces_offsets_.data(),
-                               /*pieces=*/percentage_pieces_string_)),
+        percent_suffixes_(FromFlatbufferStringToUnordredSet(
+            options_->percentage_pieces_string())),
         max_number_of_digits_(options->max_number_of_digits()) {}
 
   // Classifies given text, and if it is a number, it passes the result in
@@ -71,12 +62,10 @@
                std::vector<AnnotatedSpan>* result) const;
 
  private:
-  static std::vector<uint32> FlatbuffersIntVectorToStdVector(
-      const flatbuffers::Vector<int32_t>* ints);
-
-  // Get the length of the percent suffix at the specified index in the context.
-  int GetPercentSuffixLength(const UnicodeText& context,
-                             int index_codepoints) const;
+  // Converts a Flatbuffer string containing zero-separated ('\0') percent
+  // suffixes to an unordered set. Whitespace characters are stripped from each
+  // suffix before it is inserted. A null string yields an empty set.
+  static std::unordered_set<std::string> FromFlatbufferStringToUnordredSet(
+      const flatbuffers::String* flatbuffer_percent_strings);
 
   // Checks if the annotated numbers from the context represent percentages.
   // If yes, replaces the collection type and the annotation boundary in the
@@ -87,38 +76,46 @@
   // Checks if the tokens from in the interval [start_index-2, start_index] are
   // valid characters that can preced a number context.
   bool TokensAreValidStart(const std::vector<Token>& tokens,
-                           const int start_index) const;
+                           int start_index) const;
 
   // Checks if the tokens in the interval (..., prefix_end_index] are a valid
   // number prefix.
   bool TokensAreValidNumberPrefix(const std::vector<Token>& tokens,
-                                  const int prefix_end_index) const;
+                                  int prefix_end_index) const;
 
   // Checks if the tokens from in the interval [ending_index, ending_index+2]
   // are valid characters that can follow a number context.
   bool TokensAreValidEnding(const std::vector<Token>& tokens,
-                            const int ending_index) const;
+                            int ending_index) const;
 
   // Checks if the tokens in the interval [suffix_start_index, ...) are a valid
   // number suffix.
   bool TokensAreValidNumberSuffix(const std::vector<Token>& tokens,
-                                  const int suffix_start_index) const;
+                                  int suffix_start_index) const;
+
+  // Looks for a percent suffix token at suffix_token_start_index, skipping any
+  // whitespace tokens that come first. The suffix token must be followed by a
+  // valid ending (see TokensAreValidEnding). Returns the end codepoint of the
+  // suffix token, or -1 if no such suffix is found or the index is past the
+  // last token.
+  int FindPercentSuffixEndCodepoint(const std::vector<Token>& tokens,
+                                    int suffix_token_start_index) const;
 
   // Checks if the given text represents a number (either int or double).
-  bool TryParseNumber(const UnicodeText& token_text, const bool is_negative,
+  bool TryParseNumber(const UnicodeText& token_text, bool is_negative,
                       int64* parsed_int_value,
                       double* parsed_double_value) const;
 
   // Checks if a word contains only CJT characters.
   bool IsCJTterm(UnicodeText::const_iterator token_begin_it,
-                 const int token_length) const;
+                 int token_length) const;
+
+  // Builds an AnnotatedSpan covering {start, end} that holds a single
+  // ClassificationResult for `collection` with the given score, priority score
+  // and numeric values.
+  // NOTE(review): FindAll passes its parsed integer (TryParseNumber fills an
+  // int64) as `int_value`, which is a plain int here -- confirm that large
+  // values are not truncated.
+  AnnotatedSpan CreateAnnotatedSpan(int start, int end, int int_value,
+                                    double double_value,
+                                    const std::string collection, float score,
+                                    float priority_score) const;
 
   const NumberAnnotatorOptions* options_;
   const UniLib* unilib_;
   const Tokenizer tokenizer_;
-  const StringPiece percentage_pieces_string_;
-  const std::vector<uint32> percentage_pieces_offsets_;
-  const SortedStringsTable percentage_suffixes_trie_;
+  // Percent suffixes built from the options' percentage_pieces_string. A token
+  // whose value is in this set is treated as a percent suffix.
+  const std::unordered_set<std::string> percent_suffixes_;
   const int max_number_of_digits_;
 };
 

diff --git a/native/annotator/test_data/test_grammar_model.fb b/native/annotator/test_data/test_grammar_model.fb
index d6affd3..73afd79 100644
--- a/native/annotator/test_data/test_grammar_model.fb
+++ b/native/annotator/test_data/test_grammar_model.fb
Binary files differ

diff --git a/native/annotator/test_data/test_model.fb b/native/annotator/test_data/test_model.fb
index 6462e9c..5af8e02 100644
--- a/native/annotator/test_data/test_model.fb
+++ b/native/annotator/test_data/test_model.fb
Binary files differ

diff --git a/native/annotator/test_data/wrong_embeddings.fb b/native/annotator/test_data/wrong_embeddings.fb
index a9815ea..e79ae86 100644
--- a/native/annotator/test_data/wrong_embeddings.fb
+++ b/native/annotator/test_data/wrong_embeddings.fb
Binary files differ

diff --git a/native/annotator/types.h b/native/annotator/types.h
index 0aba85a..60872f1 100644
--- a/native/annotator/types.h
+++ b/native/annotator/types.h

@@ -521,7 +521,7 @@
 
 // Represents a result of Annotate call.
 struct AnnotatedSpan {
-  enum class Source { OTHER, KNOWLEDGE, DURATION, DATETIME };
+  // Tags where a span came from. annotator.cc sets PERSON_NAME on results of
+  // the person name engine and treats such spans as never conflicting.
+  enum class Source { OTHER, KNOWLEDGE, DURATION, DATETIME, PERSON_NAME };
 
   // Unicode codepoint indices in the input string.
   CodepointSpan span = {kInvalidIndex, kInvalidIndex};
commit	d0ae7c6c171d6d136ec84d68fdb3bc5853ae0272	[log] [tgz]
author	Tony Mak <tonymak@google.com>	Fri Mar 27 13:58:00 2020 +0000
committer	Tony Mak <tonymak@google.com>	Fri Mar 27 14:09:13 2020 +0000
tree	9bda147cd2f5f7a1eea190f1b07ca63c6a50b1c1
parent	d99d58c4636e138c0647af6eb4de45210f1efda6 [diff]