| /* |
| * Copyright (C) 2018 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_EXTRACTOR_H_ |
| #define LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_EXTRACTOR_H_ |
| |
| #include <string> |
| #include <unordered_map> |
| #include <vector> |
| |
| #include "annotator/model_generated.h" |
| #include "annotator/types.h" |
| #include "utils/strings/stringpiece.h" |
| #include "utils/utf8/unicodetext.h" |
| #include "utils/utf8/unilib.h" |
| |
| namespace libtextclassifier3 { |
| |
| struct CompiledRule { |
| // The compiled regular expression. |
| std::unique_ptr<const UniLib::RegexPattern> compiled_regex; |
| |
| // The uncompiled pattern and information about the pattern groups. |
| const DatetimeModelPattern_::Regex* regex; |
| |
| // DatetimeModelPattern which 'regex' is part of and comes from. |
| const DatetimeModelPattern* pattern; |
| }; |
| |
| // A helper class for DatetimeParser that extracts structured data |
| // (DateParseDate) from the current match of the passed RegexMatcher. |
| class DatetimeExtractor { |
| public: |
| DatetimeExtractor( |
| const CompiledRule& rule, const UniLib::RegexMatcher& matcher, |
| int locale_id, const UniLib& unilib, |
| const std::vector<std::unique_ptr<const UniLib::RegexPattern>>& |
| extractor_rules, |
| const std::unordered_map<DatetimeExtractorType, |
| std::unordered_map<int, int>>& |
| type_and_locale_to_extractor_rule) |
| : rule_(rule), |
| matcher_(matcher), |
| locale_id_(locale_id), |
| unilib_(unilib), |
| rules_(extractor_rules), |
| type_and_locale_to_rule_(type_and_locale_to_extractor_rule) {} |
| bool Extract(DateParseData* result, CodepointSpan* result_span) const; |
| |
| private: |
| bool RuleIdForType(DatetimeExtractorType type, int* rule_id) const; |
| |
| // Returns true if the rule for given extractor matched. If it matched, |
| // match_result will contain the first group of the rule (if match_result not |
| // nullptr). |
| bool ExtractType(const UnicodeText& input, |
| DatetimeExtractorType extractor_type, |
| UnicodeText* match_result = nullptr) const; |
| |
| bool GroupTextFromMatch(int group_id, UnicodeText* result) const; |
| |
| // Updates the span to include the current match for the given group. |
| bool UpdateMatchSpan(int group_id, CodepointSpan* span) const; |
| |
| // Returns true if any of the extractors from 'mapping' matched. If it did, |
| // will fill 'result' with the associated value from 'mapping'. |
| template <typename T> |
| bool MapInput(const UnicodeText& input, |
| const std::vector<std::pair<DatetimeExtractorType, T>>& mapping, |
| T* result) const; |
| |
| bool ParseDigits(const UnicodeText& input, int* parsed_digits) const; |
| bool ParseWrittenNumber(const UnicodeText& input, int* parsed_number) const; |
| bool ParseYear(const UnicodeText& input, int* parsed_year) const; |
| bool ParseMonth(const UnicodeText& input, int* parsed_month) const; |
| bool ParseAMPM(const UnicodeText& input, |
| DateParseData::AMPM* parsed_ampm) const; |
| bool ParseRelation(const UnicodeText& input, |
| DateParseData::Relation* parsed_relation) const; |
| bool ParseRelationDistance(const UnicodeText& input, |
| int* parsed_distance) const; |
| bool ParseTimeUnit(const UnicodeText& input, |
| DateParseData::TimeUnit* parsed_time_unit) const; |
| bool ParseRelationType( |
| const UnicodeText& input, |
| DateParseData::RelationType* parsed_relation_type) const; |
| bool ParseWeekday(const UnicodeText& input, |
| DateParseData::RelationType* parsed_weekday) const; |
| |
| const CompiledRule& rule_; |
| const UniLib::RegexMatcher& matcher_; |
| int locale_id_; |
| const UniLib& unilib_; |
| const std::vector<std::unique_ptr<const UniLib::RegexPattern>>& rules_; |
| const std::unordered_map<DatetimeExtractorType, std::unordered_map<int, int>>& |
| type_and_locale_to_rule_; |
| }; |
| |
| } // namespace libtextclassifier3 |
| |
| #endif // LIBTEXTCLASSIFIER_ANNOTATOR_DATETIME_EXTRACTOR_H_ |