philip.liard@gmail.com | 6a0a07f | 2011-09-21 17:43:54 +0000 | [diff] [blame] | 1 | // Copyright (C) 2011 The Libphonenumber Authors |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | // |
| 15 | // Author: Lara Rennie |
| 16 | // Author: Tao Huang |
| 17 | // |
| 18 | // This is a direct port from PhoneNumberMatcher.java. |
| 19 | // Changes to this class should also happen to the Java version, whenever it |
| 20 | // makes sense. |
| 21 | |
| 22 | #ifndef I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_ |
| 23 | #define I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_ |
| 24 | |
| 25 | #include <string> |
philip.liard@gmail.com | e71e831 | 2012-04-27 14:36:02 +0000 | [diff] [blame] | 26 | #include <vector> |
philip.liard@gmail.com | 6a0a07f | 2011-09-21 17:43:54 +0000 | [diff] [blame] | 27 | |
philip.liard@gmail.com | af4a2ce | 2013-04-30 11:35:55 +0000 | [diff] [blame] | 28 | #include "phonenumbers/base/basictypes.h" |
| 29 | #include "phonenumbers/base/memory/scoped_ptr.h" |
philip.liard@gmail.com | e71e831 | 2012-04-27 14:36:02 +0000 | [diff] [blame] | 30 | #include "phonenumbers/callback.h" |
philip.liard@gmail.com | 6a0a07f | 2011-09-21 17:43:54 +0000 | [diff] [blame] | 31 | #include "phonenumbers/regexp_adapter.h" |
| 32 | |
| 33 | namespace i18n { |
| 34 | namespace phonenumbers { |
| 35 | |
// Forward declaration of the callback template that appears in the signature
// of PhoneNumberMatcher::CheckNumberGroupingIsValid() below.
davinci@google.com | 6276be9 | 2012-07-04 13:52:11 +0000 | [diff] [blame] | 36 | template <class R, class A1, class A2, class A3, class A4> |
| 37 | class ResultCallback4; |
| 38 | |
// Lets the declarations in this namespace refer to string and vector without
// the std:: qualifier.
philip.liard@gmail.com | 6a0a07f | 2011-09-21 17:43:54 +0000 | [diff] [blame] | 39 | using std::string; |
philip.liard@gmail.com | e71e831 | 2012-04-27 14:36:02 +0000 | [diff] [blame] | 40 | using std::vector; |
philip.liard@gmail.com | 6a0a07f | 2011-09-21 17:43:54 +0000 | [diff] [blame] | 41 | |
// Forward declarations of types that this header uses only through pointers,
// references or scoped_ptr.
dbeaumont@google.com | b348522 | 2012-07-24 14:09:14 +0000 | [diff] [blame] | 42 | class AlternateFormats; |
philip.liard@gmail.com | e71e831 | 2012-04-27 14:36:02 +0000 | [diff] [blame] | 43 | class NumberFormat; |
philip.liard@gmail.com | 6a0a07f | 2011-09-21 17:43:54 +0000 | [diff] [blame] | 44 | class PhoneNumber; |
| 45 | class PhoneNumberMatch; |
| 46 | class PhoneNumberMatcherRegExps; |
| 47 | class PhoneNumberUtil; |
| 48 | |
// Iterates over the phone numbers found in a piece of text: HasNext() reports
// whether the text has another match and Next() retrieves it. The requested
// Leniency controls how strictly candidates are validated. Instances are not
// meant to be copied or assigned (see DISALLOW_COPY_AND_ASSIGN at the end of
// the class).
| 49 | class PhoneNumberMatcher { |
| 50 | friend class PhoneNumberMatcherTest; |
| 51 | public: |
| 52 | // Leniency when finding potential phone numbers in text segments. The levels |
| 53 | // here are ordered in increasing strictness. |
| 54 | enum Leniency { |
| 55 | // Phone numbers accepted are possible, but not necessarily valid. |
| 56 | POSSIBLE, |
| 57 | // Phone numbers accepted are possible and valid. |
| 58 | VALID, |
| 59 | // Phone numbers accepted are valid and are grouped in a possible way for |
| 60 | // this locale. For example, a US number written as "65 02 53 00 00" is not |
| 61 | // accepted at this leniency level, whereas "650 253 0000" or "6502530000" |
| 62 | // are. Numbers with more than one '/' symbol are also dropped at this |
| 63 | // level. |
philip.liard@gmail.com | 1fb4d23 | 2011-11-14 14:20:32 +0000 | [diff] [blame] | 64 | // Warning: The next two levels might result in lower coverage especially |
| 65 | // for regions outside of country code "+1". If you are not sure about which |
| 66 | // level to use, you can send an e-mail to the discussion group |
| 67 | // http://groups.google.com/group/libphonenumber-discuss/ |
philip.liard@gmail.com | 6a0a07f | 2011-09-21 17:43:54 +0000 | [diff] [blame] | 68 | STRICT_GROUPING, |
| 69 | // Phone numbers accepted are valid and are grouped in the same way that we |
| 70 | // would have formatted it, or as a single block. For example, a US number |
| 71 | // written as "650 2530000" is not accepted at this leniency level, whereas |
| 72 | // "650 253 0000" or "6502530000" are. |
| 73 | EXACT_GROUPING, |
| 74 | }; |
| 75 | |
| 76 | // Constructs a phone number matcher. |
// NOTE(review): the parameters presumably initialise the like-named members
// declared at the end of this class (phone_util_, text_, preferred_region_,
// leniency_, max_tries_), whose comments describe their meaning -- confirm
// against the constructor definition.
| 77 | PhoneNumberMatcher(const PhoneNumberUtil& util, |
| 78 | const string& text, |
| 79 | const string& region_code, |
| 80 | Leniency leniency, |
| 81 | int max_tries); |
| 82 | |
| 83 | // Wrapper to construct a phone number matcher, with no limitation on the |
| 84 | // number of retries and VALID Leniency. |
// NOTE(review): no PhoneNumberUtil is passed to this overload; which instance
// it binds to phone_util_ is not visible in this header -- confirm.
| 85 | PhoneNumberMatcher(const string& text, |
| 86 | const string& region_code); |
| 87 | |
| 88 | ~PhoneNumberMatcher(); |
| 89 | |
| 90 | // Returns true if the text sequence has another match. |
| 91 | bool HasNext(); |
| 92 | |
| 93 | // Gets next match from text sequence. |
// NOTE(review): the bool result presumably reports whether a match was
// written to "match" -- confirm against the definition.
| 94 | bool Next(PhoneNumberMatch* match); |
| 95 | |
| 96 | private: |
| 97 | // The potential states of a PhoneNumberMatcher. |
| 98 | enum State { |
| 99 | NOT_READY, |
| 100 | READY, |
| 101 | DONE, |
| 102 | }; |
| 103 | |
| 104 | // Attempts to extract a match from a candidate string. Returns true if a |
| 105 | // match is found, otherwise returns false. The value "offset" refers to the |
| 106 | // start index of the candidate string within the overall text. |
// NOTE(review): this comment talks about "offset" and a candidate string, but
// the only integer parameter of Find() is named "index" and no candidate is
// passed in; the wording looks like it describes ExtractMatch() -- confirm
// and reword.
| 107 | bool Find(int index, PhoneNumberMatch* match); |
| 108 | |
philip.liard@gmail.com | 1fb4d23 | 2011-11-14 14:20:32 +0000 | [diff] [blame] | 109 | // Checks a number was formatted with a national prefix, if the number was |
| 110 | // found in national format, and a national prefix is required for that |
| 111 | // number. Returns false if the number needed to have a national prefix and |
| 112 | // none was found. |
| 113 | bool IsNationalPrefixPresentIfRequired(const PhoneNumber& number) const; |
| 114 | |
philip.liard@gmail.com | 6a0a07f | 2011-09-21 17:43:54 +0000 | [diff] [blame] | 115 | // Attempts to extract a match from candidate. Returns true if the match was |
| 116 | // found, otherwise returns false. |
| 117 | bool ExtractMatch(const string& candidate, int offset, |
| 118 | PhoneNumberMatch* match); |
| 119 | |
| 120 | // Attempts to extract a match from a candidate string if the whole candidate |
| 121 | // does not qualify as a match. Returns true if a match is found, otherwise |
| 122 | // returns false. |
| 123 | bool ExtractInnerMatch(const string& candidate, int offset, |
| 124 | PhoneNumberMatch* match); |
| 125 | |
| 126 | // Parses a phone number from the candidate using PhoneNumberUtil::Parse() and |
| 127 | // verifies it matches the requested leniency. If parsing and verification |
| 128 | // succeed, returns true, otherwise this method returns false. |
| 129 | bool ParseAndVerify(const string& candidate, int offset, |
| 130 | PhoneNumberMatch* match); |
| 131 | |
// NOTE(review): undocumented in this header. "checker" is a callback returning
// bool and taking (const PhoneNumberUtil&, const PhoneNumber&, const string&,
// const vector<string>&); AllNumberGroupsAreExactlyPresent() below has a
// matching parameter list. Presumably the callback decides whether the
// grouping of digits in "candidate" is acceptable -- confirm against the
// definition.
philip.liard@gmail.com | e71e831 | 2012-04-27 14:36:02 +0000 | [diff] [blame] | 132 | bool CheckNumberGroupingIsValid( |
| 133 | const PhoneNumber& phone_number, |
| 134 | const string& candidate, |
| 135 | ResultCallback4<bool, const PhoneNumberUtil&, const PhoneNumber&, |
| 136 | const string&, const vector<string>&>* checker) const; |
| 137 | |
// NOTE(review): undocumented in this header. The result is returned through
// the "digit_blocks" output parameter; presumably it holds the national
// number split into groups according to "formatting_pattern" -- confirm.
| 138 | void GetNationalNumberGroups( |
| 139 | const PhoneNumber& number, |
| 140 | const NumberFormat* formatting_pattern, |
| 141 | vector<string>* digit_blocks) const; |
| 142 | |
// NOTE(review): undocumented in this header. Judging by the parameter names,
// this presumably checks that every entry of "formatted_number_groups"
// appears as-is in "normalized_candidate" -- confirm against the definition.
| 143 | bool AllNumberGroupsAreExactlyPresent( |
| 144 | const PhoneNumberUtil& util, |
| 145 | const PhoneNumber& phone_number, |
| 146 | const string& normalized_candidate, |
| 147 | const vector<string>& formatted_number_groups) const; |
| 148 | |
// NOTE(review): undocumented in this header. Presumably returns true when
// "number", as written in "candidate", satisfies the given Leniency level
// -- confirm against the definition.
philip.liard@gmail.com | 6a0a07f | 2011-09-21 17:43:54 +0000 | [diff] [blame] | 149 | bool VerifyAccordingToLeniency(Leniency leniency, const PhoneNumber& number, |
| 150 | const string& candidate) const; |
| 151 | |
lararennie@google.com | 7e77f5f | 2013-07-19 16:32:26 +0000 | [diff] [blame] | 152 | // Declared in the class interface for testing purposes. |
| 153 | static bool ContainsMoreThanOneSlashInNationalNumber( |
| 154 | const PhoneNumber& number, |
| 155 | const string& candidate, |
| 156 | const PhoneNumberUtil& util); |
| 157 | |
philip.liard@gmail.com | 6a0a07f | 2011-09-21 17:43:54 +0000 | [diff] [blame] | 158 | // Helper method to determine if a character is a Latin-script letter or not. |
| 159 | // For our purposes, combining marks should also return true since we assume |
| 160 | // they have been added to a preceding Latin character. |
| 161 | static bool IsLatinLetter(char32 letter); |
| 162 | |
| 163 | // Helper class holding useful regular expressions. |
| 164 | const PhoneNumberMatcherRegExps* reg_exps_; |
| 165 | |
dbeaumont@google.com | b348522 | 2012-07-24 14:09:14 +0000 | [diff] [blame] | 166 | // Helper class holding loaded data containing alternate ways phone numbers |
| 167 | // might be formatted for certain regions. |
| 168 | const AlternateFormats* alternate_formats_; |
| 169 | |
philip.liard@gmail.com | 6a0a07f | 2011-09-21 17:43:54 +0000 | [diff] [blame] | 170 | // The phone number utility. |
| 171 | const PhoneNumberUtil& phone_util_; |
| 172 | |
| 173 | // The text searched for phone numbers. |
| 174 | const string text_; |
| 175 | |
| 176 | // The region(country) to assume for phone numbers without an international |
| 177 | // prefix. |
| 178 | const string preferred_region_; |
| 179 | |
| 180 | // The degree of validation requested. |
| 181 | Leniency leniency_; |
| 182 | |
| 183 | // The maximum number of retries after matching an invalid number. |
| 184 | int max_tries_; |
| 185 | |
| 186 | // The iteration tristate. |
| 187 | State state_; |
| 188 | |
| 189 | // The last successful match, NULL unless state_ is READY. |
| 190 | scoped_ptr<PhoneNumberMatch> last_match_; |
| 191 | |
| 192 | // The next index to start searching at. Undefined when state_ is DONE. |
| 193 | int search_index_; |
| 194 | |
| 195 | DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcher); |
| 196 | }; |
| 197 | |
| 198 | } // namespace phonenumbers |
| 199 | } // namespace i18n |
| 200 | |
| 201 | #endif // I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_ |