blob: 471c89acbefd5207bd6710edfac4123247eff24a [file] [log] [blame]
// Copyright (C) 2011 The Libphonenumber Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: Lara Rennie
// Author: Tao Huang
//
// This is a direct port from PhoneNumberMatcher.java.
// Changes to this class should also happen to the Java version, whenever it
// makes sense.
21
22#ifndef I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
23#define I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_
24
25#include <string>
philip.liard@gmail.come71e8312012-04-27 14:36:02 +000026#include <vector>
philip.liard@gmail.com6a0a07f2011-09-21 17:43:54 +000027
philip.liard@gmail.comaf4a2ce2013-04-30 11:35:55 +000028#include "phonenumbers/base/basictypes.h"
29#include "phonenumbers/base/memory/scoped_ptr.h"
philip.liard@gmail.come71e8312012-04-27 14:36:02 +000030#include "phonenumbers/callback.h"
philip.liard@gmail.com6a0a07f2011-09-21 17:43:54 +000031#include "phonenumbers/regexp_adapter.h"
32
33namespace i18n {
34namespace phonenumbers {
35
// Forward declaration of the four-argument result callback template. It is
// used below as the type of the "checker" argument of
// PhoneNumberMatcher::CheckNumberGroupingIsValid().
template <class R, class A1, class A2, class A3, class A4>
class ResultCallback4;

using std::string;
using std::vector;

// Forward declarations of the types that PhoneNumberMatcher refers to only
// through pointers, references or scoped_ptr in this header.
class AlternateFormats;
class NumberFormat;
class PhoneNumber;
class PhoneNumberMatch;
class PhoneNumberMatcherRegExps;
class PhoneNumberUtil;
48
49class PhoneNumberMatcher {
50 friend class PhoneNumberMatcherTest;
51 public:
52 // Leniency when finding potential phone numbers in text segments. The levels
53 // here are ordered in increasing strictness.
54 enum Leniency {
55 // Phone numbers accepted are possible, but not necessarily valid.
56 POSSIBLE,
57 // Phone numbers accepted are possible and valid.
58 VALID,
59 // Phone numbers accepted are valid and are grouped in a possible way for
60 // this locale. For example, a US number written as "65 02 53 00 00" is not
61 // accepted at this leniency level, whereas "650 253 0000" or "6502530000"
62 // are. Numbers with more than one '/' symbol are also dropped at this
63 // level.
philip.liard@gmail.com1fb4d232011-11-14 14:20:32 +000064 // Warning: The next two levels might result in lower coverage especially
65 // for regions outside of country code "+1". If you are not sure about which
66 // level to use, you can send an e-mail to the discussion group
67 // http://groups.google.com/group/libphonenumber-discuss/
philip.liard@gmail.com6a0a07f2011-09-21 17:43:54 +000068 STRICT_GROUPING,
69 // Phone numbers accepted are valid and are grouped in the same way that we
70 // would have formatted it, or as a single block. For example, a US number
71 // written as "650 2530000" is not accepted at this leniency level, whereas
72 // "650 253 0000" or "6502530000" are.
73 EXACT_GROUPING,
74 };
75
76 // Constructs a phone number matcher.
77 PhoneNumberMatcher(const PhoneNumberUtil& util,
78 const string& text,
79 const string& region_code,
80 Leniency leniency,
81 int max_tries);
82
83 // Wrapper to construct a phone number matcher, with no limitation on the
84 // number of retries and VALID Leniency.
85 PhoneNumberMatcher(const string& text,
86 const string& region_code);
87
88 ~PhoneNumberMatcher();
89
90 // Returns true if the text sequence has another match.
91 bool HasNext();
92
93 // Gets next match from text sequence.
94 bool Next(PhoneNumberMatch* match);
95
96 private:
97 // The potential states of a PhoneNumberMatcher.
98 enum State {
99 NOT_READY,
100 READY,
101 DONE,
102 };
103
104 // Attempts to extract a match from a candidate string. Returns true if a
105 // match is found, otherwise returns false. The value "offset" refers to the
106 // start index of the candidate string within the overall text.
107 bool Find(int index, PhoneNumberMatch* match);
108
philip.liard@gmail.com1fb4d232011-11-14 14:20:32 +0000109 // Checks a number was formatted with a national prefix, if the number was
110 // found in national format, and a national prefix is required for that
111 // number. Returns false if the number needed to have a national prefix and
112 // none was found.
113 bool IsNationalPrefixPresentIfRequired(const PhoneNumber& number) const;
114
philip.liard@gmail.com6a0a07f2011-09-21 17:43:54 +0000115 // Attempts to extract a match from candidate. Returns true if the match was
116 // found, otherwise returns false.
117 bool ExtractMatch(const string& candidate, int offset,
118 PhoneNumberMatch* match);
119
120 // Attempts to extract a match from a candidate string if the whole candidate
121 // does not qualify as a match. Returns true if a match is found, otherwise
122 // returns false.
123 bool ExtractInnerMatch(const string& candidate, int offset,
124 PhoneNumberMatch* match);
125
126 // Parses a phone number from the candidate using PhoneNumberUtil::Parse() and
127 // verifies it matches the requested leniency. If parsing and verification
128 // succeed, returns true, otherwise this method returns false;
129 bool ParseAndVerify(const string& candidate, int offset,
130 PhoneNumberMatch* match);
131
philip.liard@gmail.come71e8312012-04-27 14:36:02 +0000132 bool CheckNumberGroupingIsValid(
133 const PhoneNumber& phone_number,
134 const string& candidate,
135 ResultCallback4<bool, const PhoneNumberUtil&, const PhoneNumber&,
136 const string&, const vector<string>&>* checker) const;
137
138 void GetNationalNumberGroups(
139 const PhoneNumber& number,
140 const NumberFormat* formatting_pattern,
141 vector<string>* digit_blocks) const;
142
143 bool AllNumberGroupsAreExactlyPresent(
144 const PhoneNumberUtil& util,
145 const PhoneNumber& phone_number,
146 const string& normalized_candidate,
147 const vector<string>& formatted_number_groups) const;
148
philip.liard@gmail.com6a0a07f2011-09-21 17:43:54 +0000149 bool VerifyAccordingToLeniency(Leniency leniency, const PhoneNumber& number,
150 const string& candidate) const;
151
lararennie@google.com7e77f5f2013-07-19 16:32:26 +0000152 // In interface for testing purposes.
153 static bool ContainsMoreThanOneSlashInNationalNumber(
154 const PhoneNumber& number,
155 const string& candidate,
156 const PhoneNumberUtil& util);
157
philip.liard@gmail.com6a0a07f2011-09-21 17:43:54 +0000158 // Helper method to determine if a character is a Latin-script letter or not.
159 // For our purposes, combining marks should also return true since we assume
160 // they have been added to a preceding Latin character.
161 static bool IsLatinLetter(char32 letter);
162
163 // Helper class holding useful regular expressions.
164 const PhoneNumberMatcherRegExps* reg_exps_;
165
dbeaumont@google.comb3485222012-07-24 14:09:14 +0000166 // Helper class holding loaded data containing alternate ways phone numbers
167 // might be formatted for certain regions.
168 const AlternateFormats* alternate_formats_;
169
philip.liard@gmail.com6a0a07f2011-09-21 17:43:54 +0000170 // The phone number utility;
171 const PhoneNumberUtil& phone_util_;
172
173 // The text searched for phone numbers;
174 const string text_;
175
176 // The region(country) to assume for phone numbers without an international
177 // prefix.
178 const string preferred_region_;
179
180 // The degree of validation requested.
181 Leniency leniency_;
182
183 // The maximum number of retries after matching an invalid number.
184 int max_tries_;
185
186 // The iteration tristate.
187 State state_;
188
189 // The last successful match, NULL unless in State.READY.
190 scoped_ptr<PhoneNumberMatch> last_match_;
191
192 // The next index to start searching at. Undefined in State.DONE.
193 int search_index_;
194
195 DISALLOW_COPY_AND_ASSIGN(PhoneNumberMatcher);
196};
197
198} // namespace phonenumbers
199} // namespace i18n
200
201#endif // I18N_PHONENUMBERS_PHONENUMBERMATCHER_H_