blob: 195628f8d12e999ca7c09d06b2809da1c09a409f [file] [log] [blame]
philip.liard@gmail.comb9056912011-08-18 11:41:24 +00001// Copyright (C) 2009 The Libphonenumber Authors
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +00002//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Utility for international phone numbers.
16//
17// Author: Shaopeng Jia
18// Open-sourced by: Philippe Liard
19
20#ifndef I18N_PHONENUMBERS_PHONENUMBERUTIL_H_
21#define I18N_PHONENUMBERS_PHONENUMBERUTIL_H_
22
philip.liard@gmail.com603e7e52011-10-12 12:25:09 +000023#include <stddef.h>
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +000024#include <list>
25#include <map>
26#include <set>
27#include <string>
28#include <utility>
29#include <vector>
30
philip.liard@gmail.comaf4a2ce2013-04-30 11:35:55 +000031#include "phonenumbers/base/basictypes.h"
32#include "phonenumbers/base/memory/scoped_ptr.h"
33#include "phonenumbers/base/memory/singleton.h"
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +000034#include "phonenumbers/phonenumber.pb.h"
35
36class TelephoneNumber;
37
38namespace i18n {
39namespace phonenumbers {
40
41using std::list;
42using std::map;
43using std::pair;
44using std::set;
45using std::string;
46using std::vector;
47
48using google::protobuf::RepeatedPtrField;
49
philip.liard@gmail.com80d738a2011-09-14 10:42:59 +000050class AsYouTypeFormatter;
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +000051class Logger;
52class NumberFormat;
53class PhoneMetadata;
philip.liard@gmail.comd7f0c942011-12-02 14:44:03 +000054class PhoneNumberRegExpsAndMappings;
philip.liard@gmail.com384682a2011-07-12 15:41:29 +000055class RegExp;
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +000056
57// NOTE: A lot of methods in this class require Region Code strings. These must
58// be provided using ISO 3166-1 two-letter country-code format. The list of the
59// codes can be found here:
60// http://www.iso.org/iso/english_country_names_and_code_elements
61
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +000062class PhoneNumberUtil : public Singleton<PhoneNumberUtil> {
philip.liard@gmail.comfa6ddee2013-05-03 13:49:35 +000063 private:
philip.liard@gmail.com80d738a2011-09-14 10:42:59 +000064 friend class AsYouTypeFormatter;
philip.liard@gmail.com6a0a07f2011-09-21 17:43:54 +000065 friend class PhoneNumberMatcher;
66 friend class PhoneNumberMatcherRegExps;
67 friend class PhoneNumberMatcherTest;
philip.liard@gmail.comd7f0c942011-12-02 14:44:03 +000068 friend class PhoneNumberRegExpsAndMappings;
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +000069 friend class PhoneNumberUtilTest;
lararennie@google.comb3bfbbc2013-09-02 17:02:03 +000070 friend class ShortNumberInfo;
71 friend class ShortNumberInfoTest;
philip.liard@gmail.comfa6ddee2013-05-03 13:49:35 +000072 friend class Singleton<PhoneNumberUtil>;
73
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +000074 public:
75 ~PhoneNumberUtil();
philip.liard@gmail.com80b803f2012-02-01 21:35:37 +000076 static const char kRegionCodeForNonGeoEntity[];
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +000077
78 // INTERNATIONAL and NATIONAL formats are consistent with the definition
79 // in ITU-T Recommendation E.123. For example, the number of the Google
80 // Zürich office will be written as "+41 44 668 1800" in INTERNATIONAL
81 // format, and as "044 668 1800" in NATIONAL format. E164 format is as per
lararennie@google.com35bd3932012-09-06 09:48:57 +000082 // INTERNATIONAL format but with no formatting applied, e.g. "+41446681800".
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +000083 // RFC3966 is as per INTERNATIONAL format, but with all spaces and other
84 // separating symbols replaced with a hyphen, and with any phone number
lararennie@google.com35bd3932012-09-06 09:48:57 +000085 // extension appended with ";ext=". It also will have a prefix of "tel:"
86 // added, e.g. "tel:+41-44-668-1800".
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +000087 enum PhoneNumberFormat {
88 E164, // e.g. "+41446681800"
89 INTERNATIONAL, // e.g. "+41 44 668 1800"
90 NATIONAL, // e.g. "044 668 1800"
91 RFC3966 // e.g. "tel:+41-44-668-1800"
92 };
93
94 // Types of phone numbers, as returned by GetNumberType().
95 enum PhoneNumberType {
96 FIXED_LINE,
97 MOBILE,
98 // In some regions (e.g. the USA), it is impossible to distinguish between
99 // fixed-line and mobile numbers by looking at the phone number itself.
100 FIXED_LINE_OR_MOBILE,
101 // Freephone lines.
102 TOLL_FREE,
103 PREMIUM_RATE,
104 // The cost of this call is shared between the caller and the recipient, and
105 // is hence typically less than PREMIUM_RATE calls. See
106 // http://en.wikipedia.org/wiki/Shared_Cost_Service for more information.
107 SHARED_COST,
108 // Voice over IP numbers. This includes TSoIP (Telephony Service over IP).
109 VOIP,
110 // A personal number is associated with a particular person, and may be
111 // routed to either a MOBILE or FIXED_LINE number. Some more information can
112 // be found here: http://en.wikipedia.org/wiki/Personal_Numbers
113 PERSONAL_NUMBER,
114 PAGER,
115 // Used for "Universal Access Numbers" or "Company Numbers". They may be
116 // further routed to specific offices, but allow one number to be used for a
117 // company.
118 UAN,
philip.liard@gmail.com80b803f2012-02-01 21:35:37 +0000119 // Used for "Voice Mail Access Numbers".
120 VOICEMAIL,
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000121 // A phone number is of type UNKNOWN when it does not fit any of the known
122 // patterns for a specific region.
123 UNKNOWN
124 };
125
126 // Types of phone number matches, as returned by IsNumberMatch() and its
127 // string-based wrappers. See detailed description beside the IsNumberMatch() method.
128 enum MatchType {
129 INVALID_NUMBER, // NOT_A_NUMBER in the java version. A string input could not be parsed.
130 NO_MATCH, // None of the match levels below applies.
131 SHORT_NSN_MATCH, // One NSN could be a shorter version of the other.
132 NSN_MATCH, // NSNs and extensions match; a country calling code is missing.
133 EXACT_MATCH, // Country calling code, NSN, leading zero and extension all match.
134 };
135
136 enum ErrorType { // Result codes returned by Parse() and ParseAndKeepRawInput().
137 NO_PARSING_ERROR, // The number parsed correctly.
138 INVALID_COUNTRY_CODE_ERROR, // INVALID_COUNTRY_CODE in the java version.
139 NOT_A_NUMBER,
140 TOO_SHORT_AFTER_IDD,
141 TOO_SHORT_NSN,
142 TOO_LONG_NSN, // TOO_LONG in the java version.
143 };
144
145 // Possible outcomes when testing if a PhoneNumber is possible.
146 enum ValidationResult { // Returned by IsPossibleNumberWithReason().
147 IS_POSSIBLE,
148 INVALID_COUNTRY_CODE,
149 TOO_SHORT,
150 TOO_LONG,
151 };
152
philip.liard@gmail.com1fb4d232011-11-14 14:20:32 +0000153 // Convenience method to get a list of what regions the library has metadata
154 // for.
155 void GetSupportedRegions(set<string>* regions) const;
156
dbeaumont@google.com93d3e8b2012-10-11 16:44:08 +0000157 // Populates a list with the region codes that match the specific country
158 // calling code. For non-geographical country calling codes, the region code
159 // 001 is returned. Also, in the case of no region code being found, the list
160 // is left unchanged.
161 void GetRegionCodesForCountryCallingCode(
162 int country_calling_code,
163 list<string>* region_codes) const;
164
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000165 // Gets a PhoneNumberUtil instance to carry out international phone number
166 // formatting, parsing, or validation. The instance is loaded with phone
167 // number metadata for a number of most commonly used regions, as specified by
168 // DEFAULT_REGIONS_.
169 //
170 // The PhoneNumberUtil is implemented as a singleton. Therefore, calling
lararennie@google.comd61796e2012-06-29 14:25:30 +0000171 // GetInstance multiple times will only result in one instance being created.
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000172 static PhoneNumberUtil* GetInstance();
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000173
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000174 // Returns true if the number is a valid vanity (alpha) number such as 800
175 // MICROSOFT. A valid vanity number will start with at least 3 digits and will
176 // have three or more alpha characters. This does not do region-specific
177 // checks - to work out if this number is actually valid for a region, it
178 // should be parsed and methods such as IsPossibleNumberWithReason or
179 // IsValidNumber should be used.
180 bool IsAlphaNumber(const string& number) const;
181
182 // Converts all alpha characters in a number to their respective digits on
183 // a keypad, but retains existing formatting.
184 void ConvertAlphaCharactersInNumber(string* number) const;
185
186 // Normalizes a string of characters representing a phone number. This
187 // converts wide-ascii and arabic-indic numerals to European numerals, and
188 // strips punctuation and alpha characters.
philip.liard@gmail.com384682a2011-07-12 15:41:29 +0000189 void NormalizeDigitsOnly(string* number) const;
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000190
lararennie@google.com7e77f5f2013-07-19 16:32:26 +0000191 // Normalizes a string of characters representing a phone number. This strips
192 // all characters which are not diallable on a mobile phone keypad (including
193 // all non-ASCII digits).
194 void NormalizeDiallableCharsOnly(string* number) const;
195
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000196 // Gets the national significant number of a phone number. Note a national
197 // significant number doesn't contain a national prefix or any formatting.
198 void GetNationalSignificantNumber(const PhoneNumber& number,
199 string* national_significant_num) const;
200
201 // Gets the length of the geographical area code from the PhoneNumber object
202 // passed in, so that clients could use it to split a national significant
203 // number into geographical area code and subscriber number. It works in such
204 // a way that the resultant subscriber number should be diallable, at least on
205 // some devices. An example of how this could be used:
206 //
lararennie@google.comd61796e2012-06-29 14:25:30 +0000207 // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance());
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000208 // PhoneNumber number;
209 // phone_util.Parse("16502530000", "US", &number);
210 // string national_significant_number;
211 // phone_util.GetNationalSignificantNumber(number,
212 // &national_significant_number);
213 // string area_code;
214 // string subscriber_number;
215 //
216 // int area_code_length = phone_util.GetLengthOfGeographicalAreaCode(number);
217 // if (area_code_length > 0) {
philip.liard@gmail.com1fb4d232011-11-14 14:20:32 +0000218 // area_code = national_significant_number.substr(0, area_code_length);
219 // subscriber_number = national_significant_number.substr(
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000220 // area_code_length, string::npos);
221 // } else {
222 // area_code = "";
223 // subscriber_number = national_significant_number;
224 // }
225 //
lararennie@google.comd61796e2012-06-29 14:25:30 +0000226 // N.B.: area code is a very ambiguous concept, so the authors generally
227 // recommend against using it for most purposes, but recommend using the
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000228 // more general national_number instead. Read the following carefully before
229 // deciding to use this method:
230 //
231 // - geographical area codes change over time, and this method honors those
232 // changes; therefore, it doesn't guarantee the stability of the result it
233 // produces.
234 // - subscriber numbers may not be diallable from all devices (notably mobile
235 // devices, which typically require the full national_number to be dialled
236 // in most regions).
philip.liard@gmail.com80b803f2012-02-01 21:35:37 +0000237 // - most non-geographical numbers have no area codes, including numbers
238 // from non-geographical entities.
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000239 // - some geographical numbers have no area codes.
240 int GetLengthOfGeographicalAreaCode(const PhoneNumber& number) const;
241
242 // Gets the length of the national destination code (NDC) from the PhoneNumber
243 // object passed in, so that clients could use it to split a national
244 // significant number into NDC and subscriber number. The NDC of a phone
245 // number is normally the first group of digit(s) right after the country
246 // calling code when the number is formatted in the international format, if
247 // there is a subscriber number part that follows. An example of how this
248 // could be used:
249 //
lararennie@google.comd61796e2012-06-29 14:25:30 +0000250 // const PhoneNumberUtil& phone_util(*PhoneNumberUtil::GetInstance());
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000251 // PhoneNumber number;
252 // phone_util.Parse("16502530000", "US", &number);
253 // string national_significant_number;
254 // phone_util.GetNationalSignificantNumber(number,
255 // &national_significant_number);
256 // string national_destination_code;
257 // string subscriber_number;
258 //
259 // int national_destination_code_length =
260 // phone_util.GetLengthOfNationalDestinationCode(number);
261 // if (national_destination_code_length > 0) {
philip.liard@gmail.com1fb4d232011-11-14 14:20:32 +0000262 // national_destination_code = national_significant_number.substr(
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000263 // 0, national_destination_code_length);
philip.liard@gmail.com1fb4d232011-11-14 14:20:32 +0000264 // subscriber_number = national_significant_number.substr(
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000265 // national_destination_code_length, string::npos);
266 // } else {
267 // national_destination_code = "";
268 // subscriber_number = national_significant_number;
269 // }
270 //
271 // Refer to the unittests to see the difference between this function and
272 // GetLengthOfGeographicalAreaCode().
273 int GetLengthOfNationalDestinationCode(const PhoneNumber& number) const;
274
roes@google.comb779a052013-10-08 13:09:13 +0000275 // Returns the mobile token for the provided country calling code if it has
276 // one, otherwise returns an empty string. A mobile token is a number inserted
277 // before the area code when dialing a mobile number from that country from
278 // abroad.
279 void GetCountryMobileToken(int country_calling_code,
280 string* mobile_token) const;
281
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000282 // Formats a phone number in the specified format using default rules. Note
283 // that this does not promise to produce a phone number that the user can
284 // dial from where they are - although we do format in either NATIONAL or
285 // INTERNATIONAL format depending on what the client asks for, we do not
286 // currently support a more abbreviated format, such as for users in the
287 // same area who could potentially dial the number without area code.
288 void Format(const PhoneNumber& number,
289 PhoneNumberFormat number_format,
290 string* formatted_number) const;
291
292 // Formats a phone number in the specified format using client-defined
293 // formatting rules.
294 void FormatByPattern(
295 const PhoneNumber& number,
296 PhoneNumberFormat number_format,
297 const RepeatedPtrField<NumberFormat>& user_defined_formats,
298 string* formatted_number) const;
299
300 // Formats a phone number in national format for dialing using the carrier as
301 // specified in the carrier_code. The carrier_code will always be used
302 // regardless of whether the phone number already has a preferred domestic
303 // carrier code stored. If carrier_code contains an empty string, return the
304 // number in national format without any carrier code.
305 void FormatNationalNumberWithCarrierCode(const PhoneNumber& number,
306 const string& carrier_code,
307 string* formatted_number) const;
308
309 // Formats a phone number in national format for dialing using the carrier as
310 // specified in the preferred_domestic_carrier_code field of the PhoneNumber
311 // object passed in. If that is missing, use the fallback_carrier_code passed
312 // in instead. If there is no preferred_domestic_carrier_code, and the
313 // fallback_carrier_code contains an empty string, return the number in
314 // national format without any carrier code.
315 //
316 // Use FormatNationalNumberWithCarrierCode instead if the carrier code passed
317 // in should take precedence over the number's preferred_domestic_carrier_code
318 // when formatting.
319 void FormatNationalNumberWithPreferredCarrierCode(
320 const PhoneNumber& number,
321 const string& fallback_carrier_code,
322 string* formatted_number) const;
323
philip.liard@gmail.comd72e8b92011-10-12 11:47:24 +0000324 // Returns a number formatted in such a way that it can be dialed from a
325 // mobile phone in a specific region. If the number cannot be reached from
326 // the region (e.g. some countries block toll-free numbers from being called
327 // outside of the country), the method returns an empty string.
328 void FormatNumberForMobileDialing(
329 const PhoneNumber& number,
330 const string& region_calling_from,
331 bool with_formatting,
332 string* formatted_number) const;
333
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000334 // Formats a phone number for out-of-country dialing purposes.
335 //
336 // Note this function takes care of the case for calling inside of NANPA
337 // and between Russia and Kazakhstan (who share the same country calling
338 // code). In those cases, no international prefix is used. For regions which
339 // have multiple international prefixes, the number in its INTERNATIONAL
340 // format will be returned instead.
341 void FormatOutOfCountryCallingNumber(
342 const PhoneNumber& number,
343 const string& calling_from,
344 string* formatted_number) const;
345
346 // Formats a phone number using the original phone number format that the
347 // number is parsed from. The original format is embedded in the
348 // country_code_source field of the PhoneNumber object passed in. If such
349 // information is missing, the number will be formatted into the NATIONAL
philip.liard@gmail.com44e3b3f2011-10-20 11:39:14 +0000350 // format by default. When the number is an invalid number, the method returns
351 // the raw input when it is available.
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000352 void FormatInOriginalFormat(const PhoneNumber& number,
353 const string& region_calling_from,
354 string* formatted_number) const;
355
356 // Formats a phone number for out-of-country dialing purposes.
357 //
358 // Note that in this version, if the number was entered originally using alpha
359 // characters and this version of the number is stored in raw_input, this
360 // representation of the number will be used rather than the digit
361 // representation. Grouping information, as specified by characters such as
362 // "-" and " ", will be retained.
363 //
364 // Caveats:
365 // 1) This will not produce good results if the country calling code is both
366 // present in the raw input _and_ is the start of the national number. This
367 // is not a problem in the regions which typically use alpha numbers.
368 // 2) This will also not produce good results if the raw input has any
369 // grouping information within the first three digits of the national number,
370 // and if the function needs to strip preceding digits/words in the raw input
371 // before these digits. Normally people group the first three digits together
372 // so this is not a huge problem - and will be fixed if it proves to be so.
373 void FormatOutOfCountryKeepingAlphaChars(
374 const PhoneNumber& number,
375 const string& calling_from,
376 string* formatted_number) const;
377
378 // Attempts to extract a valid number from a phone number that is too long to
379 // be valid, and resets the PhoneNumber object passed in to that valid
380 // version. If no valid number could be extracted, the PhoneNumber object
381 // passed in will not be modified. It returns true if a valid phone number can
382 // be successfully extracted.
383 bool TruncateTooLongNumber(PhoneNumber* number) const;
384
385 // Gets the type of a phone number.
386 PhoneNumberType GetNumberType(const PhoneNumber& number) const;
387
388 // Tests whether a phone number matches a valid pattern. Note this doesn't
389 // verify the number is actually in use, which is impossible to tell by just
390 // looking at a number itself.
391 bool IsValidNumber(const PhoneNumber& number) const;
392
393 // Tests whether a phone number is valid for a certain region. Note this
394 // doesn't verify the number is actually in use, which is impossible to tell
395 // by just looking at a number itself. If the country calling code is not the
396 // same as the country calling code for the region, this immediately exits
397 // with false. After this, the specific number pattern rules for the region
398 // are examined.
399 // This is useful for determining for example whether a particular number is
400 // valid for Canada, rather than just a valid NANPA number.
dbeaumont@google.com93d3e8b2012-10-11 16:44:08 +0000401 // Warning: In most cases, you want to use IsValidNumber instead. For
402 // example, this method will mark numbers from British Crown dependencies
403 // such as the Isle of Man as invalid for the region "GB" (United Kingdom),
404 // since it has its own region code, "IM", which may be undesirable.
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000405 bool IsValidNumberForRegion(
406 const PhoneNumber& number,
407 const string& region_code) const;
408
409 // Returns the region where a phone number is from. This could be used for
410 // geo-coding at the region level.
411 void GetRegionCodeForNumber(const PhoneNumber& number,
412 string* region_code) const;
413
414 // Returns the country calling code for a specific region. For example,
415 // this would be 1 for the United States, and 64 for New Zealand.
416 int GetCountryCodeForRegion(const string& region_code) const;
417
418 // Returns the region code that matches the specific country code. Note that
philip.liard@gmail.com4784d0d2011-08-12 07:44:38 +0000419 // it is possible that several regions share the same country calling code
420 // (e.g. US and Canada), and in that case, only one of the regions (normally
421 // the one with the largest population) is returned.
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000422 void GetRegionCodeForCountryCode(int country_code, string* region_code) const;
423
424 // Checks if this is a region under the North American Numbering Plan
425 // Administration (NANPA).
426 bool IsNANPACountry(const string& region_code) const;
427
philip.liard@gmail.com70942012011-09-16 10:22:04 +0000428 // Returns the national dialling prefix for a specific region. For example,
429 // this would be 1 for the United States, and 0 for New Zealand. Set
430 // strip_non_digits to true to strip symbols like "~" (which indicates a wait
431 // for a dialling tone) from the prefix returned. If no national prefix is
432 // present, we return an empty string.
433 void GetNddPrefixForRegion(const string& region_code,
434 bool strip_non_digits,
435 string* national_prefix) const;
436
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000437 // Checks whether a phone number is a possible number. It provides a more
438 // lenient check than IsValidNumber() in the following sense:
439 // 1. It only checks the length of phone numbers. In particular, it doesn't
440 // check starting digits of the number.
441 // 2. It doesn't attempt to figure out the type of the number, but uses
442 // general rules which apply to all types of phone numbers in a
443 // region. Therefore, it is much faster than IsValidNumber().
444 // 3. For fixed line numbers, many regions have the concept of area code,
445 // which together with subscriber number constitute the national
446 // significant number. It is sometimes okay to dial the subscriber
447 // number only when dialing in the same area. This function will return
448 // true if the subscriber-number-only version is passed in. On the other
449 // hand, because IsValidNumber() validates using information on both
450 // starting digits (for fixed line numbers, that would most likely be
451 // area codes) and length (obviously includes the length of area codes
452 // for fixed line numbers), it will return false for the
453 // subscriber-number-only version.
454 ValidationResult IsPossibleNumberWithReason(const PhoneNumber& number) const;
455
456 // Convenience wrapper around IsPossibleNumberWithReason. Instead of returning
457 // the reason for failure, this method returns a boolean value.
458 bool IsPossibleNumber(const PhoneNumber& number) const;
459
460 // Checks whether a phone number is a possible number given a number in the
461 // form of a string, and the country where the number could be dialed from.
462 // It provides a more lenient check than IsValidNumber(). See
463 // IsPossibleNumber(const PhoneNumber& number) for details.
464 //
465 // This method first parses the number, then invokes
466 // IsPossibleNumber(const PhoneNumber& number) with the resultant PhoneNumber
467 // object.
468 //
469 // region_dialing_from represents the region that we are expecting the number
470 // to be dialed from. Note this is different from the region where the number
471 // belongs. For example, the number +1 650 253 0000 is a number that belongs
472 // to US. When written in this form, it could be dialed from any region. When
473 // it is written as 00 1 650 253 0000, it could be dialed from any region
474 // which uses an international dialling prefix of 00. When it is written as
475 // 650 253 0000, it could only be dialed from within the US, and when written
476 // as 253 0000, it could only be dialed from within a smaller area in the US
477 // (Mountain View, CA, to be more specific).
478 bool IsPossibleNumberForString(
479 const string& number,
480 const string& region_dialing_from) const;
481
482 // Gets a valid fixed-line number for the specified region. Returns false if
philip.liard@gmail.com80b803f2012-02-01 21:35:37 +0000483 // the region was unknown, or the region 001 is passed in. For 001
484 // (representing non-geographical numbers), call
485 // GetExampleNumberForNonGeoEntity instead.
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000486 bool GetExampleNumber(const string& region_code,
487 PhoneNumber* number) const;
488
489 // Gets a valid number of the specified type for the specified region.
philip.liard@gmail.com80b803f2012-02-01 21:35:37 +0000490 // Returns false if the region was unknown or 001, or if no example number of
491 // that type could be found. For 001 (representing non-geographical numbers),
492 // call GetExampleNumberForNonGeoEntity instead.
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000493 bool GetExampleNumberForType(const string& region_code,
494 PhoneNumberType type,
495 PhoneNumber* number) const;
496
philip.liard@gmail.com80b803f2012-02-01 21:35:37 +0000497 // Gets a valid number for the specified country calling code for a
498 // non-geographical entity. Returns false if the metadata does not contain
499 // such information, or the country calling code passed in does not belong to
500 // a non-geographical entity.
501 bool GetExampleNumberForNonGeoEntity(
502 int country_calling_code, PhoneNumber* number) const;
503
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000504 // Parses a string and returns it in proto buffer format. This method will
505 // return an error like INVALID_COUNTRY_CODE_ERROR if the number is not considered
506 // to be a possible number, and NO_PARSING_ERROR if it parsed correctly. Note
507 // that validation of whether the number is actually a valid number for a
508 // particular region is not performed. This can be done separately with
509 // IsValidNumber().
510 //
davinci@google.com6276be92012-07-04 13:52:11 +0000511 // number_to_parse can also be provided in RFC3966 format.
512 //
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000513 // default_region represents the country that we are expecting the number to
514 // be from. This is only used if the number being parsed is not written in
515 // international format. The country_code for the number in this case would be
516 // stored as that of the default country supplied. If the number is guaranteed
517 // to start with a '+' followed by the country calling code, then
518 // "ZZ" can be supplied.
519 ErrorType Parse(const string& number_to_parse,
520 const string& default_region,
521 PhoneNumber* number) const;
522 // Parses a string and returns it in proto buffer format. This method differs
523 // from Parse() in that it always populates the raw_input field of the
524 // protocol buffer with number_to_parse as well as the country_code_source
525 // field.
526 ErrorType ParseAndKeepRawInput(const string& number_to_parse,
527 const string& default_region,
528 PhoneNumber* number) const;
529
530 // Takes two phone numbers and compares them for equality.
531 //
532 // Returns EXACT_MATCH if the country calling code, NSN, presence of a leading
533 // zero for Italian numbers and any extension present are the same.
534 // Returns NSN_MATCH if either or both has no country calling code specified,
535 // and the NSNs and extensions are the same.
536 // Returns SHORT_NSN_MATCH if either or both has no country calling code
537 // specified, or the country calling code specified is the same, and one NSN
538 // could be a shorter version of the other number. This includes the case
539 // where one has an extension specified, and the other does not.
540 // Returns NO_MATCH otherwise.
541 // For example, the numbers +1 345 657 1234 and 657 1234 are a
542 // SHORT_NSN_MATCH. The numbers +1 345 657 1234 and 345 657 are a NO_MATCH.
543 MatchType IsNumberMatch(const PhoneNumber& first_number,
544 const PhoneNumber& second_number) const;
545
546 // Takes two phone numbers as strings and compares them for equality. This
547 // is a convenience wrapper for IsNumberMatch(PhoneNumber firstNumber,
548 // PhoneNumber secondNumber). No default region is known.
549 // Returns INVALID_NUMBER if either number cannot be parsed into a phone
550 // number.
551 MatchType IsNumberMatchWithTwoStrings(const string& first_number,
552 const string& second_number) const;
553
554 // Takes two phone numbers and compares them for equality. This is a
555 // convenience wrapper for IsNumberMatch(PhoneNumber firstNumber,
556 // PhoneNumber secondNumber). No default region is known.
557 // Returns INVALID_NUMBER if second_number cannot be parsed into a phone
558 // number.
559 MatchType IsNumberMatchWithOneString(const PhoneNumber& first_number,
560 const string& second_number) const;
561
philip.liard@gmail.comfa6ddee2013-05-03 13:49:35 +0000562 // Overrides the default logging system. This takes ownership of the provided
563 // logger.
564 void SetLogger(Logger* logger);
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000565
philip.liard@gmail.com80d738a2011-09-14 10:42:59 +0000566 // Gets an AsYouTypeFormatter for the specific region.
567 // Returns an AsYouTypeFormatter object, which could be used to format phone
568 // numbers in the specific region "as you type".
569 // The deletion of the returned instance is under the responsibility of the
570 // caller.
571 AsYouTypeFormatter* GetAsYouTypeFormatter(const string& region_code) const;
572
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000573 friend bool ConvertFromTelephoneNumberProto(
574 const TelephoneNumber& proto_to_convert,
575 PhoneNumber* new_proto);
576 friend bool ConvertToTelephoneNumberProto(const PhoneNumber& proto_to_convert,
577 TelephoneNumber* resulting_proto);
578
579 protected:
580 // Checks whether the country_calling_code is from a country whose national
581 // significant number could contain a leading zero. An example of such a
582 // country is Italy.
//
// country_calling_code: the country calling code to look up.
583 bool IsLeadingZeroPossible(int country_calling_code) const;
584
585 private:
// Logger owned by this object (held in a scoped_ptr).
// NOTE(review): presumably the logger installed through SetLogger — confirm.
philip.liard@gmail.comd7f0c942011-12-02 14:44:03 +0000586 scoped_ptr<Logger> logger_;
587
// Pairs an int with a pointer to a list of strings. This is the element type
// of country_calling_code_to_region_code_map_.
// NOTE(review): presumably (country calling code, region codes using it);
// ownership of the pointed-to list is not visible here — confirm.
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000588 typedef pair<int, list<string>*> IntRegionsPair;
589
590 // The minimum and maximum length of the national significant number (NSN).
davinci@google.com6276be92012-07-04 13:52:11 +0000591 static const size_t kMinLengthForNsn = 2;
philip.liard@gmail.com44e3b3f2011-10-20 11:39:14 +0000592 // The ITU says the maximum length should be 15, but we have found longer
593 // numbers in Germany, hence the limit of 16 used here.
594 static const size_t kMaxLengthForNsn = 16;
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000595 // The maximum length of the country calling code.
596 static const size_t kMaxLengthCountryCode = 3;
597
// Character array that is only declared here (no size or initializer), so
// its contents come from an out-of-class definition.
// NOTE(review): presumably the characters accepted as a leading "plus" sign
// (cf. StartsWithPlusCharsPattern) — confirm against the definition.
598 static const char kPlusChars[];
599 // Regular expression of acceptable punctuation found in phone numbers. This
600 // excludes punctuation found as a leading character only. This consists of
601 // dash characters, white space characters, full stops, slashes, square
602 // brackets, parentheses and tildes. It also includes the letter 'x' as that
603 // is found as a placeholder for carrier information in some phone numbers.
604 // Full-width variants are also present.
// Only declared here; the pattern text itself is supplied by the
// out-of-class definition.
605 static const char kValidPunctuation[];
606
philip.liard@gmail.com4784d0d2011-08-12 07:44:38 +0000607 // Regular expression of characters typically used to start a second phone
608 // number for the purposes of parsing. This allows us to strip off parts of
609 // the number that are actually the start of another number, such as for:
610 // (530) 583-6985 x302/x2303 -> the second extension here makes this actually
611 // two phone numbers, (530) 583-6985 x302 and (530) 583-6985 x2303. We remove
612 // the second extension so that the first number is parsed correctly. The
613 // string preceding this is captured.
614 // This corresponds to SECOND_NUMBER_START in the Java version.
615 static const char kCaptureUpToSecondNumberStart[];
616
philip.liard@gmail.comd7f0c942011-12-02 14:44:03 +0000617 // Helper object holding useful regular expressions and character mappings.
// Owned by this instance through the scoped_ptr.
618 scoped_ptr<PhoneNumberRegExpsAndMappings> reg_exps_;
619
philip.liard@gmail.com4784d0d2011-08-12 07:44:38 +0000620 // A mapping from a country calling code to a RegionCode object which denotes
621 // the region represented by that country calling code. Note regions under
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000622 // NANPA share the country calling code 1 and Russia and Kazakhstan share the
623 // country calling code 7. Under this map, 1 is mapped to region code "US" and
624 // 7 is mapped to region code "RU". This is implemented as a sorted vector to
625 // achieve better performance.
// NOTE(review): each element is an IntRegionsPair, i.e. an int paired with a
// pointer to a list of strings, so one calling code can carry several region
// codes; the "US"/"RU" wording above presumably describes the main region of
// each list — confirm. Ownership of the pointed-to lists is not visible here.
626 scoped_ptr<vector<IntRegionsPair> > country_calling_code_to_region_code_map_;
627
628 // The set of regions that share country calling code 1 (the NANPA regions).
629 scoped_ptr<set<string> > nanpa_regions_;
// The country calling code shared by the NANPA regions.
630 static const int kNanpaCountryCode = 1;
631
632 // A mapping from a region code to a PhoneMetadata for that region.
// Keyed by the region code string; the PhoneMetadata objects are stored by
// value inside the map.
633 scoped_ptr<map<string, PhoneMetadata> > region_to_metadata_map_;
634
philip.liard@gmail.com80b803f2012-02-01 21:35:37 +0000635 // A mapping from a country calling code for a non-geographical entity to the
636 // PhoneMetadata for that country calling code. Examples of the country
637 // calling codes include 800 (International Toll Free Service) and 808
638 // (International Shared Cost Service).
// Keyed by the int country calling code; the PhoneMetadata objects are stored
// by value inside the map.
639 scoped_ptr<map<int, PhoneMetadata> >
640 country_code_to_non_geographical_metadata_map_;
641
// Default constructor, declared in the private section so that code outside
// this class (and its friends) cannot construct instances directly.
// NOTE(review): instances are presumably handed out through a singleton
// accessor (this header includes singleton.h) — confirm.
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000642 PhoneNumberUtil();
643
644 // Returns a regular expression for the possible extensions that may be found
philip.liard@gmail.com4784d0d2011-08-12 07:44:38 +0000645 // in a number, for use when matching.
// The pattern is returned as a reference to a string.
// NOTE(review): the lifetime of the referenced string is not visible here —
// presumably it lives at least as long as this object; confirm.
646 const string& GetExtnPatternsForMatching() const;
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000647
davinci@google.com6276be92012-07-04 13:52:11 +0000648 // Checks if a number matches the plus chars pattern.
// NOTE(review): as the name suggests, the match is presumably anchored at the
// start of the string — confirm in the implementation.
649 bool StartsWithPlusCharsPattern(const string& number) const;
650
philip.liard@gmail.com80d738a2011-09-14 10:42:59 +0000651 // Checks whether a string contains only valid digits.
// NOTE(review): what counts as a "valid digit" (ASCII only or other digit
// forms as well) is decided by the implementation — confirm there.
652 bool ContainsOnlyValidDigits(const string& s) const;
653
lararennie@google.com35bd3932012-09-06 09:48:57 +0000654 // Checks if a format is eligible to be used by the AsYouTypeFormatter. This
655 // method is here rather than in asyoutypeformatter.h since it depends on the
656 // valid punctuation declared by the phone number util.
//
// format: the format string to check.
philip.liard@gmail.com80d738a2011-09-14 10:42:59 +0000657 bool IsFormatEligibleForAsYouTypeFormatter(const string& format) const;
658
lararennie@google.com35bd3932012-09-06 09:48:57 +0000659 // Helper function to check if the national prefix formatting rule has the
660 // first group only, i.e., does not start with the national prefix.
//
// national_prefix_formatting_rule: the rule text to inspect.
661 bool FormattingRuleHasFirstGroupOnly(
662 const string& national_prefix_formatting_rule) const;
663
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000664 // Trims unwanted end characters from a phone number string.
//
// number: in/out parameter; the string is edited through the pointer.
665 void TrimUnwantedEndChars(string* number) const;
666
dbeaumont@google.com93d3e8b2012-10-11 16:44:08 +0000667 // Tests whether a phone number has a geographical association. It checks if
668 // the number is associated with a certain region in the country to which it
669 // belongs. Note that this doesn't verify if the number is actually in use.
670 bool IsNumberGeographical(const PhoneNumber& phone_number) const;
671
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000672 // Helper function to check that the region code is not unknown or null.
// NOTE(review): region_code is passed by const reference, so the "null" case
// presumably stems from the Java version of this library — confirm what the
// C++ implementation treats as invalid.
673 bool IsValidRegionCode(const string& region_code) const;
674
philip.liard@gmail.com80b803f2012-02-01 21:35:37 +0000675 // Helper function to check that the country calling code is valid.
// NOTE(review): "valid" presumably means the code is known to the loaded
// metadata — confirm in the implementation.
676 bool HasValidCountryCallingCode(int country_calling_code) const;
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000677
// Returns a pointer to the const PhoneMetadata for the given region code.
// NOTE(review): presumably looked up in region_to_metadata_map_, returning
// NULL for an unknown region, with the result owned by this object — confirm
// in the implementation.
678 const i18n::phonenumbers::PhoneMetadata* GetMetadataForRegion(
679 const string& region_code) const;
680
// Returns a pointer to the const PhoneMetadata for the given country calling
// code of a non-geographical entity.
// NOTE(review): presumably looked up in
// country_code_to_non_geographical_metadata_map_, returning NULL when the
// code is not present — confirm in the implementation.
philip.liard@gmail.com80b803f2012-02-01 21:35:37 +0000681 const i18n::phonenumbers::PhoneMetadata* GetMetadataForNonGeographicalRegion(
682 int country_calling_code) const;
683
// Returns a pointer to const PhoneMetadata selected from a country calling
// code and a region code.
// NOTE(review): presumably dispatches to GetMetadataForRegion or
// GetMetadataForNonGeographicalRegion depending on the arguments; the
// selection rule and the NULL behaviour are not visible here — confirm.
684 const i18n::phonenumbers::PhoneMetadata* GetMetadataForRegionOrCallingCode(
685 int country_calling_code,
686 const string& region_code) const;
687
philip.liard@gmail.comcbc255f2012-03-15 17:04:03 +0000688 // As per GetCountryCodeForRegion, but assumes the validity of the region_code
689 // has already been checked by the caller.
690 int GetCountryCodeForValidRegion(const string& region_code) const;
691
// Returns a pointer to a const NumberFormat chosen for national_number, given
// the candidate formats in available_formats.
// NOTE(review): presumably the result points into available_formats and is
// NULL when no format applies — confirm in the implementation.
philip.liard@gmail.com1fb4d232011-11-14 14:20:32 +0000692 const NumberFormat* ChooseFormattingPatternForNumber(
693 const RepeatedPtrField<NumberFormat>& available_formats,
philip.liard@gmail.com1fb4d232011-11-14 14:20:32 +0000694 const string& national_number) const;
695
// Formatting helper taking a national number, a single NumberFormat, the
// requested PhoneNumberFormat and a carrier code; the result is delivered
// through formatted_number (the function itself returns void).
// NOTE(review): presumably applies formatting_pattern to national_number and
// inserts carrier_code where the pattern calls for it; whether
// formatted_number is overwritten or appended to is not visible here —
// confirm in the implementation.
philip.liard@gmail.comcbc255f2012-03-15 17:04:03 +0000696 void FormatNsnUsingPatternWithCarrier(
philip.liard@gmail.com1fb4d232011-11-14 14:20:32 +0000697 const string& national_number,
philip.liard@gmail.comcbc255f2012-03-15 17:04:03 +0000698 const NumberFormat& formatting_pattern,
699 PhoneNumberUtil::PhoneNumberFormat number_format,
philip.liard@gmail.com1fb4d232011-11-14 14:20:32 +0000700 const string& carrier_code,
701 string* formatted_number) const;
702
// Same parameter list as FormatNsnUsingPatternWithCarrier minus the carrier
// code; the result is delivered through formatted_number.
// NOTE(review): presumably a wrapper for the no-carrier-code case — confirm
// in the implementation.
philip.liard@gmail.comcbc255f2012-03-15 17:04:03 +0000703 void FormatNsnUsingPattern(
philip.liard@gmail.com1fb4d232011-11-14 14:20:32 +0000704 const string& national_number,
philip.liard@gmail.comcbc255f2012-03-15 17:04:03 +0000705 const NumberFormat& formatting_pattern,
706 PhoneNumberUtil::PhoneNumberFormat number_format,
philip.liard@gmail.com1fb4d232011-11-14 14:20:32 +0000707 string* formatted_number) const;
708
philip.liard@gmail.com80b803f2012-02-01 21:35:37 +0000709 // Checks if raw_input, which is assumed to be in the national format, has a
710 // national prefix. The national prefix is assumed to be in digits-only form.
// NOTE(review): how region_code takes part in the check is not visible here
// — presumably it is used to validate the number once the prefix is removed;
// confirm in the implementation.
711 bool RawInputContainsNationalPrefix(
712 const string& raw_input,
713 const string& national_prefix,
714 const string& region_code) const;
715
716 // Returns true if a number is from a region whose national significant number
717 // cannot contain a leading zero, but has the italian_leading_zero field set
718 // to true.
719 bool HasUnexpectedItalianLeadingZero(const PhoneNumber& number) const;
720
// Predicate on a PhoneNumber.
// NOTE(review): presumably returns true when the metadata for the number's
// region contains a formatting pattern that applies to it — confirm in the
// implementation.
philip.liard@gmail.com86929be2011-11-29 10:48:41 +0000721 bool HasFormattingPatternForNumber(const PhoneNumber& number) const;
722
philip.liard@gmail.comcbc255f2012-03-15 17:04:03 +0000723 // Simple wrapper of FormatNsnWithCarrier for the common case of
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000724 // no carrier code.
// The result is delivered through formatted_number.
philip.liard@gmail.comcbc255f2012-03-15 17:04:03 +0000725 void FormatNsn(const string& number,
726 const PhoneMetadata& metadata,
727 PhoneNumberFormat number_format,
728 string* formatted_number) const;
729
// Formatting helper taking a number string, the PhoneMetadata to use, the
// requested PhoneNumberFormat and a carrier code; the result is delivered
// through formatted_number.
// NOTE(review): presumably picks a pattern from metadata and formats the
// national significant number with it; behaviour when no pattern applies is
// not visible here — confirm in the implementation.
730 void FormatNsnWithCarrier(const string& number,
philip.liard@gmail.com80b803f2012-02-01 21:35:37 +0000731 const PhoneMetadata& metadata,
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000732 PhoneNumberFormat number_format,
philip.liard@gmail.comcbc255f2012-03-15 17:04:03 +0000733 const string& carrier_code,
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000734 string* formatted_number) const;
735
// Helper taking a PhoneNumber, its PhoneMetadata and the requested
// PhoneNumberFormat, with a string written through the `extension` pointer.
// NOTE(review): going by the name, the number's extension (if any) is
// presumably formatted and appended to the string passed as `extension` —
// confirm whether that argument is the number being built or the extension
// alone.
philip.liard@gmail.comcbc255f2012-03-15 17:04:03 +0000736 void MaybeAppendFormattedExtension(
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000737 const PhoneNumber& number,
philip.liard@gmail.com80b803f2012-02-01 21:35:37 +0000738 const PhoneMetadata& metadata,
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000739 PhoneNumberFormat number_format,
740 string* extension) const;
741
// Takes a PhoneNumber and a list of candidate region codes; the chosen region
// code is delivered through the region_code pointer.
// NOTE(review): presumably selects the region from region_codes for which
// the number is valid; the value written when none matches is not visible
// here — confirm in the implementation.
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000742 void GetRegionCodeForNumberFromRegionList(
743 const PhoneNumber& number,
744 const list<string>& region_codes,
745 string* region_code) const;
746
philip.liard@gmail.com384682a2011-07-12 15:41:29 +0000747 // Strips the IDD from the start of the number if present. Helper function
748 // used by MaybeStripInternationalPrefixAndNormalize.
//
// idd_pattern: regular expression describing the IDD.
// number: in/out parameter, edited through the pointer.
// NOTE(review): the bool result presumably reports whether an IDD was
// stripped — confirm in the implementation.
749 bool ParsePrefixAsIdd(const RegExp& idd_pattern, string* number) const;
750
// Normalizes the string pointed to by number; the string is an in/out
// parameter and the function returns nothing.
// NOTE(review): the exact normalization rules (digit conversion, punctuation
// removal, letter handling) are not visible here — see the implementation.
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000751 void Normalize(string* number) const;
lararennie@google.com7e77f5f2013-07-19 16:32:26 +0000752
// Takes a possible IDD prefix and a number (in/out, through the pointer) and
// returns a PhoneNumber::CountryCodeSource value.
// NOTE(review): presumably strips a leading plus sign or IDD from number,
// normalizes what remains, and reports through the return value how the
// country code was indicated — confirm in the implementation.
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000753 PhoneNumber::CountryCodeSource MaybeStripInternationalPrefixAndNormalize(
754 const string& possible_idd_prefix,
755 string* number) const;
756
// Takes the PhoneMetadata for a region plus two string pointers: number
// (in/out) and carrier_code (output).
// NOTE(review): presumably removes a national prefix and/or carrier code
// from number according to metadata, stores any carrier code found in
// carrier_code, and returns true when something was stripped; whether
// carrier_code may be NULL is not visible here — confirm.
philip.liard@gmail.com1fb4d232011-11-14 14:20:32 +0000757 bool MaybeStripNationalPrefixAndCarrierCode(
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000758 const PhoneMetadata& metadata,
759 string* number,
760 string* carrier_code) const;
761
// Reads number and delivers its result through extracted_number.
// NOTE(review): presumably extracts the part of the input that could be a
// phone number, dropping surrounding text; the result for input without any
// candidate is not visible here — confirm in the implementation.
762 void ExtractPossibleNumber(const string& number,
763 string* extracted_number) const;
764
// Predicate on a number string.
// NOTE(review): presumably a cheap syntactic check that the string could be
// a phone number at all, not a validity check — confirm in the
// implementation.
765 bool IsViablePhoneNumber(const string& number) const;
766
// Takes two string pointers: number (in/out) and extension (output).
// NOTE(review): presumably removes an extension from number, stores it in
// extension, and returns true when one was found — confirm in the
// implementation.
767 bool MaybeStripExtension(string* number, string* extension) const;
768
// Takes national_number as an in/out string and returns an int.
// NOTE(review): presumably strips a leading country calling code from the
// string and returns it, with 0 meaning none was found — confirm the
// sentinel value in the implementation.
769 int ExtractCountryCode(string* national_number) const;
// Takes optional default-region metadata (passed by pointer), a flag that
// controls keeping the raw input, the number string (in/out) and the
// PhoneNumber to fill in; returns an ErrorType.
// NOTE(review): presumably tries to extract a country calling code from
// national_number, falls back to default_region_metadata when the number
// carries none, and records the outcome in phone_number; whether
// default_region_metadata may be NULL is not visible here — confirm in the
// implementation.
//
// keep_raw_input uses the same snake_case spelling as the matching parameter
// of ParseHelper.
770 ErrorType MaybeExtractCountryCode(
771 const PhoneMetadata* default_region_metadata,
772 bool keep_raw_input,
773 string* national_number,
774 PhoneNumber* phone_number) const;
775
// Predicate on a number to parse and a default region.
// NOTE(review): presumably returns true when default_region is usable for
// parsing, or when number_to_parse does not need it (e.g. it starts with a
// plus sign) — confirm in the implementation.
776 bool CheckRegionForParsing(
777 const string& number_to_parse,
778 const string& default_region) const;
779
// Parsing helper that takes the number string, a default region and two
// flags, fills in phone_number through the pointer and returns an ErrorType.
// NOTE(review): presumably the shared implementation behind the public parse
// functions, with keep_raw_input controlling whether the raw input is stored
// in the result and check_region controlling whether default_region is
// validated first — confirm in the implementation.
780 ErrorType ParseHelper(const string& number_to_parse,
781 const string& default_region,
782 bool keep_raw_input,
783 bool check_region,
784 PhoneNumber* phone_number) const;
785
// Reads number_to_parse and delivers its result through national_number.
// NOTE(review): presumably converts the raw input into the string that the
// rest of the parsing pipeline works on; the exact transformation is not
// visible here — confirm in the implementation.
davinci@google.com6276be92012-07-04 13:52:11 +0000786 void BuildNationalNumberForParsing(const string& number_to_parse,
787 string* national_number) const;
788
philip.liard@gmail.com80b803f2012-02-01 21:35:37 +0000789 // Returns true if the number can be dialled from outside the region, or if
790 // this is unknown. If the number can only be dialled from within the region, returns
791 // false. Does not check that the number is a valid number.
philip.liard@gmail.comd72e8b92011-10-12 11:47:24 +0000792 bool CanBeInternationallyDialled(const PhoneNumber& number) const;
793
// Macro invocation placed in the private section of the class.
// NOTE(review): presumably defined in basictypes.h and expanding to private
// copy-constructor / assignment-operator declarations so that
// PhoneNumberUtil cannot be copied — confirm against the macro definition.
philip.liard@gmail.com1ad5e5b2011-07-01 08:22:06 +0000794 DISALLOW_COPY_AND_ASSIGN(PhoneNumberUtil);
795};
796
797} // namespace phonenumbers
798} // namespace i18n
799
800#endif // I18N_PHONENUMBERS_PHONENUMBERUTIL_H_