Revert ""Rewrite" PhoneNumberUtil so that it compares two phone strings using as many characters as possible, unlike the previous implementation."
This reverts commit de43094477419f8a190a6f97b47d346827310a02.
related internal issue: 1949010
diff --git a/android/PhoneNumberUtils.cpp b/android/PhoneNumberUtils.cpp
index 321b0ea..9e5e470 100644
--- a/android/PhoneNumberUtils.cpp
+++ b/android/PhoneNumberUtils.cpp
@@ -1,355 +1,293 @@
-/*
- * Copyright 2009, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+/* //device/vmlibs-android/com.android.internal.telephony/PhoneNumberUtils.java
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+** http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
#include <string.h>
namespace android {
-/* Generated by the following Python script. Values of country calling codes
- are from http://en.wikipedia.org/wiki/List_of_country_calling_codes
+static int MIN_MATCH = 5;
-#!/usr/bin/python
-import sys
-ccc_set_2digits = set([0, 1, 7,
- 20, 27, 28, 30, 31, 32, 33, 34, 36, 39, 40, 43, 44, 45,
- 46, 47, 48, 49, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61,
- 62, 63, 64, 65, 66, 81, 82, 83, 84, 86, 89, 90, 91, 92,
- 93, 94, 95, 98])
-
-ONE_LINE_NUM = 10
-
-for i in xrange(100):
- if i % ONE_LINE_NUM == 0:
- sys.stdout.write(' ')
- if i in ccc_set_2digits:
- included = 'true'
- else:
- included = 'false'
- sys.stdout.write(included + ',')
- if ((i + 1) % ONE_LINE_NUM) == 0:
- sys.stdout.write('\n')
- else:
- sys.stdout.write(' ')
-*/
-static bool two_length_country_code_map[100] = {
- true, true, false, false, false, false, false, true, false, false,
- false, false, false, false, false, false, false, false, false, false,
- true, false, false, false, false, false, false, true, true, false,
- true, true, true, true, true, false, true, false, false, true,
- true, false, false, true, true, true, true, true, true, true,
- false, true, true, true, true, true, true, true, true, false,
- true, true, true, true, true, true, true, false, false, false,
- false, false, false, false, false, false, false, false, false, false,
- false, true, true, true, true, false, true, false, false, true,
- true, true, true, true, true, true, false, false, true, false,
-};
-
-/** True the character(s) expresses some country calling code. False otherwise.
- */
-static bool isCountryCallingCode(int ccc_candidate) {
- return ccc_candidate > 0 &&
- ccc_candidate < (int)sizeof(two_length_country_code_map) &&
- two_length_country_code_map[ccc_candidate];
-}
-
-/**
- * Returns interger corresponding to the input if input "ch" is
- * ISO-LATIN characters 0-9.
- * Returns -1 otherwise
- */
-static int tryGetISODigit (char ch)
+/** True if c is ISO-LATIN characters 0-9 */
+static bool isISODigit (char c)
{
- if ('0' <= ch && ch <= '9') {
- return ch - '0';
- } else {
- return -1;
- }
+ return c >= '0' && c <= '9';
}
/** True if c is ISO-LATIN characters 0-9, *, # , + */
-static bool isNonSeparator(char ch)
+static bool isNonSeparator(char c)
{
- return ('0' <= ch && ch <= '9') || ch == '*' || ch == '#' || ch == '+';
+ return (c >= '0' && c <= '9') || c == '*' || c == '#' || c == '+';
}
/**
- * Try to store the pointer to "new_ptr" which does not have trunk prefix.
+ * Phone numbers are stored in "lookup" form in the database
+ * as reversed strings to allow for caller ID lookup
*
- * Currently this function simply ignore the first digit assuming it is
- * trunk prefix. Actually trunk prefix is different in each country.
- *
- * e.g.
- * "+79161234567" equals "89161234567" (Russian trunk digit is 8)
- * "+33123456789" equals "0123456789" (French trunk digit is 0)
+ * This method takes a phone number and makes a valid SQL "LIKE"
+ * string that will match the lookup form
*
*/
-static bool tryGetTrunkPrefixOmittedStr(const char *str, size_t len,
- const char **new_ptr, size_t *new_len)
+/** all of a up to len must be an international prefix or
+ * separators/non-dialing digits
+ */
+static bool matchIntlPrefix(const char* a, int len)
{
- for (size_t i = 0 ; i < len ; i++) {
- char ch = str[i];
- if (tryGetISODigit(ch) >= 0) {
- if (new_ptr != NULL) {
- *new_ptr = str + i + 1;
- }
- if (new_len != NULL) {
- *new_len = len - (i + 1);
- }
- return true;
- } else if (isNonSeparator(ch)) {
- return false;
- }
- }
-
- return false;
-}
-
-static int tryGetCountryCallingCode(const char *str, size_t len,
- const char **new_ptr, size_t *new_len)
-{
- // Rough regexp:
- // ^[^0-9*#+]*((\+|0(0|11)\d\d?|166) [^0-9*#+] $
- // 0 1 2 3 45 6 7 89
- //
- // In all the states, this function ignores separator characters.
- // "166" is the special case for the call from Thailand to the US. Ugu!
+ /* '([^0-9*#+]\+[^0-9*#+] | [^0-9*#+]0(0|11)[^0-9*#+] )$' */
+ /* 0 1 2 3 45 */
int state = 0;
- int ccc = 0;
- for (size_t i = 0 ; i < len ; i++ ) {
- char ch = str[i];
+ for (int i = 0 ; i < len ; i++) {
+ char c = a[i];
+
switch (state) {
- case 0:
- if (ch == '+') state = 1;
- else if (ch == '0') state = 2;
- else if (ch == '1') state = 8;
- else if (isNonSeparator(ch)) return -1;
+ case 0:
+ if (c == '+') state = 1;
+ else if (c == '0') state = 2;
+ else if (isNonSeparator(c)) return false;
break;
case 2:
- if (ch == '0') state = 3;
- else if (ch == '1') state = 4;
- else if (isNonSeparator(ch)) return -1;
+ if (c == '0') state = 3;
+ else if (c == '1') state = 4;
+ else if (isNonSeparator(c)) return false;
break;
case 4:
- if (ch == '1') state = 5;
- else if (isNonSeparator(ch)) return -1;
+ if (c == '1') state = 5;
+ else if (isNonSeparator(c)) return false;
+ break;
+
+ default:
+ if (isNonSeparator(c)) return false;
+ break;
+
+ }
+ }
+
+ return state == 1 || state == 3 || state == 5;
+}
+
+/** all of 'a' up to len must match non-US trunk prefix ('0') */
+static bool matchTrunkPrefix(const char* a, int len)
+{
+ bool found;
+
+ found = false;
+
+ for (int i = 0 ; i < len ; i++) {
+ char c = a[i];
+
+ if (c == '0' && !found) {
+ found = true;
+ } else if (isNonSeparator(c)) {
+ return false;
+ }
+ }
+
+ return found;
+}
+
+/** all of 'a' up to len must be a (+|00|011)country code)
+ * We're fast and loose with the country code. Any \d{1,3} matches */
+static bool matchIntlPrefixAndCC(const char* a, int len)
+{
+ /* [^0-9*#+]*(\+|0(0|11)\d\d?\d? [^0-9*#+] $ */
+ /* 0 1 2 3 45 6 7 8 */
+
+ int state = 0;
+ for (int i = 0 ; i < len ; i++ ) {
+ char c = a[i];
+
+ switch (state) {
+ case 0:
+ if (c == '+') state = 1;
+ else if (c == '0') state = 2;
+ else if (isNonSeparator(c)) return false;
+ break;
+
+ case 2:
+ if (c == '0') state = 3;
+ else if (c == '1') state = 4;
+ else if (isNonSeparator(c)) return false;
+ break;
+
+ case 4:
+ if (c == '1') state = 5;
+ else if (isNonSeparator(c)) return false;
break;
case 1:
case 3:
case 5:
+ if (isISODigit(c)) state = 6;
+ else if (isNonSeparator(c)) return false;
+ break;
+
case 6:
case 7:
- {
- int ret = tryGetISODigit(ch);
- if (ret > 0) {
- ccc = ccc * 10 + ret;
- if (ccc >= 100 || isCountryCallingCode(ccc)) {
- if (new_ptr != NULL) {
- *new_ptr = str + i + 1;
- }
- if (new_len != NULL) {
- *new_len = len - (i + 1);
- }
- return ccc;
- }
- if (state == 1 || state == 3 || state == 5) {
- state = 6;
- } else {
- state++;
- }
- } else if (isNonSeparator(ch)) {
- return -1;
- }
- }
- break;
- case 8:
- if (ch == '6') state = 9;
- else if (isNonSeparator(ch)) return -1;
- break;
- case 9:
- if (ch == '6') {
- if (new_ptr != NULL) {
- *new_ptr = str + i + 1;
- }
- if (new_len != NULL) {
- *new_len = len - (i + 1);
- }
- return 66;
- }
- break;
+ if (isISODigit(c)) state++;
+ else if (isNonSeparator(c)) return false;
+ break;
+
default:
- return -1;
+ if (isNonSeparator(c)) return false;
}
}
- return -1;
+ return state == 6 || state == 7 || state == 8;
+}
+
+/** or -1 if both are negative */
+static int minPositive(int a, int b)
+{
+ if (a >= 0 && b >= 0) {
+ return (a < b) ? a : b;
+ } else if (a >= 0) { /* && b < 0 */
+ return a;
+ } else if (b >= 0) { /* && a < 0 */
+ return b;
+ } else { /* a < 0 && b < 0 */
+ return -1;
+ }
}
/**
- * Return true if the prefix of "ch" is "ignorable". Here, "ignorable" means
- * that "ch" has only one digit and separater characters. The one digit is
- * assumed to be trunk prefix.
+ * Return the offset into a of the first appearance of b, or -1 if there
+ * is no such character in a.
*/
-static bool checkPrefixIsIgnorable(const char* ch, int i) {
- bool trunk_prefix_was_read = false;
- while (i >= 0) {
- if (tryGetISODigit(ch[i]) >= 0) {
- if (trunk_prefix_was_read) {
- // More than one digit appeared, meaning that "a" and "b"
- // is different.
- return false;
- } else {
- // Ignore just one digit, assuming it is trunk prefix.
- trunk_prefix_was_read = true;
- }
- } else if (isNonSeparator(ch[i])) {
- // Trunk prefix is a digit, not "*", "#"...
- return false;
- }
- i--;
- }
+static int indexOf(const char *a, char b) {
+ char *ix = strchr(a, b);
- return true;
+ if (ix == NULL)
+ return -1;
+ else
+ return ix - a;
}
/**
* Compare phone numbers a and b, return true if they're identical
* enough for caller ID purposes.
*
- * Assume NULL as 0-length string.
+ * - Compares from right to left
+ * - requires MIN_MATCH (5) characters to match
+ * - handles common trunk prefixes and international prefixes
+ * (basically, everything except the Russian trunk prefix)
*
- * Detailed information:
- * Currently (as of 2009-06-12), we cannot depend on the locale given from the
- * OS. For example, current Android does not accept "en_JP", meaning
- * "the display language is English but the phone should be in Japan", but
- * en_US, es_US, etc. So we cannot identify which digit is valid trunk prefix
- * in the country where the phone is used. More specifically, "880-1234-1234"
- * is not valid phone number in Japan since the trunk prefix in Japan is not 8
- * but 0 (correct number should be "080-1234-1234"), while Russian trunk prefix
- * is 8. Also, we cannot know whether the country where users live has trunk
- * prefix itself. So, we cannot determine whether "+81-80-1234-1234" is NOT
- * same as "880-1234-1234" (while "+81-80-1234-1234" is same as "080-1234-1234"
- * and we can determine "880-1234-1234" is different from "080-1234-1234").
- *
- * In the future, we should handle trunk prefix more correctly, but as of now,
- * we just ignore it...
+ * Tolerates nulls
*/
bool phone_number_compare(const char* a, const char* b)
{
- size_t len_a = 0;
- size_t len_b = 0;
- if (a == NULL) {
- a = "";
- } else {
- len_a = strlen(a);
- }
- if (b == NULL) {
- b = "";
- } else {
- len_b = strlen(b);
+ int ia, ib;
+ int matched;
+
+ if (a == NULL || b == NULL) {
+ return false;
}
- const char* tmp_a = NULL;
- const char* tmp_b = NULL;
- size_t tmp_len_a = len_a;
- size_t tmp_len_b = len_b;
-
- int ccc_a = tryGetCountryCallingCode(a, len_a, &tmp_a, &tmp_len_a);
- int ccc_b = tryGetCountryCallingCode(b, len_b, &tmp_b, &tmp_len_b);
- bool ok_to_ignore_prefix = true;
- if (ccc_a >= 0 && ccc_b >= 0) {
- if (ccc_a != ccc_b) {
- // Different Country Calling Code. Must be different phone number.
- return false;
- }
- // When both have ccc, do not ignore trunk prefix. Without this,
- // "+81123123" becomes same as "+810123123" (+81 == Japan)
- ok_to_ignore_prefix = false;
- } else if (ccc_a < 0 && ccc_b < 0) {
- // When both do not have ccc, do not ignore trunk prefix. Without this,
- // "123123" becomes same as "0123123"
- ok_to_ignore_prefix = false;
- } else {
- if (ccc_a < 0) {
- tryGetTrunkPrefixOmittedStr(a, len_a, &tmp_a, &tmp_len_a);
- }
- if (ccc_b < 0) {
- tryGetTrunkPrefixOmittedStr(b, len_b, &tmp_b, &tmp_len_b);
- }
+ ia = strlen(a);
+ ib = strlen(b);
+ if (ia == 0 || ib == 0) {
+ return false;
}
- if (tmp_a != NULL) {
- a = tmp_a;
- len_a = tmp_len_a;
- }
- if (tmp_b != NULL) {
- b = tmp_b;
- len_b = tmp_len_b;
- }
+ // Compare from right to left
+ ia--;
+ ib--;
- int i_a = len_a - 1;
- int i_b = len_b - 1;
- while (i_a >= 0 && i_b >= 0) {
- bool skip_compare = false;
- char ch_a = a[i_a];
- char ch_b = b[i_b];
- if (!isNonSeparator(ch_a)) {
- i_a--;
- skip_compare = true;
- }
- if (!isNonSeparator(ch_b)) {
- i_b--;
- skip_compare = true;
+ matched = 0;
+
+ while (ia >= 0 && ib >=0) {
+ char ca, cb;
+ bool skipCmp = false;
+
+ ca = a[ia];
+
+ if (!isNonSeparator(ca)) {
+ ia--;
+ skipCmp = true;
}
- if (!skip_compare) {
- if (ch_a != ch_b) {
- return false;
+ cb = b[ib];
+
+ if (!isNonSeparator(cb)) {
+ ib--;
+ skipCmp = true;
+ }
+
+ if (!skipCmp) {
+ if (cb != ca) {
+ break;
}
- i_a--;
- i_b--;
+ ia--; ib--; matched++;
}
}
- if (ok_to_ignore_prefix) {
- if (!checkPrefixIsIgnorable(a, i_a)) {
- return false;
+ if (matched < MIN_MATCH) {
+ int aLen = strlen(a);
+
+ // if the input strings match, but their lengths < MIN_MATCH,
+ // treat them as equal.
+ if (aLen == (int)strlen(b) && aLen == matched) {
+ return true;
}
- if (!checkPrefixIsIgnorable(b, i_b)) {
- return false;
- }
- } else {
- while (i_a >= 0) {
- if (isNonSeparator(a[i_a])) {
- return false;
- }
- i_a--;
- }
- while (i_b >= 0) {
- if (isNonSeparator(b[i_b])) {
- return false;
- }
- i_b--;
- }
+ return false;
}
- return true;
+ // At least one string has matched completely;
+ if (matched >= MIN_MATCH && (ia < 0 || ib < 0)) {
+ return true;
+ }
+
+ /*
+ * Now, what remains must be one of the following for a
+ * match:
+ *
+ * - a '+' on one and a '00' or a '011' on the other
+ * - a '0' on one and a (+,00)<country code> on the other
+ * (for this, a '0' and a '00' prefix would have succeeded above)
+ */
+
+ if (matchIntlPrefix(a, ia + 1) && matchIntlPrefix(b, ib +1)) {
+ return true;
+ }
+
+ if (matchTrunkPrefix(a, ia + 1) && matchIntlPrefixAndCC(b, ib +1)) {
+ return true;
+ }
+
+ if (matchTrunkPrefix(b, ib + 1) && matchIntlPrefixAndCC(a, ia +1)) {
+ return true;
+ }
+
+ /*
+ * Last resort: if the number of unmatched characters on both sides is less than or equal
+ * to the length of the longest country code and only one number starts with a + accept
+ * the match. This is because some countries like France and Russia have an extra prefix
+ * digit that is used when dialing locally in country that does not show up when you dial
+ * the number using the country code. In France this prefix digit is used to determine
+ * which land line carrier to route the call over.
+ */
+ bool aPlusFirst = (*a == '+');
+ bool bPlusFirst = (*b == '+');
+ if (ia < 4 && ib < 4 && (aPlusFirst || bPlusFirst) && !(aPlusFirst && bPlusFirst)) {
+ return true;
+ }
+
+ return false;
}
} // namespace android