Revert ""Rewrite" PhoneNumberUtil so that it compares two phone strings using as many characters as possible, unlike the previous implementation."

This reverts commit de43094477419f8a190a6f97b47d346827310a02.

related internal issue: 1949010
diff --git a/android/PhoneNumberUtils.cpp b/android/PhoneNumberUtils.cpp
index 321b0ea..9e5e470 100644
--- a/android/PhoneNumberUtils.cpp
+++ b/android/PhoneNumberUtils.cpp
@@ -1,355 +1,293 @@
-/*
- * Copyright 2009, The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+/* //device/vmlibs-android/com.android.internal.telephony/PhoneNumberUtils.java
+**
+** Copyright 2006, The Android Open Source Project
+**
+** Licensed under the Apache License, Version 2.0 (the "License"); 
+** you may not use this file except in compliance with the License. 
+** You may obtain a copy of the License at 
+**
+**     http://www.apache.org/licenses/LICENSE-2.0 
+**
+** Unless required by applicable law or agreed to in writing, software 
+** distributed under the License is distributed on an "AS IS" BASIS, 
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+** See the License for the specific language governing permissions and 
+** limitations under the License.
+*/
 
 #include <string.h>
 
 namespace android {
 
-/* Generated by the following Python script. Values of country calling codes
-   are from http://en.wikipedia.org/wiki/List_of_country_calling_codes
+static int MIN_MATCH = 5;
 
-#!/usr/bin/python
-import sys
-ccc_set_2digits = set([0, 1, 7,
-                       20, 27, 28, 30, 31, 32, 33, 34, 36, 39, 40, 43, 44, 45,
-                       46, 47, 48, 49, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61,
-                       62, 63, 64, 65, 66, 81, 82, 83, 84, 86, 89, 90, 91, 92,
-                       93, 94, 95, 98])
-
-ONE_LINE_NUM = 10
-
-for i in xrange(100):
-  if i % ONE_LINE_NUM == 0:
-    sys.stdout.write('    ')
-  if i in ccc_set_2digits:
-    included = 'true'
-  else:
-    included = 'false'
-  sys.stdout.write(included + ',')
-  if ((i + 1) % ONE_LINE_NUM) == 0:
-    sys.stdout.write('\n')
-  else:
-    sys.stdout.write(' ')
-*/
-static bool two_length_country_code_map[100] = {
-    true, true, false, false, false, false, false, true, false, false,
-    false, false, false, false, false, false, false, false, false, false,
-    true, false, false, false, false, false, false, true, true, false,
-    true, true, true, true, true, false, true, false, false, true,
-    true, false, false, true, true, true, true, true, true, true,
-    false, true, true, true, true, true, true, true, true, false,
-    true, true, true, true, true, true, true, false, false, false,
-    false, false, false, false, false, false, false, false, false, false,
-    false, true, true, true, true, false, true, false, false, true,
-    true, true, true, true, true, true, false, false, true, false,
-};
-
-/** True the character(s) expresses some country calling code. False otherwise.
- */
-static bool isCountryCallingCode(int ccc_candidate) {
-    return ccc_candidate > 0 &&
-            ccc_candidate < (int)sizeof(two_length_country_code_map) &&
-            two_length_country_code_map[ccc_candidate];
-}
-
-/**
- * Returns interger corresponding to the input if input "ch" is
- * ISO-LATIN characters 0-9.
- * Returns -1 otherwise
- */
-static int tryGetISODigit (char ch)
+/** True if c is ISO-LATIN characters 0-9 */
+static bool isISODigit (char c)
 {
-    if ('0' <= ch && ch <= '9') {
-        return ch - '0';
-    } else {
-        return -1;
-    }
+    return c >= '0' && c <= '9';
 }
 
 /** True if c is ISO-LATIN characters 0-9, *, # , +  */
-static bool isNonSeparator(char ch)
+static bool isNonSeparator(char c)
 {
-    return ('0' <= ch && ch <= '9') || ch == '*' || ch == '#' || ch == '+';
+    return (c >= '0' && c <= '9') || c == '*' || c == '#' || c == '+';
 }
 
 /**
- * Try to store the pointer to "new_ptr" which does not have trunk prefix.
+ * Phone numbers are stored in "lookup" form in the database
+ * as reversed strings to allow for caller ID lookup
  *
- * Currently this function simply ignore the first digit assuming it is
- * trunk prefix. Actually trunk prefix is different in each country.
- *
- * e.g.
- * "+79161234567" equals "89161234567" (Russian trunk digit is 8)
- * "+33123456789" equals "0123456789" (French trunk digit is 0)
+ * This method takes a phone number and makes a valid SQL "LIKE"
+ * string that will match the lookup form 
  *
  */
-static bool tryGetTrunkPrefixOmittedStr(const char *str, size_t len,
-                                        const char **new_ptr, size_t *new_len)
+/** all of a up to len must be an international prefix or 
+ *  separators/non-dialing digits
+ */
+static bool matchIntlPrefix(const char* a, int len) 
 {
-    for (size_t i = 0 ; i < len ; i++) {
-        char ch = str[i];
-        if (tryGetISODigit(ch) >= 0) {
-            if (new_ptr != NULL) {
-                *new_ptr = str + i + 1;
-            }
-            if (new_len != NULL) {
-                *new_len = len - (i + 1);
-            }
-            return true;
-        } else if (isNonSeparator(ch)) {
-            return false;
-        }
-    }
-
-    return false;
-}
-
-static int tryGetCountryCallingCode(const char *str, size_t len,
-                             const char **new_ptr, size_t *new_len)
-{
-    // Rough regexp:
-    //  ^[^0-9*#+]*((\+|0(0|11)\d\d?|166) [^0-9*#+] $
-    //         0        1 2 3 45  6 7  89
-    //
-    // In all the states, this function ignores separator characters.
-    // "166" is the special case for the call from Thailand to the US. Ugu!
+    /* '([^0-9*#+]\+[^0-9*#+] | [^0-9*#+]0(0|11)[^0-9*#+] )$' */
+    /*        0    1                     2 3 45               */ 
 
     int state = 0;
-    int ccc = 0;
-    for (size_t i = 0 ; i < len ; i++ ) {
-        char ch = str[i];
+    for (int i = 0 ; i < len ; i++) {
+        char c = a[i];
+
         switch (state) {
-            case 0:
-                if      (ch == '+') state = 1;
-                else if (ch == '0') state = 2;
-                else if (ch == '1') state = 8;
-                else if (isNonSeparator(ch)) return -1;
+            case 0: 
+                if      (c == '+') state = 1;
+                else if (c == '0') state = 2;
+                else if (isNonSeparator(c)) return false;
             break;
 
             case 2:
-                if      (ch == '0') state = 3;
-                else if (ch == '1') state = 4;
-                else if (isNonSeparator(ch)) return -1;
+                if      (c == '0') state = 3;
+                else if (c == '1') state = 4;
+                else if (isNonSeparator(c)) return false;
             break;
 
             case 4:
-                if      (ch == '1') state = 5;
-                else if (isNonSeparator(ch)) return -1;
+                if      (c == '1') state = 5;
+                else if (isNonSeparator(c)) return false;
+            break;
+
+            default:
+                if (isNonSeparator(c)) return false;
+            break;
+
+        }
+    }
+
+    return state == 1 || state == 3 || state == 5;
+}
+
+/** all of 'a' up to len must match non-US trunk prefix ('0') */
+static bool matchTrunkPrefix(const char* a, int len)
+{
+    bool found;
+
+    found = false;
+
+    for (int i = 0 ; i < len ; i++) {
+        char c = a[i];
+
+        if (c == '0' && !found) {
+            found = true;
+        } else if (isNonSeparator(c)) {
+            return false;
+        }
+    }
+    
+    return found;
+}
+
+/** all of 'a' up to len must be a (+|00|011)country code) 
+ *  We're fast and loose with the country code. Any \d{1,3} matches */
+static bool matchIntlPrefixAndCC(const char* a, int len)
+{
+    /*  [^0-9*#+]*(\+|0(0|11)\d\d?\d? [^0-9*#+] $ */
+    /*      0       1 2 3 45  6 7  8              */
+
+    int state = 0;
+    for (int i = 0 ; i < len ; i++ ) {
+        char c = a[i];
+
+        switch (state) {
+            case 0:
+                if      (c == '+') state = 1;
+                else if (c == '0') state = 2;
+                else if (isNonSeparator(c)) return false;
+            break;
+
+            case 2:
+                if      (c == '0') state = 3;
+                else if (c == '1') state = 4;
+                else if (isNonSeparator(c)) return false;
+            break;
+
+            case 4:
+                if      (c == '1') state = 5;
+                else if (isNonSeparator(c)) return false;
             break;
 
             case 1:
             case 3:
             case 5:
+                if      (isISODigit(c)) state = 6;
+                else if (isNonSeparator(c)) return false;
+            break;
+
             case 6:
             case 7:
-                {
-                    int ret = tryGetISODigit(ch);
-                    if (ret > 0) {
-                        ccc = ccc * 10 + ret;
-                        if (ccc >= 100 || isCountryCallingCode(ccc)) {
-                            if (new_ptr != NULL) {
-                                *new_ptr = str + i + 1;
-                            }
-                            if (new_len != NULL) {
-                                *new_len = len - (i + 1);
-                            }
-                            return ccc;
-                        }
-                        if (state == 1 || state == 3 || state == 5) {
-                            state = 6;
-                        } else {
-                            state++;
-                        }
-                    } else if (isNonSeparator(ch)) {
-                        return -1;
-                    }
-                }
-                break;
-            case 8:
-                if (ch == '6') state = 9;
-                else if (isNonSeparator(ch)) return -1;
-                break;
-            case 9:
-                if (ch == '6') {
-                    if (new_ptr != NULL) {
-                        *new_ptr = str + i + 1;
-                    }
-                    if (new_len != NULL) {
-                        *new_len = len - (i + 1);
-                    }
-                    return 66;
-                }
-                break;
+                if      (isISODigit(c)) state++;
+                else if (isNonSeparator(c)) return false;
+            break;
+
             default:
-                return -1;
+                if (isNonSeparator(c)) return false;
         }
     }
 
-    return -1;
+    return state == 6 || state == 7 || state == 8;
+}
+
+/** or -1 if both are negative */
+static int minPositive(int a, int b)
+{
+    if (a >= 0 && b >= 0) {
+        return (a < b) ? a : b; 
+    } else if (a >= 0) { /* && b < 0 */
+        return a;
+    } else if (b >= 0) { /* && a < 0 */
+        return b;
+    } else { /* a < 0 && b < 0 */ 
+        return -1;
+    }
 }
 
 /**
- * Return true if the prefix of "ch" is "ignorable". Here, "ignorable" means
- * that "ch" has only one digit and separater characters. The one digit is
- * assumed to be trunk prefix.
+ * Return the offset into a of the first appearance of b, or -1 if there
+ * is no such character in a.
  */
-static bool checkPrefixIsIgnorable(const char* ch, int i) {
-    bool trunk_prefix_was_read = false;
-    while (i >= 0) {
-        if (tryGetISODigit(ch[i]) >= 0) {
-            if (trunk_prefix_was_read) {
-                // More than one digit appeared, meaning that "a" and "b"
-                // is different.
-                return false;
-            } else {
-                // Ignore just one digit, assuming it is trunk prefix.
-                trunk_prefix_was_read = true;
-            }
-        } else if (isNonSeparator(ch[i])) {
-            // Trunk prefix is a digit, not "*", "#"...
-            return false;
-        }
-        i--;
-    }
+static int indexOf(const char *a, char b) {
+    char *ix = strchr(a, b);
 
-    return true;
+    if (ix == NULL)
+        return -1;
+    else
+        return ix - a;
 }
 
 /**
  * Compare phone numbers a and b, return true if they're identical
  * enough for caller ID purposes.
  *
- * Assume NULL as 0-length string.
+ * - Compares from right to left
+ * - requires MIN_MATCH (5) characters to match
+ * - handles common trunk prefixes and international prefixes 
+ *   (basically, everything except the Russian trunk prefix)
  *
- * Detailed information:
- * Currently (as of 2009-06-12), we cannot depend on the locale given from the
- * OS. For example, current Android does not accept "en_JP", meaning
- * "the display language is English but the phone should be in Japan", but
- * en_US, es_US, etc. So we cannot identify which digit is valid trunk prefix
- * in the country where the phone is used. More specifically, "880-1234-1234"
- * is not valid phone number in Japan since the trunk prefix in Japan is not 8
- * but 0 (correct number should be "080-1234-1234"), while Russian trunk prefix
- * is 8. Also, we cannot know whether the country where users live has trunk
- * prefix itself. So, we cannot determine whether "+81-80-1234-1234" is NOT
- * same as "880-1234-1234" (while "+81-80-1234-1234" is same as "080-1234-1234"
- * and we can determine "880-1234-1234" is different from "080-1234-1234").
- *
- * In the future, we should handle trunk prefix more correctly, but as of now,
- * we just ignore it...
+ * Tolerates nulls
  */
 bool phone_number_compare(const char* a, const char* b)
 {
-    size_t len_a = 0;
-    size_t len_b = 0;
-    if (a == NULL) {
-        a = "";
-    } else {
-        len_a = strlen(a);
-    }
-    if (b == NULL) {
-        b = "";
-    } else {
-        len_b = strlen(b);
+    int ia, ib;
+    int matched;
+
+    if (a == NULL || b == NULL) {
+        return false; 
     }
 
-    const char* tmp_a = NULL;
-    const char* tmp_b = NULL;
-    size_t tmp_len_a = len_a;
-    size_t tmp_len_b = len_b;
-
-    int ccc_a = tryGetCountryCallingCode(a, len_a, &tmp_a, &tmp_len_a);
-    int ccc_b = tryGetCountryCallingCode(b, len_b, &tmp_b, &tmp_len_b);
-    bool ok_to_ignore_prefix = true;
-    if (ccc_a >= 0 && ccc_b >= 0) {
-        if (ccc_a != ccc_b) {
-            // Different Country Calling Code. Must be different phone number.
-            return false;
-        }
-        // When both have ccc, do not ignore trunk prefix. Without this,
-        // "+81123123" becomes same as "+810123123" (+81 == Japan)
-        ok_to_ignore_prefix = false;
-    } else if (ccc_a < 0 && ccc_b < 0) {
-        // When both do not have ccc, do not ignore trunk prefix. Without this,
-        // "123123" becomes same as "0123123"
-        ok_to_ignore_prefix = false;
-    } else {
-        if (ccc_a < 0) {
-            tryGetTrunkPrefixOmittedStr(a, len_a, &tmp_a, &tmp_len_a);
-        }
-        if (ccc_b < 0) {
-            tryGetTrunkPrefixOmittedStr(b, len_b, &tmp_b, &tmp_len_b);
-        }
+    ia = strlen(a);
+    ib = strlen(b);
+    if (ia == 0 || ib == 0) {
+        return false;
     }
 
-    if (tmp_a != NULL) {
-        a = tmp_a;
-        len_a = tmp_len_a;
-    }
-    if (tmp_b != NULL) {
-        b = tmp_b;
-        len_b = tmp_len_b;
-    }
+    // Compare from right to left
+    ia--;
+    ib--;
 
-    int i_a = len_a - 1;
-    int i_b = len_b - 1;
-    while (i_a >= 0 && i_b >= 0) {
-        bool skip_compare = false;
-        char ch_a = a[i_a];
-        char ch_b = b[i_b];
-        if (!isNonSeparator(ch_a)) {
-            i_a--;
-            skip_compare = true;
-        }
-        if (!isNonSeparator(ch_b)) {
-            i_b--;
-            skip_compare = true;
+    matched = 0;
+
+    while (ia >= 0 && ib >=0) {
+        char ca, cb;
+        bool skipCmp = false;
+
+        ca = a[ia];
+
+        if (!isNonSeparator(ca)) {
+            ia--;
+            skipCmp = true;
         }
 
-        if (!skip_compare) {
-            if (ch_a != ch_b) {
-                return false;
+        cb = b[ib];
+
+        if (!isNonSeparator(cb)) {
+            ib--;
+            skipCmp = true;
+        }
+
+        if (!skipCmp) {
+            if (cb != ca) {
+                break;
             }
-            i_a--;
-            i_b--;
+            ia--; ib--; matched++;
         }
     }
 
-    if (ok_to_ignore_prefix) {
-        if (!checkPrefixIsIgnorable(a, i_a)) {
-            return false;
+    if (matched < MIN_MATCH) {
+        int aLen = strlen(a);
+        
+        // if the input strings match, but their lengths < MIN_MATCH, 
+        // treat them as equal.
+        if (aLen == (int)strlen(b) && aLen == matched) {
+            return true;
         }
-        if (!checkPrefixIsIgnorable(b, i_b)) {
-            return false;
-        }
-    } else {
-        while (i_a >= 0) {
-            if (isNonSeparator(a[i_a])) {
-                return false;
-            }
-            i_a--;
-        }
-        while (i_b >= 0) {
-            if (isNonSeparator(b[i_b])) {
-                return false;
-            }
-            i_b--;
-        }
+        return false;
     }
 
-    return true;
+    // At least one string has matched completely;
+    if (matched >= MIN_MATCH && (ia < 0 || ib < 0)) {
+        return true;
+    }
+
+    /*
+     * Now, what remains must be one of the following for a 
+     * match:
+     *
+     *  - a '+' on one and a '00' or a '011' on the other
+     *  - a '0' on one and a (+,00)<country code> on the other
+     *     (for this, a '0' and a '00' prefix would have succeeded above)
+     */
+
+    if (matchIntlPrefix(a, ia + 1) && matchIntlPrefix(b, ib +1)) {
+        return true;
+    }
+
+    if (matchTrunkPrefix(a, ia + 1) && matchIntlPrefixAndCC(b, ib +1)) {
+        return true;
+    }
+
+    if (matchTrunkPrefix(b, ib + 1) && matchIntlPrefixAndCC(a, ia +1)) {
+        return true;
+    }
+
+    /*
+     * Last resort: if the number of unmatched characters on both sides is less than or equal
+     * to the length of the longest country code and only one number starts with a + accept
+     * the match. This is because some countries like France and Russia have an extra prefix
+     * digit that is used when dialing locally in country that does not show up when you dial
+     * the number using the country code. In France this prefix digit is used to determine
+     * which land line carrier to route the call over.
+     */
+    bool aPlusFirst = (*a == '+');
+    bool bPlusFirst = (*b == '+');
+    if (ia < 4 && ib < 4 && (aPlusFirst || bPlusFirst) && !(aPlusFirst && bPlusFirst)) {
+        return true;
+    }
+
+    return false;
 }
 
 } // namespace android