AI 143231: am: CL 143056 Fix PhoneticStringUtils so that it ignores whitespace correctly.
  Original author: dmiyakawa
  Merged from: //branches/cupcake/...

Automated import of CL 143231
diff --git a/android/PhoneticStringUtils.cpp b/android/PhoneticStringUtils.cpp
index 7c8d185..5f8781c 100644
--- a/android/PhoneticStringUtils.cpp
+++ b/android/PhoneticStringUtils.cpp
@@ -89,11 +89,10 @@
     }
 
     if (codepoint <= 0x0020 || codepoint == 0x3000) {
-        // Whitespace.
-        // Skip without increment of the variable "new_len".
+        // Whitespace should be ignored.
         // Note: Formally, more "whitespace" exist. This block only
         // handles part of them
-        return 0x0020;
+        return -1;
     } else if ((0x0021 <= codepoint && codepoint <= 0x007E) ||
                (0xFF01 <= codepoint && codepoint <= 0xFF5E)) {
         // Ascii and fullwidth ascii
@@ -369,7 +368,7 @@
         for (codepoint_index = 0, i = 0, next = 0;
              static_cast<size_t>(i) < src_len &&
                      codepoint_index < MAX_CODEPOINTS;
-             i = next, codepoint_index++) {
+             i = next) {
             int codepoint = GetCodePointFromUtf8(src, src_len, i, &next);
             if (codepoint <= 0) {
                 return false;
@@ -384,12 +383,16 @@
                     GetPhoneticallySortableCodePoint(codepoint,
                                                      next_codepoint,
                                                      &next_is_consumed);
-
             // dakuten (voiced mark) or han-dakuten (half-voiced mark) existed.
             if (next_is_consumed) {
                 next = tmp_next;
             }
 
+            if (codepoints[codepoint_index] < 0) {
+              // Do not increment codepoint_index.
+              continue;
+            }
+
             if (codepoints[codepoint_index] < 128) {  // 1 << 7
                 new_len++;
             } else if (codepoints[codepoint_index] < 2048) {
@@ -407,9 +410,19 @@
             } else {
                 new_len += 6;
             }
+
+            codepoint_index++;
         }
     }
 
+    if (codepoint_index == 0) {
+        // If all of the codepoints are invalid, we place the string at the end
+        // of the list.
+        codepoints[0] = 0x10000 + CODEPOINT_FOR_NULL_STR;
+        codepoint_index = 1;
+        new_len = 4;
+    }
+
     new_len += 1;  // For '\0'.
 
     *dst = static_cast<char *>(malloc(sizeof(char) * new_len));