AI 143231: am: CL 143056 Fix PhoneticStringUtils so that it ignores whitespace correctly.
Original author: dmiyakawa
Merged from: //branches/cupcake/...
Automated import of CL 143231
diff --git a/android/PhoneticStringUtils.cpp b/android/PhoneticStringUtils.cpp
index 7c8d185..5f8781c 100644
--- a/android/PhoneticStringUtils.cpp
+++ b/android/PhoneticStringUtils.cpp
@@ -89,11 +89,10 @@
}
if (codepoint <= 0x0020 || codepoint == 0x3000) {
- // Whitespace.
- // Skip without increment of the variable "new_len".
+ // Whitespace should be ignored.
// Note: Formally, more "whitespace" exist. This block only
// handles part of them
- return 0x0020;
+ return -1;
} else if ((0x0021 <= codepoint && codepoint <= 0x007E) ||
(0xFF01 <= codepoint && codepoint <= 0xFF5E)) {
// Ascii and fullwidth ascii
@@ -369,7 +368,7 @@
for (codepoint_index = 0, i = 0, next = 0;
static_cast<size_t>(i) < src_len &&
codepoint_index < MAX_CODEPOINTS;
- i = next, codepoint_index++) {
+ i = next) {
int codepoint = GetCodePointFromUtf8(src, src_len, i, &next);
if (codepoint <= 0) {
return false;
@@ -384,12 +383,16 @@
GetPhoneticallySortableCodePoint(codepoint,
next_codepoint,
&next_is_consumed);
-
// dakuten (voiced mark) or han-dakuten (half-voiced mark) existed.
if (next_is_consumed) {
next = tmp_next;
}
+ if (codepoints[codepoint_index] < 0) {
+ // Do not increment codepoint_index.
+ continue;
+ }
+
if (codepoints[codepoint_index] < 128) { // 1 << 7
new_len++;
} else if (codepoints[codepoint_index] < 2048) {
@@ -407,9 +410,19 @@
} else {
new_len += 6;
}
+
+ codepoint_index++;
}
}
+ if (codepoint_index == 0) {
+ // If all of the codepoints are invalid, we place the string at the end of
+ // the list.
+ codepoints[0] = 0x10000 + CODEPOINT_FOR_NULL_STR;
+ codepoint_index = 1;
+ new_len = 4;
+ }
+
new_len += 1; // For '\0'.
*dst = static_cast<char *>(malloc(sizeof(char) * new_len));