/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "PhoneticStringUtils.h"

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <utils/String8.h>

using namespace android;

// Small hand-rolled test harness. Every test*() method below reports a
// failure by setting m_success to false; DoAllTests() runs them one by one
// through DoOneTest() and keeps a tally of how many ran and how many passed.
class TestExecutor {
 public:
    TestExecutor() : m_total_count(0), m_success_count(0), m_success(true) {}

    // Runs every registered test, prints a summary to stdout and returns true
    // only when all of them succeeded.
    bool DoAllTests();

 private:
    // Runs a single test method and updates the counters from m_success.
    void DoOneTest(void (TestExecutor::*test)());

    void testUtf32At();
    void testGetPhoneticallySortableCodePointAscii();
    void testGetPhoneticallySortableCodePointKana();
    void testGetPhoneticallySortableCodePointWhitespaceOnly();
    void testGetPhoneticallySortableCodePointSimpleCompare();
    void testGetUtf8FromUtf32();
    void testGetPhoneticallySortableString();
    void testGetNormalizedString();

    // Note: When adding a test, do not forget to add it to DoAllTests().

    int m_total_count;    // Number of tests executed so far.
    int m_success_count;  // Number of executed tests that passed.

    // Result of the test currently running; reset to true by DoOneTest()
    // before each test and cleared by the test on failure.
    bool m_success;
};

// Fatal equality check for integer values. When |input| differs from
// |expected| both are printed in hex, m_success is cleared and the enclosing
// (void) test function returns immediately. Must be used inside a function
// that has m_success in scope. Note that both arguments are evaluated more
// than once on failure, so they must not have side effects.
//
// Written as do/while(0) rather than a GNU statement expression so that it is
// portable and still behaves as a single statement.
#define ASSERT_EQ_VALUE(input, expected) \
    do { \
        if ((expected) != (input)) { \
            printf("0x%X(result) != 0x%X(expected)\n", \
                   static_cast<unsigned int>(input), \
                   static_cast<unsigned int>(expected)); \
            m_success = false; \
            return; \
        } \
    } while (0)

// Non-fatal equality check for integer values. When |input| differs from
// |expected| both are printed in hex and m_success is cleared, but the test
// keeps running. Must be used where m_success is in scope. Both arguments are
// evaluated more than once on failure, so they must not have side effects.
//
// Written as do/while(0) rather than a GNU statement expression so that it is
// portable and still behaves as a single statement.
#define EXPECT_EQ_VALUE(input, expected) \
    do { \
        if ((expected) != (input)) { \
            printf("0x%X(result) != 0x%X(expected)\n", \
                   static_cast<unsigned int>(input), \
                   static_cast<unsigned int>(expected)); \
            m_success = false; \
        } \
    } while (0)


69bool TestExecutor::DoAllTests() {
Daisuke Miyakawa1ec1f3d2009-07-09 14:03:07 +090070 DoOneTest(&TestExecutor::testUtf32At);
The Android Open Source Project455ed292009-03-13 13:04:22 -070071 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointAscii);
72 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointKana);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -070073 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly);
The Android Open Source Project455ed292009-03-13 13:04:22 -070074 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare);
Daisuke Miyakawae919af52009-06-26 22:58:32 +090075 DoOneTest(&TestExecutor::testGetUtf8FromUtf32);
The Android Open Source Project455ed292009-03-13 13:04:22 -070076 DoOneTest(&TestExecutor::testGetPhoneticallySortableString);
Daisuke Miyakawad28cdc42009-05-18 14:51:52 +090077 DoOneTest(&TestExecutor::testGetNormalizedString);
The Android Open Source Project455ed292009-03-13 13:04:22 -070078
79 printf("Test total: %d\nSuccess: %d\nFailure: %d\n",
80 m_total_count, m_success_count, m_total_count - m_success_count);
81
82 bool success = m_total_count == m_success_count;
83 printf("\n%s\n", success ? "Success" : "Failure");
84
85 return success;
86}
87
88void TestExecutor::DoOneTest(void (TestExecutor::*test)()) {
89 m_success = true;
90
91 (this->*test)();
92
93 ++m_total_count;
94 m_success_count += m_success ? 1 : 0;
95}
96
// Calls utf32_at() on the NUL-terminated string |src| at byte offset |index|
// and verifies two things: the offset written to the "next" out-parameter
// equals |expected_next|, and the returned code point equals
// |expected_value|. Any mismatch (or a negative return value) is printed and
// marks the current test as failed through m_success.
//
// Requires m_success and EXPECT_EQ_VALUE to be in scope at the point of use.
#define TEST_GET_UTF32AT(src, index, expected_next, expected_value) \
    do { \
        size_t next; \
        int32_t ret = utf32_at((src), strlen(src), (index), &next); \
        if (ret < 0) { \
            printf("utf32_at() returned negative value (src: %s, index: %d)\n", \
                   (src), static_cast<int>(index)); \
            m_success = false; \
        } else if (next != static_cast<size_t>(expected_next)) { \
            printf("next is unexpected value (src: %s, actual: %u, expected: %u)\n", \
                   (src), static_cast<unsigned int>(next), \
                   static_cast<unsigned int>(expected_next)); \
            /* A wrong "next" offset is a failure, not just a diagnostic. */ \
            m_success = false; \
        } else { \
            EXPECT_EQ_VALUE(ret, (expected_value)); \
        } \
    } while (0)

Daisuke Miyakawa1ec1f3d2009-07-09 14:03:07 +0900113void TestExecutor::testUtf32At() {
114 printf("testUtf32At()\n");
Daisuke Miyakawae919af52009-06-26 22:58:32 +0900115
116 TEST_GET_UTF32AT("a", 0, 1, 97);
The Android Open Source Project455ed292009-03-13 13:04:22 -0700117 // Japanese hiragana "a"
Daisuke Miyakawae919af52009-06-26 22:58:32 +0900118 TEST_GET_UTF32AT("\xE3\x81\x82", 0, 3, 0x3042);
The Android Open Source Project455ed292009-03-13 13:04:22 -0700119 // Japanese fullwidth katakana "a" with ascii a
Daisuke Miyakawae919af52009-06-26 22:58:32 +0900120 TEST_GET_UTF32AT("a\xE3\x82\xA2", 1, 4, 0x30A2);
The Android Open Source Project455ed292009-03-13 13:04:22 -0700121
122 // 2 PUA
Daisuke Miyakawae919af52009-06-26 22:58:32 +0900123 TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 0, 4, 0xFE000);
124 TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 4, 8, 0xFE008);
The Android Open Source Project455ed292009-03-13 13:04:22 -0700125}
126
127void TestExecutor::testGetPhoneticallySortableCodePointAscii() {
128 printf("testGetPhoneticallySortableCodePoint()\n");
129 int halfwidth[94];
130 int fullwidth[94];
131 int i, codepoint;
132 bool next_is_consumed;
133 for (i = 0, codepoint = 0x0021; codepoint <= 0x007E; ++i, ++codepoint) {
134 halfwidth[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
135 &next_is_consumed);
136 if (halfwidth[i] < 0) {
137 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700138 m_success = false;
139 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700140 }
141 if (next_is_consumed) {
142 printf("next_is_consumed become true at 0x%04X", codepoint);
143 m_success = false;
144 return;
145 }
146 }
147 for (i = 0, codepoint = 0xFF01; codepoint <= 0xFF5E; ++i, ++codepoint) {
148 fullwidth[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
149 &next_is_consumed);
150 if (fullwidth[i] < 0) {
151 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700152 m_success = false;
153 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700154 }
155 if (next_is_consumed) {
156 printf("next_is_consumed become true at 0x%04X", codepoint);
157 m_success = false;
158 return;
159 }
160 }
161
162 for (i = 0; i < 94; i++) {
163 EXPECT_EQ_VALUE(halfwidth[i], fullwidth[i]);
164 }
165}
166
167void TestExecutor::testGetPhoneticallySortableCodePointKana() {
168 printf("testGetPhoneticallySortableCodePointKana()\n");
169 int hiragana[86];
170 int fullwidth_katakana[86];
171 int i, codepoint;
172 bool next_is_consumed;
173
174 for (i = 0, codepoint = 0x3041; codepoint <= 0x3096; ++i, ++codepoint) {
175 hiragana[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
176 &next_is_consumed);
177 if (hiragana[i] < 0) {
178 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700179 m_success = false;
180 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700181 }
182 if (next_is_consumed) {
183 printf("next_is_consumed become true at 0x%04X", codepoint);
184 m_success = false;
185 return;
186 }
187 }
188
189 for (i = 0, codepoint = 0x30A1; codepoint <= 0x30F6; ++i, ++codepoint) {
190 fullwidth_katakana[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
191 &next_is_consumed);
192 if (fullwidth_katakana[i] < 0) {
193 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700194 m_success = false;
195 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700196 }
197 if (next_is_consumed) {
198 printf("next_is_consumed become true at 0x%04X", codepoint);
199 m_success = false;
200 return;
201 }
202 }
203
204 // hankaku-katakana space do not have some characters corresponding to
205 // zenkaku-hiragana (e.g. xwa, xka, xku). To make test easier, insert
206 // zenkaku-katakana version of them into this array (See the value 0x30??).
207 int halfwidth_katakana[] = {
208 0xFF67, 0xFF71, 0xFF68, 0xFF72, 0xFF69, 0xFF73, 0xFF6A, 0xFF74, 0xFF6B,
209 0xFF75, 0xFF76, 0xFF76, 0xFF9E, 0xFF77, 0xFF77, 0xFF9E, 0xFF78, 0xFF78,
210 0xFF9E, 0xFF79, 0xFF79, 0xFF9E, 0xFF7A, 0xFF7A, 0xFF9E, 0xFF7B, 0xFF7B,
211 0xFF9E, 0xFF7C, 0xFF7C, 0xFF9E, 0xFF7D, 0xFF7D, 0xFF9E, 0xFF7E, 0xFF7E,
212 0xFF9E, 0xFF7F, 0xFF7F, 0xFF9E, 0xFF80, 0xFF80, 0xFF9E, 0xFF81, 0xFF81,
213 0xFF9E, 0xFF6F, 0xFF82, 0xFF82, 0xFF9E, 0xFF83, 0xFF83, 0xFF9E, 0xFF84,
214 0xFF84, 0xFF9E, 0xFF85, 0xFF86, 0xFF87, 0xFF88, 0xFF89, 0xFF8A, 0xFF8A,
215 0xFF9E, 0xFF8A, 0xFF9F, 0xFF8B, 0xFF8B, 0xFF9E, 0xFF8B, 0xFF9F, 0xFF8C,
216 0xFF8C, 0xFF9E, 0xFF8C, 0xFF9F, 0xFF8D, 0xFF8D, 0xFF9E, 0xFF8D, 0xFF9F,
217 0xFF8E, 0xFF8E, 0xFF9E, 0xFF8E, 0xFF9F, 0xFF8F, 0xFF90, 0xFF91, 0xFF92,
218 0xFF93, 0xFF6C, 0xFF94, 0xFF6D, 0xFF95, 0xFF6E, 0xFF96, 0xFF97, 0xFF98,
219 0xFF99, 0xFF9A, 0xFF9B, 0x30EE, 0xFF9C, 0x30F0, 0x30F1, 0xFF66, 0xFF9D,
220 0xFF73, 0xFF9E, 0x30F5, 0x30F6};
221 int len = sizeof(halfwidth_katakana)/sizeof(int);
222
223 int halfwidth_katakana_result[86];
224
225 int j;
226 for (i = 0, j = 0; i < len && j < 86; ++i, ++j) {
227 int codepoint = halfwidth_katakana[i];
228 int next_codepoint = i + 1 < len ? halfwidth_katakana[i + 1] : -1;
229 halfwidth_katakana_result[j] =
230 GetPhoneticallySortableCodePoint(codepoint, next_codepoint,
231 &next_is_consumed);
232 // Consume voiced mark/half-voiced mark.
233 if (next_is_consumed) {
234 ++i;
235 }
236 }
237 ASSERT_EQ_VALUE(i, len);
238 ASSERT_EQ_VALUE(j, 86);
239
240 for (i = 0; i < 86; ++i) {
241 EXPECT_EQ_VALUE(fullwidth_katakana[i], hiragana[i]);
242 EXPECT_EQ_VALUE(halfwidth_katakana_result[i], hiragana[i]);
243 }
244}
245
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700246void TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly() {
Daisuke Miyakawad28cdc42009-05-18 14:51:52 +0900247 printf("testGetPhoneticallySortableCodePointWhitespaceOnly()\n");
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700248 // Halfwidth space
249 int result = GetPhoneticallySortableCodePoint(0x0020, 0x0061, NULL);
250 ASSERT_EQ_VALUE(result, -1);
251 // Fullwidth space
252 result = GetPhoneticallySortableCodePoint(0x3000, 0x0062, NULL);
253 ASSERT_EQ_VALUE(result, -1);
254 // tab
255 result = GetPhoneticallySortableCodePoint(0x0009, 0x0062, NULL);
256 ASSERT_EQ_VALUE(result, -1);
257}
258
The Android Open Source Project455ed292009-03-13 13:04:22 -0700259void TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare() {
260 printf("testGetPhoneticallySortableCodePointSimpleCompare()\n");
261
262 int codepoints[] = {
263 0x3042, 0x30AB, 0xFF7B, 0x305F, 0x30CA, 0xFF8A, 0x30D0, 0x3071,
264 0x307E, 0x30E4, 0xFF97, 0x308F, 0x3093, 0x3094, 'A', 'Z',
265 '0', '9', '!', '/', ':', '?', '[', '`', '{', '~'};
266 size_t len = sizeof(codepoints)/sizeof(int);
267 bool next_is_consumed;
268 for (size_t i = 0; i < len - 1; ++i) {
269 int codepoint_a =
270 GetPhoneticallySortableCodePoint(codepoints[i], -1,
271 &next_is_consumed);
272 if (next_is_consumed) {
273 printf("next_is_consumed become true at 0x%04X", codepoint_a);
274 m_success = false;
275 return;
276 }
277 int codepoint_b =
278 GetPhoneticallySortableCodePoint(codepoints[i + 1], -1,
279 &next_is_consumed);
280 if (next_is_consumed) {
281 printf("next_is_consumed become true at 0x%04X", codepoint_b);
282 m_success = false;
283 return;
284 }
285
286 if (codepoint_a >= codepoint_b) {
287 printf("0x%04X (from 0x%04X) >= 0x%04X (from 0x%04X)\n",
288 codepoint_a, codepoints[i], codepoint_b, codepoints[i + 1]);
289 m_success = false;
290 return;
291 }
292 }
293}
294
// Stores the single UTF-32 code point |codepoint| into the String8 object
// named `string8` (which must be in scope) via setTo() and compares the
// resulting bytes with the NUL-terminated string |expected|. On mismatch both
// byte sequences are dumped in hex and m_success is cleared; the test keeps
// running either way.
//
// Bytes are cast to unsigned char before printing so that values >= 0x80 are
// shown as two hex digits instead of being sign-extended where char is signed.
#define EXPECT_EQ_CODEPOINT_UTF8(codepoint, expected) \
    do { \
        char32_t codepoints[1] = {codepoint}; \
        status_t ret = string8.setTo(codepoints, 1); \
        if (ret != NO_ERROR) { \
            printf("String8::setTo() failed at 0x%04X\n", (codepoint)); \
            m_success = false; \
        } else { \
            const char* actual_utf8 = string8.string(); \
            if (strcmp(actual_utf8, (expected)) != 0) { \
                printf("Failed at codepoint 0x%04X\n", (codepoint)); \
                for (const char* ch = actual_utf8; *ch != '\0'; ++ch) { \
                    printf("0x%X ", static_cast<unsigned char>(*ch)); \
                } \
                printf("!= "); \
                for (const char* ch = (expected); *ch != '\0'; ++ch) { \
                    printf("0x%X ", static_cast<unsigned char>(*ch)); \
                } \
                printf("\n"); \
                m_success = false; \
            } \
        } \
    } while (0)

Daisuke Miyakawae919af52009-06-26 22:58:32 +0900319void TestExecutor::testGetUtf8FromUtf32() {
320 printf("testGetUtf8FromUtf32()\n");
321 String8 string8;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700322
323 EXPECT_EQ_CODEPOINT_UTF8('a', "\x61");
324 // Armenian capital letter AYB (2 bytes in UTF8)
325 EXPECT_EQ_CODEPOINT_UTF8(0x0530, "\xD4\xB0");
326 // Japanese 'a' (3 bytes in UTF8)
327 EXPECT_EQ_CODEPOINT_UTF8(0x3042, "\xE3\x81\x82");
328 // Kanji
329 EXPECT_EQ_CODEPOINT_UTF8(0x65E5, "\xE6\x97\xA5");
330 // PUA (4 byets in UTF8)
331 EXPECT_EQ_CODEPOINT_UTF8(0xFE016, "\xF3\xBE\x80\x96");
332 EXPECT_EQ_CODEPOINT_UTF8(0xFE972, "\xF3\xBE\xA5\xB2");
The Android Open Source Project455ed292009-03-13 13:04:22 -0700333}
334
// Runs GetPhoneticallySortableString() on |src| and compares the produced
// string with |expected| (both NUL-terminated). Expects a `char *dst` and a
// `size_t len` to be declared in the enclosing scope; they receive the
// function's outputs, and dst is released with free() after a successful
// call. On failure or mismatch m_success is cleared, and on mismatch both
// byte sequences are dumped in hex.
//
// Bytes are cast to unsigned char before printing so that values >= 0x80 are
// shown as two hex digits instead of being sign-extended where char is signed.
#define EXPECT_EQ_UTF8_UTF8(src, expected) \
    do { \
        if (!GetPhoneticallySortableString((src), &dst, &len)) { \
            printf("GetPhoneticallySortableString() returned false.\n"); \
            m_success = false; \
        } else { \
            if (strcmp(dst, (expected)) != 0) { \
                for (const char* ch = dst; *ch != '\0'; ++ch) { \
                    printf("0x%X ", static_cast<unsigned char>(*ch)); \
                } \
                printf("!= "); \
                for (const char* ch = (expected); *ch != '\0'; ++ch) { \
                    printf("0x%X ", static_cast<unsigned char>(*ch)); \
                } \
                printf("\n"); \
                m_success = false; \
            } \
            free(dst); \
        } \
    } while (0)

356void TestExecutor::testGetPhoneticallySortableString() {
Daisuke Miyakawad28cdc42009-05-18 14:51:52 +0900357 printf("testGetPhoneticallySortableString()\n");
The Android Open Source Project455ed292009-03-13 13:04:22 -0700358 char *dst;
359 size_t len;
360
361 // halfwidth alphabets -> fullwidth alphabets.
362 EXPECT_EQ_UTF8_UTF8("ABCD",
363 "\xEF\xBC\xA1\xEF\xBC\xA2\xEF\xBC\xA3\xEF\xBC\xA4");
364 // halfwidth/fullwidth-katakana -> hiragana
365 EXPECT_EQ_UTF8_UTF8(
366 "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA",
367 "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A");
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700368
369 // whitespace -> string which should be placed at last
370 EXPECT_EQ_UTF8_UTF8(" \t", "\xF0\x9F\xBF\xBD");
The Android Open Source Project455ed292009-03-13 13:04:22 -0700371}
372
#undef EXPECT_EQ_UTF8_UTF8

// Redefinition of EXPECT_EQ_UTF8_UTF8 for the GetNormalizedString() tests.
// Runs GetNormalizedString() on |src| and compares the produced string with
// |expected| (both NUL-terminated). Expects a `char *dst` and a `size_t len`
// to be declared in the enclosing scope; they receive the function's outputs,
// and dst is released with free() after a successful call. On failure or
// mismatch m_success is cleared, and on mismatch both byte sequences are
// dumped in hex.
//
// Bytes are cast to unsigned char before printing so that values >= 0x80 are
// shown as two hex digits instead of being sign-extended where char is signed.
#define EXPECT_EQ_UTF8_UTF8(src, expected) \
    do { \
        if (!GetNormalizedString((src), &dst, &len)) { \
            printf("GetNormalizedString() returned false.\n"); \
            m_success = false; \
        } else { \
            if (strcmp(dst, (expected)) != 0) { \
                for (const char* ch = dst; *ch != '\0'; ++ch) { \
                    printf("0x%X ", static_cast<unsigned char>(*ch)); \
                } \
                printf("!= "); \
                for (const char* ch = (expected); *ch != '\0'; ++ch) { \
                    printf("0x%X ", static_cast<unsigned char>(*ch)); \
                } \
                printf("\n"); \
                m_success = false; \
            } \
            free(dst); \
        } \
    } while (0)

396void TestExecutor::testGetNormalizedString() {
397 printf("testGetNormalizedString()\n");
398 char *dst;
399 size_t len;
400
401 // halfwidth alphabets/symbols -> keep it as is.
402 EXPECT_EQ_UTF8_UTF8("ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()",
403 "ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()");
404 EXPECT_EQ_UTF8_UTF8("abcdefghijklmnopqrstuvwxyz[]{}\\@/",
405 "abcdefghijklmnopqrstuvwxyz[]{}\\@/");
406
407 // halfwidth/fullwidth-katakana -> hiragana
408 EXPECT_EQ_UTF8_UTF8(
409 "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA",
410 "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A");
411
412 // whitespace -> keep it as is.
413 EXPECT_EQ_UTF8_UTF8(" \t", " \t");
414}
415
The Android Open Source Project455ed292009-03-13 13:04:22 -0700416int main() {
417 TestExecutor executor;
418 if(executor.DoAllTests()) {
419 return 0;
420 } else {
421 return 1;
422 }
423}