blob: e74f67f1432524ef4a91020923460b61188f2b21 [file] [log] [blame]
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "PhoneticStringUtils.h"
18
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22
23using namespace android;
24
// Minimal self-contained test harness for this file. Every test is a private
// member function; DoAllTests() runs each of them through DoOneTest() and
// prints a summary to stdout.
class TestExecutor {
 public:
  TestExecutor() : m_total_count(0), m_success_count(0), m_success(true) {}

  // Runs every test, prints the totals, and returns true only when the number
  // of successful tests equals the number of tests run.
  bool DoAllTests();

 private:
  // Resets m_success, invokes |test| on this object, then updates the
  // total/success counters according to m_success.
  void DoOneTest(void (TestExecutor::*test)());

  void testGetCodePointFromUtf8();
  void testGetPhoneticallySortableCodePointAscii();
  void testGetPhoneticallySortableCodePointKana();
  void testGetPhoneticallySortableCodePointWhitespaceOnly();
  void testGetPhoneticallySortableCodePointSimpleCompare();
  void testGetUtf8FromCodePoint();
  void testGetPhoneticallySortableString();
  void testGetNormalizedString();

  // Note: When adding a test, do not forget to add it to DoAllTests().

  // Number of tests executed so far.
  int m_total_count;
  // Number of executed tests that finished with m_success still true.
  int m_success_count;

  // Outcome of the test currently running; set to true by DoOneTest() before
  // each test and cleared by the test (or the check macros) on failure.
  bool m_success;
};
48
// Fatal integer comparison for use inside a void TestExecutor test method.
// On mismatch it prints both values in hex, clears m_success and returns from
// the enclosing function. Each argument is evaluated exactly once, so
// expressions with side effects (for example a call that advances an index
// through an out-parameter) are not re-run while printing the diagnostic.
#define ASSERT_EQ_VALUE(input, expected)                     \
  do {                                                       \
    const int assert_actual_ = (input);                      \
    const int assert_expected_ = (expected);                 \
    if (assert_expected_ != assert_actual_) {                \
      printf("0x%X(result) != 0x%X(expected)\n",             \
             static_cast<unsigned int>(assert_actual_),      \
             static_cast<unsigned int>(assert_expected_));   \
      m_success = false;                                     \
      return;                                                \
    }                                                        \
  } while (0)
57
// Non-fatal integer comparison for use inside a TestExecutor test method.
// On mismatch it prints both values in hex and clears m_success, then lets
// the test continue. Each argument is evaluated exactly once, so expressions
// with side effects are not re-run while printing the diagnostic.
#define EXPECT_EQ_VALUE(input, expected)                     \
  do {                                                       \
    const int expect_actual_ = (input);                      \
    const int expect_expected_ = (expected);                 \
    if (expect_expected_ != expect_actual_) {                \
      printf("0x%X(result) != 0x%X(expected)\n",             \
             static_cast<unsigned int>(expect_actual_),      \
             static_cast<unsigned int>(expect_expected_));   \
      m_success = false;                                     \
    }                                                        \
  } while (0)
65
66
67bool TestExecutor::DoAllTests() {
68 DoOneTest(&TestExecutor::testGetCodePointFromUtf8);
69 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointAscii);
70 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointKana);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -070071 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly);
The Android Open Source Project455ed292009-03-13 13:04:22 -070072 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare);
73 DoOneTest(&TestExecutor::testGetUtf8FromCodePoint);
74 DoOneTest(&TestExecutor::testGetPhoneticallySortableString);
Daisuke Miyakawad28cdc42009-05-18 14:51:52 +090075 DoOneTest(&TestExecutor::testGetNormalizedString);
The Android Open Source Project455ed292009-03-13 13:04:22 -070076
77 printf("Test total: %d\nSuccess: %d\nFailure: %d\n",
78 m_total_count, m_success_count, m_total_count - m_success_count);
79
80 bool success = m_total_count == m_success_count;
81 printf("\n%s\n", success ? "Success" : "Failure");
82
83 return success;
84}
85
86void TestExecutor::DoOneTest(void (TestExecutor::*test)()) {
87 m_success = true;
88
89 (this->*test)();
90
91 ++m_total_count;
92 m_success_count += m_success ? 1 : 0;
93}
94
95void TestExecutor::testGetCodePointFromUtf8() {
96 printf("testGetCodePointFromUtf8()\n");
97 int next;
98
99 EXPECT_EQ_VALUE(GetCodePointFromUtf8("a", 1, 0, &next), 97);
100 EXPECT_EQ_VALUE(next, 1);
101 // Japanese hiragana "a"
102 EXPECT_EQ_VALUE(GetCodePointFromUtf8("\xE3\x81\x82", 3, 0, &next), 0x3042);
103 EXPECT_EQ_VALUE(next, 3);
104 // Japanese fullwidth katakana "a" with ascii a
105 EXPECT_EQ_VALUE(GetCodePointFromUtf8("a\xE3\x82\xA2", 4, 1, &next), 0x30A2);
106 EXPECT_EQ_VALUE(next, 4);
107
108 // 2 PUA
109 ASSERT_EQ_VALUE(GetCodePointFromUtf8("\xF3\xBE\x80\x80\xF3\xBE\x80\x88",
110 8, 0, &next), 0xFE000);
111 ASSERT_EQ_VALUE(next, 4);
112 ASSERT_EQ_VALUE(GetCodePointFromUtf8("\xF3\xBE\x80\x80\xF3\xBE\x80\x88",
113 8, next, &next), 0xFE008);
114 ASSERT_EQ_VALUE(next, 8);
115}
116
117void TestExecutor::testGetPhoneticallySortableCodePointAscii() {
118 printf("testGetPhoneticallySortableCodePoint()\n");
119 int halfwidth[94];
120 int fullwidth[94];
121 int i, codepoint;
122 bool next_is_consumed;
123 for (i = 0, codepoint = 0x0021; codepoint <= 0x007E; ++i, ++codepoint) {
124 halfwidth[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
125 &next_is_consumed);
126 if (halfwidth[i] < 0) {
127 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700128 m_success = false;
129 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700130 }
131 if (next_is_consumed) {
132 printf("next_is_consumed become true at 0x%04X", codepoint);
133 m_success = false;
134 return;
135 }
136 }
137 for (i = 0, codepoint = 0xFF01; codepoint <= 0xFF5E; ++i, ++codepoint) {
138 fullwidth[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
139 &next_is_consumed);
140 if (fullwidth[i] < 0) {
141 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700142 m_success = false;
143 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700144 }
145 if (next_is_consumed) {
146 printf("next_is_consumed become true at 0x%04X", codepoint);
147 m_success = false;
148 return;
149 }
150 }
151
152 for (i = 0; i < 94; i++) {
153 EXPECT_EQ_VALUE(halfwidth[i], fullwidth[i]);
154 }
155}
156
157void TestExecutor::testGetPhoneticallySortableCodePointKana() {
158 printf("testGetPhoneticallySortableCodePointKana()\n");
159 int hiragana[86];
160 int fullwidth_katakana[86];
161 int i, codepoint;
162 bool next_is_consumed;
163
164 for (i = 0, codepoint = 0x3041; codepoint <= 0x3096; ++i, ++codepoint) {
165 hiragana[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
166 &next_is_consumed);
167 if (hiragana[i] < 0) {
168 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700169 m_success = false;
170 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700171 }
172 if (next_is_consumed) {
173 printf("next_is_consumed become true at 0x%04X", codepoint);
174 m_success = false;
175 return;
176 }
177 }
178
179 for (i = 0, codepoint = 0x30A1; codepoint <= 0x30F6; ++i, ++codepoint) {
180 fullwidth_katakana[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
181 &next_is_consumed);
182 if (fullwidth_katakana[i] < 0) {
183 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700184 m_success = false;
185 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700186 }
187 if (next_is_consumed) {
188 printf("next_is_consumed become true at 0x%04X", codepoint);
189 m_success = false;
190 return;
191 }
192 }
193
194 // hankaku-katakana space do not have some characters corresponding to
195 // zenkaku-hiragana (e.g. xwa, xka, xku). To make test easier, insert
196 // zenkaku-katakana version of them into this array (See the value 0x30??).
197 int halfwidth_katakana[] = {
198 0xFF67, 0xFF71, 0xFF68, 0xFF72, 0xFF69, 0xFF73, 0xFF6A, 0xFF74, 0xFF6B,
199 0xFF75, 0xFF76, 0xFF76, 0xFF9E, 0xFF77, 0xFF77, 0xFF9E, 0xFF78, 0xFF78,
200 0xFF9E, 0xFF79, 0xFF79, 0xFF9E, 0xFF7A, 0xFF7A, 0xFF9E, 0xFF7B, 0xFF7B,
201 0xFF9E, 0xFF7C, 0xFF7C, 0xFF9E, 0xFF7D, 0xFF7D, 0xFF9E, 0xFF7E, 0xFF7E,
202 0xFF9E, 0xFF7F, 0xFF7F, 0xFF9E, 0xFF80, 0xFF80, 0xFF9E, 0xFF81, 0xFF81,
203 0xFF9E, 0xFF6F, 0xFF82, 0xFF82, 0xFF9E, 0xFF83, 0xFF83, 0xFF9E, 0xFF84,
204 0xFF84, 0xFF9E, 0xFF85, 0xFF86, 0xFF87, 0xFF88, 0xFF89, 0xFF8A, 0xFF8A,
205 0xFF9E, 0xFF8A, 0xFF9F, 0xFF8B, 0xFF8B, 0xFF9E, 0xFF8B, 0xFF9F, 0xFF8C,
206 0xFF8C, 0xFF9E, 0xFF8C, 0xFF9F, 0xFF8D, 0xFF8D, 0xFF9E, 0xFF8D, 0xFF9F,
207 0xFF8E, 0xFF8E, 0xFF9E, 0xFF8E, 0xFF9F, 0xFF8F, 0xFF90, 0xFF91, 0xFF92,
208 0xFF93, 0xFF6C, 0xFF94, 0xFF6D, 0xFF95, 0xFF6E, 0xFF96, 0xFF97, 0xFF98,
209 0xFF99, 0xFF9A, 0xFF9B, 0x30EE, 0xFF9C, 0x30F0, 0x30F1, 0xFF66, 0xFF9D,
210 0xFF73, 0xFF9E, 0x30F5, 0x30F6};
211 int len = sizeof(halfwidth_katakana)/sizeof(int);
212
213 int halfwidth_katakana_result[86];
214
215 int j;
216 for (i = 0, j = 0; i < len && j < 86; ++i, ++j) {
217 int codepoint = halfwidth_katakana[i];
218 int next_codepoint = i + 1 < len ? halfwidth_katakana[i + 1] : -1;
219 halfwidth_katakana_result[j] =
220 GetPhoneticallySortableCodePoint(codepoint, next_codepoint,
221 &next_is_consumed);
222 // Consume voiced mark/half-voiced mark.
223 if (next_is_consumed) {
224 ++i;
225 }
226 }
227 ASSERT_EQ_VALUE(i, len);
228 ASSERT_EQ_VALUE(j, 86);
229
230 for (i = 0; i < 86; ++i) {
231 EXPECT_EQ_VALUE(fullwidth_katakana[i], hiragana[i]);
232 EXPECT_EQ_VALUE(halfwidth_katakana_result[i], hiragana[i]);
233 }
234}
235
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700236void TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly() {
Daisuke Miyakawad28cdc42009-05-18 14:51:52 +0900237 printf("testGetPhoneticallySortableCodePointWhitespaceOnly()\n");
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700238 // Halfwidth space
239 int result = GetPhoneticallySortableCodePoint(0x0020, 0x0061, NULL);
240 ASSERT_EQ_VALUE(result, -1);
241 // Fullwidth space
242 result = GetPhoneticallySortableCodePoint(0x3000, 0x0062, NULL);
243 ASSERT_EQ_VALUE(result, -1);
244 // tab
245 result = GetPhoneticallySortableCodePoint(0x0009, 0x0062, NULL);
246 ASSERT_EQ_VALUE(result, -1);
247}
248
The Android Open Source Project455ed292009-03-13 13:04:22 -0700249void TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare() {
250 printf("testGetPhoneticallySortableCodePointSimpleCompare()\n");
251
252 int codepoints[] = {
253 0x3042, 0x30AB, 0xFF7B, 0x305F, 0x30CA, 0xFF8A, 0x30D0, 0x3071,
254 0x307E, 0x30E4, 0xFF97, 0x308F, 0x3093, 0x3094, 'A', 'Z',
255 '0', '9', '!', '/', ':', '?', '[', '`', '{', '~'};
256 size_t len = sizeof(codepoints)/sizeof(int);
257 bool next_is_consumed;
258 for (size_t i = 0; i < len - 1; ++i) {
259 int codepoint_a =
260 GetPhoneticallySortableCodePoint(codepoints[i], -1,
261 &next_is_consumed);
262 if (next_is_consumed) {
263 printf("next_is_consumed become true at 0x%04X", codepoint_a);
264 m_success = false;
265 return;
266 }
267 int codepoint_b =
268 GetPhoneticallySortableCodePoint(codepoints[i + 1], -1,
269 &next_is_consumed);
270 if (next_is_consumed) {
271 printf("next_is_consumed become true at 0x%04X", codepoint_b);
272 m_success = false;
273 return;
274 }
275
276 if (codepoint_a >= codepoint_b) {
277 printf("0x%04X (from 0x%04X) >= 0x%04X (from 0x%04X)\n",
278 codepoint_a, codepoints[i], codepoint_b, codepoints[i + 1]);
279 m_success = false;
280 return;
281 }
282 }
283}
284
// Encodes |codepoint| with GetUtf8FromCodePoint() into the caller's 10-byte
// |dst| buffer starting at offset |i|, then compares the bytes written with
// the NUL-terminated string |expected|. Any failure clears m_success and
// prints a diagnostic; the test continues afterwards.
//
// Requires `size_t index`, `char dst[10]` and `m_success` in the calling
// scope. Each macro argument is evaluated exactly once. The hex dumps print
// bytes as unsigned values so bytes >= 0x80 are not sign-extended, and the
// dump of the result starts at dst + i, which is the region being compared.
#define EXPECT_EQ_CODEPOINT_UTF8_WITH_INDEX(codepoint, expected, i)          \
  do {                                                                       \
    const int utf8_codepoint_ = (codepoint);                                 \
    const char *const utf8_expected_ = (expected);                           \
    const size_t utf8_start_ = (i);                                          \
    index = utf8_start_;                                                     \
    if (!GetUtf8FromCodePoint(utf8_codepoint_, dst, 10, &index)) {           \
      printf("GetUtf8FromCodePoint() returned false at 0x%04X\n",            \
             static_cast<unsigned int>(utf8_codepoint_));                    \
      m_success = false;                                                     \
    } else if (index >= 10) {                                                \
      printf("index (%lu) >= 10\n", static_cast<unsigned long>(index));      \
      m_success = false;                                                     \
    } else {                                                                 \
      dst[index] = '\0';                                                     \
      if (strcmp(dst + utf8_start_, utf8_expected_) != 0) {                  \
        printf("Failed at codepoint 0x%04X\n",                               \
               static_cast<unsigned int>(utf8_codepoint_));                  \
        for (const char *ch = dst + utf8_start_; *ch != '\0'; ++ch) {        \
          printf("0x%X ",                                                    \
                 static_cast<unsigned int>(static_cast<unsigned char>(*ch))); \
        }                                                                    \
        printf("!= ");                                                       \
        for (const char *ch = utf8_expected_; *ch != '\0'; ++ch) {           \
          printf("0x%X ",                                                    \
                 static_cast<unsigned int>(static_cast<unsigned char>(*ch))); \
        }                                                                    \
        printf("\n");                                                        \
        m_success = false;                                                   \
      }                                                                      \
    }                                                                        \
  } while (0)

// Same check as above, writing at the start of |dst|.
#define EXPECT_EQ_CODEPOINT_UTF8(codepoint, expected) \
  EXPECT_EQ_CODEPOINT_UTF8_WITH_INDEX(codepoint, expected, 0)
313
314
315void TestExecutor::testGetUtf8FromCodePoint() {
316 printf("testGetUtf8FromCodePoint()\n");
317 size_t index = 0;
318 char dst[10];
319
320 EXPECT_EQ_CODEPOINT_UTF8('a', "\x61");
321 // Armenian capital letter AYB (2 bytes in UTF8)
322 EXPECT_EQ_CODEPOINT_UTF8(0x0530, "\xD4\xB0");
323 // Japanese 'a' (3 bytes in UTF8)
324 EXPECT_EQ_CODEPOINT_UTF8(0x3042, "\xE3\x81\x82");
325 // Kanji
326 EXPECT_EQ_CODEPOINT_UTF8(0x65E5, "\xE6\x97\xA5");
327 // PUA (4 byets in UTF8)
328 EXPECT_EQ_CODEPOINT_UTF8(0xFE016, "\xF3\xBE\x80\x96");
329 EXPECT_EQ_CODEPOINT_UTF8(0xFE972, "\xF3\xBE\xA5\xB2");
330
331 EXPECT_EQ_CODEPOINT_UTF8_WITH_INDEX(0x058F, "\xD6\x8F", 3);
332
333 index = 0;
334 if (GetUtf8FromCodePoint(0x3043, dst, 2, &index)) {
335 printf("GetUtf8FromCodePont() returned true even when destination length"
336 "is not enough\n");
337 m_success = false;
338 }
339}
340
// Converts |src| with GetPhoneticallySortableString() and compares the result
// with the NUL-terminated string |expected|. On a conversion failure or a
// mismatch it clears m_success and prints a diagnostic; the test continues.
// The buffer returned through |dst| is released with free().
//
// Requires `char *dst`, `size_t len` and `m_success` in the calling scope.
// |expected| is evaluated exactly once, and the hex dump prints bytes as
// unsigned values so bytes >= 0x80 are not sign-extended.
#define EXPECT_EQ_UTF8_UTF8(src, expected)                                   \
  do {                                                                       \
    const char *const utf8_expected_ = (expected);                           \
    if (!GetPhoneticallySortableString(src, &dst, &len)) {                   \
      printf("GetPhoneticallySortableString() returned false.\n");           \
      m_success = false;                                                     \
    } else {                                                                 \
      if (strcmp(dst, utf8_expected_) != 0) {                                \
        for (const char *ch = dst; *ch != '\0'; ++ch) {                      \
          printf("0x%X ",                                                    \
                 static_cast<unsigned int>(static_cast<unsigned char>(*ch))); \
        }                                                                    \
        printf("!= ");                                                       \
        for (const char *ch = utf8_expected_; *ch != '\0'; ++ch) {           \
          printf("0x%X ",                                                    \
                 static_cast<unsigned int>(static_cast<unsigned char>(*ch))); \
        }                                                                    \
        printf("\n");                                                        \
        m_success = false;                                                   \
      }                                                                      \
      free(dst);                                                             \
    }                                                                        \
  } while (0)
361
362void TestExecutor::testGetPhoneticallySortableString() {
Daisuke Miyakawad28cdc42009-05-18 14:51:52 +0900363 printf("testGetPhoneticallySortableString()\n");
The Android Open Source Project455ed292009-03-13 13:04:22 -0700364 char *dst;
365 size_t len;
366
367 // halfwidth alphabets -> fullwidth alphabets.
368 EXPECT_EQ_UTF8_UTF8("ABCD",
369 "\xEF\xBC\xA1\xEF\xBC\xA2\xEF\xBC\xA3\xEF\xBC\xA4");
370 // halfwidth/fullwidth-katakana -> hiragana
371 EXPECT_EQ_UTF8_UTF8(
372 "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA",
373 "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A");
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700374
375 // whitespace -> string which should be placed at last
376 EXPECT_EQ_UTF8_UTF8(" \t", "\xF0\x9F\xBF\xBD");
The Android Open Source Project455ed292009-03-13 13:04:22 -0700377}
378
#undef EXPECT_EQ_UTF8_UTF8

// Redefinition of EXPECT_EQ_UTF8_UTF8 for the tests that follow: converts
// |src| with GetNormalizedString() and compares the result with the
// NUL-terminated string |expected|. On a conversion failure or a mismatch it
// clears m_success and prints a diagnostic; the test continues. The buffer
// returned through |dst| is released with free().
//
// Requires `char *dst`, `size_t len` and `m_success` in the calling scope.
// |expected| is evaluated exactly once, and the hex dump prints bytes as
// unsigned values so bytes >= 0x80 are not sign-extended.
#define EXPECT_EQ_UTF8_UTF8(src, expected)                                   \
  do {                                                                       \
    const char *const utf8_expected_ = (expected);                           \
    if (!GetNormalizedString(src, &dst, &len)) {                             \
      printf("GetNormalizedString() returned false.\n");                     \
      m_success = false;                                                     \
    } else {                                                                 \
      if (strcmp(dst, utf8_expected_) != 0) {                                \
        for (const char *ch = dst; *ch != '\0'; ++ch) {                      \
          printf("0x%X ",                                                    \
                 static_cast<unsigned int>(static_cast<unsigned char>(*ch))); \
        }                                                                    \
        printf("!= ");                                                       \
        for (const char *ch = utf8_expected_; *ch != '\0'; ++ch) {           \
          printf("0x%X ",                                                    \
                 static_cast<unsigned int>(static_cast<unsigned char>(*ch))); \
        }                                                                    \
        printf("\n");                                                        \
        m_success = false;                                                   \
      }                                                                      \
      free(dst);                                                             \
    }                                                                        \
  } while (0)
401
402void TestExecutor::testGetNormalizedString() {
403 printf("testGetNormalizedString()\n");
404 char *dst;
405 size_t len;
406
407 // halfwidth alphabets/symbols -> keep it as is.
408 EXPECT_EQ_UTF8_UTF8("ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()",
409 "ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()");
410 EXPECT_EQ_UTF8_UTF8("abcdefghijklmnopqrstuvwxyz[]{}\\@/",
411 "abcdefghijklmnopqrstuvwxyz[]{}\\@/");
412
413 // halfwidth/fullwidth-katakana -> hiragana
414 EXPECT_EQ_UTF8_UTF8(
415 "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA",
416 "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A");
417
418 // whitespace -> keep it as is.
419 EXPECT_EQ_UTF8_UTF8(" \t", " \t");
420}
421
The Android Open Source Project455ed292009-03-13 13:04:22 -0700422int main() {
423 TestExecutor executor;
424 if(executor.DoAllTests()) {
425 return 0;
426 } else {
427 return 1;
428 }
429}