blob: 06a7ba8d10315a2d1133cc38449ddb973d07437e [file] [log] [blame]
/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "PhoneticStringUtils.h"
18
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22
Daisuke Miyakawae919af52009-06-26 22:58:32 +090023#include <utils/String8.h>
24
The Android Open Source Project455ed292009-03-13 13:04:22 -070025using namespace android;
26
// Minimal hand-rolled test harness: runs a fixed list of test methods and
// keeps track of how many were executed and how many succeeded.
class TestExecutor {
 public:
    TestExecutor() : m_total_count(0), m_success_count(0), m_success(true) {}

    // Runs every registered test, prints a summary, and returns true only
    // when all of them succeeded.
    bool DoAllTests();

 private:
    // Runs one test method and updates the counters from m_success.
    void DoOneTest(void (TestExecutor::*test)());

    void testGetUtf32At();
    void testGetPhoneticallySortableCodePointAscii();
    void testGetPhoneticallySortableCodePointKana();
    void testGetPhoneticallySortableCodePointWhitespaceOnly();
    void testGetPhoneticallySortableCodePointSimpleCompare();
    void testGetUtf8FromUtf32();
    void testGetPhoneticallySortableString();
    void testGetNormalizedString();

    // Note: When adding a test, do not forget to add it to DoAllTests().

    int m_total_count;    // Number of tests executed so far.
    int m_success_count;  // Number of executed tests that succeeded.

    // Outcome of the test currently running. DoOneTest() sets it to true
    // before each test; a failing check sets it to false.
    bool m_success;
};
50
// Fatal equality check: when `input` differs from `expected`, prints both
// values in hex, marks the running test as failed and returns from the
// enclosing function. Must be expanded inside a void function that has
// `m_success` in scope. Both arguments are evaluated more than once, so pass
// expressions without side effects.
//
// Written as do { } while (0) instead of a GNU statement expression so that it
// is standard C++ and still behaves as a single statement.
#define ASSERT_EQ_VALUE(input, expected) \
    do { \
        if ((expected) != (input)) { \
            printf("0x%X(result) != 0x%X(expected)\n", \
                   static_cast<unsigned int>(input), \
                   static_cast<unsigned int>(expected)); \
            m_success = false; \
            return; \
        } \
    } while (0)
59
// Non-fatal equality check: when `input` differs from `expected`, prints both
// values in hex and marks the running test as failed, then lets the enclosing
// function continue. Must be expanded where `m_success` is in scope. Both
// arguments are evaluated more than once, so pass expressions without side
// effects.
//
// Written as do { } while (0) instead of a GNU statement expression so that it
// is standard C++ and still behaves as a single statement.
#define EXPECT_EQ_VALUE(input, expected) \
    do { \
        if ((expected) != (input)) { \
            printf("0x%X(result) != 0x%X(expected)\n", \
                   static_cast<unsigned int>(input), \
                   static_cast<unsigned int>(expected)); \
            m_success = false; \
        } \
    } while (0)
67
68
69bool TestExecutor::DoAllTests() {
Daisuke Miyakawae919af52009-06-26 22:58:32 +090070 DoOneTest(&TestExecutor::testGetUtf32At);
The Android Open Source Project455ed292009-03-13 13:04:22 -070071 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointAscii);
72 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointKana);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -070073 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly);
The Android Open Source Project455ed292009-03-13 13:04:22 -070074 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare);
Daisuke Miyakawae919af52009-06-26 22:58:32 +090075 DoOneTest(&TestExecutor::testGetUtf8FromUtf32);
The Android Open Source Project455ed292009-03-13 13:04:22 -070076 DoOneTest(&TestExecutor::testGetPhoneticallySortableString);
Daisuke Miyakawad28cdc42009-05-18 14:51:52 +090077 DoOneTest(&TestExecutor::testGetNormalizedString);
The Android Open Source Project455ed292009-03-13 13:04:22 -070078
79 printf("Test total: %d\nSuccess: %d\nFailure: %d\n",
80 m_total_count, m_success_count, m_total_count - m_success_count);
81
82 bool success = m_total_count == m_success_count;
83 printf("\n%s\n", success ? "Success" : "Failure");
84
85 return success;
86}
87
88void TestExecutor::DoOneTest(void (TestExecutor::*test)()) {
89 m_success = true;
90
91 (this->*test)();
92
93 ++m_total_count;
94 m_success_count += m_success ? 1 : 0;
95}
96
// Builds a String8 from `src`, calls getUtf32At(index, &next) on it and checks
// that the call returns a non-negative value, that `next` equals
// `expected_next`, and that the returned value equals `expected_value`.
// Any mismatch marks the running test as failed; the enclosing function keeps
// running. Must be expanded where `m_success` and EXPECT_EQ_VALUE are
// available.
//
// Fixes over the previous version: a wrong `next` now fails the test (it used
// to be printed but still counted as a success), the size_t values are
// converted to match the %u conversions, and the GNU statement expression is
// replaced by the portable do { } while (0) form.
#define TEST_GET_UTF32AT(src, index, expected_next, expected_value) \
    do { \
        size_t next; \
        String8 string8(src); \
        int32_t ret = string8.getUtf32At((index), &next); \
        if (ret < 0) { \
            printf("getUtf32At() returned negative value (src: %s, index: %d)\n", \
                   (src), (index)); \
            m_success = false; \
        } else if (next != (expected_next)) { \
            printf("next is unexpected value (src: %s, actual: %u, expected: %u)\n", \
                   (src), static_cast<unsigned int>(next), \
                   static_cast<unsigned int>(expected_next)); \
            m_success = false; \
        } else { \
            EXPECT_EQ_VALUE(ret, (expected_value)); \
        } \
    } while (0)
The Android Open Source Project455ed292009-03-13 13:04:22 -0700113
Daisuke Miyakawae919af52009-06-26 22:58:32 +0900114void TestExecutor::testGetUtf32At() {
115 printf("testGetUtf32At()\n");
116
117 TEST_GET_UTF32AT("a", 0, 1, 97);
The Android Open Source Project455ed292009-03-13 13:04:22 -0700118 // Japanese hiragana "a"
Daisuke Miyakawae919af52009-06-26 22:58:32 +0900119 TEST_GET_UTF32AT("\xE3\x81\x82", 0, 3, 0x3042);
The Android Open Source Project455ed292009-03-13 13:04:22 -0700120 // Japanese fullwidth katakana "a" with ascii a
Daisuke Miyakawae919af52009-06-26 22:58:32 +0900121 TEST_GET_UTF32AT("a\xE3\x82\xA2", 1, 4, 0x30A2);
The Android Open Source Project455ed292009-03-13 13:04:22 -0700122
123 // 2 PUA
Daisuke Miyakawae919af52009-06-26 22:58:32 +0900124 TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 0, 4, 0xFE000);
125 TEST_GET_UTF32AT("\xF3\xBE\x80\x80\xF3\xBE\x80\x88", 4, 8, 0xFE008);
The Android Open Source Project455ed292009-03-13 13:04:22 -0700126}
127
128void TestExecutor::testGetPhoneticallySortableCodePointAscii() {
129 printf("testGetPhoneticallySortableCodePoint()\n");
130 int halfwidth[94];
131 int fullwidth[94];
132 int i, codepoint;
133 bool next_is_consumed;
134 for (i = 0, codepoint = 0x0021; codepoint <= 0x007E; ++i, ++codepoint) {
135 halfwidth[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
136 &next_is_consumed);
137 if (halfwidth[i] < 0) {
138 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700139 m_success = false;
140 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700141 }
142 if (next_is_consumed) {
143 printf("next_is_consumed become true at 0x%04X", codepoint);
144 m_success = false;
145 return;
146 }
147 }
148 for (i = 0, codepoint = 0xFF01; codepoint <= 0xFF5E; ++i, ++codepoint) {
149 fullwidth[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
150 &next_is_consumed);
151 if (fullwidth[i] < 0) {
152 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700153 m_success = false;
154 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700155 }
156 if (next_is_consumed) {
157 printf("next_is_consumed become true at 0x%04X", codepoint);
158 m_success = false;
159 return;
160 }
161 }
162
163 for (i = 0; i < 94; i++) {
164 EXPECT_EQ_VALUE(halfwidth[i], fullwidth[i]);
165 }
166}
167
168void TestExecutor::testGetPhoneticallySortableCodePointKana() {
169 printf("testGetPhoneticallySortableCodePointKana()\n");
170 int hiragana[86];
171 int fullwidth_katakana[86];
172 int i, codepoint;
173 bool next_is_consumed;
174
175 for (i = 0, codepoint = 0x3041; codepoint <= 0x3096; ++i, ++codepoint) {
176 hiragana[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
177 &next_is_consumed);
178 if (hiragana[i] < 0) {
179 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700180 m_success = false;
181 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700182 }
183 if (next_is_consumed) {
184 printf("next_is_consumed become true at 0x%04X", codepoint);
185 m_success = false;
186 return;
187 }
188 }
189
190 for (i = 0, codepoint = 0x30A1; codepoint <= 0x30F6; ++i, ++codepoint) {
191 fullwidth_katakana[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
192 &next_is_consumed);
193 if (fullwidth_katakana[i] < 0) {
194 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700195 m_success = false;
196 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700197 }
198 if (next_is_consumed) {
199 printf("next_is_consumed become true at 0x%04X", codepoint);
200 m_success = false;
201 return;
202 }
203 }
204
205 // hankaku-katakana space do not have some characters corresponding to
206 // zenkaku-hiragana (e.g. xwa, xka, xku). To make test easier, insert
207 // zenkaku-katakana version of them into this array (See the value 0x30??).
208 int halfwidth_katakana[] = {
209 0xFF67, 0xFF71, 0xFF68, 0xFF72, 0xFF69, 0xFF73, 0xFF6A, 0xFF74, 0xFF6B,
210 0xFF75, 0xFF76, 0xFF76, 0xFF9E, 0xFF77, 0xFF77, 0xFF9E, 0xFF78, 0xFF78,
211 0xFF9E, 0xFF79, 0xFF79, 0xFF9E, 0xFF7A, 0xFF7A, 0xFF9E, 0xFF7B, 0xFF7B,
212 0xFF9E, 0xFF7C, 0xFF7C, 0xFF9E, 0xFF7D, 0xFF7D, 0xFF9E, 0xFF7E, 0xFF7E,
213 0xFF9E, 0xFF7F, 0xFF7F, 0xFF9E, 0xFF80, 0xFF80, 0xFF9E, 0xFF81, 0xFF81,
214 0xFF9E, 0xFF6F, 0xFF82, 0xFF82, 0xFF9E, 0xFF83, 0xFF83, 0xFF9E, 0xFF84,
215 0xFF84, 0xFF9E, 0xFF85, 0xFF86, 0xFF87, 0xFF88, 0xFF89, 0xFF8A, 0xFF8A,
216 0xFF9E, 0xFF8A, 0xFF9F, 0xFF8B, 0xFF8B, 0xFF9E, 0xFF8B, 0xFF9F, 0xFF8C,
217 0xFF8C, 0xFF9E, 0xFF8C, 0xFF9F, 0xFF8D, 0xFF8D, 0xFF9E, 0xFF8D, 0xFF9F,
218 0xFF8E, 0xFF8E, 0xFF9E, 0xFF8E, 0xFF9F, 0xFF8F, 0xFF90, 0xFF91, 0xFF92,
219 0xFF93, 0xFF6C, 0xFF94, 0xFF6D, 0xFF95, 0xFF6E, 0xFF96, 0xFF97, 0xFF98,
220 0xFF99, 0xFF9A, 0xFF9B, 0x30EE, 0xFF9C, 0x30F0, 0x30F1, 0xFF66, 0xFF9D,
221 0xFF73, 0xFF9E, 0x30F5, 0x30F6};
222 int len = sizeof(halfwidth_katakana)/sizeof(int);
223
224 int halfwidth_katakana_result[86];
225
226 int j;
227 for (i = 0, j = 0; i < len && j < 86; ++i, ++j) {
228 int codepoint = halfwidth_katakana[i];
229 int next_codepoint = i + 1 < len ? halfwidth_katakana[i + 1] : -1;
230 halfwidth_katakana_result[j] =
231 GetPhoneticallySortableCodePoint(codepoint, next_codepoint,
232 &next_is_consumed);
233 // Consume voiced mark/half-voiced mark.
234 if (next_is_consumed) {
235 ++i;
236 }
237 }
238 ASSERT_EQ_VALUE(i, len);
239 ASSERT_EQ_VALUE(j, 86);
240
241 for (i = 0; i < 86; ++i) {
242 EXPECT_EQ_VALUE(fullwidth_katakana[i], hiragana[i]);
243 EXPECT_EQ_VALUE(halfwidth_katakana_result[i], hiragana[i]);
244 }
245}
246
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700247void TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly() {
Daisuke Miyakawad28cdc42009-05-18 14:51:52 +0900248 printf("testGetPhoneticallySortableCodePointWhitespaceOnly()\n");
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700249 // Halfwidth space
250 int result = GetPhoneticallySortableCodePoint(0x0020, 0x0061, NULL);
251 ASSERT_EQ_VALUE(result, -1);
252 // Fullwidth space
253 result = GetPhoneticallySortableCodePoint(0x3000, 0x0062, NULL);
254 ASSERT_EQ_VALUE(result, -1);
255 // tab
256 result = GetPhoneticallySortableCodePoint(0x0009, 0x0062, NULL);
257 ASSERT_EQ_VALUE(result, -1);
258}
259
The Android Open Source Project455ed292009-03-13 13:04:22 -0700260void TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare() {
261 printf("testGetPhoneticallySortableCodePointSimpleCompare()\n");
262
263 int codepoints[] = {
264 0x3042, 0x30AB, 0xFF7B, 0x305F, 0x30CA, 0xFF8A, 0x30D0, 0x3071,
265 0x307E, 0x30E4, 0xFF97, 0x308F, 0x3093, 0x3094, 'A', 'Z',
266 '0', '9', '!', '/', ':', '?', '[', '`', '{', '~'};
267 size_t len = sizeof(codepoints)/sizeof(int);
268 bool next_is_consumed;
269 for (size_t i = 0; i < len - 1; ++i) {
270 int codepoint_a =
271 GetPhoneticallySortableCodePoint(codepoints[i], -1,
272 &next_is_consumed);
273 if (next_is_consumed) {
274 printf("next_is_consumed become true at 0x%04X", codepoint_a);
275 m_success = false;
276 return;
277 }
278 int codepoint_b =
279 GetPhoneticallySortableCodePoint(codepoints[i + 1], -1,
280 &next_is_consumed);
281 if (next_is_consumed) {
282 printf("next_is_consumed become true at 0x%04X", codepoint_b);
283 m_success = false;
284 return;
285 }
286
287 if (codepoint_a >= codepoint_b) {
288 printf("0x%04X (from 0x%04X) >= 0x%04X (from 0x%04X)\n",
289 codepoint_a, codepoints[i], codepoint_b, codepoints[i + 1]);
290 m_success = false;
291 return;
292 }
293 }
294}
295
// Stores the single code point `codepoint` into the in-scope object `string8`
// via setTo(), then compares the resulting C string with `expected`. On a
// setTo() error or a mismatch it prints diagnostics and marks the running test
// as failed; the enclosing function keeps running. Must be expanded where
// `string8` and `m_success` are in scope.
//
// Fixes over the previous version: bytes are dumped through unsigned char so
// values >= 0x80 print as 0xE3 rather than a sign-extended 0xFFFFFFE3, the
// error message names the call that is actually made, and the GNU statement
// expression is replaced by the portable do { } while (0) form.
#define EXPECT_EQ_CODEPOINT_UTF8(codepoint, expected) \
    do { \
        char32_t codepoints[1] = {(codepoint)}; \
        status_t ret = string8.setTo(codepoints, 1); \
        if (ret != NO_ERROR) { \
            printf("string8.setTo() returned an error at 0x%04X\n", \
                   static_cast<unsigned int>(codepoint)); \
            m_success = false; \
        } else { \
            const char* string = string8.string(); \
            if (strcmp(string, (expected)) != 0) { \
                printf("Failed at codepoint 0x%04X\n", \
                       static_cast<unsigned int>(codepoint)); \
                for (const char *ch = string; *ch != '\0'; ++ch) { \
                    printf("0x%X ", static_cast<unsigned int>( \
                                            static_cast<unsigned char>(*ch))); \
                } \
                printf("!= "); \
                for (const char *ch = (expected); *ch != '\0'; ++ch) { \
                    printf("0x%X ", static_cast<unsigned int>( \
                                            static_cast<unsigned char>(*ch))); \
                } \
                printf("\n"); \
                m_success = false; \
            } \
        } \
    } while (0)
319
Daisuke Miyakawae919af52009-06-26 22:58:32 +0900320void TestExecutor::testGetUtf8FromUtf32() {
321 printf("testGetUtf8FromUtf32()\n");
322 String8 string8;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700323
324 EXPECT_EQ_CODEPOINT_UTF8('a', "\x61");
325 // Armenian capital letter AYB (2 bytes in UTF8)
326 EXPECT_EQ_CODEPOINT_UTF8(0x0530, "\xD4\xB0");
327 // Japanese 'a' (3 bytes in UTF8)
328 EXPECT_EQ_CODEPOINT_UTF8(0x3042, "\xE3\x81\x82");
329 // Kanji
330 EXPECT_EQ_CODEPOINT_UTF8(0x65E5, "\xE6\x97\xA5");
331 // PUA (4 byets in UTF8)
332 EXPECT_EQ_CODEPOINT_UTF8(0xFE016, "\xF3\xBE\x80\x96");
333 EXPECT_EQ_CODEPOINT_UTF8(0xFE972, "\xF3\xBE\xA5\xB2");
The Android Open Source Project455ed292009-03-13 13:04:22 -0700334}
335
// Calls GetPhoneticallySortableString(src, &dst, &len) and compares the
// produced string with `expected`. On a false return or a mismatch it prints
// diagnostics and marks the running test as failed; the enclosing function
// keeps running. On a true return the produced buffer is released with
// free(). Must be expanded where `dst` (char *), `len` (size_t) and
// `m_success` are in scope.
//
// Fixes over the previous version: bytes are dumped through unsigned char so
// values >= 0x80 print as 0xE3 rather than a sign-extended 0xFFFFFFE3, and the
// GNU statement expression is replaced by the portable do { } while (0) form.
#define EXPECT_EQ_UTF8_UTF8(src, expected) \
    do { \
        if (!GetPhoneticallySortableString((src), &dst, &len)) { \
            printf("GetPhoneticallySortableString() returned false.\n"); \
            m_success = false; \
        } else { \
            if (strcmp(dst, (expected)) != 0) { \
                for (const char *ch = dst; *ch != '\0'; ++ch) { \
                    printf("0x%X ", static_cast<unsigned int>( \
                                            static_cast<unsigned char>(*ch))); \
                } \
                printf("!= "); \
                for (const char *ch = (expected); *ch != '\0'; ++ch) { \
                    printf("0x%X ", static_cast<unsigned int>( \
                                            static_cast<unsigned char>(*ch))); \
                } \
                printf("\n"); \
                m_success = false; \
            } \
            free(dst); \
        } \
    } while (0)
356
357void TestExecutor::testGetPhoneticallySortableString() {
Daisuke Miyakawad28cdc42009-05-18 14:51:52 +0900358 printf("testGetPhoneticallySortableString()\n");
The Android Open Source Project455ed292009-03-13 13:04:22 -0700359 char *dst;
360 size_t len;
361
362 // halfwidth alphabets -> fullwidth alphabets.
363 EXPECT_EQ_UTF8_UTF8("ABCD",
364 "\xEF\xBC\xA1\xEF\xBC\xA2\xEF\xBC\xA3\xEF\xBC\xA4");
365 // halfwidth/fullwidth-katakana -> hiragana
366 EXPECT_EQ_UTF8_UTF8(
367 "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA",
368 "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A");
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700369
370 // whitespace -> string which should be placed at last
371 EXPECT_EQ_UTF8_UTF8(" \t", "\xF0\x9F\xBF\xBD");
The Android Open Source Project455ed292009-03-13 13:04:22 -0700372}
373
#undef EXPECT_EQ_UTF8_UTF8

// Redefinition for the GetNormalizedString() test: calls
// GetNormalizedString(src, &dst, &len) and compares the produced string with
// `expected`. On a false return or a mismatch it prints diagnostics and marks
// the running test as failed; the enclosing function keeps running. On a true
// return the produced buffer is released with free(). Must be expanded where
// `dst` (char *), `len` (size_t) and `m_success` are in scope.
//
// Fixes over the previous version: the failure message names
// GetNormalizedString() (it was copy-pasted from the sortable-string variant),
// bytes are dumped through unsigned char so values >= 0x80 print as 0xE3
// rather than a sign-extended 0xFFFFFFE3, and the GNU statement expression is
// replaced by the portable do { } while (0) form.
#define EXPECT_EQ_UTF8_UTF8(src, expected) \
    do { \
        if (!GetNormalizedString((src), &dst, &len)) { \
            printf("GetNormalizedString() returned false.\n"); \
            m_success = false; \
        } else { \
            if (strcmp(dst, (expected)) != 0) { \
                for (const char *ch = dst; *ch != '\0'; ++ch) { \
                    printf("0x%X ", static_cast<unsigned int>( \
                                            static_cast<unsigned char>(*ch))); \
                } \
                printf("!= "); \
                for (const char *ch = (expected); *ch != '\0'; ++ch) { \
                    printf("0x%X ", static_cast<unsigned int>( \
                                            static_cast<unsigned char>(*ch))); \
                } \
                printf("\n"); \
                m_success = false; \
            } \
            free(dst); \
        } \
    } while (0)
396
397void TestExecutor::testGetNormalizedString() {
398 printf("testGetNormalizedString()\n");
399 char *dst;
400 size_t len;
401
402 // halfwidth alphabets/symbols -> keep it as is.
403 EXPECT_EQ_UTF8_UTF8("ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()",
404 "ABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%^&'()");
405 EXPECT_EQ_UTF8_UTF8("abcdefghijklmnopqrstuvwxyz[]{}\\@/",
406 "abcdefghijklmnopqrstuvwxyz[]{}\\@/");
407
408 // halfwidth/fullwidth-katakana -> hiragana
409 EXPECT_EQ_UTF8_UTF8(
410 "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA",
411 "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A");
412
413 // whitespace -> keep it as is.
414 EXPECT_EQ_UTF8_UTF8(" \t", " \t");
415}
416
The Android Open Source Project455ed292009-03-13 13:04:22 -0700417int main() {
418 TestExecutor executor;
419 if(executor.DoAllTests()) {
420 return 0;
421 } else {
422 return 1;
423 }
424}