/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "PhoneticStringUtils.h"
18
19#include <stdio.h>
20#include <stdlib.h>
21#include <string.h>
22
23using namespace android;
24
// Minimal self-contained test harness for the PhoneticStringUtils functions.
//
// Each test is a private member function that signals failure by setting
// m_success to false. DoAllTests() runs every test through DoOneTest(),
// prints a summary, and returns whether all of them passed.
class TestExecutor {
 public:
  TestExecutor() : m_total_count(0), m_success_count(0), m_success(true) {}

  // Runs every test, prints the totals, and returns true only when the number
  // of successful tests equals the number of tests run.
  bool DoAllTests();

 private:
  // Runs one test: resets m_success, invokes |test|, then updates the
  // counters from the resulting m_success value.
  void DoOneTest(void (TestExecutor::*test)());

  void testGetCodePointFromUtf8();
  void testGetPhoneticallySortableCodePointAscii();
  void testGetPhoneticallySortableCodePointKana();
  void testGetPhoneticallySortableCodePointWhitespaceOnly();
  void testGetPhoneticallySortableCodePointSimpleCompare();
  void testGetUtf8FromCodePoint();
  void testGetPhoneticallySortableString();

  // Note: When adding a test, do not forget to add it to DoAllTests().

  int m_total_count;    // Number of tests run so far.
  int m_success_count;  // Number of those tests that ended with m_success set.

  // Outcome of the test currently running. DoOneTest() sets it to true before
  // the test starts; the test (or an ASSERT_/EXPECT_ macro) clears it on
  // failure.
  bool m_success;
};
47
// Compares |input| (the value under test) with |expected|. On a mismatch it
// prints both values in hexadecimal, clears m_success, and returns from the
// enclosing (void) test function immediately.
//
// Both arguments are evaluated exactly once, so expressions with side effects
// (for example a call that advances an out-parameter it also reads) are safe
// to pass, and the value printed is the value that was actually compared.
// Operands are handled as int, which the "%X" output already required.
#define ASSERT_EQ_VALUE(input, expected)                              \
  do {                                                                \
    const int assert_eq_actual_ = (input);                            \
    const int assert_eq_expected_ = (expected);                       \
    if (assert_eq_expected_ != assert_eq_actual_) {                   \
      printf("0x%X(result) != 0x%X(expected)\n",                      \
             static_cast<unsigned int>(assert_eq_actual_),            \
             static_cast<unsigned int>(assert_eq_expected_));         \
      m_success = false;                                              \
      return;                                                         \
    }                                                                 \
  } while (0)
56
// Compares |input| (the value under test) with |expected|. On a mismatch it
// prints both values in hexadecimal and clears m_success, but lets the
// enclosing test continue so that further mismatches are reported too.
//
// Both arguments are evaluated exactly once, so the value printed is the
// value that was compared even when the expression has side effects.
// Operands are handled as int, which the "%X" output already required.
#define EXPECT_EQ_VALUE(input, expected)                              \
  do {                                                                \
    const int expect_eq_actual_ = (input);                            \
    const int expect_eq_expected_ = (expected);                       \
    if (expect_eq_expected_ != expect_eq_actual_) {                   \
      printf("0x%X(result) != 0x%X(expected)\n",                      \
             static_cast<unsigned int>(expect_eq_actual_),            \
             static_cast<unsigned int>(expect_eq_expected_));         \
      m_success = false;                                              \
    }                                                                 \
  } while (0)
64
65
66bool TestExecutor::DoAllTests() {
67 DoOneTest(&TestExecutor::testGetCodePointFromUtf8);
68 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointAscii);
69 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointKana);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -070070 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly);
The Android Open Source Project455ed292009-03-13 13:04:22 -070071 DoOneTest(&TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare);
72 DoOneTest(&TestExecutor::testGetUtf8FromCodePoint);
73 DoOneTest(&TestExecutor::testGetPhoneticallySortableString);
74
75 printf("Test total: %d\nSuccess: %d\nFailure: %d\n",
76 m_total_count, m_success_count, m_total_count - m_success_count);
77
78 bool success = m_total_count == m_success_count;
79 printf("\n%s\n", success ? "Success" : "Failure");
80
81 return success;
82}
83
84void TestExecutor::DoOneTest(void (TestExecutor::*test)()) {
85 m_success = true;
86
87 (this->*test)();
88
89 ++m_total_count;
90 m_success_count += m_success ? 1 : 0;
91}
92
93void TestExecutor::testGetCodePointFromUtf8() {
94 printf("testGetCodePointFromUtf8()\n");
95 int next;
96
97 EXPECT_EQ_VALUE(GetCodePointFromUtf8("a", 1, 0, &next), 97);
98 EXPECT_EQ_VALUE(next, 1);
99 // Japanese hiragana "a"
100 EXPECT_EQ_VALUE(GetCodePointFromUtf8("\xE3\x81\x82", 3, 0, &next), 0x3042);
101 EXPECT_EQ_VALUE(next, 3);
102 // Japanese fullwidth katakana "a" with ascii a
103 EXPECT_EQ_VALUE(GetCodePointFromUtf8("a\xE3\x82\xA2", 4, 1, &next), 0x30A2);
104 EXPECT_EQ_VALUE(next, 4);
105
106 // 2 PUA
107 ASSERT_EQ_VALUE(GetCodePointFromUtf8("\xF3\xBE\x80\x80\xF3\xBE\x80\x88",
108 8, 0, &next), 0xFE000);
109 ASSERT_EQ_VALUE(next, 4);
110 ASSERT_EQ_VALUE(GetCodePointFromUtf8("\xF3\xBE\x80\x80\xF3\xBE\x80\x88",
111 8, next, &next), 0xFE008);
112 ASSERT_EQ_VALUE(next, 8);
113}
114
115void TestExecutor::testGetPhoneticallySortableCodePointAscii() {
116 printf("testGetPhoneticallySortableCodePoint()\n");
117 int halfwidth[94];
118 int fullwidth[94];
119 int i, codepoint;
120 bool next_is_consumed;
121 for (i = 0, codepoint = 0x0021; codepoint <= 0x007E; ++i, ++codepoint) {
122 halfwidth[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
123 &next_is_consumed);
124 if (halfwidth[i] < 0) {
125 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700126 m_success = false;
127 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700128 }
129 if (next_is_consumed) {
130 printf("next_is_consumed become true at 0x%04X", codepoint);
131 m_success = false;
132 return;
133 }
134 }
135 for (i = 0, codepoint = 0xFF01; codepoint <= 0xFF5E; ++i, ++codepoint) {
136 fullwidth[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
137 &next_is_consumed);
138 if (fullwidth[i] < 0) {
139 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700140 m_success = false;
141 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700142 }
143 if (next_is_consumed) {
144 printf("next_is_consumed become true at 0x%04X", codepoint);
145 m_success = false;
146 return;
147 }
148 }
149
150 for (i = 0; i < 94; i++) {
151 EXPECT_EQ_VALUE(halfwidth[i], fullwidth[i]);
152 }
153}
154
155void TestExecutor::testGetPhoneticallySortableCodePointKana() {
156 printf("testGetPhoneticallySortableCodePointKana()\n");
157 int hiragana[86];
158 int fullwidth_katakana[86];
159 int i, codepoint;
160 bool next_is_consumed;
161
162 for (i = 0, codepoint = 0x3041; codepoint <= 0x3096; ++i, ++codepoint) {
163 hiragana[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
164 &next_is_consumed);
165 if (hiragana[i] < 0) {
166 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700167 m_success = false;
168 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700169 }
170 if (next_is_consumed) {
171 printf("next_is_consumed become true at 0x%04X", codepoint);
172 m_success = false;
173 return;
174 }
175 }
176
177 for (i = 0, codepoint = 0x30A1; codepoint <= 0x30F6; ++i, ++codepoint) {
178 fullwidth_katakana[i] = GetPhoneticallySortableCodePoint(codepoint, -1,
179 &next_is_consumed);
180 if (fullwidth_katakana[i] < 0) {
181 printf("returned value become negative at 0x%04X", codepoint);
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700182 m_success = false;
183 return;
The Android Open Source Project455ed292009-03-13 13:04:22 -0700184 }
185 if (next_is_consumed) {
186 printf("next_is_consumed become true at 0x%04X", codepoint);
187 m_success = false;
188 return;
189 }
190 }
191
192 // hankaku-katakana space do not have some characters corresponding to
193 // zenkaku-hiragana (e.g. xwa, xka, xku). To make test easier, insert
194 // zenkaku-katakana version of them into this array (See the value 0x30??).
195 int halfwidth_katakana[] = {
196 0xFF67, 0xFF71, 0xFF68, 0xFF72, 0xFF69, 0xFF73, 0xFF6A, 0xFF74, 0xFF6B,
197 0xFF75, 0xFF76, 0xFF76, 0xFF9E, 0xFF77, 0xFF77, 0xFF9E, 0xFF78, 0xFF78,
198 0xFF9E, 0xFF79, 0xFF79, 0xFF9E, 0xFF7A, 0xFF7A, 0xFF9E, 0xFF7B, 0xFF7B,
199 0xFF9E, 0xFF7C, 0xFF7C, 0xFF9E, 0xFF7D, 0xFF7D, 0xFF9E, 0xFF7E, 0xFF7E,
200 0xFF9E, 0xFF7F, 0xFF7F, 0xFF9E, 0xFF80, 0xFF80, 0xFF9E, 0xFF81, 0xFF81,
201 0xFF9E, 0xFF6F, 0xFF82, 0xFF82, 0xFF9E, 0xFF83, 0xFF83, 0xFF9E, 0xFF84,
202 0xFF84, 0xFF9E, 0xFF85, 0xFF86, 0xFF87, 0xFF88, 0xFF89, 0xFF8A, 0xFF8A,
203 0xFF9E, 0xFF8A, 0xFF9F, 0xFF8B, 0xFF8B, 0xFF9E, 0xFF8B, 0xFF9F, 0xFF8C,
204 0xFF8C, 0xFF9E, 0xFF8C, 0xFF9F, 0xFF8D, 0xFF8D, 0xFF9E, 0xFF8D, 0xFF9F,
205 0xFF8E, 0xFF8E, 0xFF9E, 0xFF8E, 0xFF9F, 0xFF8F, 0xFF90, 0xFF91, 0xFF92,
206 0xFF93, 0xFF6C, 0xFF94, 0xFF6D, 0xFF95, 0xFF6E, 0xFF96, 0xFF97, 0xFF98,
207 0xFF99, 0xFF9A, 0xFF9B, 0x30EE, 0xFF9C, 0x30F0, 0x30F1, 0xFF66, 0xFF9D,
208 0xFF73, 0xFF9E, 0x30F5, 0x30F6};
209 int len = sizeof(halfwidth_katakana)/sizeof(int);
210
211 int halfwidth_katakana_result[86];
212
213 int j;
214 for (i = 0, j = 0; i < len && j < 86; ++i, ++j) {
215 int codepoint = halfwidth_katakana[i];
216 int next_codepoint = i + 1 < len ? halfwidth_katakana[i + 1] : -1;
217 halfwidth_katakana_result[j] =
218 GetPhoneticallySortableCodePoint(codepoint, next_codepoint,
219 &next_is_consumed);
220 // Consume voiced mark/half-voiced mark.
221 if (next_is_consumed) {
222 ++i;
223 }
224 }
225 ASSERT_EQ_VALUE(i, len);
226 ASSERT_EQ_VALUE(j, 86);
227
228 for (i = 0; i < 86; ++i) {
229 EXPECT_EQ_VALUE(fullwidth_katakana[i], hiragana[i]);
230 EXPECT_EQ_VALUE(halfwidth_katakana_result[i], hiragana[i]);
231 }
232}
233
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700234void TestExecutor::testGetPhoneticallySortableCodePointWhitespaceOnly() {
235 printf("testGetPhoneticallySortableCodePointWhitespaceOnly");
236 // Halfwidth space
237 int result = GetPhoneticallySortableCodePoint(0x0020, 0x0061, NULL);
238 ASSERT_EQ_VALUE(result, -1);
239 // Fullwidth space
240 result = GetPhoneticallySortableCodePoint(0x3000, 0x0062, NULL);
241 ASSERT_EQ_VALUE(result, -1);
242 // tab
243 result = GetPhoneticallySortableCodePoint(0x0009, 0x0062, NULL);
244 ASSERT_EQ_VALUE(result, -1);
245}
246
The Android Open Source Project455ed292009-03-13 13:04:22 -0700247void TestExecutor::testGetPhoneticallySortableCodePointSimpleCompare() {
248 printf("testGetPhoneticallySortableCodePointSimpleCompare()\n");
249
250 int codepoints[] = {
251 0x3042, 0x30AB, 0xFF7B, 0x305F, 0x30CA, 0xFF8A, 0x30D0, 0x3071,
252 0x307E, 0x30E4, 0xFF97, 0x308F, 0x3093, 0x3094, 'A', 'Z',
253 '0', '9', '!', '/', ':', '?', '[', '`', '{', '~'};
254 size_t len = sizeof(codepoints)/sizeof(int);
255 bool next_is_consumed;
256 for (size_t i = 0; i < len - 1; ++i) {
257 int codepoint_a =
258 GetPhoneticallySortableCodePoint(codepoints[i], -1,
259 &next_is_consumed);
260 if (next_is_consumed) {
261 printf("next_is_consumed become true at 0x%04X", codepoint_a);
262 m_success = false;
263 return;
264 }
265 int codepoint_b =
266 GetPhoneticallySortableCodePoint(codepoints[i + 1], -1,
267 &next_is_consumed);
268 if (next_is_consumed) {
269 printf("next_is_consumed become true at 0x%04X", codepoint_b);
270 m_success = false;
271 return;
272 }
273
274 if (codepoint_a >= codepoint_b) {
275 printf("0x%04X (from 0x%04X) >= 0x%04X (from 0x%04X)\n",
276 codepoint_a, codepoints[i], codepoint_b, codepoints[i + 1]);
277 m_success = false;
278 return;
279 }
280 }
281}
282
// Encodes |codepoint| with GetUtf8FromCodePoint() into the caller's 10-byte
// buffer |dst| starting at byte offset |i|, then compares the bytes written
// with the NUL-terminated string |expected|. On any failure a diagnostic is
// printed and m_success is cleared; the enclosing test keeps running.
//
// Requires these names in the expanding scope: char dst[10], size_t index,
// and m_success. |index| is left holding the offset one past the last byte
// written. Each macro argument is evaluated exactly once. Bytes are dumped
// as unsigned values starting at offset |i|, i.e. exactly the range that was
// compared.
#define EXPECT_EQ_CODEPOINT_UTF8_WITH_INDEX(codepoint, expected, i)          \
  do {                                                                       \
    const int utf8_codepoint_ = (codepoint);                                 \
    const char *const utf8_expected_ = (expected);                           \
    const size_t utf8_start_ = (i);                                          \
    index = utf8_start_;                                                     \
    if (!GetUtf8FromCodePoint(utf8_codepoint_, dst, 10, &index)) {           \
      printf("GetUtf8FromCodePoint() returned false at 0x%04X\n",            \
             static_cast<unsigned int>(utf8_codepoint_));                    \
      m_success = false;                                                     \
    } else if (index >= 10) {                                                \
      printf("index (%lu) >= 10\n", static_cast<unsigned long>(index));      \
      m_success = false;                                                     \
    } else {                                                                 \
      dst[index] = '\0';                                                     \
      if (strcmp(dst + utf8_start_, utf8_expected_) != 0) {                  \
        printf("Failed at codepoint 0x%04X\n",                               \
               static_cast<unsigned int>(utf8_codepoint_));                  \
        for (const char *ch = dst + utf8_start_; *ch != '\0'; ++ch) {        \
          printf("0x%X ", static_cast<unsigned int>(                         \
                              static_cast<unsigned char>(*ch)));             \
        }                                                                    \
        printf("!= ");                                                       \
        for (const char *ch = utf8_expected_; *ch != '\0'; ++ch) {           \
          printf("0x%X ", static_cast<unsigned int>(                         \
                              static_cast<unsigned char>(*ch)));             \
        }                                                                    \
        printf("\n");                                                        \
        m_success = false;                                                   \
      }                                                                      \
    }                                                                        \
  } while (0)

// Same check with the encoded bytes written at the start of |dst|.
#define EXPECT_EQ_CODEPOINT_UTF8(codepoint, expected) \
  EXPECT_EQ_CODEPOINT_UTF8_WITH_INDEX(codepoint, expected, 0)
311
312
313void TestExecutor::testGetUtf8FromCodePoint() {
314 printf("testGetUtf8FromCodePoint()\n");
315 size_t index = 0;
316 char dst[10];
317
318 EXPECT_EQ_CODEPOINT_UTF8('a', "\x61");
319 // Armenian capital letter AYB (2 bytes in UTF8)
320 EXPECT_EQ_CODEPOINT_UTF8(0x0530, "\xD4\xB0");
321 // Japanese 'a' (3 bytes in UTF8)
322 EXPECT_EQ_CODEPOINT_UTF8(0x3042, "\xE3\x81\x82");
323 // Kanji
324 EXPECT_EQ_CODEPOINT_UTF8(0x65E5, "\xE6\x97\xA5");
325 // PUA (4 byets in UTF8)
326 EXPECT_EQ_CODEPOINT_UTF8(0xFE016, "\xF3\xBE\x80\x96");
327 EXPECT_EQ_CODEPOINT_UTF8(0xFE972, "\xF3\xBE\xA5\xB2");
328
329 EXPECT_EQ_CODEPOINT_UTF8_WITH_INDEX(0x058F, "\xD6\x8F", 3);
330
331 index = 0;
332 if (GetUtf8FromCodePoint(0x3043, dst, 2, &index)) {
333 printf("GetUtf8FromCodePont() returned true even when destination length"
334 "is not enough\n");
335 m_success = false;
336 }
337}
338
// Converts the UTF-8 string |src| with GetPhoneticallySortableString() and
// compares the result with the NUL-terminated string |expected|. On a failed
// conversion or a mismatch a diagnostic is printed and m_success is cleared;
// the enclosing test keeps running. After a successful conversion the
// returned buffer is released with free().
//
// Requires these names in the expanding scope: char *dst, size_t len, and
// m_success. Each macro argument is evaluated exactly once, and bytes are
// dumped as unsigned values so that non-ASCII bytes print as 0x80..0xFF.
#define EXPECT_EQ_UTF8_UTF8(src, expected)                                   \
  do {                                                                       \
    const char *const utf8_expected_ = (expected);                           \
    if (!GetPhoneticallySortableString((src), &dst, &len)) {                 \
      printf("GetPhoneticallySortableString() returned false.\n");           \
      m_success = false;                                                     \
    } else {                                                                 \
      if (strcmp(dst, utf8_expected_) != 0) {                                \
        for (const char *ch = dst; *ch != '\0'; ++ch) {                      \
          printf("0x%X ", static_cast<unsigned int>(                         \
                              static_cast<unsigned char>(*ch)));             \
        }                                                                    \
        printf("!= ");                                                       \
        for (const char *ch = utf8_expected_; *ch != '\0'; ++ch) {           \
          printf("0x%X ", static_cast<unsigned int>(                         \
                              static_cast<unsigned char>(*ch)));             \
        }                                                                    \
        printf("\n");                                                        \
        m_success = false;                                                   \
      }                                                                      \
      free(dst);                                                             \
    }                                                                        \
  } while (0)
359
360void TestExecutor::testGetPhoneticallySortableString() {
361 char *dst;
362 size_t len;
363
364 // halfwidth alphabets -> fullwidth alphabets.
365 EXPECT_EQ_UTF8_UTF8("ABCD",
366 "\xEF\xBC\xA1\xEF\xBC\xA2\xEF\xBC\xA3\xEF\xBC\xA4");
367 // halfwidth/fullwidth-katakana -> hiragana
368 EXPECT_EQ_UTF8_UTF8(
369 "\xE3\x81\x82\xE3\x82\xA4\xE3\x81\x86\xEF\xBD\xB4\xE3\x82\xAA",
370 "\xE3\x81\x82\xE3\x81\x84\xE3\x81\x86\xE3\x81\x88\xE3\x81\x8A");
Daisuke Miyakawa0c45e822009-03-27 19:41:52 -0700371
372 // whitespace -> string which should be placed at last
373 EXPECT_EQ_UTF8_UTF8(" \t", "\xF0\x9F\xBF\xBD");
The Android Open Source Project455ed292009-03-13 13:04:22 -0700374}
375
376int main() {
377 TestExecutor executor;
378 if(executor.DoAllTests()) {
379 return 0;
380 } else {
381 return 1;
382 }
383}