Blame - harness.c - platform/external/unicode

blob: c3b0b3ac3b9012770c8d79f56308ceb6909cecc7 [file] [log] [blame]

Lucas Eckels	dc4699f	2012-08-06 15:22:01 -0700	[diff] [blame]	1	/*
				2	* Copyright 2001-2004 Unicode, Inc.
				3	*
				4	* Disclaimer
				5	*
				6	* This source code is provided as is by Unicode, Inc. No claims are
				7	* made as to fitness for any particular purpose. No warranties of any
				8	* kind are expressed or implied. The recipient agrees to determine
				9	* applicability of information provided. If this file has been
				10	* purchased on magnetic or optical media from Unicode, Inc., the
				11	* sole remedy for any claim will be exchange of defective media
				12	* within 90 days of receipt.
				13	*
				14	* Limitations on Rights to Redistribute This Code
				15	*
				16	* Unicode, Inc. hereby grants the right to freely use the information
				17	* supplied in this file in the creation of products supporting the
				18	* Unicode Standard, and to make copies of this file in any form
				19	* for internal or external distribution as long as this notice
				20	* remains attached.
				21	*
				22	* harness.c
				23	*
				24	* This is a test harness for "ConvertUTF.c". Compile this
				25	* and run without arguments. It will exhaustively test
				26	* the conversion routines, and print a few lines of diagnostic
				27	* output. You don't need to compile ConvertUTF.c itself,
				28	* since it gets #included here along with the header.
				29	* Example of a compile line:
				30	*
				31	* $ gcc -g harness.c -o harness
				32	*
				33	* Rev History: Rick McGowan, new file April 2001.
				34	* Sept 19, 2002: Corrected error on line 234: utf16_buf[2] becomes utf16_result[2]
				35	* per report from Iain Murray.
				36	* July 3, 2003: Updated printout message.
				37	* Oct 19, 2004: Updated isLegalUTF8 test data and corrected switch statements to catch
				38	* illegal surrogate use in UTF-8, per report from Frank Tang.
				39	*
				40	*/
				41
				42	#define CVTUTF_DEBUG 1
				43
				#include <stdio.h>
				#include <stdlib.h>	/* exit() */
				#include "ConvertUTF.c"
				46
				47	/* ---------------------------------------------------------------------
				48	test01 - Spot check a few legal & illegal UTF-8 values only.
				49	This is not an exhaustive test, just a brief one that was
				50	used to develop the "isLegalUTF8" routine.
				51
				52	Legal UTF-8 sequences are:
				53
				54	1st---- 2nd---- 3rd---- 4th---- Codepoints---
				55
				56	00-7F 0000- 007F
				57	C2-DF 80-BF 0080- 07FF
				58	E0 A0-BF 80-BF 0800- 0FFF
				59	E1-EC 80-BF 80-BF 1000- CFFF
				60	ED 80-9F 80-BF D000- D7FF
				61	EE-EF 80-BF 80-BF E000- FFFF
				62	F0 90-BF 80-BF 80-BF 10000- 3FFFF
				63	F1-F3 80-BF 80-BF 80-BF 40000- FFFFF
				64	F4 80-8F 80-BF 80-BF 100000-10FFFF
				65
				66	--------------------------------------------------------------------- */
				67
				68
				69	struct utf8_test {
				70	Boolean utf8_legal; /* is legal sequence? */
				71	int utf8_len; /* length of sequence */
				72	unsigned char utf8_seq[5]; /* the sequence */
				73	};
				74
				75	struct utf8_test utf8_testData[] = {
				76	{ 1, 1, { 0x7A, 0x00, 0x00, 0x00, 0x00 }}, /* 0 */
				77	{ 1, 2, { 0xC2, 0xAC, 0x00, 0x00, 0x00 }}, /* 1 */
				78	{ 1, 2, { 0xDF, 0xB2, 0x00, 0x00, 0x00 }}, /* 2 */
				79	{ 1, 3, { 0xE0, 0xA1, 0x81, 0x00, 0x00 }}, /* 3 */
				80	{ 1, 3, { 0xE1, 0xAC, 0x90, 0x00, 0x00 }}, /* 4 */
				81	{ 1, 3, { 0xF0, 0x93, 0xB2, 0xA1, 0x00 }}, /* 5 */
				82	{ 1, 4, { 0xF1, 0x87, 0x9A, 0xB0, 0x00 }}, /* 6 */
				83	{ 1, 4, { 0xF3, 0x88, 0x9B, 0xAD, 0x00 }}, /* 7 */
				84	{ 1, 4, { 0xF4, 0x82, 0x89, 0x8F, 0x00 }}, /* 8 */
				85
				86	{ 0, 3, { 0x82, 0x00, 0x00, 0x00, 0x00 }}, /* 9 */
				87	{ 0, 2, { 0xF8, 0xAC, 0x00, 0x00, 0x00 }}, /* 10 */
				88	{ 0, 2, { 0xE1, 0xFC, 0xFF, 0x00, 0x00 }}, /* 11 */
				89	{ 0, 3, { 0xC2, 0xFC, 0x00, 0x00, 0x00 }}, /* 12 */
				90	{ 0, 3, { 0xE1, 0xC2, 0x81, 0x00, 0x00 }}, /* 13 */
				91	{ 0, 2, { 0xC2, 0xC1, 0x00, 0x00, 0x00 }}, /* 14 */
				92	{ 0, 2, { 0xC0, 0xAF, 0x00, 0x00, 0x00 }}, /* 15 */
				93	{ 0, 3, { 0xE0, 0x9F, 0x80, 0x00, 0x00 }}, /* 16 */
				94	{ 0, 4, { 0xF0, 0x93, 0xB2, 0xC1, 0x00 }}, /* 17 */
				95
				96	{ 1, 3, { 0xED, 0x9F, 0xBF, 0x00, 0x00 }}, /* 18 */
				97	{ 1, 3, { 0xEE, 0x80, 0x80, 0x00, 0x00 }}, /* 19 */
				98	{ 0, 3, { 0xED, 0xA0, 0x80, 0x00, 0x00 }}, /* 20 */
				99	{ 0, 3, { 0xED, 0xBF, 0xBF, 0x00, 0x00 }}, /* 21 */
				100
				101	/* for all > 21 use "short" buffer lengths to detect over-run */
				102	{ 0, 4, { 0xF0, 0x93, 0xB2, 0xC3, 0x00 }}, /* 18 use short buflen */
				103	{ 0, 0, { 0x00, 0x00, 0x00, 0x00, 0x00 }},
				104
				105	};
				106
				107	int test01() {
				108	int i;
				109	int rval, wantVal1, wantVal2, gotVal1, gotVal2, len2;
				110
				111	printf("Begin Test01\n"); fflush(stdout);
				112
				113	rval = 0;
				114	for (i = 0; utf8_testData[i].utf8_len; i++) {
				115	wantVal1 = wantVal2 = utf8_testData[i].utf8_legal;
				116	gotVal1 = isLegalUTF8(&(utf8_testData[i].utf8_seq[0]), utf8_testData[i].utf8_len);
				117	/* use truncated length for tests over 21 */
				118	if (i <= 21) { len2 = 4; } else { len2 = utf8_testData[i].utf8_len-1; wantVal2 = 0; }
				119	gotVal2 = isLegalUTF8Sequence(&(utf8_testData[i].utf8_seq[0]), &(utf8_testData[i].utf8_seq[0])+len2);
				120	if ((gotVal1 != wantVal1) \|\| (gotVal2 != wantVal2)) {
				121	printf("Test01 error: seq %d is %d & %d (should be %d & %d) for bytes (%x,%x,%x,%x,%x,) & len %d\n",
				122	i, gotVal1, gotVal2, wantVal1, wantVal2, utf8_testData[i].utf8_seq[0],
				123	utf8_testData[i].utf8_seq[1], utf8_testData[i].utf8_seq[2],
				124	utf8_testData[i].utf8_seq[3], utf8_testData[i].utf8_seq[4],
				125	utf8_testData[i].utf8_len);
				126	++rval;
				127	}
				128	}
				129
				130	return (rval ? 0 : 1);
				131	}
				132
				133
				134	/* ---------------------------------------------------------------------
				135	test02 - Test round trip UTF32 -> UTF16 -> UTF8 -> UTF16 -> UTF32
				136
				137	This is an exhaustive test of values 0 through 0x10FFFF. It
				138	takes each integer value and converts from UTF32 through the
				139	other encoding forms, and back to UTF32, checking the results
				140	along the way.
				141
				142	It does not check the un-paired low surrogates, except for
				143	the first low surrogate. It intends to get that one illegal
				144	result, prints a message, and continues with tests.
				145
				146	--------------------------------------------------------------------- */
				147
				148	int test02() {
				149	int i, n;
				150	ConversionResult result;
				151	UTF32 utf32_buf[2], utf32_result[2];
				152	UTF16 utf16_buf[3], utf16_result[3];
				153	UTF8 utf8_buf[8];
				154	UTF32 utf32SourceStart, utf32TargetStart;
				155	UTF16 utf16SourceStart, utf16TargetStart;
				156	UTF8 utf8SourceStart, utf8TargetStart;
				157
				158	printf("Begin Test02\n"); fflush(stdout);
				159
				160	for (i = 0; i <= 0x10FFFF; i++) {
				161	utf32_buf[0] = i; utf32_buf[1] = 0;
				162	utf32_result[0] = utf32_result[1] = 0;
				163	utf16_buf[0] = utf16_buf[1] = utf16_buf[2] = 0;
				164	utf16_result[0] = utf16_result[1] = utf16_result[2] = 0;
				165	for (n = 0; n < 8; n++) utf8_buf[n] = 0;
				166
				167	utf32SourceStart = utf32_buf; utf32TargetStart = utf32_result;
				168	utf16TargetStart = utf16SourceStart = utf16_buf;
				169	utf8TargetStart = utf8SourceStart = utf8_buf;
				170
				171	/*
				172	* Test UTF32 -> UTF16
				173	*/
				174	result = ConvertUTF32toUTF16((const UTF32 **) &utf32SourceStart, &(utf32_buf[1]), &utf16TargetStart, &(utf16_buf[2]), strictConversion);
				175	if (i < UNI_SUR_HIGH_START \|\| i > UNI_SUR_LOW_END) {
				176	/* skip result checking for all but 0000d800, which we know to be illegal */
				177	switch (result) {
				178	default: fprintf(stderr, "Test02A fatal error: result %d for input %08x\n", result, utf32_buf[0]); exit(1);
				179	case conversionOK: break;
				180	case sourceExhausted: printf("sourceExhausted\t"); break;
				181	case targetExhausted: printf("targetExhausted\t"); break;
				182	case sourceIllegal: printf("sourceIllegal\t"); break;
				183	}
				184	}
				185	if (result != conversionOK) {
				186	if (i <= UNI_SUR_HIGH_START \|\| i > UNI_SUR_LOW_END) {
				187	printf("Test02A for %d, input %08x, output %04x,%04x, result %d\n",
				188	i, utf32_buf[0], utf16_buf[0], utf16_buf[1], result);
				189	if ((i != UNI_SUR_HIGH_START) \|\| (result != sourceIllegal)) {
				190	return 0;
				191	} else {
				192	printf("!!! Test02A: note expected illegal result for 0x0000D800\n");
				193	}
				194	}
				195	}
				196	if (i > UNI_SUR_HIGH_START && i <= UNI_SUR_LOW_END) continue;
				197
				198	/*
				199	* Test UTF16 -> UTF8, with legality check on. We check for everything except
				200	* for unpaired low surrogates. We do make one check that the lowest low
				201	* surrogate, when unpaired, is illegal.
				202	*/
				203	result = ConvertUTF16toUTF8((const UTF16 **) &utf16SourceStart, &(utf16_buf[2]), &utf8TargetStart, &(utf8_buf[7]), strictConversion);
				204	switch (result) {
				205	default: fprintf(stderr, "Test02B fatal error: result %d for input %08x\n", result, utf32_buf[0]); exit(1);
				206	case conversionOK: break;
				207	case sourceExhausted: printf("sourceExhausted\t"); break;
				208	case targetExhausted: printf("targetExhausted\t"); break;
				209	case sourceIllegal: printf("sourceIllegal\t"); break;
				210	}
				211	if (result != conversionOK) {
				212	printf("Test02B for %d (0x%x), input %04x,%04x; output %s; result %d\n",
				213	i, utf32_buf[0], utf16_buf[0], utf16_buf[1], utf8_buf, result);
				214	if ((i != UNI_SUR_LOW_START) && (i != UNI_SUR_HIGH_START)) {
				215	return 0;
				216	} else {
				217	/* Note: This illegal result only happens if we remove the surrogate
				218	check in Test02A. So it shouldn't be seen unless that check and
				219	the "continue" are removed in the test above.
				220	*/
				221	if (i == UNI_SUR_LOW_START)
				222	printf("!!! Test02B: note expected illegal result for 0xDC00,0000\n");
				223	else if (i == UNI_SUR_HIGH_START)
				224	printf("!!! Test02B: note expected illegal result for 0xD800,0000\n");
				225	}
				226	}
				227	if ((i == UNI_SUR_LOW_START) && result != sourceIllegal) {
				228	printf("Test02B for %d (0x%x), input %04x,%04x; output %s; result %d\n",
				229	i, utf32_buf[0], utf16_buf[0], utf16_buf[1], utf8_buf, result);
				230	printf("Test02B: expected illegal result for 0xDC00,0000 was not flagged illegal.\n");
				231	return 0;
				232	}
				233
				234	if ((i >= UNI_SUR_HIGH_START) & (i <= UNI_SUR_LOW_END)) continue;
				235
				236	/*
				237	* Reset some result buffer pointers for the trip back.
				238	*/
				239	utf32SourceStart = utf32_buf; utf32TargetStart = utf32_result;
				240	utf16TargetStart = utf16SourceStart = utf16_result;
				241	utf8TargetStart = utf8SourceStart = utf8_buf;
				242
				243	/*
				244	* Test UTF8 -> UTF16, with legality check on.
				245	*/
				246	result = ConvertUTF8toUTF16((const UTF8 **) &utf8SourceStart, &(utf8_buf[trailingBytesForUTF8[utf8_buf[0]]+1]), &utf16TargetStart, &(utf16_result[2]), strictConversion);
				247	switch (result) {
				248	default: fprintf(stderr, "Test02C fatal error: result %d for input %08x\n", result, utf32_buf[0]); exit(1);
				249	case conversionOK: break;
				250	case sourceExhausted: printf("sourceExhausted\t"); break;
				251	case targetExhausted: printf("targetExhausted\t"); break;
				252	case sourceIllegal: printf("sourceIllegal\t"); break;
				253	}
				254	if (result != conversionOK) {
				255	printf("Test02C for %d (0x%x), input %s; output %04x,%04x; result %d\n",
				256	i, utf32_buf[0], utf8_buf, utf16_buf[0], utf16_buf[1], result);
				257	return 0;
				258	}
				259	for (n = 0; n < 3; n++) { /* check that the utf16 result is the same as what went in. */
				260	if (utf16_buf[n] != utf16_result[n]) {
				261	printf("Test02C error: input = 0x%08x; utf16_buf = 0x%04x,0x%04x; utf16_result = 0x%04x,0x%04x\n",
				262	utf32_buf[0], utf16_buf[0], utf16_buf[1], utf16_result[0], utf16_result[1]);
				263	return 0;
				264	}
				265	}
				266
				267	/*
				268	* Test UTF16 -> UTF32, with legality check on. If the result of our previous
				269	* conversion gave us a "surrogate pair", then we need to convert 2 entities
				270	* back to UTF32.
				271	*/
				272	if (utf16_result[0] >= UNI_SUR_HIGH_START && utf16_result[0] <= UNI_SUR_HIGH_END) {
				273	result = ConvertUTF16toUTF32((const UTF16 **) &utf16SourceStart, &(utf16_result[2]), &utf32TargetStart, &(utf32_result[1]), strictConversion);
				274	} else {
				275	result = ConvertUTF16toUTF32((const UTF16 **) &utf16SourceStart, &(utf16_result[1]), &utf32TargetStart, &(utf32_result[1]), strictConversion);
				276	}
				277	switch (result) {
				278	default: fprintf(stderr, "Test02D fatal error: result %d for input %08x\n", result, utf32_buf[0]); exit(1);
				279	case conversionOK: break;
				280	case sourceExhausted: printf("sourceExhausted\t"); break;
				281	case targetExhausted: printf("targetExhausted\t"); break;
				282	case sourceIllegal: printf("sourceIllegal\t"); break;
				283	}
				284	if (result != conversionOK) {
				285	printf("Test02D for %d (0x%x), input %04x,%04x; output %08x; result %d\n",
				286	i, utf32_buf[0], utf16_buf[0], utf16_buf[1], utf32_result[0], result);
				287	return 0;
				288	}
				289
				290	/*
				291	* Now, check the final round-trip value.
				292	*/
				293	if (utf32_buf[0] != utf32_result[0]) {
				294	printf("Test02E for %d: utf32 input %08x; trip output %08x (utf_16buf is %04x,%04x)\n", i, utf32_buf[0], utf32_result[0], utf16_buf[0], utf16_buf[1]);
				295	return 0;
				296	}
				297	}
				298	return 1;
				299	}
				300
				301	/* ---------------------------------------------------------------------
				302	test03 - Test round trip UTF32 -> UTF8 -> UTF32
				303
				304	This tests the functions that were not tested by test02 above.
				305	For each UTF32 value 0 through 0x10FFFF, it tests the conversion
				306	to UTF-8 and back. The test is exhaustive.
				307
				308	--------------------------------------------------------------------- */
				309
				310	int test03() {
				311	int i, n;
				312	ConversionResult result;
				313	UTF32 utf32_buf[2], utf32_result[2];
				314	UTF8 utf8_buf[8];
				315	UTF32 utf32SourceStart, utf32TargetStart;
				316	UTF8 utf8SourceStart, utf8TargetStart;
				317
				318	printf("Begin Test03\n"); fflush(stdout);
				319
				320	for (i = 0; i <= 0x10FFFF; i++) {
				321	/* Skip all surrogates except UNI_SUR_HIGH_START, which we test for illegality. */
				322	if (i > UNI_SUR_HIGH_START && i <= UNI_SUR_LOW_END) continue;
				323
				324	utf32_buf[0] = i; utf32_buf[1] = 0;
				325	utf32_result[0] = utf32_result[1] = 0;
				326	for (n = 0; n < 8; n++) utf8_buf[n] = 0;
				327
				328	utf32SourceStart = utf32_buf; utf32TargetStart = utf32_result;
				329	utf8TargetStart = utf8SourceStart = utf8_buf;
				330
				331	/*
				332	* Test UTF32 -> UTF8, with legality check on.
				333	*/
				334	result = ConvertUTF32toUTF8((const UTF32 **) &utf32SourceStart, &(utf32_buf[1]), & utf8TargetStart, &(utf8_buf[7]), strictConversion);
				335	switch (result) {
				336	default: fprintf(stderr, "Test03A fatal error: result %d for input %08x\n", result, utf32_buf[0]); exit(1);
				337	case conversionOK: break;
				338	case sourceExhausted: printf("sourceExhausted\t"); break;
				339	case targetExhausted: printf("targetExhausted\t"); break;
				340	case sourceIllegal: printf("sourceIllegal\t"); break;
				341	}
				342	if (result != conversionOK) {
				343	printf("Test03A for %d (0x%x); output %s; result %d\n",
				344	i, utf32_buf[0], utf8_buf, result);
				345	if (i != UNI_SUR_HIGH_START) {
				346	return 0;
				347	} else {
				348	printf("!!! Test03A: note expected illegal result for 0x0000D800\n");
				349	}
				350	}
				351	if ((i == UNI_SUR_HIGH_START) && result != sourceIllegal) {
				352	printf("Test03A for %d (0x%x); output %s; result %d\n",
				353	i, utf32_buf[0], utf8_buf, result);
				354	printf("Test03A: expected illegal result for 0x0000D800 was not flagged illegal.\n");
				355	return 0;
				356	}
				357
				358	if ((i >= UNI_SUR_HIGH_START) & (i <= UNI_SUR_LOW_END)) continue;
				359
				360	/*
				361	* Reset some result buffer pointers for the trip back.
				362	*/
				363	utf32SourceStart = utf32_buf; utf32TargetStart = utf32_result;
				364	utf8TargetStart = utf8SourceStart = utf8_buf;
				365
				366	/*
				367	* Test UTF8 -> UTF32, with legality check on.
				368	*/
				369	result = ConvertUTF8toUTF32((const UTF8 **) &utf8SourceStart, &(utf8_buf[trailingBytesForUTF8[utf8_buf[0]]+1]), &utf32TargetStart, &(utf32_result[1]), strictConversion);
				370	switch (result) {
				371	default: fprintf(stderr, "Test03B fatal error: result %d for input %08x\n", result, utf32_buf[0]); exit(1);
				372	case conversionOK: break;
				373	case sourceExhausted: printf("sourceExhausted\t"); break;
				374	case targetExhausted: printf("targetExhausted\t"); break;
				375	case sourceIllegal: printf("sourceIllegal\t"); break;
				376	}
				377	if (result != conversionOK) {
				378	printf("Test03B for %d (0x%x), input %s; output 0x%08x; result %d\n",
				379	i, utf32_buf[0], utf8_buf, utf32_result[0], result);
				380	return 0;
				381	}
				382
				383	/*
				384	* Now, check the final round-trip value.
				385	*/
				386	if (utf32_buf[0] != utf32_result[0]) {
				387	printf("Test03C for %d: utf32 input %08x; utf8 buf %s; trip output %08x\n", i, utf32_buf[0], utf8_buf, utf32_result[0]);
				388	return 0;
				389	}
				390	}
				391	return 1;
				392	}
				393
				394	/* ---------------------------------------------------------------------
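				395	test04 - Test conversion of an illegal UTF-32 value (> 10FFFF) to UTF-8.
				396	Expect strict conversion to report sourceIllegal. The value is expected to be turned into UNI_REPLACEMENT_CHAR, but the output bytes are not checked by this test.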
				397
				398	--------------------------------------------------------------------- */
				399
				400	int test04() {
				401	int i, n;
				402	ConversionResult result;
				403	UTF32 utf32_buf[2];
				404	UTF8 utf8_buf[8];
				405	UTF32 utf32SourceStart, utf32TargetStart;
				406	UTF8 utf8SourceStart, utf8TargetStart;
				407
				408	printf("Begin Test04\n"); fflush(stdout);
				409
				410	i = 0x10FFFF + 21; /* an arbitrary value > legal */
				411
				412	utf32_buf[0] = i; utf32_buf[1] = 0;
				413	for (n = 0; n < 8; n++) utf8_buf[n] = 0;
				414
				415	utf32SourceStart = utf32_buf;
				416	utf8TargetStart = utf8_buf;
				417
				418	/*
				419	* Test UTF32 -> UTF8, with legality check on.
				420	*/
				421	result = ConvertUTF32toUTF8((const UTF32 **) &utf32SourceStart, &(utf32_buf[1]), & utf8TargetStart, &(utf8_buf[7]), strictConversion);
				422	if (result != sourceIllegal) {
				423	fprintf(stderr, "Test04A fatal error: result %d for input %08x\n", result, utf32_buf[0]); exit(1);
				424	}
				425
				426	return 1;
				427	}
				428
				429	/* --------------------------------------------------------------------- */
				430
				/*
				 * Entry point: print a short banner, run test01..test04 in order, and
				 * print a success or failure line for each.
				 *
				 * Returns 0 when every test reported success and 1 when at least one
				 * reported failure, so callers can detect a failing run from the exit
				 * status.
				 */
				int main(void) {
				    int failures = 0;

				    printf("Three tests of round-trip conversions will be performed.\n");
				    printf("One test of illegal UTF-32 will be performed.\n");
				    printf("Two illegal result messages are expected; one in test 02A; one in test 03A.\n");
				    printf("These are for tests of Surrogate conversion.\n\n");
				    fflush(stdout);

				    if (test01()) { printf("******	Test01 succeeded without error. ******\n\n"); }
				    else { printf("-------- Test01 failed. --------\n\n"); ++failures; }

				    if (test02()) { printf("******	Test02 succeeded without error. ******\n\n"); }
				    else { printf("-------- Test02 failed. --------\n\n"); ++failures; }

				    if (test03()) { printf("******	Test03 succeeded without error. ******\n\n"); }
				    else { printf("-------- Test03 failed. --------\n\n"); ++failures; }

				    if (test04()) { printf("******	Test04 succeeded without error. ******\n\n"); }
				    else { printf("-------- Test04 failed. --------\n\n"); ++failures; }

				    return failures ? 1 : 0;
				}