Blame - testchar.c - platform/external/libxml2

blob: 0d087927a7e52423142f593f9b0e01f5cdf68a90 [file] [log] [blame]

Daniel Veillard	abade01	2008-07-24 15:05:38 +0000	[diff] [blame]	1	/**
				2	* Test the UTF-8 decoding routines
				3	*
				4	* author: Daniel Veillard
				5	* copy: see Copyright for the status of this software.
				6	*/
				7
				8	#include <stdio.h>
				9	#include <string.h>
				10	#include <libxml/parser.h>
				11	#include <libxml/parserInternals.h>
				12
Daniel Veillard	145477d	2012-07-16 14:59:29 +0800	[diff] [blame]	13	#include "buf.h"
				14
/*
 * Code of the first error reported through errorHandler() since the
 * variable was last cleared; 0 means no error has been recorded.
 * Only used inside this file, hence the internal linkage.
 */
static int lastError;
				16
				17	static void errorHandler(void *unused, xmlErrorPtr err) {
				18	if ((unused == NULL) && (err != NULL) && (lastError == 0)) {
				19	lastError = err->code;
				20	}
				21	}
				22
				23	char document1[100] = "<doc>XXXX</doc>";
				24	char document2[100] = "<doc foo='XXXX'/>";
				25
				26	static void testDocumentRangeByte1(xmlParserCtxtPtr ctxt, char *document,
				27	int len, char *data, int forbid1, int forbid2) {
				28	int i;
				29	xmlDocPtr res;
				30
				31	for (i = 0;i <= 0xFF;i++) {
				32	lastError = 0;
				33	xmlCtxtReset(ctxt);
				34
				35	data[0] = i;
				36
				37	res = xmlReadMemory(document, len, "test", NULL, 0);
				38
				39	if ((i == forbid1) \|\| (i == forbid2)) {
				40	if ((lastError == 0) \|\| (res != NULL))
				41	fprintf(stderr,
				42	"Failed to detect invalid char for Byte 0x%02X: %c\n",
				43	i, i);
				44	}
				45
				46	else if ((i == '<') \|\| (i == '&')) {
				47	if ((lastError == 0) \|\| (res != NULL))
				48	fprintf(stderr,
				49	"Failed to detect illegal char %c for Byte 0x%02X\n", i, i);
				50	}
				51	else if (((i < 0x20) \|\| (i >= 0x80)) &&
				52	(i != 0x9) && (i != 0xA) && (i != 0xD)) {
				53	if ((lastError != XML_ERR_INVALID_CHAR) && (res != NULL))
				54	fprintf(stderr,
				55	"Failed to detect invalid char for Byte 0x%02X\n", i);
				56	}
				57	else if (res == NULL) {
				58	fprintf(stderr,
				59	"Failed to parse valid char for Byte 0x%02X : %c\n", i, i);
				60	}
				61	if (res != NULL)
				62	xmlFreeDoc(res);
				63	}
				64	}
				65
				66	static void testDocumentRangeByte2(xmlParserCtxtPtr ctxt, char *document,
				67	int len, char *data) {
				68	int i, j;
				69	xmlDocPtr res;
				70
				71	for (i = 0x80;i <= 0xFF;i++) {
				72	for (j = 0;j <= 0xFF;j++) {
				73	lastError = 0;
				74	xmlCtxtReset(ctxt);
				75
				76	data[0] = i;
				77	data[1] = j;
				78
				79	res = xmlReadMemory(document, len, "test", NULL, 0);
				80
				81	/* if first bit of first char is set, then second bit must too */
				82	if ((i & 0x80) && ((i & 0x40) == 0)) {
				83	if ((lastError == 0) \|\| (res != NULL))
				84	fprintf(stderr,
				85	"Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
				86	i, j);
				87	}
				88
				89	/*
				90	* if first bit of first char is set, then second char first
				91	* bits must be 10
				92	*/
				93	else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
				94	if ((lastError == 0) \|\| (res != NULL))
				95	fprintf(stderr,
				96	"Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
				97	i, j);
				98	}
				99
				100	/*
				101	* if using a 2 byte encoding then the value must be greater
				102	* than 0x80, i.e. one of bits 5 to 1 of i must be set
				103	*/
				104	else if ((i & 0x80) && ((i & 0x1E) == 0)) {
				105	if ((lastError == 0) \|\| (res != NULL))
				106	fprintf(stderr,
				107	"Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
				108	i, j);
				109	}
				110
				111	/*
				112	* if third bit of first char is set, then the sequence would need
				113	* at least 3 bytes, but we give only 2 !
				114	*/
				115	else if ((i & 0xE0) == 0xE0) {
				116	if ((lastError == 0) \|\| (res != NULL))
				117	fprintf(stderr,
				118	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
				119	i, j);
				120	}
				121
				122	/*
				123	* We should see no error in remaning cases
				124	*/
				125	else if ((lastError != 0) \|\| (res == NULL)) {
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	126	fprintf(stderr,
Daniel Veillard	abade01	2008-07-24 15:05:38 +0000	[diff] [blame]	127	"Failed to parse document for Bytes 0x%02X 0x%02X\n", i, j);
				128	}
				129	if (res != NULL)
				130	xmlFreeDoc(res);
				131	}
				132	}
				133	}
				134
				135	/**
				136	* testDocumentRanges:
				137	*
				138	* Test the correct UTF8 character parsing in context of XML documents
				139	* Those are in-context injection tests checking the parser behaviour on
				140	* edge case values at different point in content, beginning and end of
				141	* CDATA in text or in attribute values.
				142	*/
				143
				144	static void testDocumentRanges(void) {
				145	xmlParserCtxtPtr ctxt;
				146	char *data;
				147
				148	/*
				149	* Set up a parsing context using the first document as
				150	* the current input source.
				151	*/
				152	ctxt = xmlNewParserCtxt();
				153	if (ctxt == NULL) {
				154	fprintf(stderr, "Failed to allocate parser context\n");
				155	return;
				156	}
				157
				158	printf("testing 1 byte char in document: 1");
				159	fflush(stdout);
				160	data = &document1[5];
				161	data[0] = ' ';
				162	data[1] = ' ';
				163	data[2] = ' ';
				164	data[3] = ' ';
				165	/* test 1 byte injection at beginning of area */
				166	testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
				167	data, -1, -1);
				168	printf(" 2");
				169	fflush(stdout);
				170	data[0] = ' ';
				171	data[1] = ' ';
				172	data[2] = ' ';
				173	data[3] = ' ';
				174	/* test 1 byte injection at end of area */
				175	testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
				176	data + 3, -1, -1);
				177
				178	printf(" 3");
				179	fflush(stdout);
				180	data = &document2[10];
				181	data[0] = ' ';
				182	data[1] = ' ';
				183	data[2] = ' ';
				184	data[3] = ' ';
				185	/* test 1 byte injection at beginning of area */
				186	testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
				187	data, '\'', -1);
				188	printf(" 4");
				189	fflush(stdout);
				190	data[0] = ' ';
				191	data[1] = ' ';
				192	data[2] = ' ';
				193	data[3] = ' ';
				194	/* test 1 byte injection at end of area */
				195	testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
				196	data + 3, '\'', -1);
				197	printf(" done\n");
				198
				199	printf("testing 2 byte char in document: 1");
				200	fflush(stdout);
				201	data = &document1[5];
				202	data[0] = ' ';
				203	data[1] = ' ';
				204	data[2] = ' ';
				205	data[3] = ' ';
				206	/* test 2 byte injection at beginning of area */
				207	testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
				208	data);
				209	printf(" 2");
				210	fflush(stdout);
				211	data[0] = ' ';
				212	data[1] = ' ';
				213	data[2] = ' ';
				214	data[3] = ' ';
				215	/* test 2 byte injection at end of area */
				216	testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
				217	data + 2);
				218
				219	printf(" 3");
				220	fflush(stdout);
				221	data = &document2[10];
				222	data[0] = ' ';
				223	data[1] = ' ';
				224	data[2] = ' ';
				225	data[3] = ' ';
				226	/* test 2 byte injection at beginning of area */
				227	testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
				228	data);
				229	printf(" 4");
				230	fflush(stdout);
				231	data[0] = ' ';
				232	data[1] = ' ';
				233	data[2] = ' ';
				234	data[3] = ' ';
				235	/* test 2 byte injection at end of area */
				236	testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
				237	data + 2);
				238	printf(" done\n");
				239
				240	xmlFreeParserCtxt(ctxt);
				241	}
				242
				243	static void testCharRangeByte1(xmlParserCtxtPtr ctxt, char *data) {
				244	int i = 0;
				245	int len, c;
				246
				247	data[1] = 0;
				248	data[2] = 0;
				249	data[3] = 0;
				250	for (i = 0;i <= 0xFF;i++) {
				251	data[0] = i;
				252	ctxt->charset = XML_CHAR_ENCODING_UTF8;
				253
				254	lastError = 0;
				255	c = xmlCurrentChar(ctxt, &len);
				256	if ((i == 0) \|\| (i >= 0x80)) {
				257	/* we must see an error there */
				258	if (lastError != XML_ERR_INVALID_CHAR)
				259	fprintf(stderr,
				260	"Failed to detect invalid char for Byte 0x%02X\n", i);
				261	} else if (i == 0xD) {
				262	if ((c != 0xA) \|\| (len != 1))
				263	fprintf(stderr, "Failed to convert char for Byte 0x%02X\n", i);
				264	} else if ((c != i) \|\| (len != 1)) {
				265	fprintf(stderr, "Failed to parse char for Byte 0x%02X\n", i);
				266	}
				267	}
				268	}
				269
				270	static void testCharRangeByte2(xmlParserCtxtPtr ctxt, char *data) {
				271	int i, j;
				272	int len, c;
				273
				274	data[2] = 0;
				275	data[3] = 0;
				276	for (i = 0x80;i <= 0xFF;i++) {
				277	for (j = 0;j <= 0xFF;j++) {
				278	data[0] = i;
				279	data[1] = j;
				280	ctxt->charset = XML_CHAR_ENCODING_UTF8;
				281
				282	lastError = 0;
				283	c = xmlCurrentChar(ctxt, &len);
				284
				285	/* if first bit of first char is set, then second bit must too */
				286	if ((i & 0x80) && ((i & 0x40) == 0)) {
				287	if (lastError != XML_ERR_INVALID_CHAR)
				288	fprintf(stderr,
				289	"Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
				290	i, j);
				291	}
				292
				293	/*
				294	* if first bit of first char is set, then second char first
				295	* bits must be 10
				296	*/
				297	else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
				298	if (lastError != XML_ERR_INVALID_CHAR)
				299	fprintf(stderr,
				300	"Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
				301	i, j, c);
				302	}
				303
				304	/*
				305	* if using a 2 byte encoding then the value must be greater
				306	* than 0x80, i.e. one of bits 5 to 1 of i must be set
				307	*/
				308	else if ((i & 0x80) && ((i & 0x1E) == 0)) {
				309	if (lastError != XML_ERR_INVALID_CHAR)
				310	fprintf(stderr,
				311	"Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
				312	i, j, c);
				313	}
				314
				315	/*
				316	* if third bit of first char is set, then the sequence would need
				317	* at least 3 bytes, but we give only 2 !
				318	*/
				319	else if ((i & 0xE0) == 0xE0) {
				320	if (lastError != XML_ERR_INVALID_CHAR)
				321	fprintf(stderr,
				322	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
				323	i, j);
				324	}
				325
				326	/*
				327	* We should see no error in remaning cases
				328	*/
				329	else if ((lastError != 0) \|\| (len != 2)) {
				330	fprintf(stderr,
				331	"Failed to parse char for Bytes 0x%02X 0x%02X\n", i, j);
				332	}
				333
				334	/*
				335	* Finally check the value is right
				336	*/
				337	else if (c != (j & 0x3F) + ((i & 0x1F) << 6)) {
				338	fprintf(stderr,
				339	"Failed to parse char for Bytes 0x%02X 0x%02X: expect %d got %d\n",
				340	i, j, ((j & 0x3F) + ((i & 0x1F) << 6)), c);
				341	}
				342	}
				343	}
				344	}
				345
				346	static void testCharRangeByte3(xmlParserCtxtPtr ctxt, char *data) {
				347	int i, j, k, K;
				348	int len, c;
				349	unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
				350	int value;
				351
				352	data[3] = 0;
				353	for (i = 0xE0;i <= 0xFF;i++) {
				354	for (j = 0;j <= 0xFF;j++) {
				355	for (k = 0;k < 6;k++) {
				356	data[0] = i;
				357	data[1] = j;
				358	K = lows[k];
				359	data[2] = (char) K;
				360	value = (K & 0x3F) + ((j & 0x3F) << 6) + ((i & 0xF) << 12);
				361	ctxt->charset = XML_CHAR_ENCODING_UTF8;
				362
				363	lastError = 0;
				364	c = xmlCurrentChar(ctxt, &len);
				365
				366	/*
				367	* if fourth bit of first char is set, then the sequence would need
				368	* at least 4 bytes, but we give only 3 !
				369	*/
				370	if ((i & 0xF0) == 0xF0) {
				371	if (lastError != XML_ERR_INVALID_CHAR)
				372	fprintf(stderr,
				373	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
				374	i, j, K, data[3]);
				375	}
				376
				377	/*
				378	* The second and the third bytes must start with 10
				379	*/
				380	else if (((j & 0xC0) != 0x80) \|\| ((K & 0xC0) != 0x80)) {
				381	if (lastError != XML_ERR_INVALID_CHAR)
				382	fprintf(stderr,
				383	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
				384	i, j, K);
				385	}
				386
				387	/*
				388	* if using a 3 byte encoding then the value must be greater
				389	* than 0x800, i.e. one of bits 4 to 0 of i must be set or
				390	* the 6th byte of data[1] must be set
				391	*/
				392	else if (((i & 0xF) == 0) && ((j & 0x20) == 0)) {
				393	if (lastError != XML_ERR_INVALID_CHAR)
				394	fprintf(stderr,
				395	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
				396	i, j, K);
				397	}
				398
				399	/*
				400	* There are values in that range that are not allowed in XML-1.0
				401	*/
				402	else if (((value > 0xD7FF) && (value <0xE000)) \|\|
				403	((value > 0xFFFD) && (value <0x10000))) {
				404	if (lastError != XML_ERR_INVALID_CHAR)
				405	fprintf(stderr,
				406	"Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X\n",
				407	value, i, j, K);
				408	}
				409
				410	/*
				411	* We should see no error in remaining cases
				412	*/
				413	else if ((lastError != 0) \|\| (len != 3)) {
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	414	fprintf(stderr,
Daniel Veillard	abade01	2008-07-24 15:05:38 +0000	[diff] [blame]	415	"Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
				416	i, j, K);
				417	}
				418
				419	/*
				420	* Finally check the value is right
				421	*/
				422	else if (c != value) {
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	423	fprintf(stderr,
Daniel Veillard	abade01	2008-07-24 15:05:38 +0000	[diff] [blame]	424	"Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
				425	i, j, data[2], value, c);
				426	}
				427	}
				428	}
				429	}
				430	}
				431
				432	static void testCharRangeByte4(xmlParserCtxtPtr ctxt, char *data) {
				433	int i, j, k, K, l, L;
				434	int len, c;
				435	unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
				436	int value;
				437
				438	data[4] = 0;
				439	for (i = 0xF0;i <= 0xFF;i++) {
				440	for (j = 0;j <= 0xFF;j++) {
				441	for (k = 0;k < 6;k++) {
				442	for (l = 0;l < 6;l++) {
				443	data[0] = i;
				444	data[1] = j;
				445	K = lows[k];
				446	data[2] = (char) K;
				447	L = lows[l];
				448	data[3] = (char) L;
				449	value = (L & 0x3F) + ((K & 0x3F) << 6) + ((j & 0x3F) << 12) +
				450	((i & 0x7) << 18);
				451	ctxt->charset = XML_CHAR_ENCODING_UTF8;
				452
				453	lastError = 0;
				454	c = xmlCurrentChar(ctxt, &len);
				455
				456	/*
				457	* if fifth bit of first char is set, then the sequence would need
				458	* at least 5 bytes, but we give only 4 !
				459	*/
				460	if ((i & 0xF8) == 0xF8) {
				461	if (lastError != XML_ERR_INVALID_CHAR)
				462	fprintf(stderr,
				463	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
				464	i, j, K, data[3]);
				465	}
				466
				467	/*
				468	* The second, third and fourth bytes must start with 10
				469	*/
				470	else if (((j & 0xC0) != 0x80) \|\| ((K & 0xC0) != 0x80) \|\|
				471	((L & 0xC0) != 0x80)) {
				472	if (lastError != XML_ERR_INVALID_CHAR)
				473	fprintf(stderr,
				474	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
				475	i, j, K, L);
				476	}
				477
				478	/*
				479	* if using a 3 byte encoding then the value must be greater
				480	* than 0x10000, i.e. one of bits 3 to 0 of i must be set or
				481	* the 6 or 5th byte of j must be set
				482	*/
				483	else if (((i & 0x7) == 0) && ((j & 0x30) == 0)) {
				484	if (lastError != XML_ERR_INVALID_CHAR)
				485	fprintf(stderr,
				486	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
				487	i, j, K, L);
				488	}
				489
				490	/*
				491	* There are values in that range that are not allowed in XML-1.0
				492	*/
				493	else if (((value > 0xD7FF) && (value <0xE000)) \|\|
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	494	((value > 0xFFFD) && (value <0x10000)) \|\|
Daniel Veillard	abade01	2008-07-24 15:05:38 +0000	[diff] [blame]	495	(value > 0x10FFFF)) {
				496	if (lastError != XML_ERR_INVALID_CHAR)
				497	fprintf(stderr,
				498	"Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
				499	value, i, j, K, L);
				500	}
				501
				502	/*
				503	* We should see no error in remaining cases
				504	*/
				505	else if ((lastError != 0) \|\| (len != 4)) {
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	506	fprintf(stderr,
Daniel Veillard	abade01	2008-07-24 15:05:38 +0000	[diff] [blame]	507	"Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
				508	i, j, K);
				509	}
				510
				511	/*
				512	* Finally check the value is right
				513	*/
				514	else if (c != value) {
Daniel Veillard	f8e3db0	2012-09-11 13:26:36 +0800	[diff] [blame]	515	fprintf(stderr,
Daniel Veillard	abade01	2008-07-24 15:05:38 +0000	[diff] [blame]	516	"Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
				517	i, j, data[2], value, c);
				518	}
				519	}
				520	}
				521	}
				522	}
				523	}
				524
				525	/**
				526	* testCharRanges:
				527	*
				528	* Test the correct UTF8 character parsing in isolation i.e.
				529	* not when parsing a full document, this is less expensive and we can
				530	* cover the full range of UTF-8 chars accepted by XML-1.0
				531	*/
				532
				533	static void testCharRanges(void) {
				534	char data[5];
				535	xmlParserCtxtPtr ctxt;
				536	xmlParserInputBufferPtr buf;
				537	xmlParserInputPtr input;
				538
				539	memset(data, 0, 5);
				540
				541	/*
				542	* Set up a parsing context using the above data buffer as
				543	* the current input source.
				544	*/
				545	ctxt = xmlNewParserCtxt();
				546	if (ctxt == NULL) {
				547	fprintf(stderr, "Failed to allocate parser context\n");
				548	return;
				549	}
				550	buf = xmlParserInputBufferCreateStatic(data, sizeof(data),
				551	XML_CHAR_ENCODING_NONE);
				552	if (buf == NULL) {
				553	fprintf(stderr, "Failed to allocate input buffer\n");
				554	goto error;
				555	}
				556	input = xmlNewInputStream(ctxt);
				557	if (input == NULL) {
				558	xmlFreeParserInputBuffer(buf);
				559	goto error;
				560	}
				561	input->filename = NULL;
				562	input->buf = buf;
Daniel Veillard	145477d	2012-07-16 14:59:29 +0800	[diff] [blame]	563	input->cur =
				564	input->base = xmlBufContent(input->buf->buffer);
				565	input->end = input->base + 4;
Daniel Veillard	abade01	2008-07-24 15:05:38 +0000	[diff] [blame]	566	inputPush(ctxt, input);
				567
				568	printf("testing char range: 1");
				569	fflush(stdout);
				570	testCharRangeByte1(ctxt, data);
				571	printf(" 2");
				572	fflush(stdout);
				573	testCharRangeByte2(ctxt, data);
				574	printf(" 3");
				575	fflush(stdout);
				576	testCharRangeByte3(ctxt, data);
				577	printf(" 4");
				578	fflush(stdout);
				579	testCharRangeByte4(ctxt, data);
				580	printf(" done\n");
				581	fflush(stdout);
				582
				583	error:
				584	xmlFreeParserCtxt(ctxt);
				585	}
				586
				587	int main(void) {
				588
				589	/*
				590	* this initialize the library and check potential ABI mismatches
				591	* between the version it was compiled for and the actual shared
				592	* library used.
				593	*/
				594	LIBXML_TEST_VERSION
				595
				596	/*
				597	* Catch errors separately
				598	*/
				599
				600	xmlSetStructuredErrorFunc(NULL, errorHandler);
				601
				602	/*
				603	* Run the tests
				604	*/
				605	testCharRanges();
				606	testDocumentRanges();
				607
				608	/*
				609	* Cleanup function for the XML library.
				610	*/
				611	xmlCleanupParser();
				612	/*
				613	* this is to debug memory for regression tests
				614	*/
				615	xmlMemoryDump();
				616	return(0);
				617	}