testchar.c - platform/external/libxml2 - Gitiles

 /**
  * Test the UTF-8 decoding routines
  *
  * author: Daniel Veillard
  * copy: see Copyright for the status of this software.
  */

 #include <stdio.h>
 #include <string.h>
 #include <libxml/parser.h>
 #include <libxml/parserInternals.h>

 int lastError;

 static void errorHandler(void *unused, xmlErrorPtr err) {
     if ((unused == NULL) && (err != NULL) && (lastError == 0)) {
         lastError = err->code;
     }
 }

 char document1[100] = "<doc>XXXX</doc>";
 char document2[100] = "<doc foo='XXXX'/>";

 static void testDocumentRangeByte1(xmlParserCtxtPtr ctxt, char *document,
                   int len,  char *data, int forbid1, int forbid2) {
     int i;
     xmlDocPtr res;

     for (i = 0;i <= 0xFF;i++) {
 	lastError = 0;
 	xmlCtxtReset(ctxt);

         data[0] = i;

 	res = xmlReadMemory(document, len, "test", NULL, 0);

 	if ((i == forbid1) || (i == forbid2)) {
 	    if ((lastError == 0) || (res != NULL))
 	        fprintf(stderr,
 		    "Failed to detect invalid char for Byte 0x%02X: %c\n",
 		        i, i);
 	}

 	else if ((i == '<') || (i == '&')) {
 	    if ((lastError == 0) || (res != NULL))
 	        fprintf(stderr,
 		    "Failed to detect illegal char %c for Byte 0x%02X\n", i, i);
 	}
 	else if (((i < 0x20) || (i >= 0x80)) &&
 	    (i != 0x9) && (i != 0xA) && (i != 0xD)) {
 	    if ((lastError != XML_ERR_INVALID_CHAR) && (res != NULL))
 	        fprintf(stderr,
 		    "Failed to detect invalid char for Byte 0x%02X\n", i);
 	}
 	else if (res == NULL) {
 	    fprintf(stderr,
 		"Failed to parse valid char for Byte 0x%02X : %c\n", i, i);
 	}
 	if (res != NULL)
 	    xmlFreeDoc(res);
     }
 }

 static void testDocumentRangeByte2(xmlParserCtxtPtr ctxt, char *document,
                   int len,  char *data) {
     int i, j;
     xmlDocPtr res;

     for (i = 0x80;i <= 0xFF;i++) {
     for (j = 0;j <= 0xFF;j++) {
 	lastError = 0;
 	xmlCtxtReset(ctxt);

         data[0] = i;
         data[1] = j;

 	res = xmlReadMemory(document, len, "test", NULL, 0);

 	/* if first bit of first char is set, then second bit must too */
 	if ((i & 0x80) && ((i & 0x40) == 0)) {
 	    if ((lastError == 0) || (res != NULL))
 		fprintf(stderr,
 		"Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
 			i, j);
 	}

 	/*
 	 * if first bit of first char is set, then second char first
 	 * bits must be 10
 	 */
 	else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
 	    if ((lastError == 0) || (res != NULL))
 		fprintf(stderr,
 	    "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
 			i, j);
 	}

 	/*
 	 * if using a 2 byte encoding then the value must be greater
 	 * than 0x80, i.e. one of bits 5 to 1 of i must be set
 	 */
 	else if ((i & 0x80) && ((i & 0x1E) == 0)) {
 	    if ((lastError == 0) || (res != NULL))
 		fprintf(stderr,
 	    "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
 			i, j);
 	}

 	/*
 	 * if third bit of first char is set, then the sequence would need
 	 * at least 3 bytes, but we give only 2 !
 	 */
 	else if ((i & 0xE0) == 0xE0) {
 	    if ((lastError == 0) || (res != NULL))
 		fprintf(stderr,
 	    "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
 			i, j);
 	}

 	/*
 	 * We should see no error in remaning cases
 	 */
 	else if ((lastError != 0) || (res == NULL)) {
 	    fprintf(stderr,
 		"Failed to parse document for Bytes 0x%02X 0x%02X\n", i, j);
 	}
 	if (res != NULL)
 	    xmlFreeDoc(res);
     }
     }
 }

 /**
  * testDocumentRanges:
  *
  * Test the correct UTF8 character parsing in context of XML documents
  * Those are in-context injection tests checking the parser behaviour on
  * edge case values at different point in content, beginning and end of
  * CDATA in text or in attribute values.
  */

 static void testDocumentRanges(void) {
     xmlParserCtxtPtr ctxt;
     char *data;

     /*
      * Set up a parsing context using the first document as
      * the current input source.
      */
     ctxt = xmlNewParserCtxt();
     if (ctxt == NULL) {
         fprintf(stderr, "Failed to allocate parser context\n");
 	return;
     }

     printf("testing 1 byte char in document: 1");
     fflush(stdout);
     data = &document1[5];
     data[0] = ' ';
     data[1] = ' ';
     data[2] = ' ';
     data[3] = ' ';
     /* test 1 byte injection at beginning of area */
     testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
                            data, -1, -1);
     printf(" 2");
     fflush(stdout);
     data[0] = ' ';
     data[1] = ' ';
     data[2] = ' ';
     data[3] = ' ';
     /* test 1 byte injection at end of area */
     testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
                            data + 3, -1, -1);

     printf(" 3");
     fflush(stdout);
     data = &document2[10];
     data[0] = ' ';
     data[1] = ' ';
     data[2] = ' ';
     data[3] = ' ';
     /* test 1 byte injection at beginning of area */
     testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
                            data, '\'', -1);
     printf(" 4");
     fflush(stdout);
     data[0] = ' ';
     data[1] = ' ';
     data[2] = ' ';
     data[3] = ' ';
     /* test 1 byte injection at end of area */
     testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
                            data + 3, '\'', -1);
     printf(" done\n");

     printf("testing 2 byte char in document: 1");
     fflush(stdout);
     data = &document1[5];
     data[0] = ' ';
     data[1] = ' ';
     data[2] = ' ';
     data[3] = ' ';
     /* test 2 byte injection at beginning of area */
     testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
                            data);
     printf(" 2");
     fflush(stdout);
     data[0] = ' ';
     data[1] = ' ';
     data[2] = ' ';
     data[3] = ' ';
     /* test 2 byte injection at end of area */
     testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
                            data + 2);

     printf(" 3");
     fflush(stdout);
     data = &document2[10];
     data[0] = ' ';
     data[1] = ' ';
     data[2] = ' ';
     data[3] = ' ';
     /* test 2 byte injection at beginning of area */
     testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
                            data);
     printf(" 4");
     fflush(stdout);
     data[0] = ' ';
     data[1] = ' ';
     data[2] = ' ';
     data[3] = ' ';
     /* test 2 byte injection at end of area */
     testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
                            data + 2);
     printf(" done\n");

     xmlFreeParserCtxt(ctxt);
 }

 static void testCharRangeByte1(xmlParserCtxtPtr ctxt, char *data) {
     int i = 0;
     int len, c;

     data[1] = 0;
     data[2] = 0;
     data[3] = 0;
     for (i = 0;i <= 0xFF;i++) {
         data[0] = i;
 	ctxt->charset = XML_CHAR_ENCODING_UTF8;

 	lastError = 0;
         c = xmlCurrentChar(ctxt, &len);
 	if ((i == 0) || (i >= 0x80)) {
 	    /* we must see an error there */
 	    if (lastError != XML_ERR_INVALID_CHAR)
 	        fprintf(stderr,
 		    "Failed to detect invalid char for Byte 0x%02X\n", i);
 	} else if (i == 0xD) {
 	    if ((c != 0xA) || (len != 1))
 		fprintf(stderr, "Failed to convert char for Byte 0x%02X\n", i);
 	} else if ((c != i) || (len != 1)) {
 	    fprintf(stderr, "Failed to parse char for Byte 0x%02X\n", i);
 	}
     }
 }

 static void testCharRangeByte2(xmlParserCtxtPtr ctxt, char *data) {
     int i, j;
     int len, c;

     data[2] = 0;
     data[3] = 0;
     for (i = 0x80;i <= 0xFF;i++) {
 	for (j = 0;j <= 0xFF;j++) {
 	    data[0] = i;
 	    data[1] = j;
 	    ctxt->charset = XML_CHAR_ENCODING_UTF8;

 	    lastError = 0;
 	    c = xmlCurrentChar(ctxt, &len);

 	    /* if first bit of first char is set, then second bit must too */
 	    if ((i & 0x80) && ((i & 0x40) == 0)) {
 		if (lastError != XML_ERR_INVALID_CHAR)
 		    fprintf(stderr,
 		    "Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
 		            i, j);
 	    }

 	    /*
 	     * if first bit of first char is set, then second char first
 	     * bits must be 10
 	     */
 	    else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
 		if (lastError != XML_ERR_INVALID_CHAR)
 		    fprintf(stderr,
 		"Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
 		            i, j, c);
 	    }

 	    /*
 	     * if using a 2 byte encoding then the value must be greater
 	     * than 0x80, i.e. one of bits 5 to 1 of i must be set
 	     */
 	    else if ((i & 0x80) && ((i & 0x1E) == 0)) {
 		if (lastError != XML_ERR_INVALID_CHAR)
 		    fprintf(stderr,
 		"Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
 		            i, j, c);
 	    }

 	    /*
 	     * if third bit of first char is set, then the sequence would need
 	     * at least 3 bytes, but we give only 2 !
 	     */
 	    else if ((i & 0xE0) == 0xE0) {
 		if (lastError != XML_ERR_INVALID_CHAR)
 		    fprintf(stderr,
 		"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
 		            i, j);
 	    }

             /*
 	     * We should see no error in remaning cases
 	     */
 	    else if ((lastError != 0) || (len != 2)) {
 		fprintf(stderr,
 		    "Failed to parse char for Bytes 0x%02X 0x%02X\n", i, j);
 	    }

             /*
 	     * Finally check the value is right
 	     */
 	    else if (c != (j & 0x3F) + ((i & 0x1F) << 6)) {
 		fprintf(stderr,
 	"Failed to parse char for Bytes 0x%02X 0x%02X: expect %d got %d\n",
 	                i, j, ((j & 0x3F) + ((i & 0x1F) << 6)), c);
 	    }
         }
     }
 }

 static void testCharRangeByte3(xmlParserCtxtPtr ctxt, char *data) {
     int i, j, k, K;
     int len, c;
     unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
     int value;

     data[3] = 0;
     for (i = 0xE0;i <= 0xFF;i++) {
     for (j = 0;j <= 0xFF;j++) {
     for (k = 0;k < 6;k++) {
 	data[0] = i;
 	data[1] = j;
 	K = lows[k];
 	data[2] = (char) K;
 	value = (K & 0x3F) + ((j & 0x3F) << 6) + ((i & 0xF) << 12);
 	ctxt->charset = XML_CHAR_ENCODING_UTF8;

 	lastError = 0;
 	c = xmlCurrentChar(ctxt, &len);

 	/*
 	 * if fourth bit of first char is set, then the sequence would need
 	 * at least 4 bytes, but we give only 3 !
 	 */
 	if ((i & 0xF0) == 0xF0) {
 	    if (lastError != XML_ERR_INVALID_CHAR)
 		fprintf(stderr,
 	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
 			i, j, K, data[3]);
 	}

         /*
 	 * The second and the third bytes must start with 10
 	 */
 	else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80)) {
 	    if (lastError != XML_ERR_INVALID_CHAR)
 		fprintf(stderr,
 	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
 			i, j, K);
 	}

 	/*
 	 * if using a 3 byte encoding then the value must be greater
 	 * than 0x800, i.e. one of bits 4 to 0 of i must be set or
 	 * the 6th byte of data[1] must be set
 	 */
 	else if (((i & 0xF) == 0) && ((j & 0x20) == 0)) {
 	    if (lastError != XML_ERR_INVALID_CHAR)
 		fprintf(stderr,
 	    "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
 			i, j, K);
 	}

         /*
 	 * There are values in that range that are not allowed in XML-1.0
 	 */
 	else if (((value > 0xD7FF) && (value <0xE000)) ||
 	         ((value > 0xFFFD) && (value <0x10000))) {
 	    if (lastError != XML_ERR_INVALID_CHAR)
 		fprintf(stderr,
 	"Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X\n",
 			value, i, j, K);
 	}

 	/*
 	 * We should see no error in remaining cases
 	 */
 	else if ((lastError != 0) || (len != 3)) {
 	    fprintf(stderr,
 		"Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
 		    i, j, K);
 	}

 	/*
 	 * Finally check the value is right
 	 */
 	else if (c != value) {
 	    fprintf(stderr,
     "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
 		i, j, data[2], value, c);
 	}
     }
     }
     }
 }

 static void testCharRangeByte4(xmlParserCtxtPtr ctxt, char *data) {
     int i, j, k, K, l, L;
     int len, c;
     unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
     int value;

     data[4] = 0;
     for (i = 0xF0;i <= 0xFF;i++) {
     for (j = 0;j <= 0xFF;j++) {
     for (k = 0;k < 6;k++) {
     for (l = 0;l < 6;l++) {
 	data[0] = i;
 	data[1] = j;
 	K = lows[k];
 	data[2] = (char) K;
 	L = lows[l];
 	data[3] = (char) L;
 	value = (L & 0x3F) + ((K & 0x3F) << 6) + ((j & 0x3F) << 12) +
 	        ((i & 0x7) << 18);
 	ctxt->charset = XML_CHAR_ENCODING_UTF8;

 	lastError = 0;
 	c = xmlCurrentChar(ctxt, &len);

 	/*
 	 * if fifth bit of first char is set, then the sequence would need
 	 * at least 5 bytes, but we give only 4 !
 	 */
 	if ((i & 0xF8) == 0xF8) {
 	    if (lastError != XML_ERR_INVALID_CHAR)
 		fprintf(stderr,
   "Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
 			i, j, K, data[3]);
 	}

         /*
 	 * The second, third and fourth bytes must start with 10
 	 */
 	else if (((j & 0xC0) != 0x80) || ((K & 0xC0) != 0x80) ||
 	         ((L & 0xC0) != 0x80)) {
 	    if (lastError != XML_ERR_INVALID_CHAR)
 		fprintf(stderr,
 	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
 			i, j, K, L);
 	}

 	/*
 	 * if using a 3 byte encoding then the value must be greater
 	 * than 0x10000, i.e. one of bits 3 to 0 of i must be set or
 	 * the 6 or 5th byte of j must be set
 	 */
 	else if (((i & 0x7) == 0) && ((j & 0x30) == 0)) {
 	    if (lastError != XML_ERR_INVALID_CHAR)
 		fprintf(stderr,
 	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
 			i, j, K, L);
 	}

         /*
 	 * There are values in that range that are not allowed in XML-1.0
 	 */
 	else if (((value > 0xD7FF) && (value <0xE000)) ||
 	         ((value > 0xFFFD) && (value <0x10000)) ||
 		 (value > 0x10FFFF)) {
 	    if (lastError != XML_ERR_INVALID_CHAR)
 		fprintf(stderr,
 "Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
 			value, i, j, K, L);
 	}

 	/*
 	 * We should see no error in remaining cases
 	 */
 	else if ((lastError != 0) || (len != 4)) {
 	    fprintf(stderr,
 		"Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
 		    i, j, K);
 	}

 	/*
 	 * Finally check the value is right
 	 */
 	else if (c != value) {
 	    fprintf(stderr,
     "Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
 		i, j, data[2], value, c);
 	}
     }
     }
     }
     }
 }

 /**
  * testCharRanges:
  *
  * Test the correct UTF8 character parsing in isolation i.e.
  * not when parsing a full document, this is less expensive and we can
  * cover the full range of UTF-8 chars accepted by XML-1.0
  */

 static void testCharRanges(void) {
     char data[5];
     xmlParserCtxtPtr ctxt;
     xmlParserInputBufferPtr buf;
     xmlParserInputPtr input;

     memset(data, 0, 5);

     /*
      * Set up a parsing context using the above data buffer as
      * the current input source.
      */
     ctxt = xmlNewParserCtxt();
     if (ctxt == NULL) {
         fprintf(stderr, "Failed to allocate parser context\n");
 	return;
     }
     buf = xmlParserInputBufferCreateStatic(data, sizeof(data),
                                            XML_CHAR_ENCODING_NONE);
     if (buf == NULL) {
         fprintf(stderr, "Failed to allocate input buffer\n");
 	goto error;
     }
     input = xmlNewInputStream(ctxt);
     if (input == NULL) {
         xmlFreeParserInputBuffer(buf);
 	goto error;
     }
     input->filename = NULL;
     input->buf = buf;
     input->base = input->buf->buffer->content;
     input->cur = input->buf->buffer->content;
     input->end = &input->buf->buffer->content[4];
     inputPush(ctxt, input);

     printf("testing char range: 1");
     fflush(stdout);
     testCharRangeByte1(ctxt, data);
     printf(" 2");
     fflush(stdout);
     testCharRangeByte2(ctxt, data);
     printf(" 3");
     fflush(stdout);
     testCharRangeByte3(ctxt, data);
     printf(" 4");
     fflush(stdout);
     testCharRangeByte4(ctxt, data);
     printf(" done\n");
     fflush(stdout);

 error:
     xmlFreeParserCtxt(ctxt);
 }

 int main(void) {

     /*
      * this initialize the library and check potential ABI mismatches
      * between the version it was compiled for and the actual shared
      * library used.
      */
     LIBXML_TEST_VERSION

     /*
      * Catch errors separately
      */

     xmlSetStructuredErrorFunc(NULL, errorHandler);

     /*
      * Run the tests
      */
     testCharRanges();
     testDocumentRanges();

     /*
      * Cleanup function for the XML library.
      */
     xmlCleanupParser();
     /*
      * this is to debug memory for regression tests
      */
     xmlMemoryDump();
     return(0);
 }
	/**
	* Test the UTF-8 decoding routines
	*
	* author: Daniel Veillard
	* copy: see Copyright for the status of this software.
	*/

	#include <stdio.h>
	#include <string.h>
	#include <libxml/parser.h>
	#include <libxml/parserInternals.h>

	int lastError;

	static void errorHandler(void *unused, xmlErrorPtr err) {
	if ((unused == NULL) && (err != NULL) && (lastError == 0)) {
	lastError = err->code;
	}
	}

	char document1[100] = "<doc>XXXX</doc>";
	char document2[100] = "<doc foo='XXXX'/>";

	static void testDocumentRangeByte1(xmlParserCtxtPtr ctxt, char *document,
	int len, char *data, int forbid1, int forbid2) {
	int i;
	xmlDocPtr res;

	for (i = 0;i <= 0xFF;i++) {
	lastError = 0;
	xmlCtxtReset(ctxt);

	data[0] = i;

	res = xmlReadMemory(document, len, "test", NULL, 0);

	if ((i == forbid1) \|\| (i == forbid2)) {
	if ((lastError == 0) \|\| (res != NULL))
	fprintf(stderr,
	"Failed to detect invalid char for Byte 0x%02X: %c\n",
	i, i);
	}

	else if ((i == '<') \|\| (i == '&')) {
	if ((lastError == 0) \|\| (res != NULL))
	fprintf(stderr,
	"Failed to detect illegal char %c for Byte 0x%02X\n", i, i);
	}
	else if (((i < 0x20) \|\| (i >= 0x80)) &&
	(i != 0x9) && (i != 0xA) && (i != 0xD)) {
	if ((lastError != XML_ERR_INVALID_CHAR) && (res != NULL))
	fprintf(stderr,
	"Failed to detect invalid char for Byte 0x%02X\n", i);
	}
	else if (res == NULL) {
	fprintf(stderr,
	"Failed to parse valid char for Byte 0x%02X : %c\n", i, i);
	}
	if (res != NULL)
	xmlFreeDoc(res);
	}
	}

	static void testDocumentRangeByte2(xmlParserCtxtPtr ctxt, char *document,
	int len, char *data) {
	int i, j;
	xmlDocPtr res;

	for (i = 0x80;i <= 0xFF;i++) {
	for (j = 0;j <= 0xFF;j++) {
	lastError = 0;
	xmlCtxtReset(ctxt);

	data[0] = i;
	data[1] = j;

	res = xmlReadMemory(document, len, "test", NULL, 0);

	/* if first bit of first char is set, then second bit must too */
	if ((i & 0x80) && ((i & 0x40) == 0)) {
	if ((lastError == 0) \|\| (res != NULL))
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
	i, j);
	}

	/*
	* if first bit of first char is set, then second char first
	* bits must be 10
	*/
	else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
	if ((lastError == 0) \|\| (res != NULL))
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
	i, j);
	}

	/*
	* if using a 2 byte encoding then the value must be greater
	* than 0x80, i.e. one of bits 5 to 1 of i must be set
	*/
	else if ((i & 0x80) && ((i & 0x1E) == 0)) {
	if ((lastError == 0) \|\| (res != NULL))
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
	i, j);
	}

	/*
	* if third bit of first char is set, then the sequence would need
	* at least 3 bytes, but we give only 2 !
	*/
	else if ((i & 0xE0) == 0xE0) {
	if ((lastError == 0) \|\| (res != NULL))
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
	i, j);
	}

	/*
	* We should see no error in remaning cases
	*/
	else if ((lastError != 0) \|\| (res == NULL)) {
	fprintf(stderr,
	"Failed to parse document for Bytes 0x%02X 0x%02X\n", i, j);
	}
	if (res != NULL)
	xmlFreeDoc(res);
	}
	}
	}

	/**
	* testDocumentRanges:
	*
	* Test the correct UTF8 character parsing in context of XML documents
	* Those are in-context injection tests checking the parser behaviour on
	* edge case values at different point in content, beginning and end of
	* CDATA in text or in attribute values.
	*/

	static void testDocumentRanges(void) {
	xmlParserCtxtPtr ctxt;
	char *data;

	/*
	* Set up a parsing context using the first document as
	* the current input source.
	*/
	ctxt = xmlNewParserCtxt();
	if (ctxt == NULL) {
	fprintf(stderr, "Failed to allocate parser context\n");
	return;
	}

	printf("testing 1 byte char in document: 1");
	fflush(stdout);
	data = &document1[5];
	data[0] = ' ';
	data[1] = ' ';
	data[2] = ' ';
	data[3] = ' ';
	/* test 1 byte injection at beginning of area */
	testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
	data, -1, -1);
	printf(" 2");
	fflush(stdout);
	data[0] = ' ';
	data[1] = ' ';
	data[2] = ' ';
	data[3] = ' ';
	/* test 1 byte injection at end of area */
	testDocumentRangeByte1(ctxt, &document1[0], strlen(document1),
	data + 3, -1, -1);

	printf(" 3");
	fflush(stdout);
	data = &document2[10];
	data[0] = ' ';
	data[1] = ' ';
	data[2] = ' ';
	data[3] = ' ';
	/* test 1 byte injection at beginning of area */
	testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
	data, '\'', -1);
	printf(" 4");
	fflush(stdout);
	data[0] = ' ';
	data[1] = ' ';
	data[2] = ' ';
	data[3] = ' ';
	/* test 1 byte injection at end of area */
	testDocumentRangeByte1(ctxt, &document2[0], strlen(document2),
	data + 3, '\'', -1);
	printf(" done\n");

	printf("testing 2 byte char in document: 1");
	fflush(stdout);
	data = &document1[5];
	data[0] = ' ';
	data[1] = ' ';
	data[2] = ' ';
	data[3] = ' ';
	/* test 2 byte injection at beginning of area */
	testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
	data);
	printf(" 2");
	fflush(stdout);
	data[0] = ' ';
	data[1] = ' ';
	data[2] = ' ';
	data[3] = ' ';
	/* test 2 byte injection at end of area */
	testDocumentRangeByte2(ctxt, &document1[0], strlen(document1),
	data + 2);

	printf(" 3");
	fflush(stdout);
	data = &document2[10];
	data[0] = ' ';
	data[1] = ' ';
	data[2] = ' ';
	data[3] = ' ';
	/* test 2 byte injection at beginning of area */
	testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
	data);
	printf(" 4");
	fflush(stdout);
	data[0] = ' ';
	data[1] = ' ';
	data[2] = ' ';
	data[3] = ' ';
	/* test 2 byte injection at end of area */
	testDocumentRangeByte2(ctxt, &document2[0], strlen(document2),
	data + 2);
	printf(" done\n");

	xmlFreeParserCtxt(ctxt);
	}

	static void testCharRangeByte1(xmlParserCtxtPtr ctxt, char *data) {
	int i = 0;
	int len, c;

	data[1] = 0;
	data[2] = 0;
	data[3] = 0;
	for (i = 0;i <= 0xFF;i++) {
	data[0] = i;
	ctxt->charset = XML_CHAR_ENCODING_UTF8;

	lastError = 0;
	c = xmlCurrentChar(ctxt, &len);
	if ((i == 0) \|\| (i >= 0x80)) {
	/* we must see an error there */
	if (lastError != XML_ERR_INVALID_CHAR)
	fprintf(stderr,
	"Failed to detect invalid char for Byte 0x%02X\n", i);
	} else if (i == 0xD) {
	if ((c != 0xA) \|\| (len != 1))
	fprintf(stderr, "Failed to convert char for Byte 0x%02X\n", i);
	} else if ((c != i) \|\| (len != 1)) {
	fprintf(stderr, "Failed to parse char for Byte 0x%02X\n", i);
	}
	}
	}

	static void testCharRangeByte2(xmlParserCtxtPtr ctxt, char *data) {
	int i, j;
	int len, c;

	data[2] = 0;
	data[3] = 0;
	for (i = 0x80;i <= 0xFF;i++) {
	for (j = 0;j <= 0xFF;j++) {
	data[0] = i;
	data[1] = j;
	ctxt->charset = XML_CHAR_ENCODING_UTF8;

	lastError = 0;
	c = xmlCurrentChar(ctxt, &len);

	/* if first bit of first char is set, then second bit must too */
	if ((i & 0x80) && ((i & 0x40) == 0)) {
	if (lastError != XML_ERR_INVALID_CHAR)
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X\n",
	i, j);
	}

	/*
	* if first bit of first char is set, then second char first
	* bits must be 10
	*/
	else if ((i & 0x80) && ((j & 0xC0) != 0x80)) {
	if (lastError != XML_ERR_INVALID_CHAR)
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
	i, j, c);
	}

	/*
	* if using a 2 byte encoding then the value must be greater
	* than 0x80, i.e. one of bits 5 to 1 of i must be set
	*/
	else if ((i & 0x80) && ((i & 0x1E) == 0)) {
	if (lastError != XML_ERR_INVALID_CHAR)
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X: %d\n",
	i, j, c);
	}

	/*
	* if third bit of first char is set, then the sequence would need
	* at least 3 bytes, but we give only 2 !
	*/
	else if ((i & 0xE0) == 0xE0) {
	if (lastError != XML_ERR_INVALID_CHAR)
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x00\n",
	i, j);
	}

	/*
	* We should see no error in remaning cases
	*/
	else if ((lastError != 0) \|\| (len != 2)) {
	fprintf(stderr,
	"Failed to parse char for Bytes 0x%02X 0x%02X\n", i, j);
	}

	/*
	* Finally check the value is right
	*/
	else if (c != (j & 0x3F) + ((i & 0x1F) << 6)) {
	fprintf(stderr,
	"Failed to parse char for Bytes 0x%02X 0x%02X: expect %d got %d\n",
	i, j, ((j & 0x3F) + ((i & 0x1F) << 6)), c);
	}
	}
	}
	}

	static void testCharRangeByte3(xmlParserCtxtPtr ctxt, char *data) {
	int i, j, k, K;
	int len, c;
	unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
	int value;

	data[3] = 0;
	for (i = 0xE0;i <= 0xFF;i++) {
	for (j = 0;j <= 0xFF;j++) {
	for (k = 0;k < 6;k++) {
	data[0] = i;
	data[1] = j;
	K = lows[k];
	data[2] = (char) K;
	value = (K & 0x3F) + ((j & 0x3F) << 6) + ((i & 0xF) << 12);
	ctxt->charset = XML_CHAR_ENCODING_UTF8;

	lastError = 0;
	c = xmlCurrentChar(ctxt, &len);

	/*
	* if fourth bit of first char is set, then the sequence would need
	* at least 4 bytes, but we give only 3 !
	*/
	if ((i & 0xF0) == 0xF0) {
	if (lastError != XML_ERR_INVALID_CHAR)
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
	i, j, K, data[3]);
	}

	/*
	* The second and the third bytes must start with 10
	*/
	else if (((j & 0xC0) != 0x80) \|\| ((K & 0xC0) != 0x80)) {
	if (lastError != XML_ERR_INVALID_CHAR)
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
	i, j, K);
	}

	/*
	* if using a 3 byte encoding then the value must be greater
	* than 0x800, i.e. one of bits 4 to 0 of i must be set or
	* the 6th byte of data[1] must be set
	*/
	else if (((i & 0xF) == 0) && ((j & 0x20) == 0)) {
	if (lastError != XML_ERR_INVALID_CHAR)
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X\n",
	i, j, K);
	}

	/*
	* There are values in that range that are not allowed in XML-1.0
	*/
	else if (((value > 0xD7FF) && (value <0xE000)) \|\|
	((value > 0xFFFD) && (value <0x10000))) {
	if (lastError != XML_ERR_INVALID_CHAR)
	fprintf(stderr,
	"Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X\n",
	value, i, j, K);
	}

	/*
	* We should see no error in remaining cases
	*/
	else if ((lastError != 0) \|\| (len != 3)) {
	fprintf(stderr,
	"Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
	i, j, K);
	}

	/*
	* Finally check the value is right
	*/
	else if (c != value) {
	fprintf(stderr,
	"Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
	i, j, data[2], value, c);
	}
	}
	}
	}
	}

	static void testCharRangeByte4(xmlParserCtxtPtr ctxt, char *data) {
	int i, j, k, K, l, L;
	int len, c;
	unsigned char lows[6] = {0, 0x80, 0x81, 0xC1, 0xFF, 0xBF};
	int value;

	data[4] = 0;
	for (i = 0xF0;i <= 0xFF;i++) {
	for (j = 0;j <= 0xFF;j++) {
	for (k = 0;k < 6;k++) {
	for (l = 0;l < 6;l++) {
	data[0] = i;
	data[1] = j;
	K = lows[k];
	data[2] = (char) K;
	L = lows[l];
	data[3] = (char) L;
	value = (L & 0x3F) + ((K & 0x3F) << 6) + ((j & 0x3F) << 12) +
	((i & 0x7) << 18);
	ctxt->charset = XML_CHAR_ENCODING_UTF8;

	lastError = 0;
	c = xmlCurrentChar(ctxt, &len);

	/*
	* if fifth bit of first char is set, then the sequence would need
	* at least 5 bytes, but we give only 4 !
	*/
	if ((i & 0xF8) == 0xF8) {
	if (lastError != XML_ERR_INVALID_CHAR)
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
	i, j, K, data[3]);
	}

	/*
	* The second, third and fourth bytes must start with 10
	*/
	else if (((j & 0xC0) != 0x80) \|\| ((K & 0xC0) != 0x80) \|\|
	((L & 0xC0) != 0x80)) {
	if (lastError != XML_ERR_INVALID_CHAR)
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
	i, j, K, L);
	}

	/*
	* if using a 3 byte encoding then the value must be greater
	* than 0x10000, i.e. one of bits 3 to 0 of i must be set or
	* the 6 or 5th byte of j must be set
	*/
	else if (((i & 0x7) == 0) && ((j & 0x30) == 0)) {
	if (lastError != XML_ERR_INVALID_CHAR)
	fprintf(stderr,
	"Failed to detect invalid char for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
	i, j, K, L);
	}

	/*
	* There are values in that range that are not allowed in XML-1.0
	*/
	else if (((value > 0xD7FF) && (value <0xE000)) \|\|
	((value > 0xFFFD) && (value <0x10000)) \|\|
	(value > 0x10FFFF)) {
	if (lastError != XML_ERR_INVALID_CHAR)
	fprintf(stderr,
	"Failed to detect invalid char 0x%04X for Bytes 0x%02X 0x%02X 0x%02X 0x%02X\n",
	value, i, j, K, L);
	}

	/*
	* We should see no error in remaining cases
	*/
	else if ((lastError != 0) \|\| (len != 4)) {
	fprintf(stderr,
	"Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X\n",
	i, j, K);
	}

	/*
	* Finally check the value is right
	*/
	else if (c != value) {
	fprintf(stderr,
	"Failed to parse char for Bytes 0x%02X 0x%02X 0x%02X: expect %d got %d\n",
	i, j, data[2], value, c);
	}
	}
	}
	}
	}
	}

	/**
	* testCharRanges:
	*
	* Test the correct UTF8 character parsing in isolation i.e.
	* not when parsing a full document, this is less expensive and we can
	* cover the full range of UTF-8 chars accepted by XML-1.0
	*/

	static void testCharRanges(void) {
	char data[5];
	xmlParserCtxtPtr ctxt;
	xmlParserInputBufferPtr buf;
	xmlParserInputPtr input;

	memset(data, 0, 5);

	/*
	* Set up a parsing context using the above data buffer as
	* the current input source.
	*/
	ctxt = xmlNewParserCtxt();
	if (ctxt == NULL) {
	fprintf(stderr, "Failed to allocate parser context\n");
	return;
	}
	buf = xmlParserInputBufferCreateStatic(data, sizeof(data),
	XML_CHAR_ENCODING_NONE);
	if (buf == NULL) {
	fprintf(stderr, "Failed to allocate input buffer\n");
	goto error;
	}
	input = xmlNewInputStream(ctxt);
	if (input == NULL) {
	xmlFreeParserInputBuffer(buf);
	goto error;
	}
	input->filename = NULL;
	input->buf = buf;
	input->base = input->buf->buffer->content;
	input->cur = input->buf->buffer->content;
	input->end = &input->buf->buffer->content[4];
	inputPush(ctxt, input);

	printf("testing char range: 1");
	fflush(stdout);
	testCharRangeByte1(ctxt, data);
	printf(" 2");
	fflush(stdout);
	testCharRangeByte2(ctxt, data);
	printf(" 3");
	fflush(stdout);
	testCharRangeByte3(ctxt, data);
	printf(" 4");
	fflush(stdout);
	testCharRangeByte4(ctxt, data);
	printf(" done\n");
	fflush(stdout);

	error:
	xmlFreeParserCtxt(ctxt);
	}

	int main(void) {

	/*
	* this initialize the library and check potential ABI mismatches
	* between the version it was compiled for and the actual shared
	* library used.
	*/
	LIBXML_TEST_VERSION

	/*
	* Catch errors separately
	*/

	xmlSetStructuredErrorFunc(NULL, errorHandler);

	/*
	* Run the tests
	*/
	testCharRanges();
	testDocumentRanges();

	/*
	* Cleanup function for the XML library.
	*/
	xmlCleanupParser();
	/*
	* this is to debug memory for regression tests
	*/
	xmlMemoryDump();
	return(0);
	}