Blame - parserInternals.c - platform/external/libxml2

blob: 758c6b33a0450e42b469c347e17fa3f742b2645e [file] [log] [blame]

The Android Open Source Project	ab4e2e9	2009-03-03 19:30:06 -0800	[diff] [blame]	1	/*
				2	* parserInternals.c : Internal routines (and obsolete ones) needed for the
				3	* XML and HTML parsers.
				4	*
				5	* See Copyright for the status of this software.
				6	*
				7	* daniel@veillard.com
				8	*/
				9
				10	#define IN_LIBXML
				11	#include "libxml.h"
				12
				13	#if defined(WIN32) && !defined (__CYGWIN__)
				14	#define XML_DIR_SEP '\\'
				15	#else
				16	#define XML_DIR_SEP '/'
				17	#endif
				18
				19	#include <string.h>
				20	#ifdef HAVE_CTYPE_H
				21	#include <ctype.h>
				22	#endif
				23	#ifdef HAVE_STDLIB_H
				24	#include <stdlib.h>
				25	#endif
				26	#ifdef HAVE_SYS_STAT_H
				27	#include <sys/stat.h>
				28	#endif
				29	#ifdef HAVE_FCNTL_H
				30	#include <fcntl.h>
				31	#endif
				32	#ifdef HAVE_UNISTD_H
				33	#include <unistd.h>
				34	#endif
				35	#ifdef HAVE_ZLIB_H
				36	#include <zlib.h>
				37	#endif
				38
				39	#include <libxml/xmlmemory.h>
				40	#include <libxml/tree.h>
				41	#include <libxml/parser.h>
				42	#include <libxml/parserInternals.h>
				43	#include <libxml/valid.h>
				44	#include <libxml/entities.h>
				45	#include <libxml/xmlerror.h>
				46	#include <libxml/encoding.h>
				47	#include <libxml/valid.h>
				48	#include <libxml/xmlIO.h>
				49	#include <libxml/uri.h>
				50	#include <libxml/dict.h>
				51	#include <libxml/SAX.h>
				52	#ifdef LIBXML_CATALOG_ENABLED
				53	#include <libxml/catalog.h>
				54	#endif
				55	#include <libxml/globals.h>
				56	#include <libxml/chvalid.h>
				57
				58	/*
				59	* Various global defaults for parsing
				60	*/
				61
				62	/**
				63	* xmlCheckVersion:
				64	* @version: the include version number
				65	*
				66	* check the compiled lib version against the include one.
				67	* This can warn or immediately kill the application
				68	*/
				69	void
				70	xmlCheckVersion(int version) {
				71	int myversion = (int) LIBXML_VERSION;
				72
				73	xmlInitParser();
				74
				75	if ((myversion / 10000) != (version / 10000)) {
				76	xmlGenericError(xmlGenericErrorContext,
				77	"Fatal: program compiled against libxml %d using libxml %d\n",
				78	(version / 10000), (myversion / 10000));
				79	fprintf(stderr,
				80	"Fatal: program compiled against libxml %d using libxml %d\n",
				81	(version / 10000), (myversion / 10000));
				82	}
				83	if ((myversion / 100) < (version / 100)) {
				84	xmlGenericError(xmlGenericErrorContext,
				85	"Warning: program compiled against libxml %d using older %d\n",
				86	(version / 100), (myversion / 100));
				87	}
				88	}
				89
				90
				91	/************************************************************************
				92	* *
				93	* Some factorized error routines *
				94	* *
				95	************************************************************************/
				96
				97
				98	/**
				99	* xmlErrMemory:
				100	* @ctxt: an XML parser context
				101	* @extra: extra informations
				102	*
				103	* Handle a redefinition of attribute error
				104	*/
				105	void
				106	xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
				107	{
				108	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
				109	(ctxt->instate == XML_PARSER_EOF))
				110	return;
				111	if (ctxt != NULL) {
				112	ctxt->errNo = XML_ERR_NO_MEMORY;
				113	ctxt->instate = XML_PARSER_EOF;
				114	ctxt->disableSAX = 1;
				115	}
				116	if (extra)
				117	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
				118	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
				119	NULL, NULL, 0, 0,
				120	"Memory allocation failed : %s\n", extra);
				121	else
				122	__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
				123	XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
				124	NULL, NULL, 0, 0, "Memory allocation failed\n");
				125	}
				126
				127	/**
				128	* __xmlErrEncoding:
				129	* @ctxt: an XML parser context
				130	* @xmlerr: the error number
				131	* @msg: the error message
				132	* @str1: an string info
				133	* @str2: an string info
				134	*
				135	* Handle an encoding error
				136	*/
				137	void
				138	__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
				139	const char msg, const xmlChar str1, const xmlChar * str2)
				140	{
				141	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
				142	(ctxt->instate == XML_PARSER_EOF))
				143	return;
				144	if (ctxt != NULL)
				145	ctxt->errNo = xmlerr;
				146	__xmlRaiseError(NULL, NULL, NULL,
				147	ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
				148	NULL, 0, (const char ) str1, (const char ) str2,
				149	NULL, 0, 0, msg, str1, str2);
				150	if (ctxt != NULL) {
				151	ctxt->wellFormed = 0;
				152	if (ctxt->recovery == 0)
				153	ctxt->disableSAX = 1;
				154	}
				155	}
				156
				157	/**
				158	* xmlErrInternal:
				159	* @ctxt: an XML parser context
				160	* @msg: the error message
				161	* @str: error informations
				162	*
				163	* Handle an internal error
				164	*/
				165	static void
				166	xmlErrInternal(xmlParserCtxtPtr ctxt, const char msg, const xmlChar str)
				167	{
				168	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
				169	(ctxt->instate == XML_PARSER_EOF))
				170	return;
				171	if (ctxt != NULL)
				172	ctxt->errNo = XML_ERR_INTERNAL_ERROR;
				173	__xmlRaiseError(NULL, NULL, NULL,
				174	ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
				175	XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
				176	0, 0, msg, str);
				177	if (ctxt != NULL) {
				178	ctxt->wellFormed = 0;
				179	if (ctxt->recovery == 0)
				180	ctxt->disableSAX = 1;
				181	}
				182	}
				183
				184	/**
				185	* xmlErrEncodingInt:
				186	* @ctxt: an XML parser context
				187	* @error: the error number
				188	* @msg: the error message
				189	* @val: an integer value
				190	*
				191	* n encoding error
				192	*/
				193	static void
				194	xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
				195	const char *msg, int val)
				196	{
				197	if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
				198	(ctxt->instate == XML_PARSER_EOF))
				199	return;
				200	if (ctxt != NULL)
				201	ctxt->errNo = error;
				202	__xmlRaiseError(NULL, NULL, NULL,
				203	ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
				204	NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
				205	if (ctxt != NULL) {
				206	ctxt->wellFormed = 0;
				207	if (ctxt->recovery == 0)
				208	ctxt->disableSAX = 1;
				209	}
				210	}
				211
				/**
				 * xmlIsLetter:
				 * @c:  an unicode character (int)
				 *
				 * Check whether the character is allowed by the production
				 * [84] Letter ::= BaseChar | Ideographic
				 *
				 * Returns 0 if not, non-zero otherwise
				 */
				int
				xmlIsLetter(int c) {
				    return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
				}
				225
				226	/************************************************************************
				227	* *
				228	* Input handling functions for progressive parsing *
				229	* *
				230	************************************************************************/
				231
				232	/* #define DEBUG_INPUT */
				233	/* #define DEBUG_STACK */
				234	/* #define DEBUG_PUSH */
				235
				236
				237	/* we need to keep enough input to show errors in context */
				238	#define LINE_LEN 80
				239
				#ifdef DEBUG_INPUT
				#define CHECK_BUFFER(in) check_buffer(in)

				/*
				 * check_buffer:
				 * @in:  an XML parser input with a non-NULL buf and buf->buffer
				 *
				 * Debug helper (only built with DEBUG_INPUT): report through
				 * xmlGenericError when the base/cur pointers of the input no longer
				 * agree with the underlying buffer, then dump the buffer state.
				 */
				static
				void check_buffer(xmlParserInputPtr in) {
				    if (in->base != in->buf->buffer->content) {
				        xmlGenericError(xmlGenericErrorContext,
				                "xmlParserInput: base mismatch problem\n");
				    }
				    if (in->cur < in->base) {
				        xmlGenericError(xmlGenericErrorContext,
				                "xmlParserInput: cur < base problem\n");
				    }
				    if (in->cur > in->base + in->buf->buffer->use) {
				        xmlGenericError(xmlGenericErrorContext,
				                "xmlParserInput: cur > base + use problem\n");
				    }
				    /*
				     * Pointers are printed with %p and every other argument is cast to
				     * the exact type its conversion expects, so the dump is valid on
				     * platforms where pointers are wider than int.
				     */
				    xmlGenericError(xmlGenericErrorContext,
				            "buffer %p : content %p, cur %ld, use %u, size %u\n",
				            (void *) in, (void *) in->buf->buffer->content,
				            (long) (in->cur - in->base),
				            (unsigned int) in->buf->buffer->use,
				            (unsigned int) in->buf->buffer->size);
				}

				#else
				#define CHECK_BUFFER(in)
				#endif
				265
				266
				267	/**
				268	* xmlParserInputRead:
				269	* @in: an XML parser input
				270	* @len: an indicative size for the lookahead
				271	*
				272	* This function refresh the input for the parser. It doesn't try to
				273	* preserve pointers to the input buffer, and discard already read data
				274	*
				275	* Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
				276	* end of this entity
				277	*/
				278	int
				279	xmlParserInputRead(xmlParserInputPtr in, int len) {
				280	int ret;
				281	int used;
				282	int indx;
				283
				284	if (in == NULL) return(-1);
				285	#ifdef DEBUG_INPUT
				286	xmlGenericError(xmlGenericErrorContext, "Read\n");
				287	#endif
				288	if (in->buf == NULL) return(-1);
				289	if (in->base == NULL) return(-1);
				290	if (in->cur == NULL) return(-1);
				291	if (in->buf->buffer == NULL) return(-1);
				292	if (in->buf->readcallback == NULL) return(-1);
				293
				294	CHECK_BUFFER(in);
				295
				296	used = in->cur - in->buf->buffer->content;
				297	ret = xmlBufferShrink(in->buf->buffer, used);
				298	if (ret > 0) {
				299	in->cur -= ret;
				300	in->consumed += ret;
				301	}
				302	ret = xmlParserInputBufferRead(in->buf, len);
				303	if (in->base != in->buf->buffer->content) {
				304	/*
				305	* the buffer has been reallocated
				306	*/
				307	indx = in->cur - in->base;
				308	in->base = in->buf->buffer->content;
				309	in->cur = &in->buf->buffer->content[indx];
				310	}
				311	in->end = &in->buf->buffer->content[in->buf->buffer->use];
				312
				313	CHECK_BUFFER(in);
				314
				315	return(ret);
				316	}
				317
				318	/**
				319	* xmlParserInputGrow:
				320	* @in: an XML parser input
				321	* @len: an indicative size for the lookahead
				322	*
				323	* This function increase the input for the parser. It tries to
				324	* preserve pointers to the input buffer, and keep already read data
				325	*
				326	* Returns the number of xmlChars read, or -1 in case of error, 0 indicate the
				327	* end of this entity
				328	*/
				329	int
				330	xmlParserInputGrow(xmlParserInputPtr in, int len) {
				331	int ret;
				332	int indx;
				333
				334	if (in == NULL) return(-1);
				335	#ifdef DEBUG_INPUT
				336	xmlGenericError(xmlGenericErrorContext, "Grow\n");
				337	#endif
				338	if (in->buf == NULL) return(-1);
				339	if (in->base == NULL) return(-1);
				340	if (in->cur == NULL) return(-1);
				341	if (in->buf->buffer == NULL) return(-1);
				342
				343	CHECK_BUFFER(in);
				344
				345	indx = in->cur - in->base;
				346	if (in->buf->buffer->use > (unsigned int) indx + INPUT_CHUNK) {
				347
				348	CHECK_BUFFER(in);
				349
				350	return(0);
				351	}
				352	if (in->buf->readcallback != NULL)
				353	ret = xmlParserInputBufferGrow(in->buf, len);
				354	else
				355	return(0);
				356
				357	/*
				358	* NOTE : in->base may be a "dangling" i.e. freed pointer in this
				359	* block, but we use it really as an integer to do some
				360	* pointer arithmetic. Insure will raise it as a bug but in
				361	* that specific case, that's not !
				362	*/
				363	if (in->base != in->buf->buffer->content) {
				364	/*
				365	* the buffer has been reallocated
				366	*/
				367	indx = in->cur - in->base;
				368	in->base = in->buf->buffer->content;
				369	in->cur = &in->buf->buffer->content[indx];
				370	}
				371	in->end = &in->buf->buffer->content[in->buf->buffer->use];
				372
				373	CHECK_BUFFER(in);
				374
				375	return(ret);
				376	}
				377
				378	/**
				379	* xmlParserInputShrink:
				380	* @in: an XML parser input
				381	*
				382	* This function removes used input for the parser.
				383	*/
				384	void
				385	xmlParserInputShrink(xmlParserInputPtr in) {
				386	int used;
				387	int ret;
				388	int indx;
				389
				390	#ifdef DEBUG_INPUT
				391	xmlGenericError(xmlGenericErrorContext, "Shrink\n");
				392	#endif
				393	if (in == NULL) return;
				394	if (in->buf == NULL) return;
				395	if (in->base == NULL) return;
				396	if (in->cur == NULL) return;
				397	if (in->buf->buffer == NULL) return;
				398
				399	CHECK_BUFFER(in);
				400
				401	used = in->cur - in->buf->buffer->content;
				402	/*
				403	* Do not shrink on large buffers whose only a tiny fraction
				404	* was consumed
				405	*/
				406	if (used > INPUT_CHUNK) {
				407	ret = xmlBufferShrink(in->buf->buffer, used - LINE_LEN);
				408	if (ret > 0) {
				409	in->cur -= ret;
				410	in->consumed += ret;
				411	}
				412	in->end = &in->buf->buffer->content[in->buf->buffer->use];
				413	}
				414
				415	CHECK_BUFFER(in);
				416
				417	if (in->buf->buffer->use > INPUT_CHUNK) {
				418	return;
				419	}
				420	xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
				421	if (in->base != in->buf->buffer->content) {
				422	/*
				423	* the buffer has been reallocated
				424	*/
				425	indx = in->cur - in->base;
				426	in->base = in->buf->buffer->content;
				427	in->cur = &in->buf->buffer->content[indx];
				428	}
				429	in->end = &in->buf->buffer->content[in->buf->buffer->use];
				430
				431	CHECK_BUFFER(in);
				432	}
				433
				434	/************************************************************************
				435	* *
				436	* UTF8 character input and related functions *
				437	* *
				438	************************************************************************/
				439
				440	/**
				441	* xmlNextChar:
				442	* @ctxt: the XML parser context
				443	*
				444	* Skip to the next char input char.
				445	*/
				446
				447	void
				448	xmlNextChar(xmlParserCtxtPtr ctxt)
				449	{
				450	if ((ctxt == NULL) \|\| (ctxt->instate == XML_PARSER_EOF) \|\|
				451	(ctxt->input == NULL))
				452	return;
				453
				454	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
				455	if ((*ctxt->input->cur == 0) &&
				456	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0) &&
				457	(ctxt->instate != XML_PARSER_COMMENT)) {
				458	/*
				459	* If we are at the end of the current entity and
				460	* the context allows it, we pop consumed entities
				461	* automatically.
				462	* the auto closing should be blocked in other cases
				463	*/
				464	xmlPopInput(ctxt);
				465	} else {
				466	const unsigned char *cur;
				467	unsigned char c;
				468
				469	/*
				470	* 2.11 End-of-Line Handling
				471	* the literal two-character sequence "#xD#xA" or a standalone
				472	* literal #xD, an XML processor must pass to the application
				473	* the single character #xA.
				474	*/
				475	if (*(ctxt->input->cur) == '\n') {
				476	ctxt->input->line++; ctxt->input->col = 1;
				477	} else
				478	ctxt->input->col++;
				479
				480	/*
				481	* We are supposed to handle UTF8, check it's valid
				482	* From rfc2044: encoding of the Unicode values on UTF-8:
				483	*
				484	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
				485	* 0000 0000-0000 007F 0xxxxxxx
				486	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
				487	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
				488	*
				489	* Check for the 0x110000 limit too
				490	*/
				491	cur = ctxt->input->cur;
				492
				493	c = *cur;
				494	if (c & 0x80) {
				495	if (c == 0xC0)
				496	goto encoding_error;
				497	if (cur[1] == 0)
				498	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				499	if ((cur[1] & 0xc0) != 0x80)
				500	goto encoding_error;
				501	if ((c & 0xe0) == 0xe0) {
				502	unsigned int val;
				503
				504	if (cur[2] == 0)
				505	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				506	if ((cur[2] & 0xc0) != 0x80)
				507	goto encoding_error;
				508	if ((c & 0xf0) == 0xf0) {
				509	if (cur[3] == 0)
				510	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				511	if (((c & 0xf8) != 0xf0) \|\|
				512	((cur[3] & 0xc0) != 0x80))
				513	goto encoding_error;
				514	/* 4-byte code */
				515	ctxt->input->cur += 4;
				516	val = (cur[0] & 0x7) << 18;
				517	val \|= (cur[1] & 0x3f) << 12;
				518	val \|= (cur[2] & 0x3f) << 6;
				519	val \|= cur[3] & 0x3f;
				520	} else {
				521	/* 3-byte code */
				522	ctxt->input->cur += 3;
				523	val = (cur[0] & 0xf) << 12;
				524	val \|= (cur[1] & 0x3f) << 6;
				525	val \|= cur[2] & 0x3f;
				526	}
				527	if (((val > 0xd7ff) && (val < 0xe000)) \|\|
				528	((val > 0xfffd) && (val < 0x10000)) \|\|
				529	(val >= 0x110000)) {
				530	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
				531	"Char 0x%X out of allowed range\n",
				532	val);
				533	}
				534	} else
				535	/* 2-byte code */
				536	ctxt->input->cur += 2;
				537	} else
				538	/* 1-byte code */
				539	ctxt->input->cur++;
				540
				541	ctxt->nbChars++;
				542	if (*ctxt->input->cur == 0)
				543	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				544	}
				545	} else {
				546	/*
				547	* Assume it's a fixed length encoding (1) with
				548	* a compatible encoding for the ASCII set, since
				549	* XML constructs only use < 128 chars
				550	*/
				551
				552	if (*(ctxt->input->cur) == '\n') {
				553	ctxt->input->line++; ctxt->input->col = 1;
				554	} else
				555	ctxt->input->col++;
				556	ctxt->input->cur++;
				557	ctxt->nbChars++;
				558	if (*ctxt->input->cur == 0)
				559	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				560	}
				561	if ((*ctxt->input->cur == '%') && (!ctxt->html))
				562	xmlParserHandlePEReference(ctxt);
				563	if ((*ctxt->input->cur == 0) &&
				564	(xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
				565	xmlPopInput(ctxt);
				566	return;
				567	encoding_error:
				568	/*
				569	* If we detect an UTF8 error that probably mean that the
				570	* input encoding didn't get properly advertised in the
				571	* declaration header. Report the error and switch the encoding
				572	* to ISO-Latin-1 (if you don't like this policy, just declare the
				573	* encoding !)
				574	*/
				575	if ((ctxt == NULL) \|\| (ctxt->input == NULL) \|\|
				576	(ctxt->input->end - ctxt->input->cur < 4)) {
				577	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
				578	"Input is not proper UTF-8, indicate encoding !\n",
				579	NULL, NULL);
				580	} else {
				581	char buffer[150];
				582
				583	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
				584	ctxt->input->cur[0], ctxt->input->cur[1],
				585	ctxt->input->cur[2], ctxt->input->cur[3]);
				586	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
				587	"Input is not proper UTF-8, indicate encoding !\n%s",
				588	BAD_CAST buffer, NULL);
				589	}
				590	ctxt->charset = XML_CHAR_ENCODING_8859_1;
				591	ctxt->input->cur++;
				592	return;
				593	}
				594
				595	/**
				596	* xmlCurrentChar:
				597	* @ctxt: the XML parser context
				598	* @len: pointer to the length of the char read
				599	*
				600	* The current char value, if using UTF-8 this may actually span multiple
				601	* bytes in the input buffer. Implement the end of line normalization:
				602	* 2.11 End-of-Line Handling
				603	* Wherever an external parsed entity or the literal entity value
				604	* of an internal parsed entity contains either the literal two-character
				605	* sequence "#xD#xA" or a standalone literal #xD, an XML processor
				606	* must pass to the application the single character #xA.
				607	* This behavior can conveniently be produced by normalizing all
				608	* line breaks to #xA on input, before parsing.)
				609	*
				610	* Returns the current char value and its length
				611	*/
				612
				613	int
				614	xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
				615	if ((ctxt == NULL) \|\| (len == NULL) \|\| (ctxt->input == NULL)) return(0);
				616	if (ctxt->instate == XML_PARSER_EOF)
				617	return(0);
				618
				619	if ((ctxt->input->cur >= 0x20) && (ctxt->input->cur <= 0x7F)) {
				620	*len = 1;
				621	return((int) *ctxt->input->cur);
				622	}
				623	if (ctxt->charset == XML_CHAR_ENCODING_UTF8) {
				624	/*
				625	* We are supposed to handle UTF8, check it's valid
				626	* From rfc2044: encoding of the Unicode values on UTF-8:
				627	*
				628	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
				629	* 0000 0000-0000 007F 0xxxxxxx
				630	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
				631	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
				632	*
				633	* Check for the 0x110000 limit too
				634	*/
				635	const unsigned char *cur = ctxt->input->cur;
				636	unsigned char c;
				637	unsigned int val;
				638
				639	c = *cur;
				640	if (c & 0x80) {
				641	if (((c & 0x40) == 0) \|\| (c == 0xC0))
				642	goto encoding_error;
				643	if (cur[1] == 0)
				644	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				645	if ((cur[1] & 0xc0) != 0x80)
				646	goto encoding_error;
				647	if ((c & 0xe0) == 0xe0) {
				648	if (cur[2] == 0)
				649	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				650	if ((cur[2] & 0xc0) != 0x80)
				651	goto encoding_error;
				652	if ((c & 0xf0) == 0xf0) {
				653	if (cur[3] == 0)
				654	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				655	if (((c & 0xf8) != 0xf0) \|\|
				656	((cur[3] & 0xc0) != 0x80))
				657	goto encoding_error;
				658	/* 4-byte code */
				659	*len = 4;
				660	val = (cur[0] & 0x7) << 18;
				661	val \|= (cur[1] & 0x3f) << 12;
				662	val \|= (cur[2] & 0x3f) << 6;
				663	val \|= cur[3] & 0x3f;
				664	if (val < 0x10000)
				665	goto encoding_error;
				666	} else {
				667	/* 3-byte code */
				668	*len = 3;
				669	val = (cur[0] & 0xf) << 12;
				670	val \|= (cur[1] & 0x3f) << 6;
				671	val \|= cur[2] & 0x3f;
				672	if (val < 0x800)
				673	goto encoding_error;
				674	}
				675	} else {
				676	/* 2-byte code */
				677	*len = 2;
				678	val = (cur[0] & 0x1f) << 6;
				679	val \|= cur[1] & 0x3f;
				680	if (val < 0x80)
				681	goto encoding_error;
				682	}
				683	if (!IS_CHAR(val)) {
				684	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
				685	"Char 0x%X out of allowed range\n", val);
				686	}
				687	return(val);
				688	} else {
				689	/* 1-byte code */
				690	*len = 1;
				691	if (*ctxt->input->cur == 0)
				692	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
				693	if ((*ctxt->input->cur == 0) &&
				694	(ctxt->input->end > ctxt->input->cur)) {
				695	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
				696	"Char 0x0 out of allowed range\n", 0);
				697	}
				698	if (*ctxt->input->cur == 0xD) {
				699	if (ctxt->input->cur[1] == 0xA) {
				700	ctxt->nbChars++;
				701	ctxt->input->cur++;
				702	}
				703	return(0xA);
				704	}
				705	return((int) *ctxt->input->cur);
				706	}
				707	}
				708	/*
				709	* Assume it's a fixed length encoding (1) with
				710	* a compatible encoding for the ASCII set, since
				711	* XML constructs only use < 128 chars
				712	*/
				713	*len = 1;
				714	if (*ctxt->input->cur == 0xD) {
				715	if (ctxt->input->cur[1] == 0xA) {
				716	ctxt->nbChars++;
				717	ctxt->input->cur++;
				718	}
				719	return(0xA);
				720	}
				721	return((int) *ctxt->input->cur);
				722	encoding_error:
				723	/*
				724	* An encoding problem may arise from a truncated input buffer
				725	* splitting a character in the middle. In that case do not raise
				726	* an error but return 0 to endicate an end of stream problem
				727	*/
				728	if (ctxt->input->end - ctxt->input->cur < 4) {
				729	*len = 0;
				730	return(0);
				731	}
				732
				733	/*
				734	* If we detect an UTF8 error that probably mean that the
				735	* input encoding didn't get properly advertised in the
				736	* declaration header. Report the error and switch the encoding
				737	* to ISO-Latin-1 (if you don't like this policy, just declare the
				738	* encoding !)
				739	*/
				740	{
				741	char buffer[150];
				742
				743	snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
				744	ctxt->input->cur[0], ctxt->input->cur[1],
				745	ctxt->input->cur[2], ctxt->input->cur[3]);
				746	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
				747	"Input is not proper UTF-8, indicate encoding !\n%s",
				748	BAD_CAST buffer, NULL);
				749	}
				750	ctxt->charset = XML_CHAR_ENCODING_8859_1;
				751	*len = 1;
				752	return((int) *ctxt->input->cur);
				753	}
				754
				755	/**
				756	* xmlStringCurrentChar:
				757	* @ctxt: the XML parser context
				758	* @cur: pointer to the beginning of the char
				759	* @len: pointer to the length of the char read
				760	*
				761	* The current char value, if using UTF-8 this may actually span multiple
				762	* bytes in the input buffer.
				763	*
				764	* Returns the current char value and its length
				765	*/
				766
				767	int
				768	xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar * cur, int *len)
				769	{
				770	if ((len == NULL) \|\| (cur == NULL)) return(0);
				771	if ((ctxt == NULL) \|\| (ctxt->charset == XML_CHAR_ENCODING_UTF8)) {
				772	/*
				773	* We are supposed to handle UTF8, check it's valid
				774	* From rfc2044: encoding of the Unicode values on UTF-8:
				775	*
				776	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
				777	* 0000 0000-0000 007F 0xxxxxxx
				778	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
				779	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
				780	*
				781	* Check for the 0x110000 limit too
				782	*/
				783	unsigned char c;
				784	unsigned int val;
				785
				786	c = *cur;
				787	if (c & 0x80) {
				788	if ((cur[1] & 0xc0) != 0x80)
				789	goto encoding_error;
				790	if ((c & 0xe0) == 0xe0) {
				791
				792	if ((cur[2] & 0xc0) != 0x80)
				793	goto encoding_error;
				794	if ((c & 0xf0) == 0xf0) {
				795	if (((c & 0xf8) != 0xf0) \|\| ((cur[3] & 0xc0) != 0x80))
				796	goto encoding_error;
				797	/* 4-byte code */
				798	*len = 4;
				799	val = (cur[0] & 0x7) << 18;
				800	val \|= (cur[1] & 0x3f) << 12;
				801	val \|= (cur[2] & 0x3f) << 6;
				802	val \|= cur[3] & 0x3f;
				803	} else {
				804	/* 3-byte code */
				805	*len = 3;
				806	val = (cur[0] & 0xf) << 12;
				807	val \|= (cur[1] & 0x3f) << 6;
				808	val \|= cur[2] & 0x3f;
				809	}
				810	} else {
				811	/* 2-byte code */
				812	*len = 2;
				813	val = (cur[0] & 0x1f) << 6;
				814	val \|= cur[1] & 0x3f;
				815	}
				816	if (!IS_CHAR(val)) {
				817	xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
				818	"Char 0x%X out of allowed range\n", val);
				819	}
				820	return (val);
				821	} else {
				822	/* 1-byte code */
				823	*len = 1;
				824	return ((int) *cur);
				825	}
				826	}
				827	/*
				828	* Assume it's a fixed length encoding (1) with
				829	* a compatible encoding for the ASCII set, since
				830	* XML constructs only use < 128 chars
				831	*/
				832	*len = 1;
				833	return ((int) *cur);
				834	encoding_error:
				835
				836	/*
				837	* An encoding problem may arise from a truncated input buffer
				838	* splitting a character in the middle. In that case do not raise
				839	* an error but return 0 to endicate an end of stream problem
				840	*/
				841	if ((ctxt == NULL) \|\| (ctxt->input == NULL) \|\|
				842	(ctxt->input->end - ctxt->input->cur < 4)) {
				843	*len = 0;
				844	return(0);
				845	}
				846	/*
				847	* If we detect an UTF8 error that probably mean that the
				848	* input encoding didn't get properly advertised in the
				849	* declaration header. Report the error and switch the encoding
				850	* to ISO-Latin-1 (if you don't like this policy, just declare the
				851	* encoding !)
				852	*/
				853	{
				854	char buffer[150];
				855
				856	snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
				857	ctxt->input->cur[0], ctxt->input->cur[1],
				858	ctxt->input->cur[2], ctxt->input->cur[3]);
				859	__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
				860	"Input is not proper UTF-8, indicate encoding !\n%s",
				861	BAD_CAST buffer, NULL);
				862	}
				863	*len = 1;
				864	return ((int) *cur);
				865	}
				866
				867	/**
				868	* xmlCopyCharMultiByte:
				869	* @out: pointer to an array of xmlChar
				870	* @val: the char value
				871	*
				872	* append the char value in the array
				873	*
				874	* Returns the number of xmlChar written
				875	*/
				876	int
				877	xmlCopyCharMultiByte(xmlChar *out, int val) {
				878	if (out == NULL) return(0);
				879	/*
				880	* We are supposed to handle UTF8, check it's valid
				881	* From rfc2044: encoding of the Unicode values on UTF-8:
				882	*
				883	* UCS-4 range (hex.) UTF-8 octet sequence (binary)
				884	* 0000 0000-0000 007F 0xxxxxxx
				885	* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
				886	* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
				887	*/
				888	if (val >= 0x80) {
				889	xmlChar *savedout = out;
				890	int bits;
				891	if (val < 0x800) { *out++= (val >> 6) \| 0xC0; bits= 0; }
				892	else if (val < 0x10000) { *out++= (val >> 12) \| 0xE0; bits= 6;}
				893	else if (val < 0x110000) { *out++= (val >> 18) \| 0xF0; bits= 12; }
				894	else {
				895	xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
				896	"Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
				897	val);
				898	return(0);
				899	}
				900	for ( ; bits >= 0; bits-= 6)
				901	*out++= ((val >> bits) & 0x3F) \| 0x80 ;
				902	return (out - savedout);
				903	}
				904	*out = (xmlChar) val;
				905	return 1;
				906	}
				907
				908	/**
				909	* xmlCopyChar:
				910	* @len: Ignored, compatibility
				911	* @out: pointer to an array of xmlChar
				912	* @val: the char value
				913	*
				914	* append the char value in the array
				915	*
				916	* Returns the number of xmlChar written
				917	*/
				918
				919	int
				920	xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
				921	if (out == NULL) return(0);
				922	/* the len parameter is ignored */
				923	if (val >= 0x80) {
				924	return(xmlCopyCharMultiByte (out, val));
				925	}
				926	*out = (xmlChar) val;
				927	return 1;
				928	}
				929
				930	/************************************************************************
				931	* *
				932	* Commodity functions to switch encodings *
				933	* *
				934	************************************************************************/
				935
				936	/**
				937	* xmlSwitchEncoding:
				938	* @ctxt: the parser context
				939	* @enc: the encoding value (number)
				940	*
				941	* change the input functions when discovering the character encoding
				942	* of a given entity.
				943	*
				944	* Returns 0 in case of success, -1 otherwise
				945	*/
				946	int
				947	xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
				948	{
				949	xmlCharEncodingHandlerPtr handler;
				950
				951	if (ctxt == NULL) return(-1);
				952	switch (enc) {
				953	case XML_CHAR_ENCODING_ERROR:
				954	__xmlErrEncoding(ctxt, XML_ERR_UNKNOWN_ENCODING,
				955	"encoding unknown\n", NULL, NULL);
				956	return(-1);
				957	case XML_CHAR_ENCODING_NONE:
				958	/* let's assume it's UTF-8 without the XML decl */
				959	ctxt->charset = XML_CHAR_ENCODING_UTF8;
				960	return(0);
				961	case XML_CHAR_ENCODING_UTF8:
				962	/* default encoding, no conversion should be needed */
				963	ctxt->charset = XML_CHAR_ENCODING_UTF8;
				964
				965	/*
				966	* Errata on XML-1.0 June 20 2001
				967	* Specific handling of the Byte Order Mark for
				968	* UTF-8
				969	*/
				970	if ((ctxt->input != NULL) &&
				971	(ctxt->input->cur[0] == 0xEF) &&
				972	(ctxt->input->cur[1] == 0xBB) &&
				973	(ctxt->input->cur[2] == 0xBF)) {
				974	ctxt->input->cur += 3;
				975	}
				976	return(0);
				977	case XML_CHAR_ENCODING_UTF16LE:
				978	case XML_CHAR_ENCODING_UTF16BE:
				979	/*The raw input characters are encoded
				980	*in UTF-16. As we expect this function
				981	*to be called after xmlCharEncInFunc, we expect
				982	*ctxt->input->cur to contain UTF-8 encoded characters.
				983	*So the raw UTF16 Byte Order Mark
				984	*has also been converted into
				985	*an UTF-8 BOM. Let's skip that BOM.
				986	*/
				987	if ((ctxt->input != NULL) && (ctxt->input->cur != NULL) &&
				988	(ctxt->input->cur[0] == 0xEF) &&
				989	(ctxt->input->cur[1] == 0xBB) &&
				990	(ctxt->input->cur[2] == 0xBF)) {
				991	ctxt->input->cur += 3;
				992	}
				993	break ;
				994	default:
				995	break;
				996	}
				997	handler = xmlGetCharEncodingHandler(enc);
				998	if (handler == NULL) {
				999	/*
				1000	* Default handlers.
				1001	*/
				1002	switch (enc) {
				1003	case XML_CHAR_ENCODING_ASCII:
				1004	/* default encoding, no conversion should be needed */
				1005	ctxt->charset = XML_CHAR_ENCODING_UTF8;
				1006	return(0);
				1007	case XML_CHAR_ENCODING_UTF16LE:
				1008	break;
				1009	case XML_CHAR_ENCODING_UTF16BE:
				1010	break;
				1011	case XML_CHAR_ENCODING_UCS4LE:
				1012	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
				1013	"encoding not supported %s\n",
				1014	BAD_CAST "USC4 little endian", NULL);
				1015	break;
				1016	case XML_CHAR_ENCODING_UCS4BE:
				1017	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
				1018	"encoding not supported %s\n",
				1019	BAD_CAST "USC4 big endian", NULL);
				1020	break;
				1021	case XML_CHAR_ENCODING_EBCDIC:
				1022	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
				1023	"encoding not supported %s\n",
				1024	BAD_CAST "EBCDIC", NULL);
				1025	break;
				1026	case XML_CHAR_ENCODING_UCS4_2143:
				1027	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
				1028	"encoding not supported %s\n",
				1029	BAD_CAST "UCS4 2143", NULL);
				1030	break;
				1031	case XML_CHAR_ENCODING_UCS4_3412:
				1032	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
				1033	"encoding not supported %s\n",
				1034	BAD_CAST "UCS4 3412", NULL);
				1035	break;
				1036	case XML_CHAR_ENCODING_UCS2:
				1037	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
				1038	"encoding not supported %s\n",
				1039	BAD_CAST "UCS2", NULL);
				1040	break;
				1041	case XML_CHAR_ENCODING_8859_1:
				1042	case XML_CHAR_ENCODING_8859_2:
				1043	case XML_CHAR_ENCODING_8859_3:
				1044	case XML_CHAR_ENCODING_8859_4:
				1045	case XML_CHAR_ENCODING_8859_5:
				1046	case XML_CHAR_ENCODING_8859_6:
				1047	case XML_CHAR_ENCODING_8859_7:
				1048	case XML_CHAR_ENCODING_8859_8:
				1049	case XML_CHAR_ENCODING_8859_9:
				1050	/*
				1051	* We used to keep the internal content in the
				1052	* document encoding however this turns being unmaintainable
				1053	* So xmlGetCharEncodingHandler() will return non-null
				1054	* values for this now.
				1055	*/
				1056	if ((ctxt->inputNr == 1) &&
				1057	(ctxt->encoding == NULL) &&
				1058	(ctxt->input != NULL) &&
				1059	(ctxt->input->encoding != NULL)) {
				1060	ctxt->encoding = xmlStrdup(ctxt->input->encoding);
				1061	}
				1062	ctxt->charset = enc;
				1063	return(0);
				1064	case XML_CHAR_ENCODING_2022_JP:
				1065	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
				1066	"encoding not supported %s\n",
				1067	BAD_CAST "ISO-2022-JP", NULL);
				1068	break;
				1069	case XML_CHAR_ENCODING_SHIFT_JIS:
				1070	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
				1071	"encoding not supported %s\n",
				1072	BAD_CAST "Shift_JIS", NULL);
				1073	break;
				1074	case XML_CHAR_ENCODING_EUC_JP:
				1075	__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
				1076	"encoding not supported %s\n",
				1077	BAD_CAST "EUC-JP", NULL);
				1078	break;
				1079	default:
				1080	break;
				1081	}
				1082	}
				1083	if (handler == NULL)
				1084	return(-1);
				1085	ctxt->charset = XML_CHAR_ENCODING_UTF8;
				1086	return(xmlSwitchToEncoding(ctxt, handler));
				1087	}
				1088
				1089	/**
				1090	* xmlSwitchInputEncoding:
				1091	* @ctxt: the parser context
				1092	* @input: the input stream
				1093	* @handler: the encoding handler
				1094	*
				1095	* change the input functions when discovering the character encoding
				1096	* of a given entity.
				1097	*
				1098	* Returns 0 in case of success, -1 otherwise
				1099	*/
				1100	int
				1101	xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
				1102	xmlCharEncodingHandlerPtr handler)
				1103	{
				1104	int nbchars;
				1105
				1106	if (handler == NULL)
				1107	return (-1);
				1108	if (input == NULL)
				1109	return (-1);
				1110	if (input->buf != NULL) {
				1111	if (input->buf->encoder != NULL) {
				1112	/*
				1113	* Check in case the auto encoding detetection triggered
				1114	* in already.
				1115	*/
				1116	if (input->buf->encoder == handler)
				1117	return (0);
				1118
				1119	/*
				1120	* "UTF-16" can be used for both LE and BE
				1121	if ((!xmlStrncmp(BAD_CAST input->buf->encoder->name,
				1122	BAD_CAST "UTF-16", 6)) &&
				1123	(!xmlStrncmp(BAD_CAST handler->name,
				1124	BAD_CAST "UTF-16", 6))) {
				1125	return(0);
				1126	}
				1127	*/
				1128
				1129	/*
				1130	* Note: this is a bit dangerous, but that's what it
				1131	* takes to use nearly compatible signature for different
				1132	* encodings.
				1133	*/
				1134	xmlCharEncCloseFunc(input->buf->encoder);
				1135	input->buf->encoder = handler;
				1136	return (0);
				1137	}
				1138	input->buf->encoder = handler;
				1139
				1140	/*
				1141	* Is there already some content down the pipe to convert ?
				1142	*/
				1143	if ((input->buf->buffer != NULL) && (input->buf->buffer->use > 0)) {
				1144	int processed;
				1145	unsigned int use;
				1146
				1147	/*
				1148	* Specific handling of the Byte Order Mark for
				1149	* UTF-16
				1150	*/
				1151	if ((handler->name != NULL) &&
				1152	(!strcmp(handler->name, "UTF-16LE") \|\|
				1153	!strcmp(handler->name, "UTF-16")) &&
				1154	(input->cur[0] == 0xFF) && (input->cur[1] == 0xFE)) {
				1155	input->cur += 2;
				1156	}
				1157	if ((handler->name != NULL) &&
				1158	(!strcmp(handler->name, "UTF-16BE")) &&
				1159	(input->cur[0] == 0xFE) && (input->cur[1] == 0xFF)) {
				1160	input->cur += 2;
				1161	}
				1162	/*
				1163	* Errata on XML-1.0 June 20 2001
				1164	* Specific handling of the Byte Order Mark for
				1165	* UTF-8
				1166	*/
				1167	if ((handler->name != NULL) &&
				1168	(!strcmp(handler->name, "UTF-8")) &&
				1169	(input->cur[0] == 0xEF) &&
				1170	(input->cur[1] == 0xBB) && (input->cur[2] == 0xBF)) {
				1171	input->cur += 3;
				1172	}
				1173
				1174	/*
				1175	* Shrink the current input buffer.
				1176	* Move it as the raw buffer and create a new input buffer
				1177	*/
				1178	processed = input->cur - input->base;
				1179	xmlBufferShrink(input->buf->buffer, processed);
				1180	input->buf->raw = input->buf->buffer;
				1181	input->buf->buffer = xmlBufferCreate();
				1182	input->buf->rawconsumed = processed;
				1183	use = input->buf->raw->use;
				1184
				1185	if (ctxt->html) {
				1186	/*
				1187	* convert as much as possible of the buffer
				1188	*/
				1189	nbchars = xmlCharEncInFunc(input->buf->encoder,
				1190	input->buf->buffer,
				1191	input->buf->raw);
				1192	} else {
				1193	/*
				1194	* convert just enough to get
				1195	* '<?xml version="1.0" encoding="xxx"?>'
				1196	* parsed with the autodetected encoding
				1197	* into the parser reading buffer.
				1198	*/
				1199	nbchars = xmlCharEncFirstLine(input->buf->encoder,
				1200	input->buf->buffer,
				1201	input->buf->raw);
				1202	}
				1203	if (nbchars < 0) {
				1204	xmlErrInternal(ctxt,
				1205	"switching encoding: encoder error\n",
				1206	NULL);
				1207	return (-1);
				1208	}
				1209	input->buf->rawconsumed += use - input->buf->raw->use;
				1210	input->base = input->cur = input->buf->buffer->content;
				1211	input->end = &input->base[input->buf->buffer->use];
				1212
				1213	}
				1214	return (0);
				1215	} else if (input->length == 0) {
				1216	/*
				1217	* When parsing a static memory array one must know the
				1218	* size to be able to convert the buffer.
				1219	*/
				1220	xmlErrInternal(ctxt, "switching encoding : no input\n", NULL);
				1221	return (-1);
				1222	}
				1223	return (0);
				1224	}
				1225
				1226	/**
				1227	* xmlSwitchToEncoding:
				1228	* @ctxt: the parser context
				1229	* @handler: the encoding handler
				1230	*
				1231	* change the input functions when discovering the character encoding
				1232	* of a given entity.
				1233	*
				1234	* Returns 0 in case of success, -1 otherwise
				1235	*/
				1236	int
				1237	xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
				1238	{
				1239	int ret = 0;
				1240
				1241	if (handler != NULL) {
				1242	if (ctxt->input != NULL) {
				1243	ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
				1244	} else {
				1245	xmlErrInternal(ctxt, "xmlSwitchToEncoding : no input\n",
				1246	NULL);
				1247	return(-1);
				1248	}
				1249	/*
				1250	* The parsing is now done in UTF8 natively
				1251	*/
				1252	ctxt->charset = XML_CHAR_ENCODING_UTF8;
				1253	} else
				1254	return(-1);
				1255	return(ret);
				1256	}
				1257
				1258	/************************************************************************
				1259	* *
				1260	* Commodity functions to handle entities processing *
				1261	* *
				1262	************************************************************************/
				1263
				1264	/**
				1265	* xmlFreeInputStream:
				1266	* @input: an xmlParserInputPtr
				1267	*
				1268	* Free up an input stream.
				1269	*/
				1270	void
				1271	xmlFreeInputStream(xmlParserInputPtr input) {
				1272	if (input == NULL) return;
				1273
				1274	if (input->filename != NULL) xmlFree((char *) input->filename);
				1275	if (input->directory != NULL) xmlFree((char *) input->directory);
				1276	if (input->encoding != NULL) xmlFree((char *) input->encoding);
				1277	if (input->version != NULL) xmlFree((char *) input->version);
				1278	if ((input->free != NULL) && (input->base != NULL))
				1279	input->free((xmlChar *) input->base);
				1280	if (input->buf != NULL)
				1281	xmlFreeParserInputBuffer(input->buf);
				1282	xmlFree(input);
				1283	}
				1284
				1285	/**
				1286	* xmlNewInputStream:
				1287	* @ctxt: an XML parser context
				1288	*
				1289	* Create a new input stream structure
				1290	* Returns the new input stream or NULL
				1291	*/
				1292	xmlParserInputPtr
				1293	xmlNewInputStream(xmlParserCtxtPtr ctxt) {
				1294	xmlParserInputPtr input;
				1295	static int id = 0;
				1296
				1297	input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
				1298	if (input == NULL) {
				1299	xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
				1300	return(NULL);
				1301	}
				1302	memset(input, 0, sizeof(xmlParserInput));
				1303	input->line = 1;
				1304	input->col = 1;
				1305	input->standalone = -1;
				1306	/*
				1307	* we don't care about thread reentrancy unicity for a single
				1308	* parser context (and hence thread) is sufficient.
				1309	*/
				1310	input->id = id++;
				1311	return(input);
				1312	}
				1313
				1314	/**
				1315	* xmlNewIOInputStream:
				1316	* @ctxt: an XML parser context
				1317	* @input: an I/O Input
				1318	* @enc: the charset encoding if known
				1319	*
				1320	* Create a new input stream structure encapsulating the @input into
				1321	* a stream suitable for the parser.
				1322	*
				1323	* Returns the new input stream or NULL
				1324	*/
				1325	xmlParserInputPtr
				1326	xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
				1327	xmlCharEncoding enc) {
				1328	xmlParserInputPtr inputStream;
				1329
				1330	if (input == NULL) return(NULL);
				1331	if (xmlParserDebugEntities)
				1332	xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
				1333	inputStream = xmlNewInputStream(ctxt);
				1334	if (inputStream == NULL) {
				1335	return(NULL);
				1336	}
				1337	inputStream->filename = NULL;
				1338	inputStream->buf = input;
				1339	inputStream->base = inputStream->buf->buffer->content;
				1340	inputStream->cur = inputStream->buf->buffer->content;
				1341	inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
				1342	if (enc != XML_CHAR_ENCODING_NONE) {
				1343	xmlSwitchEncoding(ctxt, enc);
				1344	}
				1345
				1346	return(inputStream);
				1347	}
				1348
				1349	/**
				1350	* xmlNewEntityInputStream:
				1351	* @ctxt: an XML parser context
				1352	* @entity: an Entity pointer
				1353	*
				1354	* Create a new input stream based on an xmlEntityPtr
				1355	*
				1356	* Returns the new input stream or NULL
				1357	*/
				1358	xmlParserInputPtr
				1359	xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
				1360	xmlParserInputPtr input;
				1361
				1362	if (entity == NULL) {
				1363	xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
				1364	NULL);
				1365	return(NULL);
				1366	}
				1367	if (xmlParserDebugEntities)
				1368	xmlGenericError(xmlGenericErrorContext,
				1369	"new input from entity: %s\n", entity->name);
				1370	if (entity->content == NULL) {
				1371	switch (entity->etype) {
				1372	case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
				1373	xmlErrInternal(ctxt, "Cannot parse entity %s\n",
				1374	entity->name);
				1375	break;
				1376	case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
				1377	case XML_EXTERNAL_PARAMETER_ENTITY:
				1378	return(xmlLoadExternalEntity((char *) entity->URI,
				1379	(char *) entity->ExternalID, ctxt));
				1380	case XML_INTERNAL_GENERAL_ENTITY:
				1381	xmlErrInternal(ctxt,
				1382	"Internal entity %s without content !\n",
				1383	entity->name);
				1384	break;
				1385	case XML_INTERNAL_PARAMETER_ENTITY:
				1386	xmlErrInternal(ctxt,
				1387	"Internal parameter entity %s without content !\n",
				1388	entity->name);
				1389	break;
				1390	case XML_INTERNAL_PREDEFINED_ENTITY:
				1391	xmlErrInternal(ctxt,
				1392	"Predefined entity %s without content !\n",
				1393	entity->name);
				1394	break;
				1395	}
				1396	return(NULL);
				1397	}
				1398	input = xmlNewInputStream(ctxt);
				1399	if (input == NULL) {
				1400	return(NULL);
				1401	}
Patrick Scott	60a4c35	2009-07-09 09:30:54 -0400	[diff] [blame^]	1402	if (entity->URI != NULL)
				1403	input->filename = (char ) xmlStrdup((xmlChar ) entity->URI);
The Android Open Source Project	ab4e2e9	2009-03-03 19:30:06 -0800	[diff] [blame]	1404	input->base = entity->content;
				1405	input->cur = entity->content;
				1406	input->length = entity->length;
				1407	input->end = &entity->content[input->length];
				1408	return(input);
				1409	}
				1410
				1411	/**
				1412	* xmlNewStringInputStream:
				1413	* @ctxt: an XML parser context
				1414	* @buffer: an memory buffer
				1415	*
				1416	* Create a new input stream based on a memory buffer.
				1417	* Returns the new input stream
				1418	*/
				1419	xmlParserInputPtr
				1420	xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
				1421	xmlParserInputPtr input;
				1422
				1423	if (buffer == NULL) {
				1424	xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
				1425	NULL);
				1426	return(NULL);
				1427	}
				1428	if (xmlParserDebugEntities)
				1429	xmlGenericError(xmlGenericErrorContext,
				1430	"new fixed input: %.30s\n", buffer);
				1431	input = xmlNewInputStream(ctxt);
				1432	if (input == NULL) {
				1433	xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
				1434	return(NULL);
				1435	}
				1436	input->base = buffer;
				1437	input->cur = buffer;
				1438	input->length = xmlStrlen(buffer);
				1439	input->end = &buffer[input->length];
				1440	return(input);
				1441	}
				1442
				1443	/**
				1444	* xmlNewInputFromFile:
				1445	* @ctxt: an XML parser context
				1446	* @filename: the filename to use as entity
				1447	*
				1448	* Create a new input stream based on a file or an URL.
				1449	*
				1450	* Returns the new input stream or NULL in case of error
				1451	*/
				1452	xmlParserInputPtr
				1453	xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
				1454	xmlParserInputBufferPtr buf;
				1455	xmlParserInputPtr inputStream;
				1456	char *directory = NULL;
				1457	xmlChar *URI = NULL;
				1458
				1459	if (xmlParserDebugEntities)
				1460	xmlGenericError(xmlGenericErrorContext,
				1461	"new input from file: %s\n", filename);
				1462	if (ctxt == NULL) return(NULL);
				1463	buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
				1464	if (buf == NULL) {
				1465	if (filename == NULL)
				1466	__xmlLoaderErr(ctxt,
				1467	"failed to load external entity: NULL filename \n",
				1468	NULL);
				1469	else
				1470	__xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
				1471	(const char *) filename);
				1472	return(NULL);
				1473	}
				1474
				1475	inputStream = xmlNewInputStream(ctxt);
				1476	if (inputStream == NULL)
				1477	return(NULL);
				1478
				1479	inputStream->buf = buf;
				1480	inputStream = xmlCheckHTTPInput(ctxt, inputStream);
				1481	if (inputStream == NULL)
				1482	return(NULL);
				1483
				1484	if (inputStream->filename == NULL)
				1485	URI = xmlStrdup((xmlChar *) filename);
				1486	else
				1487	URI = xmlStrdup((xmlChar *) inputStream->filename);
				1488	directory = xmlParserGetDirectory((const char *) URI);
				1489	if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
				1490	inputStream->filename = (char ) xmlCanonicPath((const xmlChar ) URI);
				1491	if (URI != NULL) xmlFree((char *) URI);
				1492	inputStream->directory = directory;
				1493
				1494	inputStream->base = inputStream->buf->buffer->content;
				1495	inputStream->cur = inputStream->buf->buffer->content;
				1496	inputStream->end = &inputStream->base[inputStream->buf->buffer->use];
				1497	if ((ctxt->directory == NULL) && (directory != NULL))
				1498	ctxt->directory = (char ) xmlStrdup((const xmlChar ) directory);
				1499	return(inputStream);
				1500	}
				1501
				1502	/************************************************************************
				1503	* *
				1504	* Commodity functions to handle parser contexts *
				1505	* *
				1506	************************************************************************/
				1507
				1508	/**
				1509	* xmlInitParserCtxt:
				1510	* @ctxt: an XML parser context
				1511	*
				1512	* Initialize a parser context
				1513	*
				1514	* Returns 0 in case of success and -1 in case of error
				1515	*/
				1516
				1517	int
				1518	xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
				1519	{
				1520	xmlParserInputPtr input;
				1521
				1522	if(ctxt==NULL) {
				1523	xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
				1524	return(-1);
				1525	}
				1526
				1527	xmlDefaultSAXHandlerInit();
				1528
				1529	if (ctxt->dict == NULL)
				1530	ctxt->dict = xmlDictCreate();
				1531	if (ctxt->dict == NULL) {
				1532	xmlErrMemory(NULL, "cannot initialize parser context\n");
				1533	return(-1);
				1534	}
				1535	if (ctxt->sax == NULL)
				1536	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
				1537	if (ctxt->sax == NULL) {
				1538	xmlErrMemory(NULL, "cannot initialize parser context\n");
				1539	return(-1);
				1540	}
				1541	else
				1542	xmlSAXVersion(ctxt->sax, 2);
				1543
				1544	ctxt->maxatts = 0;
				1545	ctxt->atts = NULL;
				1546	/* Allocate the Input stack */
				1547	if (ctxt->inputTab == NULL) {
				1548	ctxt->inputTab = (xmlParserInputPtr *)
				1549	xmlMalloc(5 * sizeof(xmlParserInputPtr));
				1550	ctxt->inputMax = 5;
				1551	}
				1552	if (ctxt->inputTab == NULL) {
				1553	xmlErrMemory(NULL, "cannot initialize parser context\n");
				1554	ctxt->inputNr = 0;
				1555	ctxt->inputMax = 0;
				1556	ctxt->input = NULL;
				1557	return(-1);
				1558	}
				1559	while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
				1560	xmlFreeInputStream(input);
				1561	}
				1562	ctxt->inputNr = 0;
				1563	ctxt->input = NULL;
				1564
				1565	ctxt->version = NULL;
				1566	ctxt->encoding = NULL;
				1567	ctxt->standalone = -1;
				1568	ctxt->hasExternalSubset = 0;
				1569	ctxt->hasPErefs = 0;
				1570	ctxt->html = 0;
				1571	ctxt->external = 0;
				1572	ctxt->instate = XML_PARSER_START;
				1573	ctxt->token = 0;
				1574	ctxt->directory = NULL;
				1575
				1576	/* Allocate the Node stack */
				1577	if (ctxt->nodeTab == NULL) {
				1578	ctxt->nodeTab = (xmlNodePtr ) xmlMalloc(10 sizeof(xmlNodePtr));
				1579	ctxt->nodeMax = 10;
				1580	}
				1581	if (ctxt->nodeTab == NULL) {
				1582	xmlErrMemory(NULL, "cannot initialize parser context\n");
				1583	ctxt->nodeNr = 0;
				1584	ctxt->nodeMax = 0;
				1585	ctxt->node = NULL;
				1586	ctxt->inputNr = 0;
				1587	ctxt->inputMax = 0;
				1588	ctxt->input = NULL;
				1589	return(-1);
				1590	}
				1591	ctxt->nodeNr = 0;
				1592	ctxt->node = NULL;
				1593
				1594	/* Allocate the Name stack */
				1595	if (ctxt->nameTab == NULL) {
				1596	ctxt->nameTab = (const xmlChar *) xmlMalloc(10 sizeof(xmlChar *));
				1597	ctxt->nameMax = 10;
				1598	}
				1599	if (ctxt->nameTab == NULL) {
				1600	xmlErrMemory(NULL, "cannot initialize parser context\n");
				1601	ctxt->nodeNr = 0;
				1602	ctxt->nodeMax = 0;
				1603	ctxt->node = NULL;
				1604	ctxt->inputNr = 0;
				1605	ctxt->inputMax = 0;
				1606	ctxt->input = NULL;
				1607	ctxt->nameNr = 0;
				1608	ctxt->nameMax = 0;
				1609	ctxt->name = NULL;
				1610	return(-1);
				1611	}
				1612	ctxt->nameNr = 0;
				1613	ctxt->name = NULL;
				1614
				1615	/* Allocate the space stack */
				1616	if (ctxt->spaceTab == NULL) {
				1617	ctxt->spaceTab = (int ) xmlMalloc(10 sizeof(int));
				1618	ctxt->spaceMax = 10;
				1619	}
				1620	if (ctxt->spaceTab == NULL) {
				1621	xmlErrMemory(NULL, "cannot initialize parser context\n");
				1622	ctxt->nodeNr = 0;
				1623	ctxt->nodeMax = 0;
				1624	ctxt->node = NULL;
				1625	ctxt->inputNr = 0;
				1626	ctxt->inputMax = 0;
				1627	ctxt->input = NULL;
				1628	ctxt->nameNr = 0;
				1629	ctxt->nameMax = 0;
				1630	ctxt->name = NULL;
				1631	ctxt->spaceNr = 0;
				1632	ctxt->spaceMax = 0;
				1633	ctxt->space = NULL;
				1634	return(-1);
				1635	}
				1636	ctxt->spaceNr = 1;
				1637	ctxt->spaceMax = 10;
				1638	ctxt->spaceTab[0] = -1;
				1639	ctxt->space = &ctxt->spaceTab[0];
				1640	ctxt->userData = ctxt;
				1641	ctxt->myDoc = NULL;
				1642	ctxt->wellFormed = 1;
				1643	ctxt->nsWellFormed = 1;
				1644	ctxt->valid = 1;
				1645	ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
				1646	ctxt->validate = xmlDoValidityCheckingDefaultValue;
				1647	ctxt->pedantic = xmlPedanticParserDefaultValue;
				1648	ctxt->linenumbers = xmlLineNumbersDefaultValue;
				1649	ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
				1650	if (ctxt->keepBlanks == 0)
				1651	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
				1652
				1653	ctxt->vctxt.finishDtd = XML_CTXT_FINISH_DTD_0;
				1654	ctxt->vctxt.userData = ctxt;
				1655	ctxt->vctxt.error = xmlParserValidityError;
				1656	ctxt->vctxt.warning = xmlParserValidityWarning;
				1657	if (ctxt->validate) {
				1658	if (xmlGetWarningsDefaultValue == 0)
				1659	ctxt->vctxt.warning = NULL;
				1660	else
				1661	ctxt->vctxt.warning = xmlParserValidityWarning;
				1662	ctxt->vctxt.nodeMax = 0;
				1663	}
				1664	ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
				1665	ctxt->record_info = 0;
				1666	ctxt->nbChars = 0;
				1667	ctxt->checkIndex = 0;
				1668	ctxt->inSubset = 0;
				1669	ctxt->errNo = XML_ERR_OK;
				1670	ctxt->depth = 0;
				1671	ctxt->charset = XML_CHAR_ENCODING_UTF8;
				1672	ctxt->catalogs = NULL;
Patrick Scott	60a4c35	2009-07-09 09:30:54 -0400	[diff] [blame^]	1673	ctxt->nbentities = 0;
The Android Open Source Project	ab4e2e9	2009-03-03 19:30:06 -0800	[diff] [blame]	1674	xmlInitNodeInfoSeq(&ctxt->node_seq);
				1675	return(0);
				1676	}
				1677
				1678	/**
				1679	* xmlFreeParserCtxt:
				1680	* @ctxt: an XML parser context
				1681	*
				1682	* Free all the memory used by a parser context. However the parsed
				1683	* document in ctxt->myDoc is not freed.
				1684	*/
				1685
				1686	void
				1687	xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
				1688	{
				1689	xmlParserInputPtr input;
				1690
				1691	if (ctxt == NULL) return;
				1692
				1693	while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
				1694	xmlFreeInputStream(input);
				1695	}
				1696	if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
				1697	if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
				1698	if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
				1699	if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
				1700	if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
				1701	if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
				1702	if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
				1703	if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
				1704	#ifdef LIBXML_SAX1_ENABLED
				1705	if ((ctxt->sax != NULL) &&
				1706	(ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
				1707	#else
				1708	if (ctxt->sax != NULL)
				1709	#endif /* LIBXML_SAX1_ENABLED */
				1710	xmlFree(ctxt->sax);
				1711	if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
				1712	if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
				1713	if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
				1714	if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
				1715	if (ctxt->nsTab != NULL) xmlFree((char *) ctxt->nsTab);
				1716	if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
				1717	if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
				1718	if (ctxt->attsDefault != NULL)
				1719	xmlHashFree(ctxt->attsDefault, (xmlHashDeallocator) xmlFree);
				1720	if (ctxt->attsSpecial != NULL)
				1721	xmlHashFree(ctxt->attsSpecial, NULL);
				1722	if (ctxt->freeElems != NULL) {
				1723	xmlNodePtr cur, next;
				1724
				1725	cur = ctxt->freeElems;
				1726	while (cur != NULL) {
				1727	next = cur->next;
				1728	xmlFree(cur);
				1729	cur = next;
				1730	}
				1731	}
				1732	if (ctxt->freeAttrs != NULL) {
				1733	xmlAttrPtr cur, next;
				1734
				1735	cur = ctxt->freeAttrs;
				1736	while (cur != NULL) {
				1737	next = cur->next;
				1738	xmlFree(cur);
				1739	cur = next;
				1740	}
				1741	}
				1742	/*
				1743	* cleanup the error strings
				1744	*/
				1745	if (ctxt->lastError.message != NULL)
				1746	xmlFree(ctxt->lastError.message);
				1747	if (ctxt->lastError.file != NULL)
				1748	xmlFree(ctxt->lastError.file);
				1749	if (ctxt->lastError.str1 != NULL)
				1750	xmlFree(ctxt->lastError.str1);
				1751	if (ctxt->lastError.str2 != NULL)
				1752	xmlFree(ctxt->lastError.str2);
				1753	if (ctxt->lastError.str3 != NULL)
				1754	xmlFree(ctxt->lastError.str3);
				1755
				1756	#ifdef LIBXML_CATALOG_ENABLED
				1757	if (ctxt->catalogs != NULL)
				1758	xmlCatalogFreeLocal(ctxt->catalogs);
				1759	#endif
				1760	xmlFree(ctxt);
				1761	}
				1762
				1763	/**
				1764	* xmlNewParserCtxt:
				1765	*
				1766	* Allocate and initialize a new parser context.
				1767	*
				1768	* Returns the xmlParserCtxtPtr or NULL
				1769	*/
				1770
				1771	xmlParserCtxtPtr
				1772	xmlNewParserCtxt(void)
				1773	{
				1774	xmlParserCtxtPtr ctxt;
				1775
				1776	ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
				1777	if (ctxt == NULL) {
				1778	xmlErrMemory(NULL, "cannot allocate parser context\n");
				1779	return(NULL);
				1780	}
				1781	memset(ctxt, 0, sizeof(xmlParserCtxt));
				1782	if (xmlInitParserCtxt(ctxt) < 0) {
				1783	xmlFreeParserCtxt(ctxt);
				1784	return(NULL);
				1785	}
				1786	return(ctxt);
				1787	}
				1788
				1789	/************************************************************************
				1790	* *
				1791	* Handling of node informations *
				1792	* *
				1793	************************************************************************/
				1794
				1795	/**
				1796	* xmlClearParserCtxt:
				1797	* @ctxt: an XML parser context
				1798	*
				1799	* Clear (release owned resources) and reinitialize a parser context
				1800	*/
				1801
				1802	void
				1803	xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
				1804	{
				1805	if (ctxt==NULL)
				1806	return;
				1807	xmlClearNodeInfoSeq(&ctxt->node_seq);
				1808	xmlCtxtReset(ctxt);
				1809	}
				1810
				1811
				1812	/**
				1813	* xmlParserFindNodeInfo:
				1814	* @ctx: an XML parser context
				1815	* @node: an XML node within the tree
				1816	*
				1817	* Find the parser node info struct for a given node
				1818	*
				1819	* Returns an xmlParserNodeInfo block pointer or NULL
				1820	*/
				1821	const xmlParserNodeInfo *
				1822	xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
				1823	{
				1824	unsigned long pos;
				1825
				1826	if ((ctx == NULL) \|\| (node == NULL))
				1827	return (NULL);
				1828	/* Find position where node should be at */
				1829	pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
				1830	if (pos < ctx->node_seq.length
				1831	&& ctx->node_seq.buffer[pos].node == node)
				1832	return &ctx->node_seq.buffer[pos];
				1833	else
				1834	return NULL;
				1835	}
				1836
				1837
				1838	/**
				1839	* xmlInitNodeInfoSeq:
				1840	* @seq: a node info sequence pointer
				1841	*
				1842	* -- Initialize (set to initial state) node info sequence
				1843	*/
				1844	void
				1845	xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
				1846	{
				1847	if (seq == NULL)
				1848	return;
				1849	seq->length = 0;
				1850	seq->maximum = 0;
				1851	seq->buffer = NULL;
				1852	}
				1853
				1854	/**
				1855	* xmlClearNodeInfoSeq:
				1856	* @seq: a node info sequence pointer
				1857	*
				1858	* -- Clear (release memory and reinitialize) node
				1859	* info sequence
				1860	*/
				1861	void
				1862	xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
				1863	{
				1864	if (seq == NULL)
				1865	return;
				1866	if (seq->buffer != NULL)
				1867	xmlFree(seq->buffer);
				1868	xmlInitNodeInfoSeq(seq);
				1869	}
				1870
				1871	/**
				1872	* xmlParserFindNodeInfoIndex:
				1873	* @seq: a node info sequence pointer
				1874	* @node: an XML node pointer
				1875	*
				1876	*
				1877	* xmlParserFindNodeInfoIndex : Find the index that the info record for
				1878	* the given node is or should be at in a sorted sequence
				1879	*
				1880	* Returns a long indicating the position of the record
				1881	*/
				1882	unsigned long
				1883	xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
				1884	const xmlNodePtr node)
				1885	{
				1886	unsigned long upper, lower, middle;
				1887	int found = 0;
				1888
				1889	if ((seq == NULL) \|\| (node == NULL))
				1890	return ((unsigned long) -1);
				1891
				1892	/* Do a binary search for the key */
				1893	lower = 1;
				1894	upper = seq->length;
				1895	middle = 0;
				1896	while (lower <= upper && !found) {
				1897	middle = lower + (upper - lower) / 2;
				1898	if (node == seq->buffer[middle - 1].node)
				1899	found = 1;
				1900	else if (node < seq->buffer[middle - 1].node)
				1901	upper = middle - 1;
				1902	else
				1903	lower = middle + 1;
				1904	}
				1905
				1906	/* Return position */
				1907	if (middle == 0 \|\| seq->buffer[middle - 1].node < node)
				1908	return middle;
				1909	else
				1910	return middle - 1;
				1911	}
				1912
				1913
				1914	/**
				1915	* xmlParserAddNodeInfo:
				1916	* @ctxt: an XML parser context
				1917	* @info: a node info sequence pointer
				1918	*
				1919	* Insert node info record into the sorted sequence
				1920	*/
				1921	void
				1922	xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
				1923	const xmlParserNodeInfoPtr info)
				1924	{
				1925	unsigned long pos;
				1926
				1927	if ((ctxt == NULL) \|\| (info == NULL)) return;
				1928
				1929	/* Find pos and check to see if node is already in the sequence */
				1930	pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
				1931	info->node);
				1932
				1933	if ((pos < ctxt->node_seq.length) &&
				1934	(ctxt->node_seq.buffer != NULL) &&
				1935	(ctxt->node_seq.buffer[pos].node == info->node)) {
				1936	ctxt->node_seq.buffer[pos] = *info;
				1937	}
				1938
				1939	/* Otherwise, we need to add new node to buffer */
				1940	else {
				1941	if (ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) {
				1942	xmlParserNodeInfo *tmp_buffer;
				1943	unsigned int byte_size;
				1944
				1945	if (ctxt->node_seq.maximum == 0)
				1946	ctxt->node_seq.maximum = 2;
				1947	byte_size = (sizeof(ctxt->node_seq.buffer)
				1948	(2 * ctxt->node_seq.maximum));
				1949
				1950	if (ctxt->node_seq.buffer == NULL)
				1951	tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
				1952	else
				1953	tmp_buffer =
				1954	(xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
				1955	byte_size);
				1956
				1957	if (tmp_buffer == NULL) {
				1958	xmlErrMemory(ctxt, "failed to allocate buffer\n");
				1959	return;
				1960	}
				1961	ctxt->node_seq.buffer = tmp_buffer;
				1962	ctxt->node_seq.maximum *= 2;
				1963	}
				1964
				1965	/* If position is not at end, move elements out of the way */
				1966	if (pos != ctxt->node_seq.length) {
				1967	unsigned long i;
				1968
				1969	for (i = ctxt->node_seq.length; i > pos; i--)
				1970	ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
				1971	}
				1972
				1973	/* Copy element and increase length */
				1974	ctxt->node_seq.buffer[pos] = *info;
				1975	ctxt->node_seq.length++;
				1976	}
				1977	}
				1978
				1979	/************************************************************************
				1980	* *
				1981	* Defaults settings *
				1982	* *
				1983	************************************************************************/
				1984	/**
				1985	* xmlPedanticParserDefault:
				1986	* @val: int 0 or 1
				1987	*
				1988	* Set and return the previous value for enabling pedantic warnings.
				1989	*
				1990	* Returns the last value for 0 for no substitution, 1 for substitution.
				1991	*/
				1992
				1993	int
				1994	xmlPedanticParserDefault(int val) {
				1995	int old = xmlPedanticParserDefaultValue;
				1996
				1997	xmlPedanticParserDefaultValue = val;
				1998	return(old);
				1999	}
				2000
				2001	/**
				2002	* xmlLineNumbersDefault:
				2003	* @val: int 0 or 1
				2004	*
				2005	* Set and return the previous value for enabling line numbers in elements
				2006	* contents. This may break on old application and is turned off by default.
				2007	*
				2008	* Returns the last value for 0 for no substitution, 1 for substitution.
				2009	*/
				2010
				2011	int
				2012	xmlLineNumbersDefault(int val) {
				2013	int old = xmlLineNumbersDefaultValue;
				2014
				2015	xmlLineNumbersDefaultValue = val;
				2016	return(old);
				2017	}
				2018
				2019	/**
				2020	* xmlSubstituteEntitiesDefault:
				2021	* @val: int 0 or 1
				2022	*
				2023	* Set and return the previous value for default entity support.
				2024	* Initially the parser always keep entity references instead of substituting
				2025	* entity values in the output. This function has to be used to change the
				2026	* default parser behavior
				2027	* SAX::substituteEntities() has to be used for changing that on a file by
				2028	* file basis.
				2029	*
				2030	* Returns the last value for 0 for no substitution, 1 for substitution.
				2031	*/
				2032
				2033	int
				2034	xmlSubstituteEntitiesDefault(int val) {
				2035	int old = xmlSubstituteEntitiesDefaultValue;
				2036
				2037	xmlSubstituteEntitiesDefaultValue = val;
				2038	return(old);
				2039	}
				2040
				2041	/**
				2042	* xmlKeepBlanksDefault:
				2043	* @val: int 0 or 1
				2044	*
				2045	* Set and return the previous value for default blanks text nodes support.
				2046	* The 1.x version of the parser used an heuristic to try to detect
				2047	* ignorable white spaces. As a result the SAX callback was generating
				2048	* xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
				2049	* using the DOM output text nodes containing those blanks were not generated.
				2050	* The 2.x and later version will switch to the XML standard way and
				2051	* ignorableWhitespace() are only generated when running the parser in
				2052	* validating mode and when the current element doesn't allow CDATA or
				2053	* mixed content.
				2054	* This function is provided as a way to force the standard behavior
				2055	* on 1.X libs and to switch back to the old mode for compatibility when
				2056	* running 1.X client code on 2.X . Upgrade of 1.X code should be done
				2057	* by using xmlIsBlankNode() commodity function to detect the "empty"
				2058	* nodes generated.
				2059	* This value also affect autogeneration of indentation when saving code
				2060	* if blanks sections are kept, indentation is not generated.
				2061	*
				2062	* Returns the last value for 0 for no substitution, 1 for substitution.
				2063	*/
				2064
				2065	int
				2066	xmlKeepBlanksDefault(int val) {
				2067	int old = xmlKeepBlanksDefaultValue;
				2068
				2069	xmlKeepBlanksDefaultValue = val;
				2070	xmlIndentTreeOutput = !val;
				2071	return(old);
				2072	}
				2073
				2074	#define bottom_parserInternals
				2075	#include "elfgcchack.h"