Blame - xml_parser.c - platform/external/libxml2

blob: dfec5a78c2c1006663c4b096bab532304809ee24 [file] [log] [blame]

Daniel Veillard	01791d5	1998-07-24 19:24:09 +0000	[diff] [blame^]	1	/*
				2	* parser.c : an XML 1.0 non-verifying parser
				3	*
				4	* See Copyright for the status of this software.
				5	*
				6	* $Id$
				7	*/
				8
				9	#include <config.h>
				10	#include <stdio.h>
				11	#include <ctype.h>
				12	#include <string.h> /* for memset() only */
				13	#include <malloc.h>
				14	#include <sys/stat.h>
				15	#ifdef HAVE_FCNTL_H
				16	#include <fcntl.h>
				17	#endif
				18	#ifdef HAVE_UNISTD_H
				19	#include <unistd.h>
				20	#endif
				21	#ifdef HAVE_ZLIB_H
				22	#include <zlib.h>
				23	#endif
				24
				25	#include "xml_tree.h"
				26	#include "xml_parser.h"
				27	#include "xml_entities.h"
				28
				29	/*
				30	* A few macros needed to help building the parser.
				31	*/
				32
				33	#ifdef UNICODE
				34	/*
				35	* UNICODE version of the macros. Incomplete now TODO !!!!
				36	*/
				37	#define IS_CHAR(c) \
				38	(((c) == 0x09) \|\| ((c) == 0x0a) \|\| ((c) == 0x0d) \|\| \
				39	(((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))
				40
				41	#define SKIP_BLANKS(p) \
				42	while (((p) == 0x20) \|\| ((p) == 0x09) \|\| (*(p) == 0xa) \|\| \
				43	(*(p) == 0x3000)) (p)++;
				44
				45	/* I'm too lazy to complete this one TODO !!!! */
				46	#define IS_BASECHAR(c) \
				47	((((c) >= 0x41) && ((c) <= 0x5a)) \|\| \
				48	(((c) >= 0x61) && ((c) <= 0x7a)) \|\| \
				49	(((c) >= 0xaa) && ((c) <= 0x5b)) \|\| \
				50	(((c) >= 0xc0) && ((c) <= 0xd6)) \|\| \
				51	(((c) >= 0xd8) && ((c) <= 0xf6)) \|\| \
				52	(((c) >= 0xf8) && ((c) <= 0xff)) \|\| \
				53	((c) == 0xba))
				54
				55	/* I'm too lazy to complete this one TODO !!!! */
				56	#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
				57
				58	/* I'm too lazy to complete this one TODO !!!! */
				59	#define IS_COMBINING(c) 0
				60
				61	#define IS_IGNORABLE(c) \
				62	((((c) >= 0x200c) && ((c) <= 0x200f)) \|\| \
				63	(((c) >= 0x202a) && ((c) <= 0x202e)) \|\| \
				64	(((c) >= 0x206a) && ((c) <= 0x206f)) \|\| \
				65	((c) == 0xfeff))
				66
				67	#define IS_EXTENDER(c) \
				68	(((c) == 0xb7) \|\| ((c) == 0x2d0) \|\| ((c) == 0x2d1) \|\| \
				69	((c) == 0x387) \|\| ((c) == 0x640) \|\| ((c) == 0xe46) \|\| \
				70	((c) == 0xec6) \|\| ((c) == 0x3005) \
				71	(((c) >= 0x3031) && ((c) <= 0x3035)) \|\| \
				72	(((c) >= 0x309b) && ((c) <= 0x309e)) \|\| \
				73	(((c) >= 0x30fc) && ((c) <= 0x30fe)) \|\| \
				74	(((c) >= 0xff70) && ((c) <= 0xff9e)) \|\| \
				75	((c) == 0xff9f))
				76
				77	#define IS_IDEOGRAPHIC(c) \
				78	((((c) >= 0x4e00) && ((c) <= 0x9fa5)) \|\| \
				79	(((c) >= 0xf900) && ((c) <= 0xfa2d)) \|\| \
				80	(((c) >= 0x3021) && ((c) <= 0x3029)) \|\| \
				81	((c) == 0x3007))
				82
				83	#define IS_LETTER(c) (IS_BASECHAR(c) \|\| IS_IDEOGRAPHIC(c))
				84
				85	/* I'm too lazy to complete this one ! */
				86	#define IS_BLANK(c) (((c) == 0x20) \|\| ((c) == 0x09) \|\| ((c) == 0xa))
				87	#else
				88	/*
				89	* 8bits / ASCII version of the macros.
				90	*/
				91	#define IS_CHAR(c) \
				92	(((c) == 0x09) \|\| ((c) == 0x0a) \|\| ((c) == 0x0d) \|\| ((c) >= 0x20))
				93
				94	#define IS_BASECHAR(c) \
				95	((((c) >= 0x41) && ((c) <= 0x5a)) \|\| \
				96	(((c) >= 0x61) && ((c) <= 0x7a)) \|\| \
				97	(((c) >= 0xaa) && ((c) <= 0x5b)) \|\| \
				98	(((c) >= 0xc0) && ((c) <= 0xd6)) \|\| \
				99	(((c) >= 0xd8) && ((c) <= 0xf6)) \|\| \
				100	(((c) >= 0xf8) && ((c) <= 0xff)) \|\| \
				101	((c) == 0xba))
				102
				103	#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
				104
				105	#define IS_LETTER(c) IS_BASECHAR(c)
				106
				107	#define IS_COMBINING(c) 0
				108
				109	#define IS_IGNORABLE(c) 0
				110
				111	#define IS_EXTENDER(c) ((c) == 0xb7)
				112
				113	#define IS_BLANK(c) (((c) == 0x20) \|\| ((c) == 0x09) \|\| ((c) == 0xa))
				114	#endif
				115
				116
				117	#define SKIP_EOL(p) \
				118	if ((p) == 0x13) { p++ ; if ((p) == 0x10) p++; } \
				119	if ((p) == 0x10) { p++ ; if ((p) == 0x13) p++; }
				120
				121	#define SKIP_BLANKS(p) \
				122	while (IS_BLANK(*(p))) (p)++;
				123
				124	#define MOVETO_ENDTAG(p) \
				125	while (IS_CHAR(p) && ((p) != '>')) (p)++;
				126
				127	#define MOVETO_STARTTAG(p) \
				128	while (IS_CHAR(p) && ((p) != '<')) (p)++;
				129
/*
 * Forward declaration needed for recursive behaviour: xmlParseContent()
 * calls xmlParseElement() before its definition appears in this file.
 */
xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
				134
				135	/*
				136	* xmlHandleData : this routine represent's the specific application
				137	* behaviour when reading a piece of text.
				138	*
				139	* For example in WebDav, any piece made only of blanks is eliminated
				140	*/
				141
				142	CHAR xmlHandleData(CHAR in) {
				143	CHAR *cur;
				144
				145	if (in == NULL) return(NULL);
				146	cur = in;
				147	while (IS_CHAR(*cur)) {
				148	if (!IS_BLANK(*cur)) goto not_blank;
				149	cur++;
				150	}
				151	free(in);
				152	return(NULL);
				153
				154	not_blank:
				155	return(in);
				156	}
				157
				158	/*
				159	* xmlStrndup : a strdup for array of CHAR's
				160	*/
				161
				162	CHAR xmlStrndup(const CHAR cur, int len) {
				163	CHAR ret = malloc((len + 1) sizeof(CHAR));
				164
				165	if (ret == NULL) {
				166	fprintf(stderr, "malloc of %d byte failed\n",
				167	(len + 1) * sizeof(CHAR));
				168	return(NULL);
				169	}
				170	memcpy(ret, cur, len * sizeof(CHAR));
				171	ret[len] = 0;
				172	return(ret);
				173	}
				174
				175	/*
				176	* xmlStrdup : a strdup for CHAR's
				177	*/
				178
				179	CHAR xmlStrdup(const CHAR cur) {
				180	const CHAR *p = cur;
				181
				182	while (IS_CHAR(*p)) p++;
				183	return(xmlStrndup(cur, p - cur));
				184	}
				185
				186	/*
				187	* xmlStrcmp : a strcmp for CHAR's
				188	*/
				189
				190	int xmlStrcmp(const CHAR str1, const CHAR str2) {
				191	register int tmp;
				192
				193	do {
				194	tmp = str1++ - str2++;
				195	if (tmp != 0) return(tmp);
				196	} while ((str1 != 0) && (str2 != 0));
				197	return (str1 - str2);
				198	}
				199
				200	/*
				201	* xmlStrncmp : a strncmp for CHAR's
				202	*/
				203
				204	int xmlStrncmp(const CHAR str1, const CHAR str2, int len) {
				205	register int tmp;
				206
				207	if (len <= 0) return(0);
				208	do {
				209	tmp = str1++ - str2++;
				210	if (tmp != 0) return(tmp);
				211	len--;
				212	if (len <= 0) return(0);
				213	} while ((str1 != 0) && (str2 != 0));
				214	return (str1 - str2);
				215	}
				216
				217	/*
				218	* xmlStrchr : a strchr for CHAR's
				219	*/
				220
				221	CHAR xmlStrchr(const CHAR str, CHAR val) {
				222	while (*str != 0) {
				223	if (str == val) return((CHAR ) str);
				224	str++;
				225	}
				226	return(NULL);
				227	}
				228
				229	/*
				230	* xmlParseName : parse an XML name.
				231	*/
				232
				233	CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
				234	const CHAR *q;
				235	CHAR *ret = NULL;
				236
				237	/*
				238	* Name ::= (Letter \| '_') (NameChar)*
				239	*/
				240	if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return(NULL);
				241	q = ctxt->cur++;
				242	while ((IS_LETTER(ctxt->cur[0])) \|\| (IS_DIGIT(ctxt->cur[0])) \|\|
				243	(ctxt->cur[0] == '.') \|\| (ctxt->cur[0] == '-') \|\| (ctxt->cur[0] == '_') \|\|
				244	(ctxt->cur[0] == ':') \|\|
				245	(IS_COMBINING(ctxt->cur[0])) \|\| (IS_IGNORABLE(ctxt->cur[0])) \|\|
				246	(IS_EXTENDER(ctxt->cur[0])))
				247	ctxt->cur++;
				248
				249	ret = xmlStrndup(q, ctxt->cur - q);
				250
				251	return(ret);
				252	}
				253
				254	/*
				255	* Parse and return a string between quotes or doublequotes
				256	*/
				257	CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
				258	CHAR *ret = NULL;
				259	const CHAR *q;
				260
				261	if (ctxt->cur[0] == '"') {
				262	ctxt->cur++;
				263	q = ctxt->cur;
				264	while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '"')) ctxt->cur++;
				265	if (ctxt->cur[0] != '"')
				266	fprintf(stderr, "String not closed \"%.50s\n", q);
				267	else {
				268	ret = xmlStrndup(q, ctxt->cur - q);
				269	ctxt->cur++;
				270	}
				271	} else if (ctxt->cur[0] == '\''){
				272	ctxt->cur++;
				273	q = ctxt->cur;
				274	while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '\'')) ctxt->cur++;
				275	if (ctxt->cur[0] != '\'')
				276	fprintf(stderr, "String not closed '%.50s\n", q);
				277	else {
				278	ret = xmlStrndup(q, ctxt->cur - q);
				279	ctxt->cur++;
				280	}
				281	}
				282	return(ret);
				283	}
				284
				285	/*
				286	* Skip an XML (SGML) comment <!-- .... -->
				287	*
				288	* TODO !!!! Save the comment in the tree !!!
				289	*/
				290	void xmlParserSkipComment(xmlParserCtxtPtr ctxt) {
				291	const CHAR q, start;
				292	const CHAR *r;
				293
				294	/*
				295	* An extra check may avoid errors and isn't that costly !
				296	*/
				297	if ((ctxt->cur[0] != '<') \|\| (ctxt->cur[1] != '!') \|\|
				298	(ctxt->cur[2] != '-') \|\| (ctxt->cur[3] != '-')) return;
				299
				300	ctxt->cur += 4;
				301	start = q = ctxt->cur;
				302	ctxt->cur++;
				303	r = ctxt->cur;
				304	ctxt->cur++;
				305	while (IS_CHAR(ctxt->cur[0]) &&
				306	((ctxt->cur[0] == ':') \|\| (ctxt->cur[0] != '>') \|\|
				307	(r != '-') \|\| (q != '-'))) {
				308	ctxt->cur++;r++;q++;
				309	}
				310	if (!IS_CHAR(ctxt->cur[0])) {
				311	fprintf(stderr, "Comment not terminated <!--%.50s\n", start);
				312	ctxt->cur = start; /* !!! We shouldn't really try to recover !!! */
				313	} else {
				314	ctxt->cur++;
				315	}
				316	}
				317
				318	/*
				319	* xmlParseNamespace: parse specific '<?namespace ...' constructs.
				320	*/
				321
				322	void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
				323	CHAR *href = NULL;
				324	CHAR *AS = NULL;
				325	int garbage = 0;
				326
				327	/*
				328	* We just skipped "namespace" or "xml:namespace"
				329	*/
				330	SKIP_BLANKS(ctxt->cur);
				331
				332	while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '>')) {
				333	/*
				334	* We can have "ns" or "prefix" attributes
				335	* Old encoding as 'href' or 'AS' attributes is still supported
				336	*/
				337	if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 's')) {
				338	garbage = 0;
				339	ctxt->cur += 2;
				340	SKIP_BLANKS(ctxt->cur);
				341
				342	if (ctxt->cur[0] != '=') continue;
				343	ctxt->cur++;
				344	SKIP_BLANKS(ctxt->cur);
				345
				346	href = xmlParseQuotedString(ctxt);
				347	SKIP_BLANKS(ctxt->cur);
				348	} else if ((ctxt->cur[0] == 'h') && (ctxt->cur[1] == 'r') &&
				349	(ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f')) {
				350	garbage = 0;
				351	ctxt->cur += 4;
				352	SKIP_BLANKS(ctxt->cur);
				353
				354	if (ctxt->cur[0] != '=') continue;
				355	ctxt->cur++;
				356	SKIP_BLANKS(ctxt->cur);
				357
				358	href = xmlParseQuotedString(ctxt);
				359	SKIP_BLANKS(ctxt->cur);
				360	} else if ((ctxt->cur[0] == 'p') && (ctxt->cur[1] == 'r') &&
				361	(ctxt->cur[2] == 'e') && (ctxt->cur[3] == 'f') &&
				362	(ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'x')) {
				363	garbage = 0;
				364	ctxt->cur += 6;
				365	SKIP_BLANKS(ctxt->cur);
				366
				367	if (ctxt->cur[0] != '=') continue;
				368	ctxt->cur++;
				369	SKIP_BLANKS(ctxt->cur);
				370
				371	AS = xmlParseQuotedString(ctxt);
				372	SKIP_BLANKS(ctxt->cur);
				373	} else if ((ctxt->cur[0] == 'A') && (ctxt->cur[1] == 'S')) {
				374	garbage = 0;
				375	ctxt->cur += 2;
				376	SKIP_BLANKS(ctxt->cur);
				377
				378	if (ctxt->cur[0] != '=') continue;
				379	ctxt->cur++;
				380	SKIP_BLANKS(ctxt->cur);
				381
				382	AS = xmlParseQuotedString(ctxt);
				383	SKIP_BLANKS(ctxt->cur);
				384	} else if ((ctxt->cur[0] == '?') && (ctxt->cur[1] == '>')) {
				385	garbage = 0;
				386	ctxt->cur ++;
				387	} else {
				388	/*
				389	* Found garbage when parsing the namespace
				390	*/
				391	if (!garbage) fprintf(stderr,
				392	"\nxmlParseNamespace found garbage: ");
				393	fprintf(stderr, "%c", ctxt->cur[0]);
				394	ctxt->cur++;
				395	}
				396	}
				397
				398	MOVETO_ENDTAG(ctxt->cur);
				399	ctxt->cur++;
				400
				401	/*
				402	* Register the DTD.
				403	*/
				404	if (href != NULL)
				405	xmlNewDtd(ctxt->doc, href, AS);
				406
				407	if (AS != NULL) free(AS);
				408	if (href != NULL) free(href);
				409	}
				410
				411	/*
				412	* xmlParsePI: parse an XML Processing Instruction.
				413	*/
				414
				415	void xmlParsePI(xmlParserCtxtPtr ctxt) {
				416	if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
				417	/*
				418	* this is a Processing Instruction.
				419	*/
				420	ctxt->cur += 2;
				421
				422	/*
				423	* Special for WebDav, support for the Processing Instruction
				424	* '<?namespace ...' contruct in the header of the XML document.
				425	*/
				426	if ((ctxt->cur[0] == 'n') && (ctxt->cur[1] == 'a') &&
				427	(ctxt->cur[2] == 'm') && (ctxt->cur[3] == 'e') &&
				428	(ctxt->cur[4] == 's') && (ctxt->cur[5] == 'p') &&
				429	(ctxt->cur[6] == 'a') && (ctxt->cur[7] == 'c') &&
				430	(ctxt->cur[8] == 'e')) {
				431	ctxt->cur += 9;
				432	xmlParseNamespace(ctxt);
				433	} else if ((ctxt->cur[0] == 'x') && (ctxt->cur[1] == 'm') &&
				434	(ctxt->cur[2] == 'l') && (ctxt->cur[3] == ':') &&
				435	(ctxt->cur[4] == 'n') && (ctxt->cur[5] == 'a') &&
				436	(ctxt->cur[6] == 'm') && (ctxt->cur[7] == 'e') &&
				437	(ctxt->cur[8] == 's') && (ctxt->cur[9] == 'p') &&
				438	(ctxt->cur[10] == 'a') && (ctxt->cur[11] == 'c') &&
				439	(ctxt->cur[12] == 'e')) {
				440	ctxt->cur += 13;
				441	xmlParseNamespace(ctxt);
				442	} else {
				443	/* Unknown PI, ignore it ! */
				444	fprintf(stderr, "xmlParsePI : skipping unknown PI %30s\n",
				445	ctxt->cur);
				446	MOVETO_ENDTAG(ctxt->cur);
				447	ctxt->cur++;
				448	}
				449	}
				450	}
				451
				452	/*
				453	* xmlParseAttribute: parse a start of tag.
				454	*
				455	* Attribute ::= Name Eq AttValue
				456	*/
				457
				458	void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
				459	const CHAR *q;
				460	CHAR name, value = NULL;
				461
				462	if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) {
				463	return;
				464	}
				465	q = ctxt->cur++;
				466	while ((IS_LETTER(ctxt->cur[0])) \|\| (IS_DIGIT(ctxt->cur[0])) \|\|
				467	(ctxt->cur[0] == '.') \|\| (ctxt->cur[0] == '-') \|\|
				468	(ctxt->cur[0] == '_') \|\| (ctxt->cur[0] == ':') \|\|
				469	(IS_COMBINING(ctxt->cur[0])) \|\| (IS_IGNORABLE(ctxt->cur[0])) \|\|
				470	(IS_EXTENDER(ctxt->cur[0])))
				471	ctxt->cur++;
				472	name = xmlStrndup(q, ctxt->cur - q);
				473
				474	/*
				475	* We should have the equal, we are laxist here and allow attributes
				476	* without values and extra spaces.
				477	*/
				478	SKIP_BLANKS(ctxt->cur);
				479	if (ctxt->cur[0] == '=') {
				480	ctxt->cur++;
				481	SKIP_BLANKS(ctxt->cur);
				482	if ((ctxt->cur[0] != '\'') && (ctxt->cur[0] != '"')) {
				483	fprintf(stderr, "Quotes were expected for attribute value %.20s\n",
				484	q);
				485	} else
				486	value = xmlParseQuotedString(ctxt);
				487	}
				488
				489	/*
				490	* Add the attribute to the node.
				491	*/
				492	if (name != NULL) {
				493	xmlNewProp(node, name, value);
				494	free(name);
				495	}
				496	if ( value != NULL )
				497	free(value);
				498	}
				499
				500	/*
				501	* xmlParseStartTag: parse a start of tag.
				502	*/
				503
				504	xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
				505	const CHAR *q;
				506	CHAR ns, name;
				507	xmlDtdPtr dtd = NULL;
				508	xmlNodePtr ret = NULL;
				509
				510	/*
				511	* Theorically one should just parse a Name, but with the addition
				512	* of the namespace needed for WebDav, it's a bit more complicated
				513	* since the element name may be prefixed by a namespace prefix.
				514	*
				515	* QName ::= (NSPart ':')? LocalPart
				516	* NSPart ::= Name
				517	* LocalPart ::= Name
				518	* STag ::= '<' QName (S Attribute)* S? '>'
				519	*
				520	* instead of :
				521	*
				522	* STag ::= '<' QName (S Attribute)* S? '>'
				523	*/
				524	if (ctxt->cur[0] != '<') return(NULL);
				525	ctxt->cur++;
				526
				527	if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return(NULL);
				528	q = ctxt->cur++;
				529	while ((IS_LETTER(ctxt->cur[0])) \|\| (IS_DIGIT(ctxt->cur[0])) \|\|
				530	(ctxt->cur[0] == '.') \|\| (ctxt->cur[0] == '-') \|\|
				531	(ctxt->cur[0] == '_') \|\|
				532	(IS_COMBINING(ctxt->cur[0])) \|\| (IS_IGNORABLE(ctxt->cur[0])) \|\|
				533	(IS_EXTENDER(ctxt->cur[0])))
				534	ctxt->cur++;
				535
				536	if (ctxt->cur[0] == ':') {
				537	ns = xmlStrndup(q, ctxt->cur - q);
				538
				539	ctxt->cur++; /* skip the column */
				540	if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) {
				541	fprintf(stderr,
				542	"Start tag : no element name after namespace identifier %.20s\n",
				543	q);
				544	free(ns);
				545	return(NULL);
				546	}
				547	q = ctxt->cur++;
				548	while ((IS_LETTER(ctxt->cur[0])) \|\| (IS_DIGIT(ctxt->cur[0])) \|\|
				549	(ctxt->cur[0] == '.') \|\| (ctxt->cur[0] == '-') \|\|
				550	(ctxt->cur[0] == '_') \|\| (ctxt->cur[0] == ':') \|\|
				551	(IS_COMBINING(ctxt->cur[0])) \|\| (IS_IGNORABLE(ctxt->cur[0])) \|\|
				552	(IS_EXTENDER(ctxt->cur[0])))
				553	ctxt->cur++;
				554	name = xmlStrndup(q, ctxt->cur - q);
				555
				556	/*
				557	* Search the DTD associated to ns.
				558	*/
				559	dtd = xmlSearchDtd(ctxt->doc, ns);
				560	if (dtd == NULL)
				561	fprintf(stderr, "Start tag : Couldn't find namespace %s\n", ns);
				562	free(ns);
				563	} else
				564	name = xmlStrndup(q, ctxt->cur - q);
				565
				566	ret = xmlNewNode(dtd, name, NULL);
				567
				568	/*
				569	* Now parse the attributes, it ends up with the ending
				570	*
				571	* (S Attribute)* S?
				572	*/
				573	SKIP_BLANKS(ctxt->cur);
				574	while ((IS_CHAR(ctxt->cur[0])) &&
				575	(ctxt->cur[0] != '>') &&
				576	((ctxt->cur[0] != '/') \|\| (ctxt->cur[1] != '>'))) {
				577	if (IS_LETTER(ctxt->cur[0]) \|\| (ctxt->cur[0] == '_'))
				578	xmlParseAttribute(ctxt, ret);
				579	else {
				580	/* We should warn TODO !!! */
				581	ctxt->cur++;
				582	}
				583	SKIP_BLANKS(ctxt->cur);
				584	}
				585
				586	return(ret);
				587	}
				588
				589	/*
				590	* xmlParseEndTag: parse an end of tag, note that the '</' part has
				591	* already been read.
				592	*/
				593
				594	void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlDtdPtr dtdPtr, CHAR *tagPtr) {
				595	const CHAR *q;
				596	CHAR ns, name;
				597	xmlDtdPtr dtd = NULL;
				598
				599	*dtdPtr = NULL;
				600	*tagPtr = NULL;
				601
				602	/*
				603	* Theorically one should just parse a Name, but with the addition
				604	* of the namespace needed for WebDav, it's a bit more complicated
				605	* since the element name may be prefixed by a namespace prefix.
				606	*
				607	* QName ::= (NSPart ':')? LocalPart
				608	* NSPart ::= Name
				609	* LocalPart ::= Name
				610	* ETag ::= '</' QName S? '>'
				611	*
				612	* instead of :
				613	*
				614	* ETag ::= '</' Name S? '>'
				615	*/
				616	if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) return;
				617	q = ctxt->cur++;
				618	while ((IS_LETTER(ctxt->cur[0])) \|\| (IS_DIGIT(ctxt->cur[0])) \|\|
				619	(ctxt->cur[0] == '.') \|\| (ctxt->cur[0] == '-') \|\|
				620	(ctxt->cur[0] == '_') \|\|
				621	(IS_COMBINING(ctxt->cur[0])) \|\| (IS_IGNORABLE(ctxt->cur[0])) \|\|
				622	(IS_EXTENDER(ctxt->cur[0])))
				623	ctxt->cur++;
				624
				625	if (ctxt->cur[0] == ':') {
				626	ns = xmlStrndup(q, ctxt->cur - q);
				627
				628	ctxt->cur++; /* skip the column */
				629	if (!IS_LETTER(ctxt->cur[0]) && (ctxt->cur[0] != '_')) {
				630	fprintf(stderr,
				631	"End tag : no element name after namespace identifier %.20s\n",
				632	q);
				633	free(ns);
				634	return;
				635	}
				636	q = ctxt->cur++;
				637	while ((IS_LETTER(ctxt->cur[0])) \|\| (IS_DIGIT(ctxt->cur[0])) \|\|
				638	(ctxt->cur[0] == '.') \|\| (ctxt->cur[0] == '-') \|\|
				639	(ctxt->cur[0] == '_') \|\| (ctxt->cur[0] == ':') \|\|
				640	(IS_COMBINING(ctxt->cur[0])) \|\| (IS_IGNORABLE(ctxt->cur[0])) \|\|
				641	(IS_EXTENDER(ctxt->cur[0])))
				642	ctxt->cur++;
				643	name = xmlStrndup(q, ctxt->cur - q);
				644
				645	/*
				646	* Search the DTD associated to ns.
				647	*/
				648	dtd = xmlSearchDtd(ctxt->doc, ns);
				649	if (dtd == NULL)
				650	fprintf(stderr, "End tag : Couldn't find namespace %s\n", ns);
				651	free(ns);
				652	} else
				653	name = xmlStrndup(q, ctxt->cur - q);
				654
				655	*dtdPtr = dtd;
				656	*tagPtr = name;
				657
				658	/*
				659	* We should definitely be at the ending "S? '>'" part
				660	*/
				661	SKIP_BLANKS(ctxt->cur);
				662	if ((!IS_CHAR(ctxt->cur[0])) \|\| (ctxt->cur[0] != '>')) {
				663	fprintf(stderr, "End tag : expected '>', got %.20s\n", ctxt->cur);
				664	/*
				665	* Note : skipping to the next '>' is probably otherkill,
				666	* especially in case the '>' is hust missing.
				667	*
				668	* Otherwise add:
				669	* MOVETO_ENDTAG(ctxt->cur);
				670	*/
				671	} else
				672	ctxt->cur++;
				673
				674	return;
				675	}
				676
				677	/*
				678	* xmlParseCDSect: escaped pure raw content.
				679	*/
				680	CHAR *xmlParseCDSect(xmlParserCtxtPtr ctxt) {
				681	const CHAR r, s, *base;
				682	CHAR *ret;
				683
				684	base = ctxt->cur;
				685	if (!IS_CHAR(ctxt->cur[0])) {
				686	fprintf(stderr, "CData section not finished : %.20s\n", base);
				687	return(NULL);
				688	}
				689	r = ctxt->cur++;
				690	if (!IS_CHAR(ctxt->cur[0])) {
				691	fprintf(stderr, "CData section not finished : %.20s\n", base);
				692	return(NULL);
				693	}
				694	s = ctxt->cur++;
				695	while (IS_CHAR(ctxt->cur[0]) &&
				696	((r != ']') \|\| (s != ']') \|\| (ctxt->cur[0] != '>'))) {
				697	r++;s++;ctxt->cur++;
				698	}
				699	if (!IS_CHAR(ctxt->cur[0])) {
				700	fprintf(stderr, "CData section not finished : %.20s\n", base);
				701	return(NULL);
				702	}
				703	ret = xmlStrndup(base, ctxt->cur-base);
				704
				705	return(ret);
				706	}
				707
				708	/*
				709	* xmlParseContent: a content is
				710	* (element \| PCData \| Reference \| CDSect \| PI \| Comment)
				711	*
				712	* element : starts by '<'
				713	* PCData : any CHAR but '&' or '<'
				714	* Reference : starts by '&'
				715	* CDSect : starts by '<![CDATA['
				716	* PI : starts by '<?'
				717	*/
				718
				719	xmlNodePtr xmlParseContent(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
				720	const CHAR *q;
				721	CHAR *data = NULL;
				722	xmlNodePtr ret = NULL;
				723
				724	/*
				725	* First case : a Processing Instruction.
				726	*/
				727	if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
				728	xmlParsePI(ctxt);
				729	}
				730	/*
				731	* Second case : a CDSection
				732	*/
				733	if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
				734	(ctxt->cur[2] == '[') && (ctxt->cur[3] == 'C') &&
				735	(ctxt->cur[4] == 'D') && (ctxt->cur[5] == 'A') &&
				736	(ctxt->cur[6] == 'T') && (ctxt->cur[7] == 'A') &&
				737	(ctxt->cur[8] == '[')) {
				738	ctxt->cur += 9;
				739	data = xmlParseCDSect(ctxt);
				740	}
				741	/*
				742	* Third case : a sub-element.
				743	*/
				744	else if (ctxt->cur[0] == '<') {
				745	ret = xmlParseElement(ctxt);
				746	}
				747	/*
				748	* Last case, text. Note that References are handled directly.
				749	*/
				750	else {
				751	q = ctxt->cur;
				752	while (IS_CHAR(ctxt->cur[0]) && (ctxt->cur[0] != '<')) ctxt->cur++;
				753
				754	if (!IS_CHAR(ctxt->cur[0])) {
				755	fprintf(stderr, "Truncated content : %.50s\n", q);
				756	return(NULL);
				757	}
				758
				759	/*
				760	* Do the Entities decoding...
				761	*/
				762	data = xmlStrdup(xmlDecodeEntities(ctxt->doc, q, ctxt->cur - q));
				763	}
				764
				765	/*
				766	* Handle the data if any. If there is no child
				767	* add it as content, otherwise create a new node of type text.
				768	*/
				769	if (data != NULL)
				770	data = xmlHandleData(data);
				771	if (data != NULL) {
				772	if (node->childs == NULL)
				773	xmlNodeSetContent(node, data);
				774	else
				775	ret = xmlNewText(data);
				776	free(data);
				777	}
				778
				779	return(ret);
				780	}
				781
				782	/*
				783	* xmlParseElement: parse an XML element
				784	*/
				785
				786	xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
				787	xmlNodePtr ret, child;
				788	const CHAR *openTag = ctxt->cur;
				789	const CHAR *closeTag = ctxt->cur;
				790
				791	ret = xmlParseStartTag(ctxt);
				792	if (ret == NULL) {
				793	return(NULL);
				794	}
				795
				796	/*
				797	* Check for an Empty Element.
				798	*/
				799	if ((ctxt->cur[0] == '/') && (ctxt->cur[1] == '>')) {
				800	ctxt->cur += 2;
				801	return(ret);
				802	}
				803	if (ctxt->cur[0] == '>') ctxt->cur++;
				804	else {
				805	fprintf(stderr, "Couldn't find end of Start Tag %.30s\n", openTag);
				806	return(NULL);
				807	}
				808
				809	/*
				810	* Parse the content of the element:
				811	* (element \| PCData \| Reference \| CDSect \| PI \| Comment) *
				812	*
				813	* element : starts by '<'
				814	* PCData : any CHAR but '&' or '<'
				815	* Reference : starts by '&'
				816	* CDSect : starts by '<![CDATA['
				817	* PI : starts by '<?'
				818	*
				819	* The loop stops upon detection of an end of tag '</'
				820	*/
				821	while ((IS_CHAR(ctxt->cur[0])) &&
				822	((ctxt->cur[0] != '<') \|\| (ctxt->cur[1] != '/'))) {
				823	child = xmlParseContent(ctxt, ret);
				824	if (child != NULL)
				825	xmlAddChild(ret, child);
				826	}
				827	if (!IS_CHAR(ctxt->cur[0])) {
				828	fprintf(stderr, "Premature end of data in tag %.30s\n", openTag);
				829	return(NULL);
				830	}
				831
				832	/*
				833	* parse the end of tag : '</' has been detected.
				834	*/
				835	ctxt->cur += 2;
				836	if (ctxt->cur[0] == '>') ctxt->cur++; /* simplified closing </> */
				837	else {
				838	CHAR *endTag;
				839	xmlDtdPtr endDtd;
				840
				841	xmlParseEndTag(ctxt, &endDtd, &endTag);
				842
				843	/*
				844	* Check that the Name in the ETag is the same as in the STag.
				845	*/
				846	if (endDtd != ret->dtd) {
				847	fprintf(stderr, "Start and End tags don't use the same DTD:\n");
				848	fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag);
				849	}
				850	if (strcmp(ret->name, endTag)) {
				851	fprintf(stderr, "Start and End tags don't use the same name:\n");
				852	fprintf(stderr, "\t%.30s\n\t%.30s\n", openTag, closeTag);
				853	}
				854
				855	if ( endTag != NULL )
				856	free(endTag);
				857	}
				858
				859	return(ret);
				860	}
				861
				862	/*
				863	* xmlParseXMLDecl: parse an XML declaration header
				864	*/
				865
				866	void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
				867	CHAR *version;
				868
				869	/*
				870	* We know that '<?xml' is here.
				871	*/
				872	ctxt->cur += 5;
				873
				874	/*
				875	* Parse the version info
				876	*/
				877	SKIP_BLANKS(ctxt->cur);
				878
				879	/*
				880	* We should have 'version=' here !
				881	*/
				882	if ((ctxt->cur[0] == 'v') && (ctxt->cur[1] == 'e') &&
				883	(ctxt->cur[2] == 'r') && (ctxt->cur[3] == 's') &&
				884	(ctxt->cur[4] == 'i') && (ctxt->cur[5] == 'o') &&
				885	(ctxt->cur[6] == 'n') && (ctxt->cur[7] == '=')) {
				886	ctxt->cur += 8;
				887	version = xmlParseQuotedString(ctxt);
				888	if (version == NULL)
				889	ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
				890	else {
				891	ctxt->doc = xmlNewDoc(version);
				892	free(version);
				893	}
				894	} else {
				895	ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
				896	}
				897
				898	/*
				899	* We should check for Required Markup Declaration TODO !!!!
				900	*/
				901	MOVETO_ENDTAG(ctxt->cur);
				902	ctxt->cur++;
				903
				904	}
				905
				906	/*
				907	* xmlParseMisc: parse an XML Misc optionnal field.
				908	* (Comment \| PI \| S)*
				909	*/
				910
				911	void xmlParseMisc(xmlParserCtxtPtr ctxt) {
				912	while (((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) \|\|
				913	((ctxt->cur[0] == '<') && (ctxt->cur[1] == '!') &&
				914	(ctxt->cur[2] == '-') && (ctxt->cur[2] == '-')) \|\|
				915	IS_BLANK(ctxt->cur[0])) {
				916	if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?')) {
				917	xmlParsePI(ctxt);
				918	} else if (IS_BLANK(ctxt->cur[0])) {
				919	ctxt->cur++;
				920	} else
				921	xmlParserSkipComment(ctxt);
				922	}
				923	}
				924
				925	/*
				926	* xmlParseDocument : parse an XML document and build a tree.
				927	*/
				928
				929	int xmlParseDocument(xmlParserCtxtPtr ctxt) {
				930	/*
				931	* We should check for encoding here and plug-in some
				932	* conversion code TODO !!!!
				933	*/
				934
				935	/*
				936	* Wipe out everything which is before the first '<'
				937	*/
				938	SKIP_BLANKS(ctxt->cur);
				939
				940	/*
				941	* Check for the XMLDecl in the Prolog.
				942	*/
				943	if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
				944	(ctxt->cur[2] == 'x') && (ctxt->cur[3] == 'm') &&
				945	(ctxt->cur[4] == 'l')) {
				946	xmlParseXMLDecl(ctxt);
				947	/* SKIP_EOL(cur); */
				948	SKIP_BLANKS(ctxt->cur);
				949	} else if ((ctxt->cur[0] == '<') && (ctxt->cur[1] == '?') &&
				950	(ctxt->cur[2] == 'X') && (ctxt->cur[3] == 'M') &&
				951	(ctxt->cur[4] == 'L')) {
				952	/*
				953	* The first drafts were using <?XML and the final W3C REC
				954	* now use <?xml ...
				955	*/
				956	xmlParseXMLDecl(ctxt);
				957	/* SKIP_EOL(cur); */
				958	SKIP_BLANKS(ctxt->cur);
				959	} else {
				960	ctxt->doc = xmlNewDoc(XML_DEFAULT_VERSION);
				961	}
				962
				963	/*
				964	* The Misc part of the Prolog
				965	* (Comment \| PI \| S) *
				966	*/
				967	xmlParseMisc(ctxt);
				968
				969	/*
				970	* Time to start parsing
				971	*/
				972	ctxt->doc->root = xmlParseElement(ctxt);
				973
				974	return(0);
				975	}
				976
				977	/*
				978	* xmlParseDoc : parse an XML in-memory document and build a tree.
				979	*/
				980
				981	xmlDocPtr xmlParseDoc(CHAR *cur) {
				982	xmlDocPtr ret;
				983	xmlParserCtxtPtr ctxt;
				984
				985	if (cur == NULL) return(NULL);
				986
				987	ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
				988	if (ctxt == NULL) {
				989	perror("malloc");
				990	return(NULL);
				991	}
				992
				993	xmlInitParserCtxt(ctxt);
				994	ctxt->base = cur;
				995	ctxt->cur = cur;
				996
				997	xmlParseDocument(ctxt);
				998	ret = ctxt->doc;
				999	free(ctxt->nodes);
				1000	free(ctxt);
				1001
				1002	return(ret);
				1003	}
				1004
				1005	/*
				1006	* xmlParseFile : parse an XML file and build a tree.
				1007	*/
				1008
				1009	xmlDocPtr xmlParseFile(const char *filename) {
				1010	xmlDocPtr ret;
				1011	#ifdef HAVE_ZLIB_H
				1012	gzFile input;
				1013	#else
				1014	int input;
				1015	#endif
				1016	int res;
				1017	struct stat buf;
				1018	char *buffer;
				1019	xmlParserCtxtPtr ctxt;
				1020
				1021	res = stat(filename, &buf);
				1022	if (res < 0) return(NULL);
				1023
				1024	#ifdef HAVE_ZLIB_H
				1025	retry_bigger:
				1026	buffer = malloc((buf.st_size * 20) + 100);
				1027	#else
				1028	buffer = malloc(buf.st_size + 100);
				1029	#endif
				1030	if (buffer == NULL) {
				1031	perror("malloc");
				1032	return(NULL);
				1033	}
				1034
				1035	memset(buffer, 0, sizeof(buffer));
				1036	#ifdef HAVE_ZLIB_H
				1037	input = gzopen (filename, "r");
				1038	if (input == NULL) {
				1039	fprintf (stderr, "Cannot read file %s :\n", filename);
				1040	perror ("gzopen failed");
				1041	return(NULL);
				1042	}
				1043	#else
				1044	input = open (filename, O_RDONLY);
				1045	if (input < 0) {
				1046	fprintf (stderr, "Cannot read file %s :\n", filename);
				1047	perror ("open failed");
				1048	return(NULL);
				1049	}
				1050	#endif
				1051	#ifdef HAVE_ZLIB_H
				1052	res = gzread(input, buffer, 20 * buf.st_size);
				1053	#else
				1054	res = read(input, buffer, buf.st_size);
				1055	#endif
				1056	if (res < 0) {
				1057	fprintf (stderr, "Cannot read file %s :\n", filename);
				1058	#ifdef HAVE_ZLIB_H
				1059	perror ("gzread failed");
				1060	#else
				1061	perror ("read failed");
				1062	#endif
				1063	return(NULL);
				1064	}
				1065	#ifdef HAVE_ZLIB_H
				1066	gzclose(input);
				1067	if (res >= 20 * buf.st_size) {
				1068	free(buffer);
				1069	buf.st_size *= 2;
				1070	goto retry_bigger;
				1071	}
				1072	buf.st_size = res;
				1073	#else
				1074	close(input);
				1075	#endif
				1076
				1077
				1078	ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
				1079	if (ctxt == NULL) {
				1080	perror("malloc");
				1081	return(NULL);
				1082	}
				1083	buffer[buf.st_size] = '\0';
				1084
				1085	xmlInitParserCtxt(ctxt);
				1086	ctxt->filename = filename;
				1087	ctxt->base = buffer;
				1088	ctxt->cur = buffer;
				1089
				1090	xmlParseDocument(ctxt);
				1091	ret = ctxt->doc;
				1092	free(buffer);
				1093	free(ctxt->nodes);
				1094	free(ctxt);
				1095
				1096	return(ret);
				1097	}
				1098
				1099	/*
				1100	* xmlParseFile : parse an XML memory block and build a tree.
				1101	*/
				1102
				1103	xmlDocPtr xmlParseMemory(char *buffer, int size) {
				1104	xmlDocPtr ret;
				1105	xmlParserCtxtPtr ctxt;
				1106
				1107	ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
				1108	if (ctxt == NULL) {
				1109	perror("malloc");
				1110	return(NULL);
				1111	}
				1112
				1113	buffer[size - 1] = '\0';
				1114
				1115	xmlInitParserCtxt(ctxt);
				1116	ctxt->base = buffer;
				1117	ctxt->cur = buffer;
				1118
				1119	xmlParseDocument(ctxt);
				1120	ret = ctxt->doc;
				1121	free(ctxt->nodes);
				1122	free(ctxt);
				1123
				1124	return(ret);
				1125	}
				1126
				1127
				1128
				1129
				1130	/* Initialize parser context */
				1131	void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
				1132	{
				1133	int i;
				1134
				1135	ctxt->filename = NULL;
				1136	ctxt->base = NULL;
				1137	ctxt->cur = NULL;
				1138	ctxt->line = 1;
				1139	ctxt->col = 1;
				1140	ctxt->doc = NULL;
				1141	ctxt->depth = 0;
				1142	ctxt->max_depth = 10;
				1143	ctxt->nodes = (xmlNodePtr ) malloc(ctxt->max_depth sizeof(xmlNodePtr));
				1144	if (ctxt->nodes == NULL) {
				1145	fprintf(stderr, "malloc of %d byte failed\n",
				1146	ctxt->max_depth * sizeof(xmlNodePtr));
				1147	ctxt->max_depth = 0;
				1148	} else {
				1149	for (i = 0;i < ctxt->max_depth;i++)
				1150	ctxt->nodes[i] = NULL;
				1151	}
				1152	}
				1153
				1154
/*
 * Clear and reinitialize context.
 *
 * Currently this only calls xmlInitParserCtxt(), which resets every field
 * and allocates a fresh node stack.
 * NOTE(review): the previous ctx->nodes array is not freed here, so calling
 * this on an already initialized context appears to leak it -- confirm
 * before adding a free(), since callers may pass an uninitialized context.
 */
void xmlClearParserCtxt(xmlParserCtxtPtr ctx)
{
    xmlInitParserCtxt(ctx);
}
				1162
				1163
/*
 * Setup the parser context to parse a new buffer; Clears any prior
 * contents from the parser context. The buffer parameter must not be
 * NULL, but the filename parameter can be NULL.
 *
 * Neither `buffer` nor `filename` is copied: the context only stores the
 * pointers, as both its base and its current position for the buffer.
 */
void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
                             const char* filename)
{
    /* Reset first: this overwrites base, cur and filename. */
    xmlClearParserCtxt(ctxt);
    ctxt->base = buffer;
    ctxt->cur = buffer;
    ctxt->filename = filename;
}
				1177
				1178
				1179
/*
 * Report a parser error: writes `msg` verbatim to stderr.
 * The context argument is currently not used.
 */
void xmlReportError(xmlParserCtxtPtr ctx, const CHAR* msg)
{
    fputs(msg, stderr);
}