Blame - parser.c - platform/external/libxml2

blob: 1b3b384ee7d1c9f02d0fa5b1c37e0f512e3cdfc3 [file] [log] [blame]

Daniel Veillard	260a68f	1998-08-13 03:39:55 +0000	[diff] [blame]	1	/*
				2	* parser.c : an XML 1.0 non-verifying parser
				3	*
				4	* See Copyright for the status of this software.
				5	*
				6	* $Id$
				7	*/
				8
				9	#ifdef WIN32
				10	#define HAVE_FCNTL_H
				11	#include <io.h>
				12	#else
				13	#include <config.h>
				14	#endif
				15	#include <stdio.h>
				16	#include <ctype.h>
				17	#include <string.h> /* for memset() only */
				18	#include <malloc.h>
				19	#include <sys/stat.h>
				20	#ifdef HAVE_FCNTL_H
				21	#include <fcntl.h>
				22	#endif
				23	#ifdef HAVE_UNISTD_H
				24	#include <unistd.h>
				25	#endif
				26	#ifdef HAVE_ZLIB_H
				27	#include <zlib.h>
				28	#endif
				29
				30	#include "tree.h"
				31	#include "parser.h"
				32	#include "entities.h"
				33
				34	/************************************************************************
				35	* *
				36	* Parser stacks related functions and macros *
				37	* *
				38	************************************************************************/
				39	/*
				40	* Generic function for accessing stacks in the Parser Context
				41	*/
				42
				43	#define PUSH_AND_POP(type, name) \
				44	int name##Push(xmlParserCtxtPtr ctxt, type value) { \
				45	if (ctxt->name##Nr >= ctxt->name##Max) { \
				46	ctxt->name##Max *= 2; \
				47	ctxt->name##Tab = (void *) realloc(ctxt->name##Tab, \
				48	ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
				49	if (ctxt->name##Tab == NULL) { \
				50	fprintf(stderr, "realloc failed !\n"); \
				51	exit(1); \
				52	} \
				53	} \
				54	ctxt->name##Tab[ctxt->name##Nr] = value; \
				55	ctxt->name = value; \
				56	return(ctxt->name##Nr++); \
				57	} \
				58	type name##Pop(xmlParserCtxtPtr ctxt) { \
				59	if (ctxt->name##Nr <= 0) return(0); \
				60	ctxt->name##Nr--; \
				61	ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
				62	return(ctxt->name); \
				63	} \
				64
				65	PUSH_AND_POP(xmlParserInputPtr, input)
				66	PUSH_AND_POP(xmlNodePtr, node)
				67
				68	/*************
				69	#define CUR ((ctxt->input->cur) ? (ctxt->input->cur) : xmlPopInput(ctxt))
				70	#define NEXT (((*(ctxt->input->cur) == '\n') ? \
				71	(ctxt->input->line++, ctxt->input->col = 1) : \
				72	(ctxt->input->col++)), ctxt->input->cur++)
				73	*************/
				74
				75	#define CUR (*ctxt->input->cur)
				76	#define NEXT ((*ctxt->input->cur) ? \
				77	(((*(ctxt->input->cur) == '\n') ? \
				78	(ctxt->input->line++, ctxt->input->col = 1) : \
				79	(ctxt->input->col++)), ctxt->input->cur++) : \
				80	(xmlPopInput(ctxt), ctxt->input->cur))
				81
				82	#define CUR_PTR ctxt->input->cur
				83
				84	#define NXT(val) ctxt->input->cur[(val)]
				85
				86	#define SKIP(val) ctxt->input->cur += (val)
				87	#define SKIP_BLANKS \
				88	while (IS_BLANK(*(ctxt->input->cur))) NEXT
				89
				90
				91	/*
				92	* xmlPopInput: the current input pointed by ctxt->input came to an end
				93	* pop it and return the next char.
				94	*
				95	* TODO A deallocation of the popped Input structure is needed
				96	*/
				97	CHAR xmlPopInput(xmlParserCtxtPtr ctxt) {
				98	if (ctxt->inputNr == 1) return(0); /* End of main Input */
				99	inputPop(ctxt);
				100	return(CUR);
				101	}
				102
				103	/*
				104	* xmlPushInput: switch to a new input stream which is stacked on top
				105	* of the previous one(s).
				106	*/
				107	void xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
				108	if (input == NULL) return;
				109	inputPush(ctxt, input);
				110	}
				111
				112	/*
				113	* Create a new input stream based on a memory buffer.
				114	*/
				115	void xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
				116	xmlParserInputPtr input;
				117
				118	if (entity == NULL) {
				119	xmlParserError(ctxt,
				120	"internal: xmlNewEntityInputStream entity = NULL\n");
				121	return;
				122	}
				123	if (entity->content == NULL) {
				124	xmlParserError(ctxt,
				125	"internal: xmlNewEntityInputStream entity->input = NULL\n");
				126	return;
				127	}
				128	input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
				129	if (input == NULL) {
				130	xmlParserError(ctxt, "malloc: couldn't allocate a new input stream\n");
				131	return;
				132	}
				133	input->filename = entity->SystemID; /* TODO !!! char <- CHAR */
				134	input->base = entity->content;
				135	input->cur = entity->content;
				136	input->line = 1;
				137	input->col = 1;
				138	xmlPushInput(ctxt, input);
				139	}
				140
				141	/*
				142	* A few macros needed to help building the parser.
				143	*/
				144
				145	#ifdef UNICODE
				146	/************************************************************************
				147	* *
				148	* UNICODE version of the macros. *
				149	* *
				150	************************************************************************/
				151	/*
				152	* [2] Char ::= #x9 \| #xA \| #xD \| [#x20-#xD7FF] \| [#xE000-#xFFFD]
				153	* \| [#x10000-#x10FFFF]
				154	* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
				155	*/
				156	#define IS_CHAR(c) \
				157	(((c) == 0x09) \|\| ((c) == 0x0a) \|\| ((c) == 0x0d) \|\| \
				158	(((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF)))
				159
				160	/*
				161	* [3] S ::= (#x20 \| #x9 \| #xD \| #xA)+
				162	*/
				163	#define IS_BLANK(c) (((c) == 0x20) \|\| ((c) == 0x09) \|\| ((c) == 0xa) \|\| \
				164	((c) == 0x0D))
				165
				166	/*
				167	* [85] BaseChar ::= ... long list see REC ...
				168	*
				169	* VI is your friend !
				170	* :1,$ s/\[#x$[0-9A-Z]$-#x$[0-9A-Z]$\]/ (((c) >= 0x\1) \&\& ((c) <= 0x\2)) \|\|/
				171	* and
				172	* :1,$ s/#x$[0-9A-Z]*$/ ((c) == 0x\1) \|\|/
				173	*/
				174	#define IS_BASECHAR(c) \
				175	((((c) >= 0x0041) && ((c) <= 0x005A)) \|\| \
				176	(((c) >= 0x0061) && ((c) <= 0x007A)) \|\| \
				177	(((c) >= 0x00C0) && ((c) <= 0x00D6)) \|\| \
				178	(((c) >= 0x00D8) && ((c) <= 0x00F6)) \|\| \
				179	(((c) >= 0x00F8) && ((c) <= 0x00FF)) \|\| \
				180	(((c) >= 0x0100) && ((c) <= 0x0131)) \|\| \
				181	(((c) >= 0x0134) && ((c) <= 0x013E)) \|\| \
				182	(((c) >= 0x0141) && ((c) <= 0x0148)) \|\| \
				183	(((c) >= 0x014A) && ((c) <= 0x017E)) \|\| \
				184	(((c) >= 0x0180) && ((c) <= 0x01C3)) \|\| \
				185	(((c) >= 0x01CD) && ((c) <= 0x01F0)) \|\| \
				186	(((c) >= 0x01F4) && ((c) <= 0x01F5)) \|\| \
				187	(((c) >= 0x01FA) && ((c) <= 0x0217)) \|\| \
				188	(((c) >= 0x0250) && ((c) <= 0x02A8)) \|\| \
				189	(((c) >= 0x02BB) && ((c) <= 0x02C1)) \|\| \
				190	((c) == 0x0386) \|\| \
				191	(((c) >= 0x0388) && ((c) <= 0x038A)) \|\| \
				192	((c) == 0x038C) \|\| \
				193	(((c) >= 0x038E) && ((c) <= 0x03A1)) \|\| \
				194	(((c) >= 0x03A3) && ((c) <= 0x03CE)) \|\| \
				195	(((c) >= 0x03D0) && ((c) <= 0x03D6)) \|\| \
				196	((c) == 0x03DA) \|\| \
				197	((c) == 0x03DC) \|\| \
				198	((c) == 0x03DE) \|\| \
				199	((c) == 0x03E0) \|\| \
				200	(((c) >= 0x03E2) && ((c) <= 0x03F3)) \|\| \
				201	(((c) >= 0x0401) && ((c) <= 0x040C)) \|\| \
				202	(((c) >= 0x040E) && ((c) <= 0x044F)) \|\| \
				203	(((c) >= 0x0451) && ((c) <= 0x045C)) \|\| \
				204	(((c) >= 0x045E) && ((c) <= 0x0481)) \|\| \
				205	(((c) >= 0x0490) && ((c) <= 0x04C4)) \|\| \
				206	(((c) >= 0x04C7) && ((c) <= 0x04C8)) \|\| \
				207	(((c) >= 0x04CB) && ((c) <= 0x04CC)) \|\| \
				208	(((c) >= 0x04D0) && ((c) <= 0x04EB)) \|\| \
				209	(((c) >= 0x04EE) && ((c) <= 0x04F5)) \|\| \
				210	(((c) >= 0x04F8) && ((c) <= 0x04F9)) \|\| \
				211	(((c) >= 0x0531) && ((c) <= 0x0556)) \|\| \
				212	((c) == 0x0559) \|\| \
				213	(((c) >= 0x0561) && ((c) <= 0x0586)) \|\| \
				214	(((c) >= 0x05D0) && ((c) <= 0x05EA)) \|\| \
				215	(((c) >= 0x05F0) && ((c) <= 0x05F2)) \|\| \
				216	(((c) >= 0x0621) && ((c) <= 0x063A)) \|\| \
				217	(((c) >= 0x0641) && ((c) <= 0x064A)) \|\| \
				218	(((c) >= 0x0671) && ((c) <= 0x06B7)) \|\| \
				219	(((c) >= 0x06BA) && ((c) <= 0x06BE)) \|\| \
				220	(((c) >= 0x06C0) && ((c) <= 0x06CE)) \|\| \
				221	(((c) >= 0x06D0) && ((c) <= 0x06D3)) \|\| \
				222	((c) == 0x06D5) \|\| \
				223	(((c) >= 0x06E5) && ((c) <= 0x06E6)) \|\| \
				224	(((c) >= 0x0905) && ((c) <= 0x0939)) \|\| \
				225	((c) == 0x093D) \|\| \
				226	(((c) >= 0x0958) && ((c) <= 0x0961)) \|\| \
				227	(((c) >= 0x0985) && ((c) <= 0x098C)) \|\| \
				228	(((c) >= 0x098F) && ((c) <= 0x0990)) \|\| \
				229	(((c) >= 0x0993) && ((c) <= 0x09A8)) \|\| \
				230	(((c) >= 0x09AA) && ((c) <= 0x09B0)) \|\| \
				231	((c) == 0x09B2) \|\| \
				232	(((c) >= 0x09B6) && ((c) <= 0x09B9)) \|\| \
				233	(((c) >= 0x09DC) && ((c) <= 0x09DD)) \|\| \
				234	(((c) >= 0x09DF) && ((c) <= 0x09E1)) \|\| \
				235	(((c) >= 0x09F0) && ((c) <= 0x09F1)) \|\| \
				236	(((c) >= 0x0A05) && ((c) <= 0x0A0A)) \|\| \
				237	(((c) >= 0x0A0F) && ((c) <= 0x0A10)) \|\| \
				238	(((c) >= 0x0A13) && ((c) <= 0x0A28)) \|\| \
				239	(((c) >= 0x0A2A) && ((c) <= 0x0A30)) \|\| \
				240	(((c) >= 0x0A32) && ((c) <= 0x0A33)) \|\| \
				241	(((c) >= 0x0A35) && ((c) <= 0x0A36)) \|\| \
				242	(((c) >= 0x0A38) && ((c) <= 0x0A39)) \|\| \
				243	(((c) >= 0x0A59) && ((c) <= 0x0A5C)) \|\| \
				244	((c) == 0x0A5E) \|\| \
				245	(((c) >= 0x0A72) && ((c) <= 0x0A74)) \|\| \
				246	(((c) >= 0x0A85) && ((c) <= 0x0A8B)) \|\| \
				247	((c) == 0x0A8D) \|\| \
				248	(((c) >= 0x0A8F) && ((c) <= 0x0A91)) \|\| \
				249	(((c) >= 0x0A93) && ((c) <= 0x0AA8)) \|\| \
				250	(((c) >= 0x0AAA) && ((c) <= 0x0AB0)) \|\| \
				251	(((c) >= 0x0AB2) && ((c) <= 0x0AB3)) \|\| \
				252	(((c) >= 0x0AB5) && ((c) <= 0x0AB9)) \|\| \
				253	((c) == 0x0ABD) \|\| \
				254	((c) == 0x0AE0) \|\| \
				255	(((c) >= 0x0B05) && ((c) <= 0x0B0C)) \|\| \
				256	(((c) >= 0x0B0F) && ((c) <= 0x0B10)) \|\| \
				257	(((c) >= 0x0B13) && ((c) <= 0x0B28)) \|\| \
				258	(((c) >= 0x0B2A) && ((c) <= 0x0B30)) \|\| \
				259	(((c) >= 0x0B32) && ((c) <= 0x0B33)) \|\| \
				260	(((c) >= 0x0B36) && ((c) <= 0x0B39)) \|\| \
				261	((c) == 0x0B3D) \|\| \
				262	(((c) >= 0x0B5C) && ((c) <= 0x0B5D)) \|\| \
				263	(((c) >= 0x0B5F) && ((c) <= 0x0B61)) \|\| \
				264	(((c) >= 0x0B85) && ((c) <= 0x0B8A)) \|\| \
				265	(((c) >= 0x0B8E) && ((c) <= 0x0B90)) \|\| \
				266	(((c) >= 0x0B92) && ((c) <= 0x0B95)) \|\| \
				267	(((c) >= 0x0B99) && ((c) <= 0x0B9A)) \|\| \
				268	((c) == 0x0B9C) \|\| \
				269	(((c) >= 0x0B9E) && ((c) <= 0x0B9F)) \|\| \
				270	(((c) >= 0x0BA3) && ((c) <= 0x0BA4)) \|\| \
				271	(((c) >= 0x0BA8) && ((c) <= 0x0BAA)) \|\| \
				272	(((c) >= 0x0BAE) && ((c) <= 0x0BB5)) \|\| \
				273	(((c) >= 0x0BB7) && ((c) <= 0x0BB9)) \|\| \
				274	(((c) >= 0x0C05) && ((c) <= 0x0C0C)) \|\| \
				275	(((c) >= 0x0C0E) && ((c) <= 0x0C10)) \|\| \
				276	(((c) >= 0x0C12) && ((c) <= 0x0C28)) \|\| \
				277	(((c) >= 0x0C2A) && ((c) <= 0x0C33)) \|\| \
				278	(((c) >= 0x0C35) && ((c) <= 0x0C39)) \|\| \
				279	(((c) >= 0x0C60) && ((c) <= 0x0C61)) \|\| \
				280	(((c) >= 0x0C85) && ((c) <= 0x0C8C)) \|\| \
				281	(((c) >= 0x0C8E) && ((c) <= 0x0C90)) \|\| \
				282	(((c) >= 0x0C92) && ((c) <= 0x0CA8)) \|\| \
				283	(((c) >= 0x0CAA) && ((c) <= 0x0CB3)) \|\| \
				284	(((c) >= 0x0CB5) && ((c) <= 0x0CB9)) \|\| \
				285	((c) == 0x0CDE) \|\| \
				286	(((c) >= 0x0CE0) && ((c) <= 0x0CE1)) \|\| \
				287	(((c) >= 0x0D05) && ((c) <= 0x0D0C)) \|\| \
				288	(((c) >= 0x0D0E) && ((c) <= 0x0D10)) \|\| \
				289	(((c) >= 0x0D12) && ((c) <= 0x0D28)) \|\| \
				290	(((c) >= 0x0D2A) && ((c) <= 0x0D39)) \|\| \
				291	(((c) >= 0x0D60) && ((c) <= 0x0D61)) \|\| \
				292	(((c) >= 0x0E01) && ((c) <= 0x0E2E)) \|\| \
				293	((c) == 0x0E30) \|\| \
				294	(((c) >= 0x0E32) && ((c) <= 0x0E33)) \|\| \
				295	(((c) >= 0x0E40) && ((c) <= 0x0E45)) \|\| \
				296	(((c) >= 0x0E81) && ((c) <= 0x0E82)) \|\| \
				297	((c) == 0x0E84) \|\| \
				298	(((c) >= 0x0E87) && ((c) <= 0x0E88)) \|\| \
				299	((c) == 0x0E8A) \|\| \
				300	((c) == 0x0E8D) \|\| \
				301	(((c) >= 0x0E94) && ((c) <= 0x0E97)) \|\| \
				302	(((c) >= 0x0E99) && ((c) <= 0x0E9F)) \|\| \
				303	(((c) >= 0x0EA1) && ((c) <= 0x0EA3)) \|\| \
				304	((c) == 0x0EA5) \|\| \
				305	((c) == 0x0EA7) \|\| \
				306	(((c) >= 0x0EAA) && ((c) <= 0x0EAB)) \|\| \
				307	(((c) >= 0x0EAD) && ((c) <= 0x0EAE)) \|\| \
				308	((c) == 0x0EB0) \|\| \
				309	(((c) >= 0x0EB2) && ((c) <= 0x0EB3)) \|\| \
				310	((c) == 0x0EBD) \|\| \
				311	(((c) >= 0x0EC0) && ((c) <= 0x0EC4)) \|\| \
				312	(((c) >= 0x0F40) && ((c) <= 0x0F47)) \|\| \
				313	(((c) >= 0x0F49) && ((c) <= 0x0F69)) \|\| \
				314	(((c) >= 0x10A0) && ((c) <= 0x10C5)) \|\| \
				315	(((c) >= 0x10D0) && ((c) <= 0x10F6)) \|\| \
				316	((c) == 0x1100) \|\| \
				317	(((c) >= 0x1102) && ((c) <= 0x1103)) \|\| \
				318	(((c) >= 0x1105) && ((c) <= 0x1107)) \|\| \
				319	((c) == 0x1109) \|\| \
				320	(((c) >= 0x110B) && ((c) <= 0x110C)) \|\| \
				321	(((c) >= 0x110E) && ((c) <= 0x1112)) \|\| \
				322	((c) == 0x113C) \|\| \
				323	((c) == 0x113E) \|\| \
				324	((c) == 0x1140) \|\| \
				325	((c) == 0x114C) \|\| \
				326	((c) == 0x114E) \|\| \
				327	((c) == 0x1150) \|\| \
				328	(((c) >= 0x1154) && ((c) <= 0x1155)) \|\| \
				329	((c) == 0x1159) \|\| \
				330	(((c) >= 0x115F) && ((c) <= 0x1161)) \|\| \
				331	((c) == 0x1163) \|\| \
				332	((c) == 0x1165) \|\| \
				333	((c) == 0x1167) \|\| \
				334	((c) == 0x1169) \|\| \
				335	(((c) >= 0x116D) && ((c) <= 0x116E)) \|\| \
				336	(((c) >= 0x1172) && ((c) <= 0x1173)) \|\| \
				337	((c) == 0x1175) \|\| \
				338	((c) == 0x119E) \|\| \
				339	((c) == 0x11A8) \|\| \
				340	((c) == 0x11AB) \|\| \
				341	(((c) >= 0x11AE) && ((c) <= 0x11AF)) \|\| \
				342	(((c) >= 0x11B7) && ((c) <= 0x11B8)) \|\| \
				343	((c) == 0x11BA) \|\| \
				344	(((c) >= 0x11BC) && ((c) <= 0x11C2)) \|\| \
				345	((c) == 0x11EB) \|\| \
				346	((c) == 0x11F0) \|\| \
				347	((c) == 0x11F9) \|\| \
				348	(((c) >= 0x1E00) && ((c) <= 0x1E9B)) \|\| \
				349	(((c) >= 0x1EA0) && ((c) <= 0x1EF9)) \|\| \
				350	(((c) >= 0x1F00) && ((c) <= 0x1F15)) \|\| \
				351	(((c) >= 0x1F18) && ((c) <= 0x1F1D)) \|\| \
				352	(((c) >= 0x1F20) && ((c) <= 0x1F45)) \|\| \
				353	(((c) >= 0x1F48) && ((c) <= 0x1F4D)) \|\| \
				354	(((c) >= 0x1F50) && ((c) <= 0x1F57)) \|\| \
				355	((c) == 0x1F59) \|\| \
				356	((c) == 0x1F5B) \|\| \
				357	((c) == 0x1F5D) \|\| \
				358	(((c) >= 0x1F5F) && ((c) <= 0x1F7D)) \|\| \
				359	(((c) >= 0x1F80) && ((c) <= 0x1FB4)) \|\| \
				360	(((c) >= 0x1FB6) && ((c) <= 0x1FBC)) \|\| \
				361	((c) == 0x1FBE) \|\| \
				362	(((c) >= 0x1FC2) && ((c) <= 0x1FC4)) \|\| \
				363	(((c) >= 0x1FC6) && ((c) <= 0x1FCC)) \|\| \
				364	(((c) >= 0x1FD0) && ((c) <= 0x1FD3)) \|\| \
				365	(((c) >= 0x1FD6) && ((c) <= 0x1FDB)) \|\| \
				366	(((c) >= 0x1FE0) && ((c) <= 0x1FEC)) \|\| \
				367	(((c) >= 0x1FF2) && ((c) <= 0x1FF4)) \|\| \
				368	(((c) >= 0x1FF6) && ((c) <= 0x1FFC)) \|\| \
				369	((c) == 0x2126) \|\| \
				370	(((c) >= 0x212A) && ((c) <= 0x212B)) \|\| \
				371	((c) == 0x212E) \|\| \
				372	(((c) >= 0x2180) && ((c) <= 0x2182)) \|\| \
				373	(((c) >= 0x3041) && ((c) <= 0x3094)) \|\| \
				374	(((c) >= 0x30A1) && ((c) <= 0x30FA)) \|\| \
				375	(((c) >= 0x3105) && ((c) <= 0x312C)) \|\| \
				376	(((c) >= 0xAC00) && ((c) <= 0xD7A3)))
				377
				378	/*
				379	* [88] Digit ::= ... long list see REC ...
				380	*/
				381	#define IS_DIGIT(c) \
				382	((((c) >= 0x0030) && ((c) <= 0x0039)) \|\| \
				383	(((c) >= 0x0660) && ((c) <= 0x0669)) \|\| \
				384	(((c) >= 0x06F0) && ((c) <= 0x06F9)) \|\| \
				385	(((c) >= 0x0966) && ((c) <= 0x096F)) \|\| \
				386	(((c) >= 0x09E6) && ((c) <= 0x09EF)) \|\| \
				387	(((c) >= 0x0A66) && ((c) <= 0x0A6F)) \|\| \
				388	(((c) >= 0x0AE6) && ((c) <= 0x0AEF)) \|\| \
				389	(((c) >= 0x0B66) && ((c) <= 0x0B6F)) \|\| \
				390	(((c) >= 0x0BE7) && ((c) <= 0x0BEF)) \|\| \
				391	(((c) >= 0x0C66) && ((c) <= 0x0C6F)) \|\| \
				392	(((c) >= 0x0CE6) && ((c) <= 0x0CEF)) \|\| \
				393	(((c) >= 0x0D66) && ((c) <= 0x0D6F)) \|\| \
				394	(((c) >= 0x0E50) && ((c) <= 0x0E59)) \|\| \
				395	(((c) >= 0x0ED0) && ((c) <= 0x0ED9)) \|\| \
				396	(((c) >= 0x0F20) && ((c) <= 0x0F29)))
				397
				398	/*
				399	* [87] CombiningChar ::= ... long list see REC ...
				400	*/
				401	#define IS_COMBINING(c) \
				402	((((c) >= 0x0300) && ((c) <= 0x0345)) \|\| \
				403	(((c) >= 0x0360) && ((c) <= 0x0361)) \|\| \
				404	(((c) >= 0x0483) && ((c) <= 0x0486)) \|\| \
				405	(((c) >= 0x0591) && ((c) <= 0x05A1)) \|\| \
				406	(((c) >= 0x05A3) && ((c) <= 0x05B9)) \|\| \
				407	(((c) >= 0x05BB) && ((c) <= 0x05BD)) \|\| \
				408	((c) == 0x05BF) \|\| \
				409	(((c) >= 0x05C1) && ((c) <= 0x05C2)) \|\| \
				410	((c) == 0x05C4) \|\| \
				411	(((c) >= 0x064B) && ((c) <= 0x0652)) \|\| \
				412	((c) == 0x0670) \|\| \
				413	(((c) >= 0x06D6) && ((c) <= 0x06DC)) \|\| \
				414	(((c) >= 0x06DD) && ((c) <= 0x06DF)) \|\| \
				415	(((c) >= 0x06E0) && ((c) <= 0x06E4)) \|\| \
				416	(((c) >= 0x06E7) && ((c) <= 0x06E8)) \|\| \
				417	(((c) >= 0x06EA) && ((c) <= 0x06ED)) \|\| \
				418	(((c) >= 0x0901) && ((c) <= 0x0903)) \|\| \
				419	((c) == 0x093C) \|\| \
				420	(((c) >= 0x093E) && ((c) <= 0x094C)) \|\| \
				421	((c) == 0x094D) \|\| \
				422	(((c) >= 0x0951) && ((c) <= 0x0954)) \|\| \
				423	(((c) >= 0x0962) && ((c) <= 0x0963)) \|\| \
				424	(((c) >= 0x0981) && ((c) <= 0x0983)) \|\| \
				425	((c) == 0x09BC) \|\| \
				426	((c) == 0x09BE) \|\| \
				427	((c) == 0x09BF) \|\| \
				428	(((c) >= 0x09C0) && ((c) <= 0x09C4)) \|\| \
				429	(((c) >= 0x09C7) && ((c) <= 0x09C8)) \|\| \
				430	(((c) >= 0x09CB) && ((c) <= 0x09CD)) \|\| \
				431	((c) == 0x09D7) \|\| \
				432	(((c) >= 0x09E2) && ((c) <= 0x09E3)) \|\| \
				433	((c) == 0x0A02) \|\| \
				434	((c) == 0x0A3C) \|\| \
				435	((c) == 0x0A3E) \|\| \
				436	((c) == 0x0A3F) \|\| \
				437	(((c) >= 0x0A40) && ((c) <= 0x0A42)) \|\| \
				438	(((c) >= 0x0A47) && ((c) <= 0x0A48)) \|\| \
				439	(((c) >= 0x0A4B) && ((c) <= 0x0A4D)) \|\| \
				440	(((c) >= 0x0A70) && ((c) <= 0x0A71)) \|\| \
				441	(((c) >= 0x0A81) && ((c) <= 0x0A83)) \|\| \
				442	((c) == 0x0ABC) \|\| \
				443	(((c) >= 0x0ABE) && ((c) <= 0x0AC5)) \|\| \
				444	(((c) >= 0x0AC7) && ((c) <= 0x0AC9)) \|\| \
				445	(((c) >= 0x0ACB) && ((c) <= 0x0ACD)) \|\| \
				446	(((c) >= 0x0B01) && ((c) <= 0x0B03)) \|\| \
				447	((c) == 0x0B3C) \|\| \
				448	(((c) >= 0x0B3E) && ((c) <= 0x0B43)) \|\| \
				449	(((c) >= 0x0B47) && ((c) <= 0x0B48)) \|\| \
				450	(((c) >= 0x0B4B) && ((c) <= 0x0B4D)) \|\| \
				451	(((c) >= 0x0B56) && ((c) <= 0x0B57)) \|\| \
				452	(((c) >= 0x0B82) && ((c) <= 0x0B83)) \|\| \
				453	(((c) >= 0x0BBE) && ((c) <= 0x0BC2)) \|\| \
				454	(((c) >= 0x0BC6) && ((c) <= 0x0BC8)) \|\| \
				455	(((c) >= 0x0BCA) && ((c) <= 0x0BCD)) \|\| \
				456	((c) == 0x0BD7) \|\| \
				457	(((c) >= 0x0C01) && ((c) <= 0x0C03)) \|\| \
				458	(((c) >= 0x0C3E) && ((c) <= 0x0C44)) \|\| \
				459	(((c) >= 0x0C46) && ((c) <= 0x0C48)) \|\| \
				460	(((c) >= 0x0C4A) && ((c) <= 0x0C4D)) \|\| \
				461	(((c) >= 0x0C55) && ((c) <= 0x0C56)) \|\| \
				462	(((c) >= 0x0C82) && ((c) <= 0x0C83)) \|\| \
				463	(((c) >= 0x0CBE) && ((c) <= 0x0CC4)) \|\| \
				464	(((c) >= 0x0CC6) && ((c) <= 0x0CC8)) \|\| \
				465	(((c) >= 0x0CCA) && ((c) <= 0x0CCD)) \|\| \
				466	(((c) >= 0x0CD5) && ((c) <= 0x0CD6)) \|\| \
				467	(((c) >= 0x0D02) && ((c) <= 0x0D03)) \|\| \
				468	(((c) >= 0x0D3E) && ((c) <= 0x0D43)) \|\| \
				469	(((c) >= 0x0D46) && ((c) <= 0x0D48)) \|\| \
				470	(((c) >= 0x0D4A) && ((c) <= 0x0D4D)) \|\| \
				471	((c) == 0x0D57) \|\| \
				472	((c) == 0x0E31) \|\| \
				473	(((c) >= 0x0E34) && ((c) <= 0x0E3A)) \|\| \
				474	(((c) >= 0x0E47) && ((c) <= 0x0E4E)) \|\| \
				475	((c) == 0x0EB1) \|\| \
				476	(((c) >= 0x0EB4) && ((c) <= 0x0EB9)) \|\| \
				477	(((c) >= 0x0EBB) && ((c) <= 0x0EBC)) \|\| \
				478	(((c) >= 0x0EC8) && ((c) <= 0x0ECD)) \|\| \
				479	(((c) >= 0x0F18) && ((c) <= 0x0F19)) \|\| \
				480	((c) == 0x0F35) \|\| \
				481	((c) == 0x0F37) \|\| \
				482	((c) == 0x0F39) \|\| \
				483	((c) == 0x0F3E) \|\| \
				484	((c) == 0x0F3F) \|\| \
				485	(((c) >= 0x0F71) && ((c) <= 0x0F84)) \|\| \
				486	(((c) >= 0x0F86) && ((c) <= 0x0F8B)) \|\| \
				487	(((c) >= 0x0F90) && ((c) <= 0x0F95)) \|\| \
				488	((c) == 0x0F97) \|\| \
				489	(((c) >= 0x0F99) && ((c) <= 0x0FAD)) \|\| \
				490	(((c) >= 0x0FB1) && ((c) <= 0x0FB7)) \|\| \
				491	((c) == 0x0FB9) \|\| \
				492	(((c) >= 0x20D0) && ((c) <= 0x20DC)) \|\| \
				493	((c) == 0x20E1) \|\| \
				494	(((c) >= 0x302A) && ((c) <= 0x302F)) \|\| \
				495	((c) == 0x3099) \|\| \
				496	((c) == 0x309A))
				497
				498	/*
				499	* [89] Extender ::= #x00B7 \| #x02D0 \| #x02D1 \| #x0387 \| #x0640 \|
				500	* #x0E46 \| #x0EC6 \| #x3005 \| [#x3031-#x3035] \|
				501	* [#x309D-#x309E] \| [#x30FC-#x30FE]
				502	*/
				503	#define IS_EXTENDER(c) \
				504	(((c) == 0xb7) \|\| ((c) == 0x2d0) \|\| ((c) == 0x2d1) \|\| \
				505	((c) == 0x387) \|\| ((c) == 0x640) \|\| ((c) == 0xe46) \|\| \
				506	((c) == 0xec6) \|\| ((c) == 0x3005) \
				507	(((c) >= 0x3031) && ((c) <= 0x3035)) \|\| \
				508	(((c) >= 0x309b) && ((c) <= 0x309e)) \|\| \
				509	(((c) >= 0x30fc) && ((c) <= 0x30fe)))
				510
				511	/*
				512	* [86] Ideographic ::= [#x4E00-#x9FA5] \| #x3007 \| [#x3021-#x3029]
				513	*/
				514	#define IS_IDEOGRAPHIC(c) \
				515	((((c) >= 0x4e00) && ((c) <= 0x9fa5)) \|\| \
				516	(((c) >= 0xf900) && ((c) <= 0xfa2d)) \|\| \
				517	(((c) >= 0x3021) && ((c) <= 0x3029)) \|\| \
				518	((c) == 0x3007))
				519
				520	/*
				521	* [84] Letter ::= BaseChar \| Ideographic
				522	*/
				523	#define IS_LETTER(c) (IS_BASECHAR(c) \|\| IS_IDEOGRAPHIC(c))
				524
				525	#else
				526	/************************************************************************
				527	* *
				528	* 8bits / ASCII version of the macros. *
				529	* *
				530	************************************************************************/
				531	/*
				532	* [2] Char ::= #x9 \| #xA \| #xD \| [#x20-#xD7FF] \| [#xE000-#xFFFD]
				533	* \| [#x10000-#x10FFFF]
				534	* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
				535	*/
				536	#define IS_CHAR(c) \
				537	(((c) == 0x09) \|\| ((c) == 0x0a) \|\| ((c) == 0x0d) \|\| ((c) >= 0x20) \|\|\
				538	((c) == 0xa))
				539
				540	/*
				541	* [85] BaseChar ::= ... long list see REC ...
				542	*/
				543	#define IS_BASECHAR(c) \
				544	((((c) >= 0x41) && ((c) <= 0x5a)) \|\| \
				545	(((c) >= 0x61) && ((c) <= 0x7a)) \|\| \
				546	(((c) >= 0xaa) && ((c) <= 0x5b)) \|\| \
				547	(((c) >= 0xc0) && ((c) <= 0xd6)) \|\| \
				548	(((c) >= 0xd8) && ((c) <= 0xf6)) \|\| \
				549	(((c) >= 0xf8) && ((c) <= 0xff)) \|\| \
				550	((c) == 0xba))
				551
				552	/*
				553	* [88] Digit ::= ... long list see REC ...
				554	*/
				555	#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
				556
				557	/*
				558	* [84] Letter ::= BaseChar \| Ideographic
				559	*/
				560	#define IS_LETTER(c) IS_BASECHAR(c)
				561
				562
				563	/*
				564	* [87] CombiningChar ::= ... long list see REC ...
				565	*/
				566	#define IS_COMBINING(c) 0
				567
				568	/*
				569	* [89] Extender ::= #x00B7 \| #x02D0 \| #x02D1 \| #x0387 \| #x0640 \|
				570	* #x0E46 \| #x0EC6 \| #x3005 \| [#x3031-#x3035] \|
				571	* [#x309D-#x309E] \| [#x30FC-#x30FE]
				572	*/
				573	#define IS_EXTENDER(c) ((c) == 0xb7)
				574
				575	#endif /* !UNICODE */
				576
				577	/*
				578	* Blank chars.
				579	*
				580	* [3] S ::= (#x20 \| #x9 \| #xD \| #xA)+
				581	*/
				582	#define IS_BLANK(c) (((c) == 0x20) \|\| ((c) == 0x09) \|\| ((c) == 0xa) \|\| \
				583	((c) == 0x0D))
				584
				585	/*
				586	* [13] PubidChar ::= #x20 \| #xD \| #xA \| [a-zA-Z0-9] \| [-'()+,./:=?;!*#@$_%]
				587	*/
				588	#define IS_PUBIDCHAR(c) \
				589	(((c) == 0x20) \|\| ((c) == 0x0D) \|\| ((c) == 0x0A) \|\| \
				590	(((c) >= 'a') && ((c) <= 'z')) \|\| \
				591	(((c) >= 'A') && ((c) <= 'Z')) \|\| \
				592	(((c) >= '0') && ((c) <= '9')) \|\| \
				593	((c) == '-') \|\| ((c) == '\'') \|\| ((c) == '(') \|\| ((c) == ')') \|\| \
				594	((c) == '+') \|\| ((c) == ',') \|\| ((c) == '.') \|\| ((c) == '/') \|\| \
				595	((c) == ':') \|\| ((c) == '=') \|\| ((c) == '?') \|\| ((c) == ';') \|\| \
				596	((c) == '!') \|\| ((c) == '*') \|\| ((c) == '#') \|\| ((c) == '@') \|\| \
				597	((c) == '$') \|\| ((c) == '_') \|\| ((c) == '%'))
				598
				599	#define SKIP_EOL(p) \
				600	if ((p) == 0x13) { p++ ; if ((p) == 0x10) p++; } \
				601	if ((p) == 0x10) { p++ ; if ((p) == 0x13) p++; }
				602
				603	#define MOVETO_ENDTAG(p) \
				604	while (IS_CHAR(p) && ((p) != '>')) (p)++
				605
				606	#define MOVETO_STARTTAG(p) \
				607	while (IS_CHAR(p) && ((p) != '<')) (p)++
				608
				609	/************************************************************************
				610	* *
				611	* Commodity functions to handle CHARs *
				612	* *
				613	************************************************************************/
				614
				615	/*
				616	* xmlStrndup : a strndup for array of CHAR's
				617	*/
				618
				619	CHAR xmlStrndup(const CHAR cur, int len) {
				620	CHAR ret = malloc((len + 1) sizeof(CHAR));
				621
				622	if (ret == NULL) {
				623	fprintf(stderr, "malloc of %d byte failed\n",
				624	(len + 1) * sizeof(CHAR));
				625	return(NULL);
				626	}
				627	memcpy(ret, cur, len * sizeof(CHAR));
				628	ret[len] = 0;
				629	return(ret);
				630	}
				631
				632	/*
				633	* xmlStrdup : a strdup for CHAR's
				634	*/
				635
				636	CHAR xmlStrdup(const CHAR cur) {
				637	const CHAR *p = cur;
				638
				639	while (IS_CHAR(*p)) p++;
				640	return(xmlStrndup(cur, p - cur));
				641	}
				642
				643	/*
				644	* xmlCharStrndup : a strndup for char's to CHAR's
				645	*/
				646
				647	CHAR xmlCharStrndup(const char cur, int len) {
				648	int i;
				649	CHAR ret = malloc((len + 1) sizeof(CHAR));
				650
				651	if (ret == NULL) {
				652	fprintf(stderr, "malloc of %d byte failed\n",
				653	(len + 1) * sizeof(CHAR));
				654	return(NULL);
				655	}
				656	for (i = 0;i < len;i++)
				657	ret[i] = (CHAR) cur[i];
				658	ret[len] = 0;
				659	return(ret);
				660	}
				661
				662	/*
				663	* xmlCharStrdup : a strdup for char's to CHAR's
				664	*/
				665
				666	CHAR xmlCharStrdup(const char cur) {
				667	const char *p = cur;
				668
				669	while (*p != '\0') p++;
				670	return(xmlCharStrndup(cur, p - cur));
				671	}
				672
				673	/*
				674	* xmlStrcmp : a strcmp for CHAR's
				675	*/
				676
				677	int xmlStrcmp(const CHAR str1, const CHAR str2) {
				678	register int tmp;
				679
				680	do {
				681	tmp = str1++ - str2++;
				682	if (tmp != 0) return(tmp);
				683	} while ((str1 != 0) && (str2 != 0));
				684	return (str1 - str2);
				685	}
				686
				687	/*
				688	* xmlStrncmp : a strncmp for CHAR's
				689	*/
				690
				691	int xmlStrncmp(const CHAR str1, const CHAR str2, int len) {
				692	register int tmp;
				693
				694	if (len <= 0) return(0);
				695	do {
				696	tmp = str1++ - str2++;
				697	if (tmp != 0) return(tmp);
				698	len--;
				699	if (len <= 0) return(0);
				700	} while ((str1 != 0) && (str2 != 0));
				701	return (str1 - str2);
				702	}
				703
				704	/*
				705	* xmlStrchr : a strchr for CHAR's
				706	*/
				707
				708	CHAR xmlStrchr(const CHAR str, CHAR val) {
				709	while (*str != 0) {
				710	if (str == val) return((CHAR ) str);
				711	str++;
				712	}
				713	return(NULL);
				714	}
				715
				716	/*
				717	* xmlStrlen : lenght of a CHAR's string
				718	*/
				719
				720	int xmlStrlen(const CHAR *str) {
				721	int len = 0;
				722
				723	if (str == NULL) return(0);
				724	while (*str != 0) {
				725	str++;
				726	len++;
				727	}
				728	return(len);
				729	}
				730
				731	/*
				732	* xmlStrncat : a strncat for array of CHAR's
				733	*/
				734
				735	CHAR xmlStrncat(CHAR cur, const CHAR *add, int len) {
				736	int size;
				737	CHAR *ret;
				738
				739	if ((add == NULL) \|\| (len == 0))
				740	return(cur);
				741	if (cur == NULL)
				742	return(xmlStrndup(add, len));
				743
				744	size = xmlStrlen(cur);
				745	ret = realloc(cur, (size + len + 1) * sizeof(CHAR));
				746	if (ret == NULL) {
				747	fprintf(stderr, "xmlStrncat: realloc of %d byte failed\n",
				748	(size + len + 1) * sizeof(CHAR));
				749	return(cur);
				750	}
				751	memcpy(&ret[size], add, len * sizeof(CHAR));
				752	ret[size + len] = 0;
				753	return(ret);
				754	}
				755
				756	/*
				757	* xmlStrcat : a strcat for CHAR's
				758	*/
				759
				760	CHAR xmlStrcat(CHAR cur, const CHAR *add) {
				761	const CHAR *p = add;
				762
				763	if (add == NULL) return(cur);
				764	if (cur == NULL)
				765	return(xmlStrdup(add));
				766
				767	while (IS_CHAR(*p)) p++;
				768	return(xmlStrncat(cur, add, p - add));
				769	}
				770
				771	/************************************************************************
				772	* *
				773	* Commodity functions, cleanup needed ? *
				774	* *
				775	************************************************************************/
				776
				777	/*
				778	* Is this a sequence of blank chars that one can ignore ?
				779	*/
				780
				781	static int areBlanks(xmlParserCtxtPtr ctxt, const CHAR *str, int len) {
				782	int i;
				783	xmlNodePtr lastChild;
				784
				785	for (i = 0;i < len;i++)
				786	if (!(IS_BLANK(str[i]))) return(0);
				787
				788	if (CUR != '<') return(0);
				789	lastChild = xmlGetLastChild(ctxt->node);
				790	if (lastChild == NULL) {
				791	if (ctxt->node->content != NULL) return(0);
				792	} else if (xmlNodeIsText(lastChild))
				793	return(0);
				794	return(1);
				795	}
				796
				797	/*
				798	* Handling of defined entities, when should we define a new input
				799	* stream ? When do we just handle that as a set of chars ?
				800	*/
				801
				802	void xmlHandleEntity(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
				803	int len;
				804
				805	if (entity->content == NULL) {
				806	xmlParserError(ctxt, "xmlHandleEntity %s: content == NULL\n",
				807	entity->name);
				808	return;
				809	}
				810	len = xmlStrlen(entity->content);
				811	if (len <= 2) goto handle_as_char;
				812
				813	/*
				814	* Redefine its content as an input stream.
				815	*/
				816	xmlNewEntityInputStream(ctxt, entity);
				817	return;
				818
				819	handle_as_char:
				820	/*
				821	* Just handle the content as a set of chars.
				822	*/
				823	if (ctxt->sax != NULL)
				824	ctxt->sax->characters(ctxt, entity->content, 0, len);
				825
				826	}
				827
				828	/*
				829	* Forward declarations for recursive behaviour.
				830	*/
				831	xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt);
				832	CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt, int inLine);
				833	CHAR *xmlParseReference(xmlParserCtxtPtr ctxt, int inLine);
				834
				835	/************************************************************************
				836	* *
				837	* Extra stuff for namespace support *
				838	* Relates to http://www.w3.org/TR/WD-xml-names *
				839	* *
				840	************************************************************************/
				841
				842	/*
				843	* xmlNamespaceParseNCName : parse an XML namespace name.
				844	*
				845	* [NS 3] NCName ::= (Letter \| '_') (NCNameChar)*
				846	*
				847	* [NS 4] NCNameChar ::= Letter \| Digit \| '.' \| '-' \| '_' \|
				848	* CombiningChar \| Extender
				849	*/
				850
				851	CHAR *xmlNamespaceParseNCName(xmlParserCtxtPtr ctxt) {
				852	const CHAR *q;
				853	CHAR *ret = NULL;
				854
				855	if (!IS_LETTER(CUR) && (CUR != '_')) return(NULL);
				856	q = NEXT;
				857
				858	while ((IS_LETTER(CUR)) \|\| (IS_DIGIT(CUR)) \|\|
				859	(CUR == '.') \|\| (CUR == '-') \|\|
				860	(CUR == '_') \|\|
				861	(IS_COMBINING(CUR)) \|\|
				862	(IS_EXTENDER(CUR)))
				863	NEXT;
				864
				865	ret = xmlStrndup(q, CUR_PTR - q);
				866
				867	return(ret);
				868	}
				869
				870	/*
				871	* xmlNamespaceParseQName : parse an XML qualified name
				872	*
				873	* [NS 5] QName ::= (Prefix ':')? LocalPart
				874	*
				875	* [NS 6] Prefix ::= NCName
				876	*
				877	* [NS 7] LocalPart ::= NCName
				878	*/
				879
				880	CHAR xmlNamespaceParseQName(xmlParserCtxtPtr ctxt, CHAR *prefix) {
				881	CHAR *ret = NULL;
				882
				883	*prefix = NULL;
				884	ret = xmlNamespaceParseNCName(ctxt);
				885	if (CUR == ':') {
				886	*prefix = ret;
				887	NEXT;
				888	ret = xmlNamespaceParseNCName(ctxt);
				889	}
				890
				891	return(ret);
				892	}
				893
				894	/*
				895	* xmlNamespaceParseNSDef : parse a namespace prefix declaration
				896	*
				897	* [NS 1] NSDef ::= PrefixDef Eq SystemLiteral
				898	*
				899	* [NS 2] PrefixDef ::= 'xmlns' (':' NCName)?
				900	*/
				901
				902	CHAR *xmlNamespaceParseNSDef(xmlParserCtxtPtr ctxt) {
				903	CHAR *name = NULL;
				904
				905	if ((CUR == 'x') && (NXT(1) == 'm') &&
				906	(NXT(2) == 'l') && (NXT(3) == 'n') &&
				907	(NXT(4) == 's')) {
				908	SKIP(5);
				909	if (CUR == ':') {
				910	NEXT;
				911	name = xmlNamespaceParseNCName(ctxt);
				912	}
				913	}
				914	return(name);
				915	}
				916
				917	/*
				918	* [OLD] Parse and return a string between quotes or doublequotes
				919	*/
				920	CHAR *xmlParseQuotedString(xmlParserCtxtPtr ctxt) {
				921	CHAR *ret = NULL;
				922	const CHAR *q;
				923
				924	if (CUR == '"') {
				925	NEXT;
				926	q = CUR_PTR;
				927	while (IS_CHAR(CUR) && (CUR != '"')) NEXT;
				928	if (CUR != '"')
				929	xmlParserError(ctxt, "String not closed\"%.50s\n", q);
				930	else {
				931	ret = xmlStrndup(q, CUR_PTR - q);
				932	NEXT;
				933	}
				934	} else if (CUR == '\''){
				935	NEXT;
				936	q = CUR_PTR;
				937	while (IS_CHAR(CUR) && (CUR != '\'')) NEXT;
				938	if (CUR != '\'')
				939	xmlParserError(ctxt, "String not closed\"%.50s\n", q);
				940	else {
				941	ret = xmlStrndup(q, CUR_PTR - q);
				942	NEXT;
				943	}
				944	}
				945	return(ret);
				946	}
				947
				948	/*
				949	* [OLD] xmlParseNamespace: parse specific PI '<?namespace ...' constructs.
				950	*
				951	* This is what the older xml-name Working Draft specified, a bunch of
				952	* other stuff may still rely on it, so support is still here as
				953	* if ot was declared on the root of the Tree:-(
				954	*/
				955
				956	void xmlParseNamespace(xmlParserCtxtPtr ctxt) {
				957	CHAR *href = NULL;
				958	CHAR *prefix = NULL;
				959	int garbage = 0;
				960
				961	/*
				962	* We just skipped "namespace" or "xml:namespace"
				963	*/
				964	SKIP_BLANKS;
				965
				966	while (IS_CHAR(CUR) && (CUR != '>')) {
				967	/*
				968	* We can have "ns" or "prefix" attributes
				969	* Old encoding as 'href' or 'AS' attributes is still supported
				970	*/
				971	if ((CUR == 'n') && (NXT(1) == 's')) {
				972	garbage = 0;
				973	SKIP(2);
				974	SKIP_BLANKS;
				975
				976	if (CUR != '=') continue;
				977	NEXT;
				978	SKIP_BLANKS;
				979
				980	href = xmlParseQuotedString(ctxt);
				981	SKIP_BLANKS;
				982	} else if ((CUR == 'h') && (NXT(1) == 'r') &&
				983	(NXT(2) == 'e') && (NXT(3) == 'f')) {
				984	garbage = 0;
				985	SKIP(4);
				986	SKIP_BLANKS;
				987
				988	if (CUR != '=') continue;
				989	NEXT;
				990	SKIP_BLANKS;
				991
				992	href = xmlParseQuotedString(ctxt);
				993	SKIP_BLANKS;
				994	} else if ((CUR == 'p') && (NXT(1) == 'r') &&
				995	(NXT(2) == 'e') && (NXT(3) == 'f') &&
				996	(NXT(4) == 'i') && (NXT(5) == 'x')) {
				997	garbage = 0;
				998	SKIP(6);
				999	SKIP_BLANKS;
				1000
				1001	if (CUR != '=') continue;
				1002	NEXT;
				1003	SKIP_BLANKS;
				1004
				1005	prefix = xmlParseQuotedString(ctxt);
				1006	SKIP_BLANKS;
				1007	} else if ((CUR == 'A') && (NXT(1) == 'S')) {
				1008	garbage = 0;
				1009	SKIP(2);
				1010	SKIP_BLANKS;
				1011
				1012	if (CUR != '=') continue;
				1013	NEXT;
				1014	SKIP_BLANKS;
				1015
				1016	prefix = xmlParseQuotedString(ctxt);
				1017	SKIP_BLANKS;
				1018	} else if ((CUR == '?') && (NXT(1) == '>')) {
				1019	garbage = 0;
				1020	CUR_PTR ++;
				1021	} else {
				1022	/*
				1023	* Found garbage when parsing the namespace
				1024	*/
				1025	if (!garbage)
				1026	xmlParserError(ctxt, "xmlParseNamespace found garbage\n");
				1027	NEXT;
				1028	}
				1029	}
				1030
				1031	MOVETO_ENDTAG(CUR_PTR);
				1032	NEXT;
				1033
				1034	/*
				1035	* Register the DTD.
				1036	*/
				1037	if (href != NULL)
				1038	xmlNewGlobalNs(ctxt->doc, href, prefix);
				1039
				1040	if (prefix != NULL) free(prefix);
				1041	if (href != NULL) free(href);
				1042	}
				1043
				1044	/************************************************************************
				1045	* *
				1046	* The parser itself *
				1047	* Relates to http://www.w3.org/TR/REC-xml *
				1048	* *
				1049	************************************************************************/
				1050
				1051	/*
				1052	* xmlParseName : parse an XML name.
				1053	*
				1054	* [4] NameChar ::= Letter \| Digit \| '.' \| '-' \| '_' \| ':' \|
				1055	* CombiningChar \| Extender
				1056	*
				1057	* [5] Name ::= (Letter \| '_' \| ':') (NameChar)*
				1058	*
				1059	* [6] Names ::= Name (S Name)*
				1060	*/
				1061
				1062	CHAR *xmlParseName(xmlParserCtxtPtr ctxt) {
				1063	const CHAR *q;
				1064	CHAR *ret = NULL;
				1065
				1066	if (!IS_LETTER(CUR) && (CUR != '_') &&
				1067	(CUR != ':')) return(NULL);
				1068	q = NEXT;
				1069
				1070	while ((IS_LETTER(CUR)) \|\| (IS_DIGIT(CUR)) \|\|
				1071	(CUR == '.') \|\| (CUR == '-') \|\|
				1072	(CUR == '_') \|\| (CUR == ':') \|\|
				1073	(IS_COMBINING(CUR)) \|\|
				1074	(IS_EXTENDER(CUR)))
				1075	NEXT;
				1076
				1077	ret = xmlStrndup(q, CUR_PTR - q);
				1078
				1079	return(ret);
				1080	}
				1081
				1082	/*
				1083	* xmlParseNmtoken : parse an XML Nmtoken.
				1084	*
				1085	* [7] Nmtoken ::= (NameChar)+
				1086	*
				1087	* [8] Nmtokens ::= Nmtoken (S Nmtoken)*
				1088	*/
				1089
				1090	CHAR *xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
				1091	const CHAR *q;
				1092	CHAR *ret = NULL;
				1093
				1094	q = NEXT;
				1095
				1096	while ((IS_LETTER(CUR)) \|\| (IS_DIGIT(CUR)) \|\|
				1097	(CUR == '.') \|\| (CUR == '-') \|\|
				1098	(CUR == '_') \|\| (CUR == ':') \|\|
				1099	(IS_COMBINING(CUR)) \|\|
				1100	(IS_EXTENDER(CUR)))
				1101	NEXT;
				1102
				1103	ret = xmlStrndup(q, CUR_PTR - q);
				1104
				1105	return(ret);
				1106	}
				1107
				1108	/*
				1109	* xmlParseEntityValue : parse a value for ENTITY decl.
				1110	*
				1111	* [9] EntityValue ::= '"' ([^%&"] \| PEReference \| Reference)* '"' \|
				1112	* "'" ([^%&'] \| PEReference \| Reference)* "'"
				1113	*/
				1114
				1115	CHAR *xmlParseEntityValue(xmlParserCtxtPtr ctxt) {
				1116	CHAR ret = NULL, cur;
				1117	const CHAR *q;
				1118
				1119	if (CUR == '"') {
				1120	NEXT;
				1121
				1122	q = CUR_PTR;
				1123	while ((IS_CHAR(CUR)) && (CUR != '"')) {
				1124	if (CUR == '%') {
				1125	ret = xmlStrncat(ret, q, CUR_PTR - q);
				1126	cur = xmlParsePEReference(ctxt, 1);
				1127	ret = xmlStrcat(ret, cur);
				1128	q = CUR_PTR;
				1129	} else if (CUR == '&') {
				1130	ret = xmlStrncat(ret, q, CUR_PTR - q);
				1131	cur = xmlParseReference(ctxt, 1);
				1132	ret = xmlStrcat(ret, cur);
				1133	q = CUR_PTR;
				1134	} else
				1135	NEXT;
				1136	}
				1137	if (!IS_CHAR(CUR)) {
				1138	xmlParserError(ctxt, "Unfinished EntityValue\n");
				1139	} else {
				1140	ret = xmlStrncat(ret, q, CUR_PTR - q);
				1141	NEXT;
				1142	}
				1143	} else if (CUR == '\'') {
				1144	NEXT;
				1145	q = CUR_PTR;
				1146	while ((IS_CHAR(CUR)) && (CUR != '\'')) {
				1147	if (CUR == '%') {
				1148	ret = xmlStrncat(ret, q, CUR_PTR - q);
				1149	cur = xmlParsePEReference(ctxt, 1);
				1150	ret = xmlStrcat(ret, cur);
				1151	q = CUR_PTR;
				1152	} else if (CUR == '&') {
				1153	ret = xmlStrncat(ret, q, CUR_PTR - q);
				1154	cur = xmlParseReference(ctxt, 1);
				1155	ret = xmlStrcat(ret, cur);
				1156	q = CUR_PTR;
				1157	} else
				1158	NEXT;
				1159	}
				1160	if (!IS_CHAR(CUR)) {
				1161	xmlParserError(ctxt, "Unfinished EntityValue\n");
				1162	} else {
				1163	ret = xmlStrncat(ret, q, CUR_PTR - q);
				1164	NEXT;
				1165	}
				1166	} else {
				1167	xmlParserError(ctxt, "xmlParseEntityValue \" or ' expected\n");
				1168	}
				1169
				1170	return(ret);
				1171	}
				1172
				1173	/*
				1174	* xmlParseAttValue : parse a value for an attribute
				1175	*
				1176	* [10] AttValue ::= '"' ([^<&"] \| Reference)* '"' \|
				1177	* "'" ([^<&'] \| Reference)* "'"
				1178	*/
				1179
				1180	CHAR *xmlParseAttValue(xmlParserCtxtPtr ctxt) {
				1181	CHAR ret = NULL, cur;
				1182	const CHAR *q;
				1183
				1184	if (CUR == '"') {
				1185	NEXT;
				1186
				1187	q = CUR_PTR;
				1188	while ((IS_CHAR(CUR)) && (CUR != '"')) {
				1189	if (CUR == '&') {
				1190	ret = xmlStrncat(ret, q, CUR_PTR - q);
				1191	cur = xmlParseReference(ctxt, 1);
				1192	ret = xmlStrcat(ret, cur);
				1193	q = CUR_PTR;
				1194	} else
				1195	NEXT;
				1196	}
				1197	if (!IS_CHAR(CUR)) {
				1198	xmlParserError(ctxt, "Unfinished AttValue\n");
				1199	} else {
				1200	ret = xmlStrncat(ret, q, CUR_PTR - q);
				1201	NEXT;
				1202	}
				1203	} else if (CUR == '\'') {
				1204	NEXT;
				1205	q = CUR_PTR;
				1206	while ((IS_CHAR(CUR)) && (CUR != '\'')) {
				1207	if (CUR == '&') {
				1208	ret = xmlStrncat(ret, q, CUR_PTR - q);
				1209	cur = xmlParseReference(ctxt, 1);
				1210	ret = xmlStrcat(ret, cur);
				1211	q = CUR_PTR;
				1212	} else
				1213	NEXT;
				1214	}
				1215	if (!IS_CHAR(CUR)) {
				1216	xmlParserError(ctxt, "Unfinished AttValue\n");
				1217	} else {
				1218	ret = xmlStrncat(ret, q, CUR_PTR - q);
				1219	NEXT;
				1220	}
				1221	} else {
				1222	xmlParserError(ctxt, "AttValue: \" or ' expected\n");
				1223	}
				1224
				1225	return(ret);
				1226	}
				1227
				1228	/*
				1229	* xmlParseSystemLiteral : parse an XML Literal
				1230	*
				1231	* [11] SystemLiteral ::= ('"' [^"]* '"') \| ("'" [^']* "'")
				1232	*/
				1233
				1234	CHAR *xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
				1235	const CHAR *q;
				1236	CHAR *ret = NULL;
				1237
				1238	if (CUR == '"') {
				1239	NEXT;
				1240	q = CUR_PTR;
				1241	while ((IS_CHAR(CUR)) && (CUR != '"'))
				1242	NEXT;
				1243	if (!IS_CHAR(CUR)) {
				1244	xmlParserError(ctxt, "Unfinished SystemLiteral\n");
				1245	} else {
				1246	ret = xmlStrndup(q, CUR_PTR - q);
				1247	NEXT;
				1248	}
				1249	} else if (CUR == '\'') {
				1250	NEXT;
				1251	q = CUR_PTR;
				1252	while ((IS_CHAR(CUR)) && (CUR != '\''))
				1253	NEXT;
				1254	if (!IS_CHAR(CUR)) {
				1255	xmlParserError(ctxt, "Unfinished SystemLiteral\n");
				1256	} else {
				1257	ret = xmlStrndup(q, CUR_PTR - q);
				1258	NEXT;
				1259	}
				1260	} else {
				1261	xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
				1262	}
				1263
				1264	return(ret);
				1265	}
				1266
				1267	/*
				1268	* xmlParsePubidLiteral: parse an XML public literal
				1269	*
				1270	* [12] PubidLiteral ::= '"' PubidChar* '"' \| "'" (PubidChar - "'")* "'"
				1271	*/
				1272
				1273	CHAR *xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
				1274	const CHAR *q;
				1275	CHAR *ret = NULL;
				1276	/*
				1277	* Name ::= (Letter \| '_') (NameChar)*
				1278	*/
				1279	if (CUR == '"') {
				1280	NEXT;
				1281	q = CUR_PTR;
				1282	while (IS_PUBIDCHAR(CUR)) NEXT;
				1283	if (CUR != '"') {
				1284	xmlParserError(ctxt, "Unfinished PubidLiteral\n");
				1285	} else {
				1286	ret = xmlStrndup(q, CUR_PTR - q);
				1287	NEXT;
				1288	}
				1289	} else if (CUR == '\'') {
				1290	NEXT;
				1291	q = CUR_PTR;
				1292	while ((IS_LETTER(CUR)) && (CUR != '\''))
				1293	NEXT;
				1294	if (!IS_LETTER(CUR)) {
				1295	xmlParserError(ctxt, "Unfinished PubidLiteral\n");
				1296	} else {
				1297	ret = xmlStrndup(q, CUR_PTR - q);
				1298	NEXT;
				1299	}
				1300	} else {
				1301	xmlParserError(ctxt, "SystemLiteral \" or ' expected\n");
				1302	}
				1303
				1304	return(ret);
				1305	}
				1306
				1307	/*
				1308	* xmlParseCharData: parse a CharData section.
				1309	* if we are within a CDATA section ']]>' marks an end of section.
				1310	*
				1311	* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
				1312	*/
				1313
				1314	void xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
				1315	const CHAR *q;
				1316
				1317	q = CUR_PTR;
				1318	while ((IS_CHAR(CUR)) && (CUR != '<') &&
				1319	(CUR != '&')) {
				1320	NEXT;
				1321	if ((cdata) && (CUR == ']') && (NXT(1) == ']') &&
				1322	(NXT(2) == '>')) break;
				1323	}
				1324	if (q == CUR_PTR) return;
				1325
				1326	/*
				1327	* Ok the segment [q CUR_PTR] is to be consumed as chars.
				1328	*/
				1329	if (ctxt->sax != NULL) {
				1330	if (areBlanks(ctxt, q, CUR_PTR - q))
				1331	ctxt->sax->ignorableWhitespace(ctxt, q, 0, CUR_PTR - q);
				1332	else
				1333	ctxt->sax->characters(ctxt, q, 0, CUR_PTR - q);
				1334	}
				1335	}
				1336
				1337	/*
				1338	* xmlParseExternalID: Parse an External ID
				1339	*
				1340	* [75] ExternalID ::= 'SYSTEM' S SystemLiteral
				1341	* \| 'PUBLIC' S PubidLiteral S SystemLiteral
				1342	*/
				1343
				1344	CHAR xmlParseExternalID(xmlParserCtxtPtr ctxt, CHAR *publicID) {
				1345	CHAR *URI = NULL;
				1346
				1347	if ((CUR == 'S') && (NXT(1) == 'Y') &&
				1348	(NXT(2) == 'S') && (NXT(3) == 'T') &&
				1349	(NXT(4) == 'E') && (NXT(5) == 'M')) {
				1350	SKIP(6);
				1351	SKIP_BLANKS;
				1352	URI = xmlParseSystemLiteral(ctxt);
				1353	if (URI == NULL)
				1354	xmlParserError(ctxt,
				1355	"xmlParseExternalID: SYSTEM, no URI\n");
				1356	} else if ((CUR == 'P') && (NXT(1) == 'U') &&
				1357	(NXT(2) == 'B') && (NXT(3) == 'L') &&
				1358	(NXT(4) == 'I') && (NXT(5) == 'C')) {
				1359	SKIP(6);
				1360	SKIP_BLANKS;
				1361	*publicID = xmlParsePubidLiteral(ctxt);
				1362	if (*publicID == NULL)
				1363	xmlParserError(ctxt,
				1364	"xmlParseExternalID: PUBLIC, no Public Identifier\n");
				1365	SKIP_BLANKS;
				1366	URI = xmlParseSystemLiteral(ctxt);
				1367	if (URI == NULL)
				1368	xmlParserError(ctxt,
				1369	"xmlParseExternalID: PUBLIC, no URI\n");
				1370	}
				1371	return(URI);
				1372	}
				1373
				1374	/*
				1375	* Skip an XML (SGML) comment <!-- .... -->
				1376	* This may or may not create a node (depending on the context)
				1377	* The spec says that "For compatibility, the string "--" (double-hyphen)
				1378	* must not occur within comments. "
				1379	*
				1380	* [15] Comment ::= '<!--' ((Char - '-') \| ('-' (Char - '-')))* '-->'
				1381	*/
				1382	xmlNodePtr xmlParseComment(xmlParserCtxtPtr ctxt, int create) {
				1383	xmlNodePtr ret = NULL;
				1384	const CHAR q, start;
				1385	const CHAR *r;
				1386	CHAR *val;
				1387
				1388	/*
				1389	* Check that there is a comment right here.
				1390	*/
				1391	if ((CUR != '<') \|\| (NXT(1) != '!') \|\|
				1392	(NXT(2) != '-') \|\| (NXT(3) != '-')) return(NULL);
				1393
				1394	SKIP(4);
				1395	start = q = CUR_PTR;
				1396	NEXT;
				1397	r = CUR_PTR;
				1398	NEXT;
				1399	while (IS_CHAR(CUR) &&
				1400	((CUR == ':') \|\| (CUR != '>') \|\|
				1401	(r != '-') \|\| (q != '-'))) {
				1402	if ((r == '-') && (q == '-'))
				1403	xmlParserError(ctxt,
				1404	"Comment must not contain '--' (double-hyphen)`\n");
				1405	NEXT;r++;q++;
				1406	}
				1407	if (!IS_CHAR(CUR)) {
				1408	xmlParserError(ctxt, "Comment not terminated \n<!--%.50s\n", start);
				1409	} else {
				1410	NEXT;
				1411	if (create) {
				1412	val = xmlStrndup(start, q - start);
				1413	ret = xmlNewComment(val);
				1414	free(val);
				1415	}
				1416	}
				1417	return(ret);
				1418	}
				1419
				1420	/*
				1421	* xmlParsePITarget: parse the name of a PI
				1422	*
				1423	* [17] PITarget ::= Name - (('X' \| 'x') ('M' \| 'm') ('L' \| 'l'))
				1424	*/
				1425
				1426	CHAR *xmlParsePITarget(xmlParserCtxtPtr ctxt) {
				1427	CHAR *name;
				1428
				1429	name = xmlParseName(ctxt);
				1430	if ((name != NULL) && (name[3] == 0) &&
				1431	((name[0] == 'x') \|\| (name[0] == 'X')) &&
				1432	((name[1] == 'm') \|\| (name[1] == 'M')) &&
				1433	((name[2] == 'l') \|\| (name[2] == 'L'))) {
				1434	xmlParserError(ctxt, "xmlParsePItarget: invalid name prefix 'xml'\n");
				1435	return(NULL);
				1436	}
				1437	return(name);
				1438	}
				1439
				1440	/*
				1441	* xmlParsePI: parse an XML Processing Instruction.
				1442	*
				1443	* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
				1444	*/
				1445
				1446	void xmlParsePI(xmlParserCtxtPtr ctxt) {
				1447	CHAR *target;
				1448
				1449	if ((CUR == '<') && (NXT(1) == '?')) {
				1450	/*
				1451	* this is a Processing Instruction.
				1452	*/
				1453	SKIP(2);
				1454
				1455	/*
				1456	* Parse the target name and check for special support like
				1457	* namespace.
				1458	*
				1459	* TODO : PI handling should be dynamically redefinable using an
				1460	* API. Only namespace should be in the code IMHO ...
				1461	*/
				1462	target = xmlParsePITarget(ctxt);
				1463	if (target != NULL) {
				1464	/*
				1465	* Support for the old Processing Instruction related to namespace.
				1466	*/
				1467	if ((target[0] == 'n') && (target[1] == 'a') &&
				1468	(target[2] == 'm') && (target[3] == 'e') &&
				1469	(target[4] == 's') && (target[5] == 'p') &&
				1470	(target[6] == 'a') && (target[7] == 'c') &&
				1471	(target[8] == 'e')) {
				1472	xmlParseNamespace(ctxt);
				1473	} else if ((target[0] == 'x') && (target[1] == 'm') &&
				1474	(target[2] == 'l') && (target[3] == ':') &&
				1475	(target[4] == 'n') && (target[5] == 'a') &&
				1476	(target[6] == 'm') && (target[7] == 'e') &&
				1477	(target[8] == 's') && (target[9] == 'p') &&
				1478	(target[10] == 'a') && (target[11] == 'c') &&
				1479	(target[12] == 'e')) {
				1480	xmlParseNamespace(ctxt);
				1481	} else {
				1482	const CHAR *q = CUR_PTR;
				1483
				1484	while (IS_CHAR(CUR) &&
				1485	((CUR != '?') \|\| (NXT(1) != '>')))
				1486	NEXT;
				1487	if (!IS_CHAR(CUR)) {
				1488	xmlParserError(ctxt, "xmlParsePI: PI %s never end ...\n",
				1489	target);
				1490	} else {
				1491	CHAR *data;
				1492
				1493	data = xmlStrndup(CUR_PTR, CUR_PTR - q);
				1494	SKIP(2);
				1495
				1496	/*
				1497	* SAX: PI detected.
				1498	*/
				1499	if (ctxt->sax)
				1500	ctxt->sax->processingInstruction(ctxt, target, data);
				1501	/*
				1502	* Unknown PI, ignore it !
				1503	*/
				1504	else
				1505	xmlParserWarning(ctxt,
				1506	"xmlParsePI : skipping unknown PI %s\n",
				1507	target);
				1508	free(data);
				1509	}
				1510	}
				1511	free(target);
				1512	} else {
				1513	xmlParserError(ctxt, "xmlParsePI : no target name\n");
				1514	/********* Should we try to complete parsing the PI ???
				1515	while (IS_CHAR(CUR) &&
				1516	(CUR != '?') && (CUR != '>'))
				1517	NEXT;
				1518	if (!IS_CHAR(CUR)) {
				1519	fprintf(stderr, "xmlParsePI: PI %s never end ...\n",
				1520	target);
				1521	}
				1522	********************************************************/
				1523	}
				1524	}
				1525	}
				1526
				1527	/*
				1528	* xmlParseNotationDecl: parse a notation declaration
				1529	*
				1530	* [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID \| PublicID) S? '>'
				1531	*
				1532	* [83] PublicID ::= 'PUBLIC' S PubidLiteral
				1533	*
				1534	* NOTE: Actually [75] and [83] interract badly since [75] can generate
				1535	* 'PUBLIC' S PubidLiteral S SystemLiteral
				1536	*
				1537	* Hence there is actually 3 choices:
				1538	* 'PUBLIC' S PubidLiteral
				1539	* 'PUBLIC' S PubidLiteral S SystemLiteral
				1540	* and 'SYSTEM' S SystemLiteral
				1541	*/
				1542
				1543	void xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
				1544	CHAR *name;
				1545
				1546	if ((CUR == '<') && (NXT(1) == '!') &&
				1547	(NXT(2) == 'N') && (NXT(3) == 'O') &&
				1548	(NXT(4) == 'T') && (NXT(5) == 'A') &&
				1549	(NXT(6) == 'T') && (NXT(7) == 'I') &&
				1550	(NXT(8) == 'O') && (NXT(9) == 'N') &&
				1551	(IS_BLANK(NXT(10)))) {
				1552	SKIP(10);
				1553	SKIP_BLANKS;
				1554
				1555	name = xmlParseName(ctxt);
				1556	if (name == NULL) {
				1557	xmlParserError(ctxt,
				1558	"xmlParseAttributeListDecl: no name for Element\n");
				1559	return;
				1560	}
				1561	SKIP_BLANKS;
				1562	/*
				1563	* TODO !!!
				1564	*/
				1565	while ((IS_CHAR(CUR)) && (CUR != '>'))
				1566	NEXT;
				1567	free(name);
				1568	}
				1569	}
				1570
				1571	/*
				1572	* xmlParseEntityDecl: parse <!ENTITY declarations
				1573	*
				1574	* [70] EntityDecl ::= GEDecl \| PEDecl
				1575	*
				1576	* [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
				1577	*
				1578	* [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
				1579	*
				1580	* [73] EntityDef ::= EntityValue \| (ExternalID NDataDecl?)
				1581	*
				1582	* [74] PEDef ::= EntityValue \| ExternalID
				1583	*
				1584	* [76] NDataDecl ::= S 'NDATA' S Name
				1585	*/
				1586
				1587	void xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
				1588	CHAR *name = NULL;
				1589	CHAR *value = NULL;
				1590	CHAR URI = NULL, literal = NULL;
				1591	CHAR *ndata = NULL;
				1592	int isParameter = 0;
				1593
				1594	if ((CUR == '<') && (NXT(1) == '!') &&
				1595	(NXT(2) == 'E') && (NXT(3) == 'N') &&
				1596	(NXT(4) == 'T') && (NXT(5) == 'I') &&
				1597	(NXT(6) == 'T') && (NXT(7) == 'Y') &&
				1598	(IS_BLANK(NXT(8)))) {
				1599	SKIP(8);
				1600	SKIP_BLANKS;
				1601
				1602	if (CUR == '%') {
				1603	NEXT;
				1604	SKIP_BLANKS;
				1605	isParameter = 1;
				1606	}
				1607
				1608	name = xmlParseName(ctxt);
				1609	if (name == NULL) {
				1610	xmlParserError(ctxt, "xmlParseEntityDecl: no name\n");
				1611	return;
				1612	}
				1613	SKIP_BLANKS;
				1614
				1615	/*
				1616	* TODO handle the various case of definitions...
				1617	*/
				1618	if (isParameter) {
				1619	if ((CUR == '"') \|\| (CUR == '\''))
				1620	value = xmlParseEntityValue(ctxt);
				1621	if (value) {
				1622	xmlAddDocEntity(ctxt->doc, name,
				1623	XML_INTERNAL_PARAMETER_ENTITY,
				1624	NULL, NULL, value);
				1625	}
				1626	else {
				1627	URI = xmlParseExternalID(ctxt, &literal);
				1628	if (URI) {
				1629	xmlAddDocEntity(ctxt->doc, name,
				1630	XML_EXTERNAL_PARAMETER_ENTITY,
				1631	literal, URI, NULL);
				1632	}
				1633	}
				1634	} else {
				1635	if ((CUR == '"') \|\| (CUR == '\'')) {
				1636	value = xmlParseEntityValue(ctxt);
				1637	xmlAddDocEntity(ctxt->doc, name,
				1638	XML_INTERNAL_GENERAL_ENTITY,
				1639	NULL, NULL, value);
				1640	} else {
				1641	URI = xmlParseExternalID(ctxt, &literal);
				1642	SKIP_BLANKS;
				1643	if ((CUR == 'N') && (NXT(1) == 'D') &&
				1644	(NXT(2) == 'A') && (NXT(3) == 'T') &&
				1645	(NXT(4) == 'A')) {
				1646	SKIP(5);
				1647	SKIP_BLANKS;
				1648	ndata = xmlParseName(ctxt);
				1649	xmlAddDocEntity(ctxt->doc, name,
				1650	XML_EXTERNAL_GENERAL_UNPARSED_ENTITY,
				1651	literal, URI, ndata);
				1652	} else {
				1653	xmlAddDocEntity(ctxt->doc, name,
				1654	XML_EXTERNAL_GENERAL_PARSED_ENTITY,
				1655	literal, URI, NULL);
				1656	}
				1657	}
				1658	}
				1659	SKIP_BLANKS;
				1660	if (CUR != '>') {
				1661	xmlParserError(ctxt,
				1662	"xmlParseEntityDecl: entity %s not terminated\n", name);
				1663	} else
				1664	NEXT;
				1665	if (name != NULL) free(name);
				1666	if (value != NULL) free(value);
				1667	if (URI != NULL) free(URI);
				1668	if (literal != NULL) free(literal);
				1669	if (ndata != NULL) free(ndata);
				1670	}
				1671	}
				1672
				1673	/*
				1674	* xmlParseEnumeratedType: parse and Enumerated attribute type.
				1675	*
				1676	* [57] EnumeratedType ::= NotationType \| Enumeration
				1677	*
				1678	* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '\|' S? Name)* S? ')'
				1679	*
				1680	* [59] Enumeration ::= '(' S? Nmtoken (S? '\|' S? Nmtoken)* S? ')'
				1681	*/
				1682
				1683	void xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, CHAR *name) {
				1684	/*
				1685	* TODO !!!
				1686	*/
				1687	while ((IS_CHAR(CUR)) && (CUR != '>'))
				1688	NEXT;
				1689	}
				1690
				1691	/*
				1692	* xmlParseAttributeType: parse the Attribute list def for an element
				1693	*
				1694	* [54] AttType ::= StringType \| TokenizedType \| EnumeratedType
				1695	*
				1696	* [55] StringType ::= 'CDATA'
				1697	*
				1698	* [56] TokenizedType ::= 'ID' \| 'IDREF' \| 'IDREFS' \| 'ENTITY' \|
				1699	* 'ENTITIES' \| 'NMTOKEN' \| 'NMTOKENS'
				1700	*/
				1701	void xmlParseAttributeType(xmlParserCtxtPtr ctxt, CHAR *name) {
				1702	/* TODO !!! */
				1703	if ((CUR == 'C') && (NXT(1) == 'D') &&
				1704	(NXT(2) == 'A') && (NXT(3) == 'T') &&
				1705	(NXT(4) == 'A')) {
				1706	SKIP(5);
				1707	} else if ((CUR == 'I') && (NXT(1) == 'D')) {
				1708	SKIP(2);
				1709	} else if ((CUR == 'I') && (NXT(1) == 'D') &&
				1710	(NXT(2) == 'R') && (NXT(3) == 'E') &&
				1711	(NXT(4) == 'F')) {
				1712	SKIP(5);
				1713	} else if ((CUR == 'I') && (NXT(1) == 'D') &&
				1714	(NXT(2) == 'R') && (NXT(3) == 'E') &&
				1715	(NXT(4) == 'F') && (NXT(5) == 'S')) {
				1716	SKIP(6);
				1717	} else if ((CUR == 'E') && (NXT(1) == 'N') &&
				1718	(NXT(2) == 'T') && (NXT(3) == 'I') &&
				1719	(NXT(4) == 'T') && (NXT(5) == 'Y')) {
				1720	SKIP(6);
				1721	} else if ((CUR == 'E') && (NXT(1) == 'N') &&
				1722	(NXT(2) == 'T') && (NXT(3) == 'I') &&
				1723	(NXT(4) == 'T') && (NXT(5) == 'I') &&
				1724	(NXT(6) == 'E') && (NXT(7) == 'S')) {
				1725	SKIP(8);
				1726	} else if ((CUR == 'N') && (NXT(1) == 'M') &&
				1727	(NXT(2) == 'T') && (NXT(3) == 'O') &&
				1728	(NXT(4) == 'K') && (NXT(5) == 'E') &&
				1729	(NXT(6) == 'N')) {
				1730	SKIP(7);
				1731	} else if ((CUR == 'N') && (NXT(1) == 'M') &&
				1732	(NXT(2) == 'T') && (NXT(3) == 'O') &&
				1733	(NXT(4) == 'K') && (NXT(5) == 'E') &&
				1734	(NXT(6) == 'N') && (NXT(7) == 'S')) {
				1735	} else {
				1736	xmlParseEnumeratedType(ctxt, name);
				1737	}
				1738	}
				1739
				1740	/*
				1741	* xmlParseAttributeListDecl: parse the Attribute list def for an element
				1742	*
				1743	* [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
				1744	*
				1745	* [53] AttDef ::= S Name S AttType S DefaultDecl
				1746	*/
				1747	void xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
				1748	CHAR *name;
				1749
				1750	/* TODO !!! */
				1751	if ((CUR == '<') && (NXT(1) == '!') &&
				1752	(NXT(2) == 'A') && (NXT(3) == 'T') &&
				1753	(NXT(4) == 'T') && (NXT(5) == 'L') &&
				1754	(NXT(6) == 'I') && (NXT(7) == 'S') &&
				1755	(NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
				1756	SKIP(9);
				1757	SKIP_BLANKS;
				1758	name = xmlParseName(ctxt);
				1759	if (name == NULL) {
				1760	xmlParserError(ctxt,
				1761	"xmlParseAttributeListDecl: no name for Element\n");
				1762	return;
				1763	}
				1764	SKIP_BLANKS;
				1765	while (CUR != '>') {
				1766	const CHAR *check = CUR_PTR;
				1767
				1768	xmlParseAttributeType(ctxt, name);
				1769	SKIP_BLANKS;
				1770	if (check == CUR_PTR) {
				1771	xmlParserError(ctxt,
				1772	"xmlParseAttributeListDecl: detected error\n");
				1773	break;
				1774	}
				1775	}
				1776	if (CUR == '>')
				1777	NEXT;
				1778
				1779	free(name);
				1780	}
				1781	}
				1782
				1783	/*
				1784	* xmlParseElementContentDecl: parse the declaration for an Element content
				1785	* either Mixed or Children, the cases EMPTY and ANY being handled
				1786	* int xmlParseElementDecl.
				1787	*
				1788	* [47] children ::= (choice \| seq) ('?' \| '*' \| '+')?
				1789	*
				1790	* [48] cp ::= (Name \| choice \| seq) ('?' \| '*' \| '+')?
				1791	*
				1792	* [49] choice ::= '(' S? cp ( S? '\|' S? cp )* S? ')'
				1793	*
				1794	* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
				1795	*
				1796	* or
				1797	*
				1798	* [51] Mixed ::= '(' S? '#PCDATA' (S? '\|' S? Name)* S? ')*' \|
				1799	* '(' S? '#PCDATA' S? ')'
				1800	*/
				1801
				1802	void xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, CHAR *name) {
				1803	/*
				1804	* TODO This has to be parsed correctly, currently we just skip until
				1805	* we reach the first '>'.
				1806	* !!!
				1807	*/
				1808	while ((IS_CHAR(CUR)) && (CUR != '>'))
				1809	NEXT;
				1810	}
				1811
				1812	/*
				1813	* xmlParseElementDecl: parse an Element declaration.
				1814	*
				1815	* [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
				1816	*
				1817	* [46] contentspec ::= 'EMPTY' \| 'ANY' \| Mixed \| children
				1818	*
				1819	* TODO There is a check [ VC: Unique Element Type Declaration ]
				1820	*/
				1821	void xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
				1822	CHAR *name;
				1823
				1824	if ((CUR == '<') && (NXT(1) == '!') &&
				1825	(NXT(2) == 'E') && (NXT(3) == 'L') &&
				1826	(NXT(4) == 'E') && (NXT(5) == 'M') &&
				1827	(NXT(6) == 'E') && (NXT(7) == 'N') &&
				1828	(NXT(8) == 'T') && (IS_BLANK(NXT(9)))) {
				1829	SKIP(9);
				1830	SKIP_BLANKS;
				1831	name = xmlParseName(ctxt);
				1832	if (name == NULL) {
				1833	xmlParserError(ctxt, "xmlParseElementDecl: no name for Element\n");
				1834	return;
				1835	}
				1836	SKIP_BLANKS;
				1837	if ((CUR == 'E') && (NXT(1) == 'M') &&
				1838	(NXT(2) == 'P') && (NXT(3) == 'T') &&
				1839	(NXT(4) == 'Y')) {
				1840	SKIP(5);
				1841	/*
				1842	* Element must always be empty.
				1843	*/
				1844	} else if ((CUR == 'A') && (NXT(1) == 'N') &&
				1845	(NXT(2) == 'Y')) {
				1846	SKIP(3);
				1847	/*
				1848	* Element is a generic container.
				1849	*/
				1850	} else {
				1851	xmlParseElementContentDecl(ctxt, name);
				1852	}
				1853	SKIP_BLANKS;
				1854	if (CUR != '>') {
				1855	xmlParserError(ctxt,
				1856	"xmlParseElementDecl: expected '>' at the end\n");
				1857	} else
				1858	NEXT;
				1859	}
				1860	}
				1861
				1862	/*
				1863	* xmlParseMarkupDecl: parse Markup declarations
				1864	*
				1865	* [29] markupdecl ::= elementdecl \| AttlistDecl \| EntityDecl \|
				1866	* NotationDecl \| PI \| Comment
				1867	*
				1868	* TODO There is a check [ VC: Proper Declaration/PE Nesting ]
				1869	*/
				1870	void xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
				1871	xmlParseElementDecl(ctxt);
				1872	xmlParseAttributeListDecl(ctxt);
				1873	xmlParseEntityDecl(ctxt);
				1874	xmlParseNotationDecl(ctxt);
				1875	xmlParsePI(ctxt);
				1876	xmlParseComment(ctxt, 0);
				1877	}
				1878
				1879	/*
				1880	* xmlParseCharRef: parse Reference declarations
				1881	*
				1882	* [66] CharRef ::= '&#' [0-9]+ ';' \|
				1883	* '&#x' [0-9a-fA-F]+ ';'
				1884	*/
				1885	CHAR *xmlParseCharRef(xmlParserCtxtPtr ctxt, int inLine) {
				1886	int val = 0;
				1887	CHAR buf[2];
				1888
				1889	if ((CUR == '&') && (NXT(1) == '#') &&
				1890	(NXT(2) == 'x')) {
				1891	SKIP(3);
				1892	while (CUR != ';') {
				1893	if ((CUR >= '0') && (CUR <= '9'))
				1894	val = val * 16 + (CUR - '0');
				1895	else if ((CUR >= 'a') && (CUR <= 'f'))
				1896	val = val * 16 + (CUR - 'a') + 10;
				1897	else if ((CUR >= 'A') && (CUR <= 'F'))
				1898	val = val * 16 + (CUR - 'A') + 10;
				1899	else {
				1900	xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
				1901	val = 0;
				1902	break;
				1903	}
Daniel Veillard	845664d	1998-08-13 04:43:19 +0000	[diff] [blame]	1904	NEXT;
Daniel Veillard	260a68f	1998-08-13 03:39:55 +0000	[diff] [blame]	1905	}
				1906	if (CUR != ';')
				1907	NEXT;
				1908	} else if ((CUR == '&') && (NXT(1) == '#')) {
				1909	SKIP(2);
				1910	while (CUR != ';') {
				1911	if ((CUR >= '0') && (CUR <= '9'))
				1912	val = val * 16 + (CUR - '0');
				1913	else {
				1914	xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
				1915	val = 0;
				1916	break;
				1917	}
Daniel Veillard	845664d	1998-08-13 04:43:19 +0000	[diff] [blame]	1918	NEXT;
Daniel Veillard	260a68f	1998-08-13 03:39:55 +0000	[diff] [blame]	1919	}
				1920	if (CUR != ';')
				1921	NEXT;
				1922	} else {
				1923	xmlParserError(ctxt, "xmlParseCharRef: invalid value\n");
				1924	}
				1925	/*
				1926	* Check the value IS_CHAR ...
				1927	*/
				1928	if (IS_CHAR(val)) {
				1929	buf[0] = (CHAR) val;
				1930	buf[1] = 0;
				1931	if (inLine)
				1932	return(xmlStrndup(buf, 1));
				1933	else if (ctxt->sax != NULL)
				1934	ctxt->sax->characters(ctxt, buf, 0, 1);
				1935	} else {
				1936	xmlParserError(ctxt, "xmlParseCharRef: invalid value");
				1937	}
				1938	return(NULL);
				1939	}
				1940
				1941	/*
				1942	* xmlParseEntityRef: parse ENTITY references declarations
				1943	*
				1944	* [68] EntityRef ::= '&' Name ';'
				1945	*/
				1946	CHAR *xmlParseEntityRef(xmlParserCtxtPtr ctxt, int inLine) {
				1947	CHAR *ret = NULL;
				1948	CHAR *name;
				1949	xmlEntityPtr entity;
				1950
				1951	if (CUR == '&') {
				1952	NEXT;
				1953	name = xmlParseName(ctxt);
				1954	if (name == NULL) {
				1955	xmlParserError(ctxt, "xmlParseEntityRef: no name\n");
				1956	} else {
				1957	if (CUR == ';') {
				1958	NEXT;
				1959	entity = xmlGetDocEntity(ctxt->doc, name);
				1960	if (entity == NULL) {
				1961	/* TODO !!! Create a reference ! */
				1962	xmlParserWarning(ctxt,
				1963	"xmlParseEntityRef: &%s; not found\n", name);
				1964	}
				1965	/*
				1966	* If we can get the content, push the entity content
				1967	* as the next input stream.
				1968	*/
				1969	else {
				1970	switch (entity->type) {
				1971	case XML_INTERNAL_PARAMETER_ENTITY:
				1972	case XML_EXTERNAL_PARAMETER_ENTITY:
				1973	xmlParserError(ctxt,
				1974	"internal: xmlGetDtdEntity returned a general entity\n");
				1975	break;
				1976	case XML_INTERNAL_GENERAL_ENTITY:
				1977	if (inLine)
				1978	ret = entity->content;
				1979	else
				1980	xmlHandleEntity(ctxt, entity);
				1981	break;
				1982	case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
				1983	case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
				1984	xmlParserWarning(ctxt,
				1985	"xmlParseEntityRef: external entity &%s; not supported\n",
				1986	name);
				1987	break;
				1988	default:
				1989	xmlParserError(ctxt,
				1990	"internal: xmlParseEntityRef: unknown entity type %d\n",
				1991	entity->type);
				1992	}
				1993	}
				1994	} else {
				1995	char cst[2] = { '&', 0 };
				1996
				1997	xmlParserError(ctxt, "xmlParseEntityRef: expecting ';'\n");
				1998	ret = xmlStrndup(cst, 1);
				1999	ret = xmlStrcat(ret, name);
				2000	}
				2001	free(name);
				2002	}
				2003	}
				2004	return(ret);
				2005	}
				2006
				2007	/*
				2008	* xmlParseReference: parse Reference declarations
				2009	*
				2010	* [67] Reference ::= EntityRef \| CharRef
				2011	*/
				2012	CHAR *xmlParseReference(xmlParserCtxtPtr ctxt, int inLine) {
				2013	if ((CUR == '&') && (NXT(1) == '#')) {
				2014	return(xmlParseCharRef(ctxt, inLine));
				2015	} else if (CUR == '&') {
				2016	return(xmlParseEntityRef(ctxt, inLine));
				2017	}
				2018	return(NULL);
				2019	}
				2020
				2021	/*
				2022	* xmlParsePEReference: parse PEReference declarations
				2023	*
				2024	* [69] PEReference ::= '%' Name ';'
				2025	*/
				2026	CHAR *xmlParsePEReference(xmlParserCtxtPtr ctxt, int inLine) {
				2027	CHAR *ret = NULL;
				2028	CHAR *name;
				2029	xmlEntityPtr entity;
				2030
				2031	if (CUR == '%') {
				2032	NEXT;
				2033	name = xmlParseName(ctxt);
				2034	if (name == NULL) {
				2035	xmlParserError(ctxt, "xmlParsePEReference: no name\n");
				2036	} else {
				2037	if (CUR == ';') {
				2038	NEXT;
				2039	entity = xmlGetDtdEntity(ctxt->doc, name);
				2040	if (entity == NULL) {
				2041	xmlParserWarning(ctxt,
				2042	"xmlParsePEReference: %%%s; not found\n");
				2043	}
				2044	/*
				2045	* If we can get the content, push the entity content
				2046	* as the next input stream.
				2047	*/
				2048	else {
				2049	switch (entity->type) {
				2050	case XML_INTERNAL_PARAMETER_ENTITY:
				2051	if (inLine)
				2052	ret = entity->content;
				2053	else
				2054	xmlNewEntityInputStream(ctxt, entity);
				2055	break;
				2056	case XML_EXTERNAL_PARAMETER_ENTITY:
				2057	xmlParserWarning(ctxt,
				2058	"xmlParsePEReference: external entity %%%s; not supported\n");
				2059	break;
				2060	case XML_INTERNAL_GENERAL_ENTITY:
				2061	case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
				2062	case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
				2063	xmlParserError(ctxt,
				2064	"internal: xmlGetDtdEntity returned a general entity\n");
				2065	break;
				2066	default:
				2067	xmlParserError(ctxt,
				2068	"internal: xmlParsePEReference: unknown entity type %d\n",
				2069	entity->type);
				2070	}
				2071	}
				2072	} else {
				2073	char cst[2] = { '&', 0 };
				2074
				2075	xmlParserError(ctxt, "xmlParsePEReference: expecting ';'\n");
				2076	ret = xmlStrndup(cst, 1);
				2077	ret = xmlStrcat(ret, name);
				2078	}
				2079	free(name);
				2080	}
				2081	}
				2082	return(ret);
				2083	}
				2084
				2085	/*
				2086	* xmlParseDocTypeDecl : parse a DOCTYPE declaration
				2087	*
				2088	* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
				2089	* ('[' (markupdecl \| PEReference \| S)* ']' S?)? '>'
				2090	*/
				2091
				2092	void xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
				2093	xmlDtdPtr dtd;
				2094	CHAR *name;
				2095	CHAR *ExternalID = NULL;
				2096	CHAR *URI = NULL;
				2097
				2098	/*
				2099	* We know that '<!DOCTYPE' has been detected.
				2100	*/
				2101	SKIP(9);
				2102
				2103	SKIP_BLANKS;
				2104
				2105	/*
				2106	* Parse the DOCTYPE name.
				2107	*/
				2108	name = xmlParseName(ctxt);
				2109	if (name == NULL) {
				2110	xmlParserError(ctxt, "xmlParseDocTypeDecl : no DOCTYPE name !\n");
				2111	}
				2112
				2113	SKIP_BLANKS;
				2114
				2115	/*
				2116	* Check for SystemID and ExternalID
				2117	*/
				2118	URI = xmlParseExternalID(ctxt, &ExternalID);
				2119	SKIP_BLANKS;
				2120
				2121	dtd = xmlNewDtd(ctxt->doc, name, ExternalID, URI);
				2122
				2123	/*
				2124	* Is there any DTD definition ?
				2125	*/
				2126	if (CUR == '[') {
				2127	NEXT;
				2128	/*
				2129	* Parse the succession of Markup declarations and
				2130	* PEReferences.
				2131	* Subsequence (markupdecl \| PEReference \| S)*
				2132	*/
				2133	while (CUR != ']') {
				2134	const CHAR *check = CUR_PTR;
				2135
				2136	SKIP_BLANKS;
				2137	xmlParseMarkupDecl(ctxt);
				2138	xmlParsePEReference(ctxt, 0);
				2139
				2140	if (CUR_PTR == check) {
				2141	xmlParserError(ctxt,
				2142	"xmlParseDocTypeDecl: error detected in Markup declaration\n");
				2143	break;
				2144	}
				2145	}
				2146	if (CUR == ']') NEXT;
				2147	}
				2148
				2149	/*
				2150	* We should be at the end of the DOCTYPE declaration.
				2151	*/
				2152	if (CUR != '>') {
				2153	xmlParserError(ctxt, "DOCTYPE unproperly terminated\n");
				2154	/* We shouldn't try to resynchronize ... */
				2155	}
				2156	NEXT;
				2157
				2158	/*
				2159	* Cleanup, since we don't use all those identifiers
				2160	* TODO : the DOCTYPE if available should be stored !
				2161	*/
				2162	if (URI != NULL) free(URI);
				2163	if (ExternalID != NULL) free(ExternalID);
				2164	if (name != NULL) free(name);
				2165	}
				2166
				2167	/*
				2168	* xmlParseAttribute: parse a start of tag.
				2169	*
				2170	* [41] Attribute ::= Name Eq AttValue
				2171	*
				2172	* [25] Eq ::= S? '=' S?
				2173	*
				2174	* With namespace:
				2175	*
				2176	* [NS 11] Attribute ::= QName Eq AttValue
				2177	*
				2178	* Also the case QName == xmlns:??? is handled independently as a namespace
				2179	* definition.
				2180	*/
				2181
				2182	void xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlNodePtr node) {
				2183	CHAR name, value = NULL;
				2184	CHAR *ns;
				2185
				2186	name = xmlNamespaceParseQName(ctxt, &ns);
				2187	if (name == NULL) {
				2188	xmlParserError(ctxt, "error parsing attribute name\n");
				2189	return;
				2190	}
				2191
				2192	/*
				2193	* read the value
				2194	*/
				2195	SKIP_BLANKS;
				2196	if (CUR == '=') {
				2197	NEXT;
				2198	SKIP_BLANKS;
				2199	value = xmlParseAttValue(ctxt);
				2200	} else {
				2201	xmlParserError(ctxt, "Specification mandate value for attribute %s\n",
				2202	name);
				2203	}
				2204
				2205	/*
				2206	* Check whether it's a namespace definition
				2207	*/
				2208	if ((ns == NULL) &&
				2209	(name[0] == 'x') && (name[1] == 'm') && (name[2] == 'l') &&
				2210	(name[3] == 'n') && (name[4] == 's') && (name[5] == 0)) {
				2211	/* a default namespace definition */
				2212	xmlNewNs(node, value, NULL);
				2213	if (name != NULL)
				2214	free(name);
				2215	if (value != NULL)
				2216	free(value);
				2217	return;
				2218	}
				2219	if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
				2220	(ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
				2221	/* a standard namespace definition */
				2222	xmlNewNs(node, value, name);
				2223	if (name != NULL)
				2224	free(name);
				2225	if (value != NULL)
				2226	free(value);
				2227	return;
				2228	}
				2229
				2230	/*
				2231	* Add the attribute to the node.
				2232	*/
				2233	if (name != NULL) {
				2234	xmlNewProp(node, name, value);
				2235	free(name);
				2236	}
				2237	if (value != NULL)
				2238	free(value);
				2239	}
				2240
				2241	/*
				2242	* xmlParseStartTag: parse a start of tag either for rule element or
				2243	* EmptyElement. In both case we don't parse the tag closing chars.
				2244	*
				2245	* [40] STag ::= '<' Name (S Attribute)* S? '>'
				2246	*
				2247	* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
				2248	*
				2249	* With namespace:
				2250	*
				2251	* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
				2252	*
				2253	* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
				2254	*/
				2255
				2256	xmlNodePtr xmlParseStartTag(xmlParserCtxtPtr ctxt) {
				2257	CHAR namespace, name;
				2258	xmlNsPtr ns = NULL;
				2259	xmlNodePtr ret = NULL;
				2260
				2261	if (CUR != '<') return(NULL);
				2262	NEXT;
				2263
				2264	name = xmlNamespaceParseQName(ctxt, &namespace);
				2265
				2266	/*
				2267	* Note : the namespace resolution is deferred until the end of the
				2268	* attributes parsing, since local namespace can be defined as
				2269	* an attribute at this level.
				2270	*/
				2271	ret = xmlNewNode(ns, name, NULL);
				2272
				2273	/*
				2274	* Now parse the attributes, it ends up with the ending
				2275	*
				2276	* (S Attribute)* S?
				2277	*/
				2278	SKIP_BLANKS;
				2279	while ((IS_CHAR(CUR)) &&
				2280	(CUR != '>') &&
				2281	((CUR != '/') \|\| (NXT(1) != '>'))) {
				2282	const CHAR *q = CUR_PTR;
				2283
				2284	xmlParseAttribute(ctxt, ret);
				2285	SKIP_BLANKS;
				2286
				2287	if (q == CUR_PTR) {
				2288	xmlParserError(ctxt,
				2289	"xmlParseStartTag: problem parsing attributes\n");
				2290	break;
				2291	}
				2292	}
				2293
				2294	/*
				2295	* Search the namespace
				2296	*/
				2297	ns = xmlSearchNs(ctxt->doc, ret, namespace);
				2298	if (ns == NULL) /* ret still doesn't have a parent yet ! */
				2299	ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
				2300	xmlSetNs(ret, ns);
				2301	if (namespace != NULL)
				2302	free(namespace);
				2303
				2304	/*
				2305	* We are parsing a new node.
				2306	*/
				2307	nodePush(ctxt, ret);
				2308
				2309	/*
				2310	* SAX: Start of Element !
				2311	*/
				2312	if (ctxt->sax != NULL)
				2313	ctxt->sax->startElement(ctxt, name);
				2314
				2315	return(ret);
				2316	}
				2317
				2318	/*
				2319	* xmlParseEndTag: parse an end of tag
				2320	*
				2321	* [42] ETag ::= '</' Name S? '>'
				2322	*
				2323	* With namespace
				2324	*
				2325	* [9] ETag ::= '</' QName S? '>'
				2326	*/
				2327
				2328	void xmlParseEndTag(xmlParserCtxtPtr ctxt, xmlNsPtr nsPtr, CHAR *tagPtr) {
				2329	CHAR namespace, name;
				2330	xmlNsPtr ns = NULL;
				2331
				2332	*nsPtr = NULL;
				2333	*tagPtr = NULL;
				2334
				2335	if ((CUR != '<') \|\| (NXT(1) != '/')) {
				2336	xmlParserError(ctxt, "xmlParseEndTag: '</' not found\n");
				2337	return;
				2338	}
				2339	SKIP(2);
				2340
				2341	name = xmlNamespaceParseQName(ctxt, &namespace);
				2342
				2343	/*
				2344	* Search the namespace
				2345	*/
				2346	ns = xmlSearchNs(ctxt->doc, ctxt->node, namespace);
				2347	if (namespace != NULL)
				2348	free(namespace);
				2349
				2350	*nsPtr = ns;
				2351	*tagPtr = name;
				2352
				2353	/*
				2354	* We should definitely be at the ending "S? '>'" part
				2355	*/
				2356	SKIP_BLANKS;
				2357	if ((!IS_CHAR(CUR)) \|\| (CUR != '>')) {
				2358	xmlParserError(ctxt, "End tag : expected '>'\n");
				2359	} else
				2360	NEXT;
				2361
				2362	return;
				2363	}
				2364
				2365	/*
				2366	* xmlParseCDSect: escaped pure raw content.
				2367	*
				2368	* [18] CDSect ::= CDStart CData CDEnd
				2369	*
				2370	* [19] CDStart ::= '<![CDATA['
				2371	*
				2372	* [20] Data ::= (Char* - (Char* ']]>' Char*))
				2373	*
				2374	* [21] CDEnd ::= ']]>'
				2375	*/
				2376	void xmlParseCDSect(xmlParserCtxtPtr ctxt) {
				2377	const CHAR r, s, *base;
				2378
				2379	if ((CUR == '<') && (NXT(1) == '!') &&
				2380	(NXT(2) == '[') && (NXT(3) == 'C') &&
				2381	(NXT(4) == 'D') && (NXT(5) == 'A') &&
				2382	(NXT(6) == 'T') && (NXT(7) == 'A') &&
				2383	(NXT(8) == '[')) {
				2384	SKIP(9);
				2385	} else
				2386	return;
				2387	base = CUR_PTR;
				2388	if (!IS_CHAR(CUR)) {
				2389	xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
				2390	return;
				2391	}
				2392	r = NEXT;
				2393	if (!IS_CHAR(CUR)) {
				2394	xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
				2395	return;
				2396	}
				2397	s = NEXT;
				2398	while (IS_CHAR(CUR) &&
				2399	((r != ']') \|\| (s != ']') \|\| (CUR != '>'))) {
				2400	r++;s++;NEXT;
				2401	}
				2402	if (!IS_CHAR(CUR)) {
				2403	xmlParserError(ctxt, "CData section not finished\n%.50s\n", base);
				2404	return;
				2405	}
				2406
				2407	/*
				2408	* Ok the segment [base CUR_PTR] is to be consumed as chars.
				2409	*/
				2410	if (ctxt->sax != NULL) {
				2411	if (areBlanks(ctxt, base, CUR_PTR - base))
				2412	ctxt->sax->ignorableWhitespace(ctxt, base, 0, CUR_PTR - base);
				2413	else
				2414	ctxt->sax->characters(ctxt, base, 0, CUR_PTR - base);
				2415	}
				2416	}
				2417
				2418	/*
				2419	* xmlParseContent: a content is
				2420	* (element \| PCData \| Reference \| CDSect \| PI \| Comment)
				2421	*
				2422	* [43] content ::= (element \| CharData \| Reference \| CDSect \| PI \| Comment)*
				2423	*/
				2424
				2425	void xmlParseContent(xmlParserCtxtPtr ctxt) {
				2426	xmlNodePtr ret = NULL;
				2427
				2428	while ((CUR != '<') \|\| (NXT(1) != '/')) {
				2429	const CHAR *test = CUR_PTR;
				2430	ret = NULL;
				2431
				2432	/*
				2433	* First case : a Processing Instruction.
				2434	*/
				2435	if ((CUR == '<') && (NXT(1) == '?')) {
				2436	xmlParsePI(ctxt);
				2437	}
				2438	/*
				2439	* Second case : a CDSection
				2440	*/
				2441	else if ((CUR == '<') && (NXT(1) == '!') &&
				2442	(NXT(2) == '[') && (NXT(3) == 'C') &&
				2443	(NXT(4) == 'D') && (NXT(5) == 'A') &&
				2444	(NXT(6) == 'T') && (NXT(7) == 'A') &&
				2445	(NXT(8) == '[')) {
				2446	xmlParseCDSect(ctxt);
				2447	}
				2448	/*
				2449	* Third case : a comment
				2450	*/
				2451	else if ((CUR == '<') && (NXT(1) == '!') &&
				2452	(NXT(2) == '-') && (NXT(3) == '-')) {
				2453	ret = xmlParseComment(ctxt, 1);
				2454	}
				2455	/*
				2456	* Fourth case : a sub-element.
				2457	*/
				2458	else if (CUR == '<') {
				2459	ret = xmlParseElement(ctxt);
				2460	}
				2461	/*
				2462	* Fifth case : a reference.
				2463	*/
				2464	else if (CUR == '&') {
				2465	xmlParseReference(ctxt, 0);
				2466	}
				2467	/*
				2468	* Last case, text. Note that References are handled directly.
				2469	*/
				2470	else {
				2471	xmlParseCharData(ctxt, 0);
				2472	}
				2473
				2474	/*
				2475	* Pop-up of finished entities.
				2476	*/
				2477	while ((CUR == 0) && (ctxt->inputNr > 1)) xmlPopInput(ctxt);
				2478
				2479	if (test == CUR_PTR) {
				2480	xmlParserError(ctxt, "detected an error in element content\n");
				2481	break;
				2482	}
				2483	}
				2484	}
				2485
				2486	/*
				2487	* xmlParseElement: parse an XML element
				2488	*
				2489	* [39] element ::= EmptyElemTag \| STag content ETag
				2490	*
				2491	* [41] Attribute ::= Name Eq AttValue
				2492	*/
				2493
				2494
				2495	xmlNodePtr xmlParseElement(xmlParserCtxtPtr ctxt) {
				2496	xmlNodePtr ret;
				2497	const CHAR *openTag = CUR_PTR;
				2498	xmlParserNodeInfo node_info;
				2499	CHAR *endTag;
				2500	xmlNsPtr endNs;
				2501
				2502	/* Capture start position */
				2503	node_info.begin_pos = CUR_PTR - ctxt->input->base;
				2504	node_info.begin_line = ctxt->input->line;
				2505
				2506	ret = xmlParseStartTag(ctxt);
				2507	if (ret == NULL) {
				2508	return(NULL);
				2509	}
				2510
				2511	/*
				2512	* Check for an Empty Element.
				2513	*/
				2514	if ((CUR == '/') && (NXT(1) == '>')) {
				2515	SKIP(2);
				2516	if (ctxt->sax != NULL)
				2517	ctxt->sax->endElement(ctxt, ret->name);
				2518
				2519	/*
				2520	* end of parsing of this node.
				2521	*/
				2522	nodePop(ctxt);
				2523
				2524	return(ret);
				2525	}
				2526	if (CUR == '>') NEXT;
				2527	else {
				2528	xmlParserError(ctxt, "Couldn't find end of Start Tag\n%.30s\n",openTag);
				2529
				2530	/*
				2531	* end of parsing of this node.
				2532	*/
				2533	nodePop(ctxt);
				2534
				2535	return(NULL);
				2536	}
				2537
				2538	/*
				2539	* Parse the content of the element:
				2540	*/
				2541	xmlParseContent(ctxt);
				2542	if (!IS_CHAR(CUR)) {
				2543	xmlParserError(ctxt, "Premature end of data in tag %.30s\n%.30s\n",
				2544	openTag);
				2545
				2546	/*
				2547	* end of parsing of this node.
				2548	*/
				2549	nodePop(ctxt);
				2550
				2551	return(NULL);
				2552	}
				2553
				2554	/*
				2555	* parse the end of tag: '</' should be here.
				2556	*/
				2557	xmlParseEndTag(ctxt, &endNs, &endTag);
				2558
				2559	/*
				2560	* Check that the Name in the ETag is the same as in the STag.
				2561	*/
				2562	if (endNs != ret->ns) {
				2563	xmlParserError(ctxt,
				2564	"Start and End tags don't use the same namespace\n%.30s\n%.30s\n",
				2565	openTag, endTag);
				2566	}
				2567	if (endTag == NULL ) {
				2568	xmlParserError(ctxt, "The End tag has no name\n%.30s\n", openTag);
				2569	} else if (xmlStrcmp(ret->name, endTag)) {
				2570	xmlParserError(ctxt,
				2571	"Start and End tags don't use the same name\n%.30s\n%.30s\n",
				2572	openTag, endTag);
				2573	}
				2574	/*
				2575	* SAX: End of Tag
				2576	*/
				2577	else if (ctxt->sax != NULL)
				2578	ctxt->sax->endElement(ctxt, endTag);
				2579
				2580	if (endTag != NULL)
				2581	free(endTag);
				2582
				2583	/* Capture end position and add node */
				2584	if ( ret != NULL && ctxt->record_info ) {
				2585	node_info.end_pos = CUR_PTR - ctxt->input->base;
				2586	node_info.end_line = ctxt->input->line;
				2587	node_info.node = ret;
				2588	xmlParserAddNodeInfo(ctxt, &node_info);
				2589	}
				2590
				2591	/*
				2592	* end of parsing of this node.
				2593	*/
				2594	nodePop(ctxt);
				2595
				2596	return(ret);
				2597	}
				2598
				2599	/*
				2600	* xmlParseVersionNum: parse the XML version value.
				2601	*
				2602	* [26] VersionNum ::= ([a-zA-Z0-9_.:] \| '-')+
				2603	*/
				2604	CHAR *xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
				2605	const CHAR *q = CUR_PTR;
				2606	CHAR *ret;
				2607
				2608	while (IS_CHAR(CUR) &&
				2609	(((CUR >= 'a') && (CUR <= 'z')) \|\|
				2610	((CUR >= 'A') && (CUR <= 'Z')) \|\|
				2611	((CUR >= '0') && (CUR <= '9')) \|\|
				2612	(CUR == '_') \|\| (CUR == '.') \|\|
				2613	(CUR == ':') \|\| (CUR == '-'))) NEXT;
				2614	ret = xmlStrndup(q, CUR_PTR - q);
				2615	return(ret);
				2616	}
				2617
				2618	/*
				2619	* xmlParseVersionInfo: parse the XML version.
				2620	*
				2621	* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' \| " VersionNum ")
				2622	*
				2623	* [25] Eq ::= S? '=' S?
				2624	*/
				2625
				2626	CHAR *xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
				2627	CHAR *version = NULL;
				2628	const CHAR *q;
				2629
				2630	if ((CUR == 'v') && (NXT(1) == 'e') &&
				2631	(NXT(2) == 'r') && (NXT(3) == 's') &&
				2632	(NXT(4) == 'i') && (NXT(5) == 'o') &&
				2633	(NXT(6) == 'n')) {
				2634	SKIP(7);
				2635	SKIP_BLANKS;
				2636	if (CUR != '=') {
				2637	xmlParserError(ctxt, "xmlParseVersionInfo : expected '='\n");
				2638	return(NULL);
				2639	}
				2640	NEXT;
				2641	SKIP_BLANKS;
				2642	if (CUR == '"') {
				2643	NEXT;
				2644	q = CUR_PTR;
				2645	version = xmlParseVersionNum(ctxt);
				2646	if (CUR != '"')
				2647	xmlParserError(ctxt, "String not closed\n%.50s\n", q);
				2648	else
				2649	NEXT;
				2650	} else if (CUR == '\''){
				2651	NEXT;
				2652	q = CUR_PTR;
				2653	version = xmlParseVersionNum(ctxt);
				2654	if (CUR != '\'')
				2655	xmlParserError(ctxt, "String not closed\n%.50s\n", q);
				2656	else
				2657	NEXT;
				2658	} else {
				2659	xmlParserError(ctxt, "xmlParseVersionInfo : expected ' or \"\n");
				2660	}
				2661	}
				2662	return(version);
				2663	}
				2664
				2665	/*
				2666	* xmlParseEncName: parse the XML encoding name
				2667	*
				2668	* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] \| '-')*
				2669	*/
				2670	CHAR *xmlParseEncName(xmlParserCtxtPtr ctxt) {
				2671	const CHAR *q = CUR_PTR;
				2672	CHAR *ret = NULL;
				2673
				2674	if (((CUR >= 'a') && (CUR <= 'z')) \|\|
				2675	((CUR >= 'A') && (CUR <= 'Z'))) {
				2676	NEXT;
				2677	while (IS_CHAR(CUR) &&
				2678	(((CUR >= 'a') && (CUR <= 'z')) \|\|
				2679	((CUR >= 'A') && (CUR <= 'Z')) \|\|
				2680	((CUR >= '0') && (CUR <= '9')) \|\|
				2681	(CUR == '-'))) NEXT;
				2682	ret = xmlStrndup(q, CUR_PTR - q);
				2683	} else {
				2684	xmlParserError(ctxt, "Invalid XML encoding name\n");
				2685	}
				2686	return(ret);
				2687	}
				2688
				2689	/*
				2690	* xmlParseEncodingDecl: parse the XML encoding declaration
				2691	*
				2692	* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' \| "'" EncName "'")
				2693	*/
				2694
				2695	CHAR *xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
				2696	CHAR *encoding = NULL;
				2697	const CHAR *q;
				2698
				2699	SKIP_BLANKS;
				2700	if ((CUR == 'e') && (NXT(1) == 'n') &&
				2701	(NXT(2) == 'c') && (NXT(3) == 'o') &&
				2702	(NXT(4) == 'd') && (NXT(5) == 'i') &&
				2703	(NXT(6) == 'n') && (NXT(7) == 'g')) {
				2704	SKIP(8);
				2705	SKIP_BLANKS;
				2706	if (CUR != '=') {
				2707	xmlParserError(ctxt, "xmlParseEncodingDecl : expected '='\n");
				2708	return(NULL);
				2709	}
				2710	NEXT;
				2711	SKIP_BLANKS;
				2712	if (CUR == '"') {
				2713	NEXT;
				2714	q = CUR_PTR;
				2715	encoding = xmlParseEncName(ctxt);
				2716	if (CUR != '"')
				2717	xmlParserError(ctxt, "String not closed\n%.50s\n", q);
				2718	else
				2719	NEXT;
				2720	} else if (CUR == '\''){
				2721	NEXT;
				2722	q = CUR_PTR;
				2723	encoding = xmlParseEncName(ctxt);
				2724	if (CUR != '\'')
				2725	xmlParserError(ctxt, "String not closed\n%.50s\n", q);
				2726	else
				2727	NEXT;
				2728	} else if (CUR == '"'){
				2729	xmlParserError(ctxt, "xmlParseEncodingDecl : expected ' or \"\n");
				2730	}
				2731	}
				2732	return(encoding);
				2733	}
				2734
				2735	/*
				2736	* xmlParseSDDecl: parse the XML standalone declaration
				2737	*
				2738	* [32] SDDecl ::= S 'standalone' Eq
				2739	* (("'" ('yes' \| 'no') "'") \| ('"' ('yes' \| 'no')'"'))
				2740	*/
				2741
				2742	int xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
				2743	int standalone = -1;
				2744
				2745	SKIP_BLANKS;
				2746	if ((CUR == 's') && (NXT(1) == 't') &&
				2747	(NXT(2) == 'a') && (NXT(3) == 'n') &&
				2748	(NXT(4) == 'd') && (NXT(5) == 'a') &&
				2749	(NXT(6) == 'l') && (NXT(7) == 'o') &&
				2750	(NXT(8) == 'n') && (NXT(9) == 'e')) {
				2751	SKIP(10);
				2752	if (CUR != '=') {
				2753	xmlParserError(ctxt, "XML standalone declaration : expected '='\n");
				2754	return(standalone);
				2755	}
				2756	NEXT;
				2757	SKIP_BLANKS;
				2758	if (CUR == '\''){
				2759	NEXT;
				2760	if ((CUR == 'n') && (NXT(1) == 'o')) {
				2761	standalone = 0;
				2762	SKIP(2);
				2763	} else if ((CUR == 'y') && (NXT(1) == 'e') &&
				2764	(NXT(2) == 's')) {
				2765	standalone = 1;
				2766	SKIP(3);
				2767	} else {
				2768	xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
				2769	}
				2770	if (CUR != '\'')
				2771	xmlParserError(ctxt, "String not closed\n");
				2772	else
				2773	NEXT;
				2774	} else if (CUR == '"'){
				2775	NEXT;
				2776	if ((CUR == 'n') && (NXT(1) == 'o')) {
				2777	standalone = 0;
				2778	SKIP(2);
				2779	} else if ((CUR == 'y') && (NXT(1) == 'e') &&
				2780	(NXT(2) == 's')) {
				2781	standalone = 1;
				2782	SKIP(3);
				2783	} else {
				2784	xmlParserError(ctxt, "standalone accepts only 'yes' or 'no'\n");
				2785	}
				2786	if (CUR != '"')
				2787	xmlParserError(ctxt, "String not closed\n");
				2788	else
				2789	NEXT;
				2790	} else {
				2791	xmlParserError(ctxt, "Standalone value not found\n");
				2792	}
				2793	}
				2794	return(standalone);
				2795	}
				2796
				2797	/*
				2798	* xmlParseXMLDecl: parse an XML declaration header
				2799	*
				2800	* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
				2801	*/
				2802
				2803	void xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
				2804	CHAR *version;
				2805
				2806	/*
				2807	* We know that '<?xml' is here.
				2808	*/
				2809	SKIP(5);
				2810
				2811	SKIP_BLANKS;
				2812
				2813	/*
				2814	* We should have the VersionInfo here.
				2815	*/
				2816	version = xmlParseVersionInfo(ctxt);
				2817	if (version == NULL)
				2818	version = xmlCharStrdup(XML_DEFAULT_VERSION);
				2819	ctxt->doc = xmlNewDoc(version);
				2820	free(version);
				2821
				2822	/*
				2823	* We may have the encoding declaration
				2824	*/
				2825	ctxt->doc->encoding = xmlParseEncodingDecl(ctxt);
				2826
				2827	/*
				2828	* We may have the standalone status.
				2829	*/
				2830	ctxt->doc->standalone = xmlParseSDDecl(ctxt);
				2831
				2832	SKIP_BLANKS;
				2833	if ((CUR == '?') && (NXT(1) == '>')) {
				2834	SKIP(2);
				2835	} else if (CUR == '>') {
				2836	/* Deprecated old WD ... */
				2837	xmlParserError(ctxt, "XML declaration must end-up with '?>'\n");
				2838	NEXT;
				2839	} else {
				2840	xmlParserError(ctxt, "parsing XML declaration: '?>' expected\n");
				2841	MOVETO_ENDTAG(CUR_PTR);
				2842	NEXT;
				2843	}
				2844	}
				2845
				2846	/*
				2847	* xmlParseMisc: parse an XML Misc* optionnal field.
				2848	* Misc*
				2849	*
				2850	* [27] Misc ::= Comment \| PI \| S
				2851	*/
				2852
				2853	void xmlParseMisc(xmlParserCtxtPtr ctxt) {
				2854	while (((CUR == '<') && (NXT(1) == '?')) \|\|
				2855	((CUR == '<') && (NXT(1) == '!') &&
				2856	(NXT(2) == '-') && (NXT(3) == '-')) \|\|
				2857	IS_BLANK(CUR)) {
				2858	if ((CUR == '<') && (NXT(1) == '?')) {
				2859	xmlParsePI(ctxt);
				2860	} else if (IS_BLANK(CUR)) {
				2861	NEXT;
				2862	} else
				2863	xmlParseComment(ctxt, 0);
				2864	}
				2865	}
				2866
				2867	/*
				2868	* xmlParseDocument : parse an XML document and build a tree.
				2869	*
				2870	* [1] document ::= prolog element Misc*
				2871	*
				2872	* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
				2873	*/
				2874
				2875	int xmlParseDocument(xmlParserCtxtPtr ctxt) {
				2876	xmlDefaultSAXHandlerInit();
				2877
				2878	/*
				2879	* SAX: beginning of the document processing.
				2880	*/
				2881	if (ctxt->sax)
				2882	ctxt->sax->setDocumentLocator(ctxt, &xmlDefaultSAXLocator);
				2883	if (ctxt->sax)
				2884	ctxt->sax->startDocument(ctxt);
				2885
				2886	/*
				2887	* We should check for encoding here and plug-in some
				2888	* conversion code TODO !!!!
				2889	*/
				2890
				2891	/*
				2892	* Wipe out everything which is before the first '<'
				2893	*/
				2894	SKIP_BLANKS;
				2895
				2896	/*
				2897	* Check for the XMLDecl in the Prolog.
				2898	*/
				2899	if ((CUR == '<') && (NXT(1) == '?') &&
				2900	(NXT(2) == 'x') && (NXT(3) == 'm') &&
				2901	(NXT(4) == 'l')) {
				2902	xmlParseXMLDecl(ctxt);
				2903	/* SKIP_EOL(cur); */
				2904	SKIP_BLANKS;
				2905	} else if ((CUR == '<') && (NXT(1) == '?') &&
				2906	(NXT(2) == 'X') && (NXT(3) == 'M') &&
				2907	(NXT(4) == 'L')) {
				2908	/*
				2909	* The first drafts were using <?XML and the final W3C REC
				2910	* now use <?xml ...
				2911	*/
				2912	xmlParseXMLDecl(ctxt);
				2913	/* SKIP_EOL(cur); */
				2914	SKIP_BLANKS;
				2915	} else {
				2916	CHAR *version;
				2917
				2918	version = xmlCharStrdup(XML_DEFAULT_VERSION);
				2919	ctxt->doc = xmlNewDoc(version);
				2920	free(version);
				2921	}
				2922
				2923	/*
				2924	* The Misc part of the Prolog
				2925	*/
				2926	xmlParseMisc(ctxt);
				2927
				2928	/*
				2929	* Then possibly doc type declaration(s) and more Misc
				2930	* (doctypedecl Misc*)?
				2931	*/
				2932	if ((CUR == '<') && (NXT(1) == '!') &&
				2933	(NXT(2) == 'D') && (NXT(3) == 'O') &&
				2934	(NXT(4) == 'C') && (NXT(5) == 'T') &&
				2935	(NXT(6) == 'Y') && (NXT(7) == 'P') &&
				2936	(NXT(8) == 'E')) {
				2937	xmlParseDocTypeDecl(ctxt);
				2938	xmlParseMisc(ctxt);
				2939	}
				2940
				2941	/*
				2942	* Time to start parsing the tree itself
				2943	*/
				2944	ctxt->doc->root = xmlParseElement(ctxt);
				2945
				2946	/*
				2947	* The Misc part at the end
				2948	*/
				2949	xmlParseMisc(ctxt);
				2950
				2951	/*
				2952	* SAX: end of the document processing.
				2953	*/
				2954	if (ctxt->sax)
				2955	ctxt->sax->endDocument(ctxt);
				2956	return(0);
				2957	}
				2958
				2959	/*
				2960	* xmlParseDoc : parse an XML in-memory document and build a tree.
				2961	*/
				2962
				2963	xmlDocPtr xmlParseDoc(CHAR *cur) {
				2964	xmlDocPtr ret;
				2965	xmlParserCtxtPtr ctxt;
				2966	xmlParserInputPtr input;
				2967
				2968	if (cur == NULL) return(NULL);
				2969
				2970	ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
				2971	if (ctxt == NULL) {
				2972	perror("malloc");
				2973	return(NULL);
				2974	}
				2975	xmlInitParserCtxt(ctxt);
				2976	input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
				2977	if (input == NULL) {
				2978	perror("malloc");
				2979	free(ctxt);
				2980	return(NULL);
				2981	}
				2982
				2983	input->filename = NULL;
				2984	input->line = 1;
				2985	input->col = 1;
				2986	input->base = cur;
				2987	input->cur = cur;
				2988
				2989	inputPush(ctxt, input);
				2990
				2991
				2992	xmlParseDocument(ctxt);
				2993	ret = ctxt->doc;
				2994	free(ctxt);
				2995
				2996	return(ret);
				2997	}
				2998
				2999	/*
				3000	* xmlParseFile : parse an XML file and build a tree.
				3001	*/
				3002
				3003	xmlDocPtr xmlParseFile(const char *filename) {
				3004	xmlDocPtr ret;
				3005	#ifdef HAVE_ZLIB_H
				3006	gzFile input;
				3007	#else
				3008	int input;
				3009	#endif
				3010	int res;
				3011	struct stat buf;
				3012	char *buffer;
				3013	xmlParserCtxtPtr ctxt;
				3014	xmlParserInputPtr inputStream;
				3015
				3016	res = stat(filename, &buf);
				3017	if (res < 0) return(NULL);
				3018
				3019	#ifdef HAVE_ZLIB_H
				3020	retry_bigger:
				3021	buffer = malloc((buf.st_size * 20) + 100);
				3022	#else
				3023	buffer = malloc(buf.st_size + 100);
				3024	#endif
				3025	if (buffer == NULL) {
				3026	perror("malloc");
				3027	return(NULL);
				3028	}
				3029
				3030	memset(buffer, 0, sizeof(buffer));
				3031	#ifdef HAVE_ZLIB_H
				3032	input = gzopen (filename, "r");
				3033	if (input == NULL) {
				3034	fprintf (stderr, "Cannot read file %s :\n", filename);
				3035	perror ("gzopen failed");
				3036	return(NULL);
				3037	}
				3038	#else
				3039	input = open (filename, O_RDONLY);
				3040	if (input < 0) {
				3041	fprintf (stderr, "Cannot read file %s :\n", filename);
				3042	perror ("open failed");
				3043	return(NULL);
				3044	}
				3045	#endif
				3046	#ifdef HAVE_ZLIB_H
				3047	res = gzread(input, buffer, 20 * buf.st_size);
				3048	#else
				3049	res = read(input, buffer, buf.st_size);
				3050	#endif
				3051	if (res < 0) {
				3052	fprintf (stderr, "Cannot read file %s :\n", filename);
				3053	#ifdef HAVE_ZLIB_H
				3054	perror ("gzread failed");
				3055	#else
				3056	perror ("read failed");
				3057	#endif
				3058	return(NULL);
				3059	}
				3060	#ifdef HAVE_ZLIB_H
				3061	gzclose(input);
Daniel Veillard	70120ff	1998-09-22 00:24:21 +0000	[diff] [blame]	3062	if (res >= 20 * buf.st_size + 20) {
Daniel Veillard	260a68f	1998-08-13 03:39:55 +0000	[diff] [blame]	3063	free(buffer);
				3064	buf.st_size *= 2;
				3065	goto retry_bigger;
				3066	}
				3067	buf.st_size = res;
				3068	#else
				3069	close(input);
				3070	#endif
				3071
				3072	buffer[buf.st_size] = '\0';
				3073
				3074	ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
				3075	if (ctxt == NULL) {
				3076	perror("malloc");
				3077	return(NULL);
				3078	}
				3079	xmlInitParserCtxt(ctxt);
				3080	inputStream = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
				3081	if (inputStream == NULL) {
				3082	perror("malloc");
				3083	free(ctxt);
				3084	return(NULL);
				3085	}
				3086
				3087	inputStream->filename = strdup(filename);
				3088	inputStream->line = 1;
				3089	inputStream->col = 1;
				3090
				3091	/*
				3092	* TODO : plug some encoding conversion routines here. !!!
				3093	*/
				3094	inputStream->base = buffer;
				3095	inputStream->cur = buffer;
				3096
				3097	inputPush(ctxt, inputStream);
				3098
				3099	xmlParseDocument(ctxt);
				3100
				3101	ret = ctxt->doc;
				3102	free(buffer);
				3103	free(ctxt);
				3104
				3105	return(ret);
				3106	}
				3107
				3108
				3109	/*
				3110	* xmlParseMemory : parse an XML memory block and build a tree.
				3111	*/
				3112	xmlDocPtr xmlParseMemory(char *buffer, int size) {
				3113	xmlDocPtr ret;
				3114	xmlParserCtxtPtr ctxt;
				3115	xmlParserInputPtr input;
				3116
				3117	buffer[size - 1] = '\0';
				3118
				3119	ctxt = (xmlParserCtxtPtr) malloc(sizeof(xmlParserCtxt));
				3120	if (ctxt == NULL) {
				3121	perror("malloc");
				3122	return(NULL);
				3123	}
				3124	xmlInitParserCtxt(ctxt);
				3125	input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
				3126	if (input == NULL) {
				3127	perror("malloc");
				3128	free(ctxt);
				3129	return(NULL);
				3130	}
				3131
				3132	input->filename = NULL;
				3133	input->line = 1;
				3134	input->col = 1;
				3135
				3136	/*
				3137	* TODO : plug some encoding conversion routines here. !!!
				3138	*/
				3139	input->base = buffer;
				3140	input->cur = buffer;
				3141
				3142	inputPush(ctxt, input);
				3143
				3144	xmlParseDocument(ctxt);
				3145
				3146	ret = ctxt->doc;
				3147	free(ctxt);
				3148
				3149	return(ret);
				3150	}
				3151
				3152
				3153	/* Initialize parser context */
				3154	void xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
				3155	{
				3156	/* Allocate the Input stack */
				3157	ctxt->inputTab = (xmlParserInputPtr ) malloc(5 sizeof(xmlParserInputPtr));
				3158	ctxt->inputNr = 0;
				3159	ctxt->inputMax = 5;
				3160	ctxt->input = NULL;
				3161
				3162	/* Allocate the Node stack */
				3163	ctxt->nodeTab = (xmlNodePtr ) malloc(10 sizeof(xmlNodePtr));
				3164	ctxt->nodeNr = 0;
				3165	ctxt->nodeMax = 10;
				3166	ctxt->node = NULL;
				3167
				3168	ctxt->sax = &xmlDefaultSAXHandler;
				3169	ctxt->doc = NULL;
				3170	ctxt->record_info = 0;
				3171	xmlInitNodeInfoSeq(&ctxt->node_seq);
				3172	}
				3173
				3174
				3175	/*
				3176	* Clear (release owned resources) and reinitialize context
				3177	*/
				3178	void xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
				3179	{
				3180	xmlClearNodeInfoSeq(&ctxt->node_seq);
				3181	xmlInitParserCtxt(ctxt);
				3182	}
				3183
				3184
				3185	/*
				3186	* Setup the parser context to parse a new buffer; Clears any prior
				3187	* contents from the parser context. The buffer parameter must not be
				3188	* NULL, but the filename parameter can be
				3189	*/
				3190	void xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const CHAR* buffer,
				3191	const char* filename)
				3192	{
				3193	xmlParserInputPtr input;
				3194
				3195	input = (xmlParserInputPtr) malloc(sizeof(xmlParserInput));
				3196	if (input == NULL) {
				3197	perror("malloc");
				3198	free(ctxt);
				3199	exit(1);
				3200	}
				3201
				3202	xmlClearParserCtxt(ctxt);
				3203	if (input->filename != NULL)
				3204	input->filename = strdup(filename);
				3205	else
				3206	input->filename = NULL;
				3207	input->line = 1;
				3208	input->col = 1;
				3209	input->base = buffer;
				3210	input->cur = buffer;
				3211
				3212	inputPush(ctxt, input);
				3213	}
				3214
				3215
				3216	/*
				3217	* xmlParserFindNodeInfo : Find the parser node info struct for a given node
				3218	*/
				3219	const xmlParserNodeInfo* xmlParserFindNodeInfo(const xmlParserCtxt* ctx,
				3220	const xmlNode* node)
				3221	{
				3222	unsigned long pos;
				3223
				3224	/* Find position where node should be at */
				3225	pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
				3226	if ( ctx->node_seq.buffer[pos].node == node )
				3227	return &ctx->node_seq.buffer[pos];
				3228	else
				3229	return NULL;
				3230	}
				3231
				3232
				3233	/*
				3234	* xmlInitNodeInfoSeq -- Initialize (set to initial state) node info sequence
				3235	*/
				3236	void xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
				3237	{
				3238	seq->length = 0;
				3239	seq->maximum = 0;
				3240	seq->buffer = NULL;
				3241	}
				3242
				3243	/*
				3244	* xmlClearNodeInfoSeq -- Clear (release memory and reinitialize) node
				3245	* info sequence
				3246	*/
				3247	void xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
				3248	{
				3249	if ( seq->buffer != NULL )
				3250	free(seq->buffer);
				3251	xmlInitNodeInfoSeq(seq);
				3252	}
				3253
				3254
				3255	/*
				3256	* xmlParserFindNodeInfoIndex : Find the index that the info record for
				3257	* the given node is or should be at in a sorted sequence
				3258	*/
				3259	unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
				3260	const xmlNode* node)
				3261	{
				3262	unsigned long upper, lower, middle;
				3263	int found = 0;
				3264
				3265	/* Do a binary search for the key */
				3266	lower = 1;
				3267	upper = seq->length;
				3268	middle = 0;
				3269	while ( lower <= upper && !found) {
				3270	middle = lower + (upper - lower) / 2;
				3271	if ( node == seq->buffer[middle - 1].node )
				3272	found = 1;
				3273	else if ( node < seq->buffer[middle - 1].node )
				3274	upper = middle - 1;
				3275	else
				3276	lower = middle + 1;
				3277	}
				3278
				3279	/* Return position */
				3280	if ( middle == 0 \|\| seq->buffer[middle - 1].node < node )
				3281	return middle;
				3282	else
				3283	return middle - 1;
				3284	}
				3285
				3286
				3287	/*
				3288	* xmlParserAddNodeInfo : Insert node info record into sorted sequence
				3289	*/
				3290	void xmlParserAddNodeInfo(xmlParserCtxtPtr ctx,
				3291	const xmlParserNodeInfo* info)
				3292	{
				3293	unsigned long pos;
				3294	static unsigned int block_size = 5;
				3295
				3296	/* Find pos and check to see if node is already in the sequence */
				3297	pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, info->node);
				3298	if ( pos < ctx->node_seq.length
				3299	&& ctx->node_seq.buffer[pos].node == info->node ) {
				3300	ctx->node_seq.buffer[pos] = *info;
				3301	}
				3302
				3303	/* Otherwise, we need to add new node to buffer */
				3304	else {
				3305	/* Expand buffer by 5 if needed */
				3306	if ( ctx->node_seq.length + 1 > ctx->node_seq.maximum ) {
				3307	xmlParserNodeInfo* tmp_buffer;
				3308	unsigned int byte_size = (sizeof(*ctx->node_seq.buffer)
				3309	*(ctx->node_seq.maximum + block_size));
				3310
				3311	if ( ctx->node_seq.buffer == NULL )
				3312	tmp_buffer = (xmlParserNodeInfo*)malloc(byte_size);
				3313	else
				3314	tmp_buffer = (xmlParserNodeInfo*)realloc(ctx->node_seq.buffer, byte_size);
				3315
				3316	if ( tmp_buffer == NULL ) {
				3317	xmlParserError(ctx, "Out of memory");
				3318	return;
				3319	}
				3320	ctx->node_seq.buffer = tmp_buffer;
				3321	ctx->node_seq.maximum += block_size;
				3322	}
				3323
				3324	/* If position is not at end, move elements out of the way */
				3325	if ( pos != ctx->node_seq.length ) {
				3326	unsigned long i;
				3327
				3328	for ( i = ctx->node_seq.length; i > pos; i-- )
				3329	ctx->node_seq.buffer[i] = ctx->node_seq.buffer[i - 1];
				3330	}
				3331
				3332	/* Copy element and increase length */
				3333	ctx->node_seq.buffer[pos] = *info;
				3334	ctx->node_seq.length++;
				3335	}
				3336	}