blob: a14cdbda878b3e076dd79501eca1ca2cd4581ef5 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Daniel Veillard34ce8be2002-03-18 19:37:11 +000033#define IN_LIBXML
Bjorn Reese70a9da52001-04-21 16:57:29 +000034#include "libxml.h"
35
Daniel Veillard3c5ed912002-01-08 10:36:16 +000036#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000037#define XML_DIR_SEP '\\'
38#else
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '/'
40#endif
41
Owen Taylor3473f882001-02-23 17:55:21 +000042#include <stdlib.h>
43#include <string.h>
44#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000045#include <libxml/threads.h>
46#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000047#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000056#ifdef LIBXML_CATALOG_ENABLED
57#include <libxml/catalog.h>
58#endif
Owen Taylor3473f882001-02-23 17:55:21 +000059
60#ifdef HAVE_CTYPE_H
61#include <ctype.h>
62#endif
63#ifdef HAVE_STDLIB_H
64#include <stdlib.h>
65#endif
66#ifdef HAVE_SYS_STAT_H
67#include <sys/stat.h>
68#endif
69#ifdef HAVE_FCNTL_H
70#include <fcntl.h>
71#endif
72#ifdef HAVE_UNISTD_H
73#include <unistd.h>
74#endif
75#ifdef HAVE_ZLIB_H
76#include <zlib.h>
77#endif
78
79
Daniel Veillard21a0f912001-02-25 19:54:14 +000080#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000081#define XML_PARSER_BUFFER_SIZE 100
82
Daniel Veillard5997aca2002-03-18 18:36:20 +000083#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
84
Owen Taylor3473f882001-02-23 17:55:21 +000085/*
Owen Taylor3473f882001-02-23 17:55:21 +000086 * List of XML prefixed PI allowed by W3C specs
87 */
88
Daniel Veillardb44025c2001-10-11 22:55:55 +000089static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +000090 "xml-stylesheet",
91 NULL
92};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000095xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
96 const xmlChar **str);
97
Daniel Veillard257d9102001-05-08 10:41:44 +000098static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000099xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
100 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000101 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000102 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000103
Daniel Veillard8107a222002-01-13 14:10:10 +0000104static void
105xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
106 xmlNodePtr lastNode);
107
Owen Taylor3473f882001-02-23 17:55:21 +0000108/************************************************************************
109 * *
110 * Parser stacks related functions and macros *
111 * *
112 ************************************************************************/
113
114xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
115 const xmlChar ** str);
116
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) expands to two functions working on the
 * context fields name, nameTab, nameNr and nameMax:
 *
 *   scope int  namePush(ctxt, value)
 *       stores value at index nameNr, makes it the current ctxt->name and
 *       returns that index. The table is doubled when it is full. If the
 *       reallocation fails an error is reported and 0 is returned; the
 *       existing table, its recorded capacity and the current top are
 *       left untouched (the result of xmlRealloc is only stored once it
 *       is known to be non-NULL, so the old table is never lost).
 *
 *   scope type namePop(ctxt)
 *       removes and returns the top entry, clears its slot and makes the
 *       entry below it (or NULL) the current ctxt->name. Returns 0 when
 *       the stack is empty.
 */

#define PUSH_AND_POP(scope, type, name)					\
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
        type *grown;							\
        ctxt->name##Max *= 2;						\
        grown = (type *) xmlRealloc(ctxt->name##Tab,			\
                     ctxt->name##Max * sizeof(ctxt->name##Tab[0]));	\
        if (grown == NULL) {						\
            xmlGenericError(xmlGenericErrorContext,			\
                            "realloc failed !\n");			\
            ctxt->name##Max /= 2;					\
            return(0);							\
        }								\
        ctxt->name##Tab = grown;					\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
scope type name##Pop(xmlParserCtxtPtr ctxt) {				\
    type ret;								\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
        ctxt->name = NULL;						\
    ret = ctxt->name##Tab[ctxt->name##Nr];				\
    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
    return(ret);							\
}
149
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000150/**
151 * inputPop:
152 * @ctxt: an XML parser context
153 *
154 * Pops the top parser input from the input stack
155 *
156 * Returns the input just removed
157 */
158/**
159 * inputPush:
160 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000161 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000162 *
163 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000164 *
165 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000166 */
167/**
168 * namePop:
169 * @ctxt: an XML parser context
170 *
171 * Pops the top element name from the name stack
172 *
173 * Returns the name just removed
174 */
175/**
176 * namePush:
177 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000178 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000179 *
180 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000181 *
182 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000183 */
184/**
185 * nodePop:
186 * @ctxt: an XML parser context
187 *
188 * Pops the top element node from the node stack
189 *
190 * Returns the node just removed
191 */
192/**
193 * nodePush:
194 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000195 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000196 *
197 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000198 *
199 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000200 */
/*
 * Those macros actually generate the functions:
 *   inputPush()/inputPop() on the stack of xmlParserInputPtr,
 *   nodePush()/nodePop()   on the stack of xmlNodePtr,
 *   namePush()/namePop()   on the stack of xmlChar * element names.
 */
PUSH_AND_POP(extern, xmlParserInputPtr, input)
PUSH_AND_POP(extern, xmlNodePtr, node)
PUSH_AND_POP(extern, xmlChar*, name)
207
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000208static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000209 if (ctxt->spaceNr >= ctxt->spaceMax) {
210 ctxt->spaceMax *= 2;
211 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
212 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
213 if (ctxt->spaceTab == NULL) {
214 xmlGenericError(xmlGenericErrorContext,
215 "realloc failed !\n");
216 return(0);
217 }
218 }
219 ctxt->spaceTab[ctxt->spaceNr] = val;
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
221 return(ctxt->spaceNr++);
222}
223
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000224static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000225 int ret;
226 if (ctxt->spaceNr <= 0) return(0);
227 ctxt->spaceNr--;
228 if (ctxt->spaceNr > 0)
229 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
230 else
231 ctxt->space = NULL;
232 ret = ctxt->spaceTab[ctxt->spaceNr];
233 ctxt->spaceTab[ctxt->spaceNr] = -1;
234 return(ret);
235}
236
237/*
238 * Macros for accessing the content. Those should be used only by the parser,
239 * and not exported.
240 *
241 * Dirty macros, i.e. one often need to make assumption on the context to
242 * use them
243 *
244 * CUR_PTR return the current pointer to the xmlChar to be parsed.
245 * To be used with extreme caution since operations consuming
246 * characters may move the input buffer to a different location !
247 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
248 * This should be used internally by the parser
249 * only to compare to ASCII values otherwise it would break when
250 * running with UTF-8 encoding.
251 * RAW same as CUR but in the input buffer, bypass any token
252 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
255 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
256 * strings within the parser.
257 *
258 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
259 *
260 * NEXT Skip to the next character, this does the proper decoding
261 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000262 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000263 * CUR_CHAR(l) returns the current unicode character (int), set l
264 * to the number of xmlChars used for the encoding [0-5].
265 * CUR_SCHAR same but operate on a string instead of the context
266 * COPY_BUF copy the current unicode char to the target buffer, increment
267 * the index
268 * GROW, SHRINK handling of input buffers
269 */
270
/*
 * RAW evaluates to -1 while a token is pending so that comparisons with
 * ASCII characters fail; CUR yields the pending token instead.
 */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * Advance by val xmlChars, then expand a PE reference found at the new
 * position and pop the input if it is exhausted and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK, GROW and NEXT1 deliberately stay plain statements rather than
 * do { } while (0) blocks: this file contains at least one use of GROW
 * that is not followed by a semicolon, which would stop compiling.
 * Do not use them as the unbraced body of an if that has an else.
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Advance by exactly one xmlChar, refilling the buffer at its end. */
#define NEXT1 {								\
        ctxt->input->cur++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Advance by l xmlChars, keeping line/col up to date and dropping any
 * pending token. The argument is parenthesized so that an expression
 * argument is applied as a whole.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v, encoded on l xmlChars, to buffer b at index i and
 * advance i. Every argument is parenthesized; the if/else shape is kept
 * so that existing call sites are unaffected.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000323
324/**
325 * xmlSkipBlankChars:
326 * @ctxt: the XML parser context
327 *
328 * skip all blanks character found at that point in the input streams.
329 * It pops up finished entities in the process if allowable at that point.
330 *
331 * Returns the number of space chars skipped
332 */
333
334int
335xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000336 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000337
Daniel Veillard02141ea2001-04-30 11:46:40 +0000338 if (ctxt->token != 0) {
339 if (!IS_BLANK(ctxt->token))
340 return(0);
341 ctxt->token = 0;
342 res++;
343 }
Owen Taylor3473f882001-02-23 17:55:21 +0000344 /*
345 * It's Okay to use CUR/NEXT here since all the blanks are on
346 * the ASCII range.
347 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000348 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
349 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000350 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000351 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000352 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000353 cur = ctxt->input->cur;
354 while (IS_BLANK(*cur)) {
355 if (*cur == '\n') {
356 ctxt->input->line++; ctxt->input->col = 1;
357 }
358 cur++;
359 res++;
360 if (*cur == 0) {
361 ctxt->input->cur = cur;
362 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
363 cur = ctxt->input->cur;
364 }
365 }
366 ctxt->input->cur = cur;
367 } else {
368 int cur;
369 do {
370 cur = CUR;
371 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
372 NEXT;
373 cur = CUR;
374 res++;
375 }
376 while ((cur == 0) && (ctxt->inputNr > 1) &&
377 (ctxt->instate != XML_PARSER_COMMENT)) {
378 xmlPopInput(ctxt);
379 cur = CUR;
380 }
381 /*
382 * Need to handle support of entities branching here
383 */
384 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
385 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
386 }
Owen Taylor3473f882001-02-23 17:55:21 +0000387 return(res);
388}
389
390/************************************************************************
391 * *
392 * Commodity functions to handle entities *
393 * *
394 ************************************************************************/
395
396/**
397 * xmlPopInput:
398 * @ctxt: an XML parser context
399 *
400 * xmlPopInput: the current input pointed by ctxt->input came to an end
401 * pop it and return the next char.
402 *
403 * Returns the current xmlChar in the parser context
404 */
405xmlChar
406xmlPopInput(xmlParserCtxtPtr ctxt) {
407 if (ctxt->inputNr == 1) return(0); /* End of main Input */
408 if (xmlParserDebugEntities)
409 xmlGenericError(xmlGenericErrorContext,
410 "Popping input %d\n", ctxt->inputNr);
411 xmlFreeInputStream(inputPop(ctxt));
Daniel Veillard561b7f82002-03-20 21:55:57 +0000412 if ((*ctxt->input->cur == 0) &&
Owen Taylor3473f882001-02-23 17:55:21 +0000413 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
414 return(xmlPopInput(ctxt));
415 return(CUR);
416}
417
418/**
419 * xmlPushInput:
420 * @ctxt: an XML parser context
421 * @input: an XML parser input fragment (entity, XML fragment ...).
422 *
423 * xmlPushInput: switch to a new input stream which is stacked on top
424 * of the previous one(s).
425 */
426void
427xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
428 if (input == NULL) return;
429
430 if (xmlParserDebugEntities) {
431 if ((ctxt->input != NULL) && (ctxt->input->filename))
432 xmlGenericError(xmlGenericErrorContext,
433 "%s(%d): ", ctxt->input->filename,
434 ctxt->input->line);
435 xmlGenericError(xmlGenericErrorContext,
436 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
437 }
438 inputPush(ctxt, input);
439 GROW;
440}
441
442/**
443 * xmlParseCharRef:
444 * @ctxt: an XML parser context
445 *
446 * parse Reference declarations
447 *
448 * [66] CharRef ::= '&#' [0-9]+ ';' |
449 * '&#x' [0-9a-fA-F]+ ';'
450 *
451 * [ WFC: Legal Character ]
452 * Characters referred to using character references must match the
453 * production for Char.
454 *
455 * Returns the value parsed (as an int), 0 in case of error
456 */
457int
458xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000459 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000460 int count = 0;
461
462 if (ctxt->token != 0) {
463 val = ctxt->token;
464 ctxt->token = 0;
465 return(val);
466 }
467 /*
468 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
469 */
Daniel Veillard561b7f82002-03-20 21:55:57 +0000470 if ((RAW == '&') && (NXT(1) == '#') &&
Owen Taylor3473f882001-02-23 17:55:21 +0000471 (NXT(2) == 'x')) {
472 SKIP(3);
473 GROW;
474 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000475 if (count++ > 20) {
476 count = 0;
477 GROW;
478 }
479 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000480 val = val * 16 + (CUR - '0');
481 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
482 val = val * 16 + (CUR - 'a') + 10;
483 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
484 val = val * 16 + (CUR - 'A') + 10;
485 else {
486 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
488 ctxt->sax->error(ctxt->userData,
489 "xmlParseCharRef: invalid hexadecimal value\n");
490 ctxt->wellFormed = 0;
491 ctxt->disableSAX = 1;
492 val = 0;
493 break;
494 }
495 NEXT;
496 count++;
497 }
498 if (RAW == ';') {
499 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
500 ctxt->nbChars ++;
501 ctxt->input->cur++;
502 }
Daniel Veillard561b7f82002-03-20 21:55:57 +0000503 } else if ((RAW == '&') && (NXT(1) == '#')) {
Owen Taylor3473f882001-02-23 17:55:21 +0000504 SKIP(2);
505 GROW;
506 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000507 if (count++ > 20) {
508 count = 0;
509 GROW;
510 }
511 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000512 val = val * 10 + (CUR - '0');
513 else {
514 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
516 ctxt->sax->error(ctxt->userData,
517 "xmlParseCharRef: invalid decimal value\n");
518 ctxt->wellFormed = 0;
519 ctxt->disableSAX = 1;
520 val = 0;
521 break;
522 }
523 NEXT;
524 count++;
525 }
526 if (RAW == ';') {
527 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
528 ctxt->nbChars ++;
529 ctxt->input->cur++;
530 }
531 } else {
532 ctxt->errNo = XML_ERR_INVALID_CHARREF;
533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
534 ctxt->sax->error(ctxt->userData,
535 "xmlParseCharRef: invalid value\n");
536 ctxt->wellFormed = 0;
537 ctxt->disableSAX = 1;
538 }
539
540 /*
541 * [ WFC: Legal Character ]
542 * Characters referred to using character references must match the
543 * production for Char.
544 */
545 if (IS_CHAR(val)) {
546 return(val);
547 } else {
548 ctxt->errNo = XML_ERR_INVALID_CHAR;
549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000550 ctxt->sax->error(ctxt->userData,
551 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000552 val);
553 ctxt->wellFormed = 0;
554 ctxt->disableSAX = 1;
555 }
556 return(0);
557}
558
559/**
560 * xmlParseStringCharRef:
561 * @ctxt: an XML parser context
562 * @str: a pointer to an index in the string
563 *
 * parse Reference declarations, variant parsing from a string rather
 * than an input flow.
566 *
567 * [66] CharRef ::= '&#' [0-9]+ ';' |
568 * '&#x' [0-9a-fA-F]+ ';'
569 *
570 * [ WFC: Legal Character ]
571 * Characters referred to using character references must match the
572 * production for Char.
573 *
574 * Returns the value parsed (as an int), 0 in case of error, str will be
575 * updated to the current value of the index
576 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000577static int
Owen Taylor3473f882001-02-23 17:55:21 +0000578xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
579 const xmlChar *ptr;
580 xmlChar cur;
581 int val = 0;
582
583 if ((str == NULL) || (*str == NULL)) return(0);
584 ptr = *str;
585 cur = *ptr;
586 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
587 ptr += 3;
588 cur = *ptr;
589 while (cur != ';') { /* Non input consuming loop */
590 if ((cur >= '0') && (cur <= '9'))
591 val = val * 16 + (cur - '0');
592 else if ((cur >= 'a') && (cur <= 'f'))
593 val = val * 16 + (cur - 'a') + 10;
594 else if ((cur >= 'A') && (cur <= 'F'))
595 val = val * 16 + (cur - 'A') + 10;
596 else {
597 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
599 ctxt->sax->error(ctxt->userData,
600 "xmlParseStringCharRef: invalid hexadecimal value\n");
601 ctxt->wellFormed = 0;
602 ctxt->disableSAX = 1;
603 val = 0;
604 break;
605 }
606 ptr++;
607 cur = *ptr;
608 }
609 if (cur == ';')
610 ptr++;
611 } else if ((cur == '&') && (ptr[1] == '#')){
612 ptr += 2;
613 cur = *ptr;
614 while (cur != ';') { /* Non input consuming loops */
615 if ((cur >= '0') && (cur <= '9'))
616 val = val * 10 + (cur - '0');
617 else {
618 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
620 ctxt->sax->error(ctxt->userData,
621 "xmlParseStringCharRef: invalid decimal value\n");
622 ctxt->wellFormed = 0;
623 ctxt->disableSAX = 1;
624 val = 0;
625 break;
626 }
627 ptr++;
628 cur = *ptr;
629 }
630 if (cur == ';')
631 ptr++;
632 } else {
633 ctxt->errNo = XML_ERR_INVALID_CHARREF;
634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
635 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000636 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000637 ctxt->wellFormed = 0;
638 ctxt->disableSAX = 1;
639 return(0);
640 }
641 *str = ptr;
642
643 /*
644 * [ WFC: Legal Character ]
645 * Characters referred to using character references must match the
646 * production for Char.
647 */
648 if (IS_CHAR(val)) {
649 return(val);
650 } else {
651 ctxt->errNo = XML_ERR_INVALID_CHAR;
652 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
653 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000654 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000655 ctxt->wellFormed = 0;
656 ctxt->disableSAX = 1;
657 }
658 return(0);
659}
660
661/**
662 * xmlParserHandlePEReference:
663 * @ctxt: the parser context
664 *
665 * [69] PEReference ::= '%' Name ';'
666 *
667 * [ WFC: No Recursion ]
668 * A parsed entity must not contain a recursive
669 * reference to itself, either directly or indirectly.
670 *
671 * [ WFC: Entity Declared ]
672 * In a document without any DTD, a document with only an internal DTD
673 * subset which contains no parameter entity references, or a document
674 * with "standalone='yes'", ... ... The declaration of a parameter
675 * entity must precede any reference to it...
676 *
677 * [ VC: Entity Declared ]
678 * In a document with an external subset or external parameter entities
679 * with "standalone='no'", ... ... The declaration of a parameter entity
680 * must precede any reference to it...
681 *
682 * [ WFC: In DTD ]
683 * Parameter-entity references may only appear in the DTD.
684 * NOTE: misleading but this is handled.
685 *
686 * A PEReference may have been detected in the current input stream
687 * the handling is done accordingly to
688 * http://www.w3.org/TR/REC-xml#entproc
689 * i.e.
690 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000691 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000692 */
693void
694xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
695 xmlChar *name;
696 xmlEntityPtr entity = NULL;
697 xmlParserInputPtr input;
698
699 if (ctxt->token != 0) {
700 return;
701 }
702 if (RAW != '%') return;
703 switch(ctxt->instate) {
704 case XML_PARSER_CDATA_SECTION:
705 return;
706 case XML_PARSER_COMMENT:
707 return;
708 case XML_PARSER_START_TAG:
709 return;
710 case XML_PARSER_END_TAG:
711 return;
712 case XML_PARSER_EOF:
713 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
715 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
716 ctxt->wellFormed = 0;
717 ctxt->disableSAX = 1;
718 return;
719 case XML_PARSER_PROLOG:
720 case XML_PARSER_START:
721 case XML_PARSER_MISC:
722 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
724 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
725 ctxt->wellFormed = 0;
726 ctxt->disableSAX = 1;
727 return;
728 case XML_PARSER_ENTITY_DECL:
729 case XML_PARSER_CONTENT:
730 case XML_PARSER_ATTRIBUTE_VALUE:
731 case XML_PARSER_PI:
732 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000733 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000734 /* we just ignore it there */
735 return;
736 case XML_PARSER_EPILOG:
737 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
738 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
739 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
740 ctxt->wellFormed = 0;
741 ctxt->disableSAX = 1;
742 return;
743 case XML_PARSER_ENTITY_VALUE:
744 /*
745 * NOTE: in the case of entity values, we don't do the
746 * substitution here since we need the literal
747 * entity value to be able to save the internal
748 * subset of the document.
749 * This will be handled by xmlStringDecodeEntities
750 */
751 return;
752 case XML_PARSER_DTD:
753 /*
754 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
755 * In the internal DTD subset, parameter-entity references
756 * can occur only where markup declarations can occur, not
757 * within markup declarations.
758 * In that case this is handled in xmlParseMarkupDecl
759 */
760 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
761 return;
762 break;
763 case XML_PARSER_IGNORE:
764 return;
765 }
766
767 NEXT;
768 name = xmlParseName(ctxt);
769 if (xmlParserDebugEntities)
770 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000771 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000772 if (name == NULL) {
773 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000775 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000776 ctxt->wellFormed = 0;
777 ctxt->disableSAX = 1;
778 } else {
779 if (RAW == ';') {
780 NEXT;
781 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
782 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
783 if (entity == NULL) {
784
785 /*
786 * [ WFC: Entity Declared ]
787 * In a document without any DTD, a document with only an
788 * internal DTD subset which contains no parameter entity
789 * references, or a document with "standalone='yes'", ...
790 * ... The declaration of a parameter entity must precede
791 * any reference to it...
792 */
793 if ((ctxt->standalone == 1) ||
794 ((ctxt->hasExternalSubset == 0) &&
795 (ctxt->hasPErefs == 0))) {
796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
797 ctxt->sax->error(ctxt->userData,
798 "PEReference: %%%s; not found\n", name);
799 ctxt->wellFormed = 0;
800 ctxt->disableSAX = 1;
801 } else {
802 /*
803 * [ VC: Entity Declared ]
804 * In a document with an external subset or external
805 * parameter entities with "standalone='no'", ...
806 * ... The declaration of a parameter entity must precede
807 * any reference to it...
808 */
809 if ((!ctxt->disableSAX) &&
810 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
811 ctxt->vctxt.error(ctxt->vctxt.userData,
812 "PEReference: %%%s; not found\n", name);
813 } else if ((!ctxt->disableSAX) &&
814 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
815 ctxt->sax->warning(ctxt->userData,
816 "PEReference: %%%s; not found\n", name);
817 ctxt->valid = 0;
818 }
819 } else {
820 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
821 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000822 xmlChar start[4];
823 xmlCharEncoding enc;
824
Owen Taylor3473f882001-02-23 17:55:21 +0000825 /*
826 * handle the extra spaces added before and after
827 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000828 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000829 */
830 input = xmlNewEntityInputStream(ctxt, entity);
831 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000832
833 /*
834 * Get the 4 first bytes and decode the charset
835 * if enc != XML_CHAR_ENCODING_NONE
836 * plug some encoding conversion routines.
837 */
838 GROW
Daniel Veillard561b7f82002-03-20 21:55:57 +0000839 start[0] = RAW;
840 start[1] = NXT(1);
841 start[2] = NXT(2);
842 start[3] = NXT(3);
843 enc = xmlDetectCharEncoding(start, 4);
844 if (enc != XML_CHAR_ENCODING_NONE) {
845 xmlSwitchEncoding(ctxt, enc);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000846 }
847
Owen Taylor3473f882001-02-23 17:55:21 +0000848 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
849 (RAW == '<') && (NXT(1) == '?') &&
850 (NXT(2) == 'x') && (NXT(3) == 'm') &&
851 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
852 xmlParseTextDecl(ctxt);
853 }
854 if (ctxt->token == 0)
855 ctxt->token = ' ';
856 } else {
857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
858 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000859 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000860 name);
861 ctxt->wellFormed = 0;
862 ctxt->disableSAX = 1;
863 }
864 }
865 } else {
866 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
868 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000869 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000870 ctxt->wellFormed = 0;
871 ctxt->disableSAX = 1;
872 }
873 xmlFree(name);
874 }
875}
876
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer accordingly. It expects
 * buffer to be a plain xmlChar * variable with a companion variable
 * named buffer##_size, and may only be used in a function returning a
 * pointer: if the reallocation fails the old buffer is released, buffer
 * is set to NULL and the enclosing function returns NULL.
 * The result of xmlRealloc is kept in a temporary so that the old buffer
 * can still be freed on failure instead of being leaked.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        buffer = NULL;							\
        return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
889
890/**
891 * xmlStringDecodeEntities:
892 * @ctxt: the parser context
893 * @str: the input string
894 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
895 * @end: an end marker xmlChar, 0 if none
896 * @end2: an end marker xmlChar, 0 if none
897 * @end3: an end marker xmlChar, 0 if none
898 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000899 * Takes a entity string content and process to do the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000900 *
901 * [67] Reference ::= EntityRef | CharRef
902 *
903 * [69] PEReference ::= '%' Name ';'
904 *
905 * Returns A newly allocated string with the substitution done. The caller
906 * must deallocate it !
907 */
908xmlChar *
909xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
910 xmlChar end, xmlChar end2, xmlChar end3) {
911 xmlChar *buffer = NULL;
912 int buffer_size = 0;
913
914 xmlChar *current = NULL;
915 xmlEntityPtr ent;
916 int c,l;
917 int nbchars = 0;
918
919 if (str == NULL)
920 return(NULL);
921
922 if (ctxt->depth > 40) {
923 ctxt->errNo = XML_ERR_ENTITY_LOOP;
924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
925 ctxt->sax->error(ctxt->userData,
926 "Detected entity reference loop\n");
927 ctxt->wellFormed = 0;
928 ctxt->disableSAX = 1;
929 return(NULL);
930 }
931
932 /*
933 * allocate a translation buffer.
934 */
935 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
936 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
937 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000938 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000939 return(NULL);
940 }
941
942 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000943 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000944 * we are operating on already parsed values.
945 */
946 c = CUR_SCHAR(str, l);
947 while ((c != 0) && (c != end) && /* non input consuming loop */
948 (c != end2) && (c != end3)) {
949
950 if (c == 0) break;
951 if ((c == '&') && (str[1] == '#')) {
952 int val = xmlParseStringCharRef(ctxt, &str);
953 if (val != 0) {
954 COPY_BUF(0,buffer,nbchars,val);
955 }
956 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
957 if (xmlParserDebugEntities)
958 xmlGenericError(xmlGenericErrorContext,
959 "String decoding Entity Reference: %.30s\n",
960 str);
961 ent = xmlParseStringEntityRef(ctxt, &str);
962 if ((ent != NULL) &&
963 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
964 if (ent->content != NULL) {
965 COPY_BUF(0,buffer,nbchars,ent->content[0]);
966 } else {
967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
968 ctxt->sax->error(ctxt->userData,
969 "internal error entity has no content\n");
970 }
971 } else if ((ent != NULL) && (ent->content != NULL)) {
972 xmlChar *rep;
973
974 ctxt->depth++;
975 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
976 0, 0, 0);
977 ctxt->depth--;
978 if (rep != NULL) {
979 current = rep;
980 while (*current != 0) { /* non input consuming loop */
981 buffer[nbchars++] = *current++;
982 if (nbchars >
983 buffer_size - XML_PARSER_BUFFER_SIZE) {
984 growBuffer(buffer);
985 }
986 }
987 xmlFree(rep);
988 }
989 } else if (ent != NULL) {
990 int i = xmlStrlen(ent->name);
991 const xmlChar *cur = ent->name;
992
993 buffer[nbchars++] = '&';
994 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
995 growBuffer(buffer);
996 }
997 for (;i > 0;i--)
998 buffer[nbchars++] = *cur++;
999 buffer[nbchars++] = ';';
1000 }
1001 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1002 if (xmlParserDebugEntities)
1003 xmlGenericError(xmlGenericErrorContext,
1004 "String decoding PE Reference: %.30s\n", str);
1005 ent = xmlParseStringPEReference(ctxt, &str);
1006 if (ent != NULL) {
1007 xmlChar *rep;
1008
1009 ctxt->depth++;
1010 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1011 0, 0, 0);
1012 ctxt->depth--;
1013 if (rep != NULL) {
1014 current = rep;
1015 while (*current != 0) { /* non input consuming loop */
1016 buffer[nbchars++] = *current++;
1017 if (nbchars >
1018 buffer_size - XML_PARSER_BUFFER_SIZE) {
1019 growBuffer(buffer);
1020 }
1021 }
1022 xmlFree(rep);
1023 }
1024 }
1025 } else {
1026 COPY_BUF(l,buffer,nbchars,c);
1027 str += l;
1028 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1029 growBuffer(buffer);
1030 }
1031 }
1032 c = CUR_SCHAR(str, l);
1033 }
1034 buffer[nbchars++] = 0;
1035 return(buffer);
1036}
1037
1038
1039/************************************************************************
1040 * *
1041 * Commodity functions to handle xmlChars *
1042 * *
1043 ************************************************************************/
1044
1045/**
1046 * xmlStrndup:
1047 * @cur: the input xmlChar *
1048 * @len: the len of @cur
1049 *
1050 * a strndup for array of xmlChar's
1051 *
1052 * Returns a new xmlChar * or NULL
1053 */
1054xmlChar *
1055xmlStrndup(const xmlChar *cur, int len) {
1056 xmlChar *ret;
1057
1058 if ((cur == NULL) || (len < 0)) return(NULL);
1059 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1060 if (ret == NULL) {
1061 xmlGenericError(xmlGenericErrorContext,
1062 "malloc of %ld byte failed\n",
1063 (len + 1) * (long)sizeof(xmlChar));
1064 return(NULL);
1065 }
1066 memcpy(ret, cur, len * sizeof(xmlChar));
1067 ret[len] = 0;
1068 return(ret);
1069}
1070
1071/**
1072 * xmlStrdup:
1073 * @cur: the input xmlChar *
1074 *
1075 * a strdup for array of xmlChar's. Since they are supposed to be
1076 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1077 * a termination mark of '0'.
1078 *
1079 * Returns a new xmlChar * or NULL
1080 */
1081xmlChar *
1082xmlStrdup(const xmlChar *cur) {
1083 const xmlChar *p = cur;
1084
1085 if (cur == NULL) return(NULL);
1086 while (*p != 0) p++; /* non input consuming */
1087 return(xmlStrndup(cur, p - cur));
1088}
1089
1090/**
1091 * xmlCharStrndup:
1092 * @cur: the input char *
1093 * @len: the len of @cur
1094 *
1095 * a strndup for char's to xmlChar's
1096 *
1097 * Returns a new xmlChar * or NULL
1098 */
1099
1100xmlChar *
1101xmlCharStrndup(const char *cur, int len) {
1102 int i;
1103 xmlChar *ret;
1104
1105 if ((cur == NULL) || (len < 0)) return(NULL);
1106 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1107 if (ret == NULL) {
1108 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1109 (len + 1) * (long)sizeof(xmlChar));
1110 return(NULL);
1111 }
1112 for (i = 0;i < len;i++)
1113 ret[i] = (xmlChar) cur[i];
1114 ret[len] = 0;
1115 return(ret);
1116}
1117
1118/**
1119 * xmlCharStrdup:
1120 * @cur: the input char *
1121 * @len: the len of @cur
1122 *
1123 * a strdup for char's to xmlChar's
1124 *
1125 * Returns a new xmlChar * or NULL
1126 */
1127
1128xmlChar *
1129xmlCharStrdup(const char *cur) {
1130 const char *p = cur;
1131
1132 if (cur == NULL) return(NULL);
1133 while (*p != '\0') p++; /* non input consuming */
1134 return(xmlCharStrndup(cur, p - cur));
1135}
1136
1137/**
1138 * xmlStrcmp:
1139 * @str1: the first xmlChar *
1140 * @str2: the second xmlChar *
1141 *
1142 * a strcmp for xmlChar's
1143 *
1144 * Returns the integer result of the comparison
1145 */
1146
1147int
1148xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1149 register int tmp;
1150
1151 if (str1 == str2) return(0);
1152 if (str1 == NULL) return(-1);
1153 if (str2 == NULL) return(1);
1154 do {
1155 tmp = *str1++ - *str2;
1156 if (tmp != 0) return(tmp);
1157 } while (*str2++ != 0);
1158 return 0;
1159}
1160
1161/**
1162 * xmlStrEqual:
1163 * @str1: the first xmlChar *
1164 * @str2: the second xmlChar *
1165 *
1166 * Check if both string are equal of have same content
1167 * Should be a bit more readable and faster than xmlStrEqual()
1168 *
1169 * Returns 1 if they are equal, 0 if they are different
1170 */
1171
1172int
1173xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1174 if (str1 == str2) return(1);
1175 if (str1 == NULL) return(0);
1176 if (str2 == NULL) return(0);
1177 do {
1178 if (*str1++ != *str2) return(0);
1179 } while (*str2++);
1180 return(1);
1181}
1182
1183/**
1184 * xmlStrncmp:
1185 * @str1: the first xmlChar *
1186 * @str2: the second xmlChar *
1187 * @len: the max comparison length
1188 *
1189 * a strncmp for xmlChar's
1190 *
1191 * Returns the integer result of the comparison
1192 */
1193
1194int
1195xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1196 register int tmp;
1197
1198 if (len <= 0) return(0);
1199 if (str1 == str2) return(0);
1200 if (str1 == NULL) return(-1);
1201 if (str2 == NULL) return(1);
1202 do {
1203 tmp = *str1++ - *str2;
1204 if (tmp != 0 || --len == 0) return(tmp);
1205 } while (*str2++ != 0);
1206 return 0;
1207}
1208
Daniel Veillardb44025c2001-10-11 22:55:55 +00001209static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001210 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1211 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1212 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1213 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1214 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1215 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1216 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1217 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1218 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1219 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1220 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1221 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1222 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1223 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1224 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1225 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1226 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1227 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1228 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1229 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1230 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1231 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1232 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1233 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1234 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1235 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1236 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1237 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1238 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1239 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1240 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1241 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1242};
1243
1244/**
1245 * xmlStrcasecmp:
1246 * @str1: the first xmlChar *
1247 * @str2: the second xmlChar *
1248 *
1249 * a strcasecmp for xmlChar's
1250 *
1251 * Returns the integer result of the comparison
1252 */
1253
1254int
1255xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1256 register int tmp;
1257
1258 if (str1 == str2) return(0);
1259 if (str1 == NULL) return(-1);
1260 if (str2 == NULL) return(1);
1261 do {
1262 tmp = casemap[*str1++] - casemap[*str2];
1263 if (tmp != 0) return(tmp);
1264 } while (*str2++ != 0);
1265 return 0;
1266}
1267
1268/**
1269 * xmlStrncasecmp:
1270 * @str1: the first xmlChar *
1271 * @str2: the second xmlChar *
1272 * @len: the max comparison length
1273 *
1274 * a strncasecmp for xmlChar's
1275 *
1276 * Returns the integer result of the comparison
1277 */
1278
1279int
1280xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1281 register int tmp;
1282
1283 if (len <= 0) return(0);
1284 if (str1 == str2) return(0);
1285 if (str1 == NULL) return(-1);
1286 if (str2 == NULL) return(1);
1287 do {
1288 tmp = casemap[*str1++] - casemap[*str2];
1289 if (tmp != 0 || --len == 0) return(tmp);
1290 } while (*str2++ != 0);
1291 return 0;
1292}
1293
1294/**
1295 * xmlStrchr:
1296 * @str: the xmlChar * array
1297 * @val: the xmlChar to search
1298 *
1299 * a strchr for xmlChar's
1300 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001301 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001302 */
1303
1304const xmlChar *
1305xmlStrchr(const xmlChar *str, xmlChar val) {
1306 if (str == NULL) return(NULL);
1307 while (*str != 0) { /* non input consuming */
1308 if (*str == val) return((xmlChar *) str);
1309 str++;
1310 }
1311 return(NULL);
1312}
1313
1314/**
1315 * xmlStrstr:
1316 * @str: the xmlChar * array (haystack)
1317 * @val: the xmlChar to search (needle)
1318 *
1319 * a strstr for xmlChar's
1320 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001321 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001322 */
1323
1324const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001325xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001326 int n;
1327
1328 if (str == NULL) return(NULL);
1329 if (val == NULL) return(NULL);
1330 n = xmlStrlen(val);
1331
1332 if (n == 0) return(str);
1333 while (*str != 0) { /* non input consuming */
1334 if (*str == *val) {
1335 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1336 }
1337 str++;
1338 }
1339 return(NULL);
1340}
1341
1342/**
1343 * xmlStrcasestr:
1344 * @str: the xmlChar * array (haystack)
1345 * @val: the xmlChar to search (needle)
1346 *
1347 * a case-ignoring strstr for xmlChar's
1348 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001349 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001350 */
1351
1352const xmlChar *
1353xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1354 int n;
1355
1356 if (str == NULL) return(NULL);
1357 if (val == NULL) return(NULL);
1358 n = xmlStrlen(val);
1359
1360 if (n == 0) return(str);
1361 while (*str != 0) { /* non input consuming */
1362 if (casemap[*str] == casemap[*val])
1363 if (!xmlStrncasecmp(str, val, n)) return(str);
1364 str++;
1365 }
1366 return(NULL);
1367}
1368
1369/**
1370 * xmlStrsub:
1371 * @str: the xmlChar * array (haystack)
1372 * @start: the index of the first char (zero based)
1373 * @len: the length of the substring
1374 *
1375 * Extract a substring of a given string
1376 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001377 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001378 */
1379
1380xmlChar *
1381xmlStrsub(const xmlChar *str, int start, int len) {
1382 int i;
1383
1384 if (str == NULL) return(NULL);
1385 if (start < 0) return(NULL);
1386 if (len < 0) return(NULL);
1387
1388 for (i = 0;i < start;i++) {
1389 if (*str == 0) return(NULL);
1390 str++;
1391 }
1392 if (*str == 0) return(NULL);
1393 return(xmlStrndup(str, len));
1394}
1395
1396/**
1397 * xmlStrlen:
1398 * @str: the xmlChar * array
1399 *
1400 * length of a xmlChar's string
1401 *
1402 * Returns the number of xmlChar contained in the ARRAY.
1403 */
1404
1405int
1406xmlStrlen(const xmlChar *str) {
1407 int len = 0;
1408
1409 if (str == NULL) return(0);
1410 while (*str != 0) { /* non input consuming */
1411 str++;
1412 len++;
1413 }
1414 return(len);
1415}
1416
1417/**
1418 * xmlStrncat:
1419 * @cur: the original xmlChar * array
1420 * @add: the xmlChar * array added
1421 * @len: the length of @add
1422 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001423 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001424 * first bytes of @add.
1425 *
1426 * Returns a new xmlChar *, the original @cur is reallocated if needed
1427 * and should not be freed
1428 */
1429
1430xmlChar *
1431xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1432 int size;
1433 xmlChar *ret;
1434
1435 if ((add == NULL) || (len == 0))
1436 return(cur);
1437 if (cur == NULL)
1438 return(xmlStrndup(add, len));
1439
1440 size = xmlStrlen(cur);
1441 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1442 if (ret == NULL) {
1443 xmlGenericError(xmlGenericErrorContext,
1444 "xmlStrncat: realloc of %ld byte failed\n",
1445 (size + len + 1) * (long)sizeof(xmlChar));
1446 return(cur);
1447 }
1448 memcpy(&ret[size], add, len * sizeof(xmlChar));
1449 ret[size + len] = 0;
1450 return(ret);
1451}
1452
1453/**
1454 * xmlStrcat:
1455 * @cur: the original xmlChar * array
1456 * @add: the xmlChar * array added
1457 *
1458 * a strcat for array of xmlChar's. Since they are supposed to be
1459 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1460 * a termination mark of '0'.
1461 *
1462 * Returns a new xmlChar * containing the concatenated string.
1463 */
1464xmlChar *
1465xmlStrcat(xmlChar *cur, const xmlChar *add) {
1466 const xmlChar *p = add;
1467
1468 if (add == NULL) return(cur);
1469 if (cur == NULL)
1470 return(xmlStrdup(add));
1471
1472 while (*p != 0) p++; /* non input consuming */
1473 return(xmlStrncat(cur, add, p - add));
1474}
1475
1476/************************************************************************
1477 * *
1478 * Commodity functions, cleanup needed ? *
1479 * *
1480 ************************************************************************/
1481
1482/**
1483 * areBlanks:
1484 * @ctxt: an XML parser context
1485 * @str: a xmlChar *
1486 * @len: the size of @str
1487 *
1488 * Is this a sequence of blank chars that one can ignore ?
1489 *
1490 * Returns 1 if ignorable 0 otherwise.
1491 */
1492
1493static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1494 int i, ret;
1495 xmlNodePtr lastChild;
1496
Daniel Veillard05c13a22001-09-09 08:38:09 +00001497 /*
1498 * Don't spend time trying to differentiate them, the same callback is
1499 * used !
1500 */
1501 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001502 return(0);
1503
Owen Taylor3473f882001-02-23 17:55:21 +00001504 /*
1505 * Check for xml:space value.
1506 */
1507 if (*(ctxt->space) == 1)
1508 return(0);
1509
1510 /*
1511 * Check that the string is made of blanks
1512 */
1513 for (i = 0;i < len;i++)
1514 if (!(IS_BLANK(str[i]))) return(0);
1515
1516 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001517 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001518 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001519 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001520 if (ctxt->myDoc != NULL) {
1521 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1522 if (ret == 0) return(1);
1523 if (ret == 1) return(0);
1524 }
1525
1526 /*
1527 * Otherwise, heuristic :-\
1528 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00001529 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001530 if ((ctxt->node->children == NULL) &&
1531 (RAW == '<') && (NXT(1) == '/')) return(0);
1532
1533 lastChild = xmlGetLastChild(ctxt->node);
1534 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001535 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1536 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001537 } else if (xmlNodeIsText(lastChild))
1538 return(0);
1539 else if ((ctxt->node->children != NULL) &&
1540 (xmlNodeIsText(ctxt->node->children)))
1541 return(0);
1542 return(1);
1543}
1544
Owen Taylor3473f882001-02-23 17:55:21 +00001545/************************************************************************
1546 * *
1547 * Extra stuff for namespace support *
1548 * Relates to http://www.w3.org/TR/WD-xml-names *
1549 * *
1550 ************************************************************************/
1551
1552/**
1553 * xmlSplitQName:
1554 * @ctxt: an XML parser context
1555 * @name: an XML parser context
1556 * @prefix: a xmlChar **
1557 *
1558 * parse an UTF8 encoded XML qualified name string
1559 *
1560 * [NS 5] QName ::= (Prefix ':')? LocalPart
1561 *
1562 * [NS 6] Prefix ::= NCName
1563 *
1564 * [NS 7] LocalPart ::= NCName
1565 *
1566 * Returns the local part, and prefix is updated
1567 * to get the Prefix if any.
1568 */
1569
1570xmlChar *
1571xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1572 xmlChar buf[XML_MAX_NAMELEN + 5];
1573 xmlChar *buffer = NULL;
1574 int len = 0;
1575 int max = XML_MAX_NAMELEN;
1576 xmlChar *ret = NULL;
1577 const xmlChar *cur = name;
1578 int c;
1579
1580 *prefix = NULL;
1581
1582#ifndef XML_XML_NAMESPACE
1583 /* xml: prefix is not really a namespace */
1584 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1585 (cur[2] == 'l') && (cur[3] == ':'))
1586 return(xmlStrdup(name));
1587#endif
1588
1589 /* nasty but valid */
1590 if (cur[0] == ':')
1591 return(xmlStrdup(name));
1592
1593 c = *cur++;
1594 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1595 buf[len++] = c;
1596 c = *cur++;
1597 }
1598 if (len >= max) {
1599 /*
1600 * Okay someone managed to make a huge name, so he's ready to pay
1601 * for the processing speed.
1602 */
1603 max = len * 2;
1604
1605 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1606 if (buffer == NULL) {
1607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1608 ctxt->sax->error(ctxt->userData,
1609 "xmlSplitQName: out of memory\n");
1610 return(NULL);
1611 }
1612 memcpy(buffer, buf, len);
1613 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1614 if (len + 10 > max) {
1615 max *= 2;
1616 buffer = (xmlChar *) xmlRealloc(buffer,
1617 max * sizeof(xmlChar));
1618 if (buffer == NULL) {
1619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1620 ctxt->sax->error(ctxt->userData,
1621 "xmlSplitQName: out of memory\n");
1622 return(NULL);
1623 }
1624 }
1625 buffer[len++] = c;
1626 c = *cur++;
1627 }
1628 buffer[len] = 0;
1629 }
1630
1631 if (buffer == NULL)
1632 ret = xmlStrndup(buf, len);
1633 else {
1634 ret = buffer;
1635 buffer = NULL;
1636 max = XML_MAX_NAMELEN;
1637 }
1638
1639
1640 if (c == ':') {
1641 c = *cur++;
1642 if (c == 0) return(ret);
1643 *prefix = ret;
1644 len = 0;
1645
1646 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1647 buf[len++] = c;
1648 c = *cur++;
1649 }
1650 if (len >= max) {
1651 /*
1652 * Okay someone managed to make a huge name, so he's ready to pay
1653 * for the processing speed.
1654 */
1655 max = len * 2;
1656
1657 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1658 if (buffer == NULL) {
1659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1660 ctxt->sax->error(ctxt->userData,
1661 "xmlSplitQName: out of memory\n");
1662 return(NULL);
1663 }
1664 memcpy(buffer, buf, len);
1665 while (c != 0) { /* tested bigname2.xml */
1666 if (len + 10 > max) {
1667 max *= 2;
1668 buffer = (xmlChar *) xmlRealloc(buffer,
1669 max * sizeof(xmlChar));
1670 if (buffer == NULL) {
1671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1672 ctxt->sax->error(ctxt->userData,
1673 "xmlSplitQName: out of memory\n");
1674 return(NULL);
1675 }
1676 }
1677 buffer[len++] = c;
1678 c = *cur++;
1679 }
1680 buffer[len] = 0;
1681 }
1682
1683 if (buffer == NULL)
1684 ret = xmlStrndup(buf, len);
1685 else {
1686 ret = buffer;
1687 }
1688 }
1689
1690 return(ret);
1691}
1692
1693/************************************************************************
1694 * *
1695 * The parser itself *
1696 * Relates to http://www.w3.org/TR/REC-xml *
1697 * *
1698 ************************************************************************/
1699
Daniel Veillard76d66f42001-05-16 21:05:17 +00001700static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001701/**
1702 * xmlParseName:
1703 * @ctxt: an XML parser context
1704 *
1705 * parse an XML name.
1706 *
1707 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1708 * CombiningChar | Extender
1709 *
1710 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1711 *
1712 * [6] Names ::= Name (S Name)*
1713 *
1714 * Returns the Name parsed or NULL
1715 */
1716
1717xmlChar *
1718xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001719 const xmlChar *in;
1720 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001721 int count = 0;
1722
1723 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001724
1725 /*
1726 * Accelerator for simple ASCII names
1727 */
1728 in = ctxt->input->cur;
1729 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1730 ((*in >= 0x41) && (*in <= 0x5A)) ||
1731 (*in == '_') || (*in == ':')) {
1732 in++;
1733 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1734 ((*in >= 0x41) && (*in <= 0x5A)) ||
1735 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001736 (*in == '_') || (*in == '-') ||
1737 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001738 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001739 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001740 count = in - ctxt->input->cur;
1741 ret = xmlStrndup(ctxt->input->cur, count);
1742 ctxt->input->cur = in;
1743 return(ret);
1744 }
1745 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001746 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001747}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001748
Daniel Veillard76d66f42001-05-16 21:05:17 +00001749static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001750xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1751 xmlChar buf[XML_MAX_NAMELEN + 5];
1752 int len = 0, l;
1753 int c;
1754 int count = 0;
1755
1756 /*
1757 * Handler for more complex cases
1758 */
1759 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001760 c = CUR_CHAR(l);
1761 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1762 (!IS_LETTER(c) && (c != '_') &&
1763 (c != ':'))) {
1764 return(NULL);
1765 }
1766
1767 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1768 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1769 (c == '.') || (c == '-') ||
1770 (c == '_') || (c == ':') ||
1771 (IS_COMBINING(c)) ||
1772 (IS_EXTENDER(c)))) {
1773 if (count++ > 100) {
1774 count = 0;
1775 GROW;
1776 }
1777 COPY_BUF(l,buf,len,c);
1778 NEXTL(l);
1779 c = CUR_CHAR(l);
1780 if (len >= XML_MAX_NAMELEN) {
1781 /*
1782 * Okay someone managed to make a huge name, so he's ready to pay
1783 * for the processing speed.
1784 */
1785 xmlChar *buffer;
1786 int max = len * 2;
1787
1788 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1789 if (buffer == NULL) {
1790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1791 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001792 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001793 return(NULL);
1794 }
1795 memcpy(buffer, buf, len);
1796 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1797 (c == '.') || (c == '-') ||
1798 (c == '_') || (c == ':') ||
1799 (IS_COMBINING(c)) ||
1800 (IS_EXTENDER(c))) {
1801 if (count++ > 100) {
1802 count = 0;
1803 GROW;
1804 }
1805 if (len + 10 > max) {
1806 max *= 2;
1807 buffer = (xmlChar *) xmlRealloc(buffer,
1808 max * sizeof(xmlChar));
1809 if (buffer == NULL) {
1810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1811 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001812 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001813 return(NULL);
1814 }
1815 }
1816 COPY_BUF(l,buffer,len,c);
1817 NEXTL(l);
1818 c = CUR_CHAR(l);
1819 }
1820 buffer[len] = 0;
1821 return(buffer);
1822 }
1823 }
1824 return(xmlStrndup(buf, len));
1825}
1826
1827/**
1828 * xmlParseStringName:
1829 * @ctxt: an XML parser context
1830 * @str: a pointer to the string pointer (IN/OUT)
1831 *
1832 * parse an XML name.
1833 *
1834 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1835 * CombiningChar | Extender
1836 *
1837 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1838 *
1839 * [6] Names ::= Name (S Name)*
1840 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001841 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001842 * is updated to the current location in the string.
1843 */
1844
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001845static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001846xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1847 xmlChar buf[XML_MAX_NAMELEN + 5];
1848 const xmlChar *cur = *str;
1849 int len = 0, l;
1850 int c;
1851
1852 c = CUR_SCHAR(cur, l);
1853 if (!IS_LETTER(c) && (c != '_') &&
1854 (c != ':')) {
1855 return(NULL);
1856 }
1857
1858 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1859 (c == '.') || (c == '-') ||
1860 (c == '_') || (c == ':') ||
1861 (IS_COMBINING(c)) ||
1862 (IS_EXTENDER(c))) {
1863 COPY_BUF(l,buf,len,c);
1864 cur += l;
1865 c = CUR_SCHAR(cur, l);
1866 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1867 /*
1868 * Okay someone managed to make a huge name, so he's ready to pay
1869 * for the processing speed.
1870 */
1871 xmlChar *buffer;
1872 int max = len * 2;
1873
1874 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1875 if (buffer == NULL) {
1876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1877 ctxt->sax->error(ctxt->userData,
1878 "xmlParseStringName: out of memory\n");
1879 return(NULL);
1880 }
1881 memcpy(buffer, buf, len);
1882 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1883 (c == '.') || (c == '-') ||
1884 (c == '_') || (c == ':') ||
1885 (IS_COMBINING(c)) ||
1886 (IS_EXTENDER(c))) {
1887 if (len + 10 > max) {
1888 max *= 2;
1889 buffer = (xmlChar *) xmlRealloc(buffer,
1890 max * sizeof(xmlChar));
1891 if (buffer == NULL) {
1892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1893 ctxt->sax->error(ctxt->userData,
1894 "xmlParseStringName: out of memory\n");
1895 return(NULL);
1896 }
1897 }
1898 COPY_BUF(l,buffer,len,c);
1899 cur += l;
1900 c = CUR_SCHAR(cur, l);
1901 }
1902 buffer[len] = 0;
1903 *str = cur;
1904 return(buffer);
1905 }
1906 }
1907 *str = cur;
1908 return(xmlStrndup(buf, len));
1909}
1910
1911/**
1912 * xmlParseNmtoken:
1913 * @ctxt: an XML parser context
1914 *
1915 * parse an XML Nmtoken.
1916 *
1917 * [7] Nmtoken ::= (NameChar)+
1918 *
1919 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1920 *
1921 * Returns the Nmtoken parsed or NULL
1922 */
1923
1924xmlChar *
1925xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1926 xmlChar buf[XML_MAX_NAMELEN + 5];
1927 int len = 0, l;
1928 int c;
1929 int count = 0;
1930
1931 GROW;
1932 c = CUR_CHAR(l);
1933
1934 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1935 (c == '.') || (c == '-') ||
1936 (c == '_') || (c == ':') ||
1937 (IS_COMBINING(c)) ||
1938 (IS_EXTENDER(c))) {
1939 if (count++ > 100) {
1940 count = 0;
1941 GROW;
1942 }
1943 COPY_BUF(l,buf,len,c);
1944 NEXTL(l);
1945 c = CUR_CHAR(l);
1946 if (len >= XML_MAX_NAMELEN) {
1947 /*
1948 * Okay someone managed to make a huge token, so he's ready to pay
1949 * for the processing speed.
1950 */
1951 xmlChar *buffer;
1952 int max = len * 2;
1953
1954 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1955 if (buffer == NULL) {
1956 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1957 ctxt->sax->error(ctxt->userData,
1958 "xmlParseNmtoken: out of memory\n");
1959 return(NULL);
1960 }
1961 memcpy(buffer, buf, len);
1962 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1963 (c == '.') || (c == '-') ||
1964 (c == '_') || (c == ':') ||
1965 (IS_COMBINING(c)) ||
1966 (IS_EXTENDER(c))) {
1967 if (count++ > 100) {
1968 count = 0;
1969 GROW;
1970 }
1971 if (len + 10 > max) {
1972 max *= 2;
1973 buffer = (xmlChar *) xmlRealloc(buffer,
1974 max * sizeof(xmlChar));
1975 if (buffer == NULL) {
1976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1977 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001978 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001979 return(NULL);
1980 }
1981 }
1982 COPY_BUF(l,buffer,len,c);
1983 NEXTL(l);
1984 c = CUR_CHAR(l);
1985 }
1986 buffer[len] = 0;
1987 return(buffer);
1988 }
1989 }
1990 if (len == 0)
1991 return(NULL);
1992 return(xmlStrndup(buf, len));
1993}
1994
1995/**
1996 * xmlParseEntityValue:
1997 * @ctxt: an XML parser context
1998 * @orig: if non-NULL store a copy of the original entity value
1999 *
2000 * parse a value for ENTITY declarations
2001 *
2002 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2003 * "'" ([^%&'] | PEReference | Reference)* "'"
2004 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002005 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002006 */
2007
2008xmlChar *
2009xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2010 xmlChar *buf = NULL;
2011 int len = 0;
2012 int size = XML_PARSER_BUFFER_SIZE;
2013 int c, l;
2014 xmlChar stop;
2015 xmlChar *ret = NULL;
2016 const xmlChar *cur = NULL;
2017 xmlParserInputPtr input;
2018
2019 if (RAW == '"') stop = '"';
2020 else if (RAW == '\'') stop = '\'';
2021 else {
2022 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2024 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2025 ctxt->wellFormed = 0;
2026 ctxt->disableSAX = 1;
2027 return(NULL);
2028 }
2029 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2030 if (buf == NULL) {
2031 xmlGenericError(xmlGenericErrorContext,
2032 "malloc of %d byte failed\n", size);
2033 return(NULL);
2034 }
2035
2036 /*
2037 * The content of the entity definition is copied in a buffer.
2038 */
2039
2040 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2041 input = ctxt->input;
2042 GROW;
2043 NEXT;
2044 c = CUR_CHAR(l);
2045 /*
2046 * NOTE: 4.4.5 Included in Literal
2047 * When a parameter entity reference appears in a literal entity
2048 * value, ... a single or double quote character in the replacement
2049 * text is always treated as a normal data character and will not
2050 * terminate the literal.
2051 * In practice it means we stop the loop only when back at parsing
2052 * the initial entity and the quote is found
2053 */
2054 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2055 (ctxt->input != input))) {
2056 if (len + 5 >= size) {
2057 size *= 2;
2058 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2059 if (buf == NULL) {
2060 xmlGenericError(xmlGenericErrorContext,
2061 "realloc of %d byte failed\n", size);
2062 return(NULL);
2063 }
2064 }
2065 COPY_BUF(l,buf,len,c);
2066 NEXTL(l);
2067 /*
2068 * Pop-up of finished entities.
2069 */
2070 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2071 xmlPopInput(ctxt);
2072
2073 GROW;
2074 c = CUR_CHAR(l);
2075 if (c == 0) {
2076 GROW;
2077 c = CUR_CHAR(l);
2078 }
2079 }
2080 buf[len] = 0;
2081
2082 /*
2083 * Raise problem w.r.t. '&' and '%' being used in non-entities
2084 * reference constructs. Note Charref will be handled in
2085 * xmlStringDecodeEntities()
2086 */
2087 cur = buf;
2088 while (*cur != 0) { /* non input consuming */
2089 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2090 xmlChar *name;
2091 xmlChar tmp = *cur;
2092
2093 cur++;
2094 name = xmlParseStringName(ctxt, &cur);
2095 if ((name == NULL) || (*cur != ';')) {
2096 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2098 ctxt->sax->error(ctxt->userData,
2099 "EntityValue: '%c' forbidden except for entities references\n",
2100 tmp);
2101 ctxt->wellFormed = 0;
2102 ctxt->disableSAX = 1;
2103 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002104 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2105 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002106 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2108 ctxt->sax->error(ctxt->userData,
2109 "EntityValue: PEReferences forbidden in internal subset\n",
2110 tmp);
2111 ctxt->wellFormed = 0;
2112 ctxt->disableSAX = 1;
2113 }
2114 if (name != NULL)
2115 xmlFree(name);
2116 }
2117 cur++;
2118 }
2119
2120 /*
2121 * Then PEReference entities are substituted.
2122 */
2123 if (c != stop) {
2124 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2126 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2127 ctxt->wellFormed = 0;
2128 ctxt->disableSAX = 1;
2129 xmlFree(buf);
2130 } else {
2131 NEXT;
2132 /*
2133 * NOTE: 4.4.7 Bypassed
2134 * When a general entity reference appears in the EntityValue in
2135 * an entity declaration, it is bypassed and left as is.
2136 * so XML_SUBSTITUTE_REF is not set here.
2137 */
2138 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2139 0, 0, 0);
2140 if (orig != NULL)
2141 *orig = buf;
2142 else
2143 xmlFree(buf);
2144 }
2145
2146 return(ret);
2147}
2148
2149/**
2150 * xmlParseAttValue:
2151 * @ctxt: an XML parser context
2152 *
2153 * parse a value for an attribute
2154 * Note: the parser won't do substitution of entities here, this
2155 * will be handled later in xmlStringGetNodeList
2156 *
2157 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2158 * "'" ([^<&'] | Reference)* "'"
2159 *
2160 * 3.3.3 Attribute-Value Normalization:
2161 * Before the value of an attribute is passed to the application or
2162 * checked for validity, the XML processor must normalize it as follows:
2163 * - a character reference is processed by appending the referenced
2164 * character to the attribute value
2165 * - an entity reference is processed by recursively processing the
2166 * replacement text of the entity
2167 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2168 * appending #x20 to the normalized value, except that only a single
2169 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2170 * parsed entity or the literal entity value of an internal parsed entity
2171 * - other characters are processed by appending them to the normalized value
2172 * If the declared value is not CDATA, then the XML processor must further
2173 * process the normalized attribute value by discarding any leading and
2174 * trailing space (#x20) characters, and by replacing sequences of space
2175 * (#x20) characters by a single space (#x20) character.
2176 * All attributes for which no declaration has been read should be treated
2177 * by a non-validating parser as if declared CDATA.
2178 *
2179 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2180 */
2181
2182xmlChar *
2183xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2184 xmlChar limit = 0;
2185 xmlChar *buf = NULL;
2186 int len = 0;
2187 int buf_size = 0;
2188 int c, l;
2189 xmlChar *current = NULL;
2190 xmlEntityPtr ent;
2191
2192
2193 SHRINK;
2194 if (NXT(0) == '"') {
2195 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2196 limit = '"';
2197 NEXT;
2198 } else if (NXT(0) == '\'') {
2199 limit = '\'';
2200 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2201 NEXT;
2202 } else {
2203 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2205 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2206 ctxt->wellFormed = 0;
2207 ctxt->disableSAX = 1;
2208 return(NULL);
2209 }
2210
2211 /*
2212 * allocate a translation buffer.
2213 */
2214 buf_size = XML_PARSER_BUFFER_SIZE;
2215 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2216 if (buf == NULL) {
2217 perror("xmlParseAttValue: malloc failed");
2218 return(NULL);
2219 }
2220
2221 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002222 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002223 */
2224 c = CUR_CHAR(l);
2225 while (((NXT(0) != limit) && /* checked */
2226 (c != '<')) || (ctxt->token != 0)) {
2227 if (c == 0) break;
2228 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002229 if (ctxt->replaceEntities) {
2230 if (len > buf_size - 10) {
2231 growBuffer(buf);
2232 }
2233 buf[len++] = '&';
2234 } else {
2235 /*
2236 * The reparsing will be done in xmlStringGetNodeList()
2237 * called by the attribute() function in SAX.c
2238 */
2239 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002240
Daniel Veillard319a7422001-09-11 09:27:09 +00002241 if (len > buf_size - 10) {
2242 growBuffer(buf);
2243 }
2244 current = &buffer[0];
2245 while (*current != 0) { /* non input consuming */
2246 buf[len++] = *current++;
2247 }
2248 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002249 }
Owen Taylor3473f882001-02-23 17:55:21 +00002250 } else if (c == '&') {
2251 if (NXT(1) == '#') {
2252 int val = xmlParseCharRef(ctxt);
2253 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002254 if (ctxt->replaceEntities) {
2255 if (len > buf_size - 10) {
2256 growBuffer(buf);
2257 }
2258 buf[len++] = '&';
2259 } else {
2260 /*
2261 * The reparsing will be done in xmlStringGetNodeList()
2262 * called by the attribute() function in SAX.c
2263 */
2264 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002265
Daniel Veillard319a7422001-09-11 09:27:09 +00002266 if (len > buf_size - 10) {
2267 growBuffer(buf);
2268 }
2269 current = &buffer[0];
2270 while (*current != 0) { /* non input consuming */
2271 buf[len++] = *current++;
2272 }
Owen Taylor3473f882001-02-23 17:55:21 +00002273 }
2274 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002275 if (len > buf_size - 10) {
2276 growBuffer(buf);
2277 }
Owen Taylor3473f882001-02-23 17:55:21 +00002278 len += xmlCopyChar(0, &buf[len], val);
2279 }
2280 } else {
2281 ent = xmlParseEntityRef(ctxt);
2282 if ((ent != NULL) &&
2283 (ctxt->replaceEntities != 0)) {
2284 xmlChar *rep;
2285
2286 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2287 rep = xmlStringDecodeEntities(ctxt, ent->content,
2288 XML_SUBSTITUTE_REF, 0, 0, 0);
2289 if (rep != NULL) {
2290 current = rep;
2291 while (*current != 0) { /* non input consuming */
2292 buf[len++] = *current++;
2293 if (len > buf_size - 10) {
2294 growBuffer(buf);
2295 }
2296 }
2297 xmlFree(rep);
2298 }
2299 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002300 if (len > buf_size - 10) {
2301 growBuffer(buf);
2302 }
Owen Taylor3473f882001-02-23 17:55:21 +00002303 if (ent->content != NULL)
2304 buf[len++] = ent->content[0];
2305 }
2306 } else if (ent != NULL) {
2307 int i = xmlStrlen(ent->name);
2308 const xmlChar *cur = ent->name;
2309
2310 /*
2311 * This may look absurd but is needed to detect
2312 * entities problems
2313 */
2314 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2315 (ent->content != NULL)) {
2316 xmlChar *rep;
2317 rep = xmlStringDecodeEntities(ctxt, ent->content,
2318 XML_SUBSTITUTE_REF, 0, 0, 0);
2319 if (rep != NULL)
2320 xmlFree(rep);
2321 }
2322
2323 /*
2324 * Just output the reference
2325 */
2326 buf[len++] = '&';
2327 if (len > buf_size - i - 10) {
2328 growBuffer(buf);
2329 }
2330 for (;i > 0;i--)
2331 buf[len++] = *cur++;
2332 buf[len++] = ';';
2333 }
2334 }
2335 } else {
2336 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2337 COPY_BUF(l,buf,len,0x20);
2338 if (len > buf_size - 10) {
2339 growBuffer(buf);
2340 }
2341 } else {
2342 COPY_BUF(l,buf,len,c);
2343 if (len > buf_size - 10) {
2344 growBuffer(buf);
2345 }
2346 }
2347 NEXTL(l);
2348 }
2349 GROW;
2350 c = CUR_CHAR(l);
2351 }
2352 buf[len++] = 0;
2353 if (RAW == '<') {
2354 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2356 ctxt->sax->error(ctxt->userData,
2357 "Unescaped '<' not allowed in attributes values\n");
2358 ctxt->wellFormed = 0;
2359 ctxt->disableSAX = 1;
2360 } else if (RAW != limit) {
2361 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2363 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2364 ctxt->wellFormed = 0;
2365 ctxt->disableSAX = 1;
2366 } else
2367 NEXT;
2368 return(buf);
2369}
2370
2371/**
2372 * xmlParseSystemLiteral:
2373 * @ctxt: an XML parser context
2374 *
2375 * parse an XML Literal
2376 *
2377 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2378 *
2379 * Returns the SystemLiteral parsed or NULL
2380 */
2381
2382xmlChar *
2383xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2384 xmlChar *buf = NULL;
2385 int len = 0;
2386 int size = XML_PARSER_BUFFER_SIZE;
2387 int cur, l;
2388 xmlChar stop;
2389 int state = ctxt->instate;
2390 int count = 0;
2391
2392 SHRINK;
2393 if (RAW == '"') {
2394 NEXT;
2395 stop = '"';
2396 } else if (RAW == '\'') {
2397 NEXT;
2398 stop = '\'';
2399 } else {
2400 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2402 ctxt->sax->error(ctxt->userData,
2403 "SystemLiteral \" or ' expected\n");
2404 ctxt->wellFormed = 0;
2405 ctxt->disableSAX = 1;
2406 return(NULL);
2407 }
2408
2409 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2410 if (buf == NULL) {
2411 xmlGenericError(xmlGenericErrorContext,
2412 "malloc of %d byte failed\n", size);
2413 return(NULL);
2414 }
2415 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2416 cur = CUR_CHAR(l);
2417 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2418 if (len + 5 >= size) {
2419 size *= 2;
2420 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2421 if (buf == NULL) {
2422 xmlGenericError(xmlGenericErrorContext,
2423 "realloc of %d byte failed\n", size);
2424 ctxt->instate = (xmlParserInputState) state;
2425 return(NULL);
2426 }
2427 }
2428 count++;
2429 if (count > 50) {
2430 GROW;
2431 count = 0;
2432 }
2433 COPY_BUF(l,buf,len,cur);
2434 NEXTL(l);
2435 cur = CUR_CHAR(l);
2436 if (cur == 0) {
2437 GROW;
2438 SHRINK;
2439 cur = CUR_CHAR(l);
2440 }
2441 }
2442 buf[len] = 0;
2443 ctxt->instate = (xmlParserInputState) state;
2444 if (!IS_CHAR(cur)) {
2445 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2446 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2447 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2448 ctxt->wellFormed = 0;
2449 ctxt->disableSAX = 1;
2450 } else {
2451 NEXT;
2452 }
2453 return(buf);
2454}
2455
2456/**
2457 * xmlParsePubidLiteral:
2458 * @ctxt: an XML parser context
2459 *
2460 * parse an XML public literal
2461 *
2462 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2463 *
2464 * Returns the PubidLiteral parsed or NULL.
2465 */
2466
2467xmlChar *
2468xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2469 xmlChar *buf = NULL;
2470 int len = 0;
2471 int size = XML_PARSER_BUFFER_SIZE;
2472 xmlChar cur;
2473 xmlChar stop;
2474 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002475 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002476
2477 SHRINK;
2478 if (RAW == '"') {
2479 NEXT;
2480 stop = '"';
2481 } else if (RAW == '\'') {
2482 NEXT;
2483 stop = '\'';
2484 } else {
2485 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2487 ctxt->sax->error(ctxt->userData,
2488 "SystemLiteral \" or ' expected\n");
2489 ctxt->wellFormed = 0;
2490 ctxt->disableSAX = 1;
2491 return(NULL);
2492 }
2493 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2494 if (buf == NULL) {
2495 xmlGenericError(xmlGenericErrorContext,
2496 "malloc of %d byte failed\n", size);
2497 return(NULL);
2498 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002499 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002500 cur = CUR;
2501 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2502 if (len + 1 >= size) {
2503 size *= 2;
2504 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2505 if (buf == NULL) {
2506 xmlGenericError(xmlGenericErrorContext,
2507 "realloc of %d byte failed\n", size);
2508 return(NULL);
2509 }
2510 }
2511 buf[len++] = cur;
2512 count++;
2513 if (count > 50) {
2514 GROW;
2515 count = 0;
2516 }
2517 NEXT;
2518 cur = CUR;
2519 if (cur == 0) {
2520 GROW;
2521 SHRINK;
2522 cur = CUR;
2523 }
2524 }
2525 buf[len] = 0;
2526 if (cur != stop) {
2527 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2528 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2529 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2530 ctxt->wellFormed = 0;
2531 ctxt->disableSAX = 1;
2532 } else {
2533 NEXT;
2534 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002535 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002536 return(buf);
2537}
2538
Daniel Veillard48b2f892001-02-25 16:11:03 +00002539void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002540/**
2541 * xmlParseCharData:
2542 * @ctxt: an XML parser context
2543 * @cdata: int indicating whether we are within a CDATA section
2544 *
2545 * parse a CharData section.
2546 * if we are within a CDATA section ']]>' marks an end of section.
2547 *
2548 * The right angle bracket (>) may be represented using the string "&gt;",
2549 * and must, for compatibility, be escaped using "&gt;" or a character
2550 * reference when it appears in the string "]]>" in content, when that
2551 * string is not marking the end of a CDATA section.
2552 *
2553 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2554 */
2555
2556void
2557xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00002558 const xmlChar *in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002559 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002560 int line = ctxt->input->line;
2561 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002562
2563 SHRINK;
2564 GROW;
2565 /*
2566 * Accelerated common case where input don't need to be
2567 * modified before passing it to the handler.
2568 */
2569 if ((ctxt->token == 0) && (!cdata)) {
2570 in = ctxt->input->cur;
2571 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002572get_more:
Daniel Veillard561b7f82002-03-20 21:55:57 +00002573 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
2574 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
Daniel Veillard48b2f892001-02-25 16:11:03 +00002575 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002576 if (*in == 0xA) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002577 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002578 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002579 while (*in == 0xA) {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002580 ctxt->input->line++;
2581 in++;
2582 }
2583 goto get_more;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002584 }
2585 if (*in == ']') {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002586 if ((in[1] == ']') && (in[2] == '>')) {
2587 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2589 ctxt->sax->error(ctxt->userData,
2590 "Sequence ']]>' not allowed in content\n");
2591 ctxt->input->cur = in;
2592 ctxt->wellFormed = 0;
2593 ctxt->disableSAX = 1;
2594 return;
2595 }
2596 in++;
2597 goto get_more;
2598 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002600 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002601 if (IS_BLANK(*ctxt->input->cur)) {
2602 const xmlChar *tmp = ctxt->input->cur;
2603 ctxt->input->cur = in;
2604 if (areBlanks(ctxt, tmp, nbchar)) {
2605 if (ctxt->sax->ignorableWhitespace != NULL)
2606 ctxt->sax->ignorableWhitespace(ctxt->userData,
2607 tmp, nbchar);
2608 } else {
2609 if (ctxt->sax->characters != NULL)
2610 ctxt->sax->characters(ctxt->userData,
2611 tmp, nbchar);
2612 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002613 line = ctxt->input->line;
2614 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002615 } else {
2616 if (ctxt->sax->characters != NULL)
2617 ctxt->sax->characters(ctxt->userData,
2618 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002619 line = ctxt->input->line;
2620 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002621 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002622 }
2623 ctxt->input->cur = in;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002624 if (*in == 0xD) {
2625 in++;
2626 if (*in == 0xA) {
2627 ctxt->input->cur = in;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002628 in++;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002629 ctxt->input->line++;
2630 continue; /* while */
Daniel Veillard48b2f892001-02-25 16:11:03 +00002631 }
Daniel Veillard561b7f82002-03-20 21:55:57 +00002632 in--;
2633 }
2634 if (*in == '<') {
2635 return;
2636 }
2637 if (*in == '&') {
2638 return;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002639 }
2640 SHRINK;
2641 GROW;
2642 in = ctxt->input->cur;
Daniel Veillard561b7f82002-03-20 21:55:57 +00002643 } while ((*in >= 0x20) && (*in <= 0x7F));
Daniel Veillard48b2f892001-02-25 16:11:03 +00002644 nbchar = 0;
2645 }
Daniel Veillard50582112001-03-26 22:52:16 +00002646 ctxt->input->line = line;
2647 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002648 xmlParseCharDataComplex(ctxt, cdata);
2649}
2650
2651void
2652xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002653 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2654 int nbchar = 0;
2655 int cur, l;
2656 int count = 0;
2657
2658 SHRINK;
2659 GROW;
2660 cur = CUR_CHAR(l);
2661 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2662 ((cur != '&') || (ctxt->token == '&')) &&
2663 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2664 if ((cur == ']') && (NXT(1) == ']') &&
2665 (NXT(2) == '>')) {
2666 if (cdata) break;
2667 else {
2668 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2670 ctxt->sax->error(ctxt->userData,
2671 "Sequence ']]>' not allowed in content\n");
2672 /* Should this be relaxed ??? I see a "must here */
2673 ctxt->wellFormed = 0;
2674 ctxt->disableSAX = 1;
2675 }
2676 }
2677 COPY_BUF(l,buf,nbchar,cur);
2678 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2679 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002680 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002681 */
2682 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2683 if (areBlanks(ctxt, buf, nbchar)) {
2684 if (ctxt->sax->ignorableWhitespace != NULL)
2685 ctxt->sax->ignorableWhitespace(ctxt->userData,
2686 buf, nbchar);
2687 } else {
2688 if (ctxt->sax->characters != NULL)
2689 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2690 }
2691 }
2692 nbchar = 0;
2693 }
2694 count++;
2695 if (count > 50) {
2696 GROW;
2697 count = 0;
2698 }
2699 NEXTL(l);
2700 cur = CUR_CHAR(l);
2701 }
2702 if (nbchar != 0) {
2703 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002704 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002705 */
2706 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2707 if (areBlanks(ctxt, buf, nbchar)) {
2708 if (ctxt->sax->ignorableWhitespace != NULL)
2709 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2710 } else {
2711 if (ctxt->sax->characters != NULL)
2712 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2713 }
2714 }
2715 }
2716}
2717
2718/**
2719 * xmlParseExternalID:
2720 * @ctxt: an XML parser context
2721 * @publicID: a xmlChar** receiving PubidLiteral
2722 * @strict: indicate whether we should restrict parsing to only
2723 * production [75], see NOTE below
2724 *
2725 * Parse an External ID or a Public ID
2726 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002727 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002728 * 'PUBLIC' S PubidLiteral S SystemLiteral
2729 *
2730 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2731 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2732 *
2733 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2734 *
2735 * Returns the function returns SystemLiteral and in the second
2736 * case publicID receives PubidLiteral, is strict is off
2737 * it is possible to return NULL and have publicID set.
2738 */
2739
2740xmlChar *
2741xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2742 xmlChar *URI = NULL;
2743
2744 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002745
2746 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002747 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2748 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2749 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2750 SKIP(6);
2751 if (!IS_BLANK(CUR)) {
2752 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2753 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2754 ctxt->sax->error(ctxt->userData,
2755 "Space required after 'SYSTEM'\n");
2756 ctxt->wellFormed = 0;
2757 ctxt->disableSAX = 1;
2758 }
2759 SKIP_BLANKS;
2760 URI = xmlParseSystemLiteral(ctxt);
2761 if (URI == NULL) {
2762 ctxt->errNo = XML_ERR_URI_REQUIRED;
2763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2764 ctxt->sax->error(ctxt->userData,
2765 "xmlParseExternalID: SYSTEM, no URI\n");
2766 ctxt->wellFormed = 0;
2767 ctxt->disableSAX = 1;
2768 }
2769 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2770 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2771 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2772 SKIP(6);
2773 if (!IS_BLANK(CUR)) {
2774 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2775 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2776 ctxt->sax->error(ctxt->userData,
2777 "Space required after 'PUBLIC'\n");
2778 ctxt->wellFormed = 0;
2779 ctxt->disableSAX = 1;
2780 }
2781 SKIP_BLANKS;
2782 *publicID = xmlParsePubidLiteral(ctxt);
2783 if (*publicID == NULL) {
2784 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2785 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2786 ctxt->sax->error(ctxt->userData,
2787 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2788 ctxt->wellFormed = 0;
2789 ctxt->disableSAX = 1;
2790 }
2791 if (strict) {
2792 /*
2793 * We don't handle [83] so "S SystemLiteral" is required.
2794 */
2795 if (!IS_BLANK(CUR)) {
2796 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2798 ctxt->sax->error(ctxt->userData,
2799 "Space required after the Public Identifier\n");
2800 ctxt->wellFormed = 0;
2801 ctxt->disableSAX = 1;
2802 }
2803 } else {
2804 /*
2805 * We handle [83] so we return immediately, if
2806 * "S SystemLiteral" is not detected. From a purely parsing
2807 * point of view that's a nice mess.
2808 */
2809 const xmlChar *ptr;
2810 GROW;
2811
2812 ptr = CUR_PTR;
2813 if (!IS_BLANK(*ptr)) return(NULL);
2814
2815 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2816 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2817 }
2818 SKIP_BLANKS;
2819 URI = xmlParseSystemLiteral(ctxt);
2820 if (URI == NULL) {
2821 ctxt->errNo = XML_ERR_URI_REQUIRED;
2822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2823 ctxt->sax->error(ctxt->userData,
2824 "xmlParseExternalID: PUBLIC, no URI\n");
2825 ctxt->wellFormed = 0;
2826 ctxt->disableSAX = 1;
2827 }
2828 }
2829 return(URI);
2830}
2831
2832/**
2833 * xmlParseComment:
2834 * @ctxt: an XML parser context
2835 *
2836 * Skip an XML (SGML) comment <!-- .... -->
2837 * The spec says that "For compatibility, the string "--" (double-hyphen)
2838 * must not occur within comments. "
2839 *
2840 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2841 */
2842void
2843xmlParseComment(xmlParserCtxtPtr ctxt) {
2844 xmlChar *buf = NULL;
2845 int len;
2846 int size = XML_PARSER_BUFFER_SIZE;
2847 int q, ql;
2848 int r, rl;
2849 int cur, l;
2850 xmlParserInputState state;
2851 xmlParserInputPtr input = ctxt->input;
2852 int count = 0;
2853
2854 /*
2855 * Check that there is a comment right here.
2856 */
2857 if ((RAW != '<') || (NXT(1) != '!') ||
2858 (NXT(2) != '-') || (NXT(3) != '-')) return;
2859
2860 state = ctxt->instate;
2861 ctxt->instate = XML_PARSER_COMMENT;
2862 SHRINK;
2863 SKIP(4);
2864 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2865 if (buf == NULL) {
2866 xmlGenericError(xmlGenericErrorContext,
2867 "malloc of %d byte failed\n", size);
2868 ctxt->instate = state;
2869 return;
2870 }
2871 q = CUR_CHAR(ql);
2872 NEXTL(ql);
2873 r = CUR_CHAR(rl);
2874 NEXTL(rl);
2875 cur = CUR_CHAR(l);
2876 len = 0;
2877 while (IS_CHAR(cur) && /* checked */
2878 ((cur != '>') ||
2879 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002880 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002881 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2882 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2883 ctxt->sax->error(ctxt->userData,
2884 "Comment must not contain '--' (double-hyphen)`\n");
2885 ctxt->wellFormed = 0;
2886 ctxt->disableSAX = 1;
2887 }
2888 if (len + 5 >= size) {
2889 size *= 2;
2890 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2891 if (buf == NULL) {
2892 xmlGenericError(xmlGenericErrorContext,
2893 "realloc of %d byte failed\n", size);
2894 ctxt->instate = state;
2895 return;
2896 }
2897 }
2898 COPY_BUF(ql,buf,len,q);
2899 q = r;
2900 ql = rl;
2901 r = cur;
2902 rl = l;
2903
2904 count++;
2905 if (count > 50) {
2906 GROW;
2907 count = 0;
2908 }
2909 NEXTL(l);
2910 cur = CUR_CHAR(l);
2911 if (cur == 0) {
2912 SHRINK;
2913 GROW;
2914 cur = CUR_CHAR(l);
2915 }
2916 }
2917 buf[len] = 0;
2918 if (!IS_CHAR(cur)) {
2919 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2921 ctxt->sax->error(ctxt->userData,
2922 "Comment not terminated \n<!--%.50s\n", buf);
2923 ctxt->wellFormed = 0;
2924 ctxt->disableSAX = 1;
2925 xmlFree(buf);
2926 } else {
2927 if (input != ctxt->input) {
2928 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2930 ctxt->sax->error(ctxt->userData,
2931"Comment doesn't start and stop in the same entity\n");
2932 ctxt->wellFormed = 0;
2933 ctxt->disableSAX = 1;
2934 }
2935 NEXT;
2936 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2937 (!ctxt->disableSAX))
2938 ctxt->sax->comment(ctxt->userData, buf);
2939 xmlFree(buf);
2940 }
2941 ctxt->instate = state;
2942}
2943
2944/**
2945 * xmlParsePITarget:
2946 * @ctxt: an XML parser context
2947 *
2948 * parse the name of a PI
2949 *
2950 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2951 *
2952 * Returns the PITarget name or NULL
2953 */
2954
2955xmlChar *
2956xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2957 xmlChar *name;
2958
2959 name = xmlParseName(ctxt);
2960 if ((name != NULL) &&
2961 ((name[0] == 'x') || (name[0] == 'X')) &&
2962 ((name[1] == 'm') || (name[1] == 'M')) &&
2963 ((name[2] == 'l') || (name[2] == 'L'))) {
2964 int i;
2965 if ((name[0] == 'x') && (name[1] == 'm') &&
2966 (name[2] == 'l') && (name[3] == 0)) {
2967 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2969 ctxt->sax->error(ctxt->userData,
2970 "XML declaration allowed only at the start of the document\n");
2971 ctxt->wellFormed = 0;
2972 ctxt->disableSAX = 1;
2973 return(name);
2974 } else if (name[3] == 0) {
2975 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2977 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2978 ctxt->wellFormed = 0;
2979 ctxt->disableSAX = 1;
2980 return(name);
2981 }
2982 for (i = 0;;i++) {
2983 if (xmlW3CPIs[i] == NULL) break;
2984 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2985 return(name);
2986 }
2987 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2988 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2989 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002990 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002991 }
2992 }
2993 return(name);
2994}
2995
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * A value whose "catalog" keyword is not followed by '=' is ignored
 * silently; any other syntax problem raises a SAX warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7))
        goto error;
    p += 7;

    /* '=' sign */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* quoted value */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
                           "Catalog PI syntax error: %s\n", catalog);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
3058
Owen Taylor3473f882001-02-23 17:55:21 +00003059/**
3060 * xmlParsePI:
3061 * @ctxt: an XML parser context
3062 *
3063 * parse an XML Processing Instruction.
3064 *
3065 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3066 *
3067 * The processing is transfered to SAX once parsed.
3068 */
3069
3070void
3071xmlParsePI(xmlParserCtxtPtr ctxt) {
3072 xmlChar *buf = NULL;
3073 int len = 0;
3074 int size = XML_PARSER_BUFFER_SIZE;
3075 int cur, l;
3076 xmlChar *target;
3077 xmlParserInputState state;
3078 int count = 0;
3079
3080 if ((RAW == '<') && (NXT(1) == '?')) {
3081 xmlParserInputPtr input = ctxt->input;
3082 state = ctxt->instate;
3083 ctxt->instate = XML_PARSER_PI;
3084 /*
3085 * this is a Processing Instruction.
3086 */
3087 SKIP(2);
3088 SHRINK;
3089
3090 /*
3091 * Parse the target name and check for special support like
3092 * namespace.
3093 */
3094 target = xmlParsePITarget(ctxt);
3095 if (target != NULL) {
3096 if ((RAW == '?') && (NXT(1) == '>')) {
3097 if (input != ctxt->input) {
3098 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3100 ctxt->sax->error(ctxt->userData,
3101 "PI declaration doesn't start and stop in the same entity\n");
3102 ctxt->wellFormed = 0;
3103 ctxt->disableSAX = 1;
3104 }
3105 SKIP(2);
3106
3107 /*
3108 * SAX: PI detected.
3109 */
3110 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3111 (ctxt->sax->processingInstruction != NULL))
3112 ctxt->sax->processingInstruction(ctxt->userData,
3113 target, NULL);
3114 ctxt->instate = state;
3115 xmlFree(target);
3116 return;
3117 }
3118 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3119 if (buf == NULL) {
3120 xmlGenericError(xmlGenericErrorContext,
3121 "malloc of %d byte failed\n", size);
3122 ctxt->instate = state;
3123 return;
3124 }
3125 cur = CUR;
3126 if (!IS_BLANK(cur)) {
3127 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3128 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3129 ctxt->sax->error(ctxt->userData,
3130 "xmlParsePI: PI %s space expected\n", target);
3131 ctxt->wellFormed = 0;
3132 ctxt->disableSAX = 1;
3133 }
3134 SKIP_BLANKS;
3135 cur = CUR_CHAR(l);
3136 while (IS_CHAR(cur) && /* checked */
3137 ((cur != '?') || (NXT(1) != '>'))) {
3138 if (len + 5 >= size) {
3139 size *= 2;
3140 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3141 if (buf == NULL) {
3142 xmlGenericError(xmlGenericErrorContext,
3143 "realloc of %d byte failed\n", size);
3144 ctxt->instate = state;
3145 return;
3146 }
3147 }
3148 count++;
3149 if (count > 50) {
3150 GROW;
3151 count = 0;
3152 }
3153 COPY_BUF(l,buf,len,cur);
3154 NEXTL(l);
3155 cur = CUR_CHAR(l);
3156 if (cur == 0) {
3157 SHRINK;
3158 GROW;
3159 cur = CUR_CHAR(l);
3160 }
3161 }
3162 buf[len] = 0;
3163 if (cur != '?') {
3164 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3166 ctxt->sax->error(ctxt->userData,
3167 "xmlParsePI: PI %s never end ...\n", target);
3168 ctxt->wellFormed = 0;
3169 ctxt->disableSAX = 1;
3170 } else {
3171 if (input != ctxt->input) {
3172 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3173 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3174 ctxt->sax->error(ctxt->userData,
3175 "PI declaration doesn't start and stop in the same entity\n");
3176 ctxt->wellFormed = 0;
3177 ctxt->disableSAX = 1;
3178 }
3179 SKIP(2);
3180
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003181#ifdef LIBXML_CATALOG_ENABLED
3182 if (((state == XML_PARSER_MISC) ||
3183 (state == XML_PARSER_START)) &&
3184 (xmlStrEqual(target, XML_CATALOG_PI))) {
3185 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3186 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3187 (allow == XML_CATA_ALLOW_ALL))
3188 xmlParseCatalogPI(ctxt, buf);
3189 }
3190#endif
3191
3192
Owen Taylor3473f882001-02-23 17:55:21 +00003193 /*
3194 * SAX: PI detected.
3195 */
3196 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3197 (ctxt->sax->processingInstruction != NULL))
3198 ctxt->sax->processingInstruction(ctxt->userData,
3199 target, buf);
3200 }
3201 xmlFree(buf);
3202 xmlFree(target);
3203 } else {
3204 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3206 ctxt->sax->error(ctxt->userData,
3207 "xmlParsePI : no target name\n");
3208 ctxt->wellFormed = 0;
3209 ctxt->disableSAX = 1;
3210 }
3211 ctxt->instate = state;
3212 }
3213}
3214
3215/**
3216 * xmlParseNotationDecl:
3217 * @ctxt: an XML parser context
3218 *
3219 * parse a notation declaration
3220 *
3221 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3222 *
3223 * Hence there is actually 3 choices:
3224 * 'PUBLIC' S PubidLiteral
3225 * 'PUBLIC' S PubidLiteral S SystemLiteral
3226 * and 'SYSTEM' S SystemLiteral
3227 *
3228 * See the NOTE on xmlParseExternalID().
3229 */
3230
3231void
3232xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3233 xmlChar *name;
3234 xmlChar *Pubid;
3235 xmlChar *Systemid;
3236
3237 if ((RAW == '<') && (NXT(1) == '!') &&
3238 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3239 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3240 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3241 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3242 xmlParserInputPtr input = ctxt->input;
3243 SHRINK;
3244 SKIP(10);
3245 if (!IS_BLANK(CUR)) {
3246 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3248 ctxt->sax->error(ctxt->userData,
3249 "Space required after '<!NOTATION'\n");
3250 ctxt->wellFormed = 0;
3251 ctxt->disableSAX = 1;
3252 return;
3253 }
3254 SKIP_BLANKS;
3255
Daniel Veillard76d66f42001-05-16 21:05:17 +00003256 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003257 if (name == NULL) {
3258 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3259 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3260 ctxt->sax->error(ctxt->userData,
3261 "NOTATION: Name expected here\n");
3262 ctxt->wellFormed = 0;
3263 ctxt->disableSAX = 1;
3264 return;
3265 }
3266 if (!IS_BLANK(CUR)) {
3267 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3269 ctxt->sax->error(ctxt->userData,
3270 "Space required after the NOTATION name'\n");
3271 ctxt->wellFormed = 0;
3272 ctxt->disableSAX = 1;
3273 return;
3274 }
3275 SKIP_BLANKS;
3276
3277 /*
3278 * Parse the IDs.
3279 */
3280 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3281 SKIP_BLANKS;
3282
3283 if (RAW == '>') {
3284 if (input != ctxt->input) {
3285 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3287 ctxt->sax->error(ctxt->userData,
3288"Notation declaration doesn't start and stop in the same entity\n");
3289 ctxt->wellFormed = 0;
3290 ctxt->disableSAX = 1;
3291 }
3292 NEXT;
3293 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3294 (ctxt->sax->notationDecl != NULL))
3295 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3296 } else {
3297 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3298 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3299 ctxt->sax->error(ctxt->userData,
3300 "'>' required to close NOTATION declaration\n");
3301 ctxt->wellFormed = 0;
3302 ctxt->disableSAX = 1;
3303 }
3304 xmlFree(name);
3305 if (Systemid != NULL) xmlFree(Systemid);
3306 if (Pubid != NULL) xmlFree(Pubid);
3307 }
3308}
3309
/**
 * xmlParseEntityDecl:
 * @ctxt:  an XML parser context
 *
 * parse <!ENTITY declarations
 *
 * [70] EntityDecl ::= GEDecl | PEDecl
 *
 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
 *
 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
 *
 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
 *
 * [74] PEDef ::= EntityValue | ExternalID
 *
 * [76] NDataDecl ::= S 'NDATA' S Name
 *
 * [ VC: Notation Declared ]
 * The Name must match the declared name of a notation.
 *
 * Nothing is done if the input does not start with '<!ENTITY'.
 * Depending on the form found, the declaration is reported through the
 * entityDecl or unparsedEntityDecl SAX callbacks (when SAX is not
 * disabled). All the strings parsed here are released before
 * returning, except the raw entity value which may be handed over to
 * the entity found through the SAX lookup callbacks.
 */

void
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
    xmlChar *name = NULL;
    xmlChar *value = NULL;
    xmlChar *URI = NULL, *literal = NULL;
    xmlChar *ndata = NULL;
    int isParameter = 0;
    /* raw entity value as returned by xmlParseEntityValue() */
    xmlChar *orig = NULL;

    GROW;
    if ((RAW == '<') && (NXT(1) == '!') &&
        (NXT(2) == 'E') && (NXT(3) == 'N') &&
        (NXT(4) == 'T') && (NXT(5) == 'I') &&
        (NXT(6) == 'T') && (NXT(7) == 'Y')) {
        /* remembered to detect a declaration crossing an entity boundary */
        xmlParserInputPtr input = ctxt->input;
        ctxt->instate = XML_PARSER_ENTITY_DECL;
        SHRINK;
        SKIP(8);
        /* missing blanks are reported but parsing goes on */
        if (!IS_BLANK(CUR)) {
            ctxt->errNo = XML_ERR_SPACE_REQUIRED;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                                 "Space required after '<!ENTITY'\n");
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
        }
        SKIP_BLANKS;

        /* a '%' after the keyword marks a parameter entity, see [72] */
        if (RAW == '%') {
            NEXT;
            if (!IS_BLANK(CUR)) {
                ctxt->errNo = XML_ERR_SPACE_REQUIRED;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
                                     "Space required after '%'\n");
                ctxt->wellFormed = 0;
                ctxt->disableSAX = 1;
            }
            SKIP_BLANKS;
            isParameter = 1;
        }

        name = xmlParseName(ctxt);
        if (name == NULL) {
            ctxt->errNo = XML_ERR_NAME_REQUIRED;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
            /* nothing has been allocated yet at this point */
            return;
        }
        if (!IS_BLANK(CUR)) {
            ctxt->errNo = XML_ERR_SPACE_REQUIRED;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                     "Space required after the entity name\n");
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
        }
        SKIP_BLANKS;

        /*
         * handle the various case of definitions...
         */
        if (isParameter) {
            if ((RAW == '"') || (RAW == '\'')) {
                /* internal parameter entity: quoted EntityValue */
                value = xmlParseEntityValue(ctxt, &orig);
                if (value) {
                    if ((ctxt->sax != NULL) &&
                        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                        ctxt->sax->entityDecl(ctxt->userData, name,
                                    XML_INTERNAL_PARAMETER_ENTITY,
                                    NULL, NULL, value);
                }
            } else {
                /* external parameter entity: ExternalID */
                URI = xmlParseExternalID(ctxt, &literal, 1);
                if ((URI == NULL) && (literal == NULL)) {
                    ctxt->errNo = XML_ERR_VALUE_REQUIRED;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                            "Entity value required\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                }
                if (URI) {
                    xmlURIPtr uri;

                    /* parsed only to validate it, freed right after */
                    uri = xmlParseURI((const char *) URI);
                    if (uri == NULL) {
                        ctxt->errNo = XML_ERR_INVALID_URI;
                        if ((ctxt->sax != NULL) &&
                            (!ctxt->disableSAX) &&
                            (ctxt->sax->error != NULL))
                            ctxt->sax->error(ctxt->userData,
                                        "Invalid URI: %s\n", URI);
                        /*
                         * This really ought to be a well formedness error
                         * but the XML Core WG decided otherwise c.f. issue
                         * E26 of the XML erratas.
                         */
                    } else {
                        if (uri->fragment != NULL) {
                            ctxt->errNo = XML_ERR_URI_FRAGMENT;
                            if ((ctxt->sax != NULL) &&
                                (!ctxt->disableSAX) &&
                                (ctxt->sax->error != NULL))
                                ctxt->sax->error(ctxt->userData,
                                            "Fragment not allowed: %s\n", URI);
                            /*
                             * Okay this is foolish to block those but not
                             * invalid URIs.
                             */
                            ctxt->wellFormed = 0;
                        } else {
                            /* declared only when the URI has no fragment */
                            if ((ctxt->sax != NULL) &&
                                (!ctxt->disableSAX) &&
                                (ctxt->sax->entityDecl != NULL))
                                ctxt->sax->entityDecl(ctxt->userData, name,
                                            XML_EXTERNAL_PARAMETER_ENTITY,
                                            literal, URI, NULL);
                        }
                        xmlFreeURI(uri);
                    }
                }
            }
        } else {
            if ((RAW == '"') || (RAW == '\'')) {
                /*
                 * internal general entity: quoted EntityValue. Unlike the
                 * parameter entity case the callback is invoked even if
                 * value is NULL.
                 */
                value = xmlParseEntityValue(ctxt, &orig);
                if ((ctxt->sax != NULL) &&
                    (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                    ctxt->sax->entityDecl(ctxt->userData, name,
                                XML_INTERNAL_GENERAL_ENTITY,
                                NULL, NULL, value);
                /*
                 * For expat compatibility in SAX mode.
                 * When there is no document, or only the placeholder one
                 * whose version is SAX_COMPAT_MODE, a document and a
                 * "fake" internal subset are created as needed and the
                 * entity is also registered by calling entityDecl()
                 * directly.
                 * NOTE(review): entityDecl() is presumably the default
                 * SAX handler from SAX.c - confirm.
                 */
                if ((ctxt->myDoc == NULL) ||
                    (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
                    if (ctxt->myDoc == NULL) {
                        ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
                    }
                    if (ctxt->myDoc->intSubset == NULL)
                        ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
                                            BAD_CAST "fake", NULL, NULL);

                    entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
                               NULL, NULL, value);
                }
            } else {
                /* external general entity: ExternalID NDataDecl? */
                URI = xmlParseExternalID(ctxt, &literal, 1);
                if ((URI == NULL) && (literal == NULL)) {
                    ctxt->errNo = XML_ERR_VALUE_REQUIRED;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                            "Entity value required\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                }
                if (URI) {
                    xmlURIPtr uri;

                    /* parsed only to validate it, freed right after */
                    uri = xmlParseURI((const char *)URI);
                    if (uri == NULL) {
                        ctxt->errNo = XML_ERR_INVALID_URI;
                        if ((ctxt->sax != NULL) &&
                            (!ctxt->disableSAX) &&
                            (ctxt->sax->error != NULL))
                            ctxt->sax->error(ctxt->userData,
                                        "Invalid URI: %s\n", URI);
                        /*
                         * This really ought to be a well formedness error
                         * but the XML Core WG decided otherwise c.f. issue
                         * E26 of the XML erratas.
                         */
                    } else {
                        if (uri->fragment != NULL) {
                            ctxt->errNo = XML_ERR_URI_FRAGMENT;
                            if ((ctxt->sax != NULL) &&
                                (!ctxt->disableSAX) &&
                                (ctxt->sax->error != NULL))
                                ctxt->sax->error(ctxt->userData,
                                            "Fragment not allowed: %s\n", URI);
                            /*
                             * Okay this is foolish to block those but not
                             * invalid URIs.
                             */
                            ctxt->wellFormed = 0;
                        }
                        xmlFreeURI(uri);
                    }
                }
                if ((RAW != '>') && (!IS_BLANK(CUR))) {
                    ctxt->errNo = XML_ERR_SPACE_REQUIRED;
                    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                        ctxt->sax->error(ctxt->userData,
                            "Space required before 'NDATA'\n");
                    ctxt->wellFormed = 0;
                    ctxt->disableSAX = 1;
                }
                SKIP_BLANKS;
                if ((RAW == 'N') && (NXT(1) == 'D') &&
                    (NXT(2) == 'A') && (NXT(3) == 'T') &&
                    (NXT(4) == 'A')) {
                    /* unparsed entity: 'NDATA' S Name, see [76] */
                    SKIP(5);
                    if (!IS_BLANK(CUR)) {
                        ctxt->errNo = XML_ERR_SPACE_REQUIRED;
                        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                            ctxt->sax->error(ctxt->userData,
                                "Space required after 'NDATA'\n");
                        ctxt->wellFormed = 0;
                        ctxt->disableSAX = 1;
                    }
                    SKIP_BLANKS;
                    ndata = xmlParseName(ctxt);
                    if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
                        (ctxt->sax->unparsedEntityDecl != NULL))
                        ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
                                    literal, URI, ndata);
                } else {
                    /* external parsed general entity */
                    if ((ctxt->sax != NULL) &&
                        (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
                        ctxt->sax->entityDecl(ctxt->userData, name,
                                    XML_EXTERNAL_GENERAL_PARSED_ENTITY,
                                    literal, URI, NULL);
                    /*
                     * For expat compatibility in SAX mode.
                     * assuming the entity replacement was asked for
                     */
                    if ((ctxt->replaceEntities != 0) &&
                        ((ctxt->myDoc == NULL) ||
                        (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
                        if (ctxt->myDoc == NULL) {
                            ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
                        }

                        if (ctxt->myDoc->intSubset == NULL)
                            ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
                                                BAD_CAST "fake", NULL, NULL);
                        entityDecl(ctxt, name,
                                   XML_EXTERNAL_GENERAL_PARSED_ENTITY,
                                   literal, URI, NULL);
                    }
                }
            }
        }
        /* every form of the declaration ends with S? '>' */
        SKIP_BLANKS;
        if (RAW != '>') {
            ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                    "xmlParseEntityDecl: entity %s not terminated\n", name);
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
        } else {
            if (input != ctxt->input) {
                ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
                if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                    ctxt->sax->error(ctxt->userData,
"Entity declaration doesn't start and stop in the same entity\n");
                ctxt->wellFormed = 0;
                ctxt->disableSAX = 1;
            }
            NEXT;
        }
        if (orig != NULL) {
            /*
             * Ugly mechanism to save the raw entity value.
             * The entity just declared is looked up again through the
             * SAX callbacks; the raw value is attached to it if it has
             * none yet, and freed otherwise.
             */
            xmlEntityPtr cur = NULL;

            if (isParameter) {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->getParameterEntity != NULL))
                    cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
            } else {
                if ((ctxt->sax != NULL) &&
                    (ctxt->sax->getEntity != NULL))
                    cur = ctxt->sax->getEntity(ctxt->userData, name);
                /* fall back on a direct getEntity() lookup */
                if ((cur == NULL) && (ctxt->userData==ctxt)) {
                    cur = getEntity(ctxt, name);
                }
            }
            if (cur != NULL) {
                if (cur->orig != NULL)
                    xmlFree(orig);
                else
                    cur->orig = orig;
            } else
                xmlFree(orig);
        }
        if (name != NULL) xmlFree(name);
        if (value != NULL) xmlFree(value);
        if (URI != NULL) xmlFree(URI);
        if (literal != NULL) xmlFree(literal);
        if (ndata != NULL) xmlFree(ndata);
    }
}
3629
3630/**
3631 * xmlParseDefaultDecl:
3632 * @ctxt: an XML parser context
3633 * @value: Receive a possible fixed default value for the attribute
3634 *
3635 * Parse an attribute default declaration
3636 *
3637 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3638 *
3639 * [ VC: Required Attribute ]
3640 * if the default declaration is the keyword #REQUIRED, then the
3641 * attribute must be specified for all elements of the type in the
3642 * attribute-list declaration.
3643 *
3644 * [ VC: Attribute Default Legal ]
3645 * The declared default value must meet the lexical constraints of
3646 * the declared attribute type c.f. xmlValidateAttributeDecl()
3647 *
3648 * [ VC: Fixed Attribute Default ]
3649 * if an attribute has a default value declared with the #FIXED
3650 * keyword, instances of that attribute must match the default value.
3651 *
3652 * [ WFC: No < in Attribute Values ]
3653 * handled in xmlParseAttValue()
3654 *
3655 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3656 * or XML_ATTRIBUTE_FIXED.
3657 */
3658
3659int
3660xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3661 int val;
3662 xmlChar *ret;
3663
3664 *value = NULL;
3665 if ((RAW == '#') && (NXT(1) == 'R') &&
3666 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3667 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3668 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3669 (NXT(8) == 'D')) {
3670 SKIP(9);
3671 return(XML_ATTRIBUTE_REQUIRED);
3672 }
3673 if ((RAW == '#') && (NXT(1) == 'I') &&
3674 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3675 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3676 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3677 SKIP(8);
3678 return(XML_ATTRIBUTE_IMPLIED);
3679 }
3680 val = XML_ATTRIBUTE_NONE;
3681 if ((RAW == '#') && (NXT(1) == 'F') &&
3682 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3683 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3684 SKIP(6);
3685 val = XML_ATTRIBUTE_FIXED;
3686 if (!IS_BLANK(CUR)) {
3687 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3689 ctxt->sax->error(ctxt->userData,
3690 "Space required after '#FIXED'\n");
3691 ctxt->wellFormed = 0;
3692 ctxt->disableSAX = 1;
3693 }
3694 SKIP_BLANKS;
3695 }
3696 ret = xmlParseAttValue(ctxt);
3697 ctxt->instate = XML_PARSER_DTD;
3698 if (ret == NULL) {
3699 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3700 ctxt->sax->error(ctxt->userData,
3701 "Attribute default value declaration error\n");
3702 ctxt->wellFormed = 0;
3703 ctxt->disableSAX = 1;
3704 } else
3705 *value = ret;
3706 return(val);
3707}
3708
3709/**
3710 * xmlParseNotationType:
3711 * @ctxt: an XML parser context
3712 *
3713 * parse an Notation attribute type.
3714 *
3715 * Note: the leading 'NOTATION' S part has already being parsed...
3716 *
3717 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3718 *
3719 * [ VC: Notation Attributes ]
3720 * Values of this type must match one of the notation names included
3721 * in the declaration; all notation names in the declaration must be declared.
3722 *
3723 * Returns: the notation attribute tree built while parsing
3724 */
3725
3726xmlEnumerationPtr
3727xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3728 xmlChar *name;
3729 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3730
3731 if (RAW != '(') {
3732 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3734 ctxt->sax->error(ctxt->userData,
3735 "'(' required to start 'NOTATION'\n");
3736 ctxt->wellFormed = 0;
3737 ctxt->disableSAX = 1;
3738 return(NULL);
3739 }
3740 SHRINK;
3741 do {
3742 NEXT;
3743 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003744 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003745 if (name == NULL) {
3746 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3748 ctxt->sax->error(ctxt->userData,
3749 "Name expected in NOTATION declaration\n");
3750 ctxt->wellFormed = 0;
3751 ctxt->disableSAX = 1;
3752 return(ret);
3753 }
3754 cur = xmlCreateEnumeration(name);
3755 xmlFree(name);
3756 if (cur == NULL) return(ret);
3757 if (last == NULL) ret = last = cur;
3758 else {
3759 last->next = cur;
3760 last = cur;
3761 }
3762 SKIP_BLANKS;
3763 } while (RAW == '|');
3764 if (RAW != ')') {
3765 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3767 ctxt->sax->error(ctxt->userData,
3768 "')' required to finish NOTATION declaration\n");
3769 ctxt->wellFormed = 0;
3770 ctxt->disableSAX = 1;
3771 if ((last != NULL) && (last != ret))
3772 xmlFreeEnumeration(last);
3773 return(ret);
3774 }
3775 NEXT;
3776 return(ret);
3777}
3778
3779/**
3780 * xmlParseEnumerationType:
3781 * @ctxt: an XML parser context
3782 *
3783 * parse an Enumeration attribute type.
3784 *
3785 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3786 *
3787 * [ VC: Enumeration ]
3788 * Values of this type must match one of the Nmtoken tokens in
3789 * the declaration
3790 *
3791 * Returns: the enumeration attribute tree built while parsing
3792 */
3793
3794xmlEnumerationPtr
3795xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3796 xmlChar *name;
3797 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3798
3799 if (RAW != '(') {
3800 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3802 ctxt->sax->error(ctxt->userData,
3803 "'(' required to start ATTLIST enumeration\n");
3804 ctxt->wellFormed = 0;
3805 ctxt->disableSAX = 1;
3806 return(NULL);
3807 }
3808 SHRINK;
3809 do {
3810 NEXT;
3811 SKIP_BLANKS;
3812 name = xmlParseNmtoken(ctxt);
3813 if (name == NULL) {
3814 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3815 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3816 ctxt->sax->error(ctxt->userData,
3817 "NmToken expected in ATTLIST enumeration\n");
3818 ctxt->wellFormed = 0;
3819 ctxt->disableSAX = 1;
3820 return(ret);
3821 }
3822 cur = xmlCreateEnumeration(name);
3823 xmlFree(name);
3824 if (cur == NULL) return(ret);
3825 if (last == NULL) ret = last = cur;
3826 else {
3827 last->next = cur;
3828 last = cur;
3829 }
3830 SKIP_BLANKS;
3831 } while (RAW == '|');
3832 if (RAW != ')') {
3833 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3835 ctxt->sax->error(ctxt->userData,
3836 "')' required to finish ATTLIST enumeration\n");
3837 ctxt->wellFormed = 0;
3838 ctxt->disableSAX = 1;
3839 return(ret);
3840 }
3841 NEXT;
3842 return(ret);
3843}
3844
3845/**
3846 * xmlParseEnumeratedType:
3847 * @ctxt: an XML parser context
3848 * @tree: the enumeration tree built while parsing
3849 *
3850 * parse an Enumerated attribute type.
3851 *
3852 * [57] EnumeratedType ::= NotationType | Enumeration
3853 *
3854 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3855 *
3856 *
3857 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3858 */
3859
3860int
3861xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3862 if ((RAW == 'N') && (NXT(1) == 'O') &&
3863 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3864 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3865 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3866 SKIP(8);
3867 if (!IS_BLANK(CUR)) {
3868 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3870 ctxt->sax->error(ctxt->userData,
3871 "Space required after 'NOTATION'\n");
3872 ctxt->wellFormed = 0;
3873 ctxt->disableSAX = 1;
3874 return(0);
3875 }
3876 SKIP_BLANKS;
3877 *tree = xmlParseNotationType(ctxt);
3878 if (*tree == NULL) return(0);
3879 return(XML_ATTRIBUTE_NOTATION);
3880 }
3881 *tree = xmlParseEnumerationType(ctxt);
3882 if (*tree == NULL) return(0);
3883 return(XML_ATTRIBUTE_ENUMERATION);
3884}
3885
3886/**
3887 * xmlParseAttributeType:
3888 * @ctxt: an XML parser context
3889 * @tree: the enumeration tree built while parsing
3890 *
3891 * parse the Attribute list def for an element
3892 *
3893 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3894 *
3895 * [55] StringType ::= 'CDATA'
3896 *
3897 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3898 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3899 *
3900 * Validity constraints for attribute values syntax are checked in
3901 * xmlValidateAttributeValue()
3902 *
3903 * [ VC: ID ]
3904 * Values of type ID must match the Name production. A name must not
3905 * appear more than once in an XML document as a value of this type;
3906 * i.e., ID values must uniquely identify the elements which bear them.
3907 *
3908 * [ VC: One ID per Element Type ]
3909 * No element type may have more than one ID attribute specified.
3910 *
3911 * [ VC: ID Attribute Default ]
3912 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3913 *
3914 * [ VC: IDREF ]
3915 * Values of type IDREF must match the Name production, and values
3916 * of type IDREFS must match Names; each IDREF Name must match the value
3917 * of an ID attribute on some element in the XML document; i.e. IDREF
3918 * values must match the value of some ID attribute.
3919 *
3920 * [ VC: Entity Name ]
3921 * Values of type ENTITY must match the Name production, values
3922 * of type ENTITIES must match Names; each Entity Name must match the
3923 * name of an unparsed entity declared in the DTD.
3924 *
3925 * [ VC: Name Token ]
3926 * Values of type NMTOKEN must match the Nmtoken production; values
3927 * of type NMTOKENS must match Nmtokens.
3928 *
3929 * Returns the attribute type
3930 */
3931int
3932xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3933 SHRINK;
3934 if ((RAW == 'C') && (NXT(1) == 'D') &&
3935 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3936 (NXT(4) == 'A')) {
3937 SKIP(5);
3938 return(XML_ATTRIBUTE_CDATA);
3939 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3940 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3941 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3942 SKIP(6);
3943 return(XML_ATTRIBUTE_IDREFS);
3944 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3945 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3946 (NXT(4) == 'F')) {
3947 SKIP(5);
3948 return(XML_ATTRIBUTE_IDREF);
3949 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3950 SKIP(2);
3951 return(XML_ATTRIBUTE_ID);
3952 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3953 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3954 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3955 SKIP(6);
3956 return(XML_ATTRIBUTE_ENTITY);
3957 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3958 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3959 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3960 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3961 SKIP(8);
3962 return(XML_ATTRIBUTE_ENTITIES);
3963 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3964 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3965 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3966 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3967 SKIP(8);
3968 return(XML_ATTRIBUTE_NMTOKENS);
3969 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3970 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3971 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3972 (NXT(6) == 'N')) {
3973 SKIP(7);
3974 return(XML_ATTRIBUTE_NMTOKEN);
3975 }
3976 return(xmlParseEnumeratedType(ctxt, tree));
3977}
3978
3979/**
3980 * xmlParseAttributeListDecl:
3981 * @ctxt: an XML parser context
3982 *
3983 * : parse the Attribute list def for an element
3984 *
3985 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3986 *
3987 * [53] AttDef ::= S Name S AttType S DefaultDecl
3988 *
3989 */
3990void
3991xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3992 xmlChar *elemName;
3993 xmlChar *attrName;
3994 xmlEnumerationPtr tree;
3995
3996 if ((RAW == '<') && (NXT(1) == '!') &&
3997 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3998 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3999 (NXT(6) == 'I') && (NXT(7) == 'S') &&
4000 (NXT(8) == 'T')) {
4001 xmlParserInputPtr input = ctxt->input;
4002
4003 SKIP(9);
4004 if (!IS_BLANK(CUR)) {
4005 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4007 ctxt->sax->error(ctxt->userData,
4008 "Space required after '<!ATTLIST'\n");
4009 ctxt->wellFormed = 0;
4010 ctxt->disableSAX = 1;
4011 }
4012 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004013 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004014 if (elemName == NULL) {
4015 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4017 ctxt->sax->error(ctxt->userData,
4018 "ATTLIST: no name for Element\n");
4019 ctxt->wellFormed = 0;
4020 ctxt->disableSAX = 1;
4021 return;
4022 }
4023 SKIP_BLANKS;
4024 GROW;
4025 while (RAW != '>') {
4026 const xmlChar *check = CUR_PTR;
4027 int type;
4028 int def;
4029 xmlChar *defaultValue = NULL;
4030
4031 GROW;
4032 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004033 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004034 if (attrName == NULL) {
4035 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4037 ctxt->sax->error(ctxt->userData,
4038 "ATTLIST: no name for Attribute\n");
4039 ctxt->wellFormed = 0;
4040 ctxt->disableSAX = 1;
4041 break;
4042 }
4043 GROW;
4044 if (!IS_BLANK(CUR)) {
4045 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4047 ctxt->sax->error(ctxt->userData,
4048 "Space required after the attribute name\n");
4049 ctxt->wellFormed = 0;
4050 ctxt->disableSAX = 1;
4051 if (attrName != NULL)
4052 xmlFree(attrName);
4053 if (defaultValue != NULL)
4054 xmlFree(defaultValue);
4055 break;
4056 }
4057 SKIP_BLANKS;
4058
4059 type = xmlParseAttributeType(ctxt, &tree);
4060 if (type <= 0) {
4061 if (attrName != NULL)
4062 xmlFree(attrName);
4063 if (defaultValue != NULL)
4064 xmlFree(defaultValue);
4065 break;
4066 }
4067
4068 GROW;
4069 if (!IS_BLANK(CUR)) {
4070 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4072 ctxt->sax->error(ctxt->userData,
4073 "Space required after the attribute type\n");
4074 ctxt->wellFormed = 0;
4075 ctxt->disableSAX = 1;
4076 if (attrName != NULL)
4077 xmlFree(attrName);
4078 if (defaultValue != NULL)
4079 xmlFree(defaultValue);
4080 if (tree != NULL)
4081 xmlFreeEnumeration(tree);
4082 break;
4083 }
4084 SKIP_BLANKS;
4085
4086 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4087 if (def <= 0) {
4088 if (attrName != NULL)
4089 xmlFree(attrName);
4090 if (defaultValue != NULL)
4091 xmlFree(defaultValue);
4092 if (tree != NULL)
4093 xmlFreeEnumeration(tree);
4094 break;
4095 }
4096
4097 GROW;
4098 if (RAW != '>') {
4099 if (!IS_BLANK(CUR)) {
4100 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4102 ctxt->sax->error(ctxt->userData,
4103 "Space required after the attribute default value\n");
4104 ctxt->wellFormed = 0;
4105 ctxt->disableSAX = 1;
4106 if (attrName != NULL)
4107 xmlFree(attrName);
4108 if (defaultValue != NULL)
4109 xmlFree(defaultValue);
4110 if (tree != NULL)
4111 xmlFreeEnumeration(tree);
4112 break;
4113 }
4114 SKIP_BLANKS;
4115 }
4116 if (check == CUR_PTR) {
4117 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4119 ctxt->sax->error(ctxt->userData,
4120 "xmlParseAttributeListDecl: detected internal error\n");
4121 if (attrName != NULL)
4122 xmlFree(attrName);
4123 if (defaultValue != NULL)
4124 xmlFree(defaultValue);
4125 if (tree != NULL)
4126 xmlFreeEnumeration(tree);
4127 break;
4128 }
4129 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4130 (ctxt->sax->attributeDecl != NULL))
4131 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4132 type, def, defaultValue, tree);
4133 if (attrName != NULL)
4134 xmlFree(attrName);
4135 if (defaultValue != NULL)
4136 xmlFree(defaultValue);
4137 GROW;
4138 }
4139 if (RAW == '>') {
4140 if (input != ctxt->input) {
4141 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4143 ctxt->sax->error(ctxt->userData,
4144"Attribute list declaration doesn't start and stop in the same entity\n");
4145 ctxt->wellFormed = 0;
4146 ctxt->disableSAX = 1;
4147 }
4148 NEXT;
4149 }
4150
4151 xmlFree(elemName);
4152 }
4153}
4154
4155/**
4156 * xmlParseElementMixedContentDecl:
4157 * @ctxt: an XML parser context
4158 *
4159 * parse the declaration for a Mixed Element content
4160 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4161 *
4162 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4163 * '(' S? '#PCDATA' S? ')'
4164 *
4165 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4166 *
4167 * [ VC: No Duplicate Types ]
4168 * The same name must not appear more than once in a single
4169 * mixed-content declaration.
4170 *
4171 * returns: the list of the xmlElementContentPtr describing the element choices
4172 */
4173xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004174xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004175 xmlElementContentPtr ret = NULL, cur = NULL, n;
4176 xmlChar *elem = NULL;
4177
4178 GROW;
4179 if ((RAW == '#') && (NXT(1) == 'P') &&
4180 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4181 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4182 (NXT(6) == 'A')) {
4183 SKIP(7);
4184 SKIP_BLANKS;
4185 SHRINK;
4186 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004187 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4188 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4189 if (ctxt->vctxt.error != NULL)
4190 ctxt->vctxt.error(ctxt->vctxt.userData,
4191"Element content declaration doesn't start and stop in the same entity\n");
4192 ctxt->valid = 0;
4193 }
Owen Taylor3473f882001-02-23 17:55:21 +00004194 NEXT;
4195 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4196 if (RAW == '*') {
4197 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4198 NEXT;
4199 }
4200 return(ret);
4201 }
4202 if ((RAW == '(') || (RAW == '|')) {
4203 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4204 if (ret == NULL) return(NULL);
4205 }
4206 while (RAW == '|') {
4207 NEXT;
4208 if (elem == NULL) {
4209 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4210 if (ret == NULL) return(NULL);
4211 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004212 if (cur != NULL)
4213 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004214 cur = ret;
4215 } else {
4216 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4217 if (n == NULL) return(NULL);
4218 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004219 if (n->c1 != NULL)
4220 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004221 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004222 if (n != NULL)
4223 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004224 cur = n;
4225 xmlFree(elem);
4226 }
4227 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004228 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004229 if (elem == NULL) {
4230 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4231 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4232 ctxt->sax->error(ctxt->userData,
4233 "xmlParseElementMixedContentDecl : Name expected\n");
4234 ctxt->wellFormed = 0;
4235 ctxt->disableSAX = 1;
4236 xmlFreeElementContent(cur);
4237 return(NULL);
4238 }
4239 SKIP_BLANKS;
4240 GROW;
4241 }
4242 if ((RAW == ')') && (NXT(1) == '*')) {
4243 if (elem != NULL) {
4244 cur->c2 = xmlNewElementContent(elem,
4245 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004246 if (cur->c2 != NULL)
4247 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004248 xmlFree(elem);
4249 }
4250 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004251 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4252 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4253 if (ctxt->vctxt.error != NULL)
4254 ctxt->vctxt.error(ctxt->vctxt.userData,
4255"Element content declaration doesn't start and stop in the same entity\n");
4256 ctxt->valid = 0;
4257 }
Owen Taylor3473f882001-02-23 17:55:21 +00004258 SKIP(2);
4259 } else {
4260 if (elem != NULL) xmlFree(elem);
4261 xmlFreeElementContent(ret);
4262 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4264 ctxt->sax->error(ctxt->userData,
4265 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4266 ctxt->wellFormed = 0;
4267 ctxt->disableSAX = 1;
4268 return(NULL);
4269 }
4270
4271 } else {
4272 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4274 ctxt->sax->error(ctxt->userData,
4275 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4276 ctxt->wellFormed = 0;
4277 ctxt->disableSAX = 1;
4278 }
4279 return(ret);
4280}
4281
4282/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004283 * xmlParseElementChildrenContentD:
4284 * @ctxt: an XML parser context
4285 *
4286 * VMS version of xmlParseElementChildrenContentDecl()
4287 *
4288 * Returns the tree of xmlElementContentPtr describing the element
4289 * hierarchy.
4290 */
4291/**
Owen Taylor3473f882001-02-23 17:55:21 +00004292 * xmlParseElementChildrenContentDecl:
4293 * @ctxt: an XML parser context
4294 *
4295 * parse the declaration for a Mixed Element content
4296 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4297 *
4298 *
4299 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4300 *
4301 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4302 *
4303 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4304 *
4305 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4306 *
4307 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4308 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004309 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004310 * opening or closing parentheses in a choice, seq, or Mixed
4311 * construct is contained in the replacement text for a parameter
4312 * entity, both must be contained in the same replacement text. For
4313 * interoperability, if a parameter-entity reference appears in a
4314 * choice, seq, or Mixed construct, its replacement text should not
4315 * be empty, and neither the first nor last non-blank character of
4316 * the replacement text should be a connector (| or ,).
4317 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004318 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004319 * hierarchy.
4320 */
4321xmlElementContentPtr
4322#ifdef VMS
4323xmlParseElementChildrenContentD
4324#else
4325xmlParseElementChildrenContentDecl
4326#endif
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004327(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004328 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4329 xmlChar *elem;
4330 xmlChar type = 0;
4331
4332 SKIP_BLANKS;
4333 GROW;
4334 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004335 xmlParserInputPtr input = ctxt->input;
4336
Owen Taylor3473f882001-02-23 17:55:21 +00004337 /* Recurse on first child */
4338 NEXT;
4339 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004340 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004341 SKIP_BLANKS;
4342 GROW;
4343 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004344 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004345 if (elem == NULL) {
4346 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4348 ctxt->sax->error(ctxt->userData,
4349 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4350 ctxt->wellFormed = 0;
4351 ctxt->disableSAX = 1;
4352 return(NULL);
4353 }
4354 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4355 GROW;
4356 if (RAW == '?') {
4357 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4358 NEXT;
4359 } else if (RAW == '*') {
4360 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4361 NEXT;
4362 } else if (RAW == '+') {
4363 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4364 NEXT;
4365 } else {
4366 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4367 }
4368 xmlFree(elem);
4369 GROW;
4370 }
4371 SKIP_BLANKS;
4372 SHRINK;
4373 while (RAW != ')') {
4374 /*
4375 * Each loop we parse one separator and one element.
4376 */
4377 if (RAW == ',') {
4378 if (type == 0) type = CUR;
4379
4380 /*
4381 * Detect "Name | Name , Name" error
4382 */
4383 else if (type != CUR) {
4384 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4386 ctxt->sax->error(ctxt->userData,
4387 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4388 type);
4389 ctxt->wellFormed = 0;
4390 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004391 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004392 xmlFreeElementContent(last);
4393 if (ret != NULL)
4394 xmlFreeElementContent(ret);
4395 return(NULL);
4396 }
4397 NEXT;
4398
4399 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4400 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004401 if ((last != NULL) && (last != ret))
4402 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004403 xmlFreeElementContent(ret);
4404 return(NULL);
4405 }
4406 if (last == NULL) {
4407 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004408 if (ret != NULL)
4409 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004410 ret = cur = op;
4411 } else {
4412 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004413 if (op != NULL)
4414 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004415 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004416 if (last != NULL)
4417 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004418 cur =op;
4419 last = NULL;
4420 }
4421 } else if (RAW == '|') {
4422 if (type == 0) type = CUR;
4423
4424 /*
4425 * Detect "Name , Name | Name" error
4426 */
4427 else if (type != CUR) {
4428 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4430 ctxt->sax->error(ctxt->userData,
4431 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4432 type);
4433 ctxt->wellFormed = 0;
4434 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004435 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004436 xmlFreeElementContent(last);
4437 if (ret != NULL)
4438 xmlFreeElementContent(ret);
4439 return(NULL);
4440 }
4441 NEXT;
4442
4443 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4444 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004445 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004446 xmlFreeElementContent(last);
4447 if (ret != NULL)
4448 xmlFreeElementContent(ret);
4449 return(NULL);
4450 }
4451 if (last == NULL) {
4452 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004453 if (ret != NULL)
4454 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004455 ret = cur = op;
4456 } else {
4457 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004458 if (op != NULL)
4459 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004460 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004461 if (last != NULL)
4462 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004463 cur =op;
4464 last = NULL;
4465 }
4466 } else {
4467 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4469 ctxt->sax->error(ctxt->userData,
4470 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4471 ctxt->wellFormed = 0;
4472 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004473 if (ret != NULL)
4474 xmlFreeElementContent(ret);
4475 return(NULL);
4476 }
4477 GROW;
4478 SKIP_BLANKS;
4479 GROW;
4480 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004481 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004482 /* Recurse on second child */
4483 NEXT;
4484 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004485 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004486 SKIP_BLANKS;
4487 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004488 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004489 if (elem == NULL) {
4490 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4492 ctxt->sax->error(ctxt->userData,
4493 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4494 ctxt->wellFormed = 0;
4495 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004496 if (ret != NULL)
4497 xmlFreeElementContent(ret);
4498 return(NULL);
4499 }
4500 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4501 xmlFree(elem);
4502 if (RAW == '?') {
4503 last->ocur = XML_ELEMENT_CONTENT_OPT;
4504 NEXT;
4505 } else if (RAW == '*') {
4506 last->ocur = XML_ELEMENT_CONTENT_MULT;
4507 NEXT;
4508 } else if (RAW == '+') {
4509 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4510 NEXT;
4511 } else {
4512 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4513 }
4514 }
4515 SKIP_BLANKS;
4516 GROW;
4517 }
4518 if ((cur != NULL) && (last != NULL)) {
4519 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004520 if (last != NULL)
4521 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004522 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004523 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4524 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4525 if (ctxt->vctxt.error != NULL)
4526 ctxt->vctxt.error(ctxt->vctxt.userData,
4527"Element content declaration doesn't start and stop in the same entity\n");
4528 ctxt->valid = 0;
4529 }
Owen Taylor3473f882001-02-23 17:55:21 +00004530 NEXT;
4531 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004532 if (ret != NULL)
4533 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004534 NEXT;
4535 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004536 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004537 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004538 cur = ret;
4539 /*
4540 * Some normalization:
4541 * (a | b* | c?)* == (a | b | c)*
4542 */
4543 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4544 if ((cur->c1 != NULL) &&
4545 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4546 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4547 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4548 if ((cur->c2 != NULL) &&
4549 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4550 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4551 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4552 cur = cur->c2;
4553 }
4554 }
Owen Taylor3473f882001-02-23 17:55:21 +00004555 NEXT;
4556 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004557 if (ret != NULL) {
4558 int found = 0;
4559
Daniel Veillarde470df72001-04-18 21:41:07 +00004560 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004561 /*
4562 * Some normalization:
4563 * (a | b*)+ == (a | b)*
4564 * (a | b?)+ == (a | b)*
4565 */
4566 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4567 if ((cur->c1 != NULL) &&
4568 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4569 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4570 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4571 found = 1;
4572 }
4573 if ((cur->c2 != NULL) &&
4574 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4575 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4576 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4577 found = 1;
4578 }
4579 cur = cur->c2;
4580 }
4581 if (found)
4582 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4583 }
Owen Taylor3473f882001-02-23 17:55:21 +00004584 NEXT;
4585 }
4586 return(ret);
4587}
4588
4589/**
4590 * xmlParseElementContentDecl:
4591 * @ctxt: an XML parser context
4592 * @name: the name of the element being defined.
4593 * @result: the Element Content pointer will be stored here if any
4594 *
4595 * parse the declaration for an Element content either Mixed or Children,
4596 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4597 *
4598 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4599 *
4600 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4601 */
4602
4603int
4604xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4605 xmlElementContentPtr *result) {
4606
4607 xmlElementContentPtr tree = NULL;
4608 xmlParserInputPtr input = ctxt->input;
4609 int res;
4610
4611 *result = NULL;
4612
4613 if (RAW != '(') {
4614 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4616 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004617 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004618 ctxt->wellFormed = 0;
4619 ctxt->disableSAX = 1;
4620 return(-1);
4621 }
4622 NEXT;
4623 GROW;
4624 SKIP_BLANKS;
4625 if ((RAW == '#') && (NXT(1) == 'P') &&
4626 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4627 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4628 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004629 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004630 res = XML_ELEMENT_TYPE_MIXED;
4631 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004632 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004633 res = XML_ELEMENT_TYPE_ELEMENT;
4634 }
Owen Taylor3473f882001-02-23 17:55:21 +00004635 SKIP_BLANKS;
4636 *result = tree;
4637 return(res);
4638}
4639
4640/**
4641 * xmlParseElementDecl:
4642 * @ctxt: an XML parser context
4643 *
4644 * parse an Element declaration.
4645 *
4646 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4647 *
4648 * [ VC: Unique Element Type Declaration ]
4649 * No element type may be declared more than once
4650 *
4651 * Returns the type of the element, or -1 in case of error
4652 */
4653int
4654xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4655 xmlChar *name;
4656 int ret = -1;
4657 xmlElementContentPtr content = NULL;
4658
4659 GROW;
4660 if ((RAW == '<') && (NXT(1) == '!') &&
4661 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4662 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4663 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4664 (NXT(8) == 'T')) {
4665 xmlParserInputPtr input = ctxt->input;
4666
4667 SKIP(9);
4668 if (!IS_BLANK(CUR)) {
4669 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4671 ctxt->sax->error(ctxt->userData,
4672 "Space required after 'ELEMENT'\n");
4673 ctxt->wellFormed = 0;
4674 ctxt->disableSAX = 1;
4675 }
4676 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004677 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004678 if (name == NULL) {
4679 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4681 ctxt->sax->error(ctxt->userData,
4682 "xmlParseElementDecl: no name for Element\n");
4683 ctxt->wellFormed = 0;
4684 ctxt->disableSAX = 1;
4685 return(-1);
4686 }
4687 while ((RAW == 0) && (ctxt->inputNr > 1))
4688 xmlPopInput(ctxt);
4689 if (!IS_BLANK(CUR)) {
4690 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4691 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4692 ctxt->sax->error(ctxt->userData,
4693 "Space required after the element name\n");
4694 ctxt->wellFormed = 0;
4695 ctxt->disableSAX = 1;
4696 }
4697 SKIP_BLANKS;
4698 if ((RAW == 'E') && (NXT(1) == 'M') &&
4699 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4700 (NXT(4) == 'Y')) {
4701 SKIP(5);
4702 /*
4703 * Element must always be empty.
4704 */
4705 ret = XML_ELEMENT_TYPE_EMPTY;
4706 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4707 (NXT(2) == 'Y')) {
4708 SKIP(3);
4709 /*
4710 * Element is a generic container.
4711 */
4712 ret = XML_ELEMENT_TYPE_ANY;
4713 } else if (RAW == '(') {
4714 ret = xmlParseElementContentDecl(ctxt, name, &content);
4715 } else {
4716 /*
4717 * [ WFC: PEs in Internal Subset ] error handling.
4718 */
4719 if ((RAW == '%') && (ctxt->external == 0) &&
4720 (ctxt->inputNr == 1)) {
4721 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4723 ctxt->sax->error(ctxt->userData,
4724 "PEReference: forbidden within markup decl in internal subset\n");
4725 } else {
4726 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4728 ctxt->sax->error(ctxt->userData,
4729 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4730 }
4731 ctxt->wellFormed = 0;
4732 ctxt->disableSAX = 1;
4733 if (name != NULL) xmlFree(name);
4734 return(-1);
4735 }
4736
4737 SKIP_BLANKS;
4738 /*
4739 * Pop-up of finished entities.
4740 */
4741 while ((RAW == 0) && (ctxt->inputNr > 1))
4742 xmlPopInput(ctxt);
4743 SKIP_BLANKS;
4744
4745 if (RAW != '>') {
4746 ctxt->errNo = XML_ERR_GT_REQUIRED;
4747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4748 ctxt->sax->error(ctxt->userData,
4749 "xmlParseElementDecl: expected '>' at the end\n");
4750 ctxt->wellFormed = 0;
4751 ctxt->disableSAX = 1;
4752 } else {
4753 if (input != ctxt->input) {
4754 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4756 ctxt->sax->error(ctxt->userData,
4757"Element declaration doesn't start and stop in the same entity\n");
4758 ctxt->wellFormed = 0;
4759 ctxt->disableSAX = 1;
4760 }
4761
4762 NEXT;
4763 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4764 (ctxt->sax->elementDecl != NULL))
4765 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4766 content);
4767 }
4768 if (content != NULL) {
4769 xmlFreeElementContent(content);
4770 }
4771 if (name != NULL) {
4772 xmlFree(name);
4773 }
4774 }
4775 return(ret);
4776}
4777
4778/**
Owen Taylor3473f882001-02-23 17:55:21 +00004779 * xmlParseConditionalSections
4780 * @ctxt: an XML parser context
4781 *
4782 * [61] conditionalSect ::= includeSect | ignoreSect
4783 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4784 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4785 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4786 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4787 */
4788
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004789static void
Owen Taylor3473f882001-02-23 17:55:21 +00004790xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4791 SKIP(3);
4792 SKIP_BLANKS;
4793 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4794 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4795 (NXT(6) == 'E')) {
4796 SKIP(7);
4797 SKIP_BLANKS;
4798 if (RAW != '[') {
4799 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4801 ctxt->sax->error(ctxt->userData,
4802 "XML conditional section '[' expected\n");
4803 ctxt->wellFormed = 0;
4804 ctxt->disableSAX = 1;
4805 } else {
4806 NEXT;
4807 }
4808 if (xmlParserDebugEntities) {
4809 if ((ctxt->input != NULL) && (ctxt->input->filename))
4810 xmlGenericError(xmlGenericErrorContext,
4811 "%s(%d): ", ctxt->input->filename,
4812 ctxt->input->line);
4813 xmlGenericError(xmlGenericErrorContext,
4814 "Entering INCLUDE Conditional Section\n");
4815 }
4816
4817 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4818 (NXT(2) != '>'))) {
4819 const xmlChar *check = CUR_PTR;
4820 int cons = ctxt->input->consumed;
4821 int tok = ctxt->token;
4822
4823 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4824 xmlParseConditionalSections(ctxt);
4825 } else if (IS_BLANK(CUR)) {
4826 NEXT;
4827 } else if (RAW == '%') {
4828 xmlParsePEReference(ctxt);
4829 } else
4830 xmlParseMarkupDecl(ctxt);
4831
4832 /*
4833 * Pop-up of finished entities.
4834 */
4835 while ((RAW == 0) && (ctxt->inputNr > 1))
4836 xmlPopInput(ctxt);
4837
4838 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4839 (tok == ctxt->token)) {
4840 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4842 ctxt->sax->error(ctxt->userData,
4843 "Content error in the external subset\n");
4844 ctxt->wellFormed = 0;
4845 ctxt->disableSAX = 1;
4846 break;
4847 }
4848 }
4849 if (xmlParserDebugEntities) {
4850 if ((ctxt->input != NULL) && (ctxt->input->filename))
4851 xmlGenericError(xmlGenericErrorContext,
4852 "%s(%d): ", ctxt->input->filename,
4853 ctxt->input->line);
4854 xmlGenericError(xmlGenericErrorContext,
4855 "Leaving INCLUDE Conditional Section\n");
4856 }
4857
4858 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4859 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4860 int state;
4861 int instate;
4862 int depth = 0;
4863
4864 SKIP(6);
4865 SKIP_BLANKS;
4866 if (RAW != '[') {
4867 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4869 ctxt->sax->error(ctxt->userData,
4870 "XML conditional section '[' expected\n");
4871 ctxt->wellFormed = 0;
4872 ctxt->disableSAX = 1;
4873 } else {
4874 NEXT;
4875 }
4876 if (xmlParserDebugEntities) {
4877 if ((ctxt->input != NULL) && (ctxt->input->filename))
4878 xmlGenericError(xmlGenericErrorContext,
4879 "%s(%d): ", ctxt->input->filename,
4880 ctxt->input->line);
4881 xmlGenericError(xmlGenericErrorContext,
4882 "Entering IGNORE Conditional Section\n");
4883 }
4884
4885 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004886 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004887 * But disable SAX event generating DTD building in the meantime
4888 */
4889 state = ctxt->disableSAX;
4890 instate = ctxt->instate;
4891 ctxt->disableSAX = 1;
4892 ctxt->instate = XML_PARSER_IGNORE;
4893
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004894 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004895 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4896 depth++;
4897 SKIP(3);
4898 continue;
4899 }
4900 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4901 if (--depth >= 0) SKIP(3);
4902 continue;
4903 }
4904 NEXT;
4905 continue;
4906 }
4907
4908 ctxt->disableSAX = state;
4909 ctxt->instate = instate;
4910
4911 if (xmlParserDebugEntities) {
4912 if ((ctxt->input != NULL) && (ctxt->input->filename))
4913 xmlGenericError(xmlGenericErrorContext,
4914 "%s(%d): ", ctxt->input->filename,
4915 ctxt->input->line);
4916 xmlGenericError(xmlGenericErrorContext,
4917 "Leaving IGNORE Conditional Section\n");
4918 }
4919
4920 } else {
4921 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4923 ctxt->sax->error(ctxt->userData,
4924 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4925 ctxt->wellFormed = 0;
4926 ctxt->disableSAX = 1;
4927 }
4928
4929 if (RAW == 0)
4930 SHRINK;
4931
4932 if (RAW == 0) {
4933 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4935 ctxt->sax->error(ctxt->userData,
4936 "XML conditional section not closed\n");
4937 ctxt->wellFormed = 0;
4938 ctxt->disableSAX = 1;
4939 } else {
4940 SKIP(3);
4941 }
4942}
4943
4944/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004945 * xmlParseMarkupDecl:
4946 * @ctxt: an XML parser context
4947 *
4948 * parse Markup declarations
4949 *
4950 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4951 * NotationDecl | PI | Comment
4952 *
4953 * [ VC: Proper Declaration/PE Nesting ]
4954 * Parameter-entity replacement text must be properly nested with
4955 * markup declarations. That is to say, if either the first character
4956 * or the last character of a markup declaration (markupdecl above) is
4957 * contained in the replacement text for a parameter-entity reference,
4958 * both must be contained in the same replacement text.
4959 *
4960 * [ WFC: PEs in Internal Subset ]
4961 * In the internal DTD subset, parameter-entity references can occur
4962 * only where markup declarations can occur, not within markup declarations.
4963 * (This does not apply to references that occur in external parameter
4964 * entities or to the external subset.)
4965 */
4966void
4967xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4968 GROW;
4969 xmlParseElementDecl(ctxt);
4970 xmlParseAttributeListDecl(ctxt);
4971 xmlParseEntityDecl(ctxt);
4972 xmlParseNotationDecl(ctxt);
4973 xmlParsePI(ctxt);
4974 xmlParseComment(ctxt);
4975 /*
4976 * This is only for internal subset. On external entities,
4977 * the replacement is done before parsing stage
4978 */
4979 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4980 xmlParsePEReference(ctxt);
4981
4982 /*
4983 * Conditional sections are allowed from entities included
4984 * by PE References in the internal subset.
4985 */
4986 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4987 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4988 xmlParseConditionalSections(ctxt);
4989 }
4990 }
4991
4992 ctxt->instate = XML_PARSER_DTD;
4993}
4994
4995/**
4996 * xmlParseTextDecl:
4997 * @ctxt: an XML parser context
4998 *
4999 * parse an XML declaration header for external entities
5000 *
5001 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5002 *
5003 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5004 */
5005
5006void
5007xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5008 xmlChar *version;
5009
5010 /*
5011 * We know that '<?xml' is here.
5012 */
5013 if ((RAW == '<') && (NXT(1) == '?') &&
5014 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5015 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5016 SKIP(5);
5017 } else {
5018 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5020 ctxt->sax->error(ctxt->userData,
5021 "Text declaration '<?xml' required\n");
5022 ctxt->wellFormed = 0;
5023 ctxt->disableSAX = 1;
5024
5025 return;
5026 }
5027
5028 if (!IS_BLANK(CUR)) {
5029 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5031 ctxt->sax->error(ctxt->userData,
5032 "Space needed after '<?xml'\n");
5033 ctxt->wellFormed = 0;
5034 ctxt->disableSAX = 1;
5035 }
5036 SKIP_BLANKS;
5037
5038 /*
5039 * We may have the VersionInfo here.
5040 */
5041 version = xmlParseVersionInfo(ctxt);
5042 if (version == NULL)
5043 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005044 else {
5045 if (!IS_BLANK(CUR)) {
5046 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5048 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5049 ctxt->wellFormed = 0;
5050 ctxt->disableSAX = 1;
5051 }
5052 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005053 ctxt->input->version = version;
5054
5055 /*
5056 * We must have the encoding declaration
5057 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005058 xmlParseEncodingDecl(ctxt);
5059 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5060 /*
5061 * The XML REC instructs us to stop parsing right here
5062 */
5063 return;
5064 }
5065
5066 SKIP_BLANKS;
5067 if ((RAW == '?') && (NXT(1) == '>')) {
5068 SKIP(2);
5069 } else if (RAW == '>') {
5070 /* Deprecated old WD ... */
5071 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5073 ctxt->sax->error(ctxt->userData,
5074 "XML declaration must end-up with '?>'\n");
5075 ctxt->wellFormed = 0;
5076 ctxt->disableSAX = 1;
5077 NEXT;
5078 } else {
5079 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5081 ctxt->sax->error(ctxt->userData,
5082 "parsing XML declaration: '?>' expected\n");
5083 ctxt->wellFormed = 0;
5084 ctxt->disableSAX = 1;
5085 MOVETO_ENDTAG(CUR_PTR);
5086 NEXT;
5087 }
5088}
5089
5090/**
Owen Taylor3473f882001-02-23 17:55:21 +00005091 * xmlParseExternalSubset:
5092 * @ctxt: an XML parser context
5093 * @ExternalID: the external identifier
5094 * @SystemID: the system identifier (or URL)
5095 *
5096 * parse Markup declarations from an external subset
5097 *
5098 * [30] extSubset ::= textDecl? extSubsetDecl
5099 *
5100 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5101 */
5102void
5103xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5104 const xmlChar *SystemID) {
5105 GROW;
5106 if ((RAW == '<') && (NXT(1) == '?') &&
5107 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5108 (NXT(4) == 'l')) {
5109 xmlParseTextDecl(ctxt);
5110 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5111 /*
5112 * The XML REC instructs us to stop parsing right here
5113 */
5114 ctxt->instate = XML_PARSER_EOF;
5115 return;
5116 }
5117 }
5118 if (ctxt->myDoc == NULL) {
5119 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5120 }
5121 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5122 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5123
5124 ctxt->instate = XML_PARSER_DTD;
5125 ctxt->external = 1;
5126 while (((RAW == '<') && (NXT(1) == '?')) ||
5127 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005128 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005129 const xmlChar *check = CUR_PTR;
5130 int cons = ctxt->input->consumed;
5131 int tok = ctxt->token;
5132
5133 GROW;
5134 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5135 xmlParseConditionalSections(ctxt);
5136 } else if (IS_BLANK(CUR)) {
5137 NEXT;
5138 } else if (RAW == '%') {
5139 xmlParsePEReference(ctxt);
5140 } else
5141 xmlParseMarkupDecl(ctxt);
5142
5143 /*
5144 * Pop-up of finished entities.
5145 */
5146 while ((RAW == 0) && (ctxt->inputNr > 1))
5147 xmlPopInput(ctxt);
5148
5149 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5150 (tok == ctxt->token)) {
5151 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5153 ctxt->sax->error(ctxt->userData,
5154 "Content error in the external subset\n");
5155 ctxt->wellFormed = 0;
5156 ctxt->disableSAX = 1;
5157 break;
5158 }
5159 }
5160
5161 if (RAW != 0) {
5162 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5163 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5164 ctxt->sax->error(ctxt->userData,
5165 "Extra content at the end of the document\n");
5166 ctxt->wellFormed = 0;
5167 ctxt->disableSAX = 1;
5168 }
5169
5170}
5171
5172/**
5173 * xmlParseReference:
5174 * @ctxt: an XML parser context
5175 *
5176 * parse and handle entity references in content, depending on the SAX
5177 * interface, this may end-up in a call to character() if this is a
5178 * CharRef, a predefined entity, if there is no reference() callback.
5179 * or if the parser was asked to switch to that mode.
5180 *
5181 * [67] Reference ::= EntityRef | CharRef
5182 */
5183void
5184xmlParseReference(xmlParserCtxtPtr ctxt) {
5185 xmlEntityPtr ent;
5186 xmlChar *val;
5187 if (RAW != '&') return;
5188
5189 if (NXT(1) == '#') {
5190 int i = 0;
5191 xmlChar out[10];
5192 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005193 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005194
5195 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5196 /*
5197 * So we are using non-UTF-8 buffers
5198 * Check that the char fit on 8bits, if not
5199 * generate a CharRef.
5200 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005201 if (value <= 0xFF) {
5202 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005203 out[1] = 0;
5204 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5205 (!ctxt->disableSAX))
5206 ctxt->sax->characters(ctxt->userData, out, 1);
5207 } else {
5208 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005209 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005210 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005211 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005212 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5213 (!ctxt->disableSAX))
5214 ctxt->sax->reference(ctxt->userData, out);
5215 }
5216 } else {
5217 /*
5218 * Just encode the value in UTF-8
5219 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005220 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005221 out[i] = 0;
5222 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5223 (!ctxt->disableSAX))
5224 ctxt->sax->characters(ctxt->userData, out, i);
5225 }
5226 } else {
5227 ent = xmlParseEntityRef(ctxt);
5228 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005229 if (!ctxt->wellFormed)
5230 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005231 if ((ent->name != NULL) &&
5232 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5233 xmlNodePtr list = NULL;
5234 int ret;
5235
5236
5237 /*
5238 * The first reference to the entity trigger a parsing phase
5239 * where the ent->children is filled with the result from
5240 * the parsing.
5241 */
5242 if (ent->children == NULL) {
5243 xmlChar *value;
5244 value = ent->content;
5245
5246 /*
5247 * Check that this entity is well formed
5248 */
5249 if ((value != NULL) &&
5250 (value[1] == 0) && (value[0] == '<') &&
5251 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5252 /*
5253 * DONE: get definite answer on this !!!
5254 * Lots of entity decls are used to declare a single
5255 * char
5256 * <!ENTITY lt "<">
5257 * Which seems to be valid since
5258 * 2.4: The ampersand character (&) and the left angle
5259 * bracket (<) may appear in their literal form only
5260 * when used ... They are also legal within the literal
5261 * entity value of an internal entity declaration;i
5262 * see "4.3.2 Well-Formed Parsed Entities".
5263 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5264 * Looking at the OASIS test suite and James Clark
5265 * tests, this is broken. However the XML REC uses
5266 * it. Is the XML REC not well-formed ????
5267 * This is a hack to avoid this problem
5268 *
5269 * ANSWER: since lt gt amp .. are already defined,
5270 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005271 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005272 * is lousy but acceptable.
5273 */
5274 list = xmlNewDocText(ctxt->myDoc, value);
5275 if (list != NULL) {
5276 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5277 (ent->children == NULL)) {
5278 ent->children = list;
5279 ent->last = list;
5280 list->parent = (xmlNodePtr) ent;
5281 } else {
5282 xmlFreeNodeList(list);
5283 }
5284 } else if (list != NULL) {
5285 xmlFreeNodeList(list);
5286 }
5287 } else {
5288 /*
5289 * 4.3.2: An internal general parsed entity is well-formed
5290 * if its replacement text matches the production labeled
5291 * content.
5292 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005293
5294 void *user_data;
5295 /*
5296 * This is a bit hackish but this seems the best
5297 * way to make sure both SAX and DOM entity support
5298 * behaves okay.
5299 */
5300 if (ctxt->userData == ctxt)
5301 user_data = NULL;
5302 else
5303 user_data = ctxt->userData;
5304
Owen Taylor3473f882001-02-23 17:55:21 +00005305 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5306 ctxt->depth++;
5307 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005308 ctxt->sax, user_data, ctxt->depth,
Owen Taylor3473f882001-02-23 17:55:21 +00005309 value, &list);
5310 ctxt->depth--;
5311 } else if (ent->etype ==
5312 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5313 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005314 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005315 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005316 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005317 ctxt->depth--;
5318 } else {
5319 ret = -1;
5320 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5321 ctxt->sax->error(ctxt->userData,
5322 "Internal: invalid entity type\n");
5323 }
5324 if (ret == XML_ERR_ENTITY_LOOP) {
5325 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5327 ctxt->sax->error(ctxt->userData,
5328 "Detected entity reference loop\n");
5329 ctxt->wellFormed = 0;
5330 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005331 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005332 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005333 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5334 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005335 (ent->children == NULL)) {
5336 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005337 if (ctxt->replaceEntities) {
5338 /*
5339 * Prune it directly in the generated document
5340 * except for single text nodes.
5341 */
5342 if ((list->type == XML_TEXT_NODE) &&
5343 (list->next == NULL)) {
5344 list->parent = (xmlNodePtr) ent;
5345 list = NULL;
5346 } else {
5347 while (list != NULL) {
5348 list->parent = (xmlNodePtr) ctxt->node;
5349 if (list->next == NULL)
5350 ent->last = list;
5351 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005352 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005353 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005354 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5355 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005356 }
5357 } else {
5358 while (list != NULL) {
5359 list->parent = (xmlNodePtr) ent;
5360 if (list->next == NULL)
5361 ent->last = list;
5362 list = list->next;
5363 }
Owen Taylor3473f882001-02-23 17:55:21 +00005364 }
5365 } else {
5366 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005367 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005368 }
5369 } else if (ret > 0) {
5370 ctxt->errNo = ret;
5371 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5372 ctxt->sax->error(ctxt->userData,
5373 "Entity value required\n");
5374 ctxt->wellFormed = 0;
5375 ctxt->disableSAX = 1;
5376 } else if (list != NULL) {
5377 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005378 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005379 }
5380 }
5381 }
5382 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5383 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5384 /*
5385 * Create a node.
5386 */
5387 ctxt->sax->reference(ctxt->userData, ent->name);
5388 return;
5389 } else if (ctxt->replaceEntities) {
5390 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5391 /*
5392 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005393 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005394 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005395 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005396 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005397 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005398 cur = ent->children;
5399 while (cur != NULL) {
5400 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005401 if (firstChild == NULL){
5402 firstChild = new;
5403 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005404 xmlAddChild(ctxt->node, new);
5405 if (cur == ent->last)
5406 break;
5407 cur = cur->next;
5408 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005409 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5410 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005411 } else {
5412 /*
5413 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005414 * node with a possible previous text one which
5415 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005416 */
5417 if (ent->children->type == XML_TEXT_NODE)
5418 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5419 if ((ent->last != ent->children) &&
5420 (ent->last->type == XML_TEXT_NODE))
5421 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5422 xmlAddChildList(ctxt->node, ent->children);
5423 }
5424
Owen Taylor3473f882001-02-23 17:55:21 +00005425 /*
5426 * This is to avoid a nasty side effect, see
5427 * characters() in SAX.c
5428 */
5429 ctxt->nodemem = 0;
5430 ctxt->nodelen = 0;
5431 return;
5432 } else {
5433 /*
5434 * Probably running in SAX mode
5435 */
5436 xmlParserInputPtr input;
5437
5438 input = xmlNewEntityInputStream(ctxt, ent);
5439 xmlPushInput(ctxt, input);
5440 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5441 (RAW == '<') && (NXT(1) == '?') &&
5442 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5443 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5444 xmlParseTextDecl(ctxt);
5445 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5446 /*
5447 * The XML REC instructs us to stop parsing right here
5448 */
5449 ctxt->instate = XML_PARSER_EOF;
5450 return;
5451 }
5452 if (input->standalone == 1) {
5453 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5455 ctxt->sax->error(ctxt->userData,
5456 "external parsed entities cannot be standalone\n");
5457 ctxt->wellFormed = 0;
5458 ctxt->disableSAX = 1;
5459 }
5460 }
5461 return;
5462 }
5463 }
5464 } else {
5465 val = ent->content;
5466 if (val == NULL) return;
5467 /*
5468 * inline the entity.
5469 */
5470 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5471 (!ctxt->disableSAX))
5472 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5473 }
5474 }
5475}
5476
5477/**
5478 * xmlParseEntityRef:
5479 * @ctxt: an XML parser context
5480 *
5481 * parse ENTITY references declarations
5482 *
5483 * [68] EntityRef ::= '&' Name ';'
5484 *
5485 * [ WFC: Entity Declared ]
5486 * In a document without any DTD, a document with only an internal DTD
5487 * subset which contains no parameter entity references, or a document
5488 * with "standalone='yes'", the Name given in the entity reference
5489 * must match that in an entity declaration, except that well-formed
5490 * documents need not declare any of the following entities: amp, lt,
5491 * gt, apos, quot. The declaration of a parameter entity must precede
5492 * any reference to it. Similarly, the declaration of a general entity
5493 * must precede any reference to it which appears in a default value in an
5494 * attribute-list declaration. Note that if entities are declared in the
5495 * external subset or in external parameter entities, a non-validating
5496 * processor is not obligated to read and process their declarations;
5497 * for such documents, the rule that an entity must be declared is a
5498 * well-formedness constraint only if standalone='yes'.
5499 *
5500 * [ WFC: Parsed Entity ]
5501 * An entity reference must not contain the name of an unparsed entity
5502 *
5503 * Returns the xmlEntityPtr if found, or NULL otherwise.
5504 */
5505xmlEntityPtr
5506xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5507 xmlChar *name;
5508 xmlEntityPtr ent = NULL;
5509
5510 GROW;
5511
5512 if (RAW == '&') {
5513 NEXT;
5514 name = xmlParseName(ctxt);
5515 if (name == NULL) {
5516 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5518 ctxt->sax->error(ctxt->userData,
5519 "xmlParseEntityRef: no name\n");
5520 ctxt->wellFormed = 0;
5521 ctxt->disableSAX = 1;
5522 } else {
5523 if (RAW == ';') {
5524 NEXT;
5525 /*
5526 * Ask first SAX for entity resolution, otherwise try the
5527 * predefined set.
5528 */
5529 if (ctxt->sax != NULL) {
5530 if (ctxt->sax->getEntity != NULL)
5531 ent = ctxt->sax->getEntity(ctxt->userData, name);
5532 if (ent == NULL)
5533 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005534 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5535 ent = getEntity(ctxt, name);
5536 }
Owen Taylor3473f882001-02-23 17:55:21 +00005537 }
5538 /*
5539 * [ WFC: Entity Declared ]
5540 * In a document without any DTD, a document with only an
5541 * internal DTD subset which contains no parameter entity
5542 * references, or a document with "standalone='yes'", the
5543 * Name given in the entity reference must match that in an
5544 * entity declaration, except that well-formed documents
5545 * need not declare any of the following entities: amp, lt,
5546 * gt, apos, quot.
5547 * The declaration of a parameter entity must precede any
5548 * reference to it.
5549 * Similarly, the declaration of a general entity must
5550 * precede any reference to it which appears in a default
5551 * value in an attribute-list declaration. Note that if
5552 * entities are declared in the external subset or in
5553 * external parameter entities, a non-validating processor
5554 * is not obligated to read and process their declarations;
5555 * for such documents, the rule that an entity must be
5556 * declared is a well-formedness constraint only if
5557 * standalone='yes'.
5558 */
5559 if (ent == NULL) {
5560 if ((ctxt->standalone == 1) ||
5561 ((ctxt->hasExternalSubset == 0) &&
5562 (ctxt->hasPErefs == 0))) {
5563 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5565 ctxt->sax->error(ctxt->userData,
5566 "Entity '%s' not defined\n", name);
5567 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005568 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005569 ctxt->disableSAX = 1;
5570 } else {
5571 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005573 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005574 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005575 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005576 }
5577 }
5578
5579 /*
5580 * [ WFC: Parsed Entity ]
5581 * An entity reference must not contain the name of an
5582 * unparsed entity
5583 */
5584 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5585 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5587 ctxt->sax->error(ctxt->userData,
5588 "Entity reference to unparsed entity %s\n", name);
5589 ctxt->wellFormed = 0;
5590 ctxt->disableSAX = 1;
5591 }
5592
5593 /*
5594 * [ WFC: No External Entity References ]
5595 * Attribute values cannot contain direct or indirect
5596 * entity references to external entities.
5597 */
5598 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5599 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5600 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5602 ctxt->sax->error(ctxt->userData,
5603 "Attribute references external entity '%s'\n", name);
5604 ctxt->wellFormed = 0;
5605 ctxt->disableSAX = 1;
5606 }
5607 /*
5608 * [ WFC: No < in Attribute Values ]
5609 * The replacement text of any entity referred to directly or
5610 * indirectly in an attribute value (other than "&lt;") must
5611 * not contain a <.
5612 */
5613 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5614 (ent != NULL) &&
5615 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5616 (ent->content != NULL) &&
5617 (xmlStrchr(ent->content, '<'))) {
5618 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5619 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5620 ctxt->sax->error(ctxt->userData,
5621 "'<' in entity '%s' is not allowed in attributes values\n", name);
5622 ctxt->wellFormed = 0;
5623 ctxt->disableSAX = 1;
5624 }
5625
5626 /*
5627 * Internal check, no parameter entities here ...
5628 */
5629 else {
5630 switch (ent->etype) {
5631 case XML_INTERNAL_PARAMETER_ENTITY:
5632 case XML_EXTERNAL_PARAMETER_ENTITY:
5633 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5635 ctxt->sax->error(ctxt->userData,
5636 "Attempt to reference the parameter entity '%s'\n", name);
5637 ctxt->wellFormed = 0;
5638 ctxt->disableSAX = 1;
5639 break;
5640 default:
5641 break;
5642 }
5643 }
5644
5645 /*
5646 * [ WFC: No Recursion ]
5647 * A parsed entity must not contain a recursive reference
5648 * to itself, either directly or indirectly.
5649 * Done somewhere else
5650 */
5651
5652 } else {
5653 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5655 ctxt->sax->error(ctxt->userData,
5656 "xmlParseEntityRef: expecting ';'\n");
5657 ctxt->wellFormed = 0;
5658 ctxt->disableSAX = 1;
5659 }
5660 xmlFree(name);
5661 }
5662 }
5663 return(ent);
5664}
5665
5666/**
5667 * xmlParseStringEntityRef:
5668 * @ctxt: an XML parser context
5669 * @str: a pointer to an index in the string
5670 *
5671 * parse ENTITY references declarations, but this version parses it from
5672 * a string value.
5673 *
5674 * [68] EntityRef ::= '&' Name ';'
5675 *
5676 * [ WFC: Entity Declared ]
5677 * In a document without any DTD, a document with only an internal DTD
5678 * subset which contains no parameter entity references, or a document
5679 * with "standalone='yes'", the Name given in the entity reference
5680 * must match that in an entity declaration, except that well-formed
5681 * documents need not declare any of the following entities: amp, lt,
5682 * gt, apos, quot. The declaration of a parameter entity must precede
5683 * any reference to it. Similarly, the declaration of a general entity
5684 * must precede any reference to it which appears in a default value in an
5685 * attribute-list declaration. Note that if entities are declared in the
5686 * external subset or in external parameter entities, a non-validating
5687 * processor is not obligated to read and process their declarations;
5688 * for such documents, the rule that an entity must be declared is a
5689 * well-formedness constraint only if standalone='yes'.
5690 *
5691 * [ WFC: Parsed Entity ]
5692 * An entity reference must not contain the name of an unparsed entity
5693 *
5694 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5695 * is updated to the current location in the string.
5696 */
5697xmlEntityPtr
5698xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5699 xmlChar *name;
5700 const xmlChar *ptr;
5701 xmlChar cur;
5702 xmlEntityPtr ent = NULL;
5703
5704 if ((str == NULL) || (*str == NULL))
5705 return(NULL);
5706 ptr = *str;
5707 cur = *ptr;
5708 if (cur == '&') {
5709 ptr++;
5710 cur = *ptr;
5711 name = xmlParseStringName(ctxt, &ptr);
5712 if (name == NULL) {
5713 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5715 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005716 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005717 ctxt->wellFormed = 0;
5718 ctxt->disableSAX = 1;
5719 } else {
5720 if (*ptr == ';') {
5721 ptr++;
5722 /*
5723 * Ask first SAX for entity resolution, otherwise try the
5724 * predefined set.
5725 */
5726 if (ctxt->sax != NULL) {
5727 if (ctxt->sax->getEntity != NULL)
5728 ent = ctxt->sax->getEntity(ctxt->userData, name);
5729 if (ent == NULL)
5730 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005731 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5732 ent = getEntity(ctxt, name);
5733 }
Owen Taylor3473f882001-02-23 17:55:21 +00005734 }
5735 /*
5736 * [ WFC: Entity Declared ]
5737 * In a document without any DTD, a document with only an
5738 * internal DTD subset which contains no parameter entity
5739 * references, or a document with "standalone='yes'", the
5740 * Name given in the entity reference must match that in an
5741 * entity declaration, except that well-formed documents
5742 * need not declare any of the following entities: amp, lt,
5743 * gt, apos, quot.
5744 * The declaration of a parameter entity must precede any
5745 * reference to it.
5746 * Similarly, the declaration of a general entity must
5747 * precede any reference to it which appears in a default
5748 * value in an attribute-list declaration. Note that if
5749 * entities are declared in the external subset or in
5750 * external parameter entities, a non-validating processor
5751 * is not obligated to read and process their declarations;
5752 * for such documents, the rule that an entity must be
5753 * declared is a well-formedness constraint only if
5754 * standalone='yes'.
5755 */
5756 if (ent == NULL) {
5757 if ((ctxt->standalone == 1) ||
5758 ((ctxt->hasExternalSubset == 0) &&
5759 (ctxt->hasPErefs == 0))) {
5760 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5762 ctxt->sax->error(ctxt->userData,
5763 "Entity '%s' not defined\n", name);
5764 ctxt->wellFormed = 0;
5765 ctxt->disableSAX = 1;
5766 } else {
5767 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5768 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5769 ctxt->sax->warning(ctxt->userData,
5770 "Entity '%s' not defined\n", name);
5771 }
5772 }
5773
5774 /*
5775 * [ WFC: Parsed Entity ]
5776 * An entity reference must not contain the name of an
5777 * unparsed entity
5778 */
5779 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5780 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5781 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5782 ctxt->sax->error(ctxt->userData,
5783 "Entity reference to unparsed entity %s\n", name);
5784 ctxt->wellFormed = 0;
5785 ctxt->disableSAX = 1;
5786 }
5787
5788 /*
5789 * [ WFC: No External Entity References ]
5790 * Attribute values cannot contain direct or indirect
5791 * entity references to external entities.
5792 */
5793 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5794 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5795 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5797 ctxt->sax->error(ctxt->userData,
5798 "Attribute references external entity '%s'\n", name);
5799 ctxt->wellFormed = 0;
5800 ctxt->disableSAX = 1;
5801 }
5802 /*
5803 * [ WFC: No < in Attribute Values ]
5804 * The replacement text of any entity referred to directly or
5805 * indirectly in an attribute value (other than "&lt;") must
5806 * not contain a <.
5807 */
5808 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5809 (ent != NULL) &&
5810 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5811 (ent->content != NULL) &&
5812 (xmlStrchr(ent->content, '<'))) {
5813 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5815 ctxt->sax->error(ctxt->userData,
5816 "'<' in entity '%s' is not allowed in attributes values\n", name);
5817 ctxt->wellFormed = 0;
5818 ctxt->disableSAX = 1;
5819 }
5820
5821 /*
5822 * Internal check, no parameter entities here ...
5823 */
5824 else {
5825 switch (ent->etype) {
5826 case XML_INTERNAL_PARAMETER_ENTITY:
5827 case XML_EXTERNAL_PARAMETER_ENTITY:
5828 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5830 ctxt->sax->error(ctxt->userData,
5831 "Attempt to reference the parameter entity '%s'\n", name);
5832 ctxt->wellFormed = 0;
5833 ctxt->disableSAX = 1;
5834 break;
5835 default:
5836 break;
5837 }
5838 }
5839
5840 /*
5841 * [ WFC: No Recursion ]
5842 * A parsed entity must not contain a recursive reference
5843 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005844 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005845 */
5846
5847 } else {
5848 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5850 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005851 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005852 ctxt->wellFormed = 0;
5853 ctxt->disableSAX = 1;
5854 }
5855 xmlFree(name);
5856 }
5857 }
5858 *str = ptr;
5859 return(ent);
5860}
5861
5862/**
5863 * xmlParsePEReference:
5864 * @ctxt: an XML parser context
5865 *
5866 * parse PEReference declarations
5867 * The entity content is handled directly by pushing its content as
5868 * a new input stream.
5869 *
5870 * [69] PEReference ::= '%' Name ';'
5871 *
5872 * [ WFC: No Recursion ]
5873 * A parsed entity must not contain a recursive
5874 * reference to itself, either directly or indirectly.
5875 *
5876 * [ WFC: Entity Declared ]
5877 * In a document without any DTD, a document with only an internal DTD
5878 * subset which contains no parameter entity references, or a document
5879 * with "standalone='yes'", ... ... The declaration of a parameter
5880 * entity must precede any reference to it...
5881 *
5882 * [ VC: Entity Declared ]
5883 * In a document with an external subset or external parameter entities
5884 * with "standalone='no'", ... ... The declaration of a parameter entity
5885 * must precede any reference to it...
5886 *
5887 * [ WFC: In DTD ]
5888 * Parameter-entity references may only appear in the DTD.
5889 * NOTE: misleading but this is handled.
5890 */
5891void
5892xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5893 xmlChar *name;
5894 xmlEntityPtr entity = NULL;
5895 xmlParserInputPtr input;
5896
5897 if (RAW == '%') {
5898 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005899 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005900 if (name == NULL) {
5901 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5903 ctxt->sax->error(ctxt->userData,
5904 "xmlParsePEReference: no name\n");
5905 ctxt->wellFormed = 0;
5906 ctxt->disableSAX = 1;
5907 } else {
5908 if (RAW == ';') {
5909 NEXT;
5910 if ((ctxt->sax != NULL) &&
5911 (ctxt->sax->getParameterEntity != NULL))
5912 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5913 name);
5914 if (entity == NULL) {
5915 /*
5916 * [ WFC: Entity Declared ]
5917 * In a document without any DTD, a document with only an
5918 * internal DTD subset which contains no parameter entity
5919 * references, or a document with "standalone='yes'", ...
5920 * ... The declaration of a parameter entity must precede
5921 * any reference to it...
5922 */
5923 if ((ctxt->standalone == 1) ||
5924 ((ctxt->hasExternalSubset == 0) &&
5925 (ctxt->hasPErefs == 0))) {
5926 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5927 if ((!ctxt->disableSAX) &&
5928 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5929 ctxt->sax->error(ctxt->userData,
5930 "PEReference: %%%s; not found\n", name);
5931 ctxt->wellFormed = 0;
5932 ctxt->disableSAX = 1;
5933 } else {
5934 /*
5935 * [ VC: Entity Declared ]
5936 * In a document with an external subset or external
5937 * parameter entities with "standalone='no'", ...
5938 * ... The declaration of a parameter entity must precede
5939 * any reference to it...
5940 */
5941 if ((!ctxt->disableSAX) &&
5942 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5943 ctxt->sax->warning(ctxt->userData,
5944 "PEReference: %%%s; not found\n", name);
5945 ctxt->valid = 0;
5946 }
5947 } else {
5948 /*
5949 * Internal checking in case the entity quest barfed
5950 */
5951 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5952 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5953 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5954 ctxt->sax->warning(ctxt->userData,
5955 "Internal: %%%s; is not a parameter entity\n", name);
5956 } else {
5957 /*
5958 * TODO !!!
5959 * handle the extra spaces added before and after
5960 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5961 */
5962 input = xmlNewEntityInputStream(ctxt, entity);
5963 xmlPushInput(ctxt, input);
5964 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5965 (RAW == '<') && (NXT(1) == '?') &&
5966 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5967 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5968 xmlParseTextDecl(ctxt);
5969 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5970 /*
5971 * The XML REC instructs us to stop parsing
5972 * right here
5973 */
5974 ctxt->instate = XML_PARSER_EOF;
5975 xmlFree(name);
5976 return;
5977 }
5978 }
5979 if (ctxt->token == 0)
5980 ctxt->token = ' ';
5981 }
5982 }
5983 ctxt->hasPErefs = 1;
5984 } else {
5985 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5987 ctxt->sax->error(ctxt->userData,
5988 "xmlParsePEReference: expecting ';'\n");
5989 ctxt->wellFormed = 0;
5990 ctxt->disableSAX = 1;
5991 }
5992 xmlFree(name);
5993 }
5994 }
5995}
5996
5997/**
5998 * xmlParseStringPEReference:
5999 * @ctxt: an XML parser context
6000 * @str: a pointer to an index in the string
6001 *
6002 * parse PEReference declarations
6003 *
6004 * [69] PEReference ::= '%' Name ';'
6005 *
6006 * [ WFC: No Recursion ]
6007 * A parsed entity must not contain a recursive
6008 * reference to itself, either directly or indirectly.
6009 *
6010 * [ WFC: Entity Declared ]
6011 * In a document without any DTD, a document with only an internal DTD
6012 * subset which contains no parameter entity references, or a document
6013 * with "standalone='yes'", ... ... The declaration of a parameter
6014 * entity must precede any reference to it...
6015 *
6016 * [ VC: Entity Declared ]
6017 * In a document with an external subset or external parameter entities
6018 * with "standalone='no'", ... ... The declaration of a parameter entity
6019 * must precede any reference to it...
6020 *
6021 * [ WFC: In DTD ]
6022 * Parameter-entity references may only appear in the DTD.
6023 * NOTE: misleading but this is handled.
6024 *
6025 * Returns the string of the entity content.
6026 * str is updated to the current value of the index
6027 */
6028xmlEntityPtr
6029xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6030 const xmlChar *ptr;
6031 xmlChar cur;
6032 xmlChar *name;
6033 xmlEntityPtr entity = NULL;
6034
6035 if ((str == NULL) || (*str == NULL)) return(NULL);
6036 ptr = *str;
6037 cur = *ptr;
6038 if (cur == '%') {
6039 ptr++;
6040 cur = *ptr;
6041 name = xmlParseStringName(ctxt, &ptr);
6042 if (name == NULL) {
6043 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6044 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6045 ctxt->sax->error(ctxt->userData,
6046 "xmlParseStringPEReference: no name\n");
6047 ctxt->wellFormed = 0;
6048 ctxt->disableSAX = 1;
6049 } else {
6050 cur = *ptr;
6051 if (cur == ';') {
6052 ptr++;
6053 cur = *ptr;
6054 if ((ctxt->sax != NULL) &&
6055 (ctxt->sax->getParameterEntity != NULL))
6056 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6057 name);
6058 if (entity == NULL) {
6059 /*
6060 * [ WFC: Entity Declared ]
6061 * In a document without any DTD, a document with only an
6062 * internal DTD subset which contains no parameter entity
6063 * references, or a document with "standalone='yes'", ...
6064 * ... The declaration of a parameter entity must precede
6065 * any reference to it...
6066 */
6067 if ((ctxt->standalone == 1) ||
6068 ((ctxt->hasExternalSubset == 0) &&
6069 (ctxt->hasPErefs == 0))) {
6070 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6072 ctxt->sax->error(ctxt->userData,
6073 "PEReference: %%%s; not found\n", name);
6074 ctxt->wellFormed = 0;
6075 ctxt->disableSAX = 1;
6076 } else {
6077 /*
6078 * [ VC: Entity Declared ]
6079 * In a document with an external subset or external
6080 * parameter entities with "standalone='no'", ...
6081 * ... The declaration of a parameter entity must
6082 * precede any reference to it...
6083 */
6084 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6085 ctxt->sax->warning(ctxt->userData,
6086 "PEReference: %%%s; not found\n", name);
6087 ctxt->valid = 0;
6088 }
6089 } else {
6090 /*
6091 * Internal checking in case the entity quest barfed
6092 */
6093 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6094 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6095 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6096 ctxt->sax->warning(ctxt->userData,
6097 "Internal: %%%s; is not a parameter entity\n", name);
6098 }
6099 }
6100 ctxt->hasPErefs = 1;
6101 } else {
6102 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6104 ctxt->sax->error(ctxt->userData,
6105 "xmlParseStringPEReference: expecting ';'\n");
6106 ctxt->wellFormed = 0;
6107 ctxt->disableSAX = 1;
6108 }
6109 xmlFree(name);
6110 }
6111 }
6112 *str = ptr;
6113 return(entity);
6114}
6115
6116/**
6117 * xmlParseDocTypeDecl:
6118 * @ctxt: an XML parser context
6119 *
6120 * parse a DOCTYPE declaration
6121 *
6122 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6123 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6124 *
6125 * [ VC: Root Element Type ]
6126 * The Name in the document type declaration must match the element
6127 * type of the root element.
6128 */
6129
6130void
6131xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6132 xmlChar *name = NULL;
6133 xmlChar *ExternalID = NULL;
6134 xmlChar *URI = NULL;
6135
6136 /*
6137 * We know that '<!DOCTYPE' has been detected.
6138 */
6139 SKIP(9);
6140
6141 SKIP_BLANKS;
6142
6143 /*
6144 * Parse the DOCTYPE name.
6145 */
6146 name = xmlParseName(ctxt);
6147 if (name == NULL) {
6148 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6150 ctxt->sax->error(ctxt->userData,
6151 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6152 ctxt->wellFormed = 0;
6153 ctxt->disableSAX = 1;
6154 }
6155 ctxt->intSubName = name;
6156
6157 SKIP_BLANKS;
6158
6159 /*
6160 * Check for SystemID and ExternalID
6161 */
6162 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6163
6164 if ((URI != NULL) || (ExternalID != NULL)) {
6165 ctxt->hasExternalSubset = 1;
6166 }
6167 ctxt->extSubURI = URI;
6168 ctxt->extSubSystem = ExternalID;
6169
6170 SKIP_BLANKS;
6171
6172 /*
6173 * Create and update the internal subset.
6174 */
6175 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6176 (!ctxt->disableSAX))
6177 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6178
6179 /*
6180 * Is there any internal subset declarations ?
6181 * they are handled separately in xmlParseInternalSubset()
6182 */
6183 if (RAW == '[')
6184 return;
6185
6186 /*
6187 * We should be at the end of the DOCTYPE declaration.
6188 */
6189 if (RAW != '>') {
6190 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006192 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006193 ctxt->wellFormed = 0;
6194 ctxt->disableSAX = 1;
6195 }
6196 NEXT;
6197}
6198
6199/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006200 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006201 * @ctxt: an XML parser context
6202 *
6203 * parse the internal subset declaration
6204 *
6205 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6206 */
6207
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006208static void
Owen Taylor3473f882001-02-23 17:55:21 +00006209xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6210 /*
6211 * Is there any DTD definition ?
6212 */
6213 if (RAW == '[') {
6214 ctxt->instate = XML_PARSER_DTD;
6215 NEXT;
6216 /*
6217 * Parse the succession of Markup declarations and
6218 * PEReferences.
6219 * Subsequence (markupdecl | PEReference | S)*
6220 */
6221 while (RAW != ']') {
6222 const xmlChar *check = CUR_PTR;
6223 int cons = ctxt->input->consumed;
6224
6225 SKIP_BLANKS;
6226 xmlParseMarkupDecl(ctxt);
6227 xmlParsePEReference(ctxt);
6228
6229 /*
6230 * Pop-up of finished entities.
6231 */
6232 while ((RAW == 0) && (ctxt->inputNr > 1))
6233 xmlPopInput(ctxt);
6234
6235 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6236 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6238 ctxt->sax->error(ctxt->userData,
6239 "xmlParseInternalSubset: error detected in Markup declaration\n");
6240 ctxt->wellFormed = 0;
6241 ctxt->disableSAX = 1;
6242 break;
6243 }
6244 }
6245 if (RAW == ']') {
6246 NEXT;
6247 SKIP_BLANKS;
6248 }
6249 }
6250
6251 /*
6252 * We should be at the end of the DOCTYPE declaration.
6253 */
6254 if (RAW != '>') {
6255 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006257 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006258 ctxt->wellFormed = 0;
6259 ctxt->disableSAX = 1;
6260 }
6261 NEXT;
6262}
6263
6264/**
6265 * xmlParseAttribute:
6266 * @ctxt: an XML parser context
6267 * @value: a xmlChar ** used to store the value of the attribute
6268 *
6269 * parse an attribute
6270 *
6271 * [41] Attribute ::= Name Eq AttValue
6272 *
6273 * [ WFC: No External Entity References ]
6274 * Attribute values cannot contain direct or indirect entity references
6275 * to external entities.
6276 *
6277 * [ WFC: No < in Attribute Values ]
6278 * The replacement text of any entity referred to directly or indirectly in
6279 * an attribute value (other than "&lt;") must not contain a <.
6280 *
6281 * [ VC: Attribute Value Type ]
6282 * The attribute must have been declared; the value must be of the type
6283 * declared for it.
6284 *
6285 * [25] Eq ::= S? '=' S?
6286 *
6287 * With namespace:
6288 *
6289 * [NS 11] Attribute ::= QName Eq AttValue
6290 *
6291 * Also the case QName == xmlns:??? is handled independently as a namespace
6292 * definition.
6293 *
6294 * Returns the attribute name, and the value in *value.
6295 */
6296
6297xmlChar *
6298xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6299 xmlChar *name, *val;
6300
6301 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006302 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006303 name = xmlParseName(ctxt);
6304 if (name == NULL) {
6305 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6307 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6308 ctxt->wellFormed = 0;
6309 ctxt->disableSAX = 1;
6310 return(NULL);
6311 }
6312
6313 /*
6314 * read the value
6315 */
6316 SKIP_BLANKS;
6317 if (RAW == '=') {
6318 NEXT;
6319 SKIP_BLANKS;
6320 val = xmlParseAttValue(ctxt);
6321 ctxt->instate = XML_PARSER_CONTENT;
6322 } else {
6323 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6325 ctxt->sax->error(ctxt->userData,
6326 "Specification mandate value for attribute %s\n", name);
6327 ctxt->wellFormed = 0;
6328 ctxt->disableSAX = 1;
6329 xmlFree(name);
6330 return(NULL);
6331 }
6332
6333 /*
6334 * Check that xml:lang conforms to the specification
6335 * No more registered as an error, just generate a warning now
6336 * since this was deprecated in XML second edition
6337 */
6338 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6339 if (!xmlCheckLanguageID(val)) {
6340 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6341 ctxt->sax->warning(ctxt->userData,
6342 "Malformed value for xml:lang : %s\n", val);
6343 }
6344 }
6345
6346 /*
6347 * Check that xml:space conforms to the specification
6348 */
6349 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6350 if (xmlStrEqual(val, BAD_CAST "default"))
6351 *(ctxt->space) = 0;
6352 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6353 *(ctxt->space) = 1;
6354 else {
6355 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6357 ctxt->sax->error(ctxt->userData,
6358"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6359 val);
6360 ctxt->wellFormed = 0;
6361 ctxt->disableSAX = 1;
6362 }
6363 }
6364
6365 *value = val;
6366 return(name);
6367}
6368
6369/**
6370 * xmlParseStartTag:
6371 * @ctxt: an XML parser context
6372 *
6373 * parse a start of tag either for rule element or
6374 * EmptyElement. In both case we don't parse the tag closing chars.
6375 *
6376 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6377 *
6378 * [ WFC: Unique Att Spec ]
6379 * No attribute name may appear more than once in the same start-tag or
6380 * empty-element tag.
6381 *
6382 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6383 *
6384 * [ WFC: Unique Att Spec ]
6385 * No attribute name may appear more than once in the same start-tag or
6386 * empty-element tag.
6387 *
6388 * With namespace:
6389 *
6390 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6391 *
6392 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6393 *
6394 * Returns the element name parsed
6395 */
6396
6397xmlChar *
6398xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6399 xmlChar *name;
6400 xmlChar *attname;
6401 xmlChar *attvalue;
6402 const xmlChar **atts = NULL;
6403 int nbatts = 0;
6404 int maxatts = 0;
6405 int i;
6406
6407 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006408 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006409
6410 name = xmlParseName(ctxt);
6411 if (name == NULL) {
6412 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6414 ctxt->sax->error(ctxt->userData,
6415 "xmlParseStartTag: invalid element name\n");
6416 ctxt->wellFormed = 0;
6417 ctxt->disableSAX = 1;
6418 return(NULL);
6419 }
6420
6421 /*
6422 * Now parse the attributes, it ends up with the ending
6423 *
6424 * (S Attribute)* S?
6425 */
6426 SKIP_BLANKS;
6427 GROW;
6428
Daniel Veillard21a0f912001-02-25 19:54:14 +00006429 while ((RAW != '>') &&
6430 ((RAW != '/') || (NXT(1) != '>')) &&
6431 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006432 const xmlChar *q = CUR_PTR;
6433 int cons = ctxt->input->consumed;
6434
6435 attname = xmlParseAttribute(ctxt, &attvalue);
6436 if ((attname != NULL) && (attvalue != NULL)) {
6437 /*
6438 * [ WFC: Unique Att Spec ]
6439 * No attribute name may appear more than once in the same
6440 * start-tag or empty-element tag.
6441 */
6442 for (i = 0; i < nbatts;i += 2) {
6443 if (xmlStrEqual(atts[i], attname)) {
6444 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6446 ctxt->sax->error(ctxt->userData,
6447 "Attribute %s redefined\n",
6448 attname);
6449 ctxt->wellFormed = 0;
6450 ctxt->disableSAX = 1;
6451 xmlFree(attname);
6452 xmlFree(attvalue);
6453 goto failed;
6454 }
6455 }
6456
6457 /*
6458 * Add the pair to atts
6459 */
6460 if (atts == NULL) {
6461 maxatts = 10;
6462 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6463 if (atts == NULL) {
6464 xmlGenericError(xmlGenericErrorContext,
6465 "malloc of %ld byte failed\n",
6466 maxatts * (long)sizeof(xmlChar *));
6467 return(NULL);
6468 }
6469 } else if (nbatts + 4 > maxatts) {
6470 maxatts *= 2;
6471 atts = (const xmlChar **) xmlRealloc((void *) atts,
6472 maxatts * sizeof(xmlChar *));
6473 if (atts == NULL) {
6474 xmlGenericError(xmlGenericErrorContext,
6475 "realloc of %ld byte failed\n",
6476 maxatts * (long)sizeof(xmlChar *));
6477 return(NULL);
6478 }
6479 }
6480 atts[nbatts++] = attname;
6481 atts[nbatts++] = attvalue;
6482 atts[nbatts] = NULL;
6483 atts[nbatts + 1] = NULL;
6484 } else {
6485 if (attname != NULL)
6486 xmlFree(attname);
6487 if (attvalue != NULL)
6488 xmlFree(attvalue);
6489 }
6490
6491failed:
6492
6493 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6494 break;
6495 if (!IS_BLANK(RAW)) {
6496 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6498 ctxt->sax->error(ctxt->userData,
6499 "attributes construct error\n");
6500 ctxt->wellFormed = 0;
6501 ctxt->disableSAX = 1;
6502 }
6503 SKIP_BLANKS;
6504 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6505 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6507 ctxt->sax->error(ctxt->userData,
6508 "xmlParseStartTag: problem parsing attributes\n");
6509 ctxt->wellFormed = 0;
6510 ctxt->disableSAX = 1;
6511 break;
6512 }
6513 GROW;
6514 }
6515
6516 /*
6517 * SAX: Start of Element !
6518 */
6519 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6520 (!ctxt->disableSAX))
6521 ctxt->sax->startElement(ctxt->userData, name, atts);
6522
6523 if (atts != NULL) {
6524 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6525 xmlFree((void *) atts);
6526 }
6527 return(name);
6528}
6529
6530/**
6531 * xmlParseEndTag:
6532 * @ctxt: an XML parser context
6533 *
6534 * parse an end of tag
6535 *
6536 * [42] ETag ::= '</' Name S? '>'
6537 *
6538 * With namespace
6539 *
6540 * [NS 9] ETag ::= '</' QName S? '>'
6541 */
6542
6543void
6544xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6545 xmlChar *name;
6546 xmlChar *oldname;
6547
6548 GROW;
6549 if ((RAW != '<') || (NXT(1) != '/')) {
6550 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6552 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6553 ctxt->wellFormed = 0;
6554 ctxt->disableSAX = 1;
6555 return;
6556 }
6557 SKIP(2);
6558
6559 name = xmlParseName(ctxt);
6560
6561 /*
6562 * We should definitely be at the ending "S? '>'" part
6563 */
6564 GROW;
6565 SKIP_BLANKS;
6566 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6567 ctxt->errNo = XML_ERR_GT_REQUIRED;
6568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6569 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6570 ctxt->wellFormed = 0;
6571 ctxt->disableSAX = 1;
6572 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006573 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006574
6575 /*
6576 * [ WFC: Element Type Match ]
6577 * The Name in an element's end-tag must match the element type in the
6578 * start-tag.
6579 *
6580 */
6581 if ((name == NULL) || (ctxt->name == NULL) ||
6582 (!xmlStrEqual(name, ctxt->name))) {
6583 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6584 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6585 if ((name != NULL) && (ctxt->name != NULL)) {
6586 ctxt->sax->error(ctxt->userData,
6587 "Opening and ending tag mismatch: %s and %s\n",
6588 ctxt->name, name);
6589 } else if (ctxt->name != NULL) {
6590 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006591 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006592 } else {
6593 ctxt->sax->error(ctxt->userData,
6594 "Ending tag error: internal error ???\n");
6595 }
6596
6597 }
6598 ctxt->wellFormed = 0;
6599 ctxt->disableSAX = 1;
6600 }
6601
6602 /*
6603 * SAX: End of Tag
6604 */
6605 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6606 (!ctxt->disableSAX))
6607 ctxt->sax->endElement(ctxt->userData, name);
6608
6609 if (name != NULL)
6610 xmlFree(name);
6611 oldname = namePop(ctxt);
6612 spacePop(ctxt);
6613 if (oldname != NULL) {
6614#ifdef DEBUG_STACK
6615 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6616#endif
6617 xmlFree(oldname);
6618 }
6619 return;
6620}
6621
6622/**
6623 * xmlParseCDSect:
6624 * @ctxt: an XML parser context
6625 *
6626 * Parse escaped pure raw content.
6627 *
6628 * [18] CDSect ::= CDStart CData CDEnd
6629 *
6630 * [19] CDStart ::= '<![CDATA['
6631 *
6632 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6633 *
6634 * [21] CDEnd ::= ']]>'
6635 */
6636void
6637xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6638 xmlChar *buf = NULL;
6639 int len = 0;
6640 int size = XML_PARSER_BUFFER_SIZE;
6641 int r, rl;
6642 int s, sl;
6643 int cur, l;
6644 int count = 0;
6645
6646 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6647 (NXT(2) == '[') && (NXT(3) == 'C') &&
6648 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6649 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6650 (NXT(8) == '[')) {
6651 SKIP(9);
6652 } else
6653 return;
6654
6655 ctxt->instate = XML_PARSER_CDATA_SECTION;
6656 r = CUR_CHAR(rl);
6657 if (!IS_CHAR(r)) {
6658 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6660 ctxt->sax->error(ctxt->userData,
6661 "CData section not finished\n");
6662 ctxt->wellFormed = 0;
6663 ctxt->disableSAX = 1;
6664 ctxt->instate = XML_PARSER_CONTENT;
6665 return;
6666 }
6667 NEXTL(rl);
6668 s = CUR_CHAR(sl);
6669 if (!IS_CHAR(s)) {
6670 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6671 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6672 ctxt->sax->error(ctxt->userData,
6673 "CData section not finished\n");
6674 ctxt->wellFormed = 0;
6675 ctxt->disableSAX = 1;
6676 ctxt->instate = XML_PARSER_CONTENT;
6677 return;
6678 }
6679 NEXTL(sl);
6680 cur = CUR_CHAR(l);
6681 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6682 if (buf == NULL) {
6683 xmlGenericError(xmlGenericErrorContext,
6684 "malloc of %d byte failed\n", size);
6685 return;
6686 }
6687 while (IS_CHAR(cur) &&
6688 ((r != ']') || (s != ']') || (cur != '>'))) {
6689 if (len + 5 >= size) {
6690 size *= 2;
6691 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6692 if (buf == NULL) {
6693 xmlGenericError(xmlGenericErrorContext,
6694 "realloc of %d byte failed\n", size);
6695 return;
6696 }
6697 }
6698 COPY_BUF(rl,buf,len,r);
6699 r = s;
6700 rl = sl;
6701 s = cur;
6702 sl = l;
6703 count++;
6704 if (count > 50) {
6705 GROW;
6706 count = 0;
6707 }
6708 NEXTL(l);
6709 cur = CUR_CHAR(l);
6710 }
6711 buf[len] = 0;
6712 ctxt->instate = XML_PARSER_CONTENT;
6713 if (cur != '>') {
6714 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6716 ctxt->sax->error(ctxt->userData,
6717 "CData section not finished\n%.50s\n", buf);
6718 ctxt->wellFormed = 0;
6719 ctxt->disableSAX = 1;
6720 xmlFree(buf);
6721 return;
6722 }
6723 NEXTL(l);
6724
6725 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006726 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006727 */
6728 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6729 if (ctxt->sax->cdataBlock != NULL)
6730 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006731 else if (ctxt->sax->characters != NULL)
6732 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006733 }
6734 xmlFree(buf);
6735}
6736
6737/**
6738 * xmlParseContent:
6739 * @ctxt: an XML parser context
6740 *
6741 * Parse a content:
6742 *
6743 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6744 */
6745
6746void
6747xmlParseContent(xmlParserCtxtPtr ctxt) {
6748 GROW;
Daniel Veillard561b7f82002-03-20 21:55:57 +00006749 while (((RAW != 0) || (ctxt->token != 0)) &&
Owen Taylor3473f882001-02-23 17:55:21 +00006750 ((RAW != '<') || (NXT(1) != '/'))) {
6751 const xmlChar *test = CUR_PTR;
6752 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006753 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006754 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006755
6756 /*
6757 * Handle possible processed charrefs.
6758 */
6759 if (ctxt->token != 0) {
6760 xmlParseCharData(ctxt, 0);
6761 }
6762 /*
6763 * First case : a Processing Instruction.
6764 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006765 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006766 xmlParsePI(ctxt);
6767 }
6768
6769 /*
6770 * Second case : a CDSection
6771 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006772 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006773 (NXT(2) == '[') && (NXT(3) == 'C') &&
6774 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6775 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6776 (NXT(8) == '[')) {
6777 xmlParseCDSect(ctxt);
6778 }
6779
6780 /*
6781 * Third case : a comment
6782 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006783 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006784 (NXT(2) == '-') && (NXT(3) == '-')) {
6785 xmlParseComment(ctxt);
6786 ctxt->instate = XML_PARSER_CONTENT;
6787 }
6788
6789 /*
6790 * Fourth case : a sub-element.
6791 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006792 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006793 xmlParseElement(ctxt);
6794 }
6795
6796 /*
6797 * Fifth case : a reference. If if has not been resolved,
6798 * parsing returns it's Name, create the node
6799 */
6800
Daniel Veillard21a0f912001-02-25 19:54:14 +00006801 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006802 xmlParseReference(ctxt);
6803 }
6804
6805 /*
6806 * Last case, text. Note that References are handled directly.
6807 */
6808 else {
6809 xmlParseCharData(ctxt, 0);
6810 }
6811
6812 GROW;
6813 /*
6814 * Pop-up of finished entities.
6815 */
Daniel Veillard561b7f82002-03-20 21:55:57 +00006816 while ((RAW == 0) && (ctxt->inputNr > 1))
Owen Taylor3473f882001-02-23 17:55:21 +00006817 xmlPopInput(ctxt);
6818 SHRINK;
6819
6820 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6821 (tok == ctxt->token)) {
6822 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6824 ctxt->sax->error(ctxt->userData,
6825 "detected an error in element content\n");
6826 ctxt->wellFormed = 0;
6827 ctxt->disableSAX = 1;
6828 ctxt->instate = XML_PARSER_EOF;
6829 break;
6830 }
6831 }
6832}
6833
6834/**
6835 * xmlParseElement:
6836 * @ctxt: an XML parser context
6837 *
6838 * parse an XML element, this is highly recursive
6839 *
6840 * [39] element ::= EmptyElemTag | STag content ETag
6841 *
6842 * [ WFC: Element Type Match ]
6843 * The Name in an element's end-tag must match the element type in the
6844 * start-tag.
6845 *
6846 * [ VC: Element Valid ]
6847 * An element is valid if there is a declaration matching elementdecl
6848 * where the Name matches the element type and one of the following holds:
6849 * - The declaration matches EMPTY and the element has no content.
6850 * - The declaration matches children and the sequence of child elements
6851 * belongs to the language generated by the regular expression in the
6852 * content model, with optional white space (characters matching the
6853 * nonterminal S) between each pair of child elements.
6854 * - The declaration matches Mixed and the content consists of character
6855 * data and child elements whose types match names in the content model.
6856 * - The declaration matches ANY, and the types of any child elements have
6857 * been declared.
6858 */
6859
6860void
6861xmlParseElement(xmlParserCtxtPtr ctxt) {
6862 const xmlChar *openTag = CUR_PTR;
6863 xmlChar *name;
6864 xmlChar *oldname;
6865 xmlParserNodeInfo node_info;
6866 xmlNodePtr ret;
6867
6868 /* Capture start position */
6869 if (ctxt->record_info) {
6870 node_info.begin_pos = ctxt->input->consumed +
6871 (CUR_PTR - ctxt->input->base);
6872 node_info.begin_line = ctxt->input->line;
6873 }
6874
6875 if (ctxt->spaceNr == 0)
6876 spacePush(ctxt, -1);
6877 else
6878 spacePush(ctxt, *ctxt->space);
6879
6880 name = xmlParseStartTag(ctxt);
6881 if (name == NULL) {
6882 spacePop(ctxt);
6883 return;
6884 }
6885 namePush(ctxt, name);
6886 ret = ctxt->node;
6887
6888 /*
6889 * [ VC: Root Element Type ]
6890 * The Name in the document type declaration must match the element
6891 * type of the root element.
6892 */
6893 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6894 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6895 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6896
6897 /*
6898 * Check for an Empty Element.
6899 */
6900 if ((RAW == '/') && (NXT(1) == '>')) {
6901 SKIP(2);
6902 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6903 (!ctxt->disableSAX))
6904 ctxt->sax->endElement(ctxt->userData, name);
6905 oldname = namePop(ctxt);
6906 spacePop(ctxt);
6907 if (oldname != NULL) {
6908#ifdef DEBUG_STACK
6909 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6910#endif
6911 xmlFree(oldname);
6912 }
6913 if ( ret != NULL && ctxt->record_info ) {
6914 node_info.end_pos = ctxt->input->consumed +
6915 (CUR_PTR - ctxt->input->base);
6916 node_info.end_line = ctxt->input->line;
6917 node_info.node = ret;
6918 xmlParserAddNodeInfo(ctxt, &node_info);
6919 }
6920 return;
6921 }
6922 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006923 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006924 } else {
6925 ctxt->errNo = XML_ERR_GT_REQUIRED;
6926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6927 ctxt->sax->error(ctxt->userData,
6928 "Couldn't find end of Start Tag\n%.30s\n",
6929 openTag);
6930 ctxt->wellFormed = 0;
6931 ctxt->disableSAX = 1;
6932
6933 /*
6934 * end of parsing of this node.
6935 */
6936 nodePop(ctxt);
6937 oldname = namePop(ctxt);
6938 spacePop(ctxt);
6939 if (oldname != NULL) {
6940#ifdef DEBUG_STACK
6941 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6942#endif
6943 xmlFree(oldname);
6944 }
6945
6946 /*
6947 * Capture end position and add node
6948 */
6949 if ( ret != NULL && ctxt->record_info ) {
6950 node_info.end_pos = ctxt->input->consumed +
6951 (CUR_PTR - ctxt->input->base);
6952 node_info.end_line = ctxt->input->line;
6953 node_info.node = ret;
6954 xmlParserAddNodeInfo(ctxt, &node_info);
6955 }
6956 return;
6957 }
6958
6959 /*
6960 * Parse the content of the element:
6961 */
6962 xmlParseContent(ctxt);
6963 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00006964 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00006965 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6966 ctxt->sax->error(ctxt->userData,
6967 "Premature end of data in tag %.30s\n", openTag);
6968 ctxt->wellFormed = 0;
6969 ctxt->disableSAX = 1;
6970
6971 /*
6972 * end of parsing of this node.
6973 */
6974 nodePop(ctxt);
6975 oldname = namePop(ctxt);
6976 spacePop(ctxt);
6977 if (oldname != NULL) {
6978#ifdef DEBUG_STACK
6979 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6980#endif
6981 xmlFree(oldname);
6982 }
6983 return;
6984 }
6985
6986 /*
6987 * parse the end of tag: '</' should be here.
6988 */
6989 xmlParseEndTag(ctxt);
6990
6991 /*
6992 * Capture end position and add node
6993 */
6994 if ( ret != NULL && ctxt->record_info ) {
6995 node_info.end_pos = ctxt->input->consumed +
6996 (CUR_PTR - ctxt->input->base);
6997 node_info.end_line = ctxt->input->line;
6998 node_info.node = ret;
6999 xmlParserAddNodeInfo(ctxt, &node_info);
7000 }
7001}
7002
7003/**
7004 * xmlParseVersionNum:
7005 * @ctxt: an XML parser context
7006 *
7007 * parse the XML version value.
7008 *
7009 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7010 *
7011 * Returns the string giving the XML version number, or NULL
7012 */
7013xmlChar *
7014xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7015 xmlChar *buf = NULL;
7016 int len = 0;
7017 int size = 10;
7018 xmlChar cur;
7019
7020 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7021 if (buf == NULL) {
7022 xmlGenericError(xmlGenericErrorContext,
7023 "malloc of %d byte failed\n", size);
7024 return(NULL);
7025 }
7026 cur = CUR;
7027 while (((cur >= 'a') && (cur <= 'z')) ||
7028 ((cur >= 'A') && (cur <= 'Z')) ||
7029 ((cur >= '0') && (cur <= '9')) ||
7030 (cur == '_') || (cur == '.') ||
7031 (cur == ':') || (cur == '-')) {
7032 if (len + 1 >= size) {
7033 size *= 2;
7034 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7035 if (buf == NULL) {
7036 xmlGenericError(xmlGenericErrorContext,
7037 "realloc of %d byte failed\n", size);
7038 return(NULL);
7039 }
7040 }
7041 buf[len++] = cur;
7042 NEXT;
7043 cur=CUR;
7044 }
7045 buf[len] = 0;
7046 return(buf);
7047}
7048
7049/**
7050 * xmlParseVersionInfo:
7051 * @ctxt: an XML parser context
7052 *
7053 * parse the XML version.
7054 *
7055 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7056 *
7057 * [25] Eq ::= S? '=' S?
7058 *
7059 * Returns the version string, e.g. "1.0"
7060 */
7061
7062xmlChar *
7063xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7064 xmlChar *version = NULL;
7065 const xmlChar *q;
7066
7067 if ((RAW == 'v') && (NXT(1) == 'e') &&
7068 (NXT(2) == 'r') && (NXT(3) == 's') &&
7069 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7070 (NXT(6) == 'n')) {
7071 SKIP(7);
7072 SKIP_BLANKS;
7073 if (RAW != '=') {
7074 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7076 ctxt->sax->error(ctxt->userData,
7077 "xmlParseVersionInfo : expected '='\n");
7078 ctxt->wellFormed = 0;
7079 ctxt->disableSAX = 1;
7080 return(NULL);
7081 }
7082 NEXT;
7083 SKIP_BLANKS;
7084 if (RAW == '"') {
7085 NEXT;
7086 q = CUR_PTR;
7087 version = xmlParseVersionNum(ctxt);
7088 if (RAW != '"') {
7089 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7091 ctxt->sax->error(ctxt->userData,
7092 "String not closed\n%.50s\n", q);
7093 ctxt->wellFormed = 0;
7094 ctxt->disableSAX = 1;
7095 } else
7096 NEXT;
7097 } else if (RAW == '\''){
7098 NEXT;
7099 q = CUR_PTR;
7100 version = xmlParseVersionNum(ctxt);
7101 if (RAW != '\'') {
7102 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7104 ctxt->sax->error(ctxt->userData,
7105 "String not closed\n%.50s\n", q);
7106 ctxt->wellFormed = 0;
7107 ctxt->disableSAX = 1;
7108 } else
7109 NEXT;
7110 } else {
7111 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7112 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7113 ctxt->sax->error(ctxt->userData,
7114 "xmlParseVersionInfo : expected ' or \"\n");
7115 ctxt->wellFormed = 0;
7116 ctxt->disableSAX = 1;
7117 }
7118 }
7119 return(version);
7120}
7121
7122/**
7123 * xmlParseEncName:
7124 * @ctxt: an XML parser context
7125 *
7126 * parse the XML encoding name
7127 *
7128 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7129 *
7130 * Returns the encoding name value or NULL
7131 */
7132xmlChar *
7133xmlParseEncName(xmlParserCtxtPtr ctxt) {
7134 xmlChar *buf = NULL;
7135 int len = 0;
7136 int size = 10;
7137 xmlChar cur;
7138
7139 cur = CUR;
7140 if (((cur >= 'a') && (cur <= 'z')) ||
7141 ((cur >= 'A') && (cur <= 'Z'))) {
7142 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7143 if (buf == NULL) {
7144 xmlGenericError(xmlGenericErrorContext,
7145 "malloc of %d byte failed\n", size);
7146 return(NULL);
7147 }
7148
7149 buf[len++] = cur;
7150 NEXT;
7151 cur = CUR;
7152 while (((cur >= 'a') && (cur <= 'z')) ||
7153 ((cur >= 'A') && (cur <= 'Z')) ||
7154 ((cur >= '0') && (cur <= '9')) ||
7155 (cur == '.') || (cur == '_') ||
7156 (cur == '-')) {
7157 if (len + 1 >= size) {
7158 size *= 2;
7159 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7160 if (buf == NULL) {
7161 xmlGenericError(xmlGenericErrorContext,
7162 "realloc of %d byte failed\n", size);
7163 return(NULL);
7164 }
7165 }
7166 buf[len++] = cur;
7167 NEXT;
7168 cur = CUR;
7169 if (cur == 0) {
7170 SHRINK;
7171 GROW;
7172 cur = CUR;
7173 }
7174 }
7175 buf[len] = 0;
7176 } else {
7177 ctxt->errNo = XML_ERR_ENCODING_NAME;
7178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7179 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7180 ctxt->wellFormed = 0;
7181 ctxt->disableSAX = 1;
7182 }
7183 return(buf);
7184}
7185
7186/**
7187 * xmlParseEncodingDecl:
7188 * @ctxt: an XML parser context
7189 *
7190 * parse the XML encoding declaration
7191 *
7192 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7193 *
7194 * this setups the conversion filters.
7195 *
7196 * Returns the encoding value or NULL
7197 */
7198
7199xmlChar *
7200xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7201 xmlChar *encoding = NULL;
7202 const xmlChar *q;
7203
7204 SKIP_BLANKS;
7205 if ((RAW == 'e') && (NXT(1) == 'n') &&
7206 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7207 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7208 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7209 SKIP(8);
7210 SKIP_BLANKS;
7211 if (RAW != '=') {
7212 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7214 ctxt->sax->error(ctxt->userData,
7215 "xmlParseEncodingDecl : expected '='\n");
7216 ctxt->wellFormed = 0;
7217 ctxt->disableSAX = 1;
7218 return(NULL);
7219 }
7220 NEXT;
7221 SKIP_BLANKS;
7222 if (RAW == '"') {
7223 NEXT;
7224 q = CUR_PTR;
7225 encoding = xmlParseEncName(ctxt);
7226 if (RAW != '"') {
7227 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7229 ctxt->sax->error(ctxt->userData,
7230 "String not closed\n%.50s\n", q);
7231 ctxt->wellFormed = 0;
7232 ctxt->disableSAX = 1;
7233 } else
7234 NEXT;
7235 } else if (RAW == '\''){
7236 NEXT;
7237 q = CUR_PTR;
7238 encoding = xmlParseEncName(ctxt);
7239 if (RAW != '\'') {
7240 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7242 ctxt->sax->error(ctxt->userData,
7243 "String not closed\n%.50s\n", q);
7244 ctxt->wellFormed = 0;
7245 ctxt->disableSAX = 1;
7246 } else
7247 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007248 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007249 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7251 ctxt->sax->error(ctxt->userData,
7252 "xmlParseEncodingDecl : expected ' or \"\n");
7253 ctxt->wellFormed = 0;
7254 ctxt->disableSAX = 1;
7255 }
7256 if (encoding != NULL) {
7257 xmlCharEncoding enc;
7258 xmlCharEncodingHandlerPtr handler;
7259
7260 if (ctxt->input->encoding != NULL)
7261 xmlFree((xmlChar *) ctxt->input->encoding);
7262 ctxt->input->encoding = encoding;
7263
7264 enc = xmlParseCharEncoding((const char *) encoding);
7265 /*
7266 * registered set of known encodings
7267 */
7268 if (enc != XML_CHAR_ENCODING_ERROR) {
7269 xmlSwitchEncoding(ctxt, enc);
7270 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7271 xmlFree(encoding);
7272 return(NULL);
7273 }
7274 } else {
7275 /*
7276 * fallback for unknown encodings
7277 */
7278 handler = xmlFindCharEncodingHandler((const char *) encoding);
7279 if (handler != NULL) {
7280 xmlSwitchToEncoding(ctxt, handler);
7281 } else {
7282 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7284 ctxt->sax->error(ctxt->userData,
7285 "Unsupported encoding %s\n", encoding);
7286 return(NULL);
7287 }
7288 }
7289 }
7290 }
7291 return(encoding);
7292}
7293
7294/**
7295 * xmlParseSDDecl:
7296 * @ctxt: an XML parser context
7297 *
7298 * parse the XML standalone declaration
7299 *
7300 * [32] SDDecl ::= S 'standalone' Eq
7301 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7302 *
7303 * [ VC: Standalone Document Declaration ]
7304 * TODO The standalone document declaration must have the value "no"
7305 * if any external markup declarations contain declarations of:
7306 * - attributes with default values, if elements to which these
7307 * attributes apply appear in the document without specifications
7308 * of values for these attributes, or
7309 * - entities (other than amp, lt, gt, apos, quot), if references
7310 * to those entities appear in the document, or
7311 * - attributes with values subject to normalization, where the
7312 * attribute appears in the document with a value which will change
7313 * as a result of normalization, or
7314 * - element types with element content, if white space occurs directly
7315 * within any instance of those types.
7316 *
7317 * Returns 1 if standalone, 0 otherwise
7318 */
7319
7320int
7321xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7322 int standalone = -1;
7323
7324 SKIP_BLANKS;
7325 if ((RAW == 's') && (NXT(1) == 't') &&
7326 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7327 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7328 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7329 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7330 SKIP(10);
7331 SKIP_BLANKS;
7332 if (RAW != '=') {
7333 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7335 ctxt->sax->error(ctxt->userData,
7336 "XML standalone declaration : expected '='\n");
7337 ctxt->wellFormed = 0;
7338 ctxt->disableSAX = 1;
7339 return(standalone);
7340 }
7341 NEXT;
7342 SKIP_BLANKS;
7343 if (RAW == '\''){
7344 NEXT;
7345 if ((RAW == 'n') && (NXT(1) == 'o')) {
7346 standalone = 0;
7347 SKIP(2);
7348 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7349 (NXT(2) == 's')) {
7350 standalone = 1;
7351 SKIP(3);
7352 } else {
7353 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7355 ctxt->sax->error(ctxt->userData,
7356 "standalone accepts only 'yes' or 'no'\n");
7357 ctxt->wellFormed = 0;
7358 ctxt->disableSAX = 1;
7359 }
7360 if (RAW != '\'') {
7361 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7363 ctxt->sax->error(ctxt->userData, "String not closed\n");
7364 ctxt->wellFormed = 0;
7365 ctxt->disableSAX = 1;
7366 } else
7367 NEXT;
7368 } else if (RAW == '"'){
7369 NEXT;
7370 if ((RAW == 'n') && (NXT(1) == 'o')) {
7371 standalone = 0;
7372 SKIP(2);
7373 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7374 (NXT(2) == 's')) {
7375 standalone = 1;
7376 SKIP(3);
7377 } else {
7378 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7379 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7380 ctxt->sax->error(ctxt->userData,
7381 "standalone accepts only 'yes' or 'no'\n");
7382 ctxt->wellFormed = 0;
7383 ctxt->disableSAX = 1;
7384 }
7385 if (RAW != '"') {
7386 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7388 ctxt->sax->error(ctxt->userData, "String not closed\n");
7389 ctxt->wellFormed = 0;
7390 ctxt->disableSAX = 1;
7391 } else
7392 NEXT;
7393 } else {
7394 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7396 ctxt->sax->error(ctxt->userData,
7397 "Standalone value not found\n");
7398 ctxt->wellFormed = 0;
7399 ctxt->disableSAX = 1;
7400 }
7401 }
7402 return(standalone);
7403}
7404
7405/**
7406 * xmlParseXMLDecl:
7407 * @ctxt: an XML parser context
7408 *
7409 * parse an XML declaration header
7410 *
7411 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7412 */
7413
7414void
7415xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7416 xmlChar *version;
7417
7418 /*
7419 * We know that '<?xml' is here.
7420 */
7421 SKIP(5);
7422
7423 if (!IS_BLANK(RAW)) {
7424 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7425 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7426 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7427 ctxt->wellFormed = 0;
7428 ctxt->disableSAX = 1;
7429 }
7430 SKIP_BLANKS;
7431
7432 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007433 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007434 */
7435 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007436 if (version == NULL) {
7437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7438 ctxt->sax->error(ctxt->userData,
7439 "Malformed declaration expecting version\n");
7440 ctxt->wellFormed = 0;
7441 ctxt->disableSAX = 1;
7442 } else {
7443 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7444 /*
7445 * TODO: Blueberry should be detected here
7446 */
7447 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7448 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7449 version);
7450 }
7451 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007452 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007453 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007454 }
Owen Taylor3473f882001-02-23 17:55:21 +00007455
7456 /*
7457 * We may have the encoding declaration
7458 */
7459 if (!IS_BLANK(RAW)) {
7460 if ((RAW == '?') && (NXT(1) == '>')) {
7461 SKIP(2);
7462 return;
7463 }
7464 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7465 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7466 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7467 ctxt->wellFormed = 0;
7468 ctxt->disableSAX = 1;
7469 }
7470 xmlParseEncodingDecl(ctxt);
7471 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7472 /*
7473 * The XML REC instructs us to stop parsing right here
7474 */
7475 return;
7476 }
7477
7478 /*
7479 * We may have the standalone status.
7480 */
7481 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7482 if ((RAW == '?') && (NXT(1) == '>')) {
7483 SKIP(2);
7484 return;
7485 }
7486 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7488 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7489 ctxt->wellFormed = 0;
7490 ctxt->disableSAX = 1;
7491 }
7492 SKIP_BLANKS;
7493 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7494
7495 SKIP_BLANKS;
7496 if ((RAW == '?') && (NXT(1) == '>')) {
7497 SKIP(2);
7498 } else if (RAW == '>') {
7499 /* Deprecated old WD ... */
7500 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7502 ctxt->sax->error(ctxt->userData,
7503 "XML declaration must end-up with '?>'\n");
7504 ctxt->wellFormed = 0;
7505 ctxt->disableSAX = 1;
7506 NEXT;
7507 } else {
7508 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7510 ctxt->sax->error(ctxt->userData,
7511 "parsing XML declaration: '?>' expected\n");
7512 ctxt->wellFormed = 0;
7513 ctxt->disableSAX = 1;
7514 MOVETO_ENDTAG(CUR_PTR);
7515 NEXT;
7516 }
7517}
7518
7519/**
7520 * xmlParseMisc:
7521 * @ctxt: an XML parser context
7522 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007523 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007524 *
7525 * [27] Misc ::= Comment | PI | S
7526 */
7527
7528void
7529xmlParseMisc(xmlParserCtxtPtr ctxt) {
Daniel Veillard561b7f82002-03-20 21:55:57 +00007530 while (((RAW == '<') && (NXT(1) == '?')) ||
7531 ((RAW == '<') && (NXT(1) == '!') &&
7532 (NXT(2) == '-') && (NXT(3) == '-')) ||
7533 IS_BLANK(CUR)) {
7534 if ((RAW == '<') && (NXT(1) == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00007535 xmlParsePI(ctxt);
Daniel Veillard561b7f82002-03-20 21:55:57 +00007536 } else if (IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00007537 NEXT;
7538 } else
7539 xmlParseComment(ctxt);
7540 }
7541}
7542
7543/**
7544 * xmlParseDocument:
7545 * @ctxt: an XML parser context
7546 *
7547 * parse an XML document (and build a tree if using the standard SAX
7548 * interface).
7549 *
7550 * [1] document ::= prolog element Misc*
7551 *
7552 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7553 *
7554 * Returns 0, -1 in case of error. the parser context is augmented
7555 * as a result of the parsing.
7556 */
7557
7558int
7559xmlParseDocument(xmlParserCtxtPtr ctxt) {
7560 xmlChar start[4];
7561 xmlCharEncoding enc;
7562
7563 xmlInitParser();
7564
7565 GROW;
7566
7567 /*
7568 * SAX: beginning of the document processing.
7569 */
7570 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7571 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7572
Daniel Veillard50f34372001-08-03 12:06:36 +00007573 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007574 /*
7575 * Get the 4 first bytes and decode the charset
7576 * if enc != XML_CHAR_ENCODING_NONE
7577 * plug some encoding conversion routines.
7578 */
7579 start[0] = RAW;
7580 start[1] = NXT(1);
7581 start[2] = NXT(2);
7582 start[3] = NXT(3);
7583 enc = xmlDetectCharEncoding(start, 4);
7584 if (enc != XML_CHAR_ENCODING_NONE) {
7585 xmlSwitchEncoding(ctxt, enc);
7586 }
Owen Taylor3473f882001-02-23 17:55:21 +00007587 }
7588
7589
7590 if (CUR == 0) {
7591 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7593 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7594 ctxt->wellFormed = 0;
7595 ctxt->disableSAX = 1;
7596 }
7597
7598 /*
7599 * Check for the XMLDecl in the Prolog.
7600 */
7601 GROW;
7602 if ((RAW == '<') && (NXT(1) == '?') &&
7603 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7604 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7605
7606 /*
7607 * Note that we will switch encoding on the fly.
7608 */
7609 xmlParseXMLDecl(ctxt);
7610 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7611 /*
7612 * The XML REC instructs us to stop parsing right here
7613 */
7614 return(-1);
7615 }
7616 ctxt->standalone = ctxt->input->standalone;
7617 SKIP_BLANKS;
7618 } else {
7619 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7620 }
7621 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7622 ctxt->sax->startDocument(ctxt->userData);
7623
7624 /*
7625 * The Misc part of the Prolog
7626 */
7627 GROW;
7628 xmlParseMisc(ctxt);
7629
7630 /*
7631 * Then possibly doc type declaration(s) and more Misc
7632 * (doctypedecl Misc*)?
7633 */
7634 GROW;
7635 if ((RAW == '<') && (NXT(1) == '!') &&
7636 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7637 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7638 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7639 (NXT(8) == 'E')) {
7640
7641 ctxt->inSubset = 1;
7642 xmlParseDocTypeDecl(ctxt);
7643 if (RAW == '[') {
7644 ctxt->instate = XML_PARSER_DTD;
7645 xmlParseInternalSubset(ctxt);
7646 }
7647
7648 /*
7649 * Create and update the external subset.
7650 */
7651 ctxt->inSubset = 2;
7652 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7653 (!ctxt->disableSAX))
7654 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7655 ctxt->extSubSystem, ctxt->extSubURI);
7656 ctxt->inSubset = 0;
7657
7658
7659 ctxt->instate = XML_PARSER_PROLOG;
7660 xmlParseMisc(ctxt);
7661 }
7662
7663 /*
7664 * Time to start parsing the tree itself
7665 */
7666 GROW;
7667 if (RAW != '<') {
7668 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7670 ctxt->sax->error(ctxt->userData,
7671 "Start tag expected, '<' not found\n");
7672 ctxt->wellFormed = 0;
7673 ctxt->disableSAX = 1;
7674 ctxt->instate = XML_PARSER_EOF;
7675 } else {
7676 ctxt->instate = XML_PARSER_CONTENT;
7677 xmlParseElement(ctxt);
7678 ctxt->instate = XML_PARSER_EPILOG;
7679
7680
7681 /*
7682 * The Misc part at the end
7683 */
7684 xmlParseMisc(ctxt);
7685
Daniel Veillard561b7f82002-03-20 21:55:57 +00007686 if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00007687 ctxt->errNo = XML_ERR_DOCUMENT_END;
7688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7689 ctxt->sax->error(ctxt->userData,
7690 "Extra content at the end of the document\n");
7691 ctxt->wellFormed = 0;
7692 ctxt->disableSAX = 1;
7693 }
7694 ctxt->instate = XML_PARSER_EOF;
7695 }
7696
7697 /*
7698 * SAX: end of the document processing.
7699 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007700 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007701 ctxt->sax->endDocument(ctxt->userData);
7702
Daniel Veillard5997aca2002-03-18 18:36:20 +00007703 /*
7704 * Remove locally kept entity definitions if the tree was not built
7705 */
7706 if ((ctxt->myDoc != NULL) &&
7707 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7708 xmlFreeDoc(ctxt->myDoc);
7709 ctxt->myDoc = NULL;
7710 }
7711
Daniel Veillardc7612992002-02-17 22:47:37 +00007712 if (! ctxt->wellFormed) {
7713 ctxt->valid = 0;
7714 return(-1);
7715 }
Owen Taylor3473f882001-02-23 17:55:21 +00007716 return(0);
7717}
7718
7719/**
7720 * xmlParseExtParsedEnt:
7721 * @ctxt: an XML parser context
7722 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007723 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007724 * An external general parsed entity is well-formed if it matches the
7725 * production labeled extParsedEnt.
7726 *
7727 * [78] extParsedEnt ::= TextDecl? content
7728 *
7729 * Returns 0, -1 in case of error. the parser context is augmented
7730 * as a result of the parsing.
7731 */
7732
7733int
7734xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7735 xmlChar start[4];
7736 xmlCharEncoding enc;
7737
7738 xmlDefaultSAXHandlerInit();
7739
7740 GROW;
7741
7742 /*
7743 * SAX: beginning of the document processing.
7744 */
7745 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7746 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7747
7748 /*
7749 * Get the 4 first bytes and decode the charset
7750 * if enc != XML_CHAR_ENCODING_NONE
7751 * plug some encoding conversion routines.
7752 */
7753 start[0] = RAW;
7754 start[1] = NXT(1);
7755 start[2] = NXT(2);
7756 start[3] = NXT(3);
7757 enc = xmlDetectCharEncoding(start, 4);
7758 if (enc != XML_CHAR_ENCODING_NONE) {
7759 xmlSwitchEncoding(ctxt, enc);
7760 }
7761
7762
7763 if (CUR == 0) {
7764 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7766 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7767 ctxt->wellFormed = 0;
7768 ctxt->disableSAX = 1;
7769 }
7770
7771 /*
7772 * Check for the XMLDecl in the Prolog.
7773 */
7774 GROW;
7775 if ((RAW == '<') && (NXT(1) == '?') &&
7776 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7777 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7778
7779 /*
7780 * Note that we will switch encoding on the fly.
7781 */
7782 xmlParseXMLDecl(ctxt);
7783 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7784 /*
7785 * The XML REC instructs us to stop parsing right here
7786 */
7787 return(-1);
7788 }
7789 SKIP_BLANKS;
7790 } else {
7791 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7792 }
7793 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7794 ctxt->sax->startDocument(ctxt->userData);
7795
7796 /*
7797 * Doing validity checking on chunk doesn't make sense
7798 */
7799 ctxt->instate = XML_PARSER_CONTENT;
7800 ctxt->validate = 0;
7801 ctxt->loadsubset = 0;
7802 ctxt->depth = 0;
7803
7804 xmlParseContent(ctxt);
7805
7806 if ((RAW == '<') && (NXT(1) == '/')) {
7807 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7809 ctxt->sax->error(ctxt->userData,
7810 "chunk is not well balanced\n");
7811 ctxt->wellFormed = 0;
7812 ctxt->disableSAX = 1;
7813 } else if (RAW != 0) {
7814 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7815 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7816 ctxt->sax->error(ctxt->userData,
7817 "extra content at the end of well balanced chunk\n");
7818 ctxt->wellFormed = 0;
7819 ctxt->disableSAX = 1;
7820 }
7821
7822 /*
7823 * SAX: end of the document processing.
7824 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007825 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007826 ctxt->sax->endDocument(ctxt->userData);
7827
7828 if (! ctxt->wellFormed) return(-1);
7829 return(0);
7830}
7831
7832/************************************************************************
7833 * *
7834 * Progressive parsing interfaces *
7835 * *
7836 ************************************************************************/
7837
7838/**
7839 * xmlParseLookupSequence:
7840 * @ctxt: an XML parser context
7841 * @first: the first char to lookup
7842 * @next: the next char to lookup or zero
7843 * @third: the next char to lookup or zero
7844 *
7845 * Try to find if a sequence (first, next, third) or just (first next) or
7846 * (first) is available in the input stream.
7847 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7848 * to avoid rescanning sequences of bytes, it DOES change the state of the
7849 * parser, do not use liberally.
7850 *
7851 * Returns the index to the current parsing point if the full sequence
7852 * is available, -1 otherwise.
7853 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007854static int
Owen Taylor3473f882001-02-23 17:55:21 +00007855xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7856 xmlChar next, xmlChar third) {
7857 int base, len;
7858 xmlParserInputPtr in;
7859 const xmlChar *buf;
7860
7861 in = ctxt->input;
7862 if (in == NULL) return(-1);
7863 base = in->cur - in->base;
7864 if (base < 0) return(-1);
7865 if (ctxt->checkIndex > base)
7866 base = ctxt->checkIndex;
7867 if (in->buf == NULL) {
7868 buf = in->base;
7869 len = in->length;
7870 } else {
7871 buf = in->buf->buffer->content;
7872 len = in->buf->buffer->use;
7873 }
7874 /* take into account the sequence length */
7875 if (third) len -= 2;
7876 else if (next) len --;
7877 for (;base < len;base++) {
7878 if (buf[base] == first) {
7879 if (third != 0) {
7880 if ((buf[base + 1] != next) ||
7881 (buf[base + 2] != third)) continue;
7882 } else if (next != 0) {
7883 if (buf[base + 1] != next) continue;
7884 }
7885 ctxt->checkIndex = 0;
7886#ifdef DEBUG_PUSH
7887 if (next == 0)
7888 xmlGenericError(xmlGenericErrorContext,
7889 "PP: lookup '%c' found at %d\n",
7890 first, base);
7891 else if (third == 0)
7892 xmlGenericError(xmlGenericErrorContext,
7893 "PP: lookup '%c%c' found at %d\n",
7894 first, next, base);
7895 else
7896 xmlGenericError(xmlGenericErrorContext,
7897 "PP: lookup '%c%c%c' found at %d\n",
7898 first, next, third, base);
7899#endif
7900 return(base - (in->cur - in->base));
7901 }
7902 }
7903 ctxt->checkIndex = base;
7904#ifdef DEBUG_PUSH
7905 if (next == 0)
7906 xmlGenericError(xmlGenericErrorContext,
7907 "PP: lookup '%c' failed\n", first);
7908 else if (third == 0)
7909 xmlGenericError(xmlGenericErrorContext,
7910 "PP: lookup '%c%c' failed\n", first, next);
7911 else
7912 xmlGenericError(xmlGenericErrorContext,
7913 "PP: lookup '%c%c%c' failed\n", first, next, third);
7914#endif
7915 return(-1);
7916}
7917
7918/**
7919 * xmlParseTryOrFinish:
7920 * @ctxt: an XML parser context
7921 * @terminate: last chunk indicator
7922 *
7923 * Try to progress on parsing
7924 *
7925 * Returns zero if no parsing was possible
7926 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007927static int
Owen Taylor3473f882001-02-23 17:55:21 +00007928xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7929 int ret = 0;
7930 int avail;
7931 xmlChar cur, next;
7932
7933#ifdef DEBUG_PUSH
7934 switch (ctxt->instate) {
7935 case XML_PARSER_EOF:
7936 xmlGenericError(xmlGenericErrorContext,
7937 "PP: try EOF\n"); break;
7938 case XML_PARSER_START:
7939 xmlGenericError(xmlGenericErrorContext,
7940 "PP: try START\n"); break;
7941 case XML_PARSER_MISC:
7942 xmlGenericError(xmlGenericErrorContext,
7943 "PP: try MISC\n");break;
7944 case XML_PARSER_COMMENT:
7945 xmlGenericError(xmlGenericErrorContext,
7946 "PP: try COMMENT\n");break;
7947 case XML_PARSER_PROLOG:
7948 xmlGenericError(xmlGenericErrorContext,
7949 "PP: try PROLOG\n");break;
7950 case XML_PARSER_START_TAG:
7951 xmlGenericError(xmlGenericErrorContext,
7952 "PP: try START_TAG\n");break;
7953 case XML_PARSER_CONTENT:
7954 xmlGenericError(xmlGenericErrorContext,
7955 "PP: try CONTENT\n");break;
7956 case XML_PARSER_CDATA_SECTION:
7957 xmlGenericError(xmlGenericErrorContext,
7958 "PP: try CDATA_SECTION\n");break;
7959 case XML_PARSER_END_TAG:
7960 xmlGenericError(xmlGenericErrorContext,
7961 "PP: try END_TAG\n");break;
7962 case XML_PARSER_ENTITY_DECL:
7963 xmlGenericError(xmlGenericErrorContext,
7964 "PP: try ENTITY_DECL\n");break;
7965 case XML_PARSER_ENTITY_VALUE:
7966 xmlGenericError(xmlGenericErrorContext,
7967 "PP: try ENTITY_VALUE\n");break;
7968 case XML_PARSER_ATTRIBUTE_VALUE:
7969 xmlGenericError(xmlGenericErrorContext,
7970 "PP: try ATTRIBUTE_VALUE\n");break;
7971 case XML_PARSER_DTD:
7972 xmlGenericError(xmlGenericErrorContext,
7973 "PP: try DTD\n");break;
7974 case XML_PARSER_EPILOG:
7975 xmlGenericError(xmlGenericErrorContext,
7976 "PP: try EPILOG\n");break;
7977 case XML_PARSER_PI:
7978 xmlGenericError(xmlGenericErrorContext,
7979 "PP: try PI\n");break;
7980 case XML_PARSER_IGNORE:
7981 xmlGenericError(xmlGenericErrorContext,
7982 "PP: try IGNORE\n");break;
7983 }
7984#endif
7985
7986 while (1) {
7987 /*
7988 * Pop-up of finished entities.
7989 */
7990 while ((RAW == 0) && (ctxt->inputNr > 1))
7991 xmlPopInput(ctxt);
7992
7993 if (ctxt->input ==NULL) break;
7994 if (ctxt->input->buf == NULL)
7995 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00007996 else {
7997 /*
7998 * If we are operating on converted input, try to flush
7999 * remainng chars to avoid them stalling in the non-converted
8000 * buffer.
8001 */
8002 if ((ctxt->input->buf->raw != NULL) &&
8003 (ctxt->input->buf->raw->use > 0)) {
8004 int base = ctxt->input->base -
8005 ctxt->input->buf->buffer->content;
8006 int current = ctxt->input->cur - ctxt->input->base;
8007
8008 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8009 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8010 ctxt->input->cur = ctxt->input->base + current;
8011 ctxt->input->end =
8012 &ctxt->input->buf->buffer->content[
8013 ctxt->input->buf->buffer->use];
8014 }
8015 avail = ctxt->input->buf->buffer->use -
8016 (ctxt->input->cur - ctxt->input->base);
8017 }
Owen Taylor3473f882001-02-23 17:55:21 +00008018 if (avail < 1)
8019 goto done;
8020 switch (ctxt->instate) {
8021 case XML_PARSER_EOF:
8022 /*
8023 * Document parsing is done !
8024 */
8025 goto done;
8026 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008027 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8028 xmlChar start[4];
8029 xmlCharEncoding enc;
8030
8031 /*
8032 * Very first chars read from the document flow.
8033 */
8034 if (avail < 4)
8035 goto done;
8036
8037 /*
8038 * Get the 4 first bytes and decode the charset
8039 * if enc != XML_CHAR_ENCODING_NONE
8040 * plug some encoding conversion routines.
8041 */
8042 start[0] = RAW;
8043 start[1] = NXT(1);
8044 start[2] = NXT(2);
8045 start[3] = NXT(3);
8046 enc = xmlDetectCharEncoding(start, 4);
8047 if (enc != XML_CHAR_ENCODING_NONE) {
8048 xmlSwitchEncoding(ctxt, enc);
8049 }
8050 break;
8051 }
Owen Taylor3473f882001-02-23 17:55:21 +00008052
8053 cur = ctxt->input->cur[0];
8054 next = ctxt->input->cur[1];
8055 if (cur == 0) {
8056 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8057 ctxt->sax->setDocumentLocator(ctxt->userData,
8058 &xmlDefaultSAXLocator);
8059 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8061 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8062 ctxt->wellFormed = 0;
8063 ctxt->disableSAX = 1;
8064 ctxt->instate = XML_PARSER_EOF;
8065#ifdef DEBUG_PUSH
8066 xmlGenericError(xmlGenericErrorContext,
8067 "PP: entering EOF\n");
8068#endif
8069 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8070 ctxt->sax->endDocument(ctxt->userData);
8071 goto done;
8072 }
8073 if ((cur == '<') && (next == '?')) {
8074 /* PI or XML decl */
8075 if (avail < 5) return(ret);
8076 if ((!terminate) &&
8077 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8078 return(ret);
8079 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8080 ctxt->sax->setDocumentLocator(ctxt->userData,
8081 &xmlDefaultSAXLocator);
8082 if ((ctxt->input->cur[2] == 'x') &&
8083 (ctxt->input->cur[3] == 'm') &&
8084 (ctxt->input->cur[4] == 'l') &&
8085 (IS_BLANK(ctxt->input->cur[5]))) {
8086 ret += 5;
8087#ifdef DEBUG_PUSH
8088 xmlGenericError(xmlGenericErrorContext,
8089 "PP: Parsing XML Decl\n");
8090#endif
8091 xmlParseXMLDecl(ctxt);
8092 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8093 /*
8094 * The XML REC instructs us to stop parsing right
8095 * here
8096 */
8097 ctxt->instate = XML_PARSER_EOF;
8098 return(0);
8099 }
8100 ctxt->standalone = ctxt->input->standalone;
8101 if ((ctxt->encoding == NULL) &&
8102 (ctxt->input->encoding != NULL))
8103 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8104 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8105 (!ctxt->disableSAX))
8106 ctxt->sax->startDocument(ctxt->userData);
8107 ctxt->instate = XML_PARSER_MISC;
8108#ifdef DEBUG_PUSH
8109 xmlGenericError(xmlGenericErrorContext,
8110 "PP: entering MISC\n");
8111#endif
8112 } else {
8113 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8114 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8115 (!ctxt->disableSAX))
8116 ctxt->sax->startDocument(ctxt->userData);
8117 ctxt->instate = XML_PARSER_MISC;
8118#ifdef DEBUG_PUSH
8119 xmlGenericError(xmlGenericErrorContext,
8120 "PP: entering MISC\n");
8121#endif
8122 }
8123 } else {
8124 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8125 ctxt->sax->setDocumentLocator(ctxt->userData,
8126 &xmlDefaultSAXLocator);
8127 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8128 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8129 (!ctxt->disableSAX))
8130 ctxt->sax->startDocument(ctxt->userData);
8131 ctxt->instate = XML_PARSER_MISC;
8132#ifdef DEBUG_PUSH
8133 xmlGenericError(xmlGenericErrorContext,
8134 "PP: entering MISC\n");
8135#endif
8136 }
8137 break;
8138 case XML_PARSER_MISC:
8139 SKIP_BLANKS;
8140 if (ctxt->input->buf == NULL)
8141 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8142 else
8143 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8144 if (avail < 2)
8145 goto done;
8146 cur = ctxt->input->cur[0];
8147 next = ctxt->input->cur[1];
8148 if ((cur == '<') && (next == '?')) {
8149 if ((!terminate) &&
8150 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8151 goto done;
8152#ifdef DEBUG_PUSH
8153 xmlGenericError(xmlGenericErrorContext,
8154 "PP: Parsing PI\n");
8155#endif
8156 xmlParsePI(ctxt);
8157 } else if ((cur == '<') && (next == '!') &&
8158 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8159 if ((!terminate) &&
8160 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8161 goto done;
8162#ifdef DEBUG_PUSH
8163 xmlGenericError(xmlGenericErrorContext,
8164 "PP: Parsing Comment\n");
8165#endif
8166 xmlParseComment(ctxt);
8167 ctxt->instate = XML_PARSER_MISC;
8168 } else if ((cur == '<') && (next == '!') &&
8169 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8170 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8171 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8172 (ctxt->input->cur[8] == 'E')) {
8173 if ((!terminate) &&
8174 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8175 goto done;
8176#ifdef DEBUG_PUSH
8177 xmlGenericError(xmlGenericErrorContext,
8178 "PP: Parsing internal subset\n");
8179#endif
8180 ctxt->inSubset = 1;
8181 xmlParseDocTypeDecl(ctxt);
8182 if (RAW == '[') {
8183 ctxt->instate = XML_PARSER_DTD;
8184#ifdef DEBUG_PUSH
8185 xmlGenericError(xmlGenericErrorContext,
8186 "PP: entering DTD\n");
8187#endif
8188 } else {
8189 /*
8190 * Create and update the external subset.
8191 */
8192 ctxt->inSubset = 2;
8193 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8194 (ctxt->sax->externalSubset != NULL))
8195 ctxt->sax->externalSubset(ctxt->userData,
8196 ctxt->intSubName, ctxt->extSubSystem,
8197 ctxt->extSubURI);
8198 ctxt->inSubset = 0;
8199 ctxt->instate = XML_PARSER_PROLOG;
8200#ifdef DEBUG_PUSH
8201 xmlGenericError(xmlGenericErrorContext,
8202 "PP: entering PROLOG\n");
8203#endif
8204 }
8205 } else if ((cur == '<') && (next == '!') &&
8206 (avail < 9)) {
8207 goto done;
8208 } else {
8209 ctxt->instate = XML_PARSER_START_TAG;
8210#ifdef DEBUG_PUSH
8211 xmlGenericError(xmlGenericErrorContext,
8212 "PP: entering START_TAG\n");
8213#endif
8214 }
8215 break;
8216 case XML_PARSER_IGNORE:
8217 xmlGenericError(xmlGenericErrorContext,
8218 "PP: internal error, state == IGNORE");
8219 ctxt->instate = XML_PARSER_DTD;
8220#ifdef DEBUG_PUSH
8221 xmlGenericError(xmlGenericErrorContext,
8222 "PP: entering DTD\n");
8223#endif
8224 break;
8225 case XML_PARSER_PROLOG:
8226 SKIP_BLANKS;
8227 if (ctxt->input->buf == NULL)
8228 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8229 else
8230 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8231 if (avail < 2)
8232 goto done;
8233 cur = ctxt->input->cur[0];
8234 next = ctxt->input->cur[1];
8235 if ((cur == '<') && (next == '?')) {
8236 if ((!terminate) &&
8237 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8238 goto done;
8239#ifdef DEBUG_PUSH
8240 xmlGenericError(xmlGenericErrorContext,
8241 "PP: Parsing PI\n");
8242#endif
8243 xmlParsePI(ctxt);
8244 } else if ((cur == '<') && (next == '!') &&
8245 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8246 if ((!terminate) &&
8247 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8248 goto done;
8249#ifdef DEBUG_PUSH
8250 xmlGenericError(xmlGenericErrorContext,
8251 "PP: Parsing Comment\n");
8252#endif
8253 xmlParseComment(ctxt);
8254 ctxt->instate = XML_PARSER_PROLOG;
8255 } else if ((cur == '<') && (next == '!') &&
8256 (avail < 4)) {
8257 goto done;
8258 } else {
8259 ctxt->instate = XML_PARSER_START_TAG;
8260#ifdef DEBUG_PUSH
8261 xmlGenericError(xmlGenericErrorContext,
8262 "PP: entering START_TAG\n");
8263#endif
8264 }
8265 break;
8266 case XML_PARSER_EPILOG:
8267 SKIP_BLANKS;
8268 if (ctxt->input->buf == NULL)
8269 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8270 else
8271 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8272 if (avail < 2)
8273 goto done;
8274 cur = ctxt->input->cur[0];
8275 next = ctxt->input->cur[1];
8276 if ((cur == '<') && (next == '?')) {
8277 if ((!terminate) &&
8278 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8279 goto done;
8280#ifdef DEBUG_PUSH
8281 xmlGenericError(xmlGenericErrorContext,
8282 "PP: Parsing PI\n");
8283#endif
8284 xmlParsePI(ctxt);
8285 ctxt->instate = XML_PARSER_EPILOG;
8286 } else if ((cur == '<') && (next == '!') &&
8287 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8288 if ((!terminate) &&
8289 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8290 goto done;
8291#ifdef DEBUG_PUSH
8292 xmlGenericError(xmlGenericErrorContext,
8293 "PP: Parsing Comment\n");
8294#endif
8295 xmlParseComment(ctxt);
8296 ctxt->instate = XML_PARSER_EPILOG;
8297 } else if ((cur == '<') && (next == '!') &&
8298 (avail < 4)) {
8299 goto done;
8300 } else {
8301 ctxt->errNo = XML_ERR_DOCUMENT_END;
8302 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8303 ctxt->sax->error(ctxt->userData,
8304 "Extra content at the end of the document\n");
8305 ctxt->wellFormed = 0;
8306 ctxt->disableSAX = 1;
8307 ctxt->instate = XML_PARSER_EOF;
8308#ifdef DEBUG_PUSH
8309 xmlGenericError(xmlGenericErrorContext,
8310 "PP: entering EOF\n");
8311#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008312 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008313 ctxt->sax->endDocument(ctxt->userData);
8314 goto done;
8315 }
8316 break;
8317 case XML_PARSER_START_TAG: {
8318 xmlChar *name, *oldname;
8319
8320 if ((avail < 2) && (ctxt->inputNr == 1))
8321 goto done;
8322 cur = ctxt->input->cur[0];
8323 if (cur != '<') {
8324 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8326 ctxt->sax->error(ctxt->userData,
8327 "Start tag expect, '<' not found\n");
8328 ctxt->wellFormed = 0;
8329 ctxt->disableSAX = 1;
8330 ctxt->instate = XML_PARSER_EOF;
8331#ifdef DEBUG_PUSH
8332 xmlGenericError(xmlGenericErrorContext,
8333 "PP: entering EOF\n");
8334#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008335 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008336 ctxt->sax->endDocument(ctxt->userData);
8337 goto done;
8338 }
8339 if ((!terminate) &&
8340 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8341 goto done;
8342 if (ctxt->spaceNr == 0)
8343 spacePush(ctxt, -1);
8344 else
8345 spacePush(ctxt, *ctxt->space);
8346 name = xmlParseStartTag(ctxt);
8347 if (name == NULL) {
8348 spacePop(ctxt);
8349 ctxt->instate = XML_PARSER_EOF;
8350#ifdef DEBUG_PUSH
8351 xmlGenericError(xmlGenericErrorContext,
8352 "PP: entering EOF\n");
8353#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008354 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008355 ctxt->sax->endDocument(ctxt->userData);
8356 goto done;
8357 }
8358 namePush(ctxt, xmlStrdup(name));
8359
8360 /*
8361 * [ VC: Root Element Type ]
8362 * The Name in the document type declaration must match
8363 * the element type of the root element.
8364 */
8365 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8366 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8367 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8368
8369 /*
8370 * Check for an Empty Element.
8371 */
8372 if ((RAW == '/') && (NXT(1) == '>')) {
8373 SKIP(2);
8374 if ((ctxt->sax != NULL) &&
8375 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8376 ctxt->sax->endElement(ctxt->userData, name);
8377 xmlFree(name);
8378 oldname = namePop(ctxt);
8379 spacePop(ctxt);
8380 if (oldname != NULL) {
8381#ifdef DEBUG_STACK
8382 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8383#endif
8384 xmlFree(oldname);
8385 }
8386 if (ctxt->name == NULL) {
8387 ctxt->instate = XML_PARSER_EPILOG;
8388#ifdef DEBUG_PUSH
8389 xmlGenericError(xmlGenericErrorContext,
8390 "PP: entering EPILOG\n");
8391#endif
8392 } else {
8393 ctxt->instate = XML_PARSER_CONTENT;
8394#ifdef DEBUG_PUSH
8395 xmlGenericError(xmlGenericErrorContext,
8396 "PP: entering CONTENT\n");
8397#endif
8398 }
8399 break;
8400 }
8401 if (RAW == '>') {
8402 NEXT;
8403 } else {
8404 ctxt->errNo = XML_ERR_GT_REQUIRED;
8405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8406 ctxt->sax->error(ctxt->userData,
8407 "Couldn't find end of Start Tag %s\n",
8408 name);
8409 ctxt->wellFormed = 0;
8410 ctxt->disableSAX = 1;
8411
8412 /*
8413 * end of parsing of this node.
8414 */
8415 nodePop(ctxt);
8416 oldname = namePop(ctxt);
8417 spacePop(ctxt);
8418 if (oldname != NULL) {
8419#ifdef DEBUG_STACK
8420 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8421#endif
8422 xmlFree(oldname);
8423 }
8424 }
8425 xmlFree(name);
8426 ctxt->instate = XML_PARSER_CONTENT;
8427#ifdef DEBUG_PUSH
8428 xmlGenericError(xmlGenericErrorContext,
8429 "PP: entering CONTENT\n");
8430#endif
8431 break;
8432 }
8433 case XML_PARSER_CONTENT: {
8434 const xmlChar *test;
8435 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008436 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008437
8438 /*
8439 * Handle preparsed entities and charRef
8440 */
8441 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008442 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008443
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008444 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008445 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8446 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008447 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008448 ctxt->token = 0;
8449 }
8450 if ((avail < 2) && (ctxt->inputNr == 1))
8451 goto done;
8452 cur = ctxt->input->cur[0];
8453 next = ctxt->input->cur[1];
8454
8455 test = CUR_PTR;
8456 cons = ctxt->input->consumed;
8457 tok = ctxt->token;
8458 if ((cur == '<') && (next == '?')) {
8459 if ((!terminate) &&
8460 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8461 goto done;
8462#ifdef DEBUG_PUSH
8463 xmlGenericError(xmlGenericErrorContext,
8464 "PP: Parsing PI\n");
8465#endif
8466 xmlParsePI(ctxt);
8467 } else if ((cur == '<') && (next == '!') &&
8468 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8469 if ((!terminate) &&
8470 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8471 goto done;
8472#ifdef DEBUG_PUSH
8473 xmlGenericError(xmlGenericErrorContext,
8474 "PP: Parsing Comment\n");
8475#endif
8476 xmlParseComment(ctxt);
8477 ctxt->instate = XML_PARSER_CONTENT;
8478 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8479 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8480 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8481 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8482 (ctxt->input->cur[8] == '[')) {
8483 SKIP(9);
8484 ctxt->instate = XML_PARSER_CDATA_SECTION;
8485#ifdef DEBUG_PUSH
8486 xmlGenericError(xmlGenericErrorContext,
8487 "PP: entering CDATA_SECTION\n");
8488#endif
8489 break;
8490 } else if ((cur == '<') && (next == '!') &&
8491 (avail < 9)) {
8492 goto done;
8493 } else if ((cur == '<') && (next == '/')) {
8494 ctxt->instate = XML_PARSER_END_TAG;
8495#ifdef DEBUG_PUSH
8496 xmlGenericError(xmlGenericErrorContext,
8497 "PP: entering END_TAG\n");
8498#endif
8499 break;
8500 } else if (cur == '<') {
8501 ctxt->instate = XML_PARSER_START_TAG;
8502#ifdef DEBUG_PUSH
8503 xmlGenericError(xmlGenericErrorContext,
8504 "PP: entering START_TAG\n");
8505#endif
8506 break;
8507 } else if (cur == '&') {
8508 if ((!terminate) &&
8509 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8510 goto done;
8511#ifdef DEBUG_PUSH
8512 xmlGenericError(xmlGenericErrorContext,
8513 "PP: Parsing Reference\n");
8514#endif
8515 xmlParseReference(ctxt);
8516 } else {
8517 /* TODO Avoid the extra copy, handle directly !!! */
8518 /*
8519 * Goal of the following test is:
8520 * - minimize calls to the SAX 'character' callback
8521 * when they are mergeable
8522 * - handle an problem for isBlank when we only parse
8523 * a sequence of blank chars and the next one is
8524 * not available to check against '<' presence.
8525 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008526 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008527 * of the parser.
8528 */
8529 if ((ctxt->inputNr == 1) &&
8530 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8531 if ((!terminate) &&
8532 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8533 goto done;
8534 }
8535 ctxt->checkIndex = 0;
8536#ifdef DEBUG_PUSH
8537 xmlGenericError(xmlGenericErrorContext,
8538 "PP: Parsing char data\n");
8539#endif
8540 xmlParseCharData(ctxt, 0);
8541 }
8542 /*
8543 * Pop-up of finished entities.
8544 */
8545 while ((RAW == 0) && (ctxt->inputNr > 1))
8546 xmlPopInput(ctxt);
8547 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8548 (tok == ctxt->token)) {
8549 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8551 ctxt->sax->error(ctxt->userData,
8552 "detected an error in element content\n");
8553 ctxt->wellFormed = 0;
8554 ctxt->disableSAX = 1;
8555 ctxt->instate = XML_PARSER_EOF;
8556 break;
8557 }
8558 break;
8559 }
8560 case XML_PARSER_CDATA_SECTION: {
8561 /*
8562 * The Push mode need to have the SAX callback for
8563 * cdataBlock merge back contiguous callbacks.
8564 */
8565 int base;
8566
8567 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8568 if (base < 0) {
8569 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8570 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8571 if (ctxt->sax->cdataBlock != NULL)
8572 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8573 XML_PARSER_BIG_BUFFER_SIZE);
8574 }
8575 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8576 ctxt->checkIndex = 0;
8577 }
8578 goto done;
8579 } else {
8580 if ((ctxt->sax != NULL) && (base > 0) &&
8581 (!ctxt->disableSAX)) {
8582 if (ctxt->sax->cdataBlock != NULL)
8583 ctxt->sax->cdataBlock(ctxt->userData,
8584 ctxt->input->cur, base);
8585 }
8586 SKIP(base + 3);
8587 ctxt->checkIndex = 0;
8588 ctxt->instate = XML_PARSER_CONTENT;
8589#ifdef DEBUG_PUSH
8590 xmlGenericError(xmlGenericErrorContext,
8591 "PP: entering CONTENT\n");
8592#endif
8593 }
8594 break;
8595 }
8596 case XML_PARSER_END_TAG:
8597 if (avail < 2)
8598 goto done;
8599 if ((!terminate) &&
8600 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8601 goto done;
8602 xmlParseEndTag(ctxt);
8603 if (ctxt->name == NULL) {
8604 ctxt->instate = XML_PARSER_EPILOG;
8605#ifdef DEBUG_PUSH
8606 xmlGenericError(xmlGenericErrorContext,
8607 "PP: entering EPILOG\n");
8608#endif
8609 } else {
8610 ctxt->instate = XML_PARSER_CONTENT;
8611#ifdef DEBUG_PUSH
8612 xmlGenericError(xmlGenericErrorContext,
8613 "PP: entering CONTENT\n");
8614#endif
8615 }
8616 break;
8617 case XML_PARSER_DTD: {
8618 /*
8619 * Sorry but progressive parsing of the internal subset
8620 * is not expected to be supported. We first check that
8621 * the full content of the internal subset is available and
8622 * the parsing is launched only at that point.
8623 * Internal subset ends up with "']' S? '>'" in an unescaped
8624 * section and not in a ']]>' sequence which are conditional
8625 * sections (whoever argued to keep that crap in XML deserve
8626 * a place in hell !).
8627 */
8628 int base, i;
8629 xmlChar *buf;
8630 xmlChar quote = 0;
8631
8632 base = ctxt->input->cur - ctxt->input->base;
8633 if (base < 0) return(0);
8634 if (ctxt->checkIndex > base)
8635 base = ctxt->checkIndex;
8636 buf = ctxt->input->buf->buffer->content;
8637 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8638 base++) {
8639 if (quote != 0) {
8640 if (buf[base] == quote)
8641 quote = 0;
8642 continue;
8643 }
8644 if (buf[base] == '"') {
8645 quote = '"';
8646 continue;
8647 }
8648 if (buf[base] == '\'') {
8649 quote = '\'';
8650 continue;
8651 }
8652 if (buf[base] == ']') {
8653 if ((unsigned int) base +1 >=
8654 ctxt->input->buf->buffer->use)
8655 break;
8656 if (buf[base + 1] == ']') {
8657 /* conditional crap, skip both ']' ! */
8658 base++;
8659 continue;
8660 }
8661 for (i = 0;
8662 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8663 i++) {
8664 if (buf[base + i] == '>')
8665 goto found_end_int_subset;
8666 }
8667 break;
8668 }
8669 }
8670 /*
8671 * We didn't found the end of the Internal subset
8672 */
8673 if (quote == 0)
8674 ctxt->checkIndex = base;
8675#ifdef DEBUG_PUSH
8676 if (next == 0)
8677 xmlGenericError(xmlGenericErrorContext,
8678 "PP: lookup of int subset end filed\n");
8679#endif
8680 goto done;
8681
8682found_end_int_subset:
8683 xmlParseInternalSubset(ctxt);
8684 ctxt->inSubset = 2;
8685 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8686 (ctxt->sax->externalSubset != NULL))
8687 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8688 ctxt->extSubSystem, ctxt->extSubURI);
8689 ctxt->inSubset = 0;
8690 ctxt->instate = XML_PARSER_PROLOG;
8691 ctxt->checkIndex = 0;
8692#ifdef DEBUG_PUSH
8693 xmlGenericError(xmlGenericErrorContext,
8694 "PP: entering PROLOG\n");
8695#endif
8696 break;
8697 }
8698 case XML_PARSER_COMMENT:
8699 xmlGenericError(xmlGenericErrorContext,
8700 "PP: internal error, state == COMMENT\n");
8701 ctxt->instate = XML_PARSER_CONTENT;
8702#ifdef DEBUG_PUSH
8703 xmlGenericError(xmlGenericErrorContext,
8704 "PP: entering CONTENT\n");
8705#endif
8706 break;
8707 case XML_PARSER_PI:
8708 xmlGenericError(xmlGenericErrorContext,
8709 "PP: internal error, state == PI\n");
8710 ctxt->instate = XML_PARSER_CONTENT;
8711#ifdef DEBUG_PUSH
8712 xmlGenericError(xmlGenericErrorContext,
8713 "PP: entering CONTENT\n");
8714#endif
8715 break;
8716 case XML_PARSER_ENTITY_DECL:
8717 xmlGenericError(xmlGenericErrorContext,
8718 "PP: internal error, state == ENTITY_DECL\n");
8719 ctxt->instate = XML_PARSER_DTD;
8720#ifdef DEBUG_PUSH
8721 xmlGenericError(xmlGenericErrorContext,
8722 "PP: entering DTD\n");
8723#endif
8724 break;
8725 case XML_PARSER_ENTITY_VALUE:
8726 xmlGenericError(xmlGenericErrorContext,
8727 "PP: internal error, state == ENTITY_VALUE\n");
8728 ctxt->instate = XML_PARSER_CONTENT;
8729#ifdef DEBUG_PUSH
8730 xmlGenericError(xmlGenericErrorContext,
8731 "PP: entering DTD\n");
8732#endif
8733 break;
8734 case XML_PARSER_ATTRIBUTE_VALUE:
8735 xmlGenericError(xmlGenericErrorContext,
8736 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8737 ctxt->instate = XML_PARSER_START_TAG;
8738#ifdef DEBUG_PUSH
8739 xmlGenericError(xmlGenericErrorContext,
8740 "PP: entering START_TAG\n");
8741#endif
8742 break;
8743 case XML_PARSER_SYSTEM_LITERAL:
8744 xmlGenericError(xmlGenericErrorContext,
8745 "PP: internal error, state == SYSTEM_LITERAL\n");
8746 ctxt->instate = XML_PARSER_START_TAG;
8747#ifdef DEBUG_PUSH
8748 xmlGenericError(xmlGenericErrorContext,
8749 "PP: entering START_TAG\n");
8750#endif
8751 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008752 case XML_PARSER_PUBLIC_LITERAL:
8753 xmlGenericError(xmlGenericErrorContext,
8754 "PP: internal error, state == PUBLIC_LITERAL\n");
8755 ctxt->instate = XML_PARSER_START_TAG;
8756#ifdef DEBUG_PUSH
8757 xmlGenericError(xmlGenericErrorContext,
8758 "PP: entering START_TAG\n");
8759#endif
8760 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008761 }
8762 }
8763done:
8764#ifdef DEBUG_PUSH
8765 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8766#endif
8767 return(ret);
8768}
8769
8770/**
Owen Taylor3473f882001-02-23 17:55:21 +00008771 * xmlParseChunk:
8772 * @ctxt: an XML parser context
8773 * @chunk: an char array
8774 * @size: the size in byte of the chunk
8775 * @terminate: last chunk indicator
8776 *
8777 * Parse a Chunk of memory
8778 *
8779 * Returns zero if no error, the xmlParserErrors otherwise.
8780 */
8781int
8782xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8783 int terminate) {
8784 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8785 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8786 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8787 int cur = ctxt->input->cur - ctxt->input->base;
8788
8789 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8790 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8791 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008792 ctxt->input->end =
8793 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008794#ifdef DEBUG_PUSH
8795 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8796#endif
8797
8798 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8799 xmlParseTryOrFinish(ctxt, terminate);
8800 } else if (ctxt->instate != XML_PARSER_EOF) {
8801 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8802 xmlParserInputBufferPtr in = ctxt->input->buf;
8803 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8804 (in->raw != NULL)) {
8805 int nbchars;
8806
8807 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8808 if (nbchars < 0) {
8809 xmlGenericError(xmlGenericErrorContext,
8810 "xmlParseChunk: encoder error\n");
8811 return(XML_ERR_INVALID_ENCODING);
8812 }
8813 }
8814 }
8815 }
8816 xmlParseTryOrFinish(ctxt, terminate);
8817 if (terminate) {
8818 /*
8819 * Check for termination
8820 */
8821 if ((ctxt->instate != XML_PARSER_EOF) &&
8822 (ctxt->instate != XML_PARSER_EPILOG)) {
8823 ctxt->errNo = XML_ERR_DOCUMENT_END;
8824 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8825 ctxt->sax->error(ctxt->userData,
8826 "Extra content at the end of the document\n");
8827 ctxt->wellFormed = 0;
8828 ctxt->disableSAX = 1;
8829 }
8830 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008831 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008832 ctxt->sax->endDocument(ctxt->userData);
8833 }
8834 ctxt->instate = XML_PARSER_EOF;
8835 }
8836 return((xmlParserErrors) ctxt->errNo);
8837}
8838
8839/************************************************************************
8840 * *
8841 * I/O front end functions to the parser *
8842 * *
8843 ************************************************************************/
8844
8845/**
8846 * xmlStopParser:
8847 * @ctxt: an XML parser context
8848 *
8849 * Blocks further parser processing
8850 */
8851void
8852xmlStopParser(xmlParserCtxtPtr ctxt) {
8853 ctxt->instate = XML_PARSER_EOF;
8854 if (ctxt->input != NULL)
8855 ctxt->input->cur = BAD_CAST"";
8856}
8857
8858/**
8859 * xmlCreatePushParserCtxt:
8860 * @sax: a SAX handler
8861 * @user_data: The user data returned on SAX callbacks
8862 * @chunk: a pointer to an array of chars
8863 * @size: number of chars in the array
8864 * @filename: an optional file name or URI
8865 *
8866 * Create a parser context for using the XML parser in push mode
8867 * To allow content encoding detection, @size should be >= 4
8868 * The value of @filename is used for fetching external entities
8869 * and error/warning reports.
8870 *
8871 * Returns the new parser context or NULL
8872 */
8873xmlParserCtxtPtr
8874xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8875 const char *chunk, int size, const char *filename) {
8876 xmlParserCtxtPtr ctxt;
8877 xmlParserInputPtr inputStream;
8878 xmlParserInputBufferPtr buf;
8879 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8880
8881 /*
8882 * plug some encoding conversion routines
8883 */
8884 if ((chunk != NULL) && (size >= 4))
8885 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8886
8887 buf = xmlAllocParserInputBuffer(enc);
8888 if (buf == NULL) return(NULL);
8889
8890 ctxt = xmlNewParserCtxt();
8891 if (ctxt == NULL) {
8892 xmlFree(buf);
8893 return(NULL);
8894 }
8895 if (sax != NULL) {
8896 if (ctxt->sax != &xmlDefaultSAXHandler)
8897 xmlFree(ctxt->sax);
8898 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8899 if (ctxt->sax == NULL) {
8900 xmlFree(buf);
8901 xmlFree(ctxt);
8902 return(NULL);
8903 }
8904 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8905 if (user_data != NULL)
8906 ctxt->userData = user_data;
8907 }
8908 if (filename == NULL) {
8909 ctxt->directory = NULL;
8910 } else {
8911 ctxt->directory = xmlParserGetDirectory(filename);
8912 }
8913
8914 inputStream = xmlNewInputStream(ctxt);
8915 if (inputStream == NULL) {
8916 xmlFreeParserCtxt(ctxt);
8917 return(NULL);
8918 }
8919
8920 if (filename == NULL)
8921 inputStream->filename = NULL;
8922 else
8923 inputStream->filename = xmlMemStrdup(filename);
8924 inputStream->buf = buf;
8925 inputStream->base = inputStream->buf->buffer->content;
8926 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008927 inputStream->end =
8928 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008929
8930 inputPush(ctxt, inputStream);
8931
8932 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8933 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008934 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8935 int cur = ctxt->input->cur - ctxt->input->base;
8936
Owen Taylor3473f882001-02-23 17:55:21 +00008937 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008938
8939 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8940 ctxt->input->cur = ctxt->input->base + cur;
8941 ctxt->input->end =
8942 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008943#ifdef DEBUG_PUSH
8944 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8945#endif
8946 }
8947
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008948 if (enc != XML_CHAR_ENCODING_NONE) {
8949 xmlSwitchEncoding(ctxt, enc);
8950 }
8951
Owen Taylor3473f882001-02-23 17:55:21 +00008952 return(ctxt);
8953}
8954
8955/**
8956 * xmlCreateIOParserCtxt:
8957 * @sax: a SAX handler
8958 * @user_data: The user data returned on SAX callbacks
8959 * @ioread: an I/O read function
8960 * @ioclose: an I/O close function
8961 * @ioctx: an I/O handler
8962 * @enc: the charset encoding if known
8963 *
8964 * Create a parser context for using the XML parser with an existing
8965 * I/O stream
8966 *
8967 * Returns the new parser context or NULL
8968 */
8969xmlParserCtxtPtr
8970xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8971 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8972 void *ioctx, xmlCharEncoding enc) {
8973 xmlParserCtxtPtr ctxt;
8974 xmlParserInputPtr inputStream;
8975 xmlParserInputBufferPtr buf;
8976
8977 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8978 if (buf == NULL) return(NULL);
8979
8980 ctxt = xmlNewParserCtxt();
8981 if (ctxt == NULL) {
8982 xmlFree(buf);
8983 return(NULL);
8984 }
8985 if (sax != NULL) {
8986 if (ctxt->sax != &xmlDefaultSAXHandler)
8987 xmlFree(ctxt->sax);
8988 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8989 if (ctxt->sax == NULL) {
8990 xmlFree(buf);
8991 xmlFree(ctxt);
8992 return(NULL);
8993 }
8994 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8995 if (user_data != NULL)
8996 ctxt->userData = user_data;
8997 }
8998
8999 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
9000 if (inputStream == NULL) {
9001 xmlFreeParserCtxt(ctxt);
9002 return(NULL);
9003 }
9004 inputPush(ctxt, inputStream);
9005
9006 return(ctxt);
9007}
9008
9009/************************************************************************
9010 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009011 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009012 * *
9013 ************************************************************************/
9014
9015/**
9016 * xmlIOParseDTD:
9017 * @sax: the SAX handler block or NULL
9018 * @input: an Input Buffer
9019 * @enc: the charset encoding if known
9020 *
9021 * Load and parse a DTD
9022 *
9023 * Returns the resulting xmlDtdPtr or NULL in case of error.
9024 * @input will be freed at parsing end.
9025 */
9026
9027xmlDtdPtr
9028xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9029 xmlCharEncoding enc) {
9030 xmlDtdPtr ret = NULL;
9031 xmlParserCtxtPtr ctxt;
9032 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009033 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009034
9035 if (input == NULL)
9036 return(NULL);
9037
9038 ctxt = xmlNewParserCtxt();
9039 if (ctxt == NULL) {
9040 return(NULL);
9041 }
9042
9043 /*
9044 * Set-up the SAX context
9045 */
9046 if (sax != NULL) {
9047 if (ctxt->sax != NULL)
9048 xmlFree(ctxt->sax);
9049 ctxt->sax = sax;
9050 ctxt->userData = NULL;
9051 }
9052
9053 /*
9054 * generate a parser input from the I/O handler
9055 */
9056
9057 pinput = xmlNewIOInputStream(ctxt, input, enc);
9058 if (pinput == NULL) {
9059 if (sax != NULL) ctxt->sax = NULL;
9060 xmlFreeParserCtxt(ctxt);
9061 return(NULL);
9062 }
9063
9064 /*
9065 * plug some encoding conversion routines here.
9066 */
9067 xmlPushInput(ctxt, pinput);
9068
9069 pinput->filename = NULL;
9070 pinput->line = 1;
9071 pinput->col = 1;
9072 pinput->base = ctxt->input->cur;
9073 pinput->cur = ctxt->input->cur;
9074 pinput->free = NULL;
9075
9076 /*
9077 * let's parse that entity knowing it's an external subset.
9078 */
9079 ctxt->inSubset = 2;
9080 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9081 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9082 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009083
9084 if (enc == XML_CHAR_ENCODING_NONE) {
9085 /*
9086 * Get the 4 first bytes and decode the charset
9087 * if enc != XML_CHAR_ENCODING_NONE
9088 * plug some encoding conversion routines.
9089 */
9090 start[0] = RAW;
9091 start[1] = NXT(1);
9092 start[2] = NXT(2);
9093 start[3] = NXT(3);
9094 enc = xmlDetectCharEncoding(start, 4);
9095 if (enc != XML_CHAR_ENCODING_NONE) {
9096 xmlSwitchEncoding(ctxt, enc);
9097 }
9098 }
9099
Owen Taylor3473f882001-02-23 17:55:21 +00009100 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9101
9102 if (ctxt->myDoc != NULL) {
9103 if (ctxt->wellFormed) {
9104 ret = ctxt->myDoc->extSubset;
9105 ctxt->myDoc->extSubset = NULL;
9106 } else {
9107 ret = NULL;
9108 }
9109 xmlFreeDoc(ctxt->myDoc);
9110 ctxt->myDoc = NULL;
9111 }
9112 if (sax != NULL) ctxt->sax = NULL;
9113 xmlFreeParserCtxt(ctxt);
9114
9115 return(ret);
9116}
9117
9118/**
9119 * xmlSAXParseDTD:
9120 * @sax: the SAX handler block
9121 * @ExternalID: a NAME* containing the External ID of the DTD
9122 * @SystemID: a NAME* containing the URL to the DTD
9123 *
9124 * Load and parse an external subset.
9125 *
9126 * Returns the resulting xmlDtdPtr or NULL in case of error.
9127 */
9128
9129xmlDtdPtr
9130xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9131 const xmlChar *SystemID) {
9132 xmlDtdPtr ret = NULL;
9133 xmlParserCtxtPtr ctxt;
9134 xmlParserInputPtr input = NULL;
9135 xmlCharEncoding enc;
9136
9137 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9138
9139 ctxt = xmlNewParserCtxt();
9140 if (ctxt == NULL) {
9141 return(NULL);
9142 }
9143
9144 /*
9145 * Set-up the SAX context
9146 */
9147 if (sax != NULL) {
9148 if (ctxt->sax != NULL)
9149 xmlFree(ctxt->sax);
9150 ctxt->sax = sax;
9151 ctxt->userData = NULL;
9152 }
9153
9154 /*
9155 * Ask the Entity resolver to load the damn thing
9156 */
9157
9158 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9159 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9160 if (input == NULL) {
9161 if (sax != NULL) ctxt->sax = NULL;
9162 xmlFreeParserCtxt(ctxt);
9163 return(NULL);
9164 }
9165
9166 /*
9167 * plug some encoding conversion routines here.
9168 */
9169 xmlPushInput(ctxt, input);
9170 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9171 xmlSwitchEncoding(ctxt, enc);
9172
9173 if (input->filename == NULL)
9174 input->filename = (char *) xmlStrdup(SystemID);
9175 input->line = 1;
9176 input->col = 1;
9177 input->base = ctxt->input->cur;
9178 input->cur = ctxt->input->cur;
9179 input->free = NULL;
9180
9181 /*
9182 * let's parse that entity knowing it's an external subset.
9183 */
9184 ctxt->inSubset = 2;
9185 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9186 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9187 ExternalID, SystemID);
9188 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9189
9190 if (ctxt->myDoc != NULL) {
9191 if (ctxt->wellFormed) {
9192 ret = ctxt->myDoc->extSubset;
9193 ctxt->myDoc->extSubset = NULL;
9194 } else {
9195 ret = NULL;
9196 }
9197 xmlFreeDoc(ctxt->myDoc);
9198 ctxt->myDoc = NULL;
9199 }
9200 if (sax != NULL) ctxt->sax = NULL;
9201 xmlFreeParserCtxt(ctxt);
9202
9203 return(ret);
9204}
9205
9206/**
9207 * xmlParseDTD:
9208 * @ExternalID: a NAME* containing the External ID of the DTD
9209 * @SystemID: a NAME* containing the URL to the DTD
9210 *
9211 * Load and parse an external subset.
9212 *
9213 * Returns the resulting xmlDtdPtr or NULL in case of error.
9214 */
9215
9216xmlDtdPtr
9217xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9218 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9219}
9220
9221/************************************************************************
9222 * *
9223 * Front ends when parsing an Entity *
9224 * *
9225 ************************************************************************/
9226
9227/**
Owen Taylor3473f882001-02-23 17:55:21 +00009228 * xmlParseCtxtExternalEntity:
9229 * @ctx: the existing parsing context
9230 * @URL: the URL for the entity to load
9231 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009232 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009233 *
9234 * Parse an external general entity within an existing parsing context
9235 * An external general parsed entity is well-formed if it matches the
9236 * production labeled extParsedEnt.
9237 *
9238 * [78] extParsedEnt ::= TextDecl? content
9239 *
9240 * Returns 0 if the entity is well formed, -1 in case of args problem and
9241 * the parser error code otherwise
9242 */
9243
9244int
9245xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009246 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009247 xmlParserCtxtPtr ctxt;
9248 xmlDocPtr newDoc;
9249 xmlSAXHandlerPtr oldsax = NULL;
9250 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009251 xmlChar start[4];
9252 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009253
9254 if (ctx->depth > 40) {
9255 return(XML_ERR_ENTITY_LOOP);
9256 }
9257
Daniel Veillardcda96922001-08-21 10:56:31 +00009258 if (lst != NULL)
9259 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009260 if ((URL == NULL) && (ID == NULL))
9261 return(-1);
9262 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9263 return(-1);
9264
9265
9266 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9267 if (ctxt == NULL) return(-1);
9268 ctxt->userData = ctxt;
9269 oldsax = ctxt->sax;
9270 ctxt->sax = ctx->sax;
9271 newDoc = xmlNewDoc(BAD_CAST "1.0");
9272 if (newDoc == NULL) {
9273 xmlFreeParserCtxt(ctxt);
9274 return(-1);
9275 }
9276 if (ctx->myDoc != NULL) {
9277 newDoc->intSubset = ctx->myDoc->intSubset;
9278 newDoc->extSubset = ctx->myDoc->extSubset;
9279 }
9280 if (ctx->myDoc->URL != NULL) {
9281 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9282 }
9283 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9284 if (newDoc->children == NULL) {
9285 ctxt->sax = oldsax;
9286 xmlFreeParserCtxt(ctxt);
9287 newDoc->intSubset = NULL;
9288 newDoc->extSubset = NULL;
9289 xmlFreeDoc(newDoc);
9290 return(-1);
9291 }
9292 nodePush(ctxt, newDoc->children);
9293 if (ctx->myDoc == NULL) {
9294 ctxt->myDoc = newDoc;
9295 } else {
9296 ctxt->myDoc = ctx->myDoc;
9297 newDoc->children->doc = ctx->myDoc;
9298 }
9299
Daniel Veillard87a764e2001-06-20 17:41:10 +00009300 /*
9301 * Get the 4 first bytes and decode the charset
9302 * if enc != XML_CHAR_ENCODING_NONE
9303 * plug some encoding conversion routines.
9304 */
9305 GROW
9306 start[0] = RAW;
9307 start[1] = NXT(1);
9308 start[2] = NXT(2);
9309 start[3] = NXT(3);
9310 enc = xmlDetectCharEncoding(start, 4);
9311 if (enc != XML_CHAR_ENCODING_NONE) {
9312 xmlSwitchEncoding(ctxt, enc);
9313 }
9314
Owen Taylor3473f882001-02-23 17:55:21 +00009315 /*
9316 * Parse a possible text declaration first
9317 */
Owen Taylor3473f882001-02-23 17:55:21 +00009318 if ((RAW == '<') && (NXT(1) == '?') &&
9319 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9320 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9321 xmlParseTextDecl(ctxt);
9322 }
9323
9324 /*
9325 * Doing validity checking on chunk doesn't make sense
9326 */
9327 ctxt->instate = XML_PARSER_CONTENT;
9328 ctxt->validate = ctx->validate;
9329 ctxt->loadsubset = ctx->loadsubset;
9330 ctxt->depth = ctx->depth + 1;
9331 ctxt->replaceEntities = ctx->replaceEntities;
9332 if (ctxt->validate) {
9333 ctxt->vctxt.error = ctx->vctxt.error;
9334 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009335 } else {
9336 ctxt->vctxt.error = NULL;
9337 ctxt->vctxt.warning = NULL;
9338 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009339 ctxt->vctxt.nodeTab = NULL;
9340 ctxt->vctxt.nodeNr = 0;
9341 ctxt->vctxt.nodeMax = 0;
9342 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009343
9344 xmlParseContent(ctxt);
9345
9346 if ((RAW == '<') && (NXT(1) == '/')) {
9347 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9349 ctxt->sax->error(ctxt->userData,
9350 "chunk is not well balanced\n");
9351 ctxt->wellFormed = 0;
9352 ctxt->disableSAX = 1;
9353 } else if (RAW != 0) {
9354 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9356 ctxt->sax->error(ctxt->userData,
9357 "extra content at the end of well balanced chunk\n");
9358 ctxt->wellFormed = 0;
9359 ctxt->disableSAX = 1;
9360 }
9361 if (ctxt->node != newDoc->children) {
9362 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9364 ctxt->sax->error(ctxt->userData,
9365 "chunk is not well balanced\n");
9366 ctxt->wellFormed = 0;
9367 ctxt->disableSAX = 1;
9368 }
9369
9370 if (!ctxt->wellFormed) {
9371 if (ctxt->errNo == 0)
9372 ret = 1;
9373 else
9374 ret = ctxt->errNo;
9375 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009376 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009377 xmlNodePtr cur;
9378
9379 /*
9380 * Return the newly created nodeset after unlinking it from
9381 * they pseudo parent.
9382 */
9383 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009384 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009385 while (cur != NULL) {
9386 cur->parent = NULL;
9387 cur = cur->next;
9388 }
9389 newDoc->children->children = NULL;
9390 }
9391 ret = 0;
9392 }
9393 ctxt->sax = oldsax;
9394 xmlFreeParserCtxt(ctxt);
9395 newDoc->intSubset = NULL;
9396 newDoc->extSubset = NULL;
9397 xmlFreeDoc(newDoc);
9398
9399 return(ret);
9400}
9401
9402/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009403 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009404 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009405 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009406 * @sax: the SAX handler bloc (possibly NULL)
9407 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9408 * @depth: Used for loop detection, use 0
9409 * @URL: the URL for the entity to load
9410 * @ID: the System ID for the entity to load
9411 * @list: the return value for the set of parsed nodes
9412 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009413 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009414 *
9415 * Returns 0 if the entity is well formed, -1 in case of args problem and
9416 * the parser error code otherwise
9417 */
9418
Daniel Veillard257d9102001-05-08 10:41:44 +00009419static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009420xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9421 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009422 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009423 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009424 xmlParserCtxtPtr ctxt;
9425 xmlDocPtr newDoc;
9426 xmlSAXHandlerPtr oldsax = NULL;
9427 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009428 xmlChar start[4];
9429 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009430
9431 if (depth > 40) {
9432 return(XML_ERR_ENTITY_LOOP);
9433 }
9434
9435
9436
9437 if (list != NULL)
9438 *list = NULL;
9439 if ((URL == NULL) && (ID == NULL))
9440 return(-1);
9441 if (doc == NULL) /* @@ relax but check for dereferences */
9442 return(-1);
9443
9444
9445 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9446 if (ctxt == NULL) return(-1);
9447 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009448 if (oldctxt != NULL) {
9449 ctxt->_private = oldctxt->_private;
9450 ctxt->loadsubset = oldctxt->loadsubset;
9451 ctxt->validate = oldctxt->validate;
9452 ctxt->external = oldctxt->external;
9453 } else {
9454 /*
9455 * Doing validity checking on chunk without context
9456 * doesn't make sense
9457 */
9458 ctxt->_private = NULL;
9459 ctxt->validate = 0;
9460 ctxt->external = 2;
9461 ctxt->loadsubset = 0;
9462 }
Owen Taylor3473f882001-02-23 17:55:21 +00009463 if (sax != NULL) {
9464 oldsax = ctxt->sax;
9465 ctxt->sax = sax;
9466 if (user_data != NULL)
9467 ctxt->userData = user_data;
9468 }
9469 newDoc = xmlNewDoc(BAD_CAST "1.0");
9470 if (newDoc == NULL) {
9471 xmlFreeParserCtxt(ctxt);
9472 return(-1);
9473 }
9474 if (doc != NULL) {
9475 newDoc->intSubset = doc->intSubset;
9476 newDoc->extSubset = doc->extSubset;
9477 }
9478 if (doc->URL != NULL) {
9479 newDoc->URL = xmlStrdup(doc->URL);
9480 }
9481 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9482 if (newDoc->children == NULL) {
9483 if (sax != NULL)
9484 ctxt->sax = oldsax;
9485 xmlFreeParserCtxt(ctxt);
9486 newDoc->intSubset = NULL;
9487 newDoc->extSubset = NULL;
9488 xmlFreeDoc(newDoc);
9489 return(-1);
9490 }
9491 nodePush(ctxt, newDoc->children);
9492 if (doc == NULL) {
9493 ctxt->myDoc = newDoc;
9494 } else {
9495 ctxt->myDoc = doc;
9496 newDoc->children->doc = doc;
9497 }
9498
Daniel Veillard87a764e2001-06-20 17:41:10 +00009499 /*
9500 * Get the 4 first bytes and decode the charset
9501 * if enc != XML_CHAR_ENCODING_NONE
9502 * plug some encoding conversion routines.
9503 */
9504 GROW;
9505 start[0] = RAW;
9506 start[1] = NXT(1);
9507 start[2] = NXT(2);
9508 start[3] = NXT(3);
9509 enc = xmlDetectCharEncoding(start, 4);
9510 if (enc != XML_CHAR_ENCODING_NONE) {
9511 xmlSwitchEncoding(ctxt, enc);
9512 }
9513
Owen Taylor3473f882001-02-23 17:55:21 +00009514 /*
9515 * Parse a possible text declaration first
9516 */
Owen Taylor3473f882001-02-23 17:55:21 +00009517 if ((RAW == '<') && (NXT(1) == '?') &&
9518 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9519 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9520 xmlParseTextDecl(ctxt);
9521 }
9522
Owen Taylor3473f882001-02-23 17:55:21 +00009523 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009524 ctxt->depth = depth;
9525
9526 xmlParseContent(ctxt);
9527
Daniel Veillard561b7f82002-03-20 21:55:57 +00009528 if ((RAW == '<') && (NXT(1) == '/')) {
Owen Taylor3473f882001-02-23 17:55:21 +00009529 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9530 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9531 ctxt->sax->error(ctxt->userData,
9532 "chunk is not well balanced\n");
9533 ctxt->wellFormed = 0;
9534 ctxt->disableSAX = 1;
Daniel Veillard561b7f82002-03-20 21:55:57 +00009535 } else if (RAW != 0) {
Owen Taylor3473f882001-02-23 17:55:21 +00009536 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9538 ctxt->sax->error(ctxt->userData,
9539 "extra content at the end of well balanced chunk\n");
9540 ctxt->wellFormed = 0;
9541 ctxt->disableSAX = 1;
9542 }
9543 if (ctxt->node != newDoc->children) {
9544 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9546 ctxt->sax->error(ctxt->userData,
9547 "chunk is not well balanced\n");
9548 ctxt->wellFormed = 0;
9549 ctxt->disableSAX = 1;
9550 }
9551
9552 if (!ctxt->wellFormed) {
9553 if (ctxt->errNo == 0)
9554 ret = 1;
9555 else
9556 ret = ctxt->errNo;
9557 } else {
9558 if (list != NULL) {
9559 xmlNodePtr cur;
9560
9561 /*
9562 * Return the newly created nodeset after unlinking it from
9563 * they pseudo parent.
9564 */
9565 cur = newDoc->children->children;
9566 *list = cur;
9567 while (cur != NULL) {
9568 cur->parent = NULL;
9569 cur = cur->next;
9570 }
9571 newDoc->children->children = NULL;
9572 }
9573 ret = 0;
9574 }
9575 if (sax != NULL)
9576 ctxt->sax = oldsax;
9577 xmlFreeParserCtxt(ctxt);
9578 newDoc->intSubset = NULL;
9579 newDoc->extSubset = NULL;
9580 xmlFreeDoc(newDoc);
9581
9582 return(ret);
9583}
9584
9585/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009586 * xmlParseExternalEntity:
9587 * @doc: the document the chunk pertains to
9588 * @sax: the SAX handler bloc (possibly NULL)
9589 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9590 * @depth: Used for loop detection, use 0
9591 * @URL: the URL for the entity to load
9592 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009593 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009594 *
9595 * Parse an external general entity
9596 * An external general parsed entity is well-formed if it matches the
9597 * production labeled extParsedEnt.
9598 *
9599 * [78] extParsedEnt ::= TextDecl? content
9600 *
9601 * Returns 0 if the entity is well formed, -1 in case of args problem and
9602 * the parser error code otherwise
9603 */
9604
9605int
9606xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009607 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009608 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009609 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009610}
9611
9612/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009613 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009614 * @doc: the document the chunk pertains to
9615 * @sax: the SAX handler bloc (possibly NULL)
9616 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9617 * @depth: Used for loop detection, use 0
9618 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009619 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009620 *
9621 * Parse a well-balanced chunk of an XML document
9622 * called by the parser
9623 * The allowed sequence for the Well Balanced Chunk is the one defined by
9624 * the content production in the XML grammar:
9625 *
9626 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9627 *
9628 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9629 * the parser error code otherwise
9630 */
9631
9632int
9633xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009634 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009635 xmlParserCtxtPtr ctxt;
9636 xmlDocPtr newDoc;
9637 xmlSAXHandlerPtr oldsax = NULL;
9638 int size;
9639 int ret = 0;
9640
9641 if (depth > 40) {
9642 return(XML_ERR_ENTITY_LOOP);
9643 }
9644
9645
Daniel Veillardcda96922001-08-21 10:56:31 +00009646 if (lst != NULL)
9647 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009648 if (string == NULL)
9649 return(-1);
9650
9651 size = xmlStrlen(string);
9652
9653 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9654 if (ctxt == NULL) return(-1);
9655 ctxt->userData = ctxt;
9656 if (sax != NULL) {
9657 oldsax = ctxt->sax;
9658 ctxt->sax = sax;
9659 if (user_data != NULL)
9660 ctxt->userData = user_data;
9661 }
9662 newDoc = xmlNewDoc(BAD_CAST "1.0");
9663 if (newDoc == NULL) {
9664 xmlFreeParserCtxt(ctxt);
9665 return(-1);
9666 }
9667 if (doc != NULL) {
9668 newDoc->intSubset = doc->intSubset;
9669 newDoc->extSubset = doc->extSubset;
9670 }
9671 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9672 if (newDoc->children == NULL) {
9673 if (sax != NULL)
9674 ctxt->sax = oldsax;
9675 xmlFreeParserCtxt(ctxt);
9676 newDoc->intSubset = NULL;
9677 newDoc->extSubset = NULL;
9678 xmlFreeDoc(newDoc);
9679 return(-1);
9680 }
9681 nodePush(ctxt, newDoc->children);
9682 if (doc == NULL) {
9683 ctxt->myDoc = newDoc;
9684 } else {
9685 ctxt->myDoc = doc;
9686 newDoc->children->doc = doc;
9687 }
9688 ctxt->instate = XML_PARSER_CONTENT;
9689 ctxt->depth = depth;
9690
9691 /*
9692 * Doing validity checking on chunk doesn't make sense
9693 */
9694 ctxt->validate = 0;
9695 ctxt->loadsubset = 0;
9696
9697 xmlParseContent(ctxt);
9698
9699 if ((RAW == '<') && (NXT(1) == '/')) {
9700 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9702 ctxt->sax->error(ctxt->userData,
9703 "chunk is not well balanced\n");
9704 ctxt->wellFormed = 0;
9705 ctxt->disableSAX = 1;
9706 } else if (RAW != 0) {
9707 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9709 ctxt->sax->error(ctxt->userData,
9710 "extra content at the end of well balanced chunk\n");
9711 ctxt->wellFormed = 0;
9712 ctxt->disableSAX = 1;
9713 }
9714 if (ctxt->node != newDoc->children) {
9715 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9717 ctxt->sax->error(ctxt->userData,
9718 "chunk is not well balanced\n");
9719 ctxt->wellFormed = 0;
9720 ctxt->disableSAX = 1;
9721 }
9722
9723 if (!ctxt->wellFormed) {
9724 if (ctxt->errNo == 0)
9725 ret = 1;
9726 else
9727 ret = ctxt->errNo;
9728 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009729 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009730 xmlNodePtr cur;
9731
9732 /*
9733 * Return the newly created nodeset after unlinking it from
9734 * they pseudo parent.
9735 */
9736 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009737 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009738 while (cur != NULL) {
9739 cur->parent = NULL;
9740 cur = cur->next;
9741 }
9742 newDoc->children->children = NULL;
9743 }
9744 ret = 0;
9745 }
9746 if (sax != NULL)
9747 ctxt->sax = oldsax;
9748 xmlFreeParserCtxt(ctxt);
9749 newDoc->intSubset = NULL;
9750 newDoc->extSubset = NULL;
9751 xmlFreeDoc(newDoc);
9752
9753 return(ret);
9754}
9755
9756/**
9757 * xmlSAXParseEntity:
9758 * @sax: the SAX handler block
9759 * @filename: the filename
9760 *
9761 * parse an XML external entity out of context and build a tree.
9762 * It use the given SAX function block to handle the parsing callback.
9763 * If sax is NULL, fallback to the default DOM tree building routines.
9764 *
9765 * [78] extParsedEnt ::= TextDecl? content
9766 *
9767 * This correspond to a "Well Balanced" chunk
9768 *
9769 * Returns the resulting document tree
9770 */
9771
9772xmlDocPtr
9773xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9774 xmlDocPtr ret;
9775 xmlParserCtxtPtr ctxt;
9776 char *directory = NULL;
9777
9778 ctxt = xmlCreateFileParserCtxt(filename);
9779 if (ctxt == NULL) {
9780 return(NULL);
9781 }
9782 if (sax != NULL) {
9783 if (ctxt->sax != NULL)
9784 xmlFree(ctxt->sax);
9785 ctxt->sax = sax;
9786 ctxt->userData = NULL;
9787 }
9788
9789 if ((ctxt->directory == NULL) && (directory == NULL))
9790 directory = xmlParserGetDirectory(filename);
9791
9792 xmlParseExtParsedEnt(ctxt);
9793
9794 if (ctxt->wellFormed)
9795 ret = ctxt->myDoc;
9796 else {
9797 ret = NULL;
9798 xmlFreeDoc(ctxt->myDoc);
9799 ctxt->myDoc = NULL;
9800 }
9801 if (sax != NULL)
9802 ctxt->sax = NULL;
9803 xmlFreeParserCtxt(ctxt);
9804
9805 return(ret);
9806}
9807
9808/**
9809 * xmlParseEntity:
9810 * @filename: the filename
9811 *
9812 * parse an XML external entity out of context and build a tree.
9813 *
9814 * [78] extParsedEnt ::= TextDecl? content
9815 *
9816 * This correspond to a "Well Balanced" chunk
9817 *
9818 * Returns the resulting document tree
9819 */
9820
9821xmlDocPtr
9822xmlParseEntity(const char *filename) {
9823 return(xmlSAXParseEntity(NULL, filename));
9824}
9825
9826/**
9827 * xmlCreateEntityParserCtxt:
9828 * @URL: the entity URL
9829 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009830 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009831 *
9832 * Create a parser context for an external entity
9833 * Automatic support for ZLIB/Compress compressed document is provided
9834 * by default if found at compile-time.
9835 *
9836 * Returns the new parser context or NULL
9837 */
9838xmlParserCtxtPtr
9839xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9840 const xmlChar *base) {
9841 xmlParserCtxtPtr ctxt;
9842 xmlParserInputPtr inputStream;
9843 char *directory = NULL;
9844 xmlChar *uri;
9845
9846 ctxt = xmlNewParserCtxt();
9847 if (ctxt == NULL) {
9848 return(NULL);
9849 }
9850
9851 uri = xmlBuildURI(URL, base);
9852
9853 if (uri == NULL) {
9854 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9855 if (inputStream == NULL) {
9856 xmlFreeParserCtxt(ctxt);
9857 return(NULL);
9858 }
9859
9860 inputPush(ctxt, inputStream);
9861
9862 if ((ctxt->directory == NULL) && (directory == NULL))
9863 directory = xmlParserGetDirectory((char *)URL);
9864 if ((ctxt->directory == NULL) && (directory != NULL))
9865 ctxt->directory = directory;
9866 } else {
9867 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9868 if (inputStream == NULL) {
9869 xmlFree(uri);
9870 xmlFreeParserCtxt(ctxt);
9871 return(NULL);
9872 }
9873
9874 inputPush(ctxt, inputStream);
9875
9876 if ((ctxt->directory == NULL) && (directory == NULL))
9877 directory = xmlParserGetDirectory((char *)uri);
9878 if ((ctxt->directory == NULL) && (directory != NULL))
9879 ctxt->directory = directory;
9880 xmlFree(uri);
9881 }
9882
9883 return(ctxt);
9884}
9885
9886/************************************************************************
9887 * *
9888 * Front ends when parsing from a file *
9889 * *
9890 ************************************************************************/
9891
9892/**
9893 * xmlCreateFileParserCtxt:
9894 * @filename: the filename
9895 *
9896 * Create a parser context for a file content.
9897 * Automatic support for ZLIB/Compress compressed document is provided
9898 * by default if found at compile-time.
9899 *
9900 * Returns the new parser context or NULL
9901 */
9902xmlParserCtxtPtr
9903xmlCreateFileParserCtxt(const char *filename)
9904{
9905 xmlParserCtxtPtr ctxt;
9906 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009907 char *directory = NULL;
9908
Owen Taylor3473f882001-02-23 17:55:21 +00009909 ctxt = xmlNewParserCtxt();
9910 if (ctxt == NULL) {
9911 if (xmlDefaultSAXHandler.error != NULL) {
9912 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9913 }
9914 return(NULL);
9915 }
9916
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009917 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009918 if (inputStream == NULL) {
9919 xmlFreeParserCtxt(ctxt);
9920 return(NULL);
9921 }
9922
Owen Taylor3473f882001-02-23 17:55:21 +00009923 inputPush(ctxt, inputStream);
9924 if ((ctxt->directory == NULL) && (directory == NULL))
9925 directory = xmlParserGetDirectory(filename);
9926 if ((ctxt->directory == NULL) && (directory != NULL))
9927 ctxt->directory = directory;
9928
9929 return(ctxt);
9930}
9931
9932/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009933 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009934 * @sax: the SAX handler block
9935 * @filename: the filename
9936 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9937 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009938 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009939 *
9940 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9941 * compressed document is provided by default if found at compile-time.
9942 * It use the given SAX function block to handle the parsing callback.
9943 * If sax is NULL, fallback to the default DOM tree building routines.
9944 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009945 * User data (void *) is stored within the parser context, so it is
9946 * available nearly everywhere in libxml.
9947 *
Owen Taylor3473f882001-02-23 17:55:21 +00009948 * Returns the resulting document tree
9949 */
9950
9951xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009952xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9953 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009954 xmlDocPtr ret;
9955 xmlParserCtxtPtr ctxt;
9956 char *directory = NULL;
9957
Daniel Veillard635ef722001-10-29 11:48:19 +00009958 xmlInitParser();
9959
Owen Taylor3473f882001-02-23 17:55:21 +00009960 ctxt = xmlCreateFileParserCtxt(filename);
9961 if (ctxt == NULL) {
9962 return(NULL);
9963 }
9964 if (sax != NULL) {
9965 if (ctxt->sax != NULL)
9966 xmlFree(ctxt->sax);
9967 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009968 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009969 if (data!=NULL) {
9970 ctxt->_private=data;
9971 }
Owen Taylor3473f882001-02-23 17:55:21 +00009972
9973 if ((ctxt->directory == NULL) && (directory == NULL))
9974 directory = xmlParserGetDirectory(filename);
9975 if ((ctxt->directory == NULL) && (directory != NULL))
9976 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9977
9978 xmlParseDocument(ctxt);
9979
9980 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9981 else {
9982 ret = NULL;
9983 xmlFreeDoc(ctxt->myDoc);
9984 ctxt->myDoc = NULL;
9985 }
9986 if (sax != NULL)
9987 ctxt->sax = NULL;
9988 xmlFreeParserCtxt(ctxt);
9989
9990 return(ret);
9991}
9992
9993/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009994 * xmlSAXParseFile:
9995 * @sax: the SAX handler block
9996 * @filename: the filename
9997 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9998 * documents
9999 *
10000 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10001 * compressed document is provided by default if found at compile-time.
10002 * It use the given SAX function block to handle the parsing callback.
10003 * If sax is NULL, fallback to the default DOM tree building routines.
10004 *
10005 * Returns the resulting document tree
10006 */
10007
10008xmlDocPtr
10009xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10010 int recovery) {
10011 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10012}
10013
10014/**
Owen Taylor3473f882001-02-23 17:55:21 +000010015 * xmlRecoverDoc:
10016 * @cur: a pointer to an array of xmlChar
10017 *
10018 * parse an XML in-memory document and build a tree.
10019 * In the case the document is not Well Formed, a tree is built anyway
10020 *
10021 * Returns the resulting document tree
10022 */
10023
10024xmlDocPtr
10025xmlRecoverDoc(xmlChar *cur) {
10026 return(xmlSAXParseDoc(NULL, cur, 1));
10027}
10028
10029/**
10030 * xmlParseFile:
10031 * @filename: the filename
10032 *
10033 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10034 * compressed document is provided by default if found at compile-time.
10035 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010036 * Returns the resulting document tree if the file was wellformed,
10037 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010038 */
10039
10040xmlDocPtr
10041xmlParseFile(const char *filename) {
10042 return(xmlSAXParseFile(NULL, filename, 0));
10043}
10044
10045/**
10046 * xmlRecoverFile:
10047 * @filename: the filename
10048 *
10049 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10050 * compressed document is provided by default if found at compile-time.
10051 * In the case the document is not Well Formed, a tree is built anyway
10052 *
10053 * Returns the resulting document tree
10054 */
10055
10056xmlDocPtr
10057xmlRecoverFile(const char *filename) {
10058 return(xmlSAXParseFile(NULL, filename, 1));
10059}
10060
10061
10062/**
10063 * xmlSetupParserForBuffer:
10064 * @ctxt: an XML parser context
10065 * @buffer: a xmlChar * buffer
10066 * @filename: a file name
10067 *
10068 * Setup the parser context to parse a new buffer; Clears any prior
10069 * contents from the parser context. The buffer parameter must not be
10070 * NULL, but the filename parameter can be
10071 */
10072void
10073xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10074 const char* filename)
10075{
10076 xmlParserInputPtr input;
10077
10078 input = xmlNewInputStream(ctxt);
10079 if (input == NULL) {
10080 perror("malloc");
10081 xmlFree(ctxt);
10082 return;
10083 }
10084
10085 xmlClearParserCtxt(ctxt);
10086 if (filename != NULL)
10087 input->filename = xmlMemStrdup(filename);
10088 input->base = buffer;
10089 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010090 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010091 inputPush(ctxt, input);
10092}
10093
10094/**
10095 * xmlSAXUserParseFile:
10096 * @sax: a SAX handler
10097 * @user_data: The user data returned on SAX callbacks
10098 * @filename: a file name
10099 *
10100 * parse an XML file and call the given SAX handler routines.
10101 * Automatic support for ZLIB/Compress compressed document is provided
10102 *
10103 * Returns 0 in case of success or a error number otherwise
10104 */
10105int
10106xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10107 const char *filename) {
10108 int ret = 0;
10109 xmlParserCtxtPtr ctxt;
10110
10111 ctxt = xmlCreateFileParserCtxt(filename);
10112 if (ctxt == NULL) return -1;
10113 if (ctxt->sax != &xmlDefaultSAXHandler)
10114 xmlFree(ctxt->sax);
10115 ctxt->sax = sax;
10116 if (user_data != NULL)
10117 ctxt->userData = user_data;
10118
10119 xmlParseDocument(ctxt);
10120
10121 if (ctxt->wellFormed)
10122 ret = 0;
10123 else {
10124 if (ctxt->errNo != 0)
10125 ret = ctxt->errNo;
10126 else
10127 ret = -1;
10128 }
10129 if (sax != NULL)
10130 ctxt->sax = NULL;
10131 xmlFreeParserCtxt(ctxt);
10132
10133 return ret;
10134}
10135
10136/************************************************************************
10137 * *
10138 * Front ends when parsing from memory *
10139 * *
10140 ************************************************************************/
10141
10142/**
10143 * xmlCreateMemoryParserCtxt:
10144 * @buffer: a pointer to a char array
10145 * @size: the size of the array
10146 *
10147 * Create a parser context for an XML in-memory document.
10148 *
10149 * Returns the new parser context or NULL
10150 */
10151xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010152xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010153 xmlParserCtxtPtr ctxt;
10154 xmlParserInputPtr input;
10155 xmlParserInputBufferPtr buf;
10156
10157 if (buffer == NULL)
10158 return(NULL);
10159 if (size <= 0)
10160 return(NULL);
10161
10162 ctxt = xmlNewParserCtxt();
10163 if (ctxt == NULL)
10164 return(NULL);
10165
10166 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10167 if (buf == NULL) return(NULL);
10168
10169 input = xmlNewInputStream(ctxt);
10170 if (input == NULL) {
10171 xmlFreeParserCtxt(ctxt);
10172 return(NULL);
10173 }
10174
10175 input->filename = NULL;
10176 input->buf = buf;
10177 input->base = input->buf->buffer->content;
10178 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010179 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010180
10181 inputPush(ctxt, input);
10182 return(ctxt);
10183}
10184
10185/**
10186 * xmlSAXParseMemory:
10187 * @sax: the SAX handler block
10188 * @buffer: an pointer to a char array
10189 * @size: the size of the array
10190 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10191 * documents
10192 *
10193 * parse an XML in-memory block and use the given SAX function block
10194 * to handle the parsing callback. If sax is NULL, fallback to the default
10195 * DOM tree building routines.
10196 *
10197 * Returns the resulting document tree
10198 */
10199xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010200xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10201 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010202 xmlDocPtr ret;
10203 xmlParserCtxtPtr ctxt;
10204
10205 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10206 if (ctxt == NULL) return(NULL);
10207 if (sax != NULL) {
10208 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010209 }
10210
10211 xmlParseDocument(ctxt);
10212
10213 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10214 else {
10215 ret = NULL;
10216 xmlFreeDoc(ctxt->myDoc);
10217 ctxt->myDoc = NULL;
10218 }
10219 if (sax != NULL)
10220 ctxt->sax = NULL;
10221 xmlFreeParserCtxt(ctxt);
10222
10223 return(ret);
10224}
10225
10226/**
10227 * xmlParseMemory:
10228 * @buffer: an pointer to a char array
10229 * @size: the size of the array
10230 *
10231 * parse an XML in-memory block and build a tree.
10232 *
10233 * Returns the resulting document tree
10234 */
10235
Daniel Veillard50822cb2001-07-26 20:05:51 +000010236xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010237 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10238}
10239
10240/**
10241 * xmlRecoverMemory:
10242 * @buffer: an pointer to a char array
10243 * @size: the size of the array
10244 *
10245 * parse an XML in-memory block and build a tree.
10246 * In the case the document is not Well Formed, a tree is built anyway
10247 *
10248 * Returns the resulting document tree
10249 */
10250
Daniel Veillard50822cb2001-07-26 20:05:51 +000010251xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010252 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10253}
10254
10255/**
10256 * xmlSAXUserParseMemory:
10257 * @sax: a SAX handler
10258 * @user_data: The user data returned on SAX callbacks
10259 * @buffer: an in-memory XML document input
10260 * @size: the length of the XML document in bytes
10261 *
10262 * A better SAX parsing routine.
10263 * parse an XML in-memory buffer and call the given SAX handler routines.
10264 *
10265 * Returns 0 in case of success or a error number otherwise
10266 */
10267int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010268 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010269 int ret = 0;
10270 xmlParserCtxtPtr ctxt;
10271 xmlSAXHandlerPtr oldsax = NULL;
10272
10273 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10274 if (ctxt == NULL) return -1;
10275 if (sax != NULL) {
10276 oldsax = ctxt->sax;
10277 ctxt->sax = sax;
10278 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010279 if (user_data != NULL)
10280 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010281
10282 xmlParseDocument(ctxt);
10283
10284 if (ctxt->wellFormed)
10285 ret = 0;
10286 else {
10287 if (ctxt->errNo != 0)
10288 ret = ctxt->errNo;
10289 else
10290 ret = -1;
10291 }
10292 if (sax != NULL) {
10293 ctxt->sax = oldsax;
10294 }
10295 xmlFreeParserCtxt(ctxt);
10296
10297 return ret;
10298}
10299
10300/**
10301 * xmlCreateDocParserCtxt:
10302 * @cur: a pointer to an array of xmlChar
10303 *
10304 * Creates a parser context for an XML in-memory document.
10305 *
10306 * Returns the new parser context or NULL
10307 */
10308xmlParserCtxtPtr
10309xmlCreateDocParserCtxt(xmlChar *cur) {
10310 int len;
10311
10312 if (cur == NULL)
10313 return(NULL);
10314 len = xmlStrlen(cur);
10315 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10316}
10317
10318/**
10319 * xmlSAXParseDoc:
10320 * @sax: the SAX handler block
10321 * @cur: a pointer to an array of xmlChar
10322 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10323 * documents
10324 *
10325 * parse an XML in-memory document and build a tree.
10326 * It use the given SAX function block to handle the parsing callback.
10327 * If sax is NULL, fallback to the default DOM tree building routines.
10328 *
10329 * Returns the resulting document tree
10330 */
10331
10332xmlDocPtr
10333xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10334 xmlDocPtr ret;
10335 xmlParserCtxtPtr ctxt;
10336
10337 if (cur == NULL) return(NULL);
10338
10339
10340 ctxt = xmlCreateDocParserCtxt(cur);
10341 if (ctxt == NULL) return(NULL);
10342 if (sax != NULL) {
10343 ctxt->sax = sax;
10344 ctxt->userData = NULL;
10345 }
10346
10347 xmlParseDocument(ctxt);
10348 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10349 else {
10350 ret = NULL;
10351 xmlFreeDoc(ctxt->myDoc);
10352 ctxt->myDoc = NULL;
10353 }
10354 if (sax != NULL)
10355 ctxt->sax = NULL;
10356 xmlFreeParserCtxt(ctxt);
10357
10358 return(ret);
10359}
10360
10361/**
10362 * xmlParseDoc:
10363 * @cur: a pointer to an array of xmlChar
10364 *
10365 * parse an XML in-memory document and build a tree.
10366 *
10367 * Returns the resulting document tree
10368 */
10369
10370xmlDocPtr
10371xmlParseDoc(xmlChar *cur) {
10372 return(xmlSAXParseDoc(NULL, cur, 0));
10373}
10374
Daniel Veillard8107a222002-01-13 14:10:10 +000010375/************************************************************************
10376 * *
10377 * Specific function to keep track of entities references *
10378 * and used by the XSLT debugger *
10379 * *
10380 ************************************************************************/
10381
10382static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10383
10384/**
10385 * xmlAddEntityReference:
10386 * @ent : A valid entity
10387 * @firstNode : A valid first node for children of entity
10388 * @lastNode : A valid last node of children entity
10389 *
10390 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10391 */
10392static void
10393xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10394 xmlNodePtr lastNode)
10395{
10396 if (xmlEntityRefFunc != NULL) {
10397 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10398 }
10399}
10400
10401
10402/**
10403 * xmlSetEntityReferenceFunc:
10404 * @func : A valid function
10405 *
10406 * Set the function to call call back when a xml reference has been made
10407 */
10408void
10409xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10410{
10411 xmlEntityRefFunc = func;
10412}
Owen Taylor3473f882001-02-23 17:55:21 +000010413
10414/************************************************************************
10415 * *
10416 * Miscellaneous *
10417 * *
10418 ************************************************************************/
10419
10420#ifdef LIBXML_XPATH_ENABLED
10421#include <libxml/xpath.h>
10422#endif
10423
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010424extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
Owen Taylor3473f882001-02-23 17:55:21 +000010425static int xmlParserInitialized = 0;
10426
10427/**
10428 * xmlInitParser:
10429 *
10430 * Initialization function for the XML parser.
10431 * This is not reentrant. Call once before processing in case of
10432 * use in multithreaded programs.
10433 */
10434
10435void
10436xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010437 if (xmlParserInitialized != 0)
10438 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010439
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010440 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10441 (xmlGenericError == NULL))
10442 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010443 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010444 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010445 xmlInitCharEncodingHandlers();
10446 xmlInitializePredefinedEntities();
10447 xmlDefaultSAXHandlerInit();
10448 xmlRegisterDefaultInputCallbacks();
10449 xmlRegisterDefaultOutputCallbacks();
10450#ifdef LIBXML_HTML_ENABLED
10451 htmlInitAutoClose();
10452 htmlDefaultSAXHandlerInit();
10453#endif
10454#ifdef LIBXML_XPATH_ENABLED
10455 xmlXPathInit();
10456#endif
10457 xmlParserInitialized = 1;
10458}
10459
10460/**
10461 * xmlCleanupParser:
10462 *
10463 * Cleanup function for the XML parser. It tries to reclaim all
10464 * parsing related global memory allocated for the parser processing.
10465 * It doesn't deallocate any document related memory. Calling this
10466 * function should not prevent reusing the parser.
10467 */
10468
10469void
10470xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010471 xmlCleanupCharEncodingHandlers();
10472 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010473#ifdef LIBXML_CATALOG_ENABLED
10474 xmlCatalogCleanup();
10475#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010476 xmlCleanupThreads();
10477 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010478}