blob: d2fd69059e098a2e21075920c7d74e52aaa27583 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
Daniel Veillardcbaf3992001-12-31 16:16:02 +000015 * high level APIs to call the parser and a few miscellaneous functions.
Owen Taylor3473f882001-02-23 17:55:21 +000016 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillardcbaf3992001-12-31 16:16:02 +000025 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Daniel Veillard3c5ed912002-01-08 10:36:16 +000035#if defined(WIN32) && !defined (__CYGWIN__)
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000044#include <libxml/threads.h>
45#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/tree.h>
47#include <libxml/parser.h>
48#include <libxml/parserInternals.h>
49#include <libxml/valid.h>
50#include <libxml/entities.h>
51#include <libxml/xmlerror.h>
52#include <libxml/encoding.h>
53#include <libxml/xmlIO.h>
54#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000055#ifdef LIBXML_CATALOG_ENABLED
56#include <libxml/catalog.h>
57#endif
Owen Taylor3473f882001-02-23 17:55:21 +000058
59#ifdef HAVE_CTYPE_H
60#include <ctype.h>
61#endif
62#ifdef HAVE_STDLIB_H
63#include <stdlib.h>
64#endif
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68#ifdef HAVE_FCNTL_H
69#include <fcntl.h>
70#endif
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#ifdef HAVE_ZLIB_H
75#include <zlib.h>
76#endif
77
78
Daniel Veillard21a0f912001-02-25 19:54:14 +000079#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000080#define XML_PARSER_BUFFER_SIZE 100
81
Daniel Veillard5997aca2002-03-18 18:36:20 +000082#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"
83
Owen Taylor3473f882001-02-23 17:55:21 +000084/*
Owen Taylor3473f882001-02-23 17:55:21 +000085 * List of XML prefixed PI allowed by W3C specs
86 */
87
Daniel Veillardb44025c2001-10-11 22:55:55 +000088static const char *xmlW3CPIs[] = {
Owen Taylor3473f882001-02-23 17:55:21 +000089 "xml-stylesheet",
90 NULL
91};
92
93/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
Owen Taylor3473f882001-02-23 17:55:21 +000094xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
95 const xmlChar **str);
96
Daniel Veillard257d9102001-05-08 10:41:44 +000097static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000098xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
99 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000100 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000101 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000102
Daniel Veillard8107a222002-01-13 14:10:10 +0000103static void
104xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
105 xmlNodePtr lastNode);
106
Owen Taylor3473f882001-02-23 17:55:21 +0000107/************************************************************************
108 * *
109 * Parser stacks related functions and macros *
110 * *
111 ************************************************************************/
112
113xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
114 const xmlChar ** str);
115
116/*
117 * Generic function for accessing stacks in the Parser Context
118 */
119
120#define PUSH_AND_POP(scope, type, name) \
121scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
122 if (ctxt->name##Nr >= ctxt->name##Max) { \
123 ctxt->name##Max *= 2; \
124 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
125 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
126 if (ctxt->name##Tab == NULL) { \
127 xmlGenericError(xmlGenericErrorContext, \
128 "realloc failed !\n"); \
129 return(0); \
130 } \
131 } \
132 ctxt->name##Tab[ctxt->name##Nr] = value; \
133 ctxt->name = value; \
134 return(ctxt->name##Nr++); \
135} \
136scope type name##Pop(xmlParserCtxtPtr ctxt) { \
137 type ret; \
138 if (ctxt->name##Nr <= 0) return(0); \
139 ctxt->name##Nr--; \
140 if (ctxt->name##Nr > 0) \
141 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
142 else \
143 ctxt->name = NULL; \
144 ret = ctxt->name##Tab[ctxt->name##Nr]; \
145 ctxt->name##Tab[ctxt->name##Nr] = 0; \
146 return(ret); \
147} \
148
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000149/**
150 * inputPop:
151 * @ctxt: an XML parser context
152 *
153 * Pops the top parser input from the input stack
154 *
155 * Returns the input just removed
156 */
157/**
158 * inputPush:
159 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000160 * @value: the parser input
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000161 *
162 * Pushes a new parser input on top of the input stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000163 *
164 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000165 */
166/**
167 * namePop:
168 * @ctxt: an XML parser context
169 *
170 * Pops the top element name from the name stack
171 *
172 * Returns the name just removed
173 */
174/**
175 * namePush:
176 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000177 * @value: the element name
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000178 *
179 * Pushes a new element name on top of the name stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000180 *
181 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000182 */
183/**
184 * nodePop:
185 * @ctxt: an XML parser context
186 *
187 * Pops the top element node from the node stack
188 *
189 * Returns the node just removed
190 */
191/**
192 * nodePush:
193 * @ctxt: an XML parser context
Daniel Veillard9d06d302002-01-22 18:15:52 +0000194 * @value: the element node
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000195 *
196 * Pushes a new element node on top of the node stack
Daniel Veillard9d06d302002-01-22 18:15:52 +0000197 *
198 * Returns 0 in case of error, the index in the stack otherwise
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000199 */
Owen Taylor3473f882001-02-23 17:55:21 +0000200/*
201 * Those macros actually generate the functions
202 */
203PUSH_AND_POP(extern, xmlParserInputPtr, input)
204PUSH_AND_POP(extern, xmlNodePtr, node)
205PUSH_AND_POP(extern, xmlChar*, name)
206
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000207static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000208 if (ctxt->spaceNr >= ctxt->spaceMax) {
209 ctxt->spaceMax *= 2;
210 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
211 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
212 if (ctxt->spaceTab == NULL) {
213 xmlGenericError(xmlGenericErrorContext,
214 "realloc failed !\n");
215 return(0);
216 }
217 }
218 ctxt->spaceTab[ctxt->spaceNr] = val;
219 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
220 return(ctxt->spaceNr++);
221}
222
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000223static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000224 int ret;
225 if (ctxt->spaceNr <= 0) return(0);
226 ctxt->spaceNr--;
227 if (ctxt->spaceNr > 0)
228 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
229 else
230 ctxt->space = NULL;
231 ret = ctxt->spaceTab[ctxt->spaceNr];
232 ctxt->spaceTab[ctxt->spaceNr] = -1;
233 return(ret);
234}
235
236/*
237 * Macros for accessing the content. Those should be used only by the parser,
238 * and not exported.
239 *
240 * Dirty macros, i.e. one often need to make assumption on the context to
241 * use them
242 *
243 * CUR_PTR return the current pointer to the xmlChar to be parsed.
244 * To be used with extreme caution since operations consuming
245 * characters may move the input buffer to a different location !
246 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
247 * This should be used internally by the parser
248 * only to compare to ASCII values otherwise it would break when
249 * running with UTF-8 encoding.
250 * RAW same as CUR but in the input buffer, bypass any token
251 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used only
253 * to compare on ASCII based substring.
254 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
255 * strings within the parser.
256 *
257 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
258 *
259 * NEXT Skip to the next character, this does the proper decoding
260 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000261 * NEXTL(l) Skip l xmlChar in the input buffer
Owen Taylor3473f882001-02-23 17:55:21 +0000262 * CUR_CHAR(l) returns the current unicode character (int), set l
263 * to the number of xmlChars used for the encoding [0-5].
264 * CUR_SCHAR same but operate on a string instead of the context
265 * COPY_BUF copy the current unicode char to the target buffer, increment
266 * the index
267 * GROW, SHRINK handling of input buffers
268 */
269
/*
 * All the macros below expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 */

/* RAW yields -1 while a pending token is set, else the current input byte */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* CUR yields the pending token if any, else the current input byte */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * SKIP advances by val bytes, then gives parameter-entity handling a
 * chance and pops the input if it is exhausted and cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK and GROW expand to a bare "if" statement (not do/while) because
 * existing call sites use them with and without a trailing semicolon.
 * Never place them directly before an "else".
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* NEXT1 steps over a single byte and refills the buffer when it runs dry */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * NEXTL steps over l bytes, keeping line/col up to date and clearing any
 * pending token. The argument is parenthesized so that expressions can be
 * passed safely.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF stores character v into buffer b at index i and advances i.
 * It expands to a bare if/else statement (call sites supply the final
 * semicolon), so it must not be used as the body of an unbraced "if".
 * i is evaluated more than once and must be a plain lvalue.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000322
323/**
324 * xmlSkipBlankChars:
325 * @ctxt: the XML parser context
326 *
327 * skip all blanks character found at that point in the input streams.
328 * It pops up finished entities in the process if allowable at that point.
329 *
330 * Returns the number of space chars skipped
331 */
332
333int
334xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000335 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000336
Daniel Veillard02141ea2001-04-30 11:46:40 +0000337 if (ctxt->token != 0) {
338 if (!IS_BLANK(ctxt->token))
339 return(0);
340 ctxt->token = 0;
341 res++;
342 }
Owen Taylor3473f882001-02-23 17:55:21 +0000343 /*
344 * It's Okay to use CUR/NEXT here since all the blanks are on
345 * the ASCII range.
346 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000347 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
348 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000349 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000350 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000351 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000352 cur = ctxt->input->cur;
353 while (IS_BLANK(*cur)) {
354 if (*cur == '\n') {
355 ctxt->input->line++; ctxt->input->col = 1;
356 }
357 cur++;
358 res++;
359 if (*cur == 0) {
360 ctxt->input->cur = cur;
361 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
362 cur = ctxt->input->cur;
363 }
364 }
365 ctxt->input->cur = cur;
366 } else {
367 int cur;
368 do {
369 cur = CUR;
370 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
371 NEXT;
372 cur = CUR;
373 res++;
374 }
375 while ((cur == 0) && (ctxt->inputNr > 1) &&
376 (ctxt->instate != XML_PARSER_COMMENT)) {
377 xmlPopInput(ctxt);
378 cur = CUR;
379 }
380 /*
381 * Need to handle support of entities branching here
382 */
383 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
384 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
385 }
Owen Taylor3473f882001-02-23 17:55:21 +0000386 return(res);
387}
388
389/************************************************************************
390 * *
391 * Commodity functions to handle entities *
392 * *
393 ************************************************************************/
394
395/**
396 * xmlPopInput:
397 * @ctxt: an XML parser context
398 *
399 * xmlPopInput: the current input pointed by ctxt->input came to an end
400 * pop it and return the next char.
401 *
402 * Returns the current xmlChar in the parser context
403 */
404xmlChar
405xmlPopInput(xmlParserCtxtPtr ctxt) {
406 if (ctxt->inputNr == 1) return(0); /* End of main Input */
407 if (xmlParserDebugEntities)
408 xmlGenericError(xmlGenericErrorContext,
409 "Popping input %d\n", ctxt->inputNr);
410 xmlFreeInputStream(inputPop(ctxt));
411 if ((*ctxt->input->cur == 0) &&
412 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
413 return(xmlPopInput(ctxt));
414 return(CUR);
415}
416
417/**
418 * xmlPushInput:
419 * @ctxt: an XML parser context
420 * @input: an XML parser input fragment (entity, XML fragment ...).
421 *
422 * xmlPushInput: switch to a new input stream which is stacked on top
423 * of the previous one(s).
424 */
425void
426xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
427 if (input == NULL) return;
428
429 if (xmlParserDebugEntities) {
430 if ((ctxt->input != NULL) && (ctxt->input->filename))
431 xmlGenericError(xmlGenericErrorContext,
432 "%s(%d): ", ctxt->input->filename,
433 ctxt->input->line);
434 xmlGenericError(xmlGenericErrorContext,
435 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
436 }
437 inputPush(ctxt, input);
438 GROW;
439}
440
441/**
442 * xmlParseCharRef:
443 * @ctxt: an XML parser context
444 *
445 * parse Reference declarations
446 *
447 * [66] CharRef ::= '&#' [0-9]+ ';' |
448 * '&#x' [0-9a-fA-F]+ ';'
449 *
450 * [ WFC: Legal Character ]
451 * Characters referred to using character references must match the
452 * production for Char.
453 *
454 * Returns the value parsed (as an int), 0 in case of error
455 */
456int
457xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000458 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000459 int count = 0;
460
461 if (ctxt->token != 0) {
462 val = ctxt->token;
463 ctxt->token = 0;
464 return(val);
465 }
466 /*
467 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
468 */
469 if ((RAW == '&') && (NXT(1) == '#') &&
470 (NXT(2) == 'x')) {
471 SKIP(3);
472 GROW;
473 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000474 if (count++ > 20) {
475 count = 0;
476 GROW;
477 }
478 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000479 val = val * 16 + (CUR - '0');
480 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
481 val = val * 16 + (CUR - 'a') + 10;
482 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
483 val = val * 16 + (CUR - 'A') + 10;
484 else {
485 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
487 ctxt->sax->error(ctxt->userData,
488 "xmlParseCharRef: invalid hexadecimal value\n");
489 ctxt->wellFormed = 0;
490 ctxt->disableSAX = 1;
491 val = 0;
492 break;
493 }
494 NEXT;
495 count++;
496 }
497 if (RAW == ';') {
498 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
499 ctxt->nbChars ++;
500 ctxt->input->cur++;
501 }
502 } else if ((RAW == '&') && (NXT(1) == '#')) {
503 SKIP(2);
504 GROW;
505 while (RAW != ';') { /* loop blocked by count */
Daniel Veillardbb7ddb32002-02-17 21:26:33 +0000506 if (count++ > 20) {
507 count = 0;
508 GROW;
509 }
510 if ((RAW >= '0') && (RAW <= '9'))
Owen Taylor3473f882001-02-23 17:55:21 +0000511 val = val * 10 + (CUR - '0');
512 else {
513 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
515 ctxt->sax->error(ctxt->userData,
516 "xmlParseCharRef: invalid decimal value\n");
517 ctxt->wellFormed = 0;
518 ctxt->disableSAX = 1;
519 val = 0;
520 break;
521 }
522 NEXT;
523 count++;
524 }
525 if (RAW == ';') {
526 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
527 ctxt->nbChars ++;
528 ctxt->input->cur++;
529 }
530 } else {
531 ctxt->errNo = XML_ERR_INVALID_CHARREF;
532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
533 ctxt->sax->error(ctxt->userData,
534 "xmlParseCharRef: invalid value\n");
535 ctxt->wellFormed = 0;
536 ctxt->disableSAX = 1;
537 }
538
539 /*
540 * [ WFC: Legal Character ]
541 * Characters referred to using character references must match the
542 * production for Char.
543 */
544 if (IS_CHAR(val)) {
545 return(val);
546 } else {
547 ctxt->errNo = XML_ERR_INVALID_CHAR;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000549 ctxt->sax->error(ctxt->userData,
550 "xmlParseCharRef: invalid xmlChar value %d\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000551 val);
552 ctxt->wellFormed = 0;
553 ctxt->disableSAX = 1;
554 }
555 return(0);
556}
557
558/**
559 * xmlParseStringCharRef:
560 * @ctxt: an XML parser context
561 * @str: a pointer to an index in the string
562 *
563 * parse Reference declarations, variant parsing from a string rather
564 * than an an input flow.
565 *
566 * [66] CharRef ::= '&#' [0-9]+ ';' |
567 * '&#x' [0-9a-fA-F]+ ';'
568 *
569 * [ WFC: Legal Character ]
570 * Characters referred to using character references must match the
571 * production for Char.
572 *
573 * Returns the value parsed (as an int), 0 in case of error, str will be
574 * updated to the current value of the index
575 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000576static int
Owen Taylor3473f882001-02-23 17:55:21 +0000577xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
578 const xmlChar *ptr;
579 xmlChar cur;
580 int val = 0;
581
582 if ((str == NULL) || (*str == NULL)) return(0);
583 ptr = *str;
584 cur = *ptr;
585 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
586 ptr += 3;
587 cur = *ptr;
588 while (cur != ';') { /* Non input consuming loop */
589 if ((cur >= '0') && (cur <= '9'))
590 val = val * 16 + (cur - '0');
591 else if ((cur >= 'a') && (cur <= 'f'))
592 val = val * 16 + (cur - 'a') + 10;
593 else if ((cur >= 'A') && (cur <= 'F'))
594 val = val * 16 + (cur - 'A') + 10;
595 else {
596 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
597 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
598 ctxt->sax->error(ctxt->userData,
599 "xmlParseStringCharRef: invalid hexadecimal value\n");
600 ctxt->wellFormed = 0;
601 ctxt->disableSAX = 1;
602 val = 0;
603 break;
604 }
605 ptr++;
606 cur = *ptr;
607 }
608 if (cur == ';')
609 ptr++;
610 } else if ((cur == '&') && (ptr[1] == '#')){
611 ptr += 2;
612 cur = *ptr;
613 while (cur != ';') { /* Non input consuming loops */
614 if ((cur >= '0') && (cur <= '9'))
615 val = val * 10 + (cur - '0');
616 else {
617 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
619 ctxt->sax->error(ctxt->userData,
620 "xmlParseStringCharRef: invalid decimal value\n");
621 ctxt->wellFormed = 0;
622 ctxt->disableSAX = 1;
623 val = 0;
624 break;
625 }
626 ptr++;
627 cur = *ptr;
628 }
629 if (cur == ';')
630 ptr++;
631 } else {
632 ctxt->errNo = XML_ERR_INVALID_CHARREF;
633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
634 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000635 "xmlParseStringCharRef: invalid value\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000636 ctxt->wellFormed = 0;
637 ctxt->disableSAX = 1;
638 return(0);
639 }
640 *str = ptr;
641
642 /*
643 * [ WFC: Legal Character ]
644 * Characters referred to using character references must match the
645 * production for Char.
646 */
647 if (IS_CHAR(val)) {
648 return(val);
649 } else {
650 ctxt->errNo = XML_ERR_INVALID_CHAR;
651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
652 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000653 "xmlParseStringCharRef: invalid xmlChar value %d\n", val);
Owen Taylor3473f882001-02-23 17:55:21 +0000654 ctxt->wellFormed = 0;
655 ctxt->disableSAX = 1;
656 }
657 return(0);
658}
659
660/**
661 * xmlParserHandlePEReference:
662 * @ctxt: the parser context
663 *
664 * [69] PEReference ::= '%' Name ';'
665 *
666 * [ WFC: No Recursion ]
667 * A parsed entity must not contain a recursive
668 * reference to itself, either directly or indirectly.
669 *
670 * [ WFC: Entity Declared ]
671 * In a document without any DTD, a document with only an internal DTD
672 * subset which contains no parameter entity references, or a document
673 * with "standalone='yes'", ... ... The declaration of a parameter
674 * entity must precede any reference to it...
675 *
676 * [ VC: Entity Declared ]
677 * In a document with an external subset or external parameter entities
678 * with "standalone='no'", ... ... The declaration of a parameter entity
679 * must precede any reference to it...
680 *
681 * [ WFC: In DTD ]
682 * Parameter-entity references may only appear in the DTD.
683 * NOTE: misleading but this is handled.
684 *
685 * A PEReference may have been detected in the current input stream
686 * the handling is done accordingly to
687 * http://www.w3.org/TR/REC-xml#entproc
688 * i.e.
689 * - Included in literal in entity values
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000690 * - Included as Parameter Entity reference within DTDs
Owen Taylor3473f882001-02-23 17:55:21 +0000691 */
692void
693xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
694 xmlChar *name;
695 xmlEntityPtr entity = NULL;
696 xmlParserInputPtr input;
697
698 if (ctxt->token != 0) {
699 return;
700 }
701 if (RAW != '%') return;
702 switch(ctxt->instate) {
703 case XML_PARSER_CDATA_SECTION:
704 return;
705 case XML_PARSER_COMMENT:
706 return;
707 case XML_PARSER_START_TAG:
708 return;
709 case XML_PARSER_END_TAG:
710 return;
711 case XML_PARSER_EOF:
712 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
714 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
715 ctxt->wellFormed = 0;
716 ctxt->disableSAX = 1;
717 return;
718 case XML_PARSER_PROLOG:
719 case XML_PARSER_START:
720 case XML_PARSER_MISC:
721 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
723 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
724 ctxt->wellFormed = 0;
725 ctxt->disableSAX = 1;
726 return;
727 case XML_PARSER_ENTITY_DECL:
728 case XML_PARSER_CONTENT:
729 case XML_PARSER_ATTRIBUTE_VALUE:
730 case XML_PARSER_PI:
731 case XML_PARSER_SYSTEM_LITERAL:
Daniel Veillard4a7ae502002-02-18 19:18:17 +0000732 case XML_PARSER_PUBLIC_LITERAL:
Owen Taylor3473f882001-02-23 17:55:21 +0000733 /* we just ignore it there */
734 return;
735 case XML_PARSER_EPILOG:
736 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
738 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
739 ctxt->wellFormed = 0;
740 ctxt->disableSAX = 1;
741 return;
742 case XML_PARSER_ENTITY_VALUE:
743 /*
744 * NOTE: in the case of entity values, we don't do the
745 * substitution here since we need the literal
746 * entity value to be able to save the internal
747 * subset of the document.
748 * This will be handled by xmlStringDecodeEntities
749 */
750 return;
751 case XML_PARSER_DTD:
752 /*
753 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
754 * In the internal DTD subset, parameter-entity references
755 * can occur only where markup declarations can occur, not
756 * within markup declarations.
757 * In that case this is handled in xmlParseMarkupDecl
758 */
759 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
760 return;
761 break;
762 case XML_PARSER_IGNORE:
763 return;
764 }
765
766 NEXT;
767 name = xmlParseName(ctxt);
768 if (xmlParserDebugEntities)
769 xmlGenericError(xmlGenericErrorContext,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000770 "PEReference: %s\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +0000771 if (name == NULL) {
772 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000774 ctxt->sax->error(ctxt->userData, "xmlParserHandlePEReference: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000775 ctxt->wellFormed = 0;
776 ctxt->disableSAX = 1;
777 } else {
778 if (RAW == ';') {
779 NEXT;
780 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
781 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
782 if (entity == NULL) {
783
784 /*
785 * [ WFC: Entity Declared ]
786 * In a document without any DTD, a document with only an
787 * internal DTD subset which contains no parameter entity
788 * references, or a document with "standalone='yes'", ...
789 * ... The declaration of a parameter entity must precede
790 * any reference to it...
791 */
792 if ((ctxt->standalone == 1) ||
793 ((ctxt->hasExternalSubset == 0) &&
794 (ctxt->hasPErefs == 0))) {
795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
796 ctxt->sax->error(ctxt->userData,
797 "PEReference: %%%s; not found\n", name);
798 ctxt->wellFormed = 0;
799 ctxt->disableSAX = 1;
800 } else {
801 /*
802 * [ VC: Entity Declared ]
803 * In a document with an external subset or external
804 * parameter entities with "standalone='no'", ...
805 * ... The declaration of a parameter entity must precede
806 * any reference to it...
807 */
808 if ((!ctxt->disableSAX) &&
809 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
810 ctxt->vctxt.error(ctxt->vctxt.userData,
811 "PEReference: %%%s; not found\n", name);
812 } else if ((!ctxt->disableSAX) &&
813 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
814 ctxt->sax->warning(ctxt->userData,
815 "PEReference: %%%s; not found\n", name);
816 ctxt->valid = 0;
817 }
818 } else {
819 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
820 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000821 xmlChar start[4];
822 xmlCharEncoding enc;
823
Owen Taylor3473f882001-02-23 17:55:21 +0000824 /*
825 * handle the extra spaces added before and after
826 * c.f. http://www.w3.org/TR/REC-xml#as-PE
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000827 * this is done independently.
Owen Taylor3473f882001-02-23 17:55:21 +0000828 */
829 input = xmlNewEntityInputStream(ctxt, entity);
830 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000831
832 /*
833 * Get the 4 first bytes and decode the charset
834 * if enc != XML_CHAR_ENCODING_NONE
835 * plug some encoding conversion routines.
836 */
837 GROW
838 start[0] = RAW;
839 start[1] = NXT(1);
840 start[2] = NXT(2);
841 start[3] = NXT(3);
842 enc = xmlDetectCharEncoding(start, 4);
843 if (enc != XML_CHAR_ENCODING_NONE) {
844 xmlSwitchEncoding(ctxt, enc);
845 }
846
Owen Taylor3473f882001-02-23 17:55:21 +0000847 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
848 (RAW == '<') && (NXT(1) == '?') &&
849 (NXT(2) == 'x') && (NXT(3) == 'm') &&
850 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
851 xmlParseTextDecl(ctxt);
852 }
853 if (ctxt->token == 0)
854 ctxt->token = ' ';
855 } else {
856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
857 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000858 "xmlParserHandlePEReference: %s is not a parameter entity\n",
Owen Taylor3473f882001-02-23 17:55:21 +0000859 name);
860 ctxt->wellFormed = 0;
861 ctxt->disableSAX = 1;
862 }
863 }
864 } else {
865 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
867 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000868 "xmlParserHandlePEReference: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +0000869 ctxt->wellFormed = 0;
870 ctxt->disableSAX = 1;
871 }
872 xmlFree(name);
873 }
874}
875
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer accordingly. It must be
 * used inside a function returning a pointer: on allocation failure the
 * error is reported with perror(), the old buffer is released (the
 * original code leaked it by overwriting the only pointer with NULL) and
 * the enclosing function returns NULL.
 * NOTE(review): this assumes every caller owns buffer and has not yet
 * handed it to anyone else at the point of use - confirm for call sites
 * outside this part of the file.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        buffer = NULL;							\
        return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
888
889/**
890 * xmlStringDecodeEntities:
891 * @ctxt: the parser context
892 * @str: the input string
893 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
894 * @end: an end marker xmlChar, 0 if none
895 * @end2: an end marker xmlChar, 0 if none
896 * @end3: an end marker xmlChar, 0 if none
897 *
 * Takes an entity string content and processes it to perform the adequate substitutions.
Owen Taylor3473f882001-02-23 17:55:21 +0000899 *
900 * [67] Reference ::= EntityRef | CharRef
901 *
902 * [69] PEReference ::= '%' Name ';'
903 *
904 * Returns A newly allocated string with the substitution done. The caller
905 * must deallocate it !
906 */
907xmlChar *
908xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
909 xmlChar end, xmlChar end2, xmlChar end3) {
910 xmlChar *buffer = NULL;
911 int buffer_size = 0;
912
913 xmlChar *current = NULL;
914 xmlEntityPtr ent;
915 int c,l;
916 int nbchars = 0;
917
918 if (str == NULL)
919 return(NULL);
920
921 if (ctxt->depth > 40) {
922 ctxt->errNo = XML_ERR_ENTITY_LOOP;
923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
924 ctxt->sax->error(ctxt->userData,
925 "Detected entity reference loop\n");
926 ctxt->wellFormed = 0;
927 ctxt->disableSAX = 1;
928 return(NULL);
929 }
930
931 /*
932 * allocate a translation buffer.
933 */
934 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
935 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
936 if (buffer == NULL) {
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000937 perror("xmlStringDecodeEntities: malloc failed");
Owen Taylor3473f882001-02-23 17:55:21 +0000938 return(NULL);
939 }
940
941 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +0000942 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +0000943 * we are operating on already parsed values.
944 */
945 c = CUR_SCHAR(str, l);
946 while ((c != 0) && (c != end) && /* non input consuming loop */
947 (c != end2) && (c != end3)) {
948
949 if (c == 0) break;
950 if ((c == '&') && (str[1] == '#')) {
951 int val = xmlParseStringCharRef(ctxt, &str);
952 if (val != 0) {
953 COPY_BUF(0,buffer,nbchars,val);
954 }
955 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
956 if (xmlParserDebugEntities)
957 xmlGenericError(xmlGenericErrorContext,
958 "String decoding Entity Reference: %.30s\n",
959 str);
960 ent = xmlParseStringEntityRef(ctxt, &str);
961 if ((ent != NULL) &&
962 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
963 if (ent->content != NULL) {
964 COPY_BUF(0,buffer,nbchars,ent->content[0]);
965 } else {
966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
967 ctxt->sax->error(ctxt->userData,
968 "internal error entity has no content\n");
969 }
970 } else if ((ent != NULL) && (ent->content != NULL)) {
971 xmlChar *rep;
972
973 ctxt->depth++;
974 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
975 0, 0, 0);
976 ctxt->depth--;
977 if (rep != NULL) {
978 current = rep;
979 while (*current != 0) { /* non input consuming loop */
980 buffer[nbchars++] = *current++;
981 if (nbchars >
982 buffer_size - XML_PARSER_BUFFER_SIZE) {
983 growBuffer(buffer);
984 }
985 }
986 xmlFree(rep);
987 }
988 } else if (ent != NULL) {
989 int i = xmlStrlen(ent->name);
990 const xmlChar *cur = ent->name;
991
992 buffer[nbchars++] = '&';
993 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
994 growBuffer(buffer);
995 }
996 for (;i > 0;i--)
997 buffer[nbchars++] = *cur++;
998 buffer[nbchars++] = ';';
999 }
1000 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
1001 if (xmlParserDebugEntities)
1002 xmlGenericError(xmlGenericErrorContext,
1003 "String decoding PE Reference: %.30s\n", str);
1004 ent = xmlParseStringPEReference(ctxt, &str);
1005 if (ent != NULL) {
1006 xmlChar *rep;
1007
1008 ctxt->depth++;
1009 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
1010 0, 0, 0);
1011 ctxt->depth--;
1012 if (rep != NULL) {
1013 current = rep;
1014 while (*current != 0) { /* non input consuming loop */
1015 buffer[nbchars++] = *current++;
1016 if (nbchars >
1017 buffer_size - XML_PARSER_BUFFER_SIZE) {
1018 growBuffer(buffer);
1019 }
1020 }
1021 xmlFree(rep);
1022 }
1023 }
1024 } else {
1025 COPY_BUF(l,buffer,nbchars,c);
1026 str += l;
1027 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1028 growBuffer(buffer);
1029 }
1030 }
1031 c = CUR_SCHAR(str, l);
1032 }
1033 buffer[nbchars++] = 0;
1034 return(buffer);
1035}
1036
1037
1038/************************************************************************
1039 * *
1040 * Commodity functions to handle xmlChars *
1041 * *
1042 ************************************************************************/
1043
1044/**
1045 * xmlStrndup:
1046 * @cur: the input xmlChar *
1047 * @len: the len of @cur
1048 *
1049 * a strndup for array of xmlChar's
1050 *
1051 * Returns a new xmlChar * or NULL
1052 */
1053xmlChar *
1054xmlStrndup(const xmlChar *cur, int len) {
1055 xmlChar *ret;
1056
1057 if ((cur == NULL) || (len < 0)) return(NULL);
1058 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1059 if (ret == NULL) {
1060 xmlGenericError(xmlGenericErrorContext,
1061 "malloc of %ld byte failed\n",
1062 (len + 1) * (long)sizeof(xmlChar));
1063 return(NULL);
1064 }
1065 memcpy(ret, cur, len * sizeof(xmlChar));
1066 ret[len] = 0;
1067 return(ret);
1068}
1069
1070/**
1071 * xmlStrdup:
1072 * @cur: the input xmlChar *
1073 *
1074 * a strdup for array of xmlChar's. Since they are supposed to be
1075 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1076 * a termination mark of '0'.
1077 *
1078 * Returns a new xmlChar * or NULL
1079 */
1080xmlChar *
1081xmlStrdup(const xmlChar *cur) {
1082 const xmlChar *p = cur;
1083
1084 if (cur == NULL) return(NULL);
1085 while (*p != 0) p++; /* non input consuming */
1086 return(xmlStrndup(cur, p - cur));
1087}
1088
1089/**
1090 * xmlCharStrndup:
1091 * @cur: the input char *
1092 * @len: the len of @cur
1093 *
1094 * a strndup for char's to xmlChar's
1095 *
1096 * Returns a new xmlChar * or NULL
1097 */
1098
1099xmlChar *
1100xmlCharStrndup(const char *cur, int len) {
1101 int i;
1102 xmlChar *ret;
1103
1104 if ((cur == NULL) || (len < 0)) return(NULL);
1105 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1106 if (ret == NULL) {
1107 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1108 (len + 1) * (long)sizeof(xmlChar));
1109 return(NULL);
1110 }
1111 for (i = 0;i < len;i++)
1112 ret[i] = (xmlChar) cur[i];
1113 ret[len] = 0;
1114 return(ret);
1115}
1116
1117/**
1118 * xmlCharStrdup:
1119 * @cur: the input char *
1120 * @len: the len of @cur
1121 *
1122 * a strdup for char's to xmlChar's
1123 *
1124 * Returns a new xmlChar * or NULL
1125 */
1126
1127xmlChar *
1128xmlCharStrdup(const char *cur) {
1129 const char *p = cur;
1130
1131 if (cur == NULL) return(NULL);
1132 while (*p != '\0') p++; /* non input consuming */
1133 return(xmlCharStrndup(cur, p - cur));
1134}
1135
1136/**
1137 * xmlStrcmp:
1138 * @str1: the first xmlChar *
1139 * @str2: the second xmlChar *
1140 *
1141 * a strcmp for xmlChar's
1142 *
1143 * Returns the integer result of the comparison
1144 */
1145
1146int
1147xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1148 register int tmp;
1149
1150 if (str1 == str2) return(0);
1151 if (str1 == NULL) return(-1);
1152 if (str2 == NULL) return(1);
1153 do {
1154 tmp = *str1++ - *str2;
1155 if (tmp != 0) return(tmp);
1156 } while (*str2++ != 0);
1157 return 0;
1158}
1159
1160/**
1161 * xmlStrEqual:
1162 * @str1: the first xmlChar *
1163 * @str2: the second xmlChar *
1164 *
1165 * Check if both string are equal of have same content
1166 * Should be a bit more readable and faster than xmlStrEqual()
1167 *
1168 * Returns 1 if they are equal, 0 if they are different
1169 */
1170
1171int
1172xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1173 if (str1 == str2) return(1);
1174 if (str1 == NULL) return(0);
1175 if (str2 == NULL) return(0);
1176 do {
1177 if (*str1++ != *str2) return(0);
1178 } while (*str2++);
1179 return(1);
1180}
1181
1182/**
1183 * xmlStrncmp:
1184 * @str1: the first xmlChar *
1185 * @str2: the second xmlChar *
1186 * @len: the max comparison length
1187 *
1188 * a strncmp for xmlChar's
1189 *
1190 * Returns the integer result of the comparison
1191 */
1192
1193int
1194xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1195 register int tmp;
1196
1197 if (len <= 0) return(0);
1198 if (str1 == str2) return(0);
1199 if (str1 == NULL) return(-1);
1200 if (str2 == NULL) return(1);
1201 do {
1202 tmp = *str1++ - *str2;
1203 if (tmp != 0 || --len == 0) return(tmp);
1204 } while (*str2++ != 0);
1205 return 0;
1206}
1207
Daniel Veillardb44025c2001-10-11 22:55:55 +00001208static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001209 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1210 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1211 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1212 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1213 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1214 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1215 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1216 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1217 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1218 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1219 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1220 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1221 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1222 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1223 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1224 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1225 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1226 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1227 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1228 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1229 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1230 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1231 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1232 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1233 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1234 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1235 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1236 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1237 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1238 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1239 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1240 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1241};
1242
1243/**
1244 * xmlStrcasecmp:
1245 * @str1: the first xmlChar *
1246 * @str2: the second xmlChar *
1247 *
1248 * a strcasecmp for xmlChar's
1249 *
1250 * Returns the integer result of the comparison
1251 */
1252
1253int
1254xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1255 register int tmp;
1256
1257 if (str1 == str2) return(0);
1258 if (str1 == NULL) return(-1);
1259 if (str2 == NULL) return(1);
1260 do {
1261 tmp = casemap[*str1++] - casemap[*str2];
1262 if (tmp != 0) return(tmp);
1263 } while (*str2++ != 0);
1264 return 0;
1265}
1266
1267/**
1268 * xmlStrncasecmp:
1269 * @str1: the first xmlChar *
1270 * @str2: the second xmlChar *
1271 * @len: the max comparison length
1272 *
1273 * a strncasecmp for xmlChar's
1274 *
1275 * Returns the integer result of the comparison
1276 */
1277
1278int
1279xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1280 register int tmp;
1281
1282 if (len <= 0) return(0);
1283 if (str1 == str2) return(0);
1284 if (str1 == NULL) return(-1);
1285 if (str2 == NULL) return(1);
1286 do {
1287 tmp = casemap[*str1++] - casemap[*str2];
1288 if (tmp != 0 || --len == 0) return(tmp);
1289 } while (*str2++ != 0);
1290 return 0;
1291}
1292
1293/**
1294 * xmlStrchr:
1295 * @str: the xmlChar * array
1296 * @val: the xmlChar to search
1297 *
1298 * a strchr for xmlChar's
1299 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001300 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001301 */
1302
1303const xmlChar *
1304xmlStrchr(const xmlChar *str, xmlChar val) {
1305 if (str == NULL) return(NULL);
1306 while (*str != 0) { /* non input consuming */
1307 if (*str == val) return((xmlChar *) str);
1308 str++;
1309 }
1310 return(NULL);
1311}
1312
1313/**
1314 * xmlStrstr:
1315 * @str: the xmlChar * array (haystack)
1316 * @val: the xmlChar to search (needle)
1317 *
1318 * a strstr for xmlChar's
1319 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001320 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001321 */
1322
1323const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001324xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001325 int n;
1326
1327 if (str == NULL) return(NULL);
1328 if (val == NULL) return(NULL);
1329 n = xmlStrlen(val);
1330
1331 if (n == 0) return(str);
1332 while (*str != 0) { /* non input consuming */
1333 if (*str == *val) {
1334 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1335 }
1336 str++;
1337 }
1338 return(NULL);
1339}
1340
1341/**
1342 * xmlStrcasestr:
1343 * @str: the xmlChar * array (haystack)
1344 * @val: the xmlChar to search (needle)
1345 *
1346 * a case-ignoring strstr for xmlChar's
1347 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001348 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001349 */
1350
1351const xmlChar *
1352xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1353 int n;
1354
1355 if (str == NULL) return(NULL);
1356 if (val == NULL) return(NULL);
1357 n = xmlStrlen(val);
1358
1359 if (n == 0) return(str);
1360 while (*str != 0) { /* non input consuming */
1361 if (casemap[*str] == casemap[*val])
1362 if (!xmlStrncasecmp(str, val, n)) return(str);
1363 str++;
1364 }
1365 return(NULL);
1366}
1367
1368/**
1369 * xmlStrsub:
1370 * @str: the xmlChar * array (haystack)
1371 * @start: the index of the first char (zero based)
1372 * @len: the length of the substring
1373 *
1374 * Extract a substring of a given string
1375 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001376 * Returns the xmlChar * for the first occurrence or NULL.
Owen Taylor3473f882001-02-23 17:55:21 +00001377 */
1378
1379xmlChar *
1380xmlStrsub(const xmlChar *str, int start, int len) {
1381 int i;
1382
1383 if (str == NULL) return(NULL);
1384 if (start < 0) return(NULL);
1385 if (len < 0) return(NULL);
1386
1387 for (i = 0;i < start;i++) {
1388 if (*str == 0) return(NULL);
1389 str++;
1390 }
1391 if (*str == 0) return(NULL);
1392 return(xmlStrndup(str, len));
1393}
1394
1395/**
1396 * xmlStrlen:
1397 * @str: the xmlChar * array
1398 *
1399 * length of a xmlChar's string
1400 *
1401 * Returns the number of xmlChar contained in the ARRAY.
1402 */
1403
1404int
1405xmlStrlen(const xmlChar *str) {
1406 int len = 0;
1407
1408 if (str == NULL) return(0);
1409 while (*str != 0) { /* non input consuming */
1410 str++;
1411 len++;
1412 }
1413 return(len);
1414}
1415
1416/**
1417 * xmlStrncat:
1418 * @cur: the original xmlChar * array
1419 * @add: the xmlChar * array added
1420 * @len: the length of @add
1421 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001422 * a strncat for array of xmlChar's, it will extend @cur with the len
Owen Taylor3473f882001-02-23 17:55:21 +00001423 * first bytes of @add.
1424 *
1425 * Returns a new xmlChar *, the original @cur is reallocated if needed
1426 * and should not be freed
1427 */
1428
1429xmlChar *
1430xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1431 int size;
1432 xmlChar *ret;
1433
1434 if ((add == NULL) || (len == 0))
1435 return(cur);
1436 if (cur == NULL)
1437 return(xmlStrndup(add, len));
1438
1439 size = xmlStrlen(cur);
1440 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1441 if (ret == NULL) {
1442 xmlGenericError(xmlGenericErrorContext,
1443 "xmlStrncat: realloc of %ld byte failed\n",
1444 (size + len + 1) * (long)sizeof(xmlChar));
1445 return(cur);
1446 }
1447 memcpy(&ret[size], add, len * sizeof(xmlChar));
1448 ret[size + len] = 0;
1449 return(ret);
1450}
1451
1452/**
1453 * xmlStrcat:
1454 * @cur: the original xmlChar * array
1455 * @add: the xmlChar * array added
1456 *
1457 * a strcat for array of xmlChar's. Since they are supposed to be
1458 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1459 * a termination mark of '0'.
1460 *
1461 * Returns a new xmlChar * containing the concatenated string.
1462 */
1463xmlChar *
1464xmlStrcat(xmlChar *cur, const xmlChar *add) {
1465 const xmlChar *p = add;
1466
1467 if (add == NULL) return(cur);
1468 if (cur == NULL)
1469 return(xmlStrdup(add));
1470
1471 while (*p != 0) p++; /* non input consuming */
1472 return(xmlStrncat(cur, add, p - add));
1473}
1474
1475/************************************************************************
1476 * *
1477 * Commodity functions, cleanup needed ? *
1478 * *
1479 ************************************************************************/
1480
1481/**
1482 * areBlanks:
1483 * @ctxt: an XML parser context
1484 * @str: a xmlChar *
1485 * @len: the size of @str
1486 *
1487 * Is this a sequence of blank chars that one can ignore ?
1488 *
1489 * Returns 1 if ignorable 0 otherwise.
1490 */
1491
1492static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1493 int i, ret;
1494 xmlNodePtr lastChild;
1495
Daniel Veillard05c13a22001-09-09 08:38:09 +00001496 /*
1497 * Don't spend time trying to differentiate them, the same callback is
1498 * used !
1499 */
1500 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001501 return(0);
1502
Owen Taylor3473f882001-02-23 17:55:21 +00001503 /*
1504 * Check for xml:space value.
1505 */
1506 if (*(ctxt->space) == 1)
1507 return(0);
1508
1509 /*
1510 * Check that the string is made of blanks
1511 */
1512 for (i = 0;i < len;i++)
1513 if (!(IS_BLANK(str[i]))) return(0);
1514
1515 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001516 * Look if the element is mixed content in the DTD if available
Owen Taylor3473f882001-02-23 17:55:21 +00001517 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001518 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001519 if (ctxt->myDoc != NULL) {
1520 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1521 if (ret == 0) return(1);
1522 if (ret == 1) return(0);
1523 }
1524
1525 /*
1526 * Otherwise, heuristic :-\
1527 */
Owen Taylor3473f882001-02-23 17:55:21 +00001528 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001529 if ((ctxt->node->children == NULL) &&
1530 (RAW == '<') && (NXT(1) == '/')) return(0);
1531
1532 lastChild = xmlGetLastChild(ctxt->node);
1533 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001534 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1535 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001536 } else if (xmlNodeIsText(lastChild))
1537 return(0);
1538 else if ((ctxt->node->children != NULL) &&
1539 (xmlNodeIsText(ctxt->node->children)))
1540 return(0);
1541 return(1);
1542}
1543
Owen Taylor3473f882001-02-23 17:55:21 +00001544/************************************************************************
1545 * *
1546 * Extra stuff for namespace support *
1547 * Relates to http://www.w3.org/TR/WD-xml-names *
1548 * *
1549 ************************************************************************/
1550
1551/**
1552 * xmlSplitQName:
1553 * @ctxt: an XML parser context
1554 * @name: an XML parser context
1555 * @prefix: a xmlChar **
1556 *
1557 * parse an UTF8 encoded XML qualified name string
1558 *
1559 * [NS 5] QName ::= (Prefix ':')? LocalPart
1560 *
1561 * [NS 6] Prefix ::= NCName
1562 *
1563 * [NS 7] LocalPart ::= NCName
1564 *
1565 * Returns the local part, and prefix is updated
1566 * to get the Prefix if any.
1567 */
1568
1569xmlChar *
1570xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1571 xmlChar buf[XML_MAX_NAMELEN + 5];
1572 xmlChar *buffer = NULL;
1573 int len = 0;
1574 int max = XML_MAX_NAMELEN;
1575 xmlChar *ret = NULL;
1576 const xmlChar *cur = name;
1577 int c;
1578
1579 *prefix = NULL;
1580
1581#ifndef XML_XML_NAMESPACE
1582 /* xml: prefix is not really a namespace */
1583 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1584 (cur[2] == 'l') && (cur[3] == ':'))
1585 return(xmlStrdup(name));
1586#endif
1587
1588 /* nasty but valid */
1589 if (cur[0] == ':')
1590 return(xmlStrdup(name));
1591
1592 c = *cur++;
1593 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1594 buf[len++] = c;
1595 c = *cur++;
1596 }
1597 if (len >= max) {
1598 /*
1599 * Okay someone managed to make a huge name, so he's ready to pay
1600 * for the processing speed.
1601 */
1602 max = len * 2;
1603
1604 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1605 if (buffer == NULL) {
1606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1607 ctxt->sax->error(ctxt->userData,
1608 "xmlSplitQName: out of memory\n");
1609 return(NULL);
1610 }
1611 memcpy(buffer, buf, len);
1612 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1613 if (len + 10 > max) {
1614 max *= 2;
1615 buffer = (xmlChar *) xmlRealloc(buffer,
1616 max * sizeof(xmlChar));
1617 if (buffer == NULL) {
1618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1619 ctxt->sax->error(ctxt->userData,
1620 "xmlSplitQName: out of memory\n");
1621 return(NULL);
1622 }
1623 }
1624 buffer[len++] = c;
1625 c = *cur++;
1626 }
1627 buffer[len] = 0;
1628 }
1629
1630 if (buffer == NULL)
1631 ret = xmlStrndup(buf, len);
1632 else {
1633 ret = buffer;
1634 buffer = NULL;
1635 max = XML_MAX_NAMELEN;
1636 }
1637
1638
1639 if (c == ':') {
1640 c = *cur++;
1641 if (c == 0) return(ret);
1642 *prefix = ret;
1643 len = 0;
1644
1645 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1646 buf[len++] = c;
1647 c = *cur++;
1648 }
1649 if (len >= max) {
1650 /*
1651 * Okay someone managed to make a huge name, so he's ready to pay
1652 * for the processing speed.
1653 */
1654 max = len * 2;
1655
1656 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1657 if (buffer == NULL) {
1658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1659 ctxt->sax->error(ctxt->userData,
1660 "xmlSplitQName: out of memory\n");
1661 return(NULL);
1662 }
1663 memcpy(buffer, buf, len);
1664 while (c != 0) { /* tested bigname2.xml */
1665 if (len + 10 > max) {
1666 max *= 2;
1667 buffer = (xmlChar *) xmlRealloc(buffer,
1668 max * sizeof(xmlChar));
1669 if (buffer == NULL) {
1670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1671 ctxt->sax->error(ctxt->userData,
1672 "xmlSplitQName: out of memory\n");
1673 return(NULL);
1674 }
1675 }
1676 buffer[len++] = c;
1677 c = *cur++;
1678 }
1679 buffer[len] = 0;
1680 }
1681
1682 if (buffer == NULL)
1683 ret = xmlStrndup(buf, len);
1684 else {
1685 ret = buffer;
1686 }
1687 }
1688
1689 return(ret);
1690}
1691
1692/************************************************************************
1693 * *
1694 * The parser itself *
1695 * Relates to http://www.w3.org/TR/REC-xml *
1696 * *
1697 ************************************************************************/
1698
Daniel Veillard76d66f42001-05-16 21:05:17 +00001699static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001700/**
1701 * xmlParseName:
1702 * @ctxt: an XML parser context
1703 *
1704 * parse an XML name.
1705 *
1706 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1707 * CombiningChar | Extender
1708 *
1709 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1710 *
1711 * [6] Names ::= Name (S Name)*
1712 *
1713 * Returns the Name parsed or NULL
1714 */
1715
1716xmlChar *
1717xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001718 const xmlChar *in;
1719 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001720 int count = 0;
1721
1722 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001723
1724 /*
1725 * Accelerator for simple ASCII names
1726 */
1727 in = ctxt->input->cur;
1728 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1729 ((*in >= 0x41) && (*in <= 0x5A)) ||
1730 (*in == '_') || (*in == ':')) {
1731 in++;
1732 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1733 ((*in >= 0x41) && (*in <= 0x5A)) ||
1734 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001735 (*in == '_') || (*in == '-') ||
1736 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001737 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001738 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001739 count = in - ctxt->input->cur;
1740 ret = xmlStrndup(ctxt->input->cur, count);
1741 ctxt->input->cur = in;
1742 return(ret);
1743 }
1744 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001745 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001746}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001747
Daniel Veillard76d66f42001-05-16 21:05:17 +00001748static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001749xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1750 xmlChar buf[XML_MAX_NAMELEN + 5];
1751 int len = 0, l;
1752 int c;
1753 int count = 0;
1754
1755 /*
1756 * Handler for more complex cases
1757 */
1758 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001759 c = CUR_CHAR(l);
1760 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1761 (!IS_LETTER(c) && (c != '_') &&
1762 (c != ':'))) {
1763 return(NULL);
1764 }
1765
1766 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1767 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1768 (c == '.') || (c == '-') ||
1769 (c == '_') || (c == ':') ||
1770 (IS_COMBINING(c)) ||
1771 (IS_EXTENDER(c)))) {
1772 if (count++ > 100) {
1773 count = 0;
1774 GROW;
1775 }
1776 COPY_BUF(l,buf,len,c);
1777 NEXTL(l);
1778 c = CUR_CHAR(l);
1779 if (len >= XML_MAX_NAMELEN) {
1780 /*
1781 * Okay someone managed to make a huge name, so he's ready to pay
1782 * for the processing speed.
1783 */
1784 xmlChar *buffer;
1785 int max = len * 2;
1786
1787 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1788 if (buffer == NULL) {
1789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1790 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001791 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001792 return(NULL);
1793 }
1794 memcpy(buffer, buf, len);
1795 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1796 (c == '.') || (c == '-') ||
1797 (c == '_') || (c == ':') ||
1798 (IS_COMBINING(c)) ||
1799 (IS_EXTENDER(c))) {
1800 if (count++ > 100) {
1801 count = 0;
1802 GROW;
1803 }
1804 if (len + 10 > max) {
1805 max *= 2;
1806 buffer = (xmlChar *) xmlRealloc(buffer,
1807 max * sizeof(xmlChar));
1808 if (buffer == NULL) {
1809 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1810 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001811 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001812 return(NULL);
1813 }
1814 }
1815 COPY_BUF(l,buffer,len,c);
1816 NEXTL(l);
1817 c = CUR_CHAR(l);
1818 }
1819 buffer[len] = 0;
1820 return(buffer);
1821 }
1822 }
1823 return(xmlStrndup(buf, len));
1824}
1825
1826/**
1827 * xmlParseStringName:
1828 * @ctxt: an XML parser context
1829 * @str: a pointer to the string pointer (IN/OUT)
1830 *
1831 * parse an XML name.
1832 *
1833 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1834 * CombiningChar | Extender
1835 *
1836 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1837 *
1838 * [6] Names ::= Name (S Name)*
1839 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001840 * Returns the Name parsed or NULL. The @str pointer
Owen Taylor3473f882001-02-23 17:55:21 +00001841 * is updated to the current location in the string.
1842 */
1843
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001844static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001845xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1846 xmlChar buf[XML_MAX_NAMELEN + 5];
1847 const xmlChar *cur = *str;
1848 int len = 0, l;
1849 int c;
1850
1851 c = CUR_SCHAR(cur, l);
1852 if (!IS_LETTER(c) && (c != '_') &&
1853 (c != ':')) {
1854 return(NULL);
1855 }
1856
1857 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1858 (c == '.') || (c == '-') ||
1859 (c == '_') || (c == ':') ||
1860 (IS_COMBINING(c)) ||
1861 (IS_EXTENDER(c))) {
1862 COPY_BUF(l,buf,len,c);
1863 cur += l;
1864 c = CUR_SCHAR(cur, l);
1865 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1866 /*
1867 * Okay someone managed to make a huge name, so he's ready to pay
1868 * for the processing speed.
1869 */
1870 xmlChar *buffer;
1871 int max = len * 2;
1872
1873 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1874 if (buffer == NULL) {
1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1876 ctxt->sax->error(ctxt->userData,
1877 "xmlParseStringName: out of memory\n");
1878 return(NULL);
1879 }
1880 memcpy(buffer, buf, len);
1881 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1882 (c == '.') || (c == '-') ||
1883 (c == '_') || (c == ':') ||
1884 (IS_COMBINING(c)) ||
1885 (IS_EXTENDER(c))) {
1886 if (len + 10 > max) {
1887 max *= 2;
1888 buffer = (xmlChar *) xmlRealloc(buffer,
1889 max * sizeof(xmlChar));
1890 if (buffer == NULL) {
1891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1892 ctxt->sax->error(ctxt->userData,
1893 "xmlParseStringName: out of memory\n");
1894 return(NULL);
1895 }
1896 }
1897 COPY_BUF(l,buffer,len,c);
1898 cur += l;
1899 c = CUR_SCHAR(cur, l);
1900 }
1901 buffer[len] = 0;
1902 *str = cur;
1903 return(buffer);
1904 }
1905 }
1906 *str = cur;
1907 return(xmlStrndup(buf, len));
1908}
1909
1910/**
1911 * xmlParseNmtoken:
1912 * @ctxt: an XML parser context
1913 *
1914 * parse an XML Nmtoken.
1915 *
1916 * [7] Nmtoken ::= (NameChar)+
1917 *
1918 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1919 *
1920 * Returns the Nmtoken parsed or NULL
1921 */
1922
1923xmlChar *
1924xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1925 xmlChar buf[XML_MAX_NAMELEN + 5];
1926 int len = 0, l;
1927 int c;
1928 int count = 0;
1929
1930 GROW;
1931 c = CUR_CHAR(l);
1932
1933 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1934 (c == '.') || (c == '-') ||
1935 (c == '_') || (c == ':') ||
1936 (IS_COMBINING(c)) ||
1937 (IS_EXTENDER(c))) {
1938 if (count++ > 100) {
1939 count = 0;
1940 GROW;
1941 }
1942 COPY_BUF(l,buf,len,c);
1943 NEXTL(l);
1944 c = CUR_CHAR(l);
1945 if (len >= XML_MAX_NAMELEN) {
1946 /*
1947 * Okay someone managed to make a huge token, so he's ready to pay
1948 * for the processing speed.
1949 */
1950 xmlChar *buffer;
1951 int max = len * 2;
1952
1953 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1954 if (buffer == NULL) {
1955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1956 ctxt->sax->error(ctxt->userData,
1957 "xmlParseNmtoken: out of memory\n");
1958 return(NULL);
1959 }
1960 memcpy(buffer, buf, len);
1961 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1962 (c == '.') || (c == '-') ||
1963 (c == '_') || (c == ':') ||
1964 (IS_COMBINING(c)) ||
1965 (IS_EXTENDER(c))) {
1966 if (count++ > 100) {
1967 count = 0;
1968 GROW;
1969 }
1970 if (len + 10 > max) {
1971 max *= 2;
1972 buffer = (xmlChar *) xmlRealloc(buffer,
1973 max * sizeof(xmlChar));
1974 if (buffer == NULL) {
1975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1976 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00001977 "xmlParseNmtoken: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001978 return(NULL);
1979 }
1980 }
1981 COPY_BUF(l,buffer,len,c);
1982 NEXTL(l);
1983 c = CUR_CHAR(l);
1984 }
1985 buffer[len] = 0;
1986 return(buffer);
1987 }
1988 }
1989 if (len == 0)
1990 return(NULL);
1991 return(xmlStrndup(buf, len));
1992}
1993
1994/**
1995 * xmlParseEntityValue:
1996 * @ctxt: an XML parser context
1997 * @orig: if non-NULL store a copy of the original entity value
1998 *
1999 * parse a value for ENTITY declarations
2000 *
2001 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
2002 * "'" ([^%&'] | PEReference | Reference)* "'"
2003 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002004 * Returns the EntityValue parsed with reference substituted or NULL
Owen Taylor3473f882001-02-23 17:55:21 +00002005 */
2006
2007xmlChar *
2008xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
2009 xmlChar *buf = NULL;
2010 int len = 0;
2011 int size = XML_PARSER_BUFFER_SIZE;
2012 int c, l;
2013 xmlChar stop;
2014 xmlChar *ret = NULL;
2015 const xmlChar *cur = NULL;
2016 xmlParserInputPtr input;
2017
2018 if (RAW == '"') stop = '"';
2019 else if (RAW == '\'') stop = '\'';
2020 else {
2021 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2023 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2024 ctxt->wellFormed = 0;
2025 ctxt->disableSAX = 1;
2026 return(NULL);
2027 }
2028 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2029 if (buf == NULL) {
2030 xmlGenericError(xmlGenericErrorContext,
2031 "malloc of %d byte failed\n", size);
2032 return(NULL);
2033 }
2034
2035 /*
2036 * The content of the entity definition is copied in a buffer.
2037 */
2038
2039 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2040 input = ctxt->input;
2041 GROW;
2042 NEXT;
2043 c = CUR_CHAR(l);
2044 /*
2045 * NOTE: 4.4.5 Included in Literal
2046 * When a parameter entity reference appears in a literal entity
2047 * value, ... a single or double quote character in the replacement
2048 * text is always treated as a normal data character and will not
2049 * terminate the literal.
2050 * In practice it means we stop the loop only when back at parsing
2051 * the initial entity and the quote is found
2052 */
2053 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2054 (ctxt->input != input))) {
2055 if (len + 5 >= size) {
2056 size *= 2;
2057 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2058 if (buf == NULL) {
2059 xmlGenericError(xmlGenericErrorContext,
2060 "realloc of %d byte failed\n", size);
2061 return(NULL);
2062 }
2063 }
2064 COPY_BUF(l,buf,len,c);
2065 NEXTL(l);
2066 /*
2067 * Pop-up of finished entities.
2068 */
2069 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2070 xmlPopInput(ctxt);
2071
2072 GROW;
2073 c = CUR_CHAR(l);
2074 if (c == 0) {
2075 GROW;
2076 c = CUR_CHAR(l);
2077 }
2078 }
2079 buf[len] = 0;
2080
2081 /*
2082 * Raise problem w.r.t. '&' and '%' being used in non-entities
2083 * reference constructs. Note Charref will be handled in
2084 * xmlStringDecodeEntities()
2085 */
2086 cur = buf;
2087 while (*cur != 0) { /* non input consuming */
2088 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2089 xmlChar *name;
2090 xmlChar tmp = *cur;
2091
2092 cur++;
2093 name = xmlParseStringName(ctxt, &cur);
2094 if ((name == NULL) || (*cur != ';')) {
2095 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2097 ctxt->sax->error(ctxt->userData,
2098 "EntityValue: '%c' forbidden except for entities references\n",
2099 tmp);
2100 ctxt->wellFormed = 0;
2101 ctxt->disableSAX = 1;
2102 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002103 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2104 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002105 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2106 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2107 ctxt->sax->error(ctxt->userData,
2108 "EntityValue: PEReferences forbidden in internal subset\n",
2109 tmp);
2110 ctxt->wellFormed = 0;
2111 ctxt->disableSAX = 1;
2112 }
2113 if (name != NULL)
2114 xmlFree(name);
2115 }
2116 cur++;
2117 }
2118
2119 /*
2120 * Then PEReference entities are substituted.
2121 */
2122 if (c != stop) {
2123 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2125 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2126 ctxt->wellFormed = 0;
2127 ctxt->disableSAX = 1;
2128 xmlFree(buf);
2129 } else {
2130 NEXT;
2131 /*
2132 * NOTE: 4.4.7 Bypassed
2133 * When a general entity reference appears in the EntityValue in
2134 * an entity declaration, it is bypassed and left as is.
2135 * so XML_SUBSTITUTE_REF is not set here.
2136 */
2137 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2138 0, 0, 0);
2139 if (orig != NULL)
2140 *orig = buf;
2141 else
2142 xmlFree(buf);
2143 }
2144
2145 return(ret);
2146}
2147
2148/**
2149 * xmlParseAttValue:
2150 * @ctxt: an XML parser context
2151 *
2152 * parse a value for an attribute
2153 * Note: the parser won't do substitution of entities here, this
2154 * will be handled later in xmlStringGetNodeList
2155 *
2156 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2157 * "'" ([^<&'] | Reference)* "'"
2158 *
2159 * 3.3.3 Attribute-Value Normalization:
2160 * Before the value of an attribute is passed to the application or
2161 * checked for validity, the XML processor must normalize it as follows:
2162 * - a character reference is processed by appending the referenced
2163 * character to the attribute value
2164 * - an entity reference is processed by recursively processing the
2165 * replacement text of the entity
2166 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2167 * appending #x20 to the normalized value, except that only a single
2168 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2169 * parsed entity or the literal entity value of an internal parsed entity
2170 * - other characters are processed by appending them to the normalized value
2171 * If the declared value is not CDATA, then the XML processor must further
2172 * process the normalized attribute value by discarding any leading and
2173 * trailing space (#x20) characters, and by replacing sequences of space
2174 * (#x20) characters by a single space (#x20) character.
2175 * All attributes for which no declaration has been read should be treated
2176 * by a non-validating parser as if declared CDATA.
2177 *
2178 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2179 */
2180
2181xmlChar *
2182xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2183 xmlChar limit = 0;
2184 xmlChar *buf = NULL;
2185 int len = 0;
2186 int buf_size = 0;
2187 int c, l;
2188 xmlChar *current = NULL;
2189 xmlEntityPtr ent;
2190
2191
2192 SHRINK;
2193 if (NXT(0) == '"') {
2194 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2195 limit = '"';
2196 NEXT;
2197 } else if (NXT(0) == '\'') {
2198 limit = '\'';
2199 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2200 NEXT;
2201 } else {
2202 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2204 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2205 ctxt->wellFormed = 0;
2206 ctxt->disableSAX = 1;
2207 return(NULL);
2208 }
2209
2210 /*
2211 * allocate a translation buffer.
2212 */
2213 buf_size = XML_PARSER_BUFFER_SIZE;
2214 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2215 if (buf == NULL) {
2216 perror("xmlParseAttValue: malloc failed");
2217 return(NULL);
2218 }
2219
2220 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002221 * OK loop until we reach one of the ending char or a size limit.
Owen Taylor3473f882001-02-23 17:55:21 +00002222 */
2223 c = CUR_CHAR(l);
2224 while (((NXT(0) != limit) && /* checked */
2225 (c != '<')) || (ctxt->token != 0)) {
2226 if (c == 0) break;
2227 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002228 if (ctxt->replaceEntities) {
2229 if (len > buf_size - 10) {
2230 growBuffer(buf);
2231 }
2232 buf[len++] = '&';
2233 } else {
2234 /*
2235 * The reparsing will be done in xmlStringGetNodeList()
2236 * called by the attribute() function in SAX.c
2237 */
2238 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002239
Daniel Veillard319a7422001-09-11 09:27:09 +00002240 if (len > buf_size - 10) {
2241 growBuffer(buf);
2242 }
2243 current = &buffer[0];
2244 while (*current != 0) { /* non input consuming */
2245 buf[len++] = *current++;
2246 }
2247 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002248 }
Owen Taylor3473f882001-02-23 17:55:21 +00002249 } else if (c == '&') {
2250 if (NXT(1) == '#') {
2251 int val = xmlParseCharRef(ctxt);
2252 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002253 if (ctxt->replaceEntities) {
2254 if (len > buf_size - 10) {
2255 growBuffer(buf);
2256 }
2257 buf[len++] = '&';
2258 } else {
2259 /*
2260 * The reparsing will be done in xmlStringGetNodeList()
2261 * called by the attribute() function in SAX.c
2262 */
2263 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002264
Daniel Veillard319a7422001-09-11 09:27:09 +00002265 if (len > buf_size - 10) {
2266 growBuffer(buf);
2267 }
2268 current = &buffer[0];
2269 while (*current != 0) { /* non input consuming */
2270 buf[len++] = *current++;
2271 }
Owen Taylor3473f882001-02-23 17:55:21 +00002272 }
2273 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002274 if (len > buf_size - 10) {
2275 growBuffer(buf);
2276 }
Owen Taylor3473f882001-02-23 17:55:21 +00002277 len += xmlCopyChar(0, &buf[len], val);
2278 }
2279 } else {
2280 ent = xmlParseEntityRef(ctxt);
2281 if ((ent != NULL) &&
2282 (ctxt->replaceEntities != 0)) {
2283 xmlChar *rep;
2284
2285 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2286 rep = xmlStringDecodeEntities(ctxt, ent->content,
2287 XML_SUBSTITUTE_REF, 0, 0, 0);
2288 if (rep != NULL) {
2289 current = rep;
2290 while (*current != 0) { /* non input consuming */
2291 buf[len++] = *current++;
2292 if (len > buf_size - 10) {
2293 growBuffer(buf);
2294 }
2295 }
2296 xmlFree(rep);
2297 }
2298 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002299 if (len > buf_size - 10) {
2300 growBuffer(buf);
2301 }
Owen Taylor3473f882001-02-23 17:55:21 +00002302 if (ent->content != NULL)
2303 buf[len++] = ent->content[0];
2304 }
2305 } else if (ent != NULL) {
2306 int i = xmlStrlen(ent->name);
2307 const xmlChar *cur = ent->name;
2308
2309 /*
2310 * This may look absurd but is needed to detect
2311 * entities problems
2312 */
2313 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2314 (ent->content != NULL)) {
2315 xmlChar *rep;
2316 rep = xmlStringDecodeEntities(ctxt, ent->content,
2317 XML_SUBSTITUTE_REF, 0, 0, 0);
2318 if (rep != NULL)
2319 xmlFree(rep);
2320 }
2321
2322 /*
2323 * Just output the reference
2324 */
2325 buf[len++] = '&';
2326 if (len > buf_size - i - 10) {
2327 growBuffer(buf);
2328 }
2329 for (;i > 0;i--)
2330 buf[len++] = *cur++;
2331 buf[len++] = ';';
2332 }
2333 }
2334 } else {
2335 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2336 COPY_BUF(l,buf,len,0x20);
2337 if (len > buf_size - 10) {
2338 growBuffer(buf);
2339 }
2340 } else {
2341 COPY_BUF(l,buf,len,c);
2342 if (len > buf_size - 10) {
2343 growBuffer(buf);
2344 }
2345 }
2346 NEXTL(l);
2347 }
2348 GROW;
2349 c = CUR_CHAR(l);
2350 }
2351 buf[len++] = 0;
2352 if (RAW == '<') {
2353 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2355 ctxt->sax->error(ctxt->userData,
2356 "Unescaped '<' not allowed in attributes values\n");
2357 ctxt->wellFormed = 0;
2358 ctxt->disableSAX = 1;
2359 } else if (RAW != limit) {
2360 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2362 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2363 ctxt->wellFormed = 0;
2364 ctxt->disableSAX = 1;
2365 } else
2366 NEXT;
2367 return(buf);
2368}
2369
2370/**
2371 * xmlParseSystemLiteral:
2372 * @ctxt: an XML parser context
2373 *
2374 * parse an XML Literal
2375 *
2376 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2377 *
2378 * Returns the SystemLiteral parsed or NULL
2379 */
2380
2381xmlChar *
2382xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2383 xmlChar *buf = NULL;
2384 int len = 0;
2385 int size = XML_PARSER_BUFFER_SIZE;
2386 int cur, l;
2387 xmlChar stop;
2388 int state = ctxt->instate;
2389 int count = 0;
2390
2391 SHRINK;
2392 if (RAW == '"') {
2393 NEXT;
2394 stop = '"';
2395 } else if (RAW == '\'') {
2396 NEXT;
2397 stop = '\'';
2398 } else {
2399 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2401 ctxt->sax->error(ctxt->userData,
2402 "SystemLiteral \" or ' expected\n");
2403 ctxt->wellFormed = 0;
2404 ctxt->disableSAX = 1;
2405 return(NULL);
2406 }
2407
2408 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2409 if (buf == NULL) {
2410 xmlGenericError(xmlGenericErrorContext,
2411 "malloc of %d byte failed\n", size);
2412 return(NULL);
2413 }
2414 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2415 cur = CUR_CHAR(l);
2416 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2417 if (len + 5 >= size) {
2418 size *= 2;
2419 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2420 if (buf == NULL) {
2421 xmlGenericError(xmlGenericErrorContext,
2422 "realloc of %d byte failed\n", size);
2423 ctxt->instate = (xmlParserInputState) state;
2424 return(NULL);
2425 }
2426 }
2427 count++;
2428 if (count > 50) {
2429 GROW;
2430 count = 0;
2431 }
2432 COPY_BUF(l,buf,len,cur);
2433 NEXTL(l);
2434 cur = CUR_CHAR(l);
2435 if (cur == 0) {
2436 GROW;
2437 SHRINK;
2438 cur = CUR_CHAR(l);
2439 }
2440 }
2441 buf[len] = 0;
2442 ctxt->instate = (xmlParserInputState) state;
2443 if (!IS_CHAR(cur)) {
2444 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2446 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2447 ctxt->wellFormed = 0;
2448 ctxt->disableSAX = 1;
2449 } else {
2450 NEXT;
2451 }
2452 return(buf);
2453}
2454
2455/**
2456 * xmlParsePubidLiteral:
2457 * @ctxt: an XML parser context
2458 *
2459 * parse an XML public literal
2460 *
2461 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2462 *
2463 * Returns the PubidLiteral parsed or NULL.
2464 */
2465
2466xmlChar *
2467xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2468 xmlChar *buf = NULL;
2469 int len = 0;
2470 int size = XML_PARSER_BUFFER_SIZE;
2471 xmlChar cur;
2472 xmlChar stop;
2473 int count = 0;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002474 xmlParserInputState oldstate = ctxt->instate;
Owen Taylor3473f882001-02-23 17:55:21 +00002475
2476 SHRINK;
2477 if (RAW == '"') {
2478 NEXT;
2479 stop = '"';
2480 } else if (RAW == '\'') {
2481 NEXT;
2482 stop = '\'';
2483 } else {
2484 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2486 ctxt->sax->error(ctxt->userData,
2487 "SystemLiteral \" or ' expected\n");
2488 ctxt->wellFormed = 0;
2489 ctxt->disableSAX = 1;
2490 return(NULL);
2491 }
2492 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2493 if (buf == NULL) {
2494 xmlGenericError(xmlGenericErrorContext,
2495 "malloc of %d byte failed\n", size);
2496 return(NULL);
2497 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002498 ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
Owen Taylor3473f882001-02-23 17:55:21 +00002499 cur = CUR;
2500 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2501 if (len + 1 >= size) {
2502 size *= 2;
2503 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2504 if (buf == NULL) {
2505 xmlGenericError(xmlGenericErrorContext,
2506 "realloc of %d byte failed\n", size);
2507 return(NULL);
2508 }
2509 }
2510 buf[len++] = cur;
2511 count++;
2512 if (count > 50) {
2513 GROW;
2514 count = 0;
2515 }
2516 NEXT;
2517 cur = CUR;
2518 if (cur == 0) {
2519 GROW;
2520 SHRINK;
2521 cur = CUR;
2522 }
2523 }
2524 buf[len] = 0;
2525 if (cur != stop) {
2526 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2527 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2528 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2529 ctxt->wellFormed = 0;
2530 ctxt->disableSAX = 1;
2531 } else {
2532 NEXT;
2533 }
Daniel Veillard4a7ae502002-02-18 19:18:17 +00002534 ctxt->instate = oldstate;
Owen Taylor3473f882001-02-23 17:55:21 +00002535 return(buf);
2536}
2537
Daniel Veillard48b2f892001-02-25 16:11:03 +00002538void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002539/**
2540 * xmlParseCharData:
2541 * @ctxt: an XML parser context
2542 * @cdata: int indicating whether we are within a CDATA section
2543 *
2544 * parse a CharData section.
2545 * if we are within a CDATA section ']]>' marks an end of section.
2546 *
2547 * The right angle bracket (>) may be represented using the string "&gt;",
2548 * and must, for compatibility, be escaped using "&gt;" or a character
2549 * reference when it appears in the string "]]>" in content, when that
2550 * string is not marking the end of a CDATA section.
2551 *
2552 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2553 */
2554
2555void
2556xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002557 const xmlChar *in;
2558 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002559 int line = ctxt->input->line;
2560 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002561
2562 SHRINK;
2563 GROW;
2564 /*
2565 * Accelerated common case where input don't need to be
2566 * modified before passing it to the handler.
2567 */
2568 if ((ctxt->token == 0) && (!cdata)) {
2569 in = ctxt->input->cur;
2570 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002571get_more:
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002572 while (((*in >= 0x20) && (*in != '<') && (*in != ']') &&
Daniel Veillard48b2f892001-02-25 16:11:03 +00002573 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2574 in++;
2575 if (*in == 0xA) {
2576 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002577 in++;
2578 while (*in == 0xA) {
2579 ctxt->input->line++;
2580 in++;
2581 }
2582 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002583 }
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002584 if (*in == ']') {
2585 if ((in[1] == ']') && (in[2] == '>')) {
2586 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2588 ctxt->sax->error(ctxt->userData,
2589 "Sequence ']]>' not allowed in content\n");
2590 ctxt->input->cur = in;
2591 ctxt->wellFormed = 0;
2592 ctxt->disableSAX = 1;
2593 return;
2594 }
2595 in++;
2596 goto get_more;
2597 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002598 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002599 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002600 if (IS_BLANK(*ctxt->input->cur)) {
2601 const xmlChar *tmp = ctxt->input->cur;
2602 ctxt->input->cur = in;
2603 if (areBlanks(ctxt, tmp, nbchar)) {
2604 if (ctxt->sax->ignorableWhitespace != NULL)
2605 ctxt->sax->ignorableWhitespace(ctxt->userData,
2606 tmp, nbchar);
2607 } else {
2608 if (ctxt->sax->characters != NULL)
2609 ctxt->sax->characters(ctxt->userData,
2610 tmp, nbchar);
2611 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002612 line = ctxt->input->line;
2613 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002614 } else {
2615 if (ctxt->sax->characters != NULL)
2616 ctxt->sax->characters(ctxt->userData,
2617 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002618 line = ctxt->input->line;
2619 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002620 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002621 }
2622 ctxt->input->cur = in;
2623 if (*in == 0xD) {
2624 in++;
2625 if (*in == 0xA) {
2626 ctxt->input->cur = in;
2627 in++;
2628 ctxt->input->line++;
2629 continue; /* while */
2630 }
2631 in--;
2632 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002633 if (*in == '<') {
2634 return;
2635 }
2636 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002637 return;
2638 }
2639 SHRINK;
2640 GROW;
2641 in = ctxt->input->cur;
2642 } while ((*in >= 0x20) && (*in <= 0x7F));
2643 nbchar = 0;
2644 }
Daniel Veillard50582112001-03-26 22:52:16 +00002645 ctxt->input->line = line;
2646 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002647 xmlParseCharDataComplex(ctxt, cdata);
2648}
2649
2650void
2651xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002652 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2653 int nbchar = 0;
2654 int cur, l;
2655 int count = 0;
2656
2657 SHRINK;
2658 GROW;
2659 cur = CUR_CHAR(l);
2660 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2661 ((cur != '&') || (ctxt->token == '&')) &&
2662 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2663 if ((cur == ']') && (NXT(1) == ']') &&
2664 (NXT(2) == '>')) {
2665 if (cdata) break;
2666 else {
2667 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2669 ctxt->sax->error(ctxt->userData,
2670 "Sequence ']]>' not allowed in content\n");
2671 /* Should this be relaxed ??? I see a "must here */
2672 ctxt->wellFormed = 0;
2673 ctxt->disableSAX = 1;
2674 }
2675 }
2676 COPY_BUF(l,buf,nbchar,cur);
2677 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2678 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002679 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002680 */
2681 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2682 if (areBlanks(ctxt, buf, nbchar)) {
2683 if (ctxt->sax->ignorableWhitespace != NULL)
2684 ctxt->sax->ignorableWhitespace(ctxt->userData,
2685 buf, nbchar);
2686 } else {
2687 if (ctxt->sax->characters != NULL)
2688 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2689 }
2690 }
2691 nbchar = 0;
2692 }
2693 count++;
2694 if (count > 50) {
2695 GROW;
2696 count = 0;
2697 }
2698 NEXTL(l);
2699 cur = CUR_CHAR(l);
2700 }
2701 if (nbchar != 0) {
2702 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002703 * OK the segment is to be consumed as chars.
Owen Taylor3473f882001-02-23 17:55:21 +00002704 */
2705 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2706 if (areBlanks(ctxt, buf, nbchar)) {
2707 if (ctxt->sax->ignorableWhitespace != NULL)
2708 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2709 } else {
2710 if (ctxt->sax->characters != NULL)
2711 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2712 }
2713 }
2714 }
2715}
2716
2717/**
2718 * xmlParseExternalID:
2719 * @ctxt: an XML parser context
2720 * @publicID: a xmlChar** receiving PubidLiteral
2721 * @strict: indicate whether we should restrict parsing to only
2722 * production [75], see NOTE below
2723 *
2724 * Parse an External ID or a Public ID
2725 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002726 * NOTE: Productions [75] and [83] interact badly since [75] can generate
Owen Taylor3473f882001-02-23 17:55:21 +00002727 * 'PUBLIC' S PubidLiteral S SystemLiteral
2728 *
2729 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2730 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2731 *
2732 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2733 *
2734 * Returns the function returns SystemLiteral and in the second
2735 * case publicID receives PubidLiteral, is strict is off
2736 * it is possible to return NULL and have publicID set.
2737 */
2738
2739xmlChar *
2740xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2741 xmlChar *URI = NULL;
2742
2743 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002744
2745 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002746 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2747 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2748 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2749 SKIP(6);
2750 if (!IS_BLANK(CUR)) {
2751 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2753 ctxt->sax->error(ctxt->userData,
2754 "Space required after 'SYSTEM'\n");
2755 ctxt->wellFormed = 0;
2756 ctxt->disableSAX = 1;
2757 }
2758 SKIP_BLANKS;
2759 URI = xmlParseSystemLiteral(ctxt);
2760 if (URI == NULL) {
2761 ctxt->errNo = XML_ERR_URI_REQUIRED;
2762 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2763 ctxt->sax->error(ctxt->userData,
2764 "xmlParseExternalID: SYSTEM, no URI\n");
2765 ctxt->wellFormed = 0;
2766 ctxt->disableSAX = 1;
2767 }
2768 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2769 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2770 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2771 SKIP(6);
2772 if (!IS_BLANK(CUR)) {
2773 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2775 ctxt->sax->error(ctxt->userData,
2776 "Space required after 'PUBLIC'\n");
2777 ctxt->wellFormed = 0;
2778 ctxt->disableSAX = 1;
2779 }
2780 SKIP_BLANKS;
2781 *publicID = xmlParsePubidLiteral(ctxt);
2782 if (*publicID == NULL) {
2783 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2785 ctxt->sax->error(ctxt->userData,
2786 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2787 ctxt->wellFormed = 0;
2788 ctxt->disableSAX = 1;
2789 }
2790 if (strict) {
2791 /*
2792 * We don't handle [83] so "S SystemLiteral" is required.
2793 */
2794 if (!IS_BLANK(CUR)) {
2795 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2797 ctxt->sax->error(ctxt->userData,
2798 "Space required after the Public Identifier\n");
2799 ctxt->wellFormed = 0;
2800 ctxt->disableSAX = 1;
2801 }
2802 } else {
2803 /*
2804 * We handle [83] so we return immediately, if
2805 * "S SystemLiteral" is not detected. From a purely parsing
2806 * point of view that's a nice mess.
2807 */
2808 const xmlChar *ptr;
2809 GROW;
2810
2811 ptr = CUR_PTR;
2812 if (!IS_BLANK(*ptr)) return(NULL);
2813
2814 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2815 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2816 }
2817 SKIP_BLANKS;
2818 URI = xmlParseSystemLiteral(ctxt);
2819 if (URI == NULL) {
2820 ctxt->errNo = XML_ERR_URI_REQUIRED;
2821 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2822 ctxt->sax->error(ctxt->userData,
2823 "xmlParseExternalID: PUBLIC, no URI\n");
2824 ctxt->wellFormed = 0;
2825 ctxt->disableSAX = 1;
2826 }
2827 }
2828 return(URI);
2829}
2830
2831/**
2832 * xmlParseComment:
2833 * @ctxt: an XML parser context
2834 *
2835 * Skip an XML (SGML) comment <!-- .... -->
2836 * The spec says that "For compatibility, the string "--" (double-hyphen)
2837 * must not occur within comments. "
2838 *
2839 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2840 */
2841void
2842xmlParseComment(xmlParserCtxtPtr ctxt) {
2843 xmlChar *buf = NULL;
2844 int len;
2845 int size = XML_PARSER_BUFFER_SIZE;
2846 int q, ql;
2847 int r, rl;
2848 int cur, l;
2849 xmlParserInputState state;
2850 xmlParserInputPtr input = ctxt->input;
2851 int count = 0;
2852
2853 /*
2854 * Check that there is a comment right here.
2855 */
2856 if ((RAW != '<') || (NXT(1) != '!') ||
2857 (NXT(2) != '-') || (NXT(3) != '-')) return;
2858
2859 state = ctxt->instate;
2860 ctxt->instate = XML_PARSER_COMMENT;
2861 SHRINK;
2862 SKIP(4);
2863 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2864 if (buf == NULL) {
2865 xmlGenericError(xmlGenericErrorContext,
2866 "malloc of %d byte failed\n", size);
2867 ctxt->instate = state;
2868 return;
2869 }
2870 q = CUR_CHAR(ql);
2871 NEXTL(ql);
2872 r = CUR_CHAR(rl);
2873 NEXTL(rl);
2874 cur = CUR_CHAR(l);
2875 len = 0;
2876 while (IS_CHAR(cur) && /* checked */
2877 ((cur != '>') ||
2878 (r != '-') || (q != '-'))) {
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00002879 if ((r == '-') && (q == '-')) {
Owen Taylor3473f882001-02-23 17:55:21 +00002880 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2882 ctxt->sax->error(ctxt->userData,
2883 "Comment must not contain '--' (double-hyphen)`\n");
2884 ctxt->wellFormed = 0;
2885 ctxt->disableSAX = 1;
2886 }
2887 if (len + 5 >= size) {
2888 size *= 2;
2889 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2890 if (buf == NULL) {
2891 xmlGenericError(xmlGenericErrorContext,
2892 "realloc of %d byte failed\n", size);
2893 ctxt->instate = state;
2894 return;
2895 }
2896 }
2897 COPY_BUF(ql,buf,len,q);
2898 q = r;
2899 ql = rl;
2900 r = cur;
2901 rl = l;
2902
2903 count++;
2904 if (count > 50) {
2905 GROW;
2906 count = 0;
2907 }
2908 NEXTL(l);
2909 cur = CUR_CHAR(l);
2910 if (cur == 0) {
2911 SHRINK;
2912 GROW;
2913 cur = CUR_CHAR(l);
2914 }
2915 }
2916 buf[len] = 0;
2917 if (!IS_CHAR(cur)) {
2918 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2920 ctxt->sax->error(ctxt->userData,
2921 "Comment not terminated \n<!--%.50s\n", buf);
2922 ctxt->wellFormed = 0;
2923 ctxt->disableSAX = 1;
2924 xmlFree(buf);
2925 } else {
2926 if (input != ctxt->input) {
2927 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2928 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2929 ctxt->sax->error(ctxt->userData,
2930"Comment doesn't start and stop in the same entity\n");
2931 ctxt->wellFormed = 0;
2932 ctxt->disableSAX = 1;
2933 }
2934 NEXT;
2935 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2936 (!ctxt->disableSAX))
2937 ctxt->sax->comment(ctxt->userData, buf);
2938 xmlFree(buf);
2939 }
2940 ctxt->instate = state;
2941}
2942
2943/**
2944 * xmlParsePITarget:
2945 * @ctxt: an XML parser context
2946 *
2947 * parse the name of a PI
2948 *
2949 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2950 *
2951 * Returns the PITarget name or NULL
2952 */
2953
2954xmlChar *
2955xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2956 xmlChar *name;
2957
2958 name = xmlParseName(ctxt);
2959 if ((name != NULL) &&
2960 ((name[0] == 'x') || (name[0] == 'X')) &&
2961 ((name[1] == 'm') || (name[1] == 'M')) &&
2962 ((name[2] == 'l') || (name[2] == 'L'))) {
2963 int i;
2964 if ((name[0] == 'x') && (name[1] == 'm') &&
2965 (name[2] == 'l') && (name[3] == 0)) {
2966 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2967 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2968 ctxt->sax->error(ctxt->userData,
2969 "XML declaration allowed only at the start of the document\n");
2970 ctxt->wellFormed = 0;
2971 ctxt->disableSAX = 1;
2972 return(name);
2973 } else if (name[3] == 0) {
2974 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2976 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2977 ctxt->wellFormed = 0;
2978 ctxt->disableSAX = 1;
2979 return(name);
2980 }
2981 for (i = 0;;i++) {
2982 if (xmlW3CPIs[i] == NULL) break;
2983 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2984 return(name);
2985 }
2986 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2987 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2988 ctxt->sax->warning(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00002989 "xmlParsePITarget: invalid name prefix 'xml'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00002990 }
2991 }
2992 return(name);
2993}
2994
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00002995#ifdef LIBXML_CATALOG_ENABLED
2996/**
2997 * xmlParseCatalogPI:
2998 * @ctxt: an XML parser context
2999 * @catalog: the PI value string
3000 *
3001 * parse an XML Catalog Processing Instruction.
3002 *
3003 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
3004 *
3005 * Occurs only if allowed by the user and if happening in the Misc
3006 * part of the document before any doctype informations
3007 * This will add the given catalog to the parsing context in order
3008 * to be used if there is a resolution need further down in the document
3009 */
3010
3011static void
3012xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
3013 xmlChar *URL = NULL;
3014 const xmlChar *tmp, *base;
3015 xmlChar marker;
3016
3017 tmp = catalog;
3018 while (IS_BLANK(*tmp)) tmp++;
3019 if (xmlStrncmp(tmp, BAD_CAST"catalog", 7))
3020 goto error;
3021 tmp += 7;
3022 while (IS_BLANK(*tmp)) tmp++;
3023 if (*tmp != '=') {
3024 return;
3025 }
3026 tmp++;
3027 while (IS_BLANK(*tmp)) tmp++;
3028 marker = *tmp;
3029 if ((marker != '\'') && (marker != '"'))
3030 goto error;
3031 tmp++;
3032 base = tmp;
3033 while ((*tmp != 0) && (*tmp != marker)) tmp++;
3034 if (*tmp == 0)
3035 goto error;
3036 URL = xmlStrndup(base, tmp - base);
3037 tmp++;
3038 while (IS_BLANK(*tmp)) tmp++;
3039 if (*tmp != 0)
3040 goto error;
3041
3042 if (URL != NULL) {
3043 ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
3044 xmlFree(URL);
3045 }
3046 return;
3047
3048error:
3049 ctxt->errNo = XML_WAR_CATALOG_PI;
3050 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
3051 ctxt->sax->warning(ctxt->userData,
3052 "Catalog PI syntax error: %s\n", catalog);
3053 if (URL != NULL)
3054 xmlFree(URL);
3055}
3056#endif
3057
Owen Taylor3473f882001-02-23 17:55:21 +00003058/**
3059 * xmlParsePI:
3060 * @ctxt: an XML parser context
3061 *
3062 * parse an XML Processing Instruction.
3063 *
3064 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3065 *
3066 * The processing is transfered to SAX once parsed.
3067 */
3068
3069void
3070xmlParsePI(xmlParserCtxtPtr ctxt) {
3071 xmlChar *buf = NULL;
3072 int len = 0;
3073 int size = XML_PARSER_BUFFER_SIZE;
3074 int cur, l;
3075 xmlChar *target;
3076 xmlParserInputState state;
3077 int count = 0;
3078
3079 if ((RAW == '<') && (NXT(1) == '?')) {
3080 xmlParserInputPtr input = ctxt->input;
3081 state = ctxt->instate;
3082 ctxt->instate = XML_PARSER_PI;
3083 /*
3084 * this is a Processing Instruction.
3085 */
3086 SKIP(2);
3087 SHRINK;
3088
3089 /*
3090 * Parse the target name and check for special support like
3091 * namespace.
3092 */
3093 target = xmlParsePITarget(ctxt);
3094 if (target != NULL) {
3095 if ((RAW == '?') && (NXT(1) == '>')) {
3096 if (input != ctxt->input) {
3097 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3099 ctxt->sax->error(ctxt->userData,
3100 "PI declaration doesn't start and stop in the same entity\n");
3101 ctxt->wellFormed = 0;
3102 ctxt->disableSAX = 1;
3103 }
3104 SKIP(2);
3105
3106 /*
3107 * SAX: PI detected.
3108 */
3109 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3110 (ctxt->sax->processingInstruction != NULL))
3111 ctxt->sax->processingInstruction(ctxt->userData,
3112 target, NULL);
3113 ctxt->instate = state;
3114 xmlFree(target);
3115 return;
3116 }
3117 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3118 if (buf == NULL) {
3119 xmlGenericError(xmlGenericErrorContext,
3120 "malloc of %d byte failed\n", size);
3121 ctxt->instate = state;
3122 return;
3123 }
3124 cur = CUR;
3125 if (!IS_BLANK(cur)) {
3126 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3128 ctxt->sax->error(ctxt->userData,
3129 "xmlParsePI: PI %s space expected\n", target);
3130 ctxt->wellFormed = 0;
3131 ctxt->disableSAX = 1;
3132 }
3133 SKIP_BLANKS;
3134 cur = CUR_CHAR(l);
3135 while (IS_CHAR(cur) && /* checked */
3136 ((cur != '?') || (NXT(1) != '>'))) {
3137 if (len + 5 >= size) {
3138 size *= 2;
3139 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3140 if (buf == NULL) {
3141 xmlGenericError(xmlGenericErrorContext,
3142 "realloc of %d byte failed\n", size);
3143 ctxt->instate = state;
3144 return;
3145 }
3146 }
3147 count++;
3148 if (count > 50) {
3149 GROW;
3150 count = 0;
3151 }
3152 COPY_BUF(l,buf,len,cur);
3153 NEXTL(l);
3154 cur = CUR_CHAR(l);
3155 if (cur == 0) {
3156 SHRINK;
3157 GROW;
3158 cur = CUR_CHAR(l);
3159 }
3160 }
3161 buf[len] = 0;
3162 if (cur != '?') {
3163 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3165 ctxt->sax->error(ctxt->userData,
3166 "xmlParsePI: PI %s never end ...\n", target);
3167 ctxt->wellFormed = 0;
3168 ctxt->disableSAX = 1;
3169 } else {
3170 if (input != ctxt->input) {
3171 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3173 ctxt->sax->error(ctxt->userData,
3174 "PI declaration doesn't start and stop in the same entity\n");
3175 ctxt->wellFormed = 0;
3176 ctxt->disableSAX = 1;
3177 }
3178 SKIP(2);
3179
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003180#ifdef LIBXML_CATALOG_ENABLED
3181 if (((state == XML_PARSER_MISC) ||
3182 (state == XML_PARSER_START)) &&
3183 (xmlStrEqual(target, XML_CATALOG_PI))) {
3184 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3185 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3186 (allow == XML_CATA_ALLOW_ALL))
3187 xmlParseCatalogPI(ctxt, buf);
3188 }
3189#endif
3190
3191
Owen Taylor3473f882001-02-23 17:55:21 +00003192 /*
3193 * SAX: PI detected.
3194 */
3195 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3196 (ctxt->sax->processingInstruction != NULL))
3197 ctxt->sax->processingInstruction(ctxt->userData,
3198 target, buf);
3199 }
3200 xmlFree(buf);
3201 xmlFree(target);
3202 } else {
3203 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3205 ctxt->sax->error(ctxt->userData,
3206 "xmlParsePI : no target name\n");
3207 ctxt->wellFormed = 0;
3208 ctxt->disableSAX = 1;
3209 }
3210 ctxt->instate = state;
3211 }
3212}
3213
3214/**
3215 * xmlParseNotationDecl:
3216 * @ctxt: an XML parser context
3217 *
3218 * parse a notation declaration
3219 *
3220 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3221 *
3222 * Hence there is actually 3 choices:
3223 * 'PUBLIC' S PubidLiteral
3224 * 'PUBLIC' S PubidLiteral S SystemLiteral
3225 * and 'SYSTEM' S SystemLiteral
3226 *
3227 * See the NOTE on xmlParseExternalID().
3228 */
3229
3230void
3231xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3232 xmlChar *name;
3233 xmlChar *Pubid;
3234 xmlChar *Systemid;
3235
3236 if ((RAW == '<') && (NXT(1) == '!') &&
3237 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3238 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3239 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3240 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3241 xmlParserInputPtr input = ctxt->input;
3242 SHRINK;
3243 SKIP(10);
3244 if (!IS_BLANK(CUR)) {
3245 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3247 ctxt->sax->error(ctxt->userData,
3248 "Space required after '<!NOTATION'\n");
3249 ctxt->wellFormed = 0;
3250 ctxt->disableSAX = 1;
3251 return;
3252 }
3253 SKIP_BLANKS;
3254
Daniel Veillard76d66f42001-05-16 21:05:17 +00003255 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003256 if (name == NULL) {
3257 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3258 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3259 ctxt->sax->error(ctxt->userData,
3260 "NOTATION: Name expected here\n");
3261 ctxt->wellFormed = 0;
3262 ctxt->disableSAX = 1;
3263 return;
3264 }
3265 if (!IS_BLANK(CUR)) {
3266 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3268 ctxt->sax->error(ctxt->userData,
3269 "Space required after the NOTATION name'\n");
3270 ctxt->wellFormed = 0;
3271 ctxt->disableSAX = 1;
3272 return;
3273 }
3274 SKIP_BLANKS;
3275
3276 /*
3277 * Parse the IDs.
3278 */
3279 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3280 SKIP_BLANKS;
3281
3282 if (RAW == '>') {
3283 if (input != ctxt->input) {
3284 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3286 ctxt->sax->error(ctxt->userData,
3287"Notation declaration doesn't start and stop in the same entity\n");
3288 ctxt->wellFormed = 0;
3289 ctxt->disableSAX = 1;
3290 }
3291 NEXT;
3292 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3293 (ctxt->sax->notationDecl != NULL))
3294 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3295 } else {
3296 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3298 ctxt->sax->error(ctxt->userData,
3299 "'>' required to close NOTATION declaration\n");
3300 ctxt->wellFormed = 0;
3301 ctxt->disableSAX = 1;
3302 }
3303 xmlFree(name);
3304 if (Systemid != NULL) xmlFree(Systemid);
3305 if (Pubid != NULL) xmlFree(Pubid);
3306 }
3307}
3308
3309/**
3310 * xmlParseEntityDecl:
3311 * @ctxt: an XML parser context
3312 *
3313 * parse <!ENTITY declarations
3314 *
3315 * [70] EntityDecl ::= GEDecl | PEDecl
3316 *
3317 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3318 *
3319 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3320 *
3321 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3322 *
3323 * [74] PEDef ::= EntityValue | ExternalID
3324 *
3325 * [76] NDataDecl ::= S 'NDATA' S Name
3326 *
3327 * [ VC: Notation Declared ]
3328 * The Name must match the declared name of a notation.
3329 */
3330
3331void
3332xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3333 xmlChar *name = NULL;
3334 xmlChar *value = NULL;
3335 xmlChar *URI = NULL, *literal = NULL;
3336 xmlChar *ndata = NULL;
3337 int isParameter = 0;
3338 xmlChar *orig = NULL;
3339
3340 GROW;
3341 if ((RAW == '<') && (NXT(1) == '!') &&
3342 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3343 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3344 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3345 xmlParserInputPtr input = ctxt->input;
3346 ctxt->instate = XML_PARSER_ENTITY_DECL;
3347 SHRINK;
3348 SKIP(8);
3349 if (!IS_BLANK(CUR)) {
3350 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3352 ctxt->sax->error(ctxt->userData,
3353 "Space required after '<!ENTITY'\n");
3354 ctxt->wellFormed = 0;
3355 ctxt->disableSAX = 1;
3356 }
3357 SKIP_BLANKS;
3358
3359 if (RAW == '%') {
3360 NEXT;
3361 if (!IS_BLANK(CUR)) {
3362 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3364 ctxt->sax->error(ctxt->userData,
3365 "Space required after '%'\n");
3366 ctxt->wellFormed = 0;
3367 ctxt->disableSAX = 1;
3368 }
3369 SKIP_BLANKS;
3370 isParameter = 1;
3371 }
3372
Daniel Veillard76d66f42001-05-16 21:05:17 +00003373 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003374 if (name == NULL) {
3375 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3377 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3378 ctxt->wellFormed = 0;
3379 ctxt->disableSAX = 1;
3380 return;
3381 }
3382 if (!IS_BLANK(CUR)) {
3383 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3385 ctxt->sax->error(ctxt->userData,
3386 "Space required after the entity name\n");
3387 ctxt->wellFormed = 0;
3388 ctxt->disableSAX = 1;
3389 }
3390 SKIP_BLANKS;
3391
3392 /*
3393 * handle the various case of definitions...
3394 */
3395 if (isParameter) {
3396 if ((RAW == '"') || (RAW == '\'')) {
3397 value = xmlParseEntityValue(ctxt, &orig);
3398 if (value) {
3399 if ((ctxt->sax != NULL) &&
3400 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3401 ctxt->sax->entityDecl(ctxt->userData, name,
3402 XML_INTERNAL_PARAMETER_ENTITY,
3403 NULL, NULL, value);
3404 }
3405 } else {
3406 URI = xmlParseExternalID(ctxt, &literal, 1);
3407 if ((URI == NULL) && (literal == NULL)) {
3408 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3409 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3410 ctxt->sax->error(ctxt->userData,
3411 "Entity value required\n");
3412 ctxt->wellFormed = 0;
3413 ctxt->disableSAX = 1;
3414 }
3415 if (URI) {
3416 xmlURIPtr uri;
3417
3418 uri = xmlParseURI((const char *) URI);
3419 if (uri == NULL) {
3420 ctxt->errNo = XML_ERR_INVALID_URI;
3421 if ((ctxt->sax != NULL) &&
3422 (!ctxt->disableSAX) &&
3423 (ctxt->sax->error != NULL))
3424 ctxt->sax->error(ctxt->userData,
3425 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003426 /*
3427 * This really ought to be a well formedness error
3428 * but the XML Core WG decided otherwise c.f. issue
3429 * E26 of the XML erratas.
3430 */
Owen Taylor3473f882001-02-23 17:55:21 +00003431 } else {
3432 if (uri->fragment != NULL) {
3433 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3434 if ((ctxt->sax != NULL) &&
3435 (!ctxt->disableSAX) &&
3436 (ctxt->sax->error != NULL))
3437 ctxt->sax->error(ctxt->userData,
3438 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003439 /*
3440 * Okay this is foolish to block those but not
3441 * invalid URIs.
3442 */
Owen Taylor3473f882001-02-23 17:55:21 +00003443 ctxt->wellFormed = 0;
3444 } else {
3445 if ((ctxt->sax != NULL) &&
3446 (!ctxt->disableSAX) &&
3447 (ctxt->sax->entityDecl != NULL))
3448 ctxt->sax->entityDecl(ctxt->userData, name,
3449 XML_EXTERNAL_PARAMETER_ENTITY,
3450 literal, URI, NULL);
3451 }
3452 xmlFreeURI(uri);
3453 }
3454 }
3455 }
3456 } else {
3457 if ((RAW == '"') || (RAW == '\'')) {
3458 value = xmlParseEntityValue(ctxt, &orig);
3459 if ((ctxt->sax != NULL) &&
3460 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3461 ctxt->sax->entityDecl(ctxt->userData, name,
3462 XML_INTERNAL_GENERAL_ENTITY,
3463 NULL, NULL, value);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003464 /*
3465 * For expat compatibility in SAX mode.
3466 */
3467 if ((ctxt->myDoc == NULL) ||
3468 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
3469 if (ctxt->myDoc == NULL) {
3470 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3471 }
3472 if (ctxt->myDoc->intSubset == NULL)
3473 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3474 BAD_CAST "fake", NULL, NULL);
3475
3476 entityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
3477 NULL, NULL, value);
3478 }
Owen Taylor3473f882001-02-23 17:55:21 +00003479 } else {
3480 URI = xmlParseExternalID(ctxt, &literal, 1);
3481 if ((URI == NULL) && (literal == NULL)) {
3482 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3484 ctxt->sax->error(ctxt->userData,
3485 "Entity value required\n");
3486 ctxt->wellFormed = 0;
3487 ctxt->disableSAX = 1;
3488 }
3489 if (URI) {
3490 xmlURIPtr uri;
3491
3492 uri = xmlParseURI((const char *)URI);
3493 if (uri == NULL) {
3494 ctxt->errNo = XML_ERR_INVALID_URI;
3495 if ((ctxt->sax != NULL) &&
3496 (!ctxt->disableSAX) &&
3497 (ctxt->sax->error != NULL))
3498 ctxt->sax->error(ctxt->userData,
3499 "Invalid URI: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003500 /*
3501 * This really ought to be a well formedness error
3502 * but the XML Core WG decided otherwise c.f. issue
3503 * E26 of the XML erratas.
3504 */
Owen Taylor3473f882001-02-23 17:55:21 +00003505 } else {
3506 if (uri->fragment != NULL) {
3507 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3508 if ((ctxt->sax != NULL) &&
3509 (!ctxt->disableSAX) &&
3510 (ctxt->sax->error != NULL))
3511 ctxt->sax->error(ctxt->userData,
3512 "Fragment not allowed: %s\n", URI);
Daniel Veillard4a7ae502002-02-18 19:18:17 +00003513 /*
3514 * Okay this is foolish to block those but not
3515 * invalid URIs.
3516 */
Owen Taylor3473f882001-02-23 17:55:21 +00003517 ctxt->wellFormed = 0;
3518 }
3519 xmlFreeURI(uri);
3520 }
3521 }
3522 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3523 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3525 ctxt->sax->error(ctxt->userData,
3526 "Space required before 'NDATA'\n");
3527 ctxt->wellFormed = 0;
3528 ctxt->disableSAX = 1;
3529 }
3530 SKIP_BLANKS;
3531 if ((RAW == 'N') && (NXT(1) == 'D') &&
3532 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3533 (NXT(4) == 'A')) {
3534 SKIP(5);
3535 if (!IS_BLANK(CUR)) {
3536 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3538 ctxt->sax->error(ctxt->userData,
3539 "Space required after 'NDATA'\n");
3540 ctxt->wellFormed = 0;
3541 ctxt->disableSAX = 1;
3542 }
3543 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003544 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003545 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3546 (ctxt->sax->unparsedEntityDecl != NULL))
3547 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3548 literal, URI, ndata);
3549 } else {
3550 if ((ctxt->sax != NULL) &&
3551 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3552 ctxt->sax->entityDecl(ctxt->userData, name,
3553 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3554 literal, URI, NULL);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003555 /*
3556 * For expat compatibility in SAX mode.
3557 * assuming the entity repalcement was asked for
3558 */
3559 if ((ctxt->replaceEntities != 0) &&
3560 ((ctxt->myDoc == NULL) ||
3561 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
3562 if (ctxt->myDoc == NULL) {
3563 ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
3564 }
3565
3566 if (ctxt->myDoc->intSubset == NULL)
3567 ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
3568 BAD_CAST "fake", NULL, NULL);
3569 entityDecl(ctxt, name,
3570 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3571 literal, URI, NULL);
3572 }
Owen Taylor3473f882001-02-23 17:55:21 +00003573 }
3574 }
3575 }
3576 SKIP_BLANKS;
3577 if (RAW != '>') {
3578 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3579 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3580 ctxt->sax->error(ctxt->userData,
3581 "xmlParseEntityDecl: entity %s not terminated\n", name);
3582 ctxt->wellFormed = 0;
3583 ctxt->disableSAX = 1;
3584 } else {
3585 if (input != ctxt->input) {
3586 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3588 ctxt->sax->error(ctxt->userData,
3589"Entity declaration doesn't start and stop in the same entity\n");
3590 ctxt->wellFormed = 0;
3591 ctxt->disableSAX = 1;
3592 }
3593 NEXT;
3594 }
3595 if (orig != NULL) {
3596 /*
3597 * Ugly mechanism to save the raw entity value.
3598 */
3599 xmlEntityPtr cur = NULL;
3600
3601 if (isParameter) {
3602 if ((ctxt->sax != NULL) &&
3603 (ctxt->sax->getParameterEntity != NULL))
3604 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3605 } else {
3606 if ((ctxt->sax != NULL) &&
3607 (ctxt->sax->getEntity != NULL))
3608 cur = ctxt->sax->getEntity(ctxt->userData, name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00003609 if ((cur == NULL) && (ctxt->userData==ctxt)) {
3610 cur = getEntity(ctxt, name);
3611 }
Owen Taylor3473f882001-02-23 17:55:21 +00003612 }
3613 if (cur != NULL) {
3614 if (cur->orig != NULL)
3615 xmlFree(orig);
3616 else
3617 cur->orig = orig;
3618 } else
3619 xmlFree(orig);
3620 }
3621 if (name != NULL) xmlFree(name);
3622 if (value != NULL) xmlFree(value);
3623 if (URI != NULL) xmlFree(URI);
3624 if (literal != NULL) xmlFree(literal);
3625 if (ndata != NULL) xmlFree(ndata);
3626 }
3627}
3628
3629/**
3630 * xmlParseDefaultDecl:
3631 * @ctxt: an XML parser context
3632 * @value: Receive a possible fixed default value for the attribute
3633 *
3634 * Parse an attribute default declaration
3635 *
3636 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3637 *
3638 * [ VC: Required Attribute ]
3639 * if the default declaration is the keyword #REQUIRED, then the
3640 * attribute must be specified for all elements of the type in the
3641 * attribute-list declaration.
3642 *
3643 * [ VC: Attribute Default Legal ]
3644 * The declared default value must meet the lexical constraints of
3645 * the declared attribute type c.f. xmlValidateAttributeDecl()
3646 *
3647 * [ VC: Fixed Attribute Default ]
3648 * if an attribute has a default value declared with the #FIXED
3649 * keyword, instances of that attribute must match the default value.
3650 *
3651 * [ WFC: No < in Attribute Values ]
3652 * handled in xmlParseAttValue()
3653 *
3654 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3655 * or XML_ATTRIBUTE_FIXED.
3656 */
3657
3658int
3659xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3660 int val;
3661 xmlChar *ret;
3662
3663 *value = NULL;
3664 if ((RAW == '#') && (NXT(1) == 'R') &&
3665 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3666 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3667 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3668 (NXT(8) == 'D')) {
3669 SKIP(9);
3670 return(XML_ATTRIBUTE_REQUIRED);
3671 }
3672 if ((RAW == '#') && (NXT(1) == 'I') &&
3673 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3674 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3675 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3676 SKIP(8);
3677 return(XML_ATTRIBUTE_IMPLIED);
3678 }
3679 val = XML_ATTRIBUTE_NONE;
3680 if ((RAW == '#') && (NXT(1) == 'F') &&
3681 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3682 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3683 SKIP(6);
3684 val = XML_ATTRIBUTE_FIXED;
3685 if (!IS_BLANK(CUR)) {
3686 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3688 ctxt->sax->error(ctxt->userData,
3689 "Space required after '#FIXED'\n");
3690 ctxt->wellFormed = 0;
3691 ctxt->disableSAX = 1;
3692 }
3693 SKIP_BLANKS;
3694 }
3695 ret = xmlParseAttValue(ctxt);
3696 ctxt->instate = XML_PARSER_DTD;
3697 if (ret == NULL) {
3698 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3699 ctxt->sax->error(ctxt->userData,
3700 "Attribute default value declaration error\n");
3701 ctxt->wellFormed = 0;
3702 ctxt->disableSAX = 1;
3703 } else
3704 *value = ret;
3705 return(val);
3706}
3707
3708/**
3709 * xmlParseNotationType:
3710 * @ctxt: an XML parser context
3711 *
3712 * parse an Notation attribute type.
3713 *
3714 * Note: the leading 'NOTATION' S part has already being parsed...
3715 *
3716 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3717 *
3718 * [ VC: Notation Attributes ]
3719 * Values of this type must match one of the notation names included
3720 * in the declaration; all notation names in the declaration must be declared.
3721 *
3722 * Returns: the notation attribute tree built while parsing
3723 */
3724
3725xmlEnumerationPtr
3726xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3727 xmlChar *name;
3728 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3729
3730 if (RAW != '(') {
3731 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3733 ctxt->sax->error(ctxt->userData,
3734 "'(' required to start 'NOTATION'\n");
3735 ctxt->wellFormed = 0;
3736 ctxt->disableSAX = 1;
3737 return(NULL);
3738 }
3739 SHRINK;
3740 do {
3741 NEXT;
3742 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003743 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003744 if (name == NULL) {
3745 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3747 ctxt->sax->error(ctxt->userData,
3748 "Name expected in NOTATION declaration\n");
3749 ctxt->wellFormed = 0;
3750 ctxt->disableSAX = 1;
3751 return(ret);
3752 }
3753 cur = xmlCreateEnumeration(name);
3754 xmlFree(name);
3755 if (cur == NULL) return(ret);
3756 if (last == NULL) ret = last = cur;
3757 else {
3758 last->next = cur;
3759 last = cur;
3760 }
3761 SKIP_BLANKS;
3762 } while (RAW == '|');
3763 if (RAW != ')') {
3764 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3766 ctxt->sax->error(ctxt->userData,
3767 "')' required to finish NOTATION declaration\n");
3768 ctxt->wellFormed = 0;
3769 ctxt->disableSAX = 1;
3770 if ((last != NULL) && (last != ret))
3771 xmlFreeEnumeration(last);
3772 return(ret);
3773 }
3774 NEXT;
3775 return(ret);
3776}
3777
3778/**
3779 * xmlParseEnumerationType:
3780 * @ctxt: an XML parser context
3781 *
3782 * parse an Enumeration attribute type.
3783 *
3784 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3785 *
3786 * [ VC: Enumeration ]
3787 * Values of this type must match one of the Nmtoken tokens in
3788 * the declaration
3789 *
3790 * Returns: the enumeration attribute tree built while parsing
3791 */
3792
3793xmlEnumerationPtr
3794xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3795 xmlChar *name;
3796 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3797
3798 if (RAW != '(') {
3799 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3801 ctxt->sax->error(ctxt->userData,
3802 "'(' required to start ATTLIST enumeration\n");
3803 ctxt->wellFormed = 0;
3804 ctxt->disableSAX = 1;
3805 return(NULL);
3806 }
3807 SHRINK;
3808 do {
3809 NEXT;
3810 SKIP_BLANKS;
3811 name = xmlParseNmtoken(ctxt);
3812 if (name == NULL) {
3813 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3815 ctxt->sax->error(ctxt->userData,
3816 "NmToken expected in ATTLIST enumeration\n");
3817 ctxt->wellFormed = 0;
3818 ctxt->disableSAX = 1;
3819 return(ret);
3820 }
3821 cur = xmlCreateEnumeration(name);
3822 xmlFree(name);
3823 if (cur == NULL) return(ret);
3824 if (last == NULL) ret = last = cur;
3825 else {
3826 last->next = cur;
3827 last = cur;
3828 }
3829 SKIP_BLANKS;
3830 } while (RAW == '|');
3831 if (RAW != ')') {
3832 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3834 ctxt->sax->error(ctxt->userData,
3835 "')' required to finish ATTLIST enumeration\n");
3836 ctxt->wellFormed = 0;
3837 ctxt->disableSAX = 1;
3838 return(ret);
3839 }
3840 NEXT;
3841 return(ret);
3842}
3843
3844/**
3845 * xmlParseEnumeratedType:
3846 * @ctxt: an XML parser context
3847 * @tree: the enumeration tree built while parsing
3848 *
3849 * parse an Enumerated attribute type.
3850 *
3851 * [57] EnumeratedType ::= NotationType | Enumeration
3852 *
3853 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3854 *
3855 *
3856 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3857 */
3858
3859int
3860xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3861 if ((RAW == 'N') && (NXT(1) == 'O') &&
3862 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3863 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3864 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3865 SKIP(8);
3866 if (!IS_BLANK(CUR)) {
3867 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3869 ctxt->sax->error(ctxt->userData,
3870 "Space required after 'NOTATION'\n");
3871 ctxt->wellFormed = 0;
3872 ctxt->disableSAX = 1;
3873 return(0);
3874 }
3875 SKIP_BLANKS;
3876 *tree = xmlParseNotationType(ctxt);
3877 if (*tree == NULL) return(0);
3878 return(XML_ATTRIBUTE_NOTATION);
3879 }
3880 *tree = xmlParseEnumerationType(ctxt);
3881 if (*tree == NULL) return(0);
3882 return(XML_ATTRIBUTE_ENUMERATION);
3883}
3884
3885/**
3886 * xmlParseAttributeType:
3887 * @ctxt: an XML parser context
3888 * @tree: the enumeration tree built while parsing
3889 *
3890 * parse the Attribute list def for an element
3891 *
3892 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3893 *
3894 * [55] StringType ::= 'CDATA'
3895 *
3896 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3897 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3898 *
3899 * Validity constraints for attribute values syntax are checked in
3900 * xmlValidateAttributeValue()
3901 *
3902 * [ VC: ID ]
3903 * Values of type ID must match the Name production. A name must not
3904 * appear more than once in an XML document as a value of this type;
3905 * i.e., ID values must uniquely identify the elements which bear them.
3906 *
3907 * [ VC: One ID per Element Type ]
3908 * No element type may have more than one ID attribute specified.
3909 *
3910 * [ VC: ID Attribute Default ]
3911 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3912 *
3913 * [ VC: IDREF ]
3914 * Values of type IDREF must match the Name production, and values
3915 * of type IDREFS must match Names; each IDREF Name must match the value
3916 * of an ID attribute on some element in the XML document; i.e. IDREF
3917 * values must match the value of some ID attribute.
3918 *
3919 * [ VC: Entity Name ]
3920 * Values of type ENTITY must match the Name production, values
3921 * of type ENTITIES must match Names; each Entity Name must match the
3922 * name of an unparsed entity declared in the DTD.
3923 *
3924 * [ VC: Name Token ]
3925 * Values of type NMTOKEN must match the Nmtoken production; values
3926 * of type NMTOKENS must match Nmtokens.
3927 *
3928 * Returns the attribute type
3929 */
3930int
3931xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3932 SHRINK;
3933 if ((RAW == 'C') && (NXT(1) == 'D') &&
3934 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3935 (NXT(4) == 'A')) {
3936 SKIP(5);
3937 return(XML_ATTRIBUTE_CDATA);
3938 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3939 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3940 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3941 SKIP(6);
3942 return(XML_ATTRIBUTE_IDREFS);
3943 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3944 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3945 (NXT(4) == 'F')) {
3946 SKIP(5);
3947 return(XML_ATTRIBUTE_IDREF);
3948 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3949 SKIP(2);
3950 return(XML_ATTRIBUTE_ID);
3951 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3952 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3953 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3954 SKIP(6);
3955 return(XML_ATTRIBUTE_ENTITY);
3956 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3957 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3958 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3959 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3960 SKIP(8);
3961 return(XML_ATTRIBUTE_ENTITIES);
3962 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3963 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3964 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3965 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3966 SKIP(8);
3967 return(XML_ATTRIBUTE_NMTOKENS);
3968 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3969 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3970 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3971 (NXT(6) == 'N')) {
3972 SKIP(7);
3973 return(XML_ATTRIBUTE_NMTOKEN);
3974 }
3975 return(xmlParseEnumeratedType(ctxt, tree));
3976}
3977
3978/**
3979 * xmlParseAttributeListDecl:
3980 * @ctxt: an XML parser context
3981 *
3982 * : parse the Attribute list def for an element
3983 *
3984 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3985 *
3986 * [53] AttDef ::= S Name S AttType S DefaultDecl
3987 *
3988 */
3989void
3990xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3991 xmlChar *elemName;
3992 xmlChar *attrName;
3993 xmlEnumerationPtr tree;
3994
3995 if ((RAW == '<') && (NXT(1) == '!') &&
3996 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3997 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3998 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3999 (NXT(8) == 'T')) {
4000 xmlParserInputPtr input = ctxt->input;
4001
4002 SKIP(9);
4003 if (!IS_BLANK(CUR)) {
4004 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4005 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4006 ctxt->sax->error(ctxt->userData,
4007 "Space required after '<!ATTLIST'\n");
4008 ctxt->wellFormed = 0;
4009 ctxt->disableSAX = 1;
4010 }
4011 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004012 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004013 if (elemName == NULL) {
4014 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4015 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4016 ctxt->sax->error(ctxt->userData,
4017 "ATTLIST: no name for Element\n");
4018 ctxt->wellFormed = 0;
4019 ctxt->disableSAX = 1;
4020 return;
4021 }
4022 SKIP_BLANKS;
4023 GROW;
4024 while (RAW != '>') {
4025 const xmlChar *check = CUR_PTR;
4026 int type;
4027 int def;
4028 xmlChar *defaultValue = NULL;
4029
4030 GROW;
4031 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004032 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004033 if (attrName == NULL) {
4034 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4036 ctxt->sax->error(ctxt->userData,
4037 "ATTLIST: no name for Attribute\n");
4038 ctxt->wellFormed = 0;
4039 ctxt->disableSAX = 1;
4040 break;
4041 }
4042 GROW;
4043 if (!IS_BLANK(CUR)) {
4044 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4046 ctxt->sax->error(ctxt->userData,
4047 "Space required after the attribute name\n");
4048 ctxt->wellFormed = 0;
4049 ctxt->disableSAX = 1;
4050 if (attrName != NULL)
4051 xmlFree(attrName);
4052 if (defaultValue != NULL)
4053 xmlFree(defaultValue);
4054 break;
4055 }
4056 SKIP_BLANKS;
4057
4058 type = xmlParseAttributeType(ctxt, &tree);
4059 if (type <= 0) {
4060 if (attrName != NULL)
4061 xmlFree(attrName);
4062 if (defaultValue != NULL)
4063 xmlFree(defaultValue);
4064 break;
4065 }
4066
4067 GROW;
4068 if (!IS_BLANK(CUR)) {
4069 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4070 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4071 ctxt->sax->error(ctxt->userData,
4072 "Space required after the attribute type\n");
4073 ctxt->wellFormed = 0;
4074 ctxt->disableSAX = 1;
4075 if (attrName != NULL)
4076 xmlFree(attrName);
4077 if (defaultValue != NULL)
4078 xmlFree(defaultValue);
4079 if (tree != NULL)
4080 xmlFreeEnumeration(tree);
4081 break;
4082 }
4083 SKIP_BLANKS;
4084
4085 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4086 if (def <= 0) {
4087 if (attrName != NULL)
4088 xmlFree(attrName);
4089 if (defaultValue != NULL)
4090 xmlFree(defaultValue);
4091 if (tree != NULL)
4092 xmlFreeEnumeration(tree);
4093 break;
4094 }
4095
4096 GROW;
4097 if (RAW != '>') {
4098 if (!IS_BLANK(CUR)) {
4099 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4100 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4101 ctxt->sax->error(ctxt->userData,
4102 "Space required after the attribute default value\n");
4103 ctxt->wellFormed = 0;
4104 ctxt->disableSAX = 1;
4105 if (attrName != NULL)
4106 xmlFree(attrName);
4107 if (defaultValue != NULL)
4108 xmlFree(defaultValue);
4109 if (tree != NULL)
4110 xmlFreeEnumeration(tree);
4111 break;
4112 }
4113 SKIP_BLANKS;
4114 }
4115 if (check == CUR_PTR) {
4116 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4118 ctxt->sax->error(ctxt->userData,
4119 "xmlParseAttributeListDecl: detected internal error\n");
4120 if (attrName != NULL)
4121 xmlFree(attrName);
4122 if (defaultValue != NULL)
4123 xmlFree(defaultValue);
4124 if (tree != NULL)
4125 xmlFreeEnumeration(tree);
4126 break;
4127 }
4128 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4129 (ctxt->sax->attributeDecl != NULL))
4130 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4131 type, def, defaultValue, tree);
4132 if (attrName != NULL)
4133 xmlFree(attrName);
4134 if (defaultValue != NULL)
4135 xmlFree(defaultValue);
4136 GROW;
4137 }
4138 if (RAW == '>') {
4139 if (input != ctxt->input) {
4140 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4142 ctxt->sax->error(ctxt->userData,
4143"Attribute list declaration doesn't start and stop in the same entity\n");
4144 ctxt->wellFormed = 0;
4145 ctxt->disableSAX = 1;
4146 }
4147 NEXT;
4148 }
4149
4150 xmlFree(elemName);
4151 }
4152}
4153
4154/**
4155 * xmlParseElementMixedContentDecl:
4156 * @ctxt: an XML parser context
4157 *
4158 * parse the declaration for a Mixed Element content
4159 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4160 *
4161 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4162 * '(' S? '#PCDATA' S? ')'
4163 *
4164 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4165 *
4166 * [ VC: No Duplicate Types ]
4167 * The same name must not appear more than once in a single
4168 * mixed-content declaration.
4169 *
4170 * returns: the list of the xmlElementContentPtr describing the element choices
4171 */
4172xmlElementContentPtr
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004173xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004174 xmlElementContentPtr ret = NULL, cur = NULL, n;
4175 xmlChar *elem = NULL;
4176
4177 GROW;
4178 if ((RAW == '#') && (NXT(1) == 'P') &&
4179 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4180 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4181 (NXT(6) == 'A')) {
4182 SKIP(7);
4183 SKIP_BLANKS;
4184 SHRINK;
4185 if (RAW == ')') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004186 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4187 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4188 if (ctxt->vctxt.error != NULL)
4189 ctxt->vctxt.error(ctxt->vctxt.userData,
4190"Element content declaration doesn't start and stop in the same entity\n");
4191 ctxt->valid = 0;
4192 }
Owen Taylor3473f882001-02-23 17:55:21 +00004193 NEXT;
4194 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4195 if (RAW == '*') {
4196 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4197 NEXT;
4198 }
4199 return(ret);
4200 }
4201 if ((RAW == '(') || (RAW == '|')) {
4202 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4203 if (ret == NULL) return(NULL);
4204 }
4205 while (RAW == '|') {
4206 NEXT;
4207 if (elem == NULL) {
4208 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4209 if (ret == NULL) return(NULL);
4210 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004211 if (cur != NULL)
4212 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004213 cur = ret;
4214 } else {
4215 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4216 if (n == NULL) return(NULL);
4217 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004218 if (n->c1 != NULL)
4219 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004220 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004221 if (n != NULL)
4222 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004223 cur = n;
4224 xmlFree(elem);
4225 }
4226 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004227 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004228 if (elem == NULL) {
4229 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4231 ctxt->sax->error(ctxt->userData,
4232 "xmlParseElementMixedContentDecl : Name expected\n");
4233 ctxt->wellFormed = 0;
4234 ctxt->disableSAX = 1;
4235 xmlFreeElementContent(cur);
4236 return(NULL);
4237 }
4238 SKIP_BLANKS;
4239 GROW;
4240 }
4241 if ((RAW == ')') && (NXT(1) == '*')) {
4242 if (elem != NULL) {
4243 cur->c2 = xmlNewElementContent(elem,
4244 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004245 if (cur->c2 != NULL)
4246 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004247 xmlFree(elem);
4248 }
4249 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004250 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4251 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4252 if (ctxt->vctxt.error != NULL)
4253 ctxt->vctxt.error(ctxt->vctxt.userData,
4254"Element content declaration doesn't start and stop in the same entity\n");
4255 ctxt->valid = 0;
4256 }
Owen Taylor3473f882001-02-23 17:55:21 +00004257 SKIP(2);
4258 } else {
4259 if (elem != NULL) xmlFree(elem);
4260 xmlFreeElementContent(ret);
4261 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4262 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4263 ctxt->sax->error(ctxt->userData,
4264 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4265 ctxt->wellFormed = 0;
4266 ctxt->disableSAX = 1;
4267 return(NULL);
4268 }
4269
4270 } else {
4271 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4272 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4273 ctxt->sax->error(ctxt->userData,
4274 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4275 ctxt->wellFormed = 0;
4276 ctxt->disableSAX = 1;
4277 }
4278 return(ret);
4279}
4280
4281/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004282 * xmlParseElementChildrenContentD:
4283 * @ctxt: an XML parser context
4284 *
4285 * VMS version of xmlParseElementChildrenContentDecl()
4286 *
4287 * Returns the tree of xmlElementContentPtr describing the element
4288 * hierarchy.
4289 */
4290/**
Owen Taylor3473f882001-02-23 17:55:21 +00004291 * xmlParseElementChildrenContentDecl:
4292 * @ctxt: an XML parser context
4293 *
4294 * parse the declaration for a Mixed Element content
4295 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4296 *
4297 *
4298 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4299 *
4300 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4301 *
4302 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4303 *
4304 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4305 *
4306 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4307 * TODO Parameter-entity replacement text must be properly nested
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004308 * with parenthesized groups. That is to say, if either of the
Owen Taylor3473f882001-02-23 17:55:21 +00004309 * opening or closing parentheses in a choice, seq, or Mixed
4310 * construct is contained in the replacement text for a parameter
4311 * entity, both must be contained in the same replacement text. For
4312 * interoperability, if a parameter-entity reference appears in a
4313 * choice, seq, or Mixed construct, its replacement text should not
4314 * be empty, and neither the first nor last non-blank character of
4315 * the replacement text should be a connector (| or ,).
4316 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004317 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004318 * hierarchy.
4319 */
4320xmlElementContentPtr
4321#ifdef VMS
4322xmlParseElementChildrenContentD
4323#else
4324xmlParseElementChildrenContentDecl
4325#endif
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004326(xmlParserCtxtPtr ctxt, xmlParserInputPtr inputchk) {
Owen Taylor3473f882001-02-23 17:55:21 +00004327 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4328 xmlChar *elem;
4329 xmlChar type = 0;
4330
4331 SKIP_BLANKS;
4332 GROW;
4333 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004334 xmlParserInputPtr input = ctxt->input;
4335
Owen Taylor3473f882001-02-23 17:55:21 +00004336 /* Recurse on first child */
4337 NEXT;
4338 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004339 cur = ret = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004340 SKIP_BLANKS;
4341 GROW;
4342 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004343 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004344 if (elem == NULL) {
4345 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4347 ctxt->sax->error(ctxt->userData,
4348 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4349 ctxt->wellFormed = 0;
4350 ctxt->disableSAX = 1;
4351 return(NULL);
4352 }
4353 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4354 GROW;
4355 if (RAW == '?') {
4356 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4357 NEXT;
4358 } else if (RAW == '*') {
4359 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4360 NEXT;
4361 } else if (RAW == '+') {
4362 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4363 NEXT;
4364 } else {
4365 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4366 }
4367 xmlFree(elem);
4368 GROW;
4369 }
4370 SKIP_BLANKS;
4371 SHRINK;
4372 while (RAW != ')') {
4373 /*
4374 * Each loop we parse one separator and one element.
4375 */
4376 if (RAW == ',') {
4377 if (type == 0) type = CUR;
4378
4379 /*
4380 * Detect "Name | Name , Name" error
4381 */
4382 else if (type != CUR) {
4383 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4385 ctxt->sax->error(ctxt->userData,
4386 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4387 type);
4388 ctxt->wellFormed = 0;
4389 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004390 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004391 xmlFreeElementContent(last);
4392 if (ret != NULL)
4393 xmlFreeElementContent(ret);
4394 return(NULL);
4395 }
4396 NEXT;
4397
4398 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4399 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004400 if ((last != NULL) && (last != ret))
4401 xmlFreeElementContent(last);
Owen Taylor3473f882001-02-23 17:55:21 +00004402 xmlFreeElementContent(ret);
4403 return(NULL);
4404 }
4405 if (last == NULL) {
4406 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004407 if (ret != NULL)
4408 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004409 ret = cur = op;
4410 } else {
4411 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004412 if (op != NULL)
4413 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004414 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004415 if (last != NULL)
4416 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004417 cur =op;
4418 last = NULL;
4419 }
4420 } else if (RAW == '|') {
4421 if (type == 0) type = CUR;
4422
4423 /*
4424 * Detect "Name , Name | Name" error
4425 */
4426 else if (type != CUR) {
4427 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4429 ctxt->sax->error(ctxt->userData,
4430 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4431 type);
4432 ctxt->wellFormed = 0;
4433 ctxt->disableSAX = 1;
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004434 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004435 xmlFreeElementContent(last);
4436 if (ret != NULL)
4437 xmlFreeElementContent(ret);
4438 return(NULL);
4439 }
4440 NEXT;
4441
4442 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4443 if (op == NULL) {
Daniel Veillardd54fa3e2002-02-20 16:48:52 +00004444 if ((last != NULL) && (last != ret))
Owen Taylor3473f882001-02-23 17:55:21 +00004445 xmlFreeElementContent(last);
4446 if (ret != NULL)
4447 xmlFreeElementContent(ret);
4448 return(NULL);
4449 }
4450 if (last == NULL) {
4451 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004452 if (ret != NULL)
4453 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004454 ret = cur = op;
4455 } else {
4456 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004457 if (op != NULL)
4458 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004459 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004460 if (last != NULL)
4461 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004462 cur =op;
4463 last = NULL;
4464 }
4465 } else {
4466 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4467 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4468 ctxt->sax->error(ctxt->userData,
4469 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4470 ctxt->wellFormed = 0;
4471 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004472 if (ret != NULL)
4473 xmlFreeElementContent(ret);
4474 return(NULL);
4475 }
4476 GROW;
4477 SKIP_BLANKS;
4478 GROW;
4479 if (RAW == '(') {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004480 xmlParserInputPtr input = ctxt->input;
Owen Taylor3473f882001-02-23 17:55:21 +00004481 /* Recurse on second child */
4482 NEXT;
4483 SKIP_BLANKS;
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004484 last = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004485 SKIP_BLANKS;
4486 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004487 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004488 if (elem == NULL) {
4489 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4491 ctxt->sax->error(ctxt->userData,
4492 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4493 ctxt->wellFormed = 0;
4494 ctxt->disableSAX = 1;
Owen Taylor3473f882001-02-23 17:55:21 +00004495 if (ret != NULL)
4496 xmlFreeElementContent(ret);
4497 return(NULL);
4498 }
4499 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4500 xmlFree(elem);
4501 if (RAW == '?') {
4502 last->ocur = XML_ELEMENT_CONTENT_OPT;
4503 NEXT;
4504 } else if (RAW == '*') {
4505 last->ocur = XML_ELEMENT_CONTENT_MULT;
4506 NEXT;
4507 } else if (RAW == '+') {
4508 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4509 NEXT;
4510 } else {
4511 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4512 }
4513 }
4514 SKIP_BLANKS;
4515 GROW;
4516 }
4517 if ((cur != NULL) && (last != NULL)) {
4518 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004519 if (last != NULL)
4520 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004521 }
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004522 if ((ctxt->validate) && (ctxt->input != inputchk)) {
4523 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4524 if (ctxt->vctxt.error != NULL)
4525 ctxt->vctxt.error(ctxt->vctxt.userData,
4526"Element content declaration doesn't start and stop in the same entity\n");
4527 ctxt->valid = 0;
4528 }
Owen Taylor3473f882001-02-23 17:55:21 +00004529 NEXT;
4530 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004531 if (ret != NULL)
4532 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004533 NEXT;
4534 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004535 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004536 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004537 cur = ret;
4538 /*
4539 * Some normalization:
4540 * (a | b* | c?)* == (a | b | c)*
4541 */
4542 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4543 if ((cur->c1 != NULL) &&
4544 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4545 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4546 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4547 if ((cur->c2 != NULL) &&
4548 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4549 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4550 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4551 cur = cur->c2;
4552 }
4553 }
Owen Taylor3473f882001-02-23 17:55:21 +00004554 NEXT;
4555 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004556 if (ret != NULL) {
4557 int found = 0;
4558
Daniel Veillarde470df72001-04-18 21:41:07 +00004559 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004560 /*
4561 * Some normalization:
4562 * (a | b*)+ == (a | b)*
4563 * (a | b?)+ == (a | b)*
4564 */
4565 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4566 if ((cur->c1 != NULL) &&
4567 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4568 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4569 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4570 found = 1;
4571 }
4572 if ((cur->c2 != NULL) &&
4573 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4574 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4575 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4576 found = 1;
4577 }
4578 cur = cur->c2;
4579 }
4580 if (found)
4581 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4582 }
Owen Taylor3473f882001-02-23 17:55:21 +00004583 NEXT;
4584 }
4585 return(ret);
4586}
4587
4588/**
4589 * xmlParseElementContentDecl:
4590 * @ctxt: an XML parser context
4591 * @name: the name of the element being defined.
4592 * @result: the Element Content pointer will be stored here if any
4593 *
4594 * parse the declaration for an Element content either Mixed or Children,
4595 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4596 *
4597 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4598 *
4599 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4600 */
4601
4602int
4603xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4604 xmlElementContentPtr *result) {
4605
4606 xmlElementContentPtr tree = NULL;
4607 xmlParserInputPtr input = ctxt->input;
4608 int res;
4609
4610 *result = NULL;
4611
4612 if (RAW != '(') {
4613 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4615 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004616 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004617 ctxt->wellFormed = 0;
4618 ctxt->disableSAX = 1;
4619 return(-1);
4620 }
4621 NEXT;
4622 GROW;
4623 SKIP_BLANKS;
4624 if ((RAW == '#') && (NXT(1) == 'P') &&
4625 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4626 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4627 (NXT(6) == 'A')) {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004628 tree = xmlParseElementMixedContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004629 res = XML_ELEMENT_TYPE_MIXED;
4630 } else {
Daniel Veillard8dc16a62002-02-19 21:08:48 +00004631 tree = xmlParseElementChildrenContentDecl(ctxt, input);
Owen Taylor3473f882001-02-23 17:55:21 +00004632 res = XML_ELEMENT_TYPE_ELEMENT;
4633 }
Owen Taylor3473f882001-02-23 17:55:21 +00004634 SKIP_BLANKS;
4635 *result = tree;
4636 return(res);
4637}
4638
4639/**
4640 * xmlParseElementDecl:
4641 * @ctxt: an XML parser context
4642 *
4643 * parse an Element declaration.
4644 *
4645 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4646 *
4647 * [ VC: Unique Element Type Declaration ]
4648 * No element type may be declared more than once
4649 *
4650 * Returns the type of the element, or -1 in case of error
4651 */
4652int
4653xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4654 xmlChar *name;
4655 int ret = -1;
4656 xmlElementContentPtr content = NULL;
4657
4658 GROW;
4659 if ((RAW == '<') && (NXT(1) == '!') &&
4660 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4661 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4662 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4663 (NXT(8) == 'T')) {
4664 xmlParserInputPtr input = ctxt->input;
4665
4666 SKIP(9);
4667 if (!IS_BLANK(CUR)) {
4668 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4670 ctxt->sax->error(ctxt->userData,
4671 "Space required after 'ELEMENT'\n");
4672 ctxt->wellFormed = 0;
4673 ctxt->disableSAX = 1;
4674 }
4675 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004676 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004677 if (name == NULL) {
4678 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4680 ctxt->sax->error(ctxt->userData,
4681 "xmlParseElementDecl: no name for Element\n");
4682 ctxt->wellFormed = 0;
4683 ctxt->disableSAX = 1;
4684 return(-1);
4685 }
4686 while ((RAW == 0) && (ctxt->inputNr > 1))
4687 xmlPopInput(ctxt);
4688 if (!IS_BLANK(CUR)) {
4689 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4691 ctxt->sax->error(ctxt->userData,
4692 "Space required after the element name\n");
4693 ctxt->wellFormed = 0;
4694 ctxt->disableSAX = 1;
4695 }
4696 SKIP_BLANKS;
4697 if ((RAW == 'E') && (NXT(1) == 'M') &&
4698 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4699 (NXT(4) == 'Y')) {
4700 SKIP(5);
4701 /*
4702 * Element must always be empty.
4703 */
4704 ret = XML_ELEMENT_TYPE_EMPTY;
4705 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4706 (NXT(2) == 'Y')) {
4707 SKIP(3);
4708 /*
4709 * Element is a generic container.
4710 */
4711 ret = XML_ELEMENT_TYPE_ANY;
4712 } else if (RAW == '(') {
4713 ret = xmlParseElementContentDecl(ctxt, name, &content);
4714 } else {
4715 /*
4716 * [ WFC: PEs in Internal Subset ] error handling.
4717 */
4718 if ((RAW == '%') && (ctxt->external == 0) &&
4719 (ctxt->inputNr == 1)) {
4720 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4722 ctxt->sax->error(ctxt->userData,
4723 "PEReference: forbidden within markup decl in internal subset\n");
4724 } else {
4725 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4727 ctxt->sax->error(ctxt->userData,
4728 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4729 }
4730 ctxt->wellFormed = 0;
4731 ctxt->disableSAX = 1;
4732 if (name != NULL) xmlFree(name);
4733 return(-1);
4734 }
4735
4736 SKIP_BLANKS;
4737 /*
4738 * Pop-up of finished entities.
4739 */
4740 while ((RAW == 0) && (ctxt->inputNr > 1))
4741 xmlPopInput(ctxt);
4742 SKIP_BLANKS;
4743
4744 if (RAW != '>') {
4745 ctxt->errNo = XML_ERR_GT_REQUIRED;
4746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4747 ctxt->sax->error(ctxt->userData,
4748 "xmlParseElementDecl: expected '>' at the end\n");
4749 ctxt->wellFormed = 0;
4750 ctxt->disableSAX = 1;
4751 } else {
4752 if (input != ctxt->input) {
4753 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4755 ctxt->sax->error(ctxt->userData,
4756"Element declaration doesn't start and stop in the same entity\n");
4757 ctxt->wellFormed = 0;
4758 ctxt->disableSAX = 1;
4759 }
4760
4761 NEXT;
4762 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4763 (ctxt->sax->elementDecl != NULL))
4764 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4765 content);
4766 }
4767 if (content != NULL) {
4768 xmlFreeElementContent(content);
4769 }
4770 if (name != NULL) {
4771 xmlFree(name);
4772 }
4773 }
4774 return(ret);
4775}
4776
4777/**
Owen Taylor3473f882001-02-23 17:55:21 +00004778 * xmlParseConditionalSections
4779 * @ctxt: an XML parser context
4780 *
4781 * [61] conditionalSect ::= includeSect | ignoreSect
4782 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4783 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4784 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4785 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4786 */
4787
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004788static void
Owen Taylor3473f882001-02-23 17:55:21 +00004789xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4790 SKIP(3);
4791 SKIP_BLANKS;
4792 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4793 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4794 (NXT(6) == 'E')) {
4795 SKIP(7);
4796 SKIP_BLANKS;
4797 if (RAW != '[') {
4798 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4799 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4800 ctxt->sax->error(ctxt->userData,
4801 "XML conditional section '[' expected\n");
4802 ctxt->wellFormed = 0;
4803 ctxt->disableSAX = 1;
4804 } else {
4805 NEXT;
4806 }
4807 if (xmlParserDebugEntities) {
4808 if ((ctxt->input != NULL) && (ctxt->input->filename))
4809 xmlGenericError(xmlGenericErrorContext,
4810 "%s(%d): ", ctxt->input->filename,
4811 ctxt->input->line);
4812 xmlGenericError(xmlGenericErrorContext,
4813 "Entering INCLUDE Conditional Section\n");
4814 }
4815
4816 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4817 (NXT(2) != '>'))) {
4818 const xmlChar *check = CUR_PTR;
4819 int cons = ctxt->input->consumed;
4820 int tok = ctxt->token;
4821
4822 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4823 xmlParseConditionalSections(ctxt);
4824 } else if (IS_BLANK(CUR)) {
4825 NEXT;
4826 } else if (RAW == '%') {
4827 xmlParsePEReference(ctxt);
4828 } else
4829 xmlParseMarkupDecl(ctxt);
4830
4831 /*
4832 * Pop-up of finished entities.
4833 */
4834 while ((RAW == 0) && (ctxt->inputNr > 1))
4835 xmlPopInput(ctxt);
4836
4837 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4838 (tok == ctxt->token)) {
4839 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4841 ctxt->sax->error(ctxt->userData,
4842 "Content error in the external subset\n");
4843 ctxt->wellFormed = 0;
4844 ctxt->disableSAX = 1;
4845 break;
4846 }
4847 }
4848 if (xmlParserDebugEntities) {
4849 if ((ctxt->input != NULL) && (ctxt->input->filename))
4850 xmlGenericError(xmlGenericErrorContext,
4851 "%s(%d): ", ctxt->input->filename,
4852 ctxt->input->line);
4853 xmlGenericError(xmlGenericErrorContext,
4854 "Leaving INCLUDE Conditional Section\n");
4855 }
4856
4857 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4858 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4859 int state;
4860 int instate;
4861 int depth = 0;
4862
4863 SKIP(6);
4864 SKIP_BLANKS;
4865 if (RAW != '[') {
4866 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4867 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4868 ctxt->sax->error(ctxt->userData,
4869 "XML conditional section '[' expected\n");
4870 ctxt->wellFormed = 0;
4871 ctxt->disableSAX = 1;
4872 } else {
4873 NEXT;
4874 }
4875 if (xmlParserDebugEntities) {
4876 if ((ctxt->input != NULL) && (ctxt->input->filename))
4877 xmlGenericError(xmlGenericErrorContext,
4878 "%s(%d): ", ctxt->input->filename,
4879 ctxt->input->line);
4880 xmlGenericError(xmlGenericErrorContext,
4881 "Entering IGNORE Conditional Section\n");
4882 }
4883
4884 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00004885 * Parse up to the end of the conditional section
Owen Taylor3473f882001-02-23 17:55:21 +00004886 * But disable SAX event generating DTD building in the meantime
4887 */
4888 state = ctxt->disableSAX;
4889 instate = ctxt->instate;
4890 ctxt->disableSAX = 1;
4891 ctxt->instate = XML_PARSER_IGNORE;
4892
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00004893 while ((depth >= 0) && (RAW != 0)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004894 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4895 depth++;
4896 SKIP(3);
4897 continue;
4898 }
4899 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4900 if (--depth >= 0) SKIP(3);
4901 continue;
4902 }
4903 NEXT;
4904 continue;
4905 }
4906
4907 ctxt->disableSAX = state;
4908 ctxt->instate = instate;
4909
4910 if (xmlParserDebugEntities) {
4911 if ((ctxt->input != NULL) && (ctxt->input->filename))
4912 xmlGenericError(xmlGenericErrorContext,
4913 "%s(%d): ", ctxt->input->filename,
4914 ctxt->input->line);
4915 xmlGenericError(xmlGenericErrorContext,
4916 "Leaving IGNORE Conditional Section\n");
4917 }
4918
4919 } else {
4920 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4922 ctxt->sax->error(ctxt->userData,
4923 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4924 ctxt->wellFormed = 0;
4925 ctxt->disableSAX = 1;
4926 }
4927
4928 if (RAW == 0)
4929 SHRINK;
4930
4931 if (RAW == 0) {
4932 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4934 ctxt->sax->error(ctxt->userData,
4935 "XML conditional section not closed\n");
4936 ctxt->wellFormed = 0;
4937 ctxt->disableSAX = 1;
4938 } else {
4939 SKIP(3);
4940 }
4941}
4942
4943/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004944 * xmlParseMarkupDecl:
4945 * @ctxt: an XML parser context
4946 *
4947 * parse Markup declarations
4948 *
4949 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4950 * NotationDecl | PI | Comment
4951 *
4952 * [ VC: Proper Declaration/PE Nesting ]
4953 * Parameter-entity replacement text must be properly nested with
4954 * markup declarations. That is to say, if either the first character
4955 * or the last character of a markup declaration (markupdecl above) is
4956 * contained in the replacement text for a parameter-entity reference,
4957 * both must be contained in the same replacement text.
4958 *
4959 * [ WFC: PEs in Internal Subset ]
4960 * In the internal DTD subset, parameter-entity references can occur
4961 * only where markup declarations can occur, not within markup declarations.
4962 * (This does not apply to references that occur in external parameter
4963 * entities or to the external subset.)
4964 */
4965void
4966xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4967 GROW;
4968 xmlParseElementDecl(ctxt);
4969 xmlParseAttributeListDecl(ctxt);
4970 xmlParseEntityDecl(ctxt);
4971 xmlParseNotationDecl(ctxt);
4972 xmlParsePI(ctxt);
4973 xmlParseComment(ctxt);
4974 /*
4975 * This is only for internal subset. On external entities,
4976 * the replacement is done before parsing stage
4977 */
4978 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4979 xmlParsePEReference(ctxt);
4980
4981 /*
4982 * Conditional sections are allowed from entities included
4983 * by PE References in the internal subset.
4984 */
4985 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4986 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4987 xmlParseConditionalSections(ctxt);
4988 }
4989 }
4990
4991 ctxt->instate = XML_PARSER_DTD;
4992}
4993
4994/**
4995 * xmlParseTextDecl:
4996 * @ctxt: an XML parser context
4997 *
4998 * parse an XML declaration header for external entities
4999 *
5000 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
5001 *
5002 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
5003 */
5004
5005void
5006xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
5007 xmlChar *version;
5008
5009 /*
5010 * We know that '<?xml' is here.
5011 */
5012 if ((RAW == '<') && (NXT(1) == '?') &&
5013 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5014 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5015 SKIP(5);
5016 } else {
5017 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
5018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5019 ctxt->sax->error(ctxt->userData,
5020 "Text declaration '<?xml' required\n");
5021 ctxt->wellFormed = 0;
5022 ctxt->disableSAX = 1;
5023
5024 return;
5025 }
5026
5027 if (!IS_BLANK(CUR)) {
5028 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5030 ctxt->sax->error(ctxt->userData,
5031 "Space needed after '<?xml'\n");
5032 ctxt->wellFormed = 0;
5033 ctxt->disableSAX = 1;
5034 }
5035 SKIP_BLANKS;
5036
5037 /*
5038 * We may have the VersionInfo here.
5039 */
5040 version = xmlParseVersionInfo(ctxt);
5041 if (version == NULL)
5042 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillard401c2112002-01-07 16:54:10 +00005043 else {
5044 if (!IS_BLANK(CUR)) {
5045 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5047 ctxt->sax->error(ctxt->userData, "Space needed here\n");
5048 ctxt->wellFormed = 0;
5049 ctxt->disableSAX = 1;
5050 }
5051 }
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005052 ctxt->input->version = version;
5053
5054 /*
5055 * We must have the encoding declaration
5056 */
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00005057 xmlParseEncodingDecl(ctxt);
5058 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5059 /*
5060 * The XML REC instructs us to stop parsing right here
5061 */
5062 return;
5063 }
5064
5065 SKIP_BLANKS;
5066 if ((RAW == '?') && (NXT(1) == '>')) {
5067 SKIP(2);
5068 } else if (RAW == '>') {
5069 /* Deprecated old WD ... */
5070 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5072 ctxt->sax->error(ctxt->userData,
5073 "XML declaration must end-up with '?>'\n");
5074 ctxt->wellFormed = 0;
5075 ctxt->disableSAX = 1;
5076 NEXT;
5077 } else {
5078 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
5079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5080 ctxt->sax->error(ctxt->userData,
5081 "parsing XML declaration: '?>' expected\n");
5082 ctxt->wellFormed = 0;
5083 ctxt->disableSAX = 1;
5084 MOVETO_ENDTAG(CUR_PTR);
5085 NEXT;
5086 }
5087}
5088
5089/**
Owen Taylor3473f882001-02-23 17:55:21 +00005090 * xmlParseExternalSubset:
5091 * @ctxt: an XML parser context
5092 * @ExternalID: the external identifier
5093 * @SystemID: the system identifier (or URL)
5094 *
5095 * parse Markup declarations from an external subset
5096 *
5097 * [30] extSubset ::= textDecl? extSubsetDecl
5098 *
5099 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5100 */
5101void
5102xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5103 const xmlChar *SystemID) {
5104 GROW;
5105 if ((RAW == '<') && (NXT(1) == '?') &&
5106 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5107 (NXT(4) == 'l')) {
5108 xmlParseTextDecl(ctxt);
5109 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5110 /*
5111 * The XML REC instructs us to stop parsing right here
5112 */
5113 ctxt->instate = XML_PARSER_EOF;
5114 return;
5115 }
5116 }
5117 if (ctxt->myDoc == NULL) {
5118 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5119 }
5120 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5121 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5122
5123 ctxt->instate = XML_PARSER_DTD;
5124 ctxt->external = 1;
5125 while (((RAW == '<') && (NXT(1) == '?')) ||
5126 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005127 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005128 const xmlChar *check = CUR_PTR;
5129 int cons = ctxt->input->consumed;
5130 int tok = ctxt->token;
5131
5132 GROW;
5133 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5134 xmlParseConditionalSections(ctxt);
5135 } else if (IS_BLANK(CUR)) {
5136 NEXT;
5137 } else if (RAW == '%') {
5138 xmlParsePEReference(ctxt);
5139 } else
5140 xmlParseMarkupDecl(ctxt);
5141
5142 /*
5143 * Pop-up of finished entities.
5144 */
5145 while ((RAW == 0) && (ctxt->inputNr > 1))
5146 xmlPopInput(ctxt);
5147
5148 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5149 (tok == ctxt->token)) {
5150 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5152 ctxt->sax->error(ctxt->userData,
5153 "Content error in the external subset\n");
5154 ctxt->wellFormed = 0;
5155 ctxt->disableSAX = 1;
5156 break;
5157 }
5158 }
5159
5160 if (RAW != 0) {
5161 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5163 ctxt->sax->error(ctxt->userData,
5164 "Extra content at the end of the document\n");
5165 ctxt->wellFormed = 0;
5166 ctxt->disableSAX = 1;
5167 }
5168
5169}
5170
5171/**
5172 * xmlParseReference:
5173 * @ctxt: an XML parser context
5174 *
5175 * parse and handle entity references in content, depending on the SAX
5176 * interface, this may end-up in a call to character() if this is a
5177 * CharRef, a predefined entity, if there is no reference() callback.
5178 * or if the parser was asked to switch to that mode.
5179 *
5180 * [67] Reference ::= EntityRef | CharRef
5181 */
5182void
5183xmlParseReference(xmlParserCtxtPtr ctxt) {
5184 xmlEntityPtr ent;
5185 xmlChar *val;
5186 if (RAW != '&') return;
5187
5188 if (NXT(1) == '#') {
5189 int i = 0;
5190 xmlChar out[10];
5191 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005192 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005193
5194 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5195 /*
5196 * So we are using non-UTF-8 buffers
5197 * Check that the char fit on 8bits, if not
5198 * generate a CharRef.
5199 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005200 if (value <= 0xFF) {
5201 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005202 out[1] = 0;
5203 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5204 (!ctxt->disableSAX))
5205 ctxt->sax->characters(ctxt->userData, out, 1);
5206 } else {
5207 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005208 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005209 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005210 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005211 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5212 (!ctxt->disableSAX))
5213 ctxt->sax->reference(ctxt->userData, out);
5214 }
5215 } else {
5216 /*
5217 * Just encode the value in UTF-8
5218 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005219 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005220 out[i] = 0;
5221 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5222 (!ctxt->disableSAX))
5223 ctxt->sax->characters(ctxt->userData, out, i);
5224 }
5225 } else {
5226 ent = xmlParseEntityRef(ctxt);
5227 if (ent == NULL) return;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005228 if (!ctxt->wellFormed)
5229 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005230 if ((ent->name != NULL) &&
5231 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5232 xmlNodePtr list = NULL;
5233 int ret;
5234
5235
5236 /*
5237 * The first reference to the entity trigger a parsing phase
5238 * where the ent->children is filled with the result from
5239 * the parsing.
5240 */
5241 if (ent->children == NULL) {
5242 xmlChar *value;
5243 value = ent->content;
5244
5245 /*
5246 * Check that this entity is well formed
5247 */
5248 if ((value != NULL) &&
5249 (value[1] == 0) && (value[0] == '<') &&
5250 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5251 /*
5252 * DONE: get definite answer on this !!!
5253 * Lots of entity decls are used to declare a single
5254 * char
5255 * <!ENTITY lt "<">
5256 * Which seems to be valid since
5257 * 2.4: The ampersand character (&) and the left angle
5258 * bracket (<) may appear in their literal form only
5259 * when used ... They are also legal within the literal
5260 * entity value of an internal entity declaration;i
5261 * see "4.3.2 Well-Formed Parsed Entities".
5262 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5263 * Looking at the OASIS test suite and James Clark
5264 * tests, this is broken. However the XML REC uses
5265 * it. Is the XML REC not well-formed ????
5266 * This is a hack to avoid this problem
5267 *
5268 * ANSWER: since lt gt amp .. are already defined,
5269 * this is a redefinition and hence the fact that the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005270 * content is not well balanced is not a Wf error, this
Owen Taylor3473f882001-02-23 17:55:21 +00005271 * is lousy but acceptable.
5272 */
5273 list = xmlNewDocText(ctxt->myDoc, value);
5274 if (list != NULL) {
5275 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5276 (ent->children == NULL)) {
5277 ent->children = list;
5278 ent->last = list;
5279 list->parent = (xmlNodePtr) ent;
5280 } else {
5281 xmlFreeNodeList(list);
5282 }
5283 } else if (list != NULL) {
5284 xmlFreeNodeList(list);
5285 }
5286 } else {
5287 /*
5288 * 4.3.2: An internal general parsed entity is well-formed
5289 * if its replacement text matches the production labeled
5290 * content.
5291 */
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005292
5293 void *user_data;
5294 /*
5295 * This is a bit hackish but this seems the best
5296 * way to make sure both SAX and DOM entity support
5297 * behaves okay.
5298 */
5299 if (ctxt->userData == ctxt)
5300 user_data = NULL;
5301 else
5302 user_data = ctxt->userData;
5303
Owen Taylor3473f882001-02-23 17:55:21 +00005304 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5305 ctxt->depth++;
5306 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005307 ctxt->sax, user_data, ctxt->depth,
Owen Taylor3473f882001-02-23 17:55:21 +00005308 value, &list);
5309 ctxt->depth--;
5310 } else if (ent->etype ==
5311 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5312 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005313 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Daniel Veillardb5a60ec2002-03-18 11:45:56 +00005314 ctxt->sax, user_data, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005315 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005316 ctxt->depth--;
5317 } else {
5318 ret = -1;
5319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5320 ctxt->sax->error(ctxt->userData,
5321 "Internal: invalid entity type\n");
5322 }
5323 if (ret == XML_ERR_ENTITY_LOOP) {
5324 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5326 ctxt->sax->error(ctxt->userData,
5327 "Detected entity reference loop\n");
5328 ctxt->wellFormed = 0;
5329 ctxt->disableSAX = 1;
Daniel Veillardbb7ddb32002-02-17 21:26:33 +00005330 return;
Owen Taylor3473f882001-02-23 17:55:21 +00005331 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005332 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5333 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005334 (ent->children == NULL)) {
5335 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005336 if (ctxt->replaceEntities) {
5337 /*
5338 * Prune it directly in the generated document
5339 * except for single text nodes.
5340 */
5341 if ((list->type == XML_TEXT_NODE) &&
5342 (list->next == NULL)) {
5343 list->parent = (xmlNodePtr) ent;
5344 list = NULL;
5345 } else {
5346 while (list != NULL) {
5347 list->parent = (xmlNodePtr) ctxt->node;
5348 if (list->next == NULL)
5349 ent->last = list;
5350 list = list->next;
Daniel Veillard8107a222002-01-13 14:10:10 +00005351 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005352 list = ent->children;
Daniel Veillard8107a222002-01-13 14:10:10 +00005353 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5354 xmlAddEntityReference(ent, list, NULL);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005355 }
5356 } else {
5357 while (list != NULL) {
5358 list->parent = (xmlNodePtr) ent;
5359 if (list->next == NULL)
5360 ent->last = list;
5361 list = list->next;
5362 }
Owen Taylor3473f882001-02-23 17:55:21 +00005363 }
5364 } else {
5365 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005366 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005367 }
5368 } else if (ret > 0) {
5369 ctxt->errNo = ret;
5370 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5371 ctxt->sax->error(ctxt->userData,
5372 "Entity value required\n");
5373 ctxt->wellFormed = 0;
5374 ctxt->disableSAX = 1;
5375 } else if (list != NULL) {
5376 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005377 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005378 }
5379 }
5380 }
5381 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5382 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5383 /*
5384 * Create a node.
5385 */
5386 ctxt->sax->reference(ctxt->userData, ent->name);
5387 return;
5388 } else if (ctxt->replaceEntities) {
5389 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5390 /*
5391 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005392 * a simple tree copy for all references except the first
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005393 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005394 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005395 if (list == NULL) {
Daniel Veillard8107a222002-01-13 14:10:10 +00005396 xmlNodePtr new = NULL, cur, firstChild = NULL;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005397 cur = ent->children;
5398 while (cur != NULL) {
5399 new = xmlCopyNode(cur, 1);
Daniel Veillard8107a222002-01-13 14:10:10 +00005400 if (firstChild == NULL){
5401 firstChild = new;
5402 }
Daniel Veillard62f313b2001-07-04 19:49:14 +00005403 xmlAddChild(ctxt->node, new);
5404 if (cur == ent->last)
5405 break;
5406 cur = cur->next;
5407 }
Daniel Veillard8107a222002-01-13 14:10:10 +00005408 if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
5409 xmlAddEntityReference(ent, firstChild, new);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005410 } else {
5411 /*
5412 * the name change is to avoid coalescing of the
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005413 * node with a possible previous text one which
5414 * would make ent->children a dangling pointer
Daniel Veillard62f313b2001-07-04 19:49:14 +00005415 */
5416 if (ent->children->type == XML_TEXT_NODE)
5417 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5418 if ((ent->last != ent->children) &&
5419 (ent->last->type == XML_TEXT_NODE))
5420 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5421 xmlAddChildList(ctxt->node, ent->children);
5422 }
5423
Owen Taylor3473f882001-02-23 17:55:21 +00005424 /*
5425 * This is to avoid a nasty side effect, see
5426 * characters() in SAX.c
5427 */
5428 ctxt->nodemem = 0;
5429 ctxt->nodelen = 0;
5430 return;
5431 } else {
5432 /*
5433 * Probably running in SAX mode
5434 */
5435 xmlParserInputPtr input;
5436
5437 input = xmlNewEntityInputStream(ctxt, ent);
5438 xmlPushInput(ctxt, input);
5439 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5440 (RAW == '<') && (NXT(1) == '?') &&
5441 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5442 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5443 xmlParseTextDecl(ctxt);
5444 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5445 /*
5446 * The XML REC instructs us to stop parsing right here
5447 */
5448 ctxt->instate = XML_PARSER_EOF;
5449 return;
5450 }
5451 if (input->standalone == 1) {
5452 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5454 ctxt->sax->error(ctxt->userData,
5455 "external parsed entities cannot be standalone\n");
5456 ctxt->wellFormed = 0;
5457 ctxt->disableSAX = 1;
5458 }
5459 }
5460 return;
5461 }
5462 }
5463 } else {
5464 val = ent->content;
5465 if (val == NULL) return;
5466 /*
5467 * inline the entity.
5468 */
5469 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5470 (!ctxt->disableSAX))
5471 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5472 }
5473 }
5474}
5475
5476/**
5477 * xmlParseEntityRef:
5478 * @ctxt: an XML parser context
5479 *
5480 * parse ENTITY references declarations
5481 *
5482 * [68] EntityRef ::= '&' Name ';'
5483 *
5484 * [ WFC: Entity Declared ]
5485 * In a document without any DTD, a document with only an internal DTD
5486 * subset which contains no parameter entity references, or a document
5487 * with "standalone='yes'", the Name given in the entity reference
5488 * must match that in an entity declaration, except that well-formed
5489 * documents need not declare any of the following entities: amp, lt,
5490 * gt, apos, quot. The declaration of a parameter entity must precede
5491 * any reference to it. Similarly, the declaration of a general entity
5492 * must precede any reference to it which appears in a default value in an
5493 * attribute-list declaration. Note that if entities are declared in the
5494 * external subset or in external parameter entities, a non-validating
5495 * processor is not obligated to read and process their declarations;
5496 * for such documents, the rule that an entity must be declared is a
5497 * well-formedness constraint only if standalone='yes'.
5498 *
5499 * [ WFC: Parsed Entity ]
5500 * An entity reference must not contain the name of an unparsed entity
5501 *
5502 * Returns the xmlEntityPtr if found, or NULL otherwise.
5503 */
5504xmlEntityPtr
5505xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5506 xmlChar *name;
5507 xmlEntityPtr ent = NULL;
5508
5509 GROW;
5510
5511 if (RAW == '&') {
5512 NEXT;
5513 name = xmlParseName(ctxt);
5514 if (name == NULL) {
5515 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5517 ctxt->sax->error(ctxt->userData,
5518 "xmlParseEntityRef: no name\n");
5519 ctxt->wellFormed = 0;
5520 ctxt->disableSAX = 1;
5521 } else {
5522 if (RAW == ';') {
5523 NEXT;
5524 /*
5525 * Ask first SAX for entity resolution, otherwise try the
5526 * predefined set.
5527 */
5528 if (ctxt->sax != NULL) {
5529 if (ctxt->sax->getEntity != NULL)
5530 ent = ctxt->sax->getEntity(ctxt->userData, name);
5531 if (ent == NULL)
5532 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005533 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5534 ent = getEntity(ctxt, name);
5535 }
Owen Taylor3473f882001-02-23 17:55:21 +00005536 }
5537 /*
5538 * [ WFC: Entity Declared ]
5539 * In a document without any DTD, a document with only an
5540 * internal DTD subset which contains no parameter entity
5541 * references, or a document with "standalone='yes'", the
5542 * Name given in the entity reference must match that in an
5543 * entity declaration, except that well-formed documents
5544 * need not declare any of the following entities: amp, lt,
5545 * gt, apos, quot.
5546 * The declaration of a parameter entity must precede any
5547 * reference to it.
5548 * Similarly, the declaration of a general entity must
5549 * precede any reference to it which appears in a default
5550 * value in an attribute-list declaration. Note that if
5551 * entities are declared in the external subset or in
5552 * external parameter entities, a non-validating processor
5553 * is not obligated to read and process their declarations;
5554 * for such documents, the rule that an entity must be
5555 * declared is a well-formedness constraint only if
5556 * standalone='yes'.
5557 */
5558 if (ent == NULL) {
5559 if ((ctxt->standalone == 1) ||
5560 ((ctxt->hasExternalSubset == 0) &&
5561 (ctxt->hasPErefs == 0))) {
5562 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5564 ctxt->sax->error(ctxt->userData,
5565 "Entity '%s' not defined\n", name);
5566 ctxt->wellFormed = 0;
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005567 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005568 ctxt->disableSAX = 1;
5569 } else {
5570 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005572 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005573 "Entity '%s' not defined\n", name);
Daniel Veillardd01fd3e2002-02-18 22:27:47 +00005574 ctxt->valid = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00005575 }
5576 }
5577
5578 /*
5579 * [ WFC: Parsed Entity ]
5580 * An entity reference must not contain the name of an
5581 * unparsed entity
5582 */
5583 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5584 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5586 ctxt->sax->error(ctxt->userData,
5587 "Entity reference to unparsed entity %s\n", name);
5588 ctxt->wellFormed = 0;
5589 ctxt->disableSAX = 1;
5590 }
5591
5592 /*
5593 * [ WFC: No External Entity References ]
5594 * Attribute values cannot contain direct or indirect
5595 * entity references to external entities.
5596 */
5597 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5598 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5599 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5600 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5601 ctxt->sax->error(ctxt->userData,
5602 "Attribute references external entity '%s'\n", name);
5603 ctxt->wellFormed = 0;
5604 ctxt->disableSAX = 1;
5605 }
5606 /*
5607 * [ WFC: No < in Attribute Values ]
5608 * The replacement text of any entity referred to directly or
5609 * indirectly in an attribute value (other than "&lt;") must
5610 * not contain a <.
5611 */
5612 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5613 (ent != NULL) &&
5614 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5615 (ent->content != NULL) &&
5616 (xmlStrchr(ent->content, '<'))) {
5617 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5619 ctxt->sax->error(ctxt->userData,
5620 "'<' in entity '%s' is not allowed in attributes values\n", name);
5621 ctxt->wellFormed = 0;
5622 ctxt->disableSAX = 1;
5623 }
5624
5625 /*
5626 * Internal check, no parameter entities here ...
5627 */
5628 else {
5629 switch (ent->etype) {
5630 case XML_INTERNAL_PARAMETER_ENTITY:
5631 case XML_EXTERNAL_PARAMETER_ENTITY:
5632 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5634 ctxt->sax->error(ctxt->userData,
5635 "Attempt to reference the parameter entity '%s'\n", name);
5636 ctxt->wellFormed = 0;
5637 ctxt->disableSAX = 1;
5638 break;
5639 default:
5640 break;
5641 }
5642 }
5643
5644 /*
5645 * [ WFC: No Recursion ]
5646 * A parsed entity must not contain a recursive reference
5647 * to itself, either directly or indirectly.
5648 * Done somewhere else
5649 */
5650
5651 } else {
5652 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5653 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5654 ctxt->sax->error(ctxt->userData,
5655 "xmlParseEntityRef: expecting ';'\n");
5656 ctxt->wellFormed = 0;
5657 ctxt->disableSAX = 1;
5658 }
5659 xmlFree(name);
5660 }
5661 }
5662 return(ent);
5663}
5664
5665/**
5666 * xmlParseStringEntityRef:
5667 * @ctxt: an XML parser context
5668 * @str: a pointer to an index in the string
5669 *
5670 * parse ENTITY references declarations, but this version parses it from
5671 * a string value.
5672 *
5673 * [68] EntityRef ::= '&' Name ';'
5674 *
5675 * [ WFC: Entity Declared ]
5676 * In a document without any DTD, a document with only an internal DTD
5677 * subset which contains no parameter entity references, or a document
5678 * with "standalone='yes'", the Name given in the entity reference
5679 * must match that in an entity declaration, except that well-formed
5680 * documents need not declare any of the following entities: amp, lt,
5681 * gt, apos, quot. The declaration of a parameter entity must precede
5682 * any reference to it. Similarly, the declaration of a general entity
5683 * must precede any reference to it which appears in a default value in an
5684 * attribute-list declaration. Note that if entities are declared in the
5685 * external subset or in external parameter entities, a non-validating
5686 * processor is not obligated to read and process their declarations;
5687 * for such documents, the rule that an entity must be declared is a
5688 * well-formedness constraint only if standalone='yes'.
5689 *
5690 * [ WFC: Parsed Entity ]
5691 * An entity reference must not contain the name of an unparsed entity
5692 *
5693 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5694 * is updated to the current location in the string.
5695 */
5696xmlEntityPtr
5697xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5698 xmlChar *name;
5699 const xmlChar *ptr;
5700 xmlChar cur;
5701 xmlEntityPtr ent = NULL;
5702
5703 if ((str == NULL) || (*str == NULL))
5704 return(NULL);
5705 ptr = *str;
5706 cur = *ptr;
5707 if (cur == '&') {
5708 ptr++;
5709 cur = *ptr;
5710 name = xmlParseStringName(ctxt, &ptr);
5711 if (name == NULL) {
5712 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5713 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5714 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005715 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005716 ctxt->wellFormed = 0;
5717 ctxt->disableSAX = 1;
5718 } else {
5719 if (*ptr == ';') {
5720 ptr++;
5721 /*
5722 * Ask first SAX for entity resolution, otherwise try the
5723 * predefined set.
5724 */
5725 if (ctxt->sax != NULL) {
5726 if (ctxt->sax->getEntity != NULL)
5727 ent = ctxt->sax->getEntity(ctxt->userData, name);
5728 if (ent == NULL)
5729 ent = xmlGetPredefinedEntity(name);
Daniel Veillard5997aca2002-03-18 18:36:20 +00005730 if ((ent == NULL) && (ctxt->userData==ctxt)) {
5731 ent = getEntity(ctxt, name);
5732 }
Owen Taylor3473f882001-02-23 17:55:21 +00005733 }
5734 /*
5735 * [ WFC: Entity Declared ]
5736 * In a document without any DTD, a document with only an
5737 * internal DTD subset which contains no parameter entity
5738 * references, or a document with "standalone='yes'", the
5739 * Name given in the entity reference must match that in an
5740 * entity declaration, except that well-formed documents
5741 * need not declare any of the following entities: amp, lt,
5742 * gt, apos, quot.
5743 * The declaration of a parameter entity must precede any
5744 * reference to it.
5745 * Similarly, the declaration of a general entity must
5746 * precede any reference to it which appears in a default
5747 * value in an attribute-list declaration. Note that if
5748 * entities are declared in the external subset or in
5749 * external parameter entities, a non-validating processor
5750 * is not obligated to read and process their declarations;
5751 * for such documents, the rule that an entity must be
5752 * declared is a well-formedness constraint only if
5753 * standalone='yes'.
5754 */
5755 if (ent == NULL) {
5756 if ((ctxt->standalone == 1) ||
5757 ((ctxt->hasExternalSubset == 0) &&
5758 (ctxt->hasPErefs == 0))) {
5759 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5761 ctxt->sax->error(ctxt->userData,
5762 "Entity '%s' not defined\n", name);
5763 ctxt->wellFormed = 0;
5764 ctxt->disableSAX = 1;
5765 } else {
5766 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5767 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5768 ctxt->sax->warning(ctxt->userData,
5769 "Entity '%s' not defined\n", name);
5770 }
5771 }
5772
5773 /*
5774 * [ WFC: Parsed Entity ]
5775 * An entity reference must not contain the name of an
5776 * unparsed entity
5777 */
5778 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5779 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5781 ctxt->sax->error(ctxt->userData,
5782 "Entity reference to unparsed entity %s\n", name);
5783 ctxt->wellFormed = 0;
5784 ctxt->disableSAX = 1;
5785 }
5786
5787 /*
5788 * [ WFC: No External Entity References ]
5789 * Attribute values cannot contain direct or indirect
5790 * entity references to external entities.
5791 */
5792 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5793 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5794 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5796 ctxt->sax->error(ctxt->userData,
5797 "Attribute references external entity '%s'\n", name);
5798 ctxt->wellFormed = 0;
5799 ctxt->disableSAX = 1;
5800 }
5801 /*
5802 * [ WFC: No < in Attribute Values ]
5803 * The replacement text of any entity referred to directly or
5804 * indirectly in an attribute value (other than "&lt;") must
5805 * not contain a <.
5806 */
5807 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5808 (ent != NULL) &&
5809 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5810 (ent->content != NULL) &&
5811 (xmlStrchr(ent->content, '<'))) {
5812 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5814 ctxt->sax->error(ctxt->userData,
5815 "'<' in entity '%s' is not allowed in attributes values\n", name);
5816 ctxt->wellFormed = 0;
5817 ctxt->disableSAX = 1;
5818 }
5819
5820 /*
5821 * Internal check, no parameter entities here ...
5822 */
5823 else {
5824 switch (ent->etype) {
5825 case XML_INTERNAL_PARAMETER_ENTITY:
5826 case XML_EXTERNAL_PARAMETER_ENTITY:
5827 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5829 ctxt->sax->error(ctxt->userData,
5830 "Attempt to reference the parameter entity '%s'\n", name);
5831 ctxt->wellFormed = 0;
5832 ctxt->disableSAX = 1;
5833 break;
5834 default:
5835 break;
5836 }
5837 }
5838
5839 /*
5840 * [ WFC: No Recursion ]
5841 * A parsed entity must not contain a recursive reference
5842 * to itself, either directly or indirectly.
Daniel Veillardcbaf3992001-12-31 16:16:02 +00005843 * Done somewhere else
Owen Taylor3473f882001-02-23 17:55:21 +00005844 */
5845
5846 } else {
5847 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5849 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005850 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005851 ctxt->wellFormed = 0;
5852 ctxt->disableSAX = 1;
5853 }
5854 xmlFree(name);
5855 }
5856 }
5857 *str = ptr;
5858 return(ent);
5859}
5860
5861/**
5862 * xmlParsePEReference:
5863 * @ctxt: an XML parser context
5864 *
5865 * parse PEReference declarations
5866 * The entity content is handled directly by pushing it's content as
5867 * a new input stream.
5868 *
5869 * [69] PEReference ::= '%' Name ';'
5870 *
5871 * [ WFC: No Recursion ]
5872 * A parsed entity must not contain a recursive
5873 * reference to itself, either directly or indirectly.
5874 *
5875 * [ WFC: Entity Declared ]
5876 * In a document without any DTD, a document with only an internal DTD
5877 * subset which contains no parameter entity references, or a document
5878 * with "standalone='yes'", ... ... The declaration of a parameter
5879 * entity must precede any reference to it...
5880 *
5881 * [ VC: Entity Declared ]
5882 * In a document with an external subset or external parameter entities
5883 * with "standalone='no'", ... ... The declaration of a parameter entity
5884 * must precede any reference to it...
5885 *
5886 * [ WFC: In DTD ]
5887 * Parameter-entity references may only appear in the DTD.
5888 * NOTE: misleading but this is handled.
5889 */
5890void
5891xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5892 xmlChar *name;
5893 xmlEntityPtr entity = NULL;
5894 xmlParserInputPtr input;
5895
5896 if (RAW == '%') {
5897 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005898 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005899 if (name == NULL) {
5900 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5902 ctxt->sax->error(ctxt->userData,
5903 "xmlParsePEReference: no name\n");
5904 ctxt->wellFormed = 0;
5905 ctxt->disableSAX = 1;
5906 } else {
5907 if (RAW == ';') {
5908 NEXT;
5909 if ((ctxt->sax != NULL) &&
5910 (ctxt->sax->getParameterEntity != NULL))
5911 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5912 name);
5913 if (entity == NULL) {
5914 /*
5915 * [ WFC: Entity Declared ]
5916 * In a document without any DTD, a document with only an
5917 * internal DTD subset which contains no parameter entity
5918 * references, or a document with "standalone='yes'", ...
5919 * ... The declaration of a parameter entity must precede
5920 * any reference to it...
5921 */
5922 if ((ctxt->standalone == 1) ||
5923 ((ctxt->hasExternalSubset == 0) &&
5924 (ctxt->hasPErefs == 0))) {
5925 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5926 if ((!ctxt->disableSAX) &&
5927 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5928 ctxt->sax->error(ctxt->userData,
5929 "PEReference: %%%s; not found\n", name);
5930 ctxt->wellFormed = 0;
5931 ctxt->disableSAX = 1;
5932 } else {
5933 /*
5934 * [ VC: Entity Declared ]
5935 * In a document with an external subset or external
5936 * parameter entities with "standalone='no'", ...
5937 * ... The declaration of a parameter entity must precede
5938 * any reference to it...
5939 */
5940 if ((!ctxt->disableSAX) &&
5941 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5942 ctxt->sax->warning(ctxt->userData,
5943 "PEReference: %%%s; not found\n", name);
5944 ctxt->valid = 0;
5945 }
5946 } else {
5947 /*
5948 * Internal checking in case the entity quest barfed
5949 */
5950 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5951 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5952 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5953 ctxt->sax->warning(ctxt->userData,
5954 "Internal: %%%s; is not a parameter entity\n", name);
5955 } else {
5956 /*
5957 * TODO !!!
5958 * handle the extra spaces added before and after
5959 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5960 */
5961 input = xmlNewEntityInputStream(ctxt, entity);
5962 xmlPushInput(ctxt, input);
5963 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5964 (RAW == '<') && (NXT(1) == '?') &&
5965 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5966 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5967 xmlParseTextDecl(ctxt);
5968 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5969 /*
5970 * The XML REC instructs us to stop parsing
5971 * right here
5972 */
5973 ctxt->instate = XML_PARSER_EOF;
5974 xmlFree(name);
5975 return;
5976 }
5977 }
5978 if (ctxt->token == 0)
5979 ctxt->token = ' ';
5980 }
5981 }
5982 ctxt->hasPErefs = 1;
5983 } else {
5984 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5986 ctxt->sax->error(ctxt->userData,
5987 "xmlParsePEReference: expecting ';'\n");
5988 ctxt->wellFormed = 0;
5989 ctxt->disableSAX = 1;
5990 }
5991 xmlFree(name);
5992 }
5993 }
5994}
5995
5996/**
5997 * xmlParseStringPEReference:
5998 * @ctxt: an XML parser context
5999 * @str: a pointer to an index in the string
6000 *
6001 * parse PEReference declarations
6002 *
6003 * [69] PEReference ::= '%' Name ';'
6004 *
6005 * [ WFC: No Recursion ]
6006 * A parsed entity must not contain a recursive
6007 * reference to itself, either directly or indirectly.
6008 *
6009 * [ WFC: Entity Declared ]
6010 * In a document without any DTD, a document with only an internal DTD
6011 * subset which contains no parameter entity references, or a document
6012 * with "standalone='yes'", ... ... The declaration of a parameter
6013 * entity must precede any reference to it...
6014 *
6015 * [ VC: Entity Declared ]
6016 * In a document with an external subset or external parameter entities
6017 * with "standalone='no'", ... ... The declaration of a parameter entity
6018 * must precede any reference to it...
6019 *
6020 * [ WFC: In DTD ]
6021 * Parameter-entity references may only appear in the DTD.
6022 * NOTE: misleading but this is handled.
6023 *
6024 * Returns the string of the entity content.
6025 * str is updated to the current value of the index
6026 */
6027xmlEntityPtr
6028xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
6029 const xmlChar *ptr;
6030 xmlChar cur;
6031 xmlChar *name;
6032 xmlEntityPtr entity = NULL;
6033
6034 if ((str == NULL) || (*str == NULL)) return(NULL);
6035 ptr = *str;
6036 cur = *ptr;
6037 if (cur == '%') {
6038 ptr++;
6039 cur = *ptr;
6040 name = xmlParseStringName(ctxt, &ptr);
6041 if (name == NULL) {
6042 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6044 ctxt->sax->error(ctxt->userData,
6045 "xmlParseStringPEReference: no name\n");
6046 ctxt->wellFormed = 0;
6047 ctxt->disableSAX = 1;
6048 } else {
6049 cur = *ptr;
6050 if (cur == ';') {
6051 ptr++;
6052 cur = *ptr;
6053 if ((ctxt->sax != NULL) &&
6054 (ctxt->sax->getParameterEntity != NULL))
6055 entity = ctxt->sax->getParameterEntity(ctxt->userData,
6056 name);
6057 if (entity == NULL) {
6058 /*
6059 * [ WFC: Entity Declared ]
6060 * In a document without any DTD, a document with only an
6061 * internal DTD subset which contains no parameter entity
6062 * references, or a document with "standalone='yes'", ...
6063 * ... The declaration of a parameter entity must precede
6064 * any reference to it...
6065 */
6066 if ((ctxt->standalone == 1) ||
6067 ((ctxt->hasExternalSubset == 0) &&
6068 (ctxt->hasPErefs == 0))) {
6069 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
6070 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6071 ctxt->sax->error(ctxt->userData,
6072 "PEReference: %%%s; not found\n", name);
6073 ctxt->wellFormed = 0;
6074 ctxt->disableSAX = 1;
6075 } else {
6076 /*
6077 * [ VC: Entity Declared ]
6078 * In a document with an external subset or external
6079 * parameter entities with "standalone='no'", ...
6080 * ... The declaration of a parameter entity must
6081 * precede any reference to it...
6082 */
6083 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6084 ctxt->sax->warning(ctxt->userData,
6085 "PEReference: %%%s; not found\n", name);
6086 ctxt->valid = 0;
6087 }
6088 } else {
6089 /*
6090 * Internal checking in case the entity quest barfed
6091 */
6092 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
6093 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
6094 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6095 ctxt->sax->warning(ctxt->userData,
6096 "Internal: %%%s; is not a parameter entity\n", name);
6097 }
6098 }
6099 ctxt->hasPErefs = 1;
6100 } else {
6101 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
6102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6103 ctxt->sax->error(ctxt->userData,
6104 "xmlParseStringPEReference: expecting ';'\n");
6105 ctxt->wellFormed = 0;
6106 ctxt->disableSAX = 1;
6107 }
6108 xmlFree(name);
6109 }
6110 }
6111 *str = ptr;
6112 return(entity);
6113}
6114
6115/**
6116 * xmlParseDocTypeDecl:
6117 * @ctxt: an XML parser context
6118 *
6119 * parse a DOCTYPE declaration
6120 *
6121 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6122 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6123 *
6124 * [ VC: Root Element Type ]
6125 * The Name in the document type declaration must match the element
6126 * type of the root element.
6127 */
6128
6129void
6130xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6131 xmlChar *name = NULL;
6132 xmlChar *ExternalID = NULL;
6133 xmlChar *URI = NULL;
6134
6135 /*
6136 * We know that '<!DOCTYPE' has been detected.
6137 */
6138 SKIP(9);
6139
6140 SKIP_BLANKS;
6141
6142 /*
6143 * Parse the DOCTYPE name.
6144 */
6145 name = xmlParseName(ctxt);
6146 if (name == NULL) {
6147 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6149 ctxt->sax->error(ctxt->userData,
6150 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6151 ctxt->wellFormed = 0;
6152 ctxt->disableSAX = 1;
6153 }
6154 ctxt->intSubName = name;
6155
6156 SKIP_BLANKS;
6157
6158 /*
6159 * Check for SystemID and ExternalID
6160 */
6161 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6162
6163 if ((URI != NULL) || (ExternalID != NULL)) {
6164 ctxt->hasExternalSubset = 1;
6165 }
6166 ctxt->extSubURI = URI;
6167 ctxt->extSubSystem = ExternalID;
6168
6169 SKIP_BLANKS;
6170
6171 /*
6172 * Create and update the internal subset.
6173 */
6174 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6175 (!ctxt->disableSAX))
6176 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6177
6178 /*
6179 * Is there any internal subset declarations ?
6180 * they are handled separately in xmlParseInternalSubset()
6181 */
6182 if (RAW == '[')
6183 return;
6184
6185 /*
6186 * We should be at the end of the DOCTYPE declaration.
6187 */
6188 if (RAW != '>') {
6189 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006191 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006192 ctxt->wellFormed = 0;
6193 ctxt->disableSAX = 1;
6194 }
6195 NEXT;
6196}
6197
6198/**
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006199 * xmlParseInternalSubset:
Owen Taylor3473f882001-02-23 17:55:21 +00006200 * @ctxt: an XML parser context
6201 *
6202 * parse the internal subset declaration
6203 *
6204 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6205 */
6206
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006207static void
Owen Taylor3473f882001-02-23 17:55:21 +00006208xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6209 /*
6210 * Is there any DTD definition ?
6211 */
6212 if (RAW == '[') {
6213 ctxt->instate = XML_PARSER_DTD;
6214 NEXT;
6215 /*
6216 * Parse the succession of Markup declarations and
6217 * PEReferences.
6218 * Subsequence (markupdecl | PEReference | S)*
6219 */
6220 while (RAW != ']') {
6221 const xmlChar *check = CUR_PTR;
6222 int cons = ctxt->input->consumed;
6223
6224 SKIP_BLANKS;
6225 xmlParseMarkupDecl(ctxt);
6226 xmlParsePEReference(ctxt);
6227
6228 /*
6229 * Pop-up of finished entities.
6230 */
6231 while ((RAW == 0) && (ctxt->inputNr > 1))
6232 xmlPopInput(ctxt);
6233
6234 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6235 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6237 ctxt->sax->error(ctxt->userData,
6238 "xmlParseInternalSubset: error detected in Markup declaration\n");
6239 ctxt->wellFormed = 0;
6240 ctxt->disableSAX = 1;
6241 break;
6242 }
6243 }
6244 if (RAW == ']') {
6245 NEXT;
6246 SKIP_BLANKS;
6247 }
6248 }
6249
6250 /*
6251 * We should be at the end of the DOCTYPE declaration.
6252 */
6253 if (RAW != '>') {
6254 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006256 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006257 ctxt->wellFormed = 0;
6258 ctxt->disableSAX = 1;
6259 }
6260 NEXT;
6261}
6262
6263/**
6264 * xmlParseAttribute:
6265 * @ctxt: an XML parser context
6266 * @value: a xmlChar ** used to store the value of the attribute
6267 *
6268 * parse an attribute
6269 *
6270 * [41] Attribute ::= Name Eq AttValue
6271 *
6272 * [ WFC: No External Entity References ]
6273 * Attribute values cannot contain direct or indirect entity references
6274 * to external entities.
6275 *
6276 * [ WFC: No < in Attribute Values ]
6277 * The replacement text of any entity referred to directly or indirectly in
6278 * an attribute value (other than "&lt;") must not contain a <.
6279 *
6280 * [ VC: Attribute Value Type ]
6281 * The attribute must have been declared; the value must be of the type
6282 * declared for it.
6283 *
6284 * [25] Eq ::= S? '=' S?
6285 *
6286 * With namespace:
6287 *
6288 * [NS 11] Attribute ::= QName Eq AttValue
6289 *
6290 * Also the case QName == xmlns:??? is handled independently as a namespace
6291 * definition.
6292 *
6293 * Returns the attribute name, and the value in *value.
6294 */
6295
6296xmlChar *
6297xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6298 xmlChar *name, *val;
6299
6300 *value = NULL;
Daniel Veillard878eab02002-02-19 13:46:09 +00006301 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00006302 name = xmlParseName(ctxt);
6303 if (name == NULL) {
6304 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6306 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6307 ctxt->wellFormed = 0;
6308 ctxt->disableSAX = 1;
6309 return(NULL);
6310 }
6311
6312 /*
6313 * read the value
6314 */
6315 SKIP_BLANKS;
6316 if (RAW == '=') {
6317 NEXT;
6318 SKIP_BLANKS;
6319 val = xmlParseAttValue(ctxt);
6320 ctxt->instate = XML_PARSER_CONTENT;
6321 } else {
6322 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6324 ctxt->sax->error(ctxt->userData,
6325 "Specification mandate value for attribute %s\n", name);
6326 ctxt->wellFormed = 0;
6327 ctxt->disableSAX = 1;
6328 xmlFree(name);
6329 return(NULL);
6330 }
6331
6332 /*
6333 * Check that xml:lang conforms to the specification
6334 * No more registered as an error, just generate a warning now
6335 * since this was deprecated in XML second edition
6336 */
6337 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6338 if (!xmlCheckLanguageID(val)) {
6339 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6340 ctxt->sax->warning(ctxt->userData,
6341 "Malformed value for xml:lang : %s\n", val);
6342 }
6343 }
6344
6345 /*
6346 * Check that xml:space conforms to the specification
6347 */
6348 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6349 if (xmlStrEqual(val, BAD_CAST "default"))
6350 *(ctxt->space) = 0;
6351 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6352 *(ctxt->space) = 1;
6353 else {
6354 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6356 ctxt->sax->error(ctxt->userData,
6357"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6358 val);
6359 ctxt->wellFormed = 0;
6360 ctxt->disableSAX = 1;
6361 }
6362 }
6363
6364 *value = val;
6365 return(name);
6366}
6367
6368/**
6369 * xmlParseStartTag:
6370 * @ctxt: an XML parser context
6371 *
6372 * parse a start of tag either for rule element or
6373 * EmptyElement. In both case we don't parse the tag closing chars.
6374 *
6375 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6376 *
6377 * [ WFC: Unique Att Spec ]
6378 * No attribute name may appear more than once in the same start-tag or
6379 * empty-element tag.
6380 *
6381 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6382 *
6383 * [ WFC: Unique Att Spec ]
6384 * No attribute name may appear more than once in the same start-tag or
6385 * empty-element tag.
6386 *
6387 * With namespace:
6388 *
6389 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6390 *
6391 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6392 *
6393 * Returns the element name parsed
6394 */
6395
6396xmlChar *
6397xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6398 xmlChar *name;
6399 xmlChar *attname;
6400 xmlChar *attvalue;
6401 const xmlChar **atts = NULL;
6402 int nbatts = 0;
6403 int maxatts = 0;
6404 int i;
6405
6406 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006407 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006408
6409 name = xmlParseName(ctxt);
6410 if (name == NULL) {
6411 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6413 ctxt->sax->error(ctxt->userData,
6414 "xmlParseStartTag: invalid element name\n");
6415 ctxt->wellFormed = 0;
6416 ctxt->disableSAX = 1;
6417 return(NULL);
6418 }
6419
6420 /*
6421 * Now parse the attributes, it ends up with the ending
6422 *
6423 * (S Attribute)* S?
6424 */
6425 SKIP_BLANKS;
6426 GROW;
6427
Daniel Veillard21a0f912001-02-25 19:54:14 +00006428 while ((RAW != '>') &&
6429 ((RAW != '/') || (NXT(1) != '>')) &&
6430 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006431 const xmlChar *q = CUR_PTR;
6432 int cons = ctxt->input->consumed;
6433
6434 attname = xmlParseAttribute(ctxt, &attvalue);
6435 if ((attname != NULL) && (attvalue != NULL)) {
6436 /*
6437 * [ WFC: Unique Att Spec ]
6438 * No attribute name may appear more than once in the same
6439 * start-tag or empty-element tag.
6440 */
6441 for (i = 0; i < nbatts;i += 2) {
6442 if (xmlStrEqual(atts[i], attname)) {
6443 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6444 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6445 ctxt->sax->error(ctxt->userData,
6446 "Attribute %s redefined\n",
6447 attname);
6448 ctxt->wellFormed = 0;
6449 ctxt->disableSAX = 1;
6450 xmlFree(attname);
6451 xmlFree(attvalue);
6452 goto failed;
6453 }
6454 }
6455
6456 /*
6457 * Add the pair to atts
6458 */
6459 if (atts == NULL) {
6460 maxatts = 10;
6461 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6462 if (atts == NULL) {
6463 xmlGenericError(xmlGenericErrorContext,
6464 "malloc of %ld byte failed\n",
6465 maxatts * (long)sizeof(xmlChar *));
6466 return(NULL);
6467 }
6468 } else if (nbatts + 4 > maxatts) {
6469 maxatts *= 2;
6470 atts = (const xmlChar **) xmlRealloc((void *) atts,
6471 maxatts * sizeof(xmlChar *));
6472 if (atts == NULL) {
6473 xmlGenericError(xmlGenericErrorContext,
6474 "realloc of %ld byte failed\n",
6475 maxatts * (long)sizeof(xmlChar *));
6476 return(NULL);
6477 }
6478 }
6479 atts[nbatts++] = attname;
6480 atts[nbatts++] = attvalue;
6481 atts[nbatts] = NULL;
6482 atts[nbatts + 1] = NULL;
6483 } else {
6484 if (attname != NULL)
6485 xmlFree(attname);
6486 if (attvalue != NULL)
6487 xmlFree(attvalue);
6488 }
6489
6490failed:
6491
6492 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6493 break;
6494 if (!IS_BLANK(RAW)) {
6495 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6496 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6497 ctxt->sax->error(ctxt->userData,
6498 "attributes construct error\n");
6499 ctxt->wellFormed = 0;
6500 ctxt->disableSAX = 1;
6501 }
6502 SKIP_BLANKS;
6503 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6504 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6506 ctxt->sax->error(ctxt->userData,
6507 "xmlParseStartTag: problem parsing attributes\n");
6508 ctxt->wellFormed = 0;
6509 ctxt->disableSAX = 1;
6510 break;
6511 }
6512 GROW;
6513 }
6514
6515 /*
6516 * SAX: Start of Element !
6517 */
6518 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6519 (!ctxt->disableSAX))
6520 ctxt->sax->startElement(ctxt->userData, name, atts);
6521
6522 if (atts != NULL) {
6523 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6524 xmlFree((void *) atts);
6525 }
6526 return(name);
6527}
6528
6529/**
6530 * xmlParseEndTag:
6531 * @ctxt: an XML parser context
6532 *
6533 * parse an end of tag
6534 *
6535 * [42] ETag ::= '</' Name S? '>'
6536 *
6537 * With namespace
6538 *
6539 * [NS 9] ETag ::= '</' QName S? '>'
6540 */
6541
6542void
6543xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6544 xmlChar *name;
6545 xmlChar *oldname;
6546
6547 GROW;
6548 if ((RAW != '<') || (NXT(1) != '/')) {
6549 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6551 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6552 ctxt->wellFormed = 0;
6553 ctxt->disableSAX = 1;
6554 return;
6555 }
6556 SKIP(2);
6557
6558 name = xmlParseName(ctxt);
6559
6560 /*
6561 * We should definitely be at the ending "S? '>'" part
6562 */
6563 GROW;
6564 SKIP_BLANKS;
6565 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6566 ctxt->errNo = XML_ERR_GT_REQUIRED;
6567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6568 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6569 ctxt->wellFormed = 0;
6570 ctxt->disableSAX = 1;
6571 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006572 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006573
6574 /*
6575 * [ WFC: Element Type Match ]
6576 * The Name in an element's end-tag must match the element type in the
6577 * start-tag.
6578 *
6579 */
6580 if ((name == NULL) || (ctxt->name == NULL) ||
6581 (!xmlStrEqual(name, ctxt->name))) {
6582 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6584 if ((name != NULL) && (ctxt->name != NULL)) {
6585 ctxt->sax->error(ctxt->userData,
6586 "Opening and ending tag mismatch: %s and %s\n",
6587 ctxt->name, name);
6588 } else if (ctxt->name != NULL) {
6589 ctxt->sax->error(ctxt->userData,
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006590 "Ending tag error for: %s\n", ctxt->name);
Owen Taylor3473f882001-02-23 17:55:21 +00006591 } else {
6592 ctxt->sax->error(ctxt->userData,
6593 "Ending tag error: internal error ???\n");
6594 }
6595
6596 }
6597 ctxt->wellFormed = 0;
6598 ctxt->disableSAX = 1;
6599 }
6600
6601 /*
6602 * SAX: End of Tag
6603 */
6604 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6605 (!ctxt->disableSAX))
6606 ctxt->sax->endElement(ctxt->userData, name);
6607
6608 if (name != NULL)
6609 xmlFree(name);
6610 oldname = namePop(ctxt);
6611 spacePop(ctxt);
6612 if (oldname != NULL) {
6613#ifdef DEBUG_STACK
6614 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6615#endif
6616 xmlFree(oldname);
6617 }
6618 return;
6619}
6620
6621/**
6622 * xmlParseCDSect:
6623 * @ctxt: an XML parser context
6624 *
6625 * Parse escaped pure raw content.
6626 *
6627 * [18] CDSect ::= CDStart CData CDEnd
6628 *
6629 * [19] CDStart ::= '<![CDATA['
6630 *
6631 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6632 *
6633 * [21] CDEnd ::= ']]>'
6634 */
6635void
6636xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6637 xmlChar *buf = NULL;
6638 int len = 0;
6639 int size = XML_PARSER_BUFFER_SIZE;
6640 int r, rl;
6641 int s, sl;
6642 int cur, l;
6643 int count = 0;
6644
6645 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6646 (NXT(2) == '[') && (NXT(3) == 'C') &&
6647 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6648 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6649 (NXT(8) == '[')) {
6650 SKIP(9);
6651 } else
6652 return;
6653
6654 ctxt->instate = XML_PARSER_CDATA_SECTION;
6655 r = CUR_CHAR(rl);
6656 if (!IS_CHAR(r)) {
6657 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6659 ctxt->sax->error(ctxt->userData,
6660 "CData section not finished\n");
6661 ctxt->wellFormed = 0;
6662 ctxt->disableSAX = 1;
6663 ctxt->instate = XML_PARSER_CONTENT;
6664 return;
6665 }
6666 NEXTL(rl);
6667 s = CUR_CHAR(sl);
6668 if (!IS_CHAR(s)) {
6669 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6671 ctxt->sax->error(ctxt->userData,
6672 "CData section not finished\n");
6673 ctxt->wellFormed = 0;
6674 ctxt->disableSAX = 1;
6675 ctxt->instate = XML_PARSER_CONTENT;
6676 return;
6677 }
6678 NEXTL(sl);
6679 cur = CUR_CHAR(l);
6680 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6681 if (buf == NULL) {
6682 xmlGenericError(xmlGenericErrorContext,
6683 "malloc of %d byte failed\n", size);
6684 return;
6685 }
6686 while (IS_CHAR(cur) &&
6687 ((r != ']') || (s != ']') || (cur != '>'))) {
6688 if (len + 5 >= size) {
6689 size *= 2;
6690 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6691 if (buf == NULL) {
6692 xmlGenericError(xmlGenericErrorContext,
6693 "realloc of %d byte failed\n", size);
6694 return;
6695 }
6696 }
6697 COPY_BUF(rl,buf,len,r);
6698 r = s;
6699 rl = sl;
6700 s = cur;
6701 sl = l;
6702 count++;
6703 if (count > 50) {
6704 GROW;
6705 count = 0;
6706 }
6707 NEXTL(l);
6708 cur = CUR_CHAR(l);
6709 }
6710 buf[len] = 0;
6711 ctxt->instate = XML_PARSER_CONTENT;
6712 if (cur != '>') {
6713 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6715 ctxt->sax->error(ctxt->userData,
6716 "CData section not finished\n%.50s\n", buf);
6717 ctxt->wellFormed = 0;
6718 ctxt->disableSAX = 1;
6719 xmlFree(buf);
6720 return;
6721 }
6722 NEXTL(l);
6723
6724 /*
Daniel Veillardcbaf3992001-12-31 16:16:02 +00006725 * OK the buffer is to be consumed as cdata.
Owen Taylor3473f882001-02-23 17:55:21 +00006726 */
6727 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6728 if (ctxt->sax->cdataBlock != NULL)
6729 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006730 else if (ctxt->sax->characters != NULL)
6731 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006732 }
6733 xmlFree(buf);
6734}
6735
6736/**
6737 * xmlParseContent:
6738 * @ctxt: an XML parser context
6739 *
6740 * Parse a content:
6741 *
6742 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6743 */
6744
6745void
6746xmlParseContent(xmlParserCtxtPtr ctxt) {
6747 GROW;
6748 while (((RAW != 0) || (ctxt->token != 0)) &&
6749 ((RAW != '<') || (NXT(1) != '/'))) {
6750 const xmlChar *test = CUR_PTR;
6751 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006752 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006753 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006754
6755 /*
6756 * Handle possible processed charrefs.
6757 */
6758 if (ctxt->token != 0) {
6759 xmlParseCharData(ctxt, 0);
6760 }
6761 /*
6762 * First case : a Processing Instruction.
6763 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006764 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006765 xmlParsePI(ctxt);
6766 }
6767
6768 /*
6769 * Second case : a CDSection
6770 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006771 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006772 (NXT(2) == '[') && (NXT(3) == 'C') &&
6773 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6774 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6775 (NXT(8) == '[')) {
6776 xmlParseCDSect(ctxt);
6777 }
6778
6779 /*
6780 * Third case : a comment
6781 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006782 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006783 (NXT(2) == '-') && (NXT(3) == '-')) {
6784 xmlParseComment(ctxt);
6785 ctxt->instate = XML_PARSER_CONTENT;
6786 }
6787
6788 /*
6789 * Fourth case : a sub-element.
6790 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006791 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006792 xmlParseElement(ctxt);
6793 }
6794
6795 /*
6796 * Fifth case : a reference. If if has not been resolved,
6797 * parsing returns it's Name, create the node
6798 */
6799
Daniel Veillard21a0f912001-02-25 19:54:14 +00006800 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006801 xmlParseReference(ctxt);
6802 }
6803
6804 /*
6805 * Last case, text. Note that References are handled directly.
6806 */
6807 else {
6808 xmlParseCharData(ctxt, 0);
6809 }
6810
6811 GROW;
6812 /*
6813 * Pop-up of finished entities.
6814 */
6815 while ((RAW == 0) && (ctxt->inputNr > 1))
6816 xmlPopInput(ctxt);
6817 SHRINK;
6818
6819 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6820 (tok == ctxt->token)) {
6821 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6822 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6823 ctxt->sax->error(ctxt->userData,
6824 "detected an error in element content\n");
6825 ctxt->wellFormed = 0;
6826 ctxt->disableSAX = 1;
6827 ctxt->instate = XML_PARSER_EOF;
6828 break;
6829 }
6830 }
6831}
6832
6833/**
6834 * xmlParseElement:
6835 * @ctxt: an XML parser context
6836 *
6837 * parse an XML element, this is highly recursive
6838 *
6839 * [39] element ::= EmptyElemTag | STag content ETag
6840 *
6841 * [ WFC: Element Type Match ]
6842 * The Name in an element's end-tag must match the element type in the
6843 * start-tag.
6844 *
6845 * [ VC: Element Valid ]
6846 * An element is valid if there is a declaration matching elementdecl
6847 * where the Name matches the element type and one of the following holds:
6848 * - The declaration matches EMPTY and the element has no content.
6849 * - The declaration matches children and the sequence of child elements
6850 * belongs to the language generated by the regular expression in the
6851 * content model, with optional white space (characters matching the
6852 * nonterminal S) between each pair of child elements.
6853 * - The declaration matches Mixed and the content consists of character
6854 * data and child elements whose types match names in the content model.
6855 * - The declaration matches ANY, and the types of any child elements have
6856 * been declared.
6857 */
6858
6859void
6860xmlParseElement(xmlParserCtxtPtr ctxt) {
6861 const xmlChar *openTag = CUR_PTR;
6862 xmlChar *name;
6863 xmlChar *oldname;
6864 xmlParserNodeInfo node_info;
6865 xmlNodePtr ret;
6866
6867 /* Capture start position */
6868 if (ctxt->record_info) {
6869 node_info.begin_pos = ctxt->input->consumed +
6870 (CUR_PTR - ctxt->input->base);
6871 node_info.begin_line = ctxt->input->line;
6872 }
6873
6874 if (ctxt->spaceNr == 0)
6875 spacePush(ctxt, -1);
6876 else
6877 spacePush(ctxt, *ctxt->space);
6878
6879 name = xmlParseStartTag(ctxt);
6880 if (name == NULL) {
6881 spacePop(ctxt);
6882 return;
6883 }
6884 namePush(ctxt, name);
6885 ret = ctxt->node;
6886
6887 /*
6888 * [ VC: Root Element Type ]
6889 * The Name in the document type declaration must match the element
6890 * type of the root element.
6891 */
6892 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6893 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6894 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6895
6896 /*
6897 * Check for an Empty Element.
6898 */
6899 if ((RAW == '/') && (NXT(1) == '>')) {
6900 SKIP(2);
6901 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6902 (!ctxt->disableSAX))
6903 ctxt->sax->endElement(ctxt->userData, name);
6904 oldname = namePop(ctxt);
6905 spacePop(ctxt);
6906 if (oldname != NULL) {
6907#ifdef DEBUG_STACK
6908 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6909#endif
6910 xmlFree(oldname);
6911 }
6912 if ( ret != NULL && ctxt->record_info ) {
6913 node_info.end_pos = ctxt->input->consumed +
6914 (CUR_PTR - ctxt->input->base);
6915 node_info.end_line = ctxt->input->line;
6916 node_info.node = ret;
6917 xmlParserAddNodeInfo(ctxt, &node_info);
6918 }
6919 return;
6920 }
6921 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006922 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006923 } else {
6924 ctxt->errNo = XML_ERR_GT_REQUIRED;
6925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6926 ctxt->sax->error(ctxt->userData,
6927 "Couldn't find end of Start Tag\n%.30s\n",
6928 openTag);
6929 ctxt->wellFormed = 0;
6930 ctxt->disableSAX = 1;
6931
6932 /*
6933 * end of parsing of this node.
6934 */
6935 nodePop(ctxt);
6936 oldname = namePop(ctxt);
6937 spacePop(ctxt);
6938 if (oldname != NULL) {
6939#ifdef DEBUG_STACK
6940 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6941#endif
6942 xmlFree(oldname);
6943 }
6944
6945 /*
6946 * Capture end position and add node
6947 */
6948 if ( ret != NULL && ctxt->record_info ) {
6949 node_info.end_pos = ctxt->input->consumed +
6950 (CUR_PTR - ctxt->input->base);
6951 node_info.end_line = ctxt->input->line;
6952 node_info.node = ret;
6953 xmlParserAddNodeInfo(ctxt, &node_info);
6954 }
6955 return;
6956 }
6957
6958 /*
6959 * Parse the content of the element:
6960 */
6961 xmlParseContent(ctxt);
6962 if (!IS_CHAR(RAW)) {
Daniel Veillard5344c602001-12-31 16:37:34 +00006963 ctxt->errNo = XML_ERR_TAG_NOT_FINISHED;
Owen Taylor3473f882001-02-23 17:55:21 +00006964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6965 ctxt->sax->error(ctxt->userData,
6966 "Premature end of data in tag %.30s\n", openTag);
6967 ctxt->wellFormed = 0;
6968 ctxt->disableSAX = 1;
6969
6970 /*
6971 * end of parsing of this node.
6972 */
6973 nodePop(ctxt);
6974 oldname = namePop(ctxt);
6975 spacePop(ctxt);
6976 if (oldname != NULL) {
6977#ifdef DEBUG_STACK
6978 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6979#endif
6980 xmlFree(oldname);
6981 }
6982 return;
6983 }
6984
6985 /*
6986 * parse the end of tag: '</' should be here.
6987 */
6988 xmlParseEndTag(ctxt);
6989
6990 /*
6991 * Capture end position and add node
6992 */
6993 if ( ret != NULL && ctxt->record_info ) {
6994 node_info.end_pos = ctxt->input->consumed +
6995 (CUR_PTR - ctxt->input->base);
6996 node_info.end_line = ctxt->input->line;
6997 node_info.node = ret;
6998 xmlParserAddNodeInfo(ctxt, &node_info);
6999 }
7000}
7001
7002/**
7003 * xmlParseVersionNum:
7004 * @ctxt: an XML parser context
7005 *
7006 * parse the XML version value.
7007 *
7008 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
7009 *
7010 * Returns the string giving the XML version number, or NULL
7011 */
7012xmlChar *
7013xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
7014 xmlChar *buf = NULL;
7015 int len = 0;
7016 int size = 10;
7017 xmlChar cur;
7018
7019 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7020 if (buf == NULL) {
7021 xmlGenericError(xmlGenericErrorContext,
7022 "malloc of %d byte failed\n", size);
7023 return(NULL);
7024 }
7025 cur = CUR;
7026 while (((cur >= 'a') && (cur <= 'z')) ||
7027 ((cur >= 'A') && (cur <= 'Z')) ||
7028 ((cur >= '0') && (cur <= '9')) ||
7029 (cur == '_') || (cur == '.') ||
7030 (cur == ':') || (cur == '-')) {
7031 if (len + 1 >= size) {
7032 size *= 2;
7033 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7034 if (buf == NULL) {
7035 xmlGenericError(xmlGenericErrorContext,
7036 "realloc of %d byte failed\n", size);
7037 return(NULL);
7038 }
7039 }
7040 buf[len++] = cur;
7041 NEXT;
7042 cur=CUR;
7043 }
7044 buf[len] = 0;
7045 return(buf);
7046}
7047
7048/**
7049 * xmlParseVersionInfo:
7050 * @ctxt: an XML parser context
7051 *
7052 * parse the XML version.
7053 *
7054 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
7055 *
7056 * [25] Eq ::= S? '=' S?
7057 *
7058 * Returns the version string, e.g. "1.0"
7059 */
7060
7061xmlChar *
7062xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
7063 xmlChar *version = NULL;
7064 const xmlChar *q;
7065
7066 if ((RAW == 'v') && (NXT(1) == 'e') &&
7067 (NXT(2) == 'r') && (NXT(3) == 's') &&
7068 (NXT(4) == 'i') && (NXT(5) == 'o') &&
7069 (NXT(6) == 'n')) {
7070 SKIP(7);
7071 SKIP_BLANKS;
7072 if (RAW != '=') {
7073 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7075 ctxt->sax->error(ctxt->userData,
7076 "xmlParseVersionInfo : expected '='\n");
7077 ctxt->wellFormed = 0;
7078 ctxt->disableSAX = 1;
7079 return(NULL);
7080 }
7081 NEXT;
7082 SKIP_BLANKS;
7083 if (RAW == '"') {
7084 NEXT;
7085 q = CUR_PTR;
7086 version = xmlParseVersionNum(ctxt);
7087 if (RAW != '"') {
7088 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7090 ctxt->sax->error(ctxt->userData,
7091 "String not closed\n%.50s\n", q);
7092 ctxt->wellFormed = 0;
7093 ctxt->disableSAX = 1;
7094 } else
7095 NEXT;
7096 } else if (RAW == '\''){
7097 NEXT;
7098 q = CUR_PTR;
7099 version = xmlParseVersionNum(ctxt);
7100 if (RAW != '\'') {
7101 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7102 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7103 ctxt->sax->error(ctxt->userData,
7104 "String not closed\n%.50s\n", q);
7105 ctxt->wellFormed = 0;
7106 ctxt->disableSAX = 1;
7107 } else
7108 NEXT;
7109 } else {
7110 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7112 ctxt->sax->error(ctxt->userData,
7113 "xmlParseVersionInfo : expected ' or \"\n");
7114 ctxt->wellFormed = 0;
7115 ctxt->disableSAX = 1;
7116 }
7117 }
7118 return(version);
7119}
7120
7121/**
7122 * xmlParseEncName:
7123 * @ctxt: an XML parser context
7124 *
7125 * parse the XML encoding name
7126 *
7127 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7128 *
7129 * Returns the encoding name value or NULL
7130 */
7131xmlChar *
7132xmlParseEncName(xmlParserCtxtPtr ctxt) {
7133 xmlChar *buf = NULL;
7134 int len = 0;
7135 int size = 10;
7136 xmlChar cur;
7137
7138 cur = CUR;
7139 if (((cur >= 'a') && (cur <= 'z')) ||
7140 ((cur >= 'A') && (cur <= 'Z'))) {
7141 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7142 if (buf == NULL) {
7143 xmlGenericError(xmlGenericErrorContext,
7144 "malloc of %d byte failed\n", size);
7145 return(NULL);
7146 }
7147
7148 buf[len++] = cur;
7149 NEXT;
7150 cur = CUR;
7151 while (((cur >= 'a') && (cur <= 'z')) ||
7152 ((cur >= 'A') && (cur <= 'Z')) ||
7153 ((cur >= '0') && (cur <= '9')) ||
7154 (cur == '.') || (cur == '_') ||
7155 (cur == '-')) {
7156 if (len + 1 >= size) {
7157 size *= 2;
7158 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7159 if (buf == NULL) {
7160 xmlGenericError(xmlGenericErrorContext,
7161 "realloc of %d byte failed\n", size);
7162 return(NULL);
7163 }
7164 }
7165 buf[len++] = cur;
7166 NEXT;
7167 cur = CUR;
7168 if (cur == 0) {
7169 SHRINK;
7170 GROW;
7171 cur = CUR;
7172 }
7173 }
7174 buf[len] = 0;
7175 } else {
7176 ctxt->errNo = XML_ERR_ENCODING_NAME;
7177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7178 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7179 ctxt->wellFormed = 0;
7180 ctxt->disableSAX = 1;
7181 }
7182 return(buf);
7183}
7184
7185/**
7186 * xmlParseEncodingDecl:
7187 * @ctxt: an XML parser context
7188 *
7189 * parse the XML encoding declaration
7190 *
7191 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7192 *
7193 * this setups the conversion filters.
7194 *
7195 * Returns the encoding value or NULL
7196 */
7197
7198xmlChar *
7199xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7200 xmlChar *encoding = NULL;
7201 const xmlChar *q;
7202
7203 SKIP_BLANKS;
7204 if ((RAW == 'e') && (NXT(1) == 'n') &&
7205 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7206 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7207 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7208 SKIP(8);
7209 SKIP_BLANKS;
7210 if (RAW != '=') {
7211 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7213 ctxt->sax->error(ctxt->userData,
7214 "xmlParseEncodingDecl : expected '='\n");
7215 ctxt->wellFormed = 0;
7216 ctxt->disableSAX = 1;
7217 return(NULL);
7218 }
7219 NEXT;
7220 SKIP_BLANKS;
7221 if (RAW == '"') {
7222 NEXT;
7223 q = CUR_PTR;
7224 encoding = xmlParseEncName(ctxt);
7225 if (RAW != '"') {
7226 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7228 ctxt->sax->error(ctxt->userData,
7229 "String not closed\n%.50s\n", q);
7230 ctxt->wellFormed = 0;
7231 ctxt->disableSAX = 1;
7232 } else
7233 NEXT;
7234 } else if (RAW == '\''){
7235 NEXT;
7236 q = CUR_PTR;
7237 encoding = xmlParseEncName(ctxt);
7238 if (RAW != '\'') {
7239 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7241 ctxt->sax->error(ctxt->userData,
7242 "String not closed\n%.50s\n", q);
7243 ctxt->wellFormed = 0;
7244 ctxt->disableSAX = 1;
7245 } else
7246 NEXT;
Daniel Veillard82ac6b02002-02-17 23:18:55 +00007247 } else {
Owen Taylor3473f882001-02-23 17:55:21 +00007248 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7250 ctxt->sax->error(ctxt->userData,
7251 "xmlParseEncodingDecl : expected ' or \"\n");
7252 ctxt->wellFormed = 0;
7253 ctxt->disableSAX = 1;
7254 }
7255 if (encoding != NULL) {
7256 xmlCharEncoding enc;
7257 xmlCharEncodingHandlerPtr handler;
7258
7259 if (ctxt->input->encoding != NULL)
7260 xmlFree((xmlChar *) ctxt->input->encoding);
7261 ctxt->input->encoding = encoding;
7262
7263 enc = xmlParseCharEncoding((const char *) encoding);
7264 /*
7265 * registered set of known encodings
7266 */
7267 if (enc != XML_CHAR_ENCODING_ERROR) {
7268 xmlSwitchEncoding(ctxt, enc);
7269 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7270 xmlFree(encoding);
7271 return(NULL);
7272 }
7273 } else {
7274 /*
7275 * fallback for unknown encodings
7276 */
7277 handler = xmlFindCharEncodingHandler((const char *) encoding);
7278 if (handler != NULL) {
7279 xmlSwitchToEncoding(ctxt, handler);
7280 } else {
7281 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7282 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7283 ctxt->sax->error(ctxt->userData,
7284 "Unsupported encoding %s\n", encoding);
7285 return(NULL);
7286 }
7287 }
7288 }
7289 }
7290 return(encoding);
7291}
7292
7293/**
7294 * xmlParseSDDecl:
7295 * @ctxt: an XML parser context
7296 *
7297 * parse the XML standalone declaration
7298 *
7299 * [32] SDDecl ::= S 'standalone' Eq
7300 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7301 *
7302 * [ VC: Standalone Document Declaration ]
7303 * TODO The standalone document declaration must have the value "no"
7304 * if any external markup declarations contain declarations of:
7305 * - attributes with default values, if elements to which these
7306 * attributes apply appear in the document without specifications
7307 * of values for these attributes, or
7308 * - entities (other than amp, lt, gt, apos, quot), if references
7309 * to those entities appear in the document, or
7310 * - attributes with values subject to normalization, where the
7311 * attribute appears in the document with a value which will change
7312 * as a result of normalization, or
7313 * - element types with element content, if white space occurs directly
7314 * within any instance of those types.
7315 *
7316 * Returns 1 if standalone, 0 otherwise
7317 */
7318
7319int
7320xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7321 int standalone = -1;
7322
7323 SKIP_BLANKS;
7324 if ((RAW == 's') && (NXT(1) == 't') &&
7325 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7326 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7327 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7328 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7329 SKIP(10);
7330 SKIP_BLANKS;
7331 if (RAW != '=') {
7332 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7334 ctxt->sax->error(ctxt->userData,
7335 "XML standalone declaration : expected '='\n");
7336 ctxt->wellFormed = 0;
7337 ctxt->disableSAX = 1;
7338 return(standalone);
7339 }
7340 NEXT;
7341 SKIP_BLANKS;
7342 if (RAW == '\''){
7343 NEXT;
7344 if ((RAW == 'n') && (NXT(1) == 'o')) {
7345 standalone = 0;
7346 SKIP(2);
7347 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7348 (NXT(2) == 's')) {
7349 standalone = 1;
7350 SKIP(3);
7351 } else {
7352 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7353 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7354 ctxt->sax->error(ctxt->userData,
7355 "standalone accepts only 'yes' or 'no'\n");
7356 ctxt->wellFormed = 0;
7357 ctxt->disableSAX = 1;
7358 }
7359 if (RAW != '\'') {
7360 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7362 ctxt->sax->error(ctxt->userData, "String not closed\n");
7363 ctxt->wellFormed = 0;
7364 ctxt->disableSAX = 1;
7365 } else
7366 NEXT;
7367 } else if (RAW == '"'){
7368 NEXT;
7369 if ((RAW == 'n') && (NXT(1) == 'o')) {
7370 standalone = 0;
7371 SKIP(2);
7372 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7373 (NXT(2) == 's')) {
7374 standalone = 1;
7375 SKIP(3);
7376 } else {
7377 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7379 ctxt->sax->error(ctxt->userData,
7380 "standalone accepts only 'yes' or 'no'\n");
7381 ctxt->wellFormed = 0;
7382 ctxt->disableSAX = 1;
7383 }
7384 if (RAW != '"') {
7385 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7387 ctxt->sax->error(ctxt->userData, "String not closed\n");
7388 ctxt->wellFormed = 0;
7389 ctxt->disableSAX = 1;
7390 } else
7391 NEXT;
7392 } else {
7393 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7394 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7395 ctxt->sax->error(ctxt->userData,
7396 "Standalone value not found\n");
7397 ctxt->wellFormed = 0;
7398 ctxt->disableSAX = 1;
7399 }
7400 }
7401 return(standalone);
7402}
7403
7404/**
7405 * xmlParseXMLDecl:
7406 * @ctxt: an XML parser context
7407 *
7408 * parse an XML declaration header
7409 *
7410 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7411 */
7412
7413void
7414xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7415 xmlChar *version;
7416
7417 /*
7418 * We know that '<?xml' is here.
7419 */
7420 SKIP(5);
7421
7422 if (!IS_BLANK(RAW)) {
7423 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7425 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7426 ctxt->wellFormed = 0;
7427 ctxt->disableSAX = 1;
7428 }
7429 SKIP_BLANKS;
7430
7431 /*
Daniel Veillard19840942001-11-29 16:11:38 +00007432 * We must have the VersionInfo here.
Owen Taylor3473f882001-02-23 17:55:21 +00007433 */
7434 version = xmlParseVersionInfo(ctxt);
Daniel Veillard19840942001-11-29 16:11:38 +00007435 if (version == NULL) {
7436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7437 ctxt->sax->error(ctxt->userData,
7438 "Malformed declaration expecting version\n");
7439 ctxt->wellFormed = 0;
7440 ctxt->disableSAX = 1;
7441 } else {
7442 if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7443 /*
7444 * TODO: Blueberry should be detected here
7445 */
7446 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7447 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7448 version);
7449 }
7450 if (ctxt->version != NULL)
Daniel Veillardd3b08822001-12-05 12:03:33 +00007451 xmlFree((void *) ctxt->version);
Daniel Veillard19840942001-11-29 16:11:38 +00007452 ctxt->version = version;
Daniel Veillarda050d232001-09-05 15:51:05 +00007453 }
Owen Taylor3473f882001-02-23 17:55:21 +00007454
7455 /*
7456 * We may have the encoding declaration
7457 */
7458 if (!IS_BLANK(RAW)) {
7459 if ((RAW == '?') && (NXT(1) == '>')) {
7460 SKIP(2);
7461 return;
7462 }
7463 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7465 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7466 ctxt->wellFormed = 0;
7467 ctxt->disableSAX = 1;
7468 }
7469 xmlParseEncodingDecl(ctxt);
7470 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7471 /*
7472 * The XML REC instructs us to stop parsing right here
7473 */
7474 return;
7475 }
7476
7477 /*
7478 * We may have the standalone status.
7479 */
7480 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7481 if ((RAW == '?') && (NXT(1) == '>')) {
7482 SKIP(2);
7483 return;
7484 }
7485 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7487 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7488 ctxt->wellFormed = 0;
7489 ctxt->disableSAX = 1;
7490 }
7491 SKIP_BLANKS;
7492 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7493
7494 SKIP_BLANKS;
7495 if ((RAW == '?') && (NXT(1) == '>')) {
7496 SKIP(2);
7497 } else if (RAW == '>') {
7498 /* Deprecated old WD ... */
7499 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7501 ctxt->sax->error(ctxt->userData,
7502 "XML declaration must end-up with '?>'\n");
7503 ctxt->wellFormed = 0;
7504 ctxt->disableSAX = 1;
7505 NEXT;
7506 } else {
7507 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7509 ctxt->sax->error(ctxt->userData,
7510 "parsing XML declaration: '?>' expected\n");
7511 ctxt->wellFormed = 0;
7512 ctxt->disableSAX = 1;
7513 MOVETO_ENDTAG(CUR_PTR);
7514 NEXT;
7515 }
7516}
7517
7518/**
7519 * xmlParseMisc:
7520 * @ctxt: an XML parser context
7521 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007522 * parse an XML Misc* optional field.
Owen Taylor3473f882001-02-23 17:55:21 +00007523 *
7524 * [27] Misc ::= Comment | PI | S
7525 */
7526
7527void
7528xmlParseMisc(xmlParserCtxtPtr ctxt) {
7529 while (((RAW == '<') && (NXT(1) == '?')) ||
7530 ((RAW == '<') && (NXT(1) == '!') &&
7531 (NXT(2) == '-') && (NXT(3) == '-')) ||
7532 IS_BLANK(CUR)) {
7533 if ((RAW == '<') && (NXT(1) == '?')) {
7534 xmlParsePI(ctxt);
7535 } else if (IS_BLANK(CUR)) {
7536 NEXT;
7537 } else
7538 xmlParseComment(ctxt);
7539 }
7540}
7541
7542/**
7543 * xmlParseDocument:
7544 * @ctxt: an XML parser context
7545 *
7546 * parse an XML document (and build a tree if using the standard SAX
7547 * interface).
7548 *
7549 * [1] document ::= prolog element Misc*
7550 *
7551 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7552 *
7553 * Returns 0, -1 in case of error. the parser context is augmented
7554 * as a result of the parsing.
7555 */
7556
7557int
7558xmlParseDocument(xmlParserCtxtPtr ctxt) {
7559 xmlChar start[4];
7560 xmlCharEncoding enc;
7561
7562 xmlInitParser();
7563
7564 GROW;
7565
7566 /*
7567 * SAX: beginning of the document processing.
7568 */
7569 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7570 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7571
Daniel Veillard50f34372001-08-03 12:06:36 +00007572 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007573 /*
7574 * Get the 4 first bytes and decode the charset
7575 * if enc != XML_CHAR_ENCODING_NONE
7576 * plug some encoding conversion routines.
7577 */
7578 start[0] = RAW;
7579 start[1] = NXT(1);
7580 start[2] = NXT(2);
7581 start[3] = NXT(3);
7582 enc = xmlDetectCharEncoding(start, 4);
7583 if (enc != XML_CHAR_ENCODING_NONE) {
7584 xmlSwitchEncoding(ctxt, enc);
7585 }
Owen Taylor3473f882001-02-23 17:55:21 +00007586 }
7587
7588
7589 if (CUR == 0) {
7590 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7592 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7593 ctxt->wellFormed = 0;
7594 ctxt->disableSAX = 1;
7595 }
7596
7597 /*
7598 * Check for the XMLDecl in the Prolog.
7599 */
7600 GROW;
7601 if ((RAW == '<') && (NXT(1) == '?') &&
7602 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7603 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7604
7605 /*
7606 * Note that we will switch encoding on the fly.
7607 */
7608 xmlParseXMLDecl(ctxt);
7609 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7610 /*
7611 * The XML REC instructs us to stop parsing right here
7612 */
7613 return(-1);
7614 }
7615 ctxt->standalone = ctxt->input->standalone;
7616 SKIP_BLANKS;
7617 } else {
7618 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7619 }
7620 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7621 ctxt->sax->startDocument(ctxt->userData);
7622
7623 /*
7624 * The Misc part of the Prolog
7625 */
7626 GROW;
7627 xmlParseMisc(ctxt);
7628
7629 /*
7630 * Then possibly doc type declaration(s) and more Misc
7631 * (doctypedecl Misc*)?
7632 */
7633 GROW;
7634 if ((RAW == '<') && (NXT(1) == '!') &&
7635 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7636 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7637 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7638 (NXT(8) == 'E')) {
7639
7640 ctxt->inSubset = 1;
7641 xmlParseDocTypeDecl(ctxt);
7642 if (RAW == '[') {
7643 ctxt->instate = XML_PARSER_DTD;
7644 xmlParseInternalSubset(ctxt);
7645 }
7646
7647 /*
7648 * Create and update the external subset.
7649 */
7650 ctxt->inSubset = 2;
7651 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7652 (!ctxt->disableSAX))
7653 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7654 ctxt->extSubSystem, ctxt->extSubURI);
7655 ctxt->inSubset = 0;
7656
7657
7658 ctxt->instate = XML_PARSER_PROLOG;
7659 xmlParseMisc(ctxt);
7660 }
7661
7662 /*
7663 * Time to start parsing the tree itself
7664 */
7665 GROW;
7666 if (RAW != '<') {
7667 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7669 ctxt->sax->error(ctxt->userData,
7670 "Start tag expected, '<' not found\n");
7671 ctxt->wellFormed = 0;
7672 ctxt->disableSAX = 1;
7673 ctxt->instate = XML_PARSER_EOF;
7674 } else {
7675 ctxt->instate = XML_PARSER_CONTENT;
7676 xmlParseElement(ctxt);
7677 ctxt->instate = XML_PARSER_EPILOG;
7678
7679
7680 /*
7681 * The Misc part at the end
7682 */
7683 xmlParseMisc(ctxt);
7684
7685 if (RAW != 0) {
7686 ctxt->errNo = XML_ERR_DOCUMENT_END;
7687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7688 ctxt->sax->error(ctxt->userData,
7689 "Extra content at the end of the document\n");
7690 ctxt->wellFormed = 0;
7691 ctxt->disableSAX = 1;
7692 }
7693 ctxt->instate = XML_PARSER_EOF;
7694 }
7695
7696 /*
7697 * SAX: end of the document processing.
7698 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007699 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007700 ctxt->sax->endDocument(ctxt->userData);
7701
Daniel Veillard5997aca2002-03-18 18:36:20 +00007702 /*
7703 * Remove locally kept entity definitions if the tree was not built
7704 */
7705 if ((ctxt->myDoc != NULL) &&
7706 (xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
7707 xmlFreeDoc(ctxt->myDoc);
7708 ctxt->myDoc = NULL;
7709 }
7710
Daniel Veillardc7612992002-02-17 22:47:37 +00007711 if (! ctxt->wellFormed) {
7712 ctxt->valid = 0;
7713 return(-1);
7714 }
Owen Taylor3473f882001-02-23 17:55:21 +00007715 return(0);
7716}
7717
7718/**
7719 * xmlParseExtParsedEnt:
7720 * @ctxt: an XML parser context
7721 *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00007722 * parse a general parsed entity
Owen Taylor3473f882001-02-23 17:55:21 +00007723 * An external general parsed entity is well-formed if it matches the
7724 * production labeled extParsedEnt.
7725 *
7726 * [78] extParsedEnt ::= TextDecl? content
7727 *
7728 * Returns 0, -1 in case of error. the parser context is augmented
7729 * as a result of the parsing.
7730 */
7731
7732int
7733xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7734 xmlChar start[4];
7735 xmlCharEncoding enc;
7736
7737 xmlDefaultSAXHandlerInit();
7738
7739 GROW;
7740
7741 /*
7742 * SAX: beginning of the document processing.
7743 */
7744 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7745 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7746
7747 /*
7748 * Get the 4 first bytes and decode the charset
7749 * if enc != XML_CHAR_ENCODING_NONE
7750 * plug some encoding conversion routines.
7751 */
7752 start[0] = RAW;
7753 start[1] = NXT(1);
7754 start[2] = NXT(2);
7755 start[3] = NXT(3);
7756 enc = xmlDetectCharEncoding(start, 4);
7757 if (enc != XML_CHAR_ENCODING_NONE) {
7758 xmlSwitchEncoding(ctxt, enc);
7759 }
7760
7761
7762 if (CUR == 0) {
7763 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7765 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7766 ctxt->wellFormed = 0;
7767 ctxt->disableSAX = 1;
7768 }
7769
7770 /*
7771 * Check for the XMLDecl in the Prolog.
7772 */
7773 GROW;
7774 if ((RAW == '<') && (NXT(1) == '?') &&
7775 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7776 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7777
7778 /*
7779 * Note that we will switch encoding on the fly.
7780 */
7781 xmlParseXMLDecl(ctxt);
7782 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7783 /*
7784 * The XML REC instructs us to stop parsing right here
7785 */
7786 return(-1);
7787 }
7788 SKIP_BLANKS;
7789 } else {
7790 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7791 }
7792 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7793 ctxt->sax->startDocument(ctxt->userData);
7794
7795 /*
7796 * Doing validity checking on chunk doesn't make sense
7797 */
7798 ctxt->instate = XML_PARSER_CONTENT;
7799 ctxt->validate = 0;
7800 ctxt->loadsubset = 0;
7801 ctxt->depth = 0;
7802
7803 xmlParseContent(ctxt);
7804
7805 if ((RAW == '<') && (NXT(1) == '/')) {
7806 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7808 ctxt->sax->error(ctxt->userData,
7809 "chunk is not well balanced\n");
7810 ctxt->wellFormed = 0;
7811 ctxt->disableSAX = 1;
7812 } else if (RAW != 0) {
7813 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7815 ctxt->sax->error(ctxt->userData,
7816 "extra content at the end of well balanced chunk\n");
7817 ctxt->wellFormed = 0;
7818 ctxt->disableSAX = 1;
7819 }
7820
7821 /*
7822 * SAX: end of the document processing.
7823 */
Daniel Veillard8d24cc12002-03-05 15:41:29 +00007824 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00007825 ctxt->sax->endDocument(ctxt->userData);
7826
7827 if (! ctxt->wellFormed) return(-1);
7828 return(0);
7829}
7830
7831/************************************************************************
7832 * *
7833 * Progressive parsing interfaces *
7834 * *
7835 ************************************************************************/
7836
7837/**
7838 * xmlParseLookupSequence:
7839 * @ctxt: an XML parser context
7840 * @first: the first char to lookup
7841 * @next: the next char to lookup or zero
7842 * @third: the next char to lookup or zero
7843 *
7844 * Try to find if a sequence (first, next, third) or just (first next) or
7845 * (first) is available in the input stream.
7846 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7847 * to avoid rescanning sequences of bytes, it DOES change the state of the
7848 * parser, do not use liberally.
7849 *
7850 * Returns the index to the current parsing point if the full sequence
7851 * is available, -1 otherwise.
7852 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007853static int
Owen Taylor3473f882001-02-23 17:55:21 +00007854xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7855 xmlChar next, xmlChar third) {
7856 int base, len;
7857 xmlParserInputPtr in;
7858 const xmlChar *buf;
7859
7860 in = ctxt->input;
7861 if (in == NULL) return(-1);
7862 base = in->cur - in->base;
7863 if (base < 0) return(-1);
7864 if (ctxt->checkIndex > base)
7865 base = ctxt->checkIndex;
7866 if (in->buf == NULL) {
7867 buf = in->base;
7868 len = in->length;
7869 } else {
7870 buf = in->buf->buffer->content;
7871 len = in->buf->buffer->use;
7872 }
7873 /* take into account the sequence length */
7874 if (third) len -= 2;
7875 else if (next) len --;
7876 for (;base < len;base++) {
7877 if (buf[base] == first) {
7878 if (third != 0) {
7879 if ((buf[base + 1] != next) ||
7880 (buf[base + 2] != third)) continue;
7881 } else if (next != 0) {
7882 if (buf[base + 1] != next) continue;
7883 }
7884 ctxt->checkIndex = 0;
7885#ifdef DEBUG_PUSH
7886 if (next == 0)
7887 xmlGenericError(xmlGenericErrorContext,
7888 "PP: lookup '%c' found at %d\n",
7889 first, base);
7890 else if (third == 0)
7891 xmlGenericError(xmlGenericErrorContext,
7892 "PP: lookup '%c%c' found at %d\n",
7893 first, next, base);
7894 else
7895 xmlGenericError(xmlGenericErrorContext,
7896 "PP: lookup '%c%c%c' found at %d\n",
7897 first, next, third, base);
7898#endif
7899 return(base - (in->cur - in->base));
7900 }
7901 }
7902 ctxt->checkIndex = base;
7903#ifdef DEBUG_PUSH
7904 if (next == 0)
7905 xmlGenericError(xmlGenericErrorContext,
7906 "PP: lookup '%c' failed\n", first);
7907 else if (third == 0)
7908 xmlGenericError(xmlGenericErrorContext,
7909 "PP: lookup '%c%c' failed\n", first, next);
7910 else
7911 xmlGenericError(xmlGenericErrorContext,
7912 "PP: lookup '%c%c%c' failed\n", first, next, third);
7913#endif
7914 return(-1);
7915}
7916
7917/**
7918 * xmlParseTryOrFinish:
7919 * @ctxt: an XML parser context
7920 * @terminate: last chunk indicator
7921 *
7922 * Try to progress on parsing
7923 *
7924 * Returns zero if no parsing was possible
7925 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007926static int
Owen Taylor3473f882001-02-23 17:55:21 +00007927xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7928 int ret = 0;
7929 int avail;
7930 xmlChar cur, next;
7931
7932#ifdef DEBUG_PUSH
7933 switch (ctxt->instate) {
7934 case XML_PARSER_EOF:
7935 xmlGenericError(xmlGenericErrorContext,
7936 "PP: try EOF\n"); break;
7937 case XML_PARSER_START:
7938 xmlGenericError(xmlGenericErrorContext,
7939 "PP: try START\n"); break;
7940 case XML_PARSER_MISC:
7941 xmlGenericError(xmlGenericErrorContext,
7942 "PP: try MISC\n");break;
7943 case XML_PARSER_COMMENT:
7944 xmlGenericError(xmlGenericErrorContext,
7945 "PP: try COMMENT\n");break;
7946 case XML_PARSER_PROLOG:
7947 xmlGenericError(xmlGenericErrorContext,
7948 "PP: try PROLOG\n");break;
7949 case XML_PARSER_START_TAG:
7950 xmlGenericError(xmlGenericErrorContext,
7951 "PP: try START_TAG\n");break;
7952 case XML_PARSER_CONTENT:
7953 xmlGenericError(xmlGenericErrorContext,
7954 "PP: try CONTENT\n");break;
7955 case XML_PARSER_CDATA_SECTION:
7956 xmlGenericError(xmlGenericErrorContext,
7957 "PP: try CDATA_SECTION\n");break;
7958 case XML_PARSER_END_TAG:
7959 xmlGenericError(xmlGenericErrorContext,
7960 "PP: try END_TAG\n");break;
7961 case XML_PARSER_ENTITY_DECL:
7962 xmlGenericError(xmlGenericErrorContext,
7963 "PP: try ENTITY_DECL\n");break;
7964 case XML_PARSER_ENTITY_VALUE:
7965 xmlGenericError(xmlGenericErrorContext,
7966 "PP: try ENTITY_VALUE\n");break;
7967 case XML_PARSER_ATTRIBUTE_VALUE:
7968 xmlGenericError(xmlGenericErrorContext,
7969 "PP: try ATTRIBUTE_VALUE\n");break;
7970 case XML_PARSER_DTD:
7971 xmlGenericError(xmlGenericErrorContext,
7972 "PP: try DTD\n");break;
7973 case XML_PARSER_EPILOG:
7974 xmlGenericError(xmlGenericErrorContext,
7975 "PP: try EPILOG\n");break;
7976 case XML_PARSER_PI:
7977 xmlGenericError(xmlGenericErrorContext,
7978 "PP: try PI\n");break;
7979 case XML_PARSER_IGNORE:
7980 xmlGenericError(xmlGenericErrorContext,
7981 "PP: try IGNORE\n");break;
7982 }
7983#endif
7984
7985 while (1) {
7986 /*
7987 * Pop-up of finished entities.
7988 */
7989 while ((RAW == 0) && (ctxt->inputNr > 1))
7990 xmlPopInput(ctxt);
7991
7992 if (ctxt->input ==NULL) break;
7993 if (ctxt->input->buf == NULL)
7994 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
Daniel Veillard158a4d22002-02-20 22:17:58 +00007995 else {
7996 /*
7997 * If we are operating on converted input, try to flush
7998 * remainng chars to avoid them stalling in the non-converted
7999 * buffer.
8000 */
8001 if ((ctxt->input->buf->raw != NULL) &&
8002 (ctxt->input->buf->raw->use > 0)) {
8003 int base = ctxt->input->base -
8004 ctxt->input->buf->buffer->content;
8005 int current = ctxt->input->cur - ctxt->input->base;
8006
8007 xmlParserInputBufferPush(ctxt->input->buf, 0, "");
8008 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8009 ctxt->input->cur = ctxt->input->base + current;
8010 ctxt->input->end =
8011 &ctxt->input->buf->buffer->content[
8012 ctxt->input->buf->buffer->use];
8013 }
8014 avail = ctxt->input->buf->buffer->use -
8015 (ctxt->input->cur - ctxt->input->base);
8016 }
Owen Taylor3473f882001-02-23 17:55:21 +00008017 if (avail < 1)
8018 goto done;
8019 switch (ctxt->instate) {
8020 case XML_PARSER_EOF:
8021 /*
8022 * Document parsing is done !
8023 */
8024 goto done;
8025 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008026 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
8027 xmlChar start[4];
8028 xmlCharEncoding enc;
8029
8030 /*
8031 * Very first chars read from the document flow.
8032 */
8033 if (avail < 4)
8034 goto done;
8035
8036 /*
8037 * Get the 4 first bytes and decode the charset
8038 * if enc != XML_CHAR_ENCODING_NONE
8039 * plug some encoding conversion routines.
8040 */
8041 start[0] = RAW;
8042 start[1] = NXT(1);
8043 start[2] = NXT(2);
8044 start[3] = NXT(3);
8045 enc = xmlDetectCharEncoding(start, 4);
8046 if (enc != XML_CHAR_ENCODING_NONE) {
8047 xmlSwitchEncoding(ctxt, enc);
8048 }
8049 break;
8050 }
Owen Taylor3473f882001-02-23 17:55:21 +00008051
8052 cur = ctxt->input->cur[0];
8053 next = ctxt->input->cur[1];
8054 if (cur == 0) {
8055 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8056 ctxt->sax->setDocumentLocator(ctxt->userData,
8057 &xmlDefaultSAXLocator);
8058 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8060 ctxt->sax->error(ctxt->userData, "Document is empty\n");
8061 ctxt->wellFormed = 0;
8062 ctxt->disableSAX = 1;
8063 ctxt->instate = XML_PARSER_EOF;
8064#ifdef DEBUG_PUSH
8065 xmlGenericError(xmlGenericErrorContext,
8066 "PP: entering EOF\n");
8067#endif
8068 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
8069 ctxt->sax->endDocument(ctxt->userData);
8070 goto done;
8071 }
8072 if ((cur == '<') && (next == '?')) {
8073 /* PI or XML decl */
8074 if (avail < 5) return(ret);
8075 if ((!terminate) &&
8076 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8077 return(ret);
8078 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8079 ctxt->sax->setDocumentLocator(ctxt->userData,
8080 &xmlDefaultSAXLocator);
8081 if ((ctxt->input->cur[2] == 'x') &&
8082 (ctxt->input->cur[3] == 'm') &&
8083 (ctxt->input->cur[4] == 'l') &&
8084 (IS_BLANK(ctxt->input->cur[5]))) {
8085 ret += 5;
8086#ifdef DEBUG_PUSH
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: Parsing XML Decl\n");
8089#endif
8090 xmlParseXMLDecl(ctxt);
8091 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
8092 /*
8093 * The XML REC instructs us to stop parsing right
8094 * here
8095 */
8096 ctxt->instate = XML_PARSER_EOF;
8097 return(0);
8098 }
8099 ctxt->standalone = ctxt->input->standalone;
8100 if ((ctxt->encoding == NULL) &&
8101 (ctxt->input->encoding != NULL))
8102 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
8103 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8104 (!ctxt->disableSAX))
8105 ctxt->sax->startDocument(ctxt->userData);
8106 ctxt->instate = XML_PARSER_MISC;
8107#ifdef DEBUG_PUSH
8108 xmlGenericError(xmlGenericErrorContext,
8109 "PP: entering MISC\n");
8110#endif
8111 } else {
8112 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8113 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8114 (!ctxt->disableSAX))
8115 ctxt->sax->startDocument(ctxt->userData);
8116 ctxt->instate = XML_PARSER_MISC;
8117#ifdef DEBUG_PUSH
8118 xmlGenericError(xmlGenericErrorContext,
8119 "PP: entering MISC\n");
8120#endif
8121 }
8122 } else {
8123 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
8124 ctxt->sax->setDocumentLocator(ctxt->userData,
8125 &xmlDefaultSAXLocator);
8126 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
8127 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
8128 (!ctxt->disableSAX))
8129 ctxt->sax->startDocument(ctxt->userData);
8130 ctxt->instate = XML_PARSER_MISC;
8131#ifdef DEBUG_PUSH
8132 xmlGenericError(xmlGenericErrorContext,
8133 "PP: entering MISC\n");
8134#endif
8135 }
8136 break;
8137 case XML_PARSER_MISC:
8138 SKIP_BLANKS;
8139 if (ctxt->input->buf == NULL)
8140 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8141 else
8142 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8143 if (avail < 2)
8144 goto done;
8145 cur = ctxt->input->cur[0];
8146 next = ctxt->input->cur[1];
8147 if ((cur == '<') && (next == '?')) {
8148 if ((!terminate) &&
8149 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8150 goto done;
8151#ifdef DEBUG_PUSH
8152 xmlGenericError(xmlGenericErrorContext,
8153 "PP: Parsing PI\n");
8154#endif
8155 xmlParsePI(ctxt);
8156 } else if ((cur == '<') && (next == '!') &&
8157 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8158 if ((!terminate) &&
8159 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8160 goto done;
8161#ifdef DEBUG_PUSH
8162 xmlGenericError(xmlGenericErrorContext,
8163 "PP: Parsing Comment\n");
8164#endif
8165 xmlParseComment(ctxt);
8166 ctxt->instate = XML_PARSER_MISC;
8167 } else if ((cur == '<') && (next == '!') &&
8168 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8169 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8170 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8171 (ctxt->input->cur[8] == 'E')) {
8172 if ((!terminate) &&
8173 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8174 goto done;
8175#ifdef DEBUG_PUSH
8176 xmlGenericError(xmlGenericErrorContext,
8177 "PP: Parsing internal subset\n");
8178#endif
8179 ctxt->inSubset = 1;
8180 xmlParseDocTypeDecl(ctxt);
8181 if (RAW == '[') {
8182 ctxt->instate = XML_PARSER_DTD;
8183#ifdef DEBUG_PUSH
8184 xmlGenericError(xmlGenericErrorContext,
8185 "PP: entering DTD\n");
8186#endif
8187 } else {
8188 /*
8189 * Create and update the external subset.
8190 */
8191 ctxt->inSubset = 2;
8192 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8193 (ctxt->sax->externalSubset != NULL))
8194 ctxt->sax->externalSubset(ctxt->userData,
8195 ctxt->intSubName, ctxt->extSubSystem,
8196 ctxt->extSubURI);
8197 ctxt->inSubset = 0;
8198 ctxt->instate = XML_PARSER_PROLOG;
8199#ifdef DEBUG_PUSH
8200 xmlGenericError(xmlGenericErrorContext,
8201 "PP: entering PROLOG\n");
8202#endif
8203 }
8204 } else if ((cur == '<') && (next == '!') &&
8205 (avail < 9)) {
8206 goto done;
8207 } else {
8208 ctxt->instate = XML_PARSER_START_TAG;
8209#ifdef DEBUG_PUSH
8210 xmlGenericError(xmlGenericErrorContext,
8211 "PP: entering START_TAG\n");
8212#endif
8213 }
8214 break;
8215 case XML_PARSER_IGNORE:
8216 xmlGenericError(xmlGenericErrorContext,
8217 "PP: internal error, state == IGNORE");
8218 ctxt->instate = XML_PARSER_DTD;
8219#ifdef DEBUG_PUSH
8220 xmlGenericError(xmlGenericErrorContext,
8221 "PP: entering DTD\n");
8222#endif
8223 break;
8224 case XML_PARSER_PROLOG:
8225 SKIP_BLANKS;
8226 if (ctxt->input->buf == NULL)
8227 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8228 else
8229 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8230 if (avail < 2)
8231 goto done;
8232 cur = ctxt->input->cur[0];
8233 next = ctxt->input->cur[1];
8234 if ((cur == '<') && (next == '?')) {
8235 if ((!terminate) &&
8236 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8237 goto done;
8238#ifdef DEBUG_PUSH
8239 xmlGenericError(xmlGenericErrorContext,
8240 "PP: Parsing PI\n");
8241#endif
8242 xmlParsePI(ctxt);
8243 } else if ((cur == '<') && (next == '!') &&
8244 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8245 if ((!terminate) &&
8246 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8247 goto done;
8248#ifdef DEBUG_PUSH
8249 xmlGenericError(xmlGenericErrorContext,
8250 "PP: Parsing Comment\n");
8251#endif
8252 xmlParseComment(ctxt);
8253 ctxt->instate = XML_PARSER_PROLOG;
8254 } else if ((cur == '<') && (next == '!') &&
8255 (avail < 4)) {
8256 goto done;
8257 } else {
8258 ctxt->instate = XML_PARSER_START_TAG;
8259#ifdef DEBUG_PUSH
8260 xmlGenericError(xmlGenericErrorContext,
8261 "PP: entering START_TAG\n");
8262#endif
8263 }
8264 break;
8265 case XML_PARSER_EPILOG:
8266 SKIP_BLANKS;
8267 if (ctxt->input->buf == NULL)
8268 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8269 else
8270 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8271 if (avail < 2)
8272 goto done;
8273 cur = ctxt->input->cur[0];
8274 next = ctxt->input->cur[1];
8275 if ((cur == '<') && (next == '?')) {
8276 if ((!terminate) &&
8277 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8278 goto done;
8279#ifdef DEBUG_PUSH
8280 xmlGenericError(xmlGenericErrorContext,
8281 "PP: Parsing PI\n");
8282#endif
8283 xmlParsePI(ctxt);
8284 ctxt->instate = XML_PARSER_EPILOG;
8285 } else if ((cur == '<') && (next == '!') &&
8286 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8287 if ((!terminate) &&
8288 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8289 goto done;
8290#ifdef DEBUG_PUSH
8291 xmlGenericError(xmlGenericErrorContext,
8292 "PP: Parsing Comment\n");
8293#endif
8294 xmlParseComment(ctxt);
8295 ctxt->instate = XML_PARSER_EPILOG;
8296 } else if ((cur == '<') && (next == '!') &&
8297 (avail < 4)) {
8298 goto done;
8299 } else {
8300 ctxt->errNo = XML_ERR_DOCUMENT_END;
8301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8302 ctxt->sax->error(ctxt->userData,
8303 "Extra content at the end of the document\n");
8304 ctxt->wellFormed = 0;
8305 ctxt->disableSAX = 1;
8306 ctxt->instate = XML_PARSER_EOF;
8307#ifdef DEBUG_PUSH
8308 xmlGenericError(xmlGenericErrorContext,
8309 "PP: entering EOF\n");
8310#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008311 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008312 ctxt->sax->endDocument(ctxt->userData);
8313 goto done;
8314 }
8315 break;
8316 case XML_PARSER_START_TAG: {
8317 xmlChar *name, *oldname;
8318
8319 if ((avail < 2) && (ctxt->inputNr == 1))
8320 goto done;
8321 cur = ctxt->input->cur[0];
8322 if (cur != '<') {
8323 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8325 ctxt->sax->error(ctxt->userData,
8326 "Start tag expect, '<' not found\n");
8327 ctxt->wellFormed = 0;
8328 ctxt->disableSAX = 1;
8329 ctxt->instate = XML_PARSER_EOF;
8330#ifdef DEBUG_PUSH
8331 xmlGenericError(xmlGenericErrorContext,
8332 "PP: entering EOF\n");
8333#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008334 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008335 ctxt->sax->endDocument(ctxt->userData);
8336 goto done;
8337 }
8338 if ((!terminate) &&
8339 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8340 goto done;
8341 if (ctxt->spaceNr == 0)
8342 spacePush(ctxt, -1);
8343 else
8344 spacePush(ctxt, *ctxt->space);
8345 name = xmlParseStartTag(ctxt);
8346 if (name == NULL) {
8347 spacePop(ctxt);
8348 ctxt->instate = XML_PARSER_EOF;
8349#ifdef DEBUG_PUSH
8350 xmlGenericError(xmlGenericErrorContext,
8351 "PP: entering EOF\n");
8352#endif
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008353 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008354 ctxt->sax->endDocument(ctxt->userData);
8355 goto done;
8356 }
8357 namePush(ctxt, xmlStrdup(name));
8358
8359 /*
8360 * [ VC: Root Element Type ]
8361 * The Name in the document type declaration must match
8362 * the element type of the root element.
8363 */
8364 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8365 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8366 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8367
8368 /*
8369 * Check for an Empty Element.
8370 */
8371 if ((RAW == '/') && (NXT(1) == '>')) {
8372 SKIP(2);
8373 if ((ctxt->sax != NULL) &&
8374 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8375 ctxt->sax->endElement(ctxt->userData, name);
8376 xmlFree(name);
8377 oldname = namePop(ctxt);
8378 spacePop(ctxt);
8379 if (oldname != NULL) {
8380#ifdef DEBUG_STACK
8381 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8382#endif
8383 xmlFree(oldname);
8384 }
8385 if (ctxt->name == NULL) {
8386 ctxt->instate = XML_PARSER_EPILOG;
8387#ifdef DEBUG_PUSH
8388 xmlGenericError(xmlGenericErrorContext,
8389 "PP: entering EPILOG\n");
8390#endif
8391 } else {
8392 ctxt->instate = XML_PARSER_CONTENT;
8393#ifdef DEBUG_PUSH
8394 xmlGenericError(xmlGenericErrorContext,
8395 "PP: entering CONTENT\n");
8396#endif
8397 }
8398 break;
8399 }
8400 if (RAW == '>') {
8401 NEXT;
8402 } else {
8403 ctxt->errNo = XML_ERR_GT_REQUIRED;
8404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8405 ctxt->sax->error(ctxt->userData,
8406 "Couldn't find end of Start Tag %s\n",
8407 name);
8408 ctxt->wellFormed = 0;
8409 ctxt->disableSAX = 1;
8410
8411 /*
8412 * end of parsing of this node.
8413 */
8414 nodePop(ctxt);
8415 oldname = namePop(ctxt);
8416 spacePop(ctxt);
8417 if (oldname != NULL) {
8418#ifdef DEBUG_STACK
8419 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8420#endif
8421 xmlFree(oldname);
8422 }
8423 }
8424 xmlFree(name);
8425 ctxt->instate = XML_PARSER_CONTENT;
8426#ifdef DEBUG_PUSH
8427 xmlGenericError(xmlGenericErrorContext,
8428 "PP: entering CONTENT\n");
8429#endif
8430 break;
8431 }
8432 case XML_PARSER_CONTENT: {
8433 const xmlChar *test;
8434 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008435 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008436
8437 /*
8438 * Handle preparsed entities and charRef
8439 */
8440 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008441 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008442
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008443 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008444 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8445 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008446 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008447 ctxt->token = 0;
8448 }
8449 if ((avail < 2) && (ctxt->inputNr == 1))
8450 goto done;
8451 cur = ctxt->input->cur[0];
8452 next = ctxt->input->cur[1];
8453
8454 test = CUR_PTR;
8455 cons = ctxt->input->consumed;
8456 tok = ctxt->token;
8457 if ((cur == '<') && (next == '?')) {
8458 if ((!terminate) &&
8459 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8460 goto done;
8461#ifdef DEBUG_PUSH
8462 xmlGenericError(xmlGenericErrorContext,
8463 "PP: Parsing PI\n");
8464#endif
8465 xmlParsePI(ctxt);
8466 } else if ((cur == '<') && (next == '!') &&
8467 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8468 if ((!terminate) &&
8469 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8470 goto done;
8471#ifdef DEBUG_PUSH
8472 xmlGenericError(xmlGenericErrorContext,
8473 "PP: Parsing Comment\n");
8474#endif
8475 xmlParseComment(ctxt);
8476 ctxt->instate = XML_PARSER_CONTENT;
8477 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8478 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8479 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8480 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8481 (ctxt->input->cur[8] == '[')) {
8482 SKIP(9);
8483 ctxt->instate = XML_PARSER_CDATA_SECTION;
8484#ifdef DEBUG_PUSH
8485 xmlGenericError(xmlGenericErrorContext,
8486 "PP: entering CDATA_SECTION\n");
8487#endif
8488 break;
8489 } else if ((cur == '<') && (next == '!') &&
8490 (avail < 9)) {
8491 goto done;
8492 } else if ((cur == '<') && (next == '/')) {
8493 ctxt->instate = XML_PARSER_END_TAG;
8494#ifdef DEBUG_PUSH
8495 xmlGenericError(xmlGenericErrorContext,
8496 "PP: entering END_TAG\n");
8497#endif
8498 break;
8499 } else if (cur == '<') {
8500 ctxt->instate = XML_PARSER_START_TAG;
8501#ifdef DEBUG_PUSH
8502 xmlGenericError(xmlGenericErrorContext,
8503 "PP: entering START_TAG\n");
8504#endif
8505 break;
8506 } else if (cur == '&') {
8507 if ((!terminate) &&
8508 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8509 goto done;
8510#ifdef DEBUG_PUSH
8511 xmlGenericError(xmlGenericErrorContext,
8512 "PP: Parsing Reference\n");
8513#endif
8514 xmlParseReference(ctxt);
8515 } else {
8516 /* TODO Avoid the extra copy, handle directly !!! */
8517 /*
8518 * Goal of the following test is:
8519 * - minimize calls to the SAX 'character' callback
8520 * when they are mergeable
8521 * - handle an problem for isBlank when we only parse
8522 * a sequence of blank chars and the next one is
8523 * not available to check against '<' presence.
8524 * - tries to homogenize the differences in SAX
Daniel Veillardcbaf3992001-12-31 16:16:02 +00008525 * callbacks between the push and pull versions
Owen Taylor3473f882001-02-23 17:55:21 +00008526 * of the parser.
8527 */
8528 if ((ctxt->inputNr == 1) &&
8529 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8530 if ((!terminate) &&
8531 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8532 goto done;
8533 }
8534 ctxt->checkIndex = 0;
8535#ifdef DEBUG_PUSH
8536 xmlGenericError(xmlGenericErrorContext,
8537 "PP: Parsing char data\n");
8538#endif
8539 xmlParseCharData(ctxt, 0);
8540 }
8541 /*
8542 * Pop-up of finished entities.
8543 */
8544 while ((RAW == 0) && (ctxt->inputNr > 1))
8545 xmlPopInput(ctxt);
8546 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8547 (tok == ctxt->token)) {
8548 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8550 ctxt->sax->error(ctxt->userData,
8551 "detected an error in element content\n");
8552 ctxt->wellFormed = 0;
8553 ctxt->disableSAX = 1;
8554 ctxt->instate = XML_PARSER_EOF;
8555 break;
8556 }
8557 break;
8558 }
8559 case XML_PARSER_CDATA_SECTION: {
8560 /*
8561 * The Push mode need to have the SAX callback for
8562 * cdataBlock merge back contiguous callbacks.
8563 */
8564 int base;
8565
8566 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8567 if (base < 0) {
8568 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8569 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8570 if (ctxt->sax->cdataBlock != NULL)
8571 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8572 XML_PARSER_BIG_BUFFER_SIZE);
8573 }
8574 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8575 ctxt->checkIndex = 0;
8576 }
8577 goto done;
8578 } else {
8579 if ((ctxt->sax != NULL) && (base > 0) &&
8580 (!ctxt->disableSAX)) {
8581 if (ctxt->sax->cdataBlock != NULL)
8582 ctxt->sax->cdataBlock(ctxt->userData,
8583 ctxt->input->cur, base);
8584 }
8585 SKIP(base + 3);
8586 ctxt->checkIndex = 0;
8587 ctxt->instate = XML_PARSER_CONTENT;
8588#ifdef DEBUG_PUSH
8589 xmlGenericError(xmlGenericErrorContext,
8590 "PP: entering CONTENT\n");
8591#endif
8592 }
8593 break;
8594 }
8595 case XML_PARSER_END_TAG:
8596 if (avail < 2)
8597 goto done;
8598 if ((!terminate) &&
8599 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8600 goto done;
8601 xmlParseEndTag(ctxt);
8602 if (ctxt->name == NULL) {
8603 ctxt->instate = XML_PARSER_EPILOG;
8604#ifdef DEBUG_PUSH
8605 xmlGenericError(xmlGenericErrorContext,
8606 "PP: entering EPILOG\n");
8607#endif
8608 } else {
8609 ctxt->instate = XML_PARSER_CONTENT;
8610#ifdef DEBUG_PUSH
8611 xmlGenericError(xmlGenericErrorContext,
8612 "PP: entering CONTENT\n");
8613#endif
8614 }
8615 break;
8616 case XML_PARSER_DTD: {
8617 /*
8618 * Sorry but progressive parsing of the internal subset
8619 * is not expected to be supported. We first check that
8620 * the full content of the internal subset is available and
8621 * the parsing is launched only at that point.
8622 * Internal subset ends up with "']' S? '>'" in an unescaped
8623 * section and not in a ']]>' sequence which are conditional
8624 * sections (whoever argued to keep that crap in XML deserve
8625 * a place in hell !).
8626 */
8627 int base, i;
8628 xmlChar *buf;
8629 xmlChar quote = 0;
8630
8631 base = ctxt->input->cur - ctxt->input->base;
8632 if (base < 0) return(0);
8633 if (ctxt->checkIndex > base)
8634 base = ctxt->checkIndex;
8635 buf = ctxt->input->buf->buffer->content;
8636 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8637 base++) {
8638 if (quote != 0) {
8639 if (buf[base] == quote)
8640 quote = 0;
8641 continue;
8642 }
8643 if (buf[base] == '"') {
8644 quote = '"';
8645 continue;
8646 }
8647 if (buf[base] == '\'') {
8648 quote = '\'';
8649 continue;
8650 }
8651 if (buf[base] == ']') {
8652 if ((unsigned int) base +1 >=
8653 ctxt->input->buf->buffer->use)
8654 break;
8655 if (buf[base + 1] == ']') {
8656 /* conditional crap, skip both ']' ! */
8657 base++;
8658 continue;
8659 }
8660 for (i = 0;
8661 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8662 i++) {
8663 if (buf[base + i] == '>')
8664 goto found_end_int_subset;
8665 }
8666 break;
8667 }
8668 }
8669 /*
8670 * We didn't found the end of the Internal subset
8671 */
8672 if (quote == 0)
8673 ctxt->checkIndex = base;
8674#ifdef DEBUG_PUSH
8675 if (next == 0)
8676 xmlGenericError(xmlGenericErrorContext,
8677 "PP: lookup of int subset end filed\n");
8678#endif
8679 goto done;
8680
8681found_end_int_subset:
8682 xmlParseInternalSubset(ctxt);
8683 ctxt->inSubset = 2;
8684 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8685 (ctxt->sax->externalSubset != NULL))
8686 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8687 ctxt->extSubSystem, ctxt->extSubURI);
8688 ctxt->inSubset = 0;
8689 ctxt->instate = XML_PARSER_PROLOG;
8690 ctxt->checkIndex = 0;
8691#ifdef DEBUG_PUSH
8692 xmlGenericError(xmlGenericErrorContext,
8693 "PP: entering PROLOG\n");
8694#endif
8695 break;
8696 }
8697 case XML_PARSER_COMMENT:
8698 xmlGenericError(xmlGenericErrorContext,
8699 "PP: internal error, state == COMMENT\n");
8700 ctxt->instate = XML_PARSER_CONTENT;
8701#ifdef DEBUG_PUSH
8702 xmlGenericError(xmlGenericErrorContext,
8703 "PP: entering CONTENT\n");
8704#endif
8705 break;
8706 case XML_PARSER_PI:
8707 xmlGenericError(xmlGenericErrorContext,
8708 "PP: internal error, state == PI\n");
8709 ctxt->instate = XML_PARSER_CONTENT;
8710#ifdef DEBUG_PUSH
8711 xmlGenericError(xmlGenericErrorContext,
8712 "PP: entering CONTENT\n");
8713#endif
8714 break;
8715 case XML_PARSER_ENTITY_DECL:
8716 xmlGenericError(xmlGenericErrorContext,
8717 "PP: internal error, state == ENTITY_DECL\n");
8718 ctxt->instate = XML_PARSER_DTD;
8719#ifdef DEBUG_PUSH
8720 xmlGenericError(xmlGenericErrorContext,
8721 "PP: entering DTD\n");
8722#endif
8723 break;
8724 case XML_PARSER_ENTITY_VALUE:
8725 xmlGenericError(xmlGenericErrorContext,
8726 "PP: internal error, state == ENTITY_VALUE\n");
8727 ctxt->instate = XML_PARSER_CONTENT;
8728#ifdef DEBUG_PUSH
8729 xmlGenericError(xmlGenericErrorContext,
8730 "PP: entering DTD\n");
8731#endif
8732 break;
8733 case XML_PARSER_ATTRIBUTE_VALUE:
8734 xmlGenericError(xmlGenericErrorContext,
8735 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8736 ctxt->instate = XML_PARSER_START_TAG;
8737#ifdef DEBUG_PUSH
8738 xmlGenericError(xmlGenericErrorContext,
8739 "PP: entering START_TAG\n");
8740#endif
8741 break;
8742 case XML_PARSER_SYSTEM_LITERAL:
8743 xmlGenericError(xmlGenericErrorContext,
8744 "PP: internal error, state == SYSTEM_LITERAL\n");
8745 ctxt->instate = XML_PARSER_START_TAG;
8746#ifdef DEBUG_PUSH
8747 xmlGenericError(xmlGenericErrorContext,
8748 "PP: entering START_TAG\n");
8749#endif
8750 break;
Daniel Veillard4a7ae502002-02-18 19:18:17 +00008751 case XML_PARSER_PUBLIC_LITERAL:
8752 xmlGenericError(xmlGenericErrorContext,
8753 "PP: internal error, state == PUBLIC_LITERAL\n");
8754 ctxt->instate = XML_PARSER_START_TAG;
8755#ifdef DEBUG_PUSH
8756 xmlGenericError(xmlGenericErrorContext,
8757 "PP: entering START_TAG\n");
8758#endif
8759 break;
Owen Taylor3473f882001-02-23 17:55:21 +00008760 }
8761 }
8762done:
8763#ifdef DEBUG_PUSH
8764 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8765#endif
8766 return(ret);
8767}
8768
8769/**
Owen Taylor3473f882001-02-23 17:55:21 +00008770 * xmlParseChunk:
8771 * @ctxt: an XML parser context
8772 * @chunk: an char array
8773 * @size: the size in byte of the chunk
8774 * @terminate: last chunk indicator
8775 *
8776 * Parse a Chunk of memory
8777 *
8778 * Returns zero if no error, the xmlParserErrors otherwise.
8779 */
8780int
8781xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8782 int terminate) {
8783 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8784 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8785 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8786 int cur = ctxt->input->cur - ctxt->input->base;
8787
8788 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8789 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8790 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008791 ctxt->input->end =
8792 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008793#ifdef DEBUG_PUSH
8794 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8795#endif
8796
8797 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8798 xmlParseTryOrFinish(ctxt, terminate);
8799 } else if (ctxt->instate != XML_PARSER_EOF) {
8800 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8801 xmlParserInputBufferPtr in = ctxt->input->buf;
8802 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8803 (in->raw != NULL)) {
8804 int nbchars;
8805
8806 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8807 if (nbchars < 0) {
8808 xmlGenericError(xmlGenericErrorContext,
8809 "xmlParseChunk: encoder error\n");
8810 return(XML_ERR_INVALID_ENCODING);
8811 }
8812 }
8813 }
8814 }
8815 xmlParseTryOrFinish(ctxt, terminate);
8816 if (terminate) {
8817 /*
8818 * Check for termination
8819 */
8820 if ((ctxt->instate != XML_PARSER_EOF) &&
8821 (ctxt->instate != XML_PARSER_EPILOG)) {
8822 ctxt->errNo = XML_ERR_DOCUMENT_END;
8823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8824 ctxt->sax->error(ctxt->userData,
8825 "Extra content at the end of the document\n");
8826 ctxt->wellFormed = 0;
8827 ctxt->disableSAX = 1;
8828 }
8829 if (ctxt->instate != XML_PARSER_EOF) {
Daniel Veillard8d24cc12002-03-05 15:41:29 +00008830 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
Owen Taylor3473f882001-02-23 17:55:21 +00008831 ctxt->sax->endDocument(ctxt->userData);
8832 }
8833 ctxt->instate = XML_PARSER_EOF;
8834 }
8835 return((xmlParserErrors) ctxt->errNo);
8836}
8837
8838/************************************************************************
8839 * *
8840 * I/O front end functions to the parser *
8841 * *
8842 ************************************************************************/
8843
8844/**
8845 * xmlStopParser:
8846 * @ctxt: an XML parser context
8847 *
8848 * Blocks further parser processing
8849 */
8850void
8851xmlStopParser(xmlParserCtxtPtr ctxt) {
8852 ctxt->instate = XML_PARSER_EOF;
8853 if (ctxt->input != NULL)
8854 ctxt->input->cur = BAD_CAST"";
8855}
8856
8857/**
8858 * xmlCreatePushParserCtxt:
8859 * @sax: a SAX handler
8860 * @user_data: The user data returned on SAX callbacks
8861 * @chunk: a pointer to an array of chars
8862 * @size: number of chars in the array
8863 * @filename: an optional file name or URI
8864 *
8865 * Create a parser context for using the XML parser in push mode
8866 * To allow content encoding detection, @size should be >= 4
8867 * The value of @filename is used for fetching external entities
8868 * and error/warning reports.
8869 *
8870 * Returns the new parser context or NULL
8871 */
8872xmlParserCtxtPtr
8873xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8874 const char *chunk, int size, const char *filename) {
8875 xmlParserCtxtPtr ctxt;
8876 xmlParserInputPtr inputStream;
8877 xmlParserInputBufferPtr buf;
8878 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8879
8880 /*
8881 * plug some encoding conversion routines
8882 */
8883 if ((chunk != NULL) && (size >= 4))
8884 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8885
8886 buf = xmlAllocParserInputBuffer(enc);
8887 if (buf == NULL) return(NULL);
8888
8889 ctxt = xmlNewParserCtxt();
8890 if (ctxt == NULL) {
8891 xmlFree(buf);
8892 return(NULL);
8893 }
8894 if (sax != NULL) {
8895 if (ctxt->sax != &xmlDefaultSAXHandler)
8896 xmlFree(ctxt->sax);
8897 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8898 if (ctxt->sax == NULL) {
8899 xmlFree(buf);
8900 xmlFree(ctxt);
8901 return(NULL);
8902 }
8903 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8904 if (user_data != NULL)
8905 ctxt->userData = user_data;
8906 }
8907 if (filename == NULL) {
8908 ctxt->directory = NULL;
8909 } else {
8910 ctxt->directory = xmlParserGetDirectory(filename);
8911 }
8912
8913 inputStream = xmlNewInputStream(ctxt);
8914 if (inputStream == NULL) {
8915 xmlFreeParserCtxt(ctxt);
8916 return(NULL);
8917 }
8918
8919 if (filename == NULL)
8920 inputStream->filename = NULL;
8921 else
8922 inputStream->filename = xmlMemStrdup(filename);
8923 inputStream->buf = buf;
8924 inputStream->base = inputStream->buf->buffer->content;
8925 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008926 inputStream->end =
8927 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008928
8929 inputPush(ctxt, inputStream);
8930
8931 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8932 (ctxt->input->buf != NULL)) {
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008933 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8934 int cur = ctxt->input->cur - ctxt->input->base;
8935
Owen Taylor3473f882001-02-23 17:55:21 +00008936 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
Daniel Veillardaa39a0f2002-01-06 12:47:22 +00008937
8938 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8939 ctxt->input->cur = ctxt->input->base + cur;
8940 ctxt->input->end =
8941 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008942#ifdef DEBUG_PUSH
8943 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8944#endif
8945 }
8946
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008947 if (enc != XML_CHAR_ENCODING_NONE) {
8948 xmlSwitchEncoding(ctxt, enc);
8949 }
8950
Owen Taylor3473f882001-02-23 17:55:21 +00008951 return(ctxt);
8952}
8953
8954/**
8955 * xmlCreateIOParserCtxt:
8956 * @sax: a SAX handler
8957 * @user_data: The user data returned on SAX callbacks
8958 * @ioread: an I/O read function
8959 * @ioclose: an I/O close function
8960 * @ioctx: an I/O handler
8961 * @enc: the charset encoding if known
8962 *
8963 * Create a parser context for using the XML parser with an existing
8964 * I/O stream
8965 *
8966 * Returns the new parser context or NULL
8967 */
8968xmlParserCtxtPtr
8969xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8970 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8971 void *ioctx, xmlCharEncoding enc) {
8972 xmlParserCtxtPtr ctxt;
8973 xmlParserInputPtr inputStream;
8974 xmlParserInputBufferPtr buf;
8975
8976 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8977 if (buf == NULL) return(NULL);
8978
8979 ctxt = xmlNewParserCtxt();
8980 if (ctxt == NULL) {
8981 xmlFree(buf);
8982 return(NULL);
8983 }
8984 if (sax != NULL) {
8985 if (ctxt->sax != &xmlDefaultSAXHandler)
8986 xmlFree(ctxt->sax);
8987 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8988 if (ctxt->sax == NULL) {
8989 xmlFree(buf);
8990 xmlFree(ctxt);
8991 return(NULL);
8992 }
8993 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8994 if (user_data != NULL)
8995 ctxt->userData = user_data;
8996 }
8997
8998 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8999 if (inputStream == NULL) {
9000 xmlFreeParserCtxt(ctxt);
9001 return(NULL);
9002 }
9003 inputPush(ctxt, inputStream);
9004
9005 return(ctxt);
9006}
9007
9008/************************************************************************
9009 * *
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009010 * Front ends when parsing a DTD *
Owen Taylor3473f882001-02-23 17:55:21 +00009011 * *
9012 ************************************************************************/
9013
9014/**
9015 * xmlIOParseDTD:
9016 * @sax: the SAX handler block or NULL
9017 * @input: an Input Buffer
9018 * @enc: the charset encoding if known
9019 *
9020 * Load and parse a DTD
9021 *
9022 * Returns the resulting xmlDtdPtr or NULL in case of error.
9023 * @input will be freed at parsing end.
9024 */
9025
9026xmlDtdPtr
9027xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
9028 xmlCharEncoding enc) {
9029 xmlDtdPtr ret = NULL;
9030 xmlParserCtxtPtr ctxt;
9031 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009032 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00009033
9034 if (input == NULL)
9035 return(NULL);
9036
9037 ctxt = xmlNewParserCtxt();
9038 if (ctxt == NULL) {
9039 return(NULL);
9040 }
9041
9042 /*
9043 * Set-up the SAX context
9044 */
9045 if (sax != NULL) {
9046 if (ctxt->sax != NULL)
9047 xmlFree(ctxt->sax);
9048 ctxt->sax = sax;
9049 ctxt->userData = NULL;
9050 }
9051
9052 /*
9053 * generate a parser input from the I/O handler
9054 */
9055
9056 pinput = xmlNewIOInputStream(ctxt, input, enc);
9057 if (pinput == NULL) {
9058 if (sax != NULL) ctxt->sax = NULL;
9059 xmlFreeParserCtxt(ctxt);
9060 return(NULL);
9061 }
9062
9063 /*
9064 * plug some encoding conversion routines here.
9065 */
9066 xmlPushInput(ctxt, pinput);
9067
9068 pinput->filename = NULL;
9069 pinput->line = 1;
9070 pinput->col = 1;
9071 pinput->base = ctxt->input->cur;
9072 pinput->cur = ctxt->input->cur;
9073 pinput->free = NULL;
9074
9075 /*
9076 * let's parse that entity knowing it's an external subset.
9077 */
9078 ctxt->inSubset = 2;
9079 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9080 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9081 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00009082
9083 if (enc == XML_CHAR_ENCODING_NONE) {
9084 /*
9085 * Get the 4 first bytes and decode the charset
9086 * if enc != XML_CHAR_ENCODING_NONE
9087 * plug some encoding conversion routines.
9088 */
9089 start[0] = RAW;
9090 start[1] = NXT(1);
9091 start[2] = NXT(2);
9092 start[3] = NXT(3);
9093 enc = xmlDetectCharEncoding(start, 4);
9094 if (enc != XML_CHAR_ENCODING_NONE) {
9095 xmlSwitchEncoding(ctxt, enc);
9096 }
9097 }
9098
Owen Taylor3473f882001-02-23 17:55:21 +00009099 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
9100
9101 if (ctxt->myDoc != NULL) {
9102 if (ctxt->wellFormed) {
9103 ret = ctxt->myDoc->extSubset;
9104 ctxt->myDoc->extSubset = NULL;
9105 } else {
9106 ret = NULL;
9107 }
9108 xmlFreeDoc(ctxt->myDoc);
9109 ctxt->myDoc = NULL;
9110 }
9111 if (sax != NULL) ctxt->sax = NULL;
9112 xmlFreeParserCtxt(ctxt);
9113
9114 return(ret);
9115}
9116
9117/**
9118 * xmlSAXParseDTD:
9119 * @sax: the SAX handler block
9120 * @ExternalID: a NAME* containing the External ID of the DTD
9121 * @SystemID: a NAME* containing the URL to the DTD
9122 *
9123 * Load and parse an external subset.
9124 *
9125 * Returns the resulting xmlDtdPtr or NULL in case of error.
9126 */
9127
9128xmlDtdPtr
9129xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
9130 const xmlChar *SystemID) {
9131 xmlDtdPtr ret = NULL;
9132 xmlParserCtxtPtr ctxt;
9133 xmlParserInputPtr input = NULL;
9134 xmlCharEncoding enc;
9135
9136 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
9137
9138 ctxt = xmlNewParserCtxt();
9139 if (ctxt == NULL) {
9140 return(NULL);
9141 }
9142
9143 /*
9144 * Set-up the SAX context
9145 */
9146 if (sax != NULL) {
9147 if (ctxt->sax != NULL)
9148 xmlFree(ctxt->sax);
9149 ctxt->sax = sax;
9150 ctxt->userData = NULL;
9151 }
9152
9153 /*
9154 * Ask the Entity resolver to load the damn thing
9155 */
9156
9157 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
9158 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
9159 if (input == NULL) {
9160 if (sax != NULL) ctxt->sax = NULL;
9161 xmlFreeParserCtxt(ctxt);
9162 return(NULL);
9163 }
9164
9165 /*
9166 * plug some encoding conversion routines here.
9167 */
9168 xmlPushInput(ctxt, input);
9169 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9170 xmlSwitchEncoding(ctxt, enc);
9171
9172 if (input->filename == NULL)
9173 input->filename = (char *) xmlStrdup(SystemID);
9174 input->line = 1;
9175 input->col = 1;
9176 input->base = ctxt->input->cur;
9177 input->cur = ctxt->input->cur;
9178 input->free = NULL;
9179
9180 /*
9181 * let's parse that entity knowing it's an external subset.
9182 */
9183 ctxt->inSubset = 2;
9184 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9185 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9186 ExternalID, SystemID);
9187 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9188
9189 if (ctxt->myDoc != NULL) {
9190 if (ctxt->wellFormed) {
9191 ret = ctxt->myDoc->extSubset;
9192 ctxt->myDoc->extSubset = NULL;
9193 } else {
9194 ret = NULL;
9195 }
9196 xmlFreeDoc(ctxt->myDoc);
9197 ctxt->myDoc = NULL;
9198 }
9199 if (sax != NULL) ctxt->sax = NULL;
9200 xmlFreeParserCtxt(ctxt);
9201
9202 return(ret);
9203}
9204
9205/**
9206 * xmlParseDTD:
9207 * @ExternalID: a NAME* containing the External ID of the DTD
9208 * @SystemID: a NAME* containing the URL to the DTD
9209 *
9210 * Load and parse an external subset.
9211 *
9212 * Returns the resulting xmlDtdPtr or NULL in case of error.
9213 */
9214
9215xmlDtdPtr
9216xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9217 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9218}
9219
9220/************************************************************************
9221 * *
9222 * Front ends when parsing an Entity *
9223 * *
9224 ************************************************************************/
9225
9226/**
Owen Taylor3473f882001-02-23 17:55:21 +00009227 * xmlParseCtxtExternalEntity:
9228 * @ctx: the existing parsing context
9229 * @URL: the URL for the entity to load
9230 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009231 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009232 *
9233 * Parse an external general entity within an existing parsing context
9234 * An external general parsed entity is well-formed if it matches the
9235 * production labeled extParsedEnt.
9236 *
9237 * [78] extParsedEnt ::= TextDecl? content
9238 *
9239 * Returns 0 if the entity is well formed, -1 in case of args problem and
9240 * the parser error code otherwise
9241 */
9242
9243int
9244xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009245 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009246 xmlParserCtxtPtr ctxt;
9247 xmlDocPtr newDoc;
9248 xmlSAXHandlerPtr oldsax = NULL;
9249 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009250 xmlChar start[4];
9251 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009252
9253 if (ctx->depth > 40) {
9254 return(XML_ERR_ENTITY_LOOP);
9255 }
9256
Daniel Veillardcda96922001-08-21 10:56:31 +00009257 if (lst != NULL)
9258 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009259 if ((URL == NULL) && (ID == NULL))
9260 return(-1);
9261 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9262 return(-1);
9263
9264
9265 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9266 if (ctxt == NULL) return(-1);
9267 ctxt->userData = ctxt;
9268 oldsax = ctxt->sax;
9269 ctxt->sax = ctx->sax;
9270 newDoc = xmlNewDoc(BAD_CAST "1.0");
9271 if (newDoc == NULL) {
9272 xmlFreeParserCtxt(ctxt);
9273 return(-1);
9274 }
9275 if (ctx->myDoc != NULL) {
9276 newDoc->intSubset = ctx->myDoc->intSubset;
9277 newDoc->extSubset = ctx->myDoc->extSubset;
9278 }
9279 if (ctx->myDoc->URL != NULL) {
9280 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9281 }
9282 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9283 if (newDoc->children == NULL) {
9284 ctxt->sax = oldsax;
9285 xmlFreeParserCtxt(ctxt);
9286 newDoc->intSubset = NULL;
9287 newDoc->extSubset = NULL;
9288 xmlFreeDoc(newDoc);
9289 return(-1);
9290 }
9291 nodePush(ctxt, newDoc->children);
9292 if (ctx->myDoc == NULL) {
9293 ctxt->myDoc = newDoc;
9294 } else {
9295 ctxt->myDoc = ctx->myDoc;
9296 newDoc->children->doc = ctx->myDoc;
9297 }
9298
Daniel Veillard87a764e2001-06-20 17:41:10 +00009299 /*
9300 * Get the 4 first bytes and decode the charset
9301 * if enc != XML_CHAR_ENCODING_NONE
9302 * plug some encoding conversion routines.
9303 */
9304 GROW
9305 start[0] = RAW;
9306 start[1] = NXT(1);
9307 start[2] = NXT(2);
9308 start[3] = NXT(3);
9309 enc = xmlDetectCharEncoding(start, 4);
9310 if (enc != XML_CHAR_ENCODING_NONE) {
9311 xmlSwitchEncoding(ctxt, enc);
9312 }
9313
Owen Taylor3473f882001-02-23 17:55:21 +00009314 /*
9315 * Parse a possible text declaration first
9316 */
Owen Taylor3473f882001-02-23 17:55:21 +00009317 if ((RAW == '<') && (NXT(1) == '?') &&
9318 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9319 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9320 xmlParseTextDecl(ctxt);
9321 }
9322
9323 /*
9324 * Doing validity checking on chunk doesn't make sense
9325 */
9326 ctxt->instate = XML_PARSER_CONTENT;
9327 ctxt->validate = ctx->validate;
9328 ctxt->loadsubset = ctx->loadsubset;
9329 ctxt->depth = ctx->depth + 1;
9330 ctxt->replaceEntities = ctx->replaceEntities;
9331 if (ctxt->validate) {
9332 ctxt->vctxt.error = ctx->vctxt.error;
9333 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009334 } else {
9335 ctxt->vctxt.error = NULL;
9336 ctxt->vctxt.warning = NULL;
9337 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009338 ctxt->vctxt.nodeTab = NULL;
9339 ctxt->vctxt.nodeNr = 0;
9340 ctxt->vctxt.nodeMax = 0;
9341 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009342
9343 xmlParseContent(ctxt);
9344
9345 if ((RAW == '<') && (NXT(1) == '/')) {
9346 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9348 ctxt->sax->error(ctxt->userData,
9349 "chunk is not well balanced\n");
9350 ctxt->wellFormed = 0;
9351 ctxt->disableSAX = 1;
9352 } else if (RAW != 0) {
9353 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9355 ctxt->sax->error(ctxt->userData,
9356 "extra content at the end of well balanced chunk\n");
9357 ctxt->wellFormed = 0;
9358 ctxt->disableSAX = 1;
9359 }
9360 if (ctxt->node != newDoc->children) {
9361 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9363 ctxt->sax->error(ctxt->userData,
9364 "chunk is not well balanced\n");
9365 ctxt->wellFormed = 0;
9366 ctxt->disableSAX = 1;
9367 }
9368
9369 if (!ctxt->wellFormed) {
9370 if (ctxt->errNo == 0)
9371 ret = 1;
9372 else
9373 ret = ctxt->errNo;
9374 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009375 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009376 xmlNodePtr cur;
9377
9378 /*
9379 * Return the newly created nodeset after unlinking it from
9380 * they pseudo parent.
9381 */
9382 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009383 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009384 while (cur != NULL) {
9385 cur->parent = NULL;
9386 cur = cur->next;
9387 }
9388 newDoc->children->children = NULL;
9389 }
9390 ret = 0;
9391 }
9392 ctxt->sax = oldsax;
9393 xmlFreeParserCtxt(ctxt);
9394 newDoc->intSubset = NULL;
9395 newDoc->extSubset = NULL;
9396 xmlFreeDoc(newDoc);
9397
9398 return(ret);
9399}
9400
9401/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009402 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009403 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009404 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009405 * @sax: the SAX handler bloc (possibly NULL)
9406 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9407 * @depth: Used for loop detection, use 0
9408 * @URL: the URL for the entity to load
9409 * @ID: the System ID for the entity to load
9410 * @list: the return value for the set of parsed nodes
9411 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009412 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009413 *
9414 * Returns 0 if the entity is well formed, -1 in case of args problem and
9415 * the parser error code otherwise
9416 */
9417
Daniel Veillard257d9102001-05-08 10:41:44 +00009418static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009419xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9420 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009421 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009422 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009423 xmlParserCtxtPtr ctxt;
9424 xmlDocPtr newDoc;
9425 xmlSAXHandlerPtr oldsax = NULL;
9426 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009427 xmlChar start[4];
9428 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009429
9430 if (depth > 40) {
9431 return(XML_ERR_ENTITY_LOOP);
9432 }
9433
9434
9435
9436 if (list != NULL)
9437 *list = NULL;
9438 if ((URL == NULL) && (ID == NULL))
9439 return(-1);
9440 if (doc == NULL) /* @@ relax but check for dereferences */
9441 return(-1);
9442
9443
9444 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9445 if (ctxt == NULL) return(-1);
9446 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009447 if (oldctxt != NULL) {
9448 ctxt->_private = oldctxt->_private;
9449 ctxt->loadsubset = oldctxt->loadsubset;
9450 ctxt->validate = oldctxt->validate;
9451 ctxt->external = oldctxt->external;
9452 } else {
9453 /*
9454 * Doing validity checking on chunk without context
9455 * doesn't make sense
9456 */
9457 ctxt->_private = NULL;
9458 ctxt->validate = 0;
9459 ctxt->external = 2;
9460 ctxt->loadsubset = 0;
9461 }
Owen Taylor3473f882001-02-23 17:55:21 +00009462 if (sax != NULL) {
9463 oldsax = ctxt->sax;
9464 ctxt->sax = sax;
9465 if (user_data != NULL)
9466 ctxt->userData = user_data;
9467 }
9468 newDoc = xmlNewDoc(BAD_CAST "1.0");
9469 if (newDoc == NULL) {
9470 xmlFreeParserCtxt(ctxt);
9471 return(-1);
9472 }
9473 if (doc != NULL) {
9474 newDoc->intSubset = doc->intSubset;
9475 newDoc->extSubset = doc->extSubset;
9476 }
9477 if (doc->URL != NULL) {
9478 newDoc->URL = xmlStrdup(doc->URL);
9479 }
9480 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9481 if (newDoc->children == NULL) {
9482 if (sax != NULL)
9483 ctxt->sax = oldsax;
9484 xmlFreeParserCtxt(ctxt);
9485 newDoc->intSubset = NULL;
9486 newDoc->extSubset = NULL;
9487 xmlFreeDoc(newDoc);
9488 return(-1);
9489 }
9490 nodePush(ctxt, newDoc->children);
9491 if (doc == NULL) {
9492 ctxt->myDoc = newDoc;
9493 } else {
9494 ctxt->myDoc = doc;
9495 newDoc->children->doc = doc;
9496 }
9497
Daniel Veillard87a764e2001-06-20 17:41:10 +00009498 /*
9499 * Get the 4 first bytes and decode the charset
9500 * if enc != XML_CHAR_ENCODING_NONE
9501 * plug some encoding conversion routines.
9502 */
9503 GROW;
9504 start[0] = RAW;
9505 start[1] = NXT(1);
9506 start[2] = NXT(2);
9507 start[3] = NXT(3);
9508 enc = xmlDetectCharEncoding(start, 4);
9509 if (enc != XML_CHAR_ENCODING_NONE) {
9510 xmlSwitchEncoding(ctxt, enc);
9511 }
9512
Owen Taylor3473f882001-02-23 17:55:21 +00009513 /*
9514 * Parse a possible text declaration first
9515 */
Owen Taylor3473f882001-02-23 17:55:21 +00009516 if ((RAW == '<') && (NXT(1) == '?') &&
9517 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9518 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9519 xmlParseTextDecl(ctxt);
9520 }
9521
Owen Taylor3473f882001-02-23 17:55:21 +00009522 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009523 ctxt->depth = depth;
9524
9525 xmlParseContent(ctxt);
9526
9527 if ((RAW == '<') && (NXT(1) == '/')) {
9528 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9530 ctxt->sax->error(ctxt->userData,
9531 "chunk is not well balanced\n");
9532 ctxt->wellFormed = 0;
9533 ctxt->disableSAX = 1;
9534 } else if (RAW != 0) {
9535 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9536 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9537 ctxt->sax->error(ctxt->userData,
9538 "extra content at the end of well balanced chunk\n");
9539 ctxt->wellFormed = 0;
9540 ctxt->disableSAX = 1;
9541 }
9542 if (ctxt->node != newDoc->children) {
9543 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9545 ctxt->sax->error(ctxt->userData,
9546 "chunk is not well balanced\n");
9547 ctxt->wellFormed = 0;
9548 ctxt->disableSAX = 1;
9549 }
9550
9551 if (!ctxt->wellFormed) {
9552 if (ctxt->errNo == 0)
9553 ret = 1;
9554 else
9555 ret = ctxt->errNo;
9556 } else {
9557 if (list != NULL) {
9558 xmlNodePtr cur;
9559
9560 /*
9561 * Return the newly created nodeset after unlinking it from
9562 * they pseudo parent.
9563 */
9564 cur = newDoc->children->children;
9565 *list = cur;
9566 while (cur != NULL) {
9567 cur->parent = NULL;
9568 cur = cur->next;
9569 }
9570 newDoc->children->children = NULL;
9571 }
9572 ret = 0;
9573 }
9574 if (sax != NULL)
9575 ctxt->sax = oldsax;
9576 xmlFreeParserCtxt(ctxt);
9577 newDoc->intSubset = NULL;
9578 newDoc->extSubset = NULL;
9579 xmlFreeDoc(newDoc);
9580
9581 return(ret);
9582}
9583
9584/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009585 * xmlParseExternalEntity:
9586 * @doc: the document the chunk pertains to
9587 * @sax: the SAX handler bloc (possibly NULL)
9588 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9589 * @depth: Used for loop detection, use 0
9590 * @URL: the URL for the entity to load
9591 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009592 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009593 *
9594 * Parse an external general entity
9595 * An external general parsed entity is well-formed if it matches the
9596 * production labeled extParsedEnt.
9597 *
9598 * [78] extParsedEnt ::= TextDecl? content
9599 *
9600 * Returns 0 if the entity is well formed, -1 in case of args problem and
9601 * the parser error code otherwise
9602 */
9603
9604int
9605xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009606 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009607 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009608 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009609}
9610
9611/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009612 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009613 * @doc: the document the chunk pertains to
9614 * @sax: the SAX handler bloc (possibly NULL)
9615 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9616 * @depth: Used for loop detection, use 0
9617 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009618 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009619 *
9620 * Parse a well-balanced chunk of an XML document
9621 * called by the parser
9622 * The allowed sequence for the Well Balanced Chunk is the one defined by
9623 * the content production in the XML grammar:
9624 *
9625 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9626 *
9627 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9628 * the parser error code otherwise
9629 */
9630
9631int
9632xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009633 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009634 xmlParserCtxtPtr ctxt;
9635 xmlDocPtr newDoc;
9636 xmlSAXHandlerPtr oldsax = NULL;
9637 int size;
9638 int ret = 0;
9639
9640 if (depth > 40) {
9641 return(XML_ERR_ENTITY_LOOP);
9642 }
9643
9644
Daniel Veillardcda96922001-08-21 10:56:31 +00009645 if (lst != NULL)
9646 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009647 if (string == NULL)
9648 return(-1);
9649
9650 size = xmlStrlen(string);
9651
9652 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9653 if (ctxt == NULL) return(-1);
9654 ctxt->userData = ctxt;
9655 if (sax != NULL) {
9656 oldsax = ctxt->sax;
9657 ctxt->sax = sax;
9658 if (user_data != NULL)
9659 ctxt->userData = user_data;
9660 }
9661 newDoc = xmlNewDoc(BAD_CAST "1.0");
9662 if (newDoc == NULL) {
9663 xmlFreeParserCtxt(ctxt);
9664 return(-1);
9665 }
9666 if (doc != NULL) {
9667 newDoc->intSubset = doc->intSubset;
9668 newDoc->extSubset = doc->extSubset;
9669 }
9670 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9671 if (newDoc->children == NULL) {
9672 if (sax != NULL)
9673 ctxt->sax = oldsax;
9674 xmlFreeParserCtxt(ctxt);
9675 newDoc->intSubset = NULL;
9676 newDoc->extSubset = NULL;
9677 xmlFreeDoc(newDoc);
9678 return(-1);
9679 }
9680 nodePush(ctxt, newDoc->children);
9681 if (doc == NULL) {
9682 ctxt->myDoc = newDoc;
9683 } else {
9684 ctxt->myDoc = doc;
9685 newDoc->children->doc = doc;
9686 }
9687 ctxt->instate = XML_PARSER_CONTENT;
9688 ctxt->depth = depth;
9689
9690 /*
9691 * Doing validity checking on chunk doesn't make sense
9692 */
9693 ctxt->validate = 0;
9694 ctxt->loadsubset = 0;
9695
9696 xmlParseContent(ctxt);
9697
9698 if ((RAW == '<') && (NXT(1) == '/')) {
9699 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9701 ctxt->sax->error(ctxt->userData,
9702 "chunk is not well balanced\n");
9703 ctxt->wellFormed = 0;
9704 ctxt->disableSAX = 1;
9705 } else if (RAW != 0) {
9706 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9708 ctxt->sax->error(ctxt->userData,
9709 "extra content at the end of well balanced chunk\n");
9710 ctxt->wellFormed = 0;
9711 ctxt->disableSAX = 1;
9712 }
9713 if (ctxt->node != newDoc->children) {
9714 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9716 ctxt->sax->error(ctxt->userData,
9717 "chunk is not well balanced\n");
9718 ctxt->wellFormed = 0;
9719 ctxt->disableSAX = 1;
9720 }
9721
9722 if (!ctxt->wellFormed) {
9723 if (ctxt->errNo == 0)
9724 ret = 1;
9725 else
9726 ret = ctxt->errNo;
9727 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009728 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009729 xmlNodePtr cur;
9730
9731 /*
9732 * Return the newly created nodeset after unlinking it from
9733 * they pseudo parent.
9734 */
9735 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009736 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009737 while (cur != NULL) {
9738 cur->parent = NULL;
9739 cur = cur->next;
9740 }
9741 newDoc->children->children = NULL;
9742 }
9743 ret = 0;
9744 }
9745 if (sax != NULL)
9746 ctxt->sax = oldsax;
9747 xmlFreeParserCtxt(ctxt);
9748 newDoc->intSubset = NULL;
9749 newDoc->extSubset = NULL;
9750 xmlFreeDoc(newDoc);
9751
9752 return(ret);
9753}
9754
9755/**
9756 * xmlSAXParseEntity:
9757 * @sax: the SAX handler block
9758 * @filename: the filename
9759 *
9760 * parse an XML external entity out of context and build a tree.
9761 * It use the given SAX function block to handle the parsing callback.
9762 * If sax is NULL, fallback to the default DOM tree building routines.
9763 *
9764 * [78] extParsedEnt ::= TextDecl? content
9765 *
9766 * This correspond to a "Well Balanced" chunk
9767 *
9768 * Returns the resulting document tree
9769 */
9770
9771xmlDocPtr
9772xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9773 xmlDocPtr ret;
9774 xmlParserCtxtPtr ctxt;
9775 char *directory = NULL;
9776
9777 ctxt = xmlCreateFileParserCtxt(filename);
9778 if (ctxt == NULL) {
9779 return(NULL);
9780 }
9781 if (sax != NULL) {
9782 if (ctxt->sax != NULL)
9783 xmlFree(ctxt->sax);
9784 ctxt->sax = sax;
9785 ctxt->userData = NULL;
9786 }
9787
9788 if ((ctxt->directory == NULL) && (directory == NULL))
9789 directory = xmlParserGetDirectory(filename);
9790
9791 xmlParseExtParsedEnt(ctxt);
9792
9793 if (ctxt->wellFormed)
9794 ret = ctxt->myDoc;
9795 else {
9796 ret = NULL;
9797 xmlFreeDoc(ctxt->myDoc);
9798 ctxt->myDoc = NULL;
9799 }
9800 if (sax != NULL)
9801 ctxt->sax = NULL;
9802 xmlFreeParserCtxt(ctxt);
9803
9804 return(ret);
9805}
9806
9807/**
9808 * xmlParseEntity:
9809 * @filename: the filename
9810 *
9811 * parse an XML external entity out of context and build a tree.
9812 *
9813 * [78] extParsedEnt ::= TextDecl? content
9814 *
9815 * This correspond to a "Well Balanced" chunk
9816 *
9817 * Returns the resulting document tree
9818 */
9819
9820xmlDocPtr
9821xmlParseEntity(const char *filename) {
9822 return(xmlSAXParseEntity(NULL, filename));
9823}
9824
9825/**
9826 * xmlCreateEntityParserCtxt:
9827 * @URL: the entity URL
9828 * @ID: the entity PUBLIC ID
Daniel Veillardcbaf3992001-12-31 16:16:02 +00009829 * @base: a possible base for the target URI
Owen Taylor3473f882001-02-23 17:55:21 +00009830 *
9831 * Create a parser context for an external entity
9832 * Automatic support for ZLIB/Compress compressed document is provided
9833 * by default if found at compile-time.
9834 *
9835 * Returns the new parser context or NULL
9836 */
9837xmlParserCtxtPtr
9838xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9839 const xmlChar *base) {
9840 xmlParserCtxtPtr ctxt;
9841 xmlParserInputPtr inputStream;
9842 char *directory = NULL;
9843 xmlChar *uri;
9844
9845 ctxt = xmlNewParserCtxt();
9846 if (ctxt == NULL) {
9847 return(NULL);
9848 }
9849
9850 uri = xmlBuildURI(URL, base);
9851
9852 if (uri == NULL) {
9853 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9854 if (inputStream == NULL) {
9855 xmlFreeParserCtxt(ctxt);
9856 return(NULL);
9857 }
9858
9859 inputPush(ctxt, inputStream);
9860
9861 if ((ctxt->directory == NULL) && (directory == NULL))
9862 directory = xmlParserGetDirectory((char *)URL);
9863 if ((ctxt->directory == NULL) && (directory != NULL))
9864 ctxt->directory = directory;
9865 } else {
9866 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9867 if (inputStream == NULL) {
9868 xmlFree(uri);
9869 xmlFreeParserCtxt(ctxt);
9870 return(NULL);
9871 }
9872
9873 inputPush(ctxt, inputStream);
9874
9875 if ((ctxt->directory == NULL) && (directory == NULL))
9876 directory = xmlParserGetDirectory((char *)uri);
9877 if ((ctxt->directory == NULL) && (directory != NULL))
9878 ctxt->directory = directory;
9879 xmlFree(uri);
9880 }
9881
9882 return(ctxt);
9883}
9884
9885/************************************************************************
9886 * *
9887 * Front ends when parsing from a file *
9888 * *
9889 ************************************************************************/
9890
9891/**
9892 * xmlCreateFileParserCtxt:
9893 * @filename: the filename
9894 *
9895 * Create a parser context for a file content.
9896 * Automatic support for ZLIB/Compress compressed document is provided
9897 * by default if found at compile-time.
9898 *
9899 * Returns the new parser context or NULL
9900 */
9901xmlParserCtxtPtr
9902xmlCreateFileParserCtxt(const char *filename)
9903{
9904 xmlParserCtxtPtr ctxt;
9905 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009906 char *directory = NULL;
9907
Owen Taylor3473f882001-02-23 17:55:21 +00009908 ctxt = xmlNewParserCtxt();
9909 if (ctxt == NULL) {
9910 if (xmlDefaultSAXHandler.error != NULL) {
9911 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9912 }
9913 return(NULL);
9914 }
9915
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009916 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009917 if (inputStream == NULL) {
9918 xmlFreeParserCtxt(ctxt);
9919 return(NULL);
9920 }
9921
Owen Taylor3473f882001-02-23 17:55:21 +00009922 inputPush(ctxt, inputStream);
9923 if ((ctxt->directory == NULL) && (directory == NULL))
9924 directory = xmlParserGetDirectory(filename);
9925 if ((ctxt->directory == NULL) && (directory != NULL))
9926 ctxt->directory = directory;
9927
9928 return(ctxt);
9929}
9930
9931/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009932 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009933 * @sax: the SAX handler block
9934 * @filename: the filename
9935 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9936 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009937 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009938 *
9939 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9940 * compressed document is provided by default if found at compile-time.
9941 * It use the given SAX function block to handle the parsing callback.
9942 * If sax is NULL, fallback to the default DOM tree building routines.
9943 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009944 * User data (void *) is stored within the parser context, so it is
9945 * available nearly everywhere in libxml.
9946 *
Owen Taylor3473f882001-02-23 17:55:21 +00009947 * Returns the resulting document tree
9948 */
9949
9950xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009951xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9952 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009953 xmlDocPtr ret;
9954 xmlParserCtxtPtr ctxt;
9955 char *directory = NULL;
9956
Daniel Veillard635ef722001-10-29 11:48:19 +00009957 xmlInitParser();
9958
Owen Taylor3473f882001-02-23 17:55:21 +00009959 ctxt = xmlCreateFileParserCtxt(filename);
9960 if (ctxt == NULL) {
9961 return(NULL);
9962 }
9963 if (sax != NULL) {
9964 if (ctxt->sax != NULL)
9965 xmlFree(ctxt->sax);
9966 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009967 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009968 if (data!=NULL) {
9969 ctxt->_private=data;
9970 }
Owen Taylor3473f882001-02-23 17:55:21 +00009971
9972 if ((ctxt->directory == NULL) && (directory == NULL))
9973 directory = xmlParserGetDirectory(filename);
9974 if ((ctxt->directory == NULL) && (directory != NULL))
9975 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9976
9977 xmlParseDocument(ctxt);
9978
9979 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9980 else {
9981 ret = NULL;
9982 xmlFreeDoc(ctxt->myDoc);
9983 ctxt->myDoc = NULL;
9984 }
9985 if (sax != NULL)
9986 ctxt->sax = NULL;
9987 xmlFreeParserCtxt(ctxt);
9988
9989 return(ret);
9990}
9991
9992/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009993 * xmlSAXParseFile:
9994 * @sax: the SAX handler block
9995 * @filename: the filename
9996 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9997 * documents
9998 *
9999 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10000 * compressed document is provided by default if found at compile-time.
10001 * It use the given SAX function block to handle the parsing callback.
10002 * If sax is NULL, fallback to the default DOM tree building routines.
10003 *
10004 * Returns the resulting document tree
10005 */
10006
10007xmlDocPtr
10008xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
10009 int recovery) {
10010 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
10011}
10012
10013/**
Owen Taylor3473f882001-02-23 17:55:21 +000010014 * xmlRecoverDoc:
10015 * @cur: a pointer to an array of xmlChar
10016 *
10017 * parse an XML in-memory document and build a tree.
10018 * In the case the document is not Well Formed, a tree is built anyway
10019 *
10020 * Returns the resulting document tree
10021 */
10022
10023xmlDocPtr
10024xmlRecoverDoc(xmlChar *cur) {
10025 return(xmlSAXParseDoc(NULL, cur, 1));
10026}
10027
10028/**
10029 * xmlParseFile:
10030 * @filename: the filename
10031 *
10032 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10033 * compressed document is provided by default if found at compile-time.
10034 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +000010035 * Returns the resulting document tree if the file was wellformed,
10036 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +000010037 */
10038
10039xmlDocPtr
10040xmlParseFile(const char *filename) {
10041 return(xmlSAXParseFile(NULL, filename, 0));
10042}
10043
10044/**
10045 * xmlRecoverFile:
10046 * @filename: the filename
10047 *
10048 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
10049 * compressed document is provided by default if found at compile-time.
10050 * In the case the document is not Well Formed, a tree is built anyway
10051 *
10052 * Returns the resulting document tree
10053 */
10054
10055xmlDocPtr
10056xmlRecoverFile(const char *filename) {
10057 return(xmlSAXParseFile(NULL, filename, 1));
10058}
10059
10060
10061/**
10062 * xmlSetupParserForBuffer:
10063 * @ctxt: an XML parser context
10064 * @buffer: a xmlChar * buffer
10065 * @filename: a file name
10066 *
10067 * Setup the parser context to parse a new buffer; Clears any prior
10068 * contents from the parser context. The buffer parameter must not be
10069 * NULL, but the filename parameter can be
10070 */
10071void
10072xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
10073 const char* filename)
10074{
10075 xmlParserInputPtr input;
10076
10077 input = xmlNewInputStream(ctxt);
10078 if (input == NULL) {
10079 perror("malloc");
10080 xmlFree(ctxt);
10081 return;
10082 }
10083
10084 xmlClearParserCtxt(ctxt);
10085 if (filename != NULL)
10086 input->filename = xmlMemStrdup(filename);
10087 input->base = buffer;
10088 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010089 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +000010090 inputPush(ctxt, input);
10091}
10092
10093/**
10094 * xmlSAXUserParseFile:
10095 * @sax: a SAX handler
10096 * @user_data: The user data returned on SAX callbacks
10097 * @filename: a file name
10098 *
10099 * parse an XML file and call the given SAX handler routines.
10100 * Automatic support for ZLIB/Compress compressed document is provided
10101 *
10102 * Returns 0 in case of success or a error number otherwise
10103 */
10104int
10105xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
10106 const char *filename) {
10107 int ret = 0;
10108 xmlParserCtxtPtr ctxt;
10109
10110 ctxt = xmlCreateFileParserCtxt(filename);
10111 if (ctxt == NULL) return -1;
10112 if (ctxt->sax != &xmlDefaultSAXHandler)
10113 xmlFree(ctxt->sax);
10114 ctxt->sax = sax;
10115 if (user_data != NULL)
10116 ctxt->userData = user_data;
10117
10118 xmlParseDocument(ctxt);
10119
10120 if (ctxt->wellFormed)
10121 ret = 0;
10122 else {
10123 if (ctxt->errNo != 0)
10124 ret = ctxt->errNo;
10125 else
10126 ret = -1;
10127 }
10128 if (sax != NULL)
10129 ctxt->sax = NULL;
10130 xmlFreeParserCtxt(ctxt);
10131
10132 return ret;
10133}
10134
10135/************************************************************************
10136 * *
10137 * Front ends when parsing from memory *
10138 * *
10139 ************************************************************************/
10140
10141/**
10142 * xmlCreateMemoryParserCtxt:
10143 * @buffer: a pointer to a char array
10144 * @size: the size of the array
10145 *
10146 * Create a parser context for an XML in-memory document.
10147 *
10148 * Returns the new parser context or NULL
10149 */
10150xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010151xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010152 xmlParserCtxtPtr ctxt;
10153 xmlParserInputPtr input;
10154 xmlParserInputBufferPtr buf;
10155
10156 if (buffer == NULL)
10157 return(NULL);
10158 if (size <= 0)
10159 return(NULL);
10160
10161 ctxt = xmlNewParserCtxt();
10162 if (ctxt == NULL)
10163 return(NULL);
10164
10165 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10166 if (buf == NULL) return(NULL);
10167
10168 input = xmlNewInputStream(ctxt);
10169 if (input == NULL) {
10170 xmlFreeParserCtxt(ctxt);
10171 return(NULL);
10172 }
10173
10174 input->filename = NULL;
10175 input->buf = buf;
10176 input->base = input->buf->buffer->content;
10177 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010178 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010179
10180 inputPush(ctxt, input);
10181 return(ctxt);
10182}
10183
10184/**
10185 * xmlSAXParseMemory:
10186 * @sax: the SAX handler block
10187 * @buffer: an pointer to a char array
10188 * @size: the size of the array
10189 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10190 * documents
10191 *
10192 * parse an XML in-memory block and use the given SAX function block
10193 * to handle the parsing callback. If sax is NULL, fallback to the default
10194 * DOM tree building routines.
10195 *
10196 * Returns the resulting document tree
10197 */
10198xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010199xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10200 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010201 xmlDocPtr ret;
10202 xmlParserCtxtPtr ctxt;
10203
10204 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10205 if (ctxt == NULL) return(NULL);
10206 if (sax != NULL) {
10207 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010208 }
10209
10210 xmlParseDocument(ctxt);
10211
10212 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10213 else {
10214 ret = NULL;
10215 xmlFreeDoc(ctxt->myDoc);
10216 ctxt->myDoc = NULL;
10217 }
10218 if (sax != NULL)
10219 ctxt->sax = NULL;
10220 xmlFreeParserCtxt(ctxt);
10221
10222 return(ret);
10223}
10224
10225/**
10226 * xmlParseMemory:
10227 * @buffer: an pointer to a char array
10228 * @size: the size of the array
10229 *
10230 * parse an XML in-memory block and build a tree.
10231 *
10232 * Returns the resulting document tree
10233 */
10234
Daniel Veillard50822cb2001-07-26 20:05:51 +000010235xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010236 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10237}
10238
10239/**
10240 * xmlRecoverMemory:
10241 * @buffer: an pointer to a char array
10242 * @size: the size of the array
10243 *
10244 * parse an XML in-memory block and build a tree.
10245 * In the case the document is not Well Formed, a tree is built anyway
10246 *
10247 * Returns the resulting document tree
10248 */
10249
Daniel Veillard50822cb2001-07-26 20:05:51 +000010250xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010251 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10252}
10253
10254/**
10255 * xmlSAXUserParseMemory:
10256 * @sax: a SAX handler
10257 * @user_data: The user data returned on SAX callbacks
10258 * @buffer: an in-memory XML document input
10259 * @size: the length of the XML document in bytes
10260 *
10261 * A better SAX parsing routine.
10262 * parse an XML in-memory buffer and call the given SAX handler routines.
10263 *
10264 * Returns 0 in case of success or a error number otherwise
10265 */
10266int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010267 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010268 int ret = 0;
10269 xmlParserCtxtPtr ctxt;
10270 xmlSAXHandlerPtr oldsax = NULL;
10271
10272 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10273 if (ctxt == NULL) return -1;
10274 if (sax != NULL) {
10275 oldsax = ctxt->sax;
10276 ctxt->sax = sax;
10277 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010278 if (user_data != NULL)
10279 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010280
10281 xmlParseDocument(ctxt);
10282
10283 if (ctxt->wellFormed)
10284 ret = 0;
10285 else {
10286 if (ctxt->errNo != 0)
10287 ret = ctxt->errNo;
10288 else
10289 ret = -1;
10290 }
10291 if (sax != NULL) {
10292 ctxt->sax = oldsax;
10293 }
10294 xmlFreeParserCtxt(ctxt);
10295
10296 return ret;
10297}
10298
10299/**
10300 * xmlCreateDocParserCtxt:
10301 * @cur: a pointer to an array of xmlChar
10302 *
10303 * Creates a parser context for an XML in-memory document.
10304 *
10305 * Returns the new parser context or NULL
10306 */
10307xmlParserCtxtPtr
10308xmlCreateDocParserCtxt(xmlChar *cur) {
10309 int len;
10310
10311 if (cur == NULL)
10312 return(NULL);
10313 len = xmlStrlen(cur);
10314 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10315}
10316
10317/**
10318 * xmlSAXParseDoc:
10319 * @sax: the SAX handler block
10320 * @cur: a pointer to an array of xmlChar
10321 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10322 * documents
10323 *
10324 * parse an XML in-memory document and build a tree.
10325 * It use the given SAX function block to handle the parsing callback.
10326 * If sax is NULL, fallback to the default DOM tree building routines.
10327 *
10328 * Returns the resulting document tree
10329 */
10330
10331xmlDocPtr
10332xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10333 xmlDocPtr ret;
10334 xmlParserCtxtPtr ctxt;
10335
10336 if (cur == NULL) return(NULL);
10337
10338
10339 ctxt = xmlCreateDocParserCtxt(cur);
10340 if (ctxt == NULL) return(NULL);
10341 if (sax != NULL) {
10342 ctxt->sax = sax;
10343 ctxt->userData = NULL;
10344 }
10345
10346 xmlParseDocument(ctxt);
10347 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10348 else {
10349 ret = NULL;
10350 xmlFreeDoc(ctxt->myDoc);
10351 ctxt->myDoc = NULL;
10352 }
10353 if (sax != NULL)
10354 ctxt->sax = NULL;
10355 xmlFreeParserCtxt(ctxt);
10356
10357 return(ret);
10358}
10359
10360/**
10361 * xmlParseDoc:
10362 * @cur: a pointer to an array of xmlChar
10363 *
10364 * parse an XML in-memory document and build a tree.
10365 *
10366 * Returns the resulting document tree
10367 */
10368
10369xmlDocPtr
10370xmlParseDoc(xmlChar *cur) {
10371 return(xmlSAXParseDoc(NULL, cur, 0));
10372}
10373
Daniel Veillard8107a222002-01-13 14:10:10 +000010374/************************************************************************
10375 * *
10376 * Specific function to keep track of entities references *
10377 * and used by the XSLT debugger *
10378 * *
10379 ************************************************************************/
10380
/*
 * Callback invoked by xmlAddEntityReference(); while it is NULL (the
 * initial value) no notification is made. It is installed with
 * xmlSetEntityReferenceFunc().
 */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
10382
10383/**
10384 * xmlAddEntityReference:
10385 * @ent : A valid entity
10386 * @firstNode : A valid first node for children of entity
10387 * @lastNode : A valid last node of children entity
10388 *
10389 * Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
10390 */
10391static void
10392xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
10393 xmlNodePtr lastNode)
10394{
10395 if (xmlEntityRefFunc != NULL) {
10396 (*xmlEntityRefFunc) (ent, firstNode, lastNode);
10397 }
10398}
10399
10400
10401/**
10402 * xmlSetEntityReferenceFunc:
10403 * @func : A valid function
10404 *
10405 * Set the function to call call back when a xml reference has been made
10406 */
10407void
10408xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
10409{
10410 xmlEntityRefFunc = func;
10411}
Owen Taylor3473f882001-02-23 17:55:21 +000010412
10413/************************************************************************
10414 * *
10415 * Miscellaneous *
10416 * *
10417 ************************************************************************/
10418
10419#ifdef LIBXML_XPATH_ENABLED
10420#include <libxml/xpath.h>
10421#endif
10422
/* Declared here so that xmlInitParser() can compare xmlGenericError to it */
extern void xmlGenericErrorDefaultFunc(void *ctx, const char *msg, ...);
/* Set to 1 at the end of xmlInitParser(), reset to 0 by xmlCleanupParser() */
static int xmlParserInitialized = 0;
10425
10426/**
10427 * xmlInitParser:
10428 *
10429 * Initialization function for the XML parser.
10430 * This is not reentrant. Call once before processing in case of
10431 * use in multithreaded programs.
10432 */
10433
10434void
10435xmlInitParser(void) {
Daniel Veillard3c01b1d2001-10-17 15:58:35 +000010436 if (xmlParserInitialized != 0)
10437 return;
Owen Taylor3473f882001-02-23 17:55:21 +000010438
Daniel Veillarddb5850a2002-01-18 11:49:26 +000010439 if ((xmlGenericError == xmlGenericErrorDefaultFunc) ||
10440 (xmlGenericError == NULL))
10441 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010442 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010443 xmlInitMemory();
Owen Taylor3473f882001-02-23 17:55:21 +000010444 xmlInitCharEncodingHandlers();
10445 xmlInitializePredefinedEntities();
10446 xmlDefaultSAXHandlerInit();
10447 xmlRegisterDefaultInputCallbacks();
10448 xmlRegisterDefaultOutputCallbacks();
10449#ifdef LIBXML_HTML_ENABLED
10450 htmlInitAutoClose();
10451 htmlDefaultSAXHandlerInit();
10452#endif
10453#ifdef LIBXML_XPATH_ENABLED
10454 xmlXPathInit();
10455#endif
10456 xmlParserInitialized = 1;
10457}
10458
10459/**
10460 * xmlCleanupParser:
10461 *
10462 * Cleanup function for the XML parser. It tries to reclaim all
10463 * parsing related global memory allocated for the parser processing.
10464 * It doesn't deallocate any document related memory. Calling this
10465 * function should not prevent reusing the parser.
10466 */
10467
10468void
10469xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010470 xmlCleanupCharEncodingHandlers();
10471 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010472#ifdef LIBXML_CATALOG_ENABLED
10473 xmlCatalogCleanup();
10474#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010475 xmlCleanupThreads();
10476 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010477}