blob: 15a0f6915e4e2de883dc3579d501b06d2a428571 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
Daniel Veillard5d96fff2001-08-31 14:55:30 +000025 * from the SAX callbacks or as standalones functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Owen Taylor3473f882001-02-23 17:55:21 +000035#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000044#include <libxml/threads.h>
45#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/tree.h>
47#include <libxml/parser.h>
48#include <libxml/parserInternals.h>
49#include <libxml/valid.h>
50#include <libxml/entities.h>
51#include <libxml/xmlerror.h>
52#include <libxml/encoding.h>
53#include <libxml/xmlIO.h>
54#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000055#ifdef LIBXML_CATALOG_ENABLED
56#include <libxml/catalog.h>
57#endif
Owen Taylor3473f882001-02-23 17:55:21 +000058
59#ifdef HAVE_CTYPE_H
60#include <ctype.h>
61#endif
62#ifdef HAVE_STDLIB_H
63#include <stdlib.h>
64#endif
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68#ifdef HAVE_FCNTL_H
69#include <fcntl.h>
70#endif
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#ifdef HAVE_ZLIB_H
75#include <zlib.h>
76#endif
77
78
Daniel Veillard21a0f912001-02-25 19:54:14 +000079#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000080#define XML_PARSER_BUFFER_SIZE 100
81
/*
 * NULL-terminated table of the "xml"-prefixed processing instruction
 * targets which the W3C specifications allow.
 */
static const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
90
91/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
92void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
93xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
94 const xmlChar **str);
95
Daniel Veillard257d9102001-05-08 10:41:44 +000096static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000097xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
98 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000099 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000100 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000101
102/************************************************************************
103 * *
104 * Parser stacks related functions and macros *
105 * *
106 ************************************************************************/
107
108xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
109 const xmlChar ** str);
110
111/*
112 * Generic function for accessing stacks in the Parser Context
113 */
114
115#define PUSH_AND_POP(scope, type, name) \
116scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
117 if (ctxt->name##Nr >= ctxt->name##Max) { \
118 ctxt->name##Max *= 2; \
119 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
120 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
121 if (ctxt->name##Tab == NULL) { \
122 xmlGenericError(xmlGenericErrorContext, \
123 "realloc failed !\n"); \
124 return(0); \
125 } \
126 } \
127 ctxt->name##Tab[ctxt->name##Nr] = value; \
128 ctxt->name = value; \
129 return(ctxt->name##Nr++); \
130} \
131scope type name##Pop(xmlParserCtxtPtr ctxt) { \
132 type ret; \
133 if (ctxt->name##Nr <= 0) return(0); \
134 ctxt->name##Nr--; \
135 if (ctxt->name##Nr > 0) \
136 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
137 else \
138 ctxt->name = NULL; \
139 ret = ctxt->name##Tab[ctxt->name##Nr]; \
140 ctxt->name##Tab[ctxt->name##Nr] = 0; \
141 return(ret); \
142} \
143
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000144/**
145 * inputPop:
146 * @ctxt: an XML parser context
147 *
148 * Pops the top parser input from the input stack
149 *
150 * Returns the input just removed
151 */
152/**
153 * inputPush:
154 * @ctxt: an XML parser context
155 * @input: the parser input
156 *
157 * Pushes a new parser input on top of the input stack
158 */
159/**
160 * namePop:
161 * @ctxt: an XML parser context
162 *
163 * Pops the top element name from the name stack
164 *
165 * Returns the name just removed
166 */
167/**
168 * namePush:
169 * @ctxt: an XML parser context
170 * @name: the element name
171 *
172 * Pushes a new element name on top of the name stack
173 */
174/**
175 * nodePop:
176 * @ctxt: an XML parser context
177 *
178 * Pops the top element node from the node stack
179 *
180 * Returns the node just removed
181 */
182/**
183 * nodePush:
184 * @ctxt: an XML parser context
185 * @node: the element node
186 *
187 * Pushes a new element node on top of the node stack
188 */
Owen Taylor3473f882001-02-23 17:55:21 +0000189/*
190 * Those macros actually generate the functions
191 */
192PUSH_AND_POP(extern, xmlParserInputPtr, input)
193PUSH_AND_POP(extern, xmlNodePtr, node)
194PUSH_AND_POP(extern, xmlChar*, name)
195
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000196static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000197 if (ctxt->spaceNr >= ctxt->spaceMax) {
198 ctxt->spaceMax *= 2;
199 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
200 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
201 if (ctxt->spaceTab == NULL) {
202 xmlGenericError(xmlGenericErrorContext,
203 "realloc failed !\n");
204 return(0);
205 }
206 }
207 ctxt->spaceTab[ctxt->spaceNr] = val;
208 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
209 return(ctxt->spaceNr++);
210}
211
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000212static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000213 int ret;
214 if (ctxt->spaceNr <= 0) return(0);
215 ctxt->spaceNr--;
216 if (ctxt->spaceNr > 0)
217 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
218 else
219 ctxt->space = NULL;
220 ret = ctxt->spaceTab[ctxt->spaceNr];
221 ctxt->spaceTab[ctxt->spaceNr] = -1;
222 return(ret);
223}
224
225/*
226 * Macros for accessing the content. Those should be used only by the parser,
227 * and not exported.
228 *
229 * Dirty macros, i.e. one often need to make assumption on the context to
230 * use them
231 *
232 * CUR_PTR return the current pointer to the xmlChar to be parsed.
233 * To be used with extreme caution since operations consuming
234 * characters may move the input buffer to a different location !
235 * CUR returns the current xmlChar value, i.e. an 8 bit value.
236 * This should be used internally by the parser
237 * only to compare to ASCII values, otherwise it would break when
238 * running with UTF-8 encoding.
239 * RAW same as CUR but in the input buffer, bypass any token
240 * extraction that may have been done
241 * NXT(n) returns the n'th next xmlChar. Same as CUR, it should be used only
242 * to compare on ASCII based substring.
243 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
244 * strings within the parser.
245 *
246 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
247 *
248 * NEXT Skip to the next character, this does the proper decoding
249 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
250 * NEXTL(l) Skip l xmlChars in the input buffer
251 * CUR_CHAR(l) returns the current unicode character (int), set l
252 * to the number of xmlChars used for the encoding [0-5].
253 * CUR_SCHAR same but operate on a string instead of the context
254 * COPY_BUF copy the current unicode char to the target buffer, increment
255 * the index
256 * GROW, SHRINK handling of input buffers
257 */
258
/*
 * Direct accessors on the current input. All of them expect a variable
 * named ctxt to be in scope at the point of use.
 */
#define CUR_PTR ctxt->input->cur
#define NXT(val) ctxt->input->cur[(val)]
/* RAW yields -1 while ctxt->token is set, CUR yields the token itself. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))

/*
 * SKIP(val): advance by val xmlChars, then hand a '%' over to
 * xmlParserHandlePEReference() and pop the input if it is exhausted and
 * cannot be grown. Note that val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val);						\
    ctxt->input->cur += (val);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
        xmlPopInput(ctxt);						\
  } while (0)

/*
 * SHRINK and GROW deliberately expand to a bare "if (...) { ... }"
 * statement: this file also uses them without a trailing semicolon.
 */
#define SHRINK								\
    if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {		\
        xmlParserInputShrink(ctxt->input);				\
        if ((*ctxt->input->cur == 0) &&					\
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))	\
            xmlPopInput(ctxt);						\
    }

#define GROW								\
    if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {		\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
        if ((*ctxt->input->cur == 0) &&					\
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))	\
            xmlPopInput(ctxt);						\
    }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* NEXT1: step over one xmlChar, growing the input when its end is hit. */
#define NEXT1 {								\
    ctxt->input->cur++;							\
    ctxt->nbChars++;							\
    if (*ctxt->input->cur == 0)						\
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
  }

/*
 * NEXTL(l): update the line/column bookkeeping for the current character,
 * clear any pending token, advance by l xmlChars and hand a '%' over to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++;						\
        ctxt->input->col = 1;						\
    } else								\
        ctxt->input->col++;						\
    ctxt->token = 0;							\
    ctxt->input->cur += (l);						\
    if (*ctxt->input->cur == '%')					\
        xmlParserHandlePEReference(ctxt);				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)
307
/*
 * COPY_BUF(l,b,i,v): append the character v to buffer b at index i and
 * advance i. When l is 1 the value is stored as a single xmlChar,
 * otherwise it is written by xmlCopyCharMultiByte(), whose return value
 * is added to i.
 *
 * The body is wrapped in do { } while (0) and the arguments are
 * parenthesized so the macro behaves as one statement (no dangling else)
 * and is safe with expression arguments. Callers must still terminate
 * the invocation with a semicolon, exactly as before.
 */
#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)], (v));			\
  } while (0)
Owen Taylor3473f882001-02-23 17:55:21 +0000311
312/**
313 * xmlSkipBlankChars:
314 * @ctxt: the XML parser context
315 *
316 * skip all blanks character found at that point in the input streams.
317 * It pops up finished entities in the process if allowable at that point.
318 *
319 * Returns the number of space chars skipped
320 */
321
322int
323xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000324 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000325
Daniel Veillard02141ea2001-04-30 11:46:40 +0000326 if (ctxt->token != 0) {
327 if (!IS_BLANK(ctxt->token))
328 return(0);
329 ctxt->token = 0;
330 res++;
331 }
Owen Taylor3473f882001-02-23 17:55:21 +0000332 /*
333 * It's Okay to use CUR/NEXT here since all the blanks are on
334 * the ASCII range.
335 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000336 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
337 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000338 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000339 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000340 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000341 cur = ctxt->input->cur;
342 while (IS_BLANK(*cur)) {
343 if (*cur == '\n') {
344 ctxt->input->line++; ctxt->input->col = 1;
345 }
346 cur++;
347 res++;
348 if (*cur == 0) {
349 ctxt->input->cur = cur;
350 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
351 cur = ctxt->input->cur;
352 }
353 }
354 ctxt->input->cur = cur;
355 } else {
356 int cur;
357 do {
358 cur = CUR;
359 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
360 NEXT;
361 cur = CUR;
362 res++;
363 }
364 while ((cur == 0) && (ctxt->inputNr > 1) &&
365 (ctxt->instate != XML_PARSER_COMMENT)) {
366 xmlPopInput(ctxt);
367 cur = CUR;
368 }
369 /*
370 * Need to handle support of entities branching here
371 */
372 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
373 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
374 }
Owen Taylor3473f882001-02-23 17:55:21 +0000375 return(res);
376}
377
378/************************************************************************
379 * *
380 * Commodity functions to handle entities *
381 * *
382 ************************************************************************/
383
384/**
385 * xmlPopInput:
386 * @ctxt: an XML parser context
387 *
388 * xmlPopInput: the current input pointed by ctxt->input came to an end
389 * pop it and return the next char.
390 *
391 * Returns the current xmlChar in the parser context
392 */
393xmlChar
394xmlPopInput(xmlParserCtxtPtr ctxt) {
395 if (ctxt->inputNr == 1) return(0); /* End of main Input */
396 if (xmlParserDebugEntities)
397 xmlGenericError(xmlGenericErrorContext,
398 "Popping input %d\n", ctxt->inputNr);
399 xmlFreeInputStream(inputPop(ctxt));
400 if ((*ctxt->input->cur == 0) &&
401 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
402 return(xmlPopInput(ctxt));
403 return(CUR);
404}
405
406/**
407 * xmlPushInput:
408 * @ctxt: an XML parser context
409 * @input: an XML parser input fragment (entity, XML fragment ...).
410 *
411 * xmlPushInput: switch to a new input stream which is stacked on top
412 * of the previous one(s).
413 */
414void
415xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
416 if (input == NULL) return;
417
418 if (xmlParserDebugEntities) {
419 if ((ctxt->input != NULL) && (ctxt->input->filename))
420 xmlGenericError(xmlGenericErrorContext,
421 "%s(%d): ", ctxt->input->filename,
422 ctxt->input->line);
423 xmlGenericError(xmlGenericErrorContext,
424 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
425 }
426 inputPush(ctxt, input);
427 GROW;
428}
429
430/**
431 * xmlParseCharRef:
432 * @ctxt: an XML parser context
433 *
434 * parse Reference declarations
435 *
436 * [66] CharRef ::= '&#' [0-9]+ ';' |
437 * '&#x' [0-9a-fA-F]+ ';'
438 *
439 * [ WFC: Legal Character ]
440 * Characters referred to using character references must match the
441 * production for Char.
442 *
443 * Returns the value parsed (as an int), 0 in case of error
444 */
445int
446xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000447 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000448 int count = 0;
449
450 if (ctxt->token != 0) {
451 val = ctxt->token;
452 ctxt->token = 0;
453 return(val);
454 }
455 /*
456 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
457 */
458 if ((RAW == '&') && (NXT(1) == '#') &&
459 (NXT(2) == 'x')) {
460 SKIP(3);
461 GROW;
462 while (RAW != ';') { /* loop blocked by count */
463 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
464 val = val * 16 + (CUR - '0');
465 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
466 val = val * 16 + (CUR - 'a') + 10;
467 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
468 val = val * 16 + (CUR - 'A') + 10;
469 else {
470 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
472 ctxt->sax->error(ctxt->userData,
473 "xmlParseCharRef: invalid hexadecimal value\n");
474 ctxt->wellFormed = 0;
475 ctxt->disableSAX = 1;
476 val = 0;
477 break;
478 }
479 NEXT;
480 count++;
481 }
482 if (RAW == ';') {
483 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
484 ctxt->nbChars ++;
485 ctxt->input->cur++;
486 }
487 } else if ((RAW == '&') && (NXT(1) == '#')) {
488 SKIP(2);
489 GROW;
490 while (RAW != ';') { /* loop blocked by count */
491 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
492 val = val * 10 + (CUR - '0');
493 else {
494 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
495 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
496 ctxt->sax->error(ctxt->userData,
497 "xmlParseCharRef: invalid decimal value\n");
498 ctxt->wellFormed = 0;
499 ctxt->disableSAX = 1;
500 val = 0;
501 break;
502 }
503 NEXT;
504 count++;
505 }
506 if (RAW == ';') {
507 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
508 ctxt->nbChars ++;
509 ctxt->input->cur++;
510 }
511 } else {
512 ctxt->errNo = XML_ERR_INVALID_CHARREF;
513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
514 ctxt->sax->error(ctxt->userData,
515 "xmlParseCharRef: invalid value\n");
516 ctxt->wellFormed = 0;
517 ctxt->disableSAX = 1;
518 }
519
520 /*
521 * [ WFC: Legal Character ]
522 * Characters referred to using character references must match the
523 * production for Char.
524 */
525 if (IS_CHAR(val)) {
526 return(val);
527 } else {
528 ctxt->errNo = XML_ERR_INVALID_CHAR;
529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
530 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
531 val);
532 ctxt->wellFormed = 0;
533 ctxt->disableSAX = 1;
534 }
535 return(0);
536}
537
538/**
539 * xmlParseStringCharRef:
540 * @ctxt: an XML parser context
541 * @str: a pointer to an index in the string
542 *
543 * parse Reference declarations, variant parsing from a string rather
544 * than an an input flow.
545 *
546 * [66] CharRef ::= '&#' [0-9]+ ';' |
547 * '&#x' [0-9a-fA-F]+ ';'
548 *
549 * [ WFC: Legal Character ]
550 * Characters referred to using character references must match the
551 * production for Char.
552 *
553 * Returns the value parsed (as an int), 0 in case of error, str will be
554 * updated to the current value of the index
555 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000556static int
Owen Taylor3473f882001-02-23 17:55:21 +0000557xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
558 const xmlChar *ptr;
559 xmlChar cur;
560 int val = 0;
561
562 if ((str == NULL) || (*str == NULL)) return(0);
563 ptr = *str;
564 cur = *ptr;
565 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
566 ptr += 3;
567 cur = *ptr;
568 while (cur != ';') { /* Non input consuming loop */
569 if ((cur >= '0') && (cur <= '9'))
570 val = val * 16 + (cur - '0');
571 else if ((cur >= 'a') && (cur <= 'f'))
572 val = val * 16 + (cur - 'a') + 10;
573 else if ((cur >= 'A') && (cur <= 'F'))
574 val = val * 16 + (cur - 'A') + 10;
575 else {
576 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
578 ctxt->sax->error(ctxt->userData,
579 "xmlParseStringCharRef: invalid hexadecimal value\n");
580 ctxt->wellFormed = 0;
581 ctxt->disableSAX = 1;
582 val = 0;
583 break;
584 }
585 ptr++;
586 cur = *ptr;
587 }
588 if (cur == ';')
589 ptr++;
590 } else if ((cur == '&') && (ptr[1] == '#')){
591 ptr += 2;
592 cur = *ptr;
593 while (cur != ';') { /* Non input consuming loops */
594 if ((cur >= '0') && (cur <= '9'))
595 val = val * 10 + (cur - '0');
596 else {
597 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
599 ctxt->sax->error(ctxt->userData,
600 "xmlParseStringCharRef: invalid decimal value\n");
601 ctxt->wellFormed = 0;
602 ctxt->disableSAX = 1;
603 val = 0;
604 break;
605 }
606 ptr++;
607 cur = *ptr;
608 }
609 if (cur == ';')
610 ptr++;
611 } else {
612 ctxt->errNo = XML_ERR_INVALID_CHARREF;
613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
614 ctxt->sax->error(ctxt->userData,
615 "xmlParseCharRef: invalid value\n");
616 ctxt->wellFormed = 0;
617 ctxt->disableSAX = 1;
618 return(0);
619 }
620 *str = ptr;
621
622 /*
623 * [ WFC: Legal Character ]
624 * Characters referred to using character references must match the
625 * production for Char.
626 */
627 if (IS_CHAR(val)) {
628 return(val);
629 } else {
630 ctxt->errNo = XML_ERR_INVALID_CHAR;
631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
632 ctxt->sax->error(ctxt->userData,
633 "CharRef: invalid xmlChar value %d\n", val);
634 ctxt->wellFormed = 0;
635 ctxt->disableSAX = 1;
636 }
637 return(0);
638}
639
640/**
641 * xmlParserHandlePEReference:
642 * @ctxt: the parser context
643 *
644 * [69] PEReference ::= '%' Name ';'
645 *
646 * [ WFC: No Recursion ]
647 * A parsed entity must not contain a recursive
648 * reference to itself, either directly or indirectly.
649 *
650 * [ WFC: Entity Declared ]
651 * In a document without any DTD, a document with only an internal DTD
652 * subset which contains no parameter entity references, or a document
653 * with "standalone='yes'", ... ... The declaration of a parameter
654 * entity must precede any reference to it...
655 *
656 * [ VC: Entity Declared ]
657 * In a document with an external subset or external parameter entities
658 * with "standalone='no'", ... ... The declaration of a parameter entity
659 * must precede any reference to it...
660 *
661 * [ WFC: In DTD ]
662 * Parameter-entity references may only appear in the DTD.
663 * NOTE: misleading but this is handled.
664 *
665 * A PEReference may have been detected in the current input stream
666 * the handling is done accordingly to
667 * http://www.w3.org/TR/REC-xml#entproc
668 * i.e.
669 * - Included in literal in entity values
670 * - Included as Paraemeter Entity reference within DTDs
671 */
672void
673xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
674 xmlChar *name;
675 xmlEntityPtr entity = NULL;
676 xmlParserInputPtr input;
677
678 if (ctxt->token != 0) {
679 return;
680 }
681 if (RAW != '%') return;
682 switch(ctxt->instate) {
683 case XML_PARSER_CDATA_SECTION:
684 return;
685 case XML_PARSER_COMMENT:
686 return;
687 case XML_PARSER_START_TAG:
688 return;
689 case XML_PARSER_END_TAG:
690 return;
691 case XML_PARSER_EOF:
692 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
694 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
695 ctxt->wellFormed = 0;
696 ctxt->disableSAX = 1;
697 return;
698 case XML_PARSER_PROLOG:
699 case XML_PARSER_START:
700 case XML_PARSER_MISC:
701 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
703 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
704 ctxt->wellFormed = 0;
705 ctxt->disableSAX = 1;
706 return;
707 case XML_PARSER_ENTITY_DECL:
708 case XML_PARSER_CONTENT:
709 case XML_PARSER_ATTRIBUTE_VALUE:
710 case XML_PARSER_PI:
711 case XML_PARSER_SYSTEM_LITERAL:
712 /* we just ignore it there */
713 return;
714 case XML_PARSER_EPILOG:
715 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
717 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
718 ctxt->wellFormed = 0;
719 ctxt->disableSAX = 1;
720 return;
721 case XML_PARSER_ENTITY_VALUE:
722 /*
723 * NOTE: in the case of entity values, we don't do the
724 * substitution here since we need the literal
725 * entity value to be able to save the internal
726 * subset of the document.
727 * This will be handled by xmlStringDecodeEntities
728 */
729 return;
730 case XML_PARSER_DTD:
731 /*
732 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
733 * In the internal DTD subset, parameter-entity references
734 * can occur only where markup declarations can occur, not
735 * within markup declarations.
736 * In that case this is handled in xmlParseMarkupDecl
737 */
738 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
739 return;
740 break;
741 case XML_PARSER_IGNORE:
742 return;
743 }
744
745 NEXT;
746 name = xmlParseName(ctxt);
747 if (xmlParserDebugEntities)
748 xmlGenericError(xmlGenericErrorContext,
749 "PE Reference: %s\n", name);
750 if (name == NULL) {
751 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
753 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
754 ctxt->wellFormed = 0;
755 ctxt->disableSAX = 1;
756 } else {
757 if (RAW == ';') {
758 NEXT;
759 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
760 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
761 if (entity == NULL) {
762
763 /*
764 * [ WFC: Entity Declared ]
765 * In a document without any DTD, a document with only an
766 * internal DTD subset which contains no parameter entity
767 * references, or a document with "standalone='yes'", ...
768 * ... The declaration of a parameter entity must precede
769 * any reference to it...
770 */
771 if ((ctxt->standalone == 1) ||
772 ((ctxt->hasExternalSubset == 0) &&
773 (ctxt->hasPErefs == 0))) {
774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
775 ctxt->sax->error(ctxt->userData,
776 "PEReference: %%%s; not found\n", name);
777 ctxt->wellFormed = 0;
778 ctxt->disableSAX = 1;
779 } else {
780 /*
781 * [ VC: Entity Declared ]
782 * In a document with an external subset or external
783 * parameter entities with "standalone='no'", ...
784 * ... The declaration of a parameter entity must precede
785 * any reference to it...
786 */
787 if ((!ctxt->disableSAX) &&
788 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
789 ctxt->vctxt.error(ctxt->vctxt.userData,
790 "PEReference: %%%s; not found\n", name);
791 } else if ((!ctxt->disableSAX) &&
792 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
793 ctxt->sax->warning(ctxt->userData,
794 "PEReference: %%%s; not found\n", name);
795 ctxt->valid = 0;
796 }
797 } else {
798 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
799 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000800 xmlChar start[4];
801 xmlCharEncoding enc;
802
Owen Taylor3473f882001-02-23 17:55:21 +0000803 /*
804 * handle the extra spaces added before and after
805 * c.f. http://www.w3.org/TR/REC-xml#as-PE
806 * this is done independantly.
807 */
808 input = xmlNewEntityInputStream(ctxt, entity);
809 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000810
811 /*
812 * Get the 4 first bytes and decode the charset
813 * if enc != XML_CHAR_ENCODING_NONE
814 * plug some encoding conversion routines.
815 */
816 GROW
817 start[0] = RAW;
818 start[1] = NXT(1);
819 start[2] = NXT(2);
820 start[3] = NXT(3);
821 enc = xmlDetectCharEncoding(start, 4);
822 if (enc != XML_CHAR_ENCODING_NONE) {
823 xmlSwitchEncoding(ctxt, enc);
824 }
825
Owen Taylor3473f882001-02-23 17:55:21 +0000826 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
827 (RAW == '<') && (NXT(1) == '?') &&
828 (NXT(2) == 'x') && (NXT(3) == 'm') &&
829 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
830 xmlParseTextDecl(ctxt);
831 }
832 if (ctxt->token == 0)
833 ctxt->token = ' ';
834 } else {
835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
836 ctxt->sax->error(ctxt->userData,
837 "xmlHandlePEReference: %s is not a parameter entity\n",
838 name);
839 ctxt->wellFormed = 0;
840 ctxt->disableSAX = 1;
841 }
842 }
843 } else {
844 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
846 ctxt->sax->error(ctxt->userData,
847 "xmlHandlePEReference: expecting ';'\n");
848 ctxt->wellFormed = 0;
849 ctxt->disableSAX = 1;
850 }
851 xmlFree(name);
852 }
853}
854
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles buffer##_size and reallocates buffer to the
 * new size. The reallocation goes through a temporary pointer: when it
 * fails the original block is released (it used to be leaked) and the
 * enclosing function returns NULL. The macro can therefore only be used
 * in functions returning a pointer.
 */
#define growBuffer(buffer) {						\
    xmlChar *growBuffer_tmp;						\
    growBuffer_tmp = (xmlChar *)					\
        xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));	\
    if (growBuffer_tmp == NULL) {					\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        buffer = NULL;							\
        return(NULL);							\
    }									\
    buffer = growBuffer_tmp;						\
    buffer##_size *= 2;							\
}
867
868/**
869 * xmlStringDecodeEntities:
870 * @ctxt: the parser context
871 * @str: the input string
872 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
873 * @end: an end marker xmlChar, 0 if none
874 * @end2: an end marker xmlChar, 0 if none
875 * @end3: an end marker xmlChar, 0 if none
876 *
877 * Takes a entity string content and process to do the adequate subtitutions.
878 *
879 * [67] Reference ::= EntityRef | CharRef
880 *
881 * [69] PEReference ::= '%' Name ';'
882 *
883 * Returns A newly allocated string with the substitution done. The caller
884 * must deallocate it !
885 */
886xmlChar *
887xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
888 xmlChar end, xmlChar end2, xmlChar end3) {
889 xmlChar *buffer = NULL;
890 int buffer_size = 0;
891
892 xmlChar *current = NULL;
893 xmlEntityPtr ent;
894 int c,l;
895 int nbchars = 0;
896
897 if (str == NULL)
898 return(NULL);
899
900 if (ctxt->depth > 40) {
901 ctxt->errNo = XML_ERR_ENTITY_LOOP;
902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
903 ctxt->sax->error(ctxt->userData,
904 "Detected entity reference loop\n");
905 ctxt->wellFormed = 0;
906 ctxt->disableSAX = 1;
907 return(NULL);
908 }
909
910 /*
911 * allocate a translation buffer.
912 */
913 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
914 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
915 if (buffer == NULL) {
916 perror("xmlDecodeEntities: malloc failed");
917 return(NULL);
918 }
919
920 /*
921 * Ok loop until we reach one of the ending char or a size limit.
922 * we are operating on already parsed values.
923 */
924 c = CUR_SCHAR(str, l);
925 while ((c != 0) && (c != end) && /* non input consuming loop */
926 (c != end2) && (c != end3)) {
927
928 if (c == 0) break;
929 if ((c == '&') && (str[1] == '#')) {
930 int val = xmlParseStringCharRef(ctxt, &str);
931 if (val != 0) {
932 COPY_BUF(0,buffer,nbchars,val);
933 }
934 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
935 if (xmlParserDebugEntities)
936 xmlGenericError(xmlGenericErrorContext,
937 "String decoding Entity Reference: %.30s\n",
938 str);
939 ent = xmlParseStringEntityRef(ctxt, &str);
940 if ((ent != NULL) &&
941 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
942 if (ent->content != NULL) {
943 COPY_BUF(0,buffer,nbchars,ent->content[0]);
944 } else {
945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
946 ctxt->sax->error(ctxt->userData,
947 "internal error entity has no content\n");
948 }
949 } else if ((ent != NULL) && (ent->content != NULL)) {
950 xmlChar *rep;
951
952 ctxt->depth++;
953 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
954 0, 0, 0);
955 ctxt->depth--;
956 if (rep != NULL) {
957 current = rep;
958 while (*current != 0) { /* non input consuming loop */
959 buffer[nbchars++] = *current++;
960 if (nbchars >
961 buffer_size - XML_PARSER_BUFFER_SIZE) {
962 growBuffer(buffer);
963 }
964 }
965 xmlFree(rep);
966 }
967 } else if (ent != NULL) {
968 int i = xmlStrlen(ent->name);
969 const xmlChar *cur = ent->name;
970
971 buffer[nbchars++] = '&';
972 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
973 growBuffer(buffer);
974 }
975 for (;i > 0;i--)
976 buffer[nbchars++] = *cur++;
977 buffer[nbchars++] = ';';
978 }
979 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
980 if (xmlParserDebugEntities)
981 xmlGenericError(xmlGenericErrorContext,
982 "String decoding PE Reference: %.30s\n", str);
983 ent = xmlParseStringPEReference(ctxt, &str);
984 if (ent != NULL) {
985 xmlChar *rep;
986
987 ctxt->depth++;
988 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
989 0, 0, 0);
990 ctxt->depth--;
991 if (rep != NULL) {
992 current = rep;
993 while (*current != 0) { /* non input consuming loop */
994 buffer[nbchars++] = *current++;
995 if (nbchars >
996 buffer_size - XML_PARSER_BUFFER_SIZE) {
997 growBuffer(buffer);
998 }
999 }
1000 xmlFree(rep);
1001 }
1002 }
1003 } else {
1004 COPY_BUF(l,buffer,nbchars,c);
1005 str += l;
1006 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1007 growBuffer(buffer);
1008 }
1009 }
1010 c = CUR_SCHAR(str, l);
1011 }
1012 buffer[nbchars++] = 0;
1013 return(buffer);
1014}
1015
1016
1017/************************************************************************
1018 * *
1019 * Commodity functions to handle xmlChars *
1020 * *
1021 ************************************************************************/
1022
1023/**
1024 * xmlStrndup:
1025 * @cur: the input xmlChar *
1026 * @len: the len of @cur
1027 *
1028 * a strndup for array of xmlChar's
1029 *
1030 * Returns a new xmlChar * or NULL
1031 */
1032xmlChar *
1033xmlStrndup(const xmlChar *cur, int len) {
1034 xmlChar *ret;
1035
1036 if ((cur == NULL) || (len < 0)) return(NULL);
1037 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1038 if (ret == NULL) {
1039 xmlGenericError(xmlGenericErrorContext,
1040 "malloc of %ld byte failed\n",
1041 (len + 1) * (long)sizeof(xmlChar));
1042 return(NULL);
1043 }
1044 memcpy(ret, cur, len * sizeof(xmlChar));
1045 ret[len] = 0;
1046 return(ret);
1047}
1048
1049/**
1050 * xmlStrdup:
1051 * @cur: the input xmlChar *
1052 *
1053 * a strdup for array of xmlChar's. Since they are supposed to be
1054 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1055 * a termination mark of '0'.
1056 *
1057 * Returns a new xmlChar * or NULL
1058 */
1059xmlChar *
1060xmlStrdup(const xmlChar *cur) {
1061 const xmlChar *p = cur;
1062
1063 if (cur == NULL) return(NULL);
1064 while (*p != 0) p++; /* non input consuming */
1065 return(xmlStrndup(cur, p - cur));
1066}
1067
1068/**
1069 * xmlCharStrndup:
1070 * @cur: the input char *
1071 * @len: the len of @cur
1072 *
1073 * a strndup for char's to xmlChar's
1074 *
1075 * Returns a new xmlChar * or NULL
1076 */
1077
1078xmlChar *
1079xmlCharStrndup(const char *cur, int len) {
1080 int i;
1081 xmlChar *ret;
1082
1083 if ((cur == NULL) || (len < 0)) return(NULL);
1084 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1085 if (ret == NULL) {
1086 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1087 (len + 1) * (long)sizeof(xmlChar));
1088 return(NULL);
1089 }
1090 for (i = 0;i < len;i++)
1091 ret[i] = (xmlChar) cur[i];
1092 ret[len] = 0;
1093 return(ret);
1094}
1095
1096/**
1097 * xmlCharStrdup:
1098 * @cur: the input char *
1099 * @len: the len of @cur
1100 *
1101 * a strdup for char's to xmlChar's
1102 *
1103 * Returns a new xmlChar * or NULL
1104 */
1105
1106xmlChar *
1107xmlCharStrdup(const char *cur) {
1108 const char *p = cur;
1109
1110 if (cur == NULL) return(NULL);
1111 while (*p != '\0') p++; /* non input consuming */
1112 return(xmlCharStrndup(cur, p - cur));
1113}
1114
1115/**
1116 * xmlStrcmp:
1117 * @str1: the first xmlChar *
1118 * @str2: the second xmlChar *
1119 *
1120 * a strcmp for xmlChar's
1121 *
1122 * Returns the integer result of the comparison
1123 */
1124
1125int
1126xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1127 register int tmp;
1128
1129 if (str1 == str2) return(0);
1130 if (str1 == NULL) return(-1);
1131 if (str2 == NULL) return(1);
1132 do {
1133 tmp = *str1++ - *str2;
1134 if (tmp != 0) return(tmp);
1135 } while (*str2++ != 0);
1136 return 0;
1137}
1138
1139/**
1140 * xmlStrEqual:
1141 * @str1: the first xmlChar *
1142 * @str2: the second xmlChar *
1143 *
1144 * Check if both string are equal of have same content
1145 * Should be a bit more readable and faster than xmlStrEqual()
1146 *
1147 * Returns 1 if they are equal, 0 if they are different
1148 */
1149
1150int
1151xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1152 if (str1 == str2) return(1);
1153 if (str1 == NULL) return(0);
1154 if (str2 == NULL) return(0);
1155 do {
1156 if (*str1++ != *str2) return(0);
1157 } while (*str2++);
1158 return(1);
1159}
1160
1161/**
1162 * xmlStrncmp:
1163 * @str1: the first xmlChar *
1164 * @str2: the second xmlChar *
1165 * @len: the max comparison length
1166 *
1167 * a strncmp for xmlChar's
1168 *
1169 * Returns the integer result of the comparison
1170 */
1171
1172int
1173xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1174 register int tmp;
1175
1176 if (len <= 0) return(0);
1177 if (str1 == str2) return(0);
1178 if (str1 == NULL) return(-1);
1179 if (str2 == NULL) return(1);
1180 do {
1181 tmp = *str1++ - *str2;
1182 if (tmp != 0 || --len == 0) return(tmp);
1183 } while (*str2++ != 0);
1184 return 0;
1185}
1186
Daniel Veillardb44025c2001-10-11 22:55:55 +00001187static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001188 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1189 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1190 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1191 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1192 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1193 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1194 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1195 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1196 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1197 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1198 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1199 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1200 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1201 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1202 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1203 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1204 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1205 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1206 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1207 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1208 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1209 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1210 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1211 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1212 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1213 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1214 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1215 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1216 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1217 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1218 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1219 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1220};
1221
1222/**
1223 * xmlStrcasecmp:
1224 * @str1: the first xmlChar *
1225 * @str2: the second xmlChar *
1226 *
1227 * a strcasecmp for xmlChar's
1228 *
1229 * Returns the integer result of the comparison
1230 */
1231
1232int
1233xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1234 register int tmp;
1235
1236 if (str1 == str2) return(0);
1237 if (str1 == NULL) return(-1);
1238 if (str2 == NULL) return(1);
1239 do {
1240 tmp = casemap[*str1++] - casemap[*str2];
1241 if (tmp != 0) return(tmp);
1242 } while (*str2++ != 0);
1243 return 0;
1244}
1245
1246/**
1247 * xmlStrncasecmp:
1248 * @str1: the first xmlChar *
1249 * @str2: the second xmlChar *
1250 * @len: the max comparison length
1251 *
1252 * a strncasecmp for xmlChar's
1253 *
1254 * Returns the integer result of the comparison
1255 */
1256
1257int
1258xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1259 register int tmp;
1260
1261 if (len <= 0) return(0);
1262 if (str1 == str2) return(0);
1263 if (str1 == NULL) return(-1);
1264 if (str2 == NULL) return(1);
1265 do {
1266 tmp = casemap[*str1++] - casemap[*str2];
1267 if (tmp != 0 || --len == 0) return(tmp);
1268 } while (*str2++ != 0);
1269 return 0;
1270}
1271
1272/**
1273 * xmlStrchr:
1274 * @str: the xmlChar * array
1275 * @val: the xmlChar to search
1276 *
1277 * a strchr for xmlChar's
1278 *
1279 * Returns the xmlChar * for the first occurence or NULL.
1280 */
1281
1282const xmlChar *
1283xmlStrchr(const xmlChar *str, xmlChar val) {
1284 if (str == NULL) return(NULL);
1285 while (*str != 0) { /* non input consuming */
1286 if (*str == val) return((xmlChar *) str);
1287 str++;
1288 }
1289 return(NULL);
1290}
1291
1292/**
1293 * xmlStrstr:
1294 * @str: the xmlChar * array (haystack)
1295 * @val: the xmlChar to search (needle)
1296 *
1297 * a strstr for xmlChar's
1298 *
1299 * Returns the xmlChar * for the first occurence or NULL.
1300 */
1301
1302const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001303xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001304 int n;
1305
1306 if (str == NULL) return(NULL);
1307 if (val == NULL) return(NULL);
1308 n = xmlStrlen(val);
1309
1310 if (n == 0) return(str);
1311 while (*str != 0) { /* non input consuming */
1312 if (*str == *val) {
1313 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1314 }
1315 str++;
1316 }
1317 return(NULL);
1318}
1319
1320/**
1321 * xmlStrcasestr:
1322 * @str: the xmlChar * array (haystack)
1323 * @val: the xmlChar to search (needle)
1324 *
1325 * a case-ignoring strstr for xmlChar's
1326 *
1327 * Returns the xmlChar * for the first occurence or NULL.
1328 */
1329
1330const xmlChar *
1331xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1332 int n;
1333
1334 if (str == NULL) return(NULL);
1335 if (val == NULL) return(NULL);
1336 n = xmlStrlen(val);
1337
1338 if (n == 0) return(str);
1339 while (*str != 0) { /* non input consuming */
1340 if (casemap[*str] == casemap[*val])
1341 if (!xmlStrncasecmp(str, val, n)) return(str);
1342 str++;
1343 }
1344 return(NULL);
1345}
1346
1347/**
1348 * xmlStrsub:
1349 * @str: the xmlChar * array (haystack)
1350 * @start: the index of the first char (zero based)
1351 * @len: the length of the substring
1352 *
1353 * Extract a substring of a given string
1354 *
1355 * Returns the xmlChar * for the first occurence or NULL.
1356 */
1357
1358xmlChar *
1359xmlStrsub(const xmlChar *str, int start, int len) {
1360 int i;
1361
1362 if (str == NULL) return(NULL);
1363 if (start < 0) return(NULL);
1364 if (len < 0) return(NULL);
1365
1366 for (i = 0;i < start;i++) {
1367 if (*str == 0) return(NULL);
1368 str++;
1369 }
1370 if (*str == 0) return(NULL);
1371 return(xmlStrndup(str, len));
1372}
1373
1374/**
1375 * xmlStrlen:
1376 * @str: the xmlChar * array
1377 *
1378 * length of a xmlChar's string
1379 *
1380 * Returns the number of xmlChar contained in the ARRAY.
1381 */
1382
1383int
1384xmlStrlen(const xmlChar *str) {
1385 int len = 0;
1386
1387 if (str == NULL) return(0);
1388 while (*str != 0) { /* non input consuming */
1389 str++;
1390 len++;
1391 }
1392 return(len);
1393}
1394
1395/**
1396 * xmlStrncat:
1397 * @cur: the original xmlChar * array
1398 * @add: the xmlChar * array added
1399 * @len: the length of @add
1400 *
1401 * a strncat for array of xmlChar's, it will extend cur with the len
1402 * first bytes of @add.
1403 *
1404 * Returns a new xmlChar *, the original @cur is reallocated if needed
1405 * and should not be freed
1406 */
1407
1408xmlChar *
1409xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1410 int size;
1411 xmlChar *ret;
1412
1413 if ((add == NULL) || (len == 0))
1414 return(cur);
1415 if (cur == NULL)
1416 return(xmlStrndup(add, len));
1417
1418 size = xmlStrlen(cur);
1419 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1420 if (ret == NULL) {
1421 xmlGenericError(xmlGenericErrorContext,
1422 "xmlStrncat: realloc of %ld byte failed\n",
1423 (size + len + 1) * (long)sizeof(xmlChar));
1424 return(cur);
1425 }
1426 memcpy(&ret[size], add, len * sizeof(xmlChar));
1427 ret[size + len] = 0;
1428 return(ret);
1429}
1430
1431/**
1432 * xmlStrcat:
1433 * @cur: the original xmlChar * array
1434 * @add: the xmlChar * array added
1435 *
1436 * a strcat for array of xmlChar's. Since they are supposed to be
1437 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1438 * a termination mark of '0'.
1439 *
1440 * Returns a new xmlChar * containing the concatenated string.
1441 */
1442xmlChar *
1443xmlStrcat(xmlChar *cur, const xmlChar *add) {
1444 const xmlChar *p = add;
1445
1446 if (add == NULL) return(cur);
1447 if (cur == NULL)
1448 return(xmlStrdup(add));
1449
1450 while (*p != 0) p++; /* non input consuming */
1451 return(xmlStrncat(cur, add, p - add));
1452}
1453
1454/************************************************************************
1455 * *
1456 * Commodity functions, cleanup needed ? *
1457 * *
1458 ************************************************************************/
1459
1460/**
1461 * areBlanks:
1462 * @ctxt: an XML parser context
1463 * @str: a xmlChar *
1464 * @len: the size of @str
1465 *
1466 * Is this a sequence of blank chars that one can ignore ?
1467 *
1468 * Returns 1 if ignorable 0 otherwise.
1469 */
1470
1471static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1472 int i, ret;
1473 xmlNodePtr lastChild;
1474
Daniel Veillard05c13a22001-09-09 08:38:09 +00001475 /*
1476 * Don't spend time trying to differentiate them, the same callback is
1477 * used !
1478 */
1479 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001480 return(0);
1481
Owen Taylor3473f882001-02-23 17:55:21 +00001482 /*
1483 * Check for xml:space value.
1484 */
1485 if (*(ctxt->space) == 1)
1486 return(0);
1487
1488 /*
1489 * Check that the string is made of blanks
1490 */
1491 for (i = 0;i < len;i++)
1492 if (!(IS_BLANK(str[i]))) return(0);
1493
1494 /*
1495 * Look if the element is mixed content in the Dtd if available
1496 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001497 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001498 if (ctxt->myDoc != NULL) {
1499 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1500 if (ret == 0) return(1);
1501 if (ret == 1) return(0);
1502 }
1503
1504 /*
1505 * Otherwise, heuristic :-\
1506 */
Owen Taylor3473f882001-02-23 17:55:21 +00001507 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001508 if ((ctxt->node->children == NULL) &&
1509 (RAW == '<') && (NXT(1) == '/')) return(0);
1510
1511 lastChild = xmlGetLastChild(ctxt->node);
1512 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001513 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1514 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001515 } else if (xmlNodeIsText(lastChild))
1516 return(0);
1517 else if ((ctxt->node->children != NULL) &&
1518 (xmlNodeIsText(ctxt->node->children)))
1519 return(0);
1520 return(1);
1521}
1522
/*
 * Forward declarations for recursive behaviour.
 */
void xmlParsePEReference(xmlParserCtxtPtr ctxt);
void xmlParseReference(xmlParserCtxtPtr ctxt);
1528
1529/************************************************************************
1530 * *
1531 * Extra stuff for namespace support *
1532 * Relates to http://www.w3.org/TR/WD-xml-names *
1533 * *
1534 ************************************************************************/
1535
1536/**
1537 * xmlSplitQName:
1538 * @ctxt: an XML parser context
1539 * @name: an XML parser context
1540 * @prefix: a xmlChar **
1541 *
1542 * parse an UTF8 encoded XML qualified name string
1543 *
1544 * [NS 5] QName ::= (Prefix ':')? LocalPart
1545 *
1546 * [NS 6] Prefix ::= NCName
1547 *
1548 * [NS 7] LocalPart ::= NCName
1549 *
1550 * Returns the local part, and prefix is updated
1551 * to get the Prefix if any.
1552 */
1553
1554xmlChar *
1555xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1556 xmlChar buf[XML_MAX_NAMELEN + 5];
1557 xmlChar *buffer = NULL;
1558 int len = 0;
1559 int max = XML_MAX_NAMELEN;
1560 xmlChar *ret = NULL;
1561 const xmlChar *cur = name;
1562 int c;
1563
1564 *prefix = NULL;
1565
1566#ifndef XML_XML_NAMESPACE
1567 /* xml: prefix is not really a namespace */
1568 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1569 (cur[2] == 'l') && (cur[3] == ':'))
1570 return(xmlStrdup(name));
1571#endif
1572
1573 /* nasty but valid */
1574 if (cur[0] == ':')
1575 return(xmlStrdup(name));
1576
1577 c = *cur++;
1578 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1579 buf[len++] = c;
1580 c = *cur++;
1581 }
1582 if (len >= max) {
1583 /*
1584 * Okay someone managed to make a huge name, so he's ready to pay
1585 * for the processing speed.
1586 */
1587 max = len * 2;
1588
1589 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1590 if (buffer == NULL) {
1591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1592 ctxt->sax->error(ctxt->userData,
1593 "xmlSplitQName: out of memory\n");
1594 return(NULL);
1595 }
1596 memcpy(buffer, buf, len);
1597 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1598 if (len + 10 > max) {
1599 max *= 2;
1600 buffer = (xmlChar *) xmlRealloc(buffer,
1601 max * sizeof(xmlChar));
1602 if (buffer == NULL) {
1603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1604 ctxt->sax->error(ctxt->userData,
1605 "xmlSplitQName: out of memory\n");
1606 return(NULL);
1607 }
1608 }
1609 buffer[len++] = c;
1610 c = *cur++;
1611 }
1612 buffer[len] = 0;
1613 }
1614
1615 if (buffer == NULL)
1616 ret = xmlStrndup(buf, len);
1617 else {
1618 ret = buffer;
1619 buffer = NULL;
1620 max = XML_MAX_NAMELEN;
1621 }
1622
1623
1624 if (c == ':') {
1625 c = *cur++;
1626 if (c == 0) return(ret);
1627 *prefix = ret;
1628 len = 0;
1629
1630 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1631 buf[len++] = c;
1632 c = *cur++;
1633 }
1634 if (len >= max) {
1635 /*
1636 * Okay someone managed to make a huge name, so he's ready to pay
1637 * for the processing speed.
1638 */
1639 max = len * 2;
1640
1641 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1642 if (buffer == NULL) {
1643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1644 ctxt->sax->error(ctxt->userData,
1645 "xmlSplitQName: out of memory\n");
1646 return(NULL);
1647 }
1648 memcpy(buffer, buf, len);
1649 while (c != 0) { /* tested bigname2.xml */
1650 if (len + 10 > max) {
1651 max *= 2;
1652 buffer = (xmlChar *) xmlRealloc(buffer,
1653 max * sizeof(xmlChar));
1654 if (buffer == NULL) {
1655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1656 ctxt->sax->error(ctxt->userData,
1657 "xmlSplitQName: out of memory\n");
1658 return(NULL);
1659 }
1660 }
1661 buffer[len++] = c;
1662 c = *cur++;
1663 }
1664 buffer[len] = 0;
1665 }
1666
1667 if (buffer == NULL)
1668 ret = xmlStrndup(buf, len);
1669 else {
1670 ret = buffer;
1671 }
1672 }
1673
1674 return(ret);
1675}
1676
1677/************************************************************************
1678 * *
1679 * The parser itself *
1680 * Relates to http://www.w3.org/TR/REC-xml *
1681 * *
1682 ************************************************************************/
1683
Daniel Veillard76d66f42001-05-16 21:05:17 +00001684static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001685/**
1686 * xmlParseName:
1687 * @ctxt: an XML parser context
1688 *
1689 * parse an XML name.
1690 *
1691 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1692 * CombiningChar | Extender
1693 *
1694 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1695 *
1696 * [6] Names ::= Name (S Name)*
1697 *
1698 * Returns the Name parsed or NULL
1699 */
1700
1701xmlChar *
1702xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001703 const xmlChar *in;
1704 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001705 int count = 0;
1706
1707 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001708
1709 /*
1710 * Accelerator for simple ASCII names
1711 */
1712 in = ctxt->input->cur;
1713 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1714 ((*in >= 0x41) && (*in <= 0x5A)) ||
1715 (*in == '_') || (*in == ':')) {
1716 in++;
1717 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1718 ((*in >= 0x41) && (*in <= 0x5A)) ||
1719 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001720 (*in == '_') || (*in == '-') ||
1721 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001722 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001723 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001724 count = in - ctxt->input->cur;
1725 ret = xmlStrndup(ctxt->input->cur, count);
1726 ctxt->input->cur = in;
1727 return(ret);
1728 }
1729 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001730 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001731}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001732
Daniel Veillard76d66f42001-05-16 21:05:17 +00001733static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001734xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1735 xmlChar buf[XML_MAX_NAMELEN + 5];
1736 int len = 0, l;
1737 int c;
1738 int count = 0;
1739
1740 /*
1741 * Handler for more complex cases
1742 */
1743 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001744 c = CUR_CHAR(l);
1745 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1746 (!IS_LETTER(c) && (c != '_') &&
1747 (c != ':'))) {
1748 return(NULL);
1749 }
1750
1751 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1752 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1753 (c == '.') || (c == '-') ||
1754 (c == '_') || (c == ':') ||
1755 (IS_COMBINING(c)) ||
1756 (IS_EXTENDER(c)))) {
1757 if (count++ > 100) {
1758 count = 0;
1759 GROW;
1760 }
1761 COPY_BUF(l,buf,len,c);
1762 NEXTL(l);
1763 c = CUR_CHAR(l);
1764 if (len >= XML_MAX_NAMELEN) {
1765 /*
1766 * Okay someone managed to make a huge name, so he's ready to pay
1767 * for the processing speed.
1768 */
1769 xmlChar *buffer;
1770 int max = len * 2;
1771
1772 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1773 if (buffer == NULL) {
1774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1775 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001776 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001777 return(NULL);
1778 }
1779 memcpy(buffer, buf, len);
1780 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1781 (c == '.') || (c == '-') ||
1782 (c == '_') || (c == ':') ||
1783 (IS_COMBINING(c)) ||
1784 (IS_EXTENDER(c))) {
1785 if (count++ > 100) {
1786 count = 0;
1787 GROW;
1788 }
1789 if (len + 10 > max) {
1790 max *= 2;
1791 buffer = (xmlChar *) xmlRealloc(buffer,
1792 max * sizeof(xmlChar));
1793 if (buffer == NULL) {
1794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1795 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001796 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001797 return(NULL);
1798 }
1799 }
1800 COPY_BUF(l,buffer,len,c);
1801 NEXTL(l);
1802 c = CUR_CHAR(l);
1803 }
1804 buffer[len] = 0;
1805 return(buffer);
1806 }
1807 }
1808 return(xmlStrndup(buf, len));
1809}
1810
1811/**
1812 * xmlParseStringName:
1813 * @ctxt: an XML parser context
1814 * @str: a pointer to the string pointer (IN/OUT)
1815 *
1816 * parse an XML name.
1817 *
1818 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1819 * CombiningChar | Extender
1820 *
1821 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1822 *
1823 * [6] Names ::= Name (S Name)*
1824 *
1825 * Returns the Name parsed or NULL. The str pointer
1826 * is updated to the current location in the string.
1827 */
1828
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001829static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001830xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1831 xmlChar buf[XML_MAX_NAMELEN + 5];
1832 const xmlChar *cur = *str;
1833 int len = 0, l;
1834 int c;
1835
1836 c = CUR_SCHAR(cur, l);
1837 if (!IS_LETTER(c) && (c != '_') &&
1838 (c != ':')) {
1839 return(NULL);
1840 }
1841
1842 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1843 (c == '.') || (c == '-') ||
1844 (c == '_') || (c == ':') ||
1845 (IS_COMBINING(c)) ||
1846 (IS_EXTENDER(c))) {
1847 COPY_BUF(l,buf,len,c);
1848 cur += l;
1849 c = CUR_SCHAR(cur, l);
1850 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1851 /*
1852 * Okay someone managed to make a huge name, so he's ready to pay
1853 * for the processing speed.
1854 */
1855 xmlChar *buffer;
1856 int max = len * 2;
1857
1858 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1859 if (buffer == NULL) {
1860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1861 ctxt->sax->error(ctxt->userData,
1862 "xmlParseStringName: out of memory\n");
1863 return(NULL);
1864 }
1865 memcpy(buffer, buf, len);
1866 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1867 (c == '.') || (c == '-') ||
1868 (c == '_') || (c == ':') ||
1869 (IS_COMBINING(c)) ||
1870 (IS_EXTENDER(c))) {
1871 if (len + 10 > max) {
1872 max *= 2;
1873 buffer = (xmlChar *) xmlRealloc(buffer,
1874 max * sizeof(xmlChar));
1875 if (buffer == NULL) {
1876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1877 ctxt->sax->error(ctxt->userData,
1878 "xmlParseStringName: out of memory\n");
1879 return(NULL);
1880 }
1881 }
1882 COPY_BUF(l,buffer,len,c);
1883 cur += l;
1884 c = CUR_SCHAR(cur, l);
1885 }
1886 buffer[len] = 0;
1887 *str = cur;
1888 return(buffer);
1889 }
1890 }
1891 *str = cur;
1892 return(xmlStrndup(buf, len));
1893}
1894
1895/**
1896 * xmlParseNmtoken:
1897 * @ctxt: an XML parser context
1898 *
1899 * parse an XML Nmtoken.
1900 *
1901 * [7] Nmtoken ::= (NameChar)+
1902 *
1903 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1904 *
1905 * Returns the Nmtoken parsed or NULL
1906 */
1907
1908xmlChar *
1909xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1910 xmlChar buf[XML_MAX_NAMELEN + 5];
1911 int len = 0, l;
1912 int c;
1913 int count = 0;
1914
1915 GROW;
1916 c = CUR_CHAR(l);
1917
1918 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1919 (c == '.') || (c == '-') ||
1920 (c == '_') || (c == ':') ||
1921 (IS_COMBINING(c)) ||
1922 (IS_EXTENDER(c))) {
1923 if (count++ > 100) {
1924 count = 0;
1925 GROW;
1926 }
1927 COPY_BUF(l,buf,len,c);
1928 NEXTL(l);
1929 c = CUR_CHAR(l);
1930 if (len >= XML_MAX_NAMELEN) {
1931 /*
1932 * Okay someone managed to make a huge token, so he's ready to pay
1933 * for the processing speed.
1934 */
1935 xmlChar *buffer;
1936 int max = len * 2;
1937
1938 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1939 if (buffer == NULL) {
1940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1941 ctxt->sax->error(ctxt->userData,
1942 "xmlParseNmtoken: out of memory\n");
1943 return(NULL);
1944 }
1945 memcpy(buffer, buf, len);
1946 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1947 (c == '.') || (c == '-') ||
1948 (c == '_') || (c == ':') ||
1949 (IS_COMBINING(c)) ||
1950 (IS_EXTENDER(c))) {
1951 if (count++ > 100) {
1952 count = 0;
1953 GROW;
1954 }
1955 if (len + 10 > max) {
1956 max *= 2;
1957 buffer = (xmlChar *) xmlRealloc(buffer,
1958 max * sizeof(xmlChar));
1959 if (buffer == NULL) {
1960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1961 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001962 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001963 return(NULL);
1964 }
1965 }
1966 COPY_BUF(l,buffer,len,c);
1967 NEXTL(l);
1968 c = CUR_CHAR(l);
1969 }
1970 buffer[len] = 0;
1971 return(buffer);
1972 }
1973 }
1974 if (len == 0)
1975 return(NULL);
1976 return(xmlStrndup(buf, len));
1977}
1978
1979/**
1980 * xmlParseEntityValue:
1981 * @ctxt: an XML parser context
1982 * @orig: if non-NULL store a copy of the original entity value
1983 *
1984 * parse a value for ENTITY declarations
1985 *
1986 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1987 * "'" ([^%&'] | PEReference | Reference)* "'"
1988 *
1989 * Returns the EntityValue parsed with reference substitued or NULL
1990 */
1991
1992xmlChar *
1993xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1994 xmlChar *buf = NULL;
1995 int len = 0;
1996 int size = XML_PARSER_BUFFER_SIZE;
1997 int c, l;
1998 xmlChar stop;
1999 xmlChar *ret = NULL;
2000 const xmlChar *cur = NULL;
2001 xmlParserInputPtr input;
2002
2003 if (RAW == '"') stop = '"';
2004 else if (RAW == '\'') stop = '\'';
2005 else {
2006 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2008 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2009 ctxt->wellFormed = 0;
2010 ctxt->disableSAX = 1;
2011 return(NULL);
2012 }
2013 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2014 if (buf == NULL) {
2015 xmlGenericError(xmlGenericErrorContext,
2016 "malloc of %d byte failed\n", size);
2017 return(NULL);
2018 }
2019
2020 /*
2021 * The content of the entity definition is copied in a buffer.
2022 */
2023
2024 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2025 input = ctxt->input;
2026 GROW;
2027 NEXT;
2028 c = CUR_CHAR(l);
2029 /*
2030 * NOTE: 4.4.5 Included in Literal
2031 * When a parameter entity reference appears in a literal entity
2032 * value, ... a single or double quote character in the replacement
2033 * text is always treated as a normal data character and will not
2034 * terminate the literal.
2035 * In practice it means we stop the loop only when back at parsing
2036 * the initial entity and the quote is found
2037 */
2038 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2039 (ctxt->input != input))) {
2040 if (len + 5 >= size) {
2041 size *= 2;
2042 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2043 if (buf == NULL) {
2044 xmlGenericError(xmlGenericErrorContext,
2045 "realloc of %d byte failed\n", size);
2046 return(NULL);
2047 }
2048 }
2049 COPY_BUF(l,buf,len,c);
2050 NEXTL(l);
2051 /*
2052 * Pop-up of finished entities.
2053 */
2054 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2055 xmlPopInput(ctxt);
2056
2057 GROW;
2058 c = CUR_CHAR(l);
2059 if (c == 0) {
2060 GROW;
2061 c = CUR_CHAR(l);
2062 }
2063 }
2064 buf[len] = 0;
2065
2066 /*
2067 * Raise problem w.r.t. '&' and '%' being used in non-entities
2068 * reference constructs. Note Charref will be handled in
2069 * xmlStringDecodeEntities()
2070 */
2071 cur = buf;
2072 while (*cur != 0) { /* non input consuming */
2073 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2074 xmlChar *name;
2075 xmlChar tmp = *cur;
2076
2077 cur++;
2078 name = xmlParseStringName(ctxt, &cur);
2079 if ((name == NULL) || (*cur != ';')) {
2080 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2082 ctxt->sax->error(ctxt->userData,
2083 "EntityValue: '%c' forbidden except for entities references\n",
2084 tmp);
2085 ctxt->wellFormed = 0;
2086 ctxt->disableSAX = 1;
2087 }
2088 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2089 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2091 ctxt->sax->error(ctxt->userData,
2092 "EntityValue: PEReferences forbidden in internal subset\n",
2093 tmp);
2094 ctxt->wellFormed = 0;
2095 ctxt->disableSAX = 1;
2096 }
2097 if (name != NULL)
2098 xmlFree(name);
2099 }
2100 cur++;
2101 }
2102
2103 /*
2104 * Then PEReference entities are substituted.
2105 */
2106 if (c != stop) {
2107 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2108 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2109 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2110 ctxt->wellFormed = 0;
2111 ctxt->disableSAX = 1;
2112 xmlFree(buf);
2113 } else {
2114 NEXT;
2115 /*
2116 * NOTE: 4.4.7 Bypassed
2117 * When a general entity reference appears in the EntityValue in
2118 * an entity declaration, it is bypassed and left as is.
2119 * so XML_SUBSTITUTE_REF is not set here.
2120 */
2121 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2122 0, 0, 0);
2123 if (orig != NULL)
2124 *orig = buf;
2125 else
2126 xmlFree(buf);
2127 }
2128
2129 return(ret);
2130}
2131
2132/**
2133 * xmlParseAttValue:
2134 * @ctxt: an XML parser context
2135 *
2136 * parse a value for an attribute
2137 * Note: the parser won't do substitution of entities here, this
2138 * will be handled later in xmlStringGetNodeList
2139 *
2140 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2141 * "'" ([^<&'] | Reference)* "'"
2142 *
2143 * 3.3.3 Attribute-Value Normalization:
2144 * Before the value of an attribute is passed to the application or
2145 * checked for validity, the XML processor must normalize it as follows:
2146 * - a character reference is processed by appending the referenced
2147 * character to the attribute value
2148 * - an entity reference is processed by recursively processing the
2149 * replacement text of the entity
2150 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2151 * appending #x20 to the normalized value, except that only a single
2152 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2153 * parsed entity or the literal entity value of an internal parsed entity
2154 * - other characters are processed by appending them to the normalized value
2155 * If the declared value is not CDATA, then the XML processor must further
2156 * process the normalized attribute value by discarding any leading and
2157 * trailing space (#x20) characters, and by replacing sequences of space
2158 * (#x20) characters by a single space (#x20) character.
2159 * All attributes for which no declaration has been read should be treated
2160 * by a non-validating parser as if declared CDATA.
2161 *
2162 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2163 */
2164
2165xmlChar *
2166xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2167 xmlChar limit = 0;
2168 xmlChar *buf = NULL;
2169 int len = 0;
2170 int buf_size = 0;
2171 int c, l;
2172 xmlChar *current = NULL;
2173 xmlEntityPtr ent;
2174
2175
2176 SHRINK;
2177 if (NXT(0) == '"') {
2178 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2179 limit = '"';
2180 NEXT;
2181 } else if (NXT(0) == '\'') {
2182 limit = '\'';
2183 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2184 NEXT;
2185 } else {
2186 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2188 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2189 ctxt->wellFormed = 0;
2190 ctxt->disableSAX = 1;
2191 return(NULL);
2192 }
2193
2194 /*
2195 * allocate a translation buffer.
2196 */
2197 buf_size = XML_PARSER_BUFFER_SIZE;
2198 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2199 if (buf == NULL) {
2200 perror("xmlParseAttValue: malloc failed");
2201 return(NULL);
2202 }
2203
2204 /*
2205 * Ok loop until we reach one of the ending char or a size limit.
2206 */
2207 c = CUR_CHAR(l);
2208 while (((NXT(0) != limit) && /* checked */
2209 (c != '<')) || (ctxt->token != 0)) {
2210 if (c == 0) break;
2211 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002212 if (ctxt->replaceEntities) {
2213 if (len > buf_size - 10) {
2214 growBuffer(buf);
2215 }
2216 buf[len++] = '&';
2217 } else {
2218 /*
2219 * The reparsing will be done in xmlStringGetNodeList()
2220 * called by the attribute() function in SAX.c
2221 */
2222 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002223
Daniel Veillard319a7422001-09-11 09:27:09 +00002224 if (len > buf_size - 10) {
2225 growBuffer(buf);
2226 }
2227 current = &buffer[0];
2228 while (*current != 0) { /* non input consuming */
2229 buf[len++] = *current++;
2230 }
2231 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002232 }
Owen Taylor3473f882001-02-23 17:55:21 +00002233 } else if (c == '&') {
2234 if (NXT(1) == '#') {
2235 int val = xmlParseCharRef(ctxt);
2236 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002237 if (ctxt->replaceEntities) {
2238 if (len > buf_size - 10) {
2239 growBuffer(buf);
2240 }
2241 buf[len++] = '&';
2242 } else {
2243 /*
2244 * The reparsing will be done in xmlStringGetNodeList()
2245 * called by the attribute() function in SAX.c
2246 */
2247 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002248
Daniel Veillard319a7422001-09-11 09:27:09 +00002249 if (len > buf_size - 10) {
2250 growBuffer(buf);
2251 }
2252 current = &buffer[0];
2253 while (*current != 0) { /* non input consuming */
2254 buf[len++] = *current++;
2255 }
Owen Taylor3473f882001-02-23 17:55:21 +00002256 }
2257 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002258 if (len > buf_size - 10) {
2259 growBuffer(buf);
2260 }
Owen Taylor3473f882001-02-23 17:55:21 +00002261 len += xmlCopyChar(0, &buf[len], val);
2262 }
2263 } else {
2264 ent = xmlParseEntityRef(ctxt);
2265 if ((ent != NULL) &&
2266 (ctxt->replaceEntities != 0)) {
2267 xmlChar *rep;
2268
2269 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2270 rep = xmlStringDecodeEntities(ctxt, ent->content,
2271 XML_SUBSTITUTE_REF, 0, 0, 0);
2272 if (rep != NULL) {
2273 current = rep;
2274 while (*current != 0) { /* non input consuming */
2275 buf[len++] = *current++;
2276 if (len > buf_size - 10) {
2277 growBuffer(buf);
2278 }
2279 }
2280 xmlFree(rep);
2281 }
2282 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002283 if (len > buf_size - 10) {
2284 growBuffer(buf);
2285 }
Owen Taylor3473f882001-02-23 17:55:21 +00002286 if (ent->content != NULL)
2287 buf[len++] = ent->content[0];
2288 }
2289 } else if (ent != NULL) {
2290 int i = xmlStrlen(ent->name);
2291 const xmlChar *cur = ent->name;
2292
2293 /*
2294 * This may look absurd but is needed to detect
2295 * entities problems
2296 */
2297 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2298 (ent->content != NULL)) {
2299 xmlChar *rep;
2300 rep = xmlStringDecodeEntities(ctxt, ent->content,
2301 XML_SUBSTITUTE_REF, 0, 0, 0);
2302 if (rep != NULL)
2303 xmlFree(rep);
2304 }
2305
2306 /*
2307 * Just output the reference
2308 */
2309 buf[len++] = '&';
2310 if (len > buf_size - i - 10) {
2311 growBuffer(buf);
2312 }
2313 for (;i > 0;i--)
2314 buf[len++] = *cur++;
2315 buf[len++] = ';';
2316 }
2317 }
2318 } else {
2319 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2320 COPY_BUF(l,buf,len,0x20);
2321 if (len > buf_size - 10) {
2322 growBuffer(buf);
2323 }
2324 } else {
2325 COPY_BUF(l,buf,len,c);
2326 if (len > buf_size - 10) {
2327 growBuffer(buf);
2328 }
2329 }
2330 NEXTL(l);
2331 }
2332 GROW;
2333 c = CUR_CHAR(l);
2334 }
2335 buf[len++] = 0;
2336 if (RAW == '<') {
2337 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2339 ctxt->sax->error(ctxt->userData,
2340 "Unescaped '<' not allowed in attributes values\n");
2341 ctxt->wellFormed = 0;
2342 ctxt->disableSAX = 1;
2343 } else if (RAW != limit) {
2344 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2345 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2346 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2347 ctxt->wellFormed = 0;
2348 ctxt->disableSAX = 1;
2349 } else
2350 NEXT;
2351 return(buf);
2352}
2353
2354/**
2355 * xmlParseSystemLiteral:
2356 * @ctxt: an XML parser context
2357 *
2358 * parse an XML Literal
2359 *
2360 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2361 *
2362 * Returns the SystemLiteral parsed or NULL
2363 */
2364
2365xmlChar *
2366xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2367 xmlChar *buf = NULL;
2368 int len = 0;
2369 int size = XML_PARSER_BUFFER_SIZE;
2370 int cur, l;
2371 xmlChar stop;
2372 int state = ctxt->instate;
2373 int count = 0;
2374
2375 SHRINK;
2376 if (RAW == '"') {
2377 NEXT;
2378 stop = '"';
2379 } else if (RAW == '\'') {
2380 NEXT;
2381 stop = '\'';
2382 } else {
2383 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2384 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2385 ctxt->sax->error(ctxt->userData,
2386 "SystemLiteral \" or ' expected\n");
2387 ctxt->wellFormed = 0;
2388 ctxt->disableSAX = 1;
2389 return(NULL);
2390 }
2391
2392 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2393 if (buf == NULL) {
2394 xmlGenericError(xmlGenericErrorContext,
2395 "malloc of %d byte failed\n", size);
2396 return(NULL);
2397 }
2398 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2399 cur = CUR_CHAR(l);
2400 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2401 if (len + 5 >= size) {
2402 size *= 2;
2403 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2404 if (buf == NULL) {
2405 xmlGenericError(xmlGenericErrorContext,
2406 "realloc of %d byte failed\n", size);
2407 ctxt->instate = (xmlParserInputState) state;
2408 return(NULL);
2409 }
2410 }
2411 count++;
2412 if (count > 50) {
2413 GROW;
2414 count = 0;
2415 }
2416 COPY_BUF(l,buf,len,cur);
2417 NEXTL(l);
2418 cur = CUR_CHAR(l);
2419 if (cur == 0) {
2420 GROW;
2421 SHRINK;
2422 cur = CUR_CHAR(l);
2423 }
2424 }
2425 buf[len] = 0;
2426 ctxt->instate = (xmlParserInputState) state;
2427 if (!IS_CHAR(cur)) {
2428 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2429 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2430 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2431 ctxt->wellFormed = 0;
2432 ctxt->disableSAX = 1;
2433 } else {
2434 NEXT;
2435 }
2436 return(buf);
2437}
2438
2439/**
2440 * xmlParsePubidLiteral:
2441 * @ctxt: an XML parser context
2442 *
2443 * parse an XML public literal
2444 *
2445 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2446 *
2447 * Returns the PubidLiteral parsed or NULL.
2448 */
2449
2450xmlChar *
2451xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2452 xmlChar *buf = NULL;
2453 int len = 0;
2454 int size = XML_PARSER_BUFFER_SIZE;
2455 xmlChar cur;
2456 xmlChar stop;
2457 int count = 0;
2458
2459 SHRINK;
2460 if (RAW == '"') {
2461 NEXT;
2462 stop = '"';
2463 } else if (RAW == '\'') {
2464 NEXT;
2465 stop = '\'';
2466 } else {
2467 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2469 ctxt->sax->error(ctxt->userData,
2470 "SystemLiteral \" or ' expected\n");
2471 ctxt->wellFormed = 0;
2472 ctxt->disableSAX = 1;
2473 return(NULL);
2474 }
2475 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2476 if (buf == NULL) {
2477 xmlGenericError(xmlGenericErrorContext,
2478 "malloc of %d byte failed\n", size);
2479 return(NULL);
2480 }
2481 cur = CUR;
2482 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2483 if (len + 1 >= size) {
2484 size *= 2;
2485 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2486 if (buf == NULL) {
2487 xmlGenericError(xmlGenericErrorContext,
2488 "realloc of %d byte failed\n", size);
2489 return(NULL);
2490 }
2491 }
2492 buf[len++] = cur;
2493 count++;
2494 if (count > 50) {
2495 GROW;
2496 count = 0;
2497 }
2498 NEXT;
2499 cur = CUR;
2500 if (cur == 0) {
2501 GROW;
2502 SHRINK;
2503 cur = CUR;
2504 }
2505 }
2506 buf[len] = 0;
2507 if (cur != stop) {
2508 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2510 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2511 ctxt->wellFormed = 0;
2512 ctxt->disableSAX = 1;
2513 } else {
2514 NEXT;
2515 }
2516 return(buf);
2517}
2518
Daniel Veillard48b2f892001-02-25 16:11:03 +00002519void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002520/**
2521 * xmlParseCharData:
2522 * @ctxt: an XML parser context
2523 * @cdata: int indicating whether we are within a CDATA section
2524 *
2525 * parse a CharData section.
2526 * if we are within a CDATA section ']]>' marks an end of section.
2527 *
2528 * The right angle bracket (>) may be represented using the string "&gt;",
2529 * and must, for compatibility, be escaped using "&gt;" or a character
2530 * reference when it appears in the string "]]>" in content, when that
2531 * string is not marking the end of a CDATA section.
2532 *
2533 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2534 */
2535
2536void
2537xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002538 const xmlChar *in;
2539 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002540 int line = ctxt->input->line;
2541 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002542
2543 SHRINK;
2544 GROW;
2545 /*
2546 * Accelerated common case where input don't need to be
2547 * modified before passing it to the handler.
2548 */
2549 if ((ctxt->token == 0) && (!cdata)) {
2550 in = ctxt->input->cur;
2551 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002552get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002553 while (((*in >= 0x20) && (*in != '<') &&
2554 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2555 in++;
2556 if (*in == 0xA) {
2557 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002558 in++;
2559 while (*in == 0xA) {
2560 ctxt->input->line++;
2561 in++;
2562 }
2563 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002564 }
2565 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002566 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002567 if (IS_BLANK(*ctxt->input->cur)) {
2568 const xmlChar *tmp = ctxt->input->cur;
2569 ctxt->input->cur = in;
2570 if (areBlanks(ctxt, tmp, nbchar)) {
2571 if (ctxt->sax->ignorableWhitespace != NULL)
2572 ctxt->sax->ignorableWhitespace(ctxt->userData,
2573 tmp, nbchar);
2574 } else {
2575 if (ctxt->sax->characters != NULL)
2576 ctxt->sax->characters(ctxt->userData,
2577 tmp, nbchar);
2578 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002579 line = ctxt->input->line;
2580 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002581 } else {
2582 if (ctxt->sax->characters != NULL)
2583 ctxt->sax->characters(ctxt->userData,
2584 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002585 line = ctxt->input->line;
2586 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002587 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002588 }
2589 ctxt->input->cur = in;
2590 if (*in == 0xD) {
2591 in++;
2592 if (*in == 0xA) {
2593 ctxt->input->cur = in;
2594 in++;
2595 ctxt->input->line++;
2596 continue; /* while */
2597 }
2598 in--;
2599 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002600 if (*in == '<') {
2601 return;
2602 }
2603 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002604 return;
2605 }
2606 SHRINK;
2607 GROW;
2608 in = ctxt->input->cur;
2609 } while ((*in >= 0x20) && (*in <= 0x7F));
2610 nbchar = 0;
2611 }
Daniel Veillard50582112001-03-26 22:52:16 +00002612 ctxt->input->line = line;
2613 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002614 xmlParseCharDataComplex(ctxt, cdata);
2615}
2616
2617void
2618xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002619 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2620 int nbchar = 0;
2621 int cur, l;
2622 int count = 0;
2623
2624 SHRINK;
2625 GROW;
2626 cur = CUR_CHAR(l);
2627 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2628 ((cur != '&') || (ctxt->token == '&')) &&
2629 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2630 if ((cur == ']') && (NXT(1) == ']') &&
2631 (NXT(2) == '>')) {
2632 if (cdata) break;
2633 else {
2634 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2636 ctxt->sax->error(ctxt->userData,
2637 "Sequence ']]>' not allowed in content\n");
2638 /* Should this be relaxed ??? I see a "must here */
2639 ctxt->wellFormed = 0;
2640 ctxt->disableSAX = 1;
2641 }
2642 }
2643 COPY_BUF(l,buf,nbchar,cur);
2644 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2645 /*
2646 * Ok the segment is to be consumed as chars.
2647 */
2648 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2649 if (areBlanks(ctxt, buf, nbchar)) {
2650 if (ctxt->sax->ignorableWhitespace != NULL)
2651 ctxt->sax->ignorableWhitespace(ctxt->userData,
2652 buf, nbchar);
2653 } else {
2654 if (ctxt->sax->characters != NULL)
2655 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2656 }
2657 }
2658 nbchar = 0;
2659 }
2660 count++;
2661 if (count > 50) {
2662 GROW;
2663 count = 0;
2664 }
2665 NEXTL(l);
2666 cur = CUR_CHAR(l);
2667 }
2668 if (nbchar != 0) {
2669 /*
2670 * Ok the segment is to be consumed as chars.
2671 */
2672 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2673 if (areBlanks(ctxt, buf, nbchar)) {
2674 if (ctxt->sax->ignorableWhitespace != NULL)
2675 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2676 } else {
2677 if (ctxt->sax->characters != NULL)
2678 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2679 }
2680 }
2681 }
2682}
2683
2684/**
2685 * xmlParseExternalID:
2686 * @ctxt: an XML parser context
2687 * @publicID: a xmlChar** receiving PubidLiteral
2688 * @strict: indicate whether we should restrict parsing to only
2689 * production [75], see NOTE below
2690 *
2691 * Parse an External ID or a Public ID
2692 *
2693 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2694 * 'PUBLIC' S PubidLiteral S SystemLiteral
2695 *
2696 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2697 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2698 *
2699 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2700 *
2701 * Returns the function returns SystemLiteral and in the second
2702 * case publicID receives PubidLiteral, is strict is off
2703 * it is possible to return NULL and have publicID set.
2704 */
2705
2706xmlChar *
2707xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2708 xmlChar *URI = NULL;
2709
2710 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002711
2712 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002713 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2714 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2715 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2716 SKIP(6);
2717 if (!IS_BLANK(CUR)) {
2718 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2720 ctxt->sax->error(ctxt->userData,
2721 "Space required after 'SYSTEM'\n");
2722 ctxt->wellFormed = 0;
2723 ctxt->disableSAX = 1;
2724 }
2725 SKIP_BLANKS;
2726 URI = xmlParseSystemLiteral(ctxt);
2727 if (URI == NULL) {
2728 ctxt->errNo = XML_ERR_URI_REQUIRED;
2729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2730 ctxt->sax->error(ctxt->userData,
2731 "xmlParseExternalID: SYSTEM, no URI\n");
2732 ctxt->wellFormed = 0;
2733 ctxt->disableSAX = 1;
2734 }
2735 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2736 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2737 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2738 SKIP(6);
2739 if (!IS_BLANK(CUR)) {
2740 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2742 ctxt->sax->error(ctxt->userData,
2743 "Space required after 'PUBLIC'\n");
2744 ctxt->wellFormed = 0;
2745 ctxt->disableSAX = 1;
2746 }
2747 SKIP_BLANKS;
2748 *publicID = xmlParsePubidLiteral(ctxt);
2749 if (*publicID == NULL) {
2750 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2752 ctxt->sax->error(ctxt->userData,
2753 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2754 ctxt->wellFormed = 0;
2755 ctxt->disableSAX = 1;
2756 }
2757 if (strict) {
2758 /*
2759 * We don't handle [83] so "S SystemLiteral" is required.
2760 */
2761 if (!IS_BLANK(CUR)) {
2762 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2763 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2764 ctxt->sax->error(ctxt->userData,
2765 "Space required after the Public Identifier\n");
2766 ctxt->wellFormed = 0;
2767 ctxt->disableSAX = 1;
2768 }
2769 } else {
2770 /*
2771 * We handle [83] so we return immediately, if
2772 * "S SystemLiteral" is not detected. From a purely parsing
2773 * point of view that's a nice mess.
2774 */
2775 const xmlChar *ptr;
2776 GROW;
2777
2778 ptr = CUR_PTR;
2779 if (!IS_BLANK(*ptr)) return(NULL);
2780
2781 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2782 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2783 }
2784 SKIP_BLANKS;
2785 URI = xmlParseSystemLiteral(ctxt);
2786 if (URI == NULL) {
2787 ctxt->errNo = XML_ERR_URI_REQUIRED;
2788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2789 ctxt->sax->error(ctxt->userData,
2790 "xmlParseExternalID: PUBLIC, no URI\n");
2791 ctxt->wellFormed = 0;
2792 ctxt->disableSAX = 1;
2793 }
2794 }
2795 return(URI);
2796}
2797
2798/**
2799 * xmlParseComment:
2800 * @ctxt: an XML parser context
2801 *
2802 * Skip an XML (SGML) comment <!-- .... -->
2803 * The spec says that "For compatibility, the string "--" (double-hyphen)
2804 * must not occur within comments. "
2805 *
2806 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2807 */
2808void
2809xmlParseComment(xmlParserCtxtPtr ctxt) {
2810 xmlChar *buf = NULL;
2811 int len;
2812 int size = XML_PARSER_BUFFER_SIZE;
2813 int q, ql;
2814 int r, rl;
2815 int cur, l;
2816 xmlParserInputState state;
2817 xmlParserInputPtr input = ctxt->input;
2818 int count = 0;
2819
2820 /*
2821 * Check that there is a comment right here.
2822 */
2823 if ((RAW != '<') || (NXT(1) != '!') ||
2824 (NXT(2) != '-') || (NXT(3) != '-')) return;
2825
2826 state = ctxt->instate;
2827 ctxt->instate = XML_PARSER_COMMENT;
2828 SHRINK;
2829 SKIP(4);
2830 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2831 if (buf == NULL) {
2832 xmlGenericError(xmlGenericErrorContext,
2833 "malloc of %d byte failed\n", size);
2834 ctxt->instate = state;
2835 return;
2836 }
2837 q = CUR_CHAR(ql);
2838 NEXTL(ql);
2839 r = CUR_CHAR(rl);
2840 NEXTL(rl);
2841 cur = CUR_CHAR(l);
2842 len = 0;
2843 while (IS_CHAR(cur) && /* checked */
2844 ((cur != '>') ||
2845 (r != '-') || (q != '-'))) {
2846 if ((r == '-') && (q == '-') && (len > 1)) {
2847 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2849 ctxt->sax->error(ctxt->userData,
2850 "Comment must not contain '--' (double-hyphen)`\n");
2851 ctxt->wellFormed = 0;
2852 ctxt->disableSAX = 1;
2853 }
2854 if (len + 5 >= size) {
2855 size *= 2;
2856 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2857 if (buf == NULL) {
2858 xmlGenericError(xmlGenericErrorContext,
2859 "realloc of %d byte failed\n", size);
2860 ctxt->instate = state;
2861 return;
2862 }
2863 }
2864 COPY_BUF(ql,buf,len,q);
2865 q = r;
2866 ql = rl;
2867 r = cur;
2868 rl = l;
2869
2870 count++;
2871 if (count > 50) {
2872 GROW;
2873 count = 0;
2874 }
2875 NEXTL(l);
2876 cur = CUR_CHAR(l);
2877 if (cur == 0) {
2878 SHRINK;
2879 GROW;
2880 cur = CUR_CHAR(l);
2881 }
2882 }
2883 buf[len] = 0;
2884 if (!IS_CHAR(cur)) {
2885 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2887 ctxt->sax->error(ctxt->userData,
2888 "Comment not terminated \n<!--%.50s\n", buf);
2889 ctxt->wellFormed = 0;
2890 ctxt->disableSAX = 1;
2891 xmlFree(buf);
2892 } else {
2893 if (input != ctxt->input) {
2894 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2896 ctxt->sax->error(ctxt->userData,
2897"Comment doesn't start and stop in the same entity\n");
2898 ctxt->wellFormed = 0;
2899 ctxt->disableSAX = 1;
2900 }
2901 NEXT;
2902 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2903 (!ctxt->disableSAX))
2904 ctxt->sax->comment(ctxt->userData, buf);
2905 xmlFree(buf);
2906 }
2907 ctxt->instate = state;
2908}
2909
2910/**
2911 * xmlParsePITarget:
2912 * @ctxt: an XML parser context
2913 *
2914 * parse the name of a PI
2915 *
2916 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2917 *
2918 * Returns the PITarget name or NULL
2919 */
2920
2921xmlChar *
2922xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2923 xmlChar *name;
2924
2925 name = xmlParseName(ctxt);
2926 if ((name != NULL) &&
2927 ((name[0] == 'x') || (name[0] == 'X')) &&
2928 ((name[1] == 'm') || (name[1] == 'M')) &&
2929 ((name[2] == 'l') || (name[2] == 'L'))) {
2930 int i;
2931 if ((name[0] == 'x') && (name[1] == 'm') &&
2932 (name[2] == 'l') && (name[3] == 0)) {
2933 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2935 ctxt->sax->error(ctxt->userData,
2936 "XML declaration allowed only at the start of the document\n");
2937 ctxt->wellFormed = 0;
2938 ctxt->disableSAX = 1;
2939 return(name);
2940 } else if (name[3] == 0) {
2941 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2943 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2944 ctxt->wellFormed = 0;
2945 ctxt->disableSAX = 1;
2946 return(name);
2947 }
2948 for (i = 0;;i++) {
2949 if (xmlW3CPIs[i] == NULL) break;
2950 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2951 return(name);
2952 }
2953 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2954 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2955 ctxt->sax->warning(ctxt->userData,
2956 "xmlParsePItarget: invalid name prefix 'xml'\n");
2957 }
2958 }
2959 return(name);
2960}
2961
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The value must be the word "catalog", an '=' sign and a quoted URL,
 * with optional blanks around each and nothing else after the closing
 * quote. A malformed value raises a warning, except that a missing '='
 * is ignored silently.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* pseudo-attribute name */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    /* '=' sign */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;

    /* quoted URL */
    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if (!((quote == '\'') || (quote == '"')))
        goto error;
    start = ++p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3024
Owen Taylor3473f882001-02-23 17:55:21 +00003025/**
3026 * xmlParsePI:
3027 * @ctxt: an XML parser context
3028 *
3029 * parse an XML Processing Instruction.
3030 *
3031 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3032 *
3033 * The processing is transfered to SAX once parsed.
3034 */
3035
3036void
3037xmlParsePI(xmlParserCtxtPtr ctxt) {
3038 xmlChar *buf = NULL;
3039 int len = 0;
3040 int size = XML_PARSER_BUFFER_SIZE;
3041 int cur, l;
3042 xmlChar *target;
3043 xmlParserInputState state;
3044 int count = 0;
3045
3046 if ((RAW == '<') && (NXT(1) == '?')) {
3047 xmlParserInputPtr input = ctxt->input;
3048 state = ctxt->instate;
3049 ctxt->instate = XML_PARSER_PI;
3050 /*
3051 * this is a Processing Instruction.
3052 */
3053 SKIP(2);
3054 SHRINK;
3055
3056 /*
3057 * Parse the target name and check for special support like
3058 * namespace.
3059 */
3060 target = xmlParsePITarget(ctxt);
3061 if (target != NULL) {
3062 if ((RAW == '?') && (NXT(1) == '>')) {
3063 if (input != ctxt->input) {
3064 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3065 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3066 ctxt->sax->error(ctxt->userData,
3067 "PI declaration doesn't start and stop in the same entity\n");
3068 ctxt->wellFormed = 0;
3069 ctxt->disableSAX = 1;
3070 }
3071 SKIP(2);
3072
3073 /*
3074 * SAX: PI detected.
3075 */
3076 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3077 (ctxt->sax->processingInstruction != NULL))
3078 ctxt->sax->processingInstruction(ctxt->userData,
3079 target, NULL);
3080 ctxt->instate = state;
3081 xmlFree(target);
3082 return;
3083 }
3084 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3085 if (buf == NULL) {
3086 xmlGenericError(xmlGenericErrorContext,
3087 "malloc of %d byte failed\n", size);
3088 ctxt->instate = state;
3089 return;
3090 }
3091 cur = CUR;
3092 if (!IS_BLANK(cur)) {
3093 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3094 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3095 ctxt->sax->error(ctxt->userData,
3096 "xmlParsePI: PI %s space expected\n", target);
3097 ctxt->wellFormed = 0;
3098 ctxt->disableSAX = 1;
3099 }
3100 SKIP_BLANKS;
3101 cur = CUR_CHAR(l);
3102 while (IS_CHAR(cur) && /* checked */
3103 ((cur != '?') || (NXT(1) != '>'))) {
3104 if (len + 5 >= size) {
3105 size *= 2;
3106 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3107 if (buf == NULL) {
3108 xmlGenericError(xmlGenericErrorContext,
3109 "realloc of %d byte failed\n", size);
3110 ctxt->instate = state;
3111 return;
3112 }
3113 }
3114 count++;
3115 if (count > 50) {
3116 GROW;
3117 count = 0;
3118 }
3119 COPY_BUF(l,buf,len,cur);
3120 NEXTL(l);
3121 cur = CUR_CHAR(l);
3122 if (cur == 0) {
3123 SHRINK;
3124 GROW;
3125 cur = CUR_CHAR(l);
3126 }
3127 }
3128 buf[len] = 0;
3129 if (cur != '?') {
3130 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3131 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3132 ctxt->sax->error(ctxt->userData,
3133 "xmlParsePI: PI %s never end ...\n", target);
3134 ctxt->wellFormed = 0;
3135 ctxt->disableSAX = 1;
3136 } else {
3137 if (input != ctxt->input) {
3138 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3139 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3140 ctxt->sax->error(ctxt->userData,
3141 "PI declaration doesn't start and stop in the same entity\n");
3142 ctxt->wellFormed = 0;
3143 ctxt->disableSAX = 1;
3144 }
3145 SKIP(2);
3146
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003147#ifdef LIBXML_CATALOG_ENABLED
3148 if (((state == XML_PARSER_MISC) ||
3149 (state == XML_PARSER_START)) &&
3150 (xmlStrEqual(target, XML_CATALOG_PI))) {
3151 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3152 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3153 (allow == XML_CATA_ALLOW_ALL))
3154 xmlParseCatalogPI(ctxt, buf);
3155 }
3156#endif
3157
3158
Owen Taylor3473f882001-02-23 17:55:21 +00003159 /*
3160 * SAX: PI detected.
3161 */
3162 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3163 (ctxt->sax->processingInstruction != NULL))
3164 ctxt->sax->processingInstruction(ctxt->userData,
3165 target, buf);
3166 }
3167 xmlFree(buf);
3168 xmlFree(target);
3169 } else {
3170 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3172 ctxt->sax->error(ctxt->userData,
3173 "xmlParsePI : no target name\n");
3174 ctxt->wellFormed = 0;
3175 ctxt->disableSAX = 1;
3176 }
3177 ctxt->instate = state;
3178 }
3179}
3180
3181/**
3182 * xmlParseNotationDecl:
3183 * @ctxt: an XML parser context
3184 *
3185 * parse a notation declaration
3186 *
3187 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3188 *
 * Hence there are actually 3 choices:
3190 * 'PUBLIC' S PubidLiteral
3191 * 'PUBLIC' S PubidLiteral S SystemLiteral
3192 * and 'SYSTEM' S SystemLiteral
3193 *
3194 * See the NOTE on xmlParseExternalID().
3195 */
3196
3197void
3198xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3199 xmlChar *name;
3200 xmlChar *Pubid;
3201 xmlChar *Systemid;
3202
3203 if ((RAW == '<') && (NXT(1) == '!') &&
3204 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3205 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3206 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3207 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3208 xmlParserInputPtr input = ctxt->input;
3209 SHRINK;
3210 SKIP(10);
3211 if (!IS_BLANK(CUR)) {
3212 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3214 ctxt->sax->error(ctxt->userData,
3215 "Space required after '<!NOTATION'\n");
3216 ctxt->wellFormed = 0;
3217 ctxt->disableSAX = 1;
3218 return;
3219 }
3220 SKIP_BLANKS;
3221
Daniel Veillard76d66f42001-05-16 21:05:17 +00003222 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003223 if (name == NULL) {
3224 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3226 ctxt->sax->error(ctxt->userData,
3227 "NOTATION: Name expected here\n");
3228 ctxt->wellFormed = 0;
3229 ctxt->disableSAX = 1;
3230 return;
3231 }
3232 if (!IS_BLANK(CUR)) {
3233 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3234 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3235 ctxt->sax->error(ctxt->userData,
3236 "Space required after the NOTATION name'\n");
3237 ctxt->wellFormed = 0;
3238 ctxt->disableSAX = 1;
3239 return;
3240 }
3241 SKIP_BLANKS;
3242
3243 /*
3244 * Parse the IDs.
3245 */
3246 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3247 SKIP_BLANKS;
3248
3249 if (RAW == '>') {
3250 if (input != ctxt->input) {
3251 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3252 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3253 ctxt->sax->error(ctxt->userData,
3254"Notation declaration doesn't start and stop in the same entity\n");
3255 ctxt->wellFormed = 0;
3256 ctxt->disableSAX = 1;
3257 }
3258 NEXT;
3259 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3260 (ctxt->sax->notationDecl != NULL))
3261 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3262 } else {
3263 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3265 ctxt->sax->error(ctxt->userData,
3266 "'>' required to close NOTATION declaration\n");
3267 ctxt->wellFormed = 0;
3268 ctxt->disableSAX = 1;
3269 }
3270 xmlFree(name);
3271 if (Systemid != NULL) xmlFree(Systemid);
3272 if (Pubid != NULL) xmlFree(Pubid);
3273 }
3274}
3275
3276/**
3277 * xmlParseEntityDecl:
3278 * @ctxt: an XML parser context
3279 *
3280 * parse <!ENTITY declarations
3281 *
3282 * [70] EntityDecl ::= GEDecl | PEDecl
3283 *
3284 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3285 *
3286 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3287 *
3288 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3289 *
3290 * [74] PEDef ::= EntityValue | ExternalID
3291 *
3292 * [76] NDataDecl ::= S 'NDATA' S Name
3293 *
3294 * [ VC: Notation Declared ]
3295 * The Name must match the declared name of a notation.
3296 */
3297
3298void
3299xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3300 xmlChar *name = NULL;
3301 xmlChar *value = NULL;
3302 xmlChar *URI = NULL, *literal = NULL;
3303 xmlChar *ndata = NULL;
3304 int isParameter = 0;
3305 xmlChar *orig = NULL;
3306
3307 GROW;
3308 if ((RAW == '<') && (NXT(1) == '!') &&
3309 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3310 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3311 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3312 xmlParserInputPtr input = ctxt->input;
3313 ctxt->instate = XML_PARSER_ENTITY_DECL;
3314 SHRINK;
3315 SKIP(8);
3316 if (!IS_BLANK(CUR)) {
3317 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3318 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3319 ctxt->sax->error(ctxt->userData,
3320 "Space required after '<!ENTITY'\n");
3321 ctxt->wellFormed = 0;
3322 ctxt->disableSAX = 1;
3323 }
3324 SKIP_BLANKS;
3325
3326 if (RAW == '%') {
3327 NEXT;
3328 if (!IS_BLANK(CUR)) {
3329 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3331 ctxt->sax->error(ctxt->userData,
3332 "Space required after '%'\n");
3333 ctxt->wellFormed = 0;
3334 ctxt->disableSAX = 1;
3335 }
3336 SKIP_BLANKS;
3337 isParameter = 1;
3338 }
3339
Daniel Veillard76d66f42001-05-16 21:05:17 +00003340 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003341 if (name == NULL) {
3342 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3344 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3345 ctxt->wellFormed = 0;
3346 ctxt->disableSAX = 1;
3347 return;
3348 }
3349 if (!IS_BLANK(CUR)) {
3350 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3351 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3352 ctxt->sax->error(ctxt->userData,
3353 "Space required after the entity name\n");
3354 ctxt->wellFormed = 0;
3355 ctxt->disableSAX = 1;
3356 }
3357 SKIP_BLANKS;
3358
3359 /*
3360 * handle the various case of definitions...
3361 */
3362 if (isParameter) {
3363 if ((RAW == '"') || (RAW == '\'')) {
3364 value = xmlParseEntityValue(ctxt, &orig);
3365 if (value) {
3366 if ((ctxt->sax != NULL) &&
3367 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3368 ctxt->sax->entityDecl(ctxt->userData, name,
3369 XML_INTERNAL_PARAMETER_ENTITY,
3370 NULL, NULL, value);
3371 }
3372 } else {
3373 URI = xmlParseExternalID(ctxt, &literal, 1);
3374 if ((URI == NULL) && (literal == NULL)) {
3375 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3377 ctxt->sax->error(ctxt->userData,
3378 "Entity value required\n");
3379 ctxt->wellFormed = 0;
3380 ctxt->disableSAX = 1;
3381 }
3382 if (URI) {
3383 xmlURIPtr uri;
3384
3385 uri = xmlParseURI((const char *) URI);
3386 if (uri == NULL) {
3387 ctxt->errNo = XML_ERR_INVALID_URI;
3388 if ((ctxt->sax != NULL) &&
3389 (!ctxt->disableSAX) &&
3390 (ctxt->sax->error != NULL))
3391 ctxt->sax->error(ctxt->userData,
3392 "Invalid URI: %s\n", URI);
3393 ctxt->wellFormed = 0;
3394 } else {
3395 if (uri->fragment != NULL) {
3396 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3397 if ((ctxt->sax != NULL) &&
3398 (!ctxt->disableSAX) &&
3399 (ctxt->sax->error != NULL))
3400 ctxt->sax->error(ctxt->userData,
3401 "Fragment not allowed: %s\n", URI);
3402 ctxt->wellFormed = 0;
3403 } else {
3404 if ((ctxt->sax != NULL) &&
3405 (!ctxt->disableSAX) &&
3406 (ctxt->sax->entityDecl != NULL))
3407 ctxt->sax->entityDecl(ctxt->userData, name,
3408 XML_EXTERNAL_PARAMETER_ENTITY,
3409 literal, URI, NULL);
3410 }
3411 xmlFreeURI(uri);
3412 }
3413 }
3414 }
3415 } else {
3416 if ((RAW == '"') || (RAW == '\'')) {
3417 value = xmlParseEntityValue(ctxt, &orig);
3418 if ((ctxt->sax != NULL) &&
3419 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3420 ctxt->sax->entityDecl(ctxt->userData, name,
3421 XML_INTERNAL_GENERAL_ENTITY,
3422 NULL, NULL, value);
3423 } else {
3424 URI = xmlParseExternalID(ctxt, &literal, 1);
3425 if ((URI == NULL) && (literal == NULL)) {
3426 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3428 ctxt->sax->error(ctxt->userData,
3429 "Entity value required\n");
3430 ctxt->wellFormed = 0;
3431 ctxt->disableSAX = 1;
3432 }
3433 if (URI) {
3434 xmlURIPtr uri;
3435
3436 uri = xmlParseURI((const char *)URI);
3437 if (uri == NULL) {
3438 ctxt->errNo = XML_ERR_INVALID_URI;
3439 if ((ctxt->sax != NULL) &&
3440 (!ctxt->disableSAX) &&
3441 (ctxt->sax->error != NULL))
3442 ctxt->sax->error(ctxt->userData,
3443 "Invalid URI: %s\n", URI);
3444 ctxt->wellFormed = 0;
3445 } else {
3446 if (uri->fragment != NULL) {
3447 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3448 if ((ctxt->sax != NULL) &&
3449 (!ctxt->disableSAX) &&
3450 (ctxt->sax->error != NULL))
3451 ctxt->sax->error(ctxt->userData,
3452 "Fragment not allowed: %s\n", URI);
3453 ctxt->wellFormed = 0;
3454 }
3455 xmlFreeURI(uri);
3456 }
3457 }
3458 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3459 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3461 ctxt->sax->error(ctxt->userData,
3462 "Space required before 'NDATA'\n");
3463 ctxt->wellFormed = 0;
3464 ctxt->disableSAX = 1;
3465 }
3466 SKIP_BLANKS;
3467 if ((RAW == 'N') && (NXT(1) == 'D') &&
3468 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3469 (NXT(4) == 'A')) {
3470 SKIP(5);
3471 if (!IS_BLANK(CUR)) {
3472 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3473 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3474 ctxt->sax->error(ctxt->userData,
3475 "Space required after 'NDATA'\n");
3476 ctxt->wellFormed = 0;
3477 ctxt->disableSAX = 1;
3478 }
3479 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003480 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003481 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3482 (ctxt->sax->unparsedEntityDecl != NULL))
3483 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3484 literal, URI, ndata);
3485 } else {
3486 if ((ctxt->sax != NULL) &&
3487 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3488 ctxt->sax->entityDecl(ctxt->userData, name,
3489 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3490 literal, URI, NULL);
3491 }
3492 }
3493 }
3494 SKIP_BLANKS;
3495 if (RAW != '>') {
3496 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3498 ctxt->sax->error(ctxt->userData,
3499 "xmlParseEntityDecl: entity %s not terminated\n", name);
3500 ctxt->wellFormed = 0;
3501 ctxt->disableSAX = 1;
3502 } else {
3503 if (input != ctxt->input) {
3504 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3506 ctxt->sax->error(ctxt->userData,
3507"Entity declaration doesn't start and stop in the same entity\n");
3508 ctxt->wellFormed = 0;
3509 ctxt->disableSAX = 1;
3510 }
3511 NEXT;
3512 }
3513 if (orig != NULL) {
3514 /*
3515 * Ugly mechanism to save the raw entity value.
3516 */
3517 xmlEntityPtr cur = NULL;
3518
3519 if (isParameter) {
3520 if ((ctxt->sax != NULL) &&
3521 (ctxt->sax->getParameterEntity != NULL))
3522 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3523 } else {
3524 if ((ctxt->sax != NULL) &&
3525 (ctxt->sax->getEntity != NULL))
3526 cur = ctxt->sax->getEntity(ctxt->userData, name);
3527 }
3528 if (cur != NULL) {
3529 if (cur->orig != NULL)
3530 xmlFree(orig);
3531 else
3532 cur->orig = orig;
3533 } else
3534 xmlFree(orig);
3535 }
3536 if (name != NULL) xmlFree(name);
3537 if (value != NULL) xmlFree(value);
3538 if (URI != NULL) xmlFree(URI);
3539 if (literal != NULL) xmlFree(literal);
3540 if (ndata != NULL) xmlFree(ndata);
3541 }
3542}
3543
3544/**
3545 * xmlParseDefaultDecl:
3546 * @ctxt: an XML parser context
3547 * @value: Receive a possible fixed default value for the attribute
3548 *
3549 * Parse an attribute default declaration
3550 *
3551 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3552 *
3553 * [ VC: Required Attribute ]
3554 * if the default declaration is the keyword #REQUIRED, then the
3555 * attribute must be specified for all elements of the type in the
3556 * attribute-list declaration.
3557 *
3558 * [ VC: Attribute Default Legal ]
3559 * The declared default value must meet the lexical constraints of
3560 * the declared attribute type c.f. xmlValidateAttributeDecl()
3561 *
3562 * [ VC: Fixed Attribute Default ]
3563 * if an attribute has a default value declared with the #FIXED
3564 * keyword, instances of that attribute must match the default value.
3565 *
3566 * [ WFC: No < in Attribute Values ]
3567 * handled in xmlParseAttValue()
3568 *
3569 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3570 * or XML_ATTRIBUTE_FIXED.
3571 */
3572
3573int
3574xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3575 int val;
3576 xmlChar *ret;
3577
3578 *value = NULL;
3579 if ((RAW == '#') && (NXT(1) == 'R') &&
3580 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3581 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3582 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3583 (NXT(8) == 'D')) {
3584 SKIP(9);
3585 return(XML_ATTRIBUTE_REQUIRED);
3586 }
3587 if ((RAW == '#') && (NXT(1) == 'I') &&
3588 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3589 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3590 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3591 SKIP(8);
3592 return(XML_ATTRIBUTE_IMPLIED);
3593 }
3594 val = XML_ATTRIBUTE_NONE;
3595 if ((RAW == '#') && (NXT(1) == 'F') &&
3596 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3597 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3598 SKIP(6);
3599 val = XML_ATTRIBUTE_FIXED;
3600 if (!IS_BLANK(CUR)) {
3601 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3603 ctxt->sax->error(ctxt->userData,
3604 "Space required after '#FIXED'\n");
3605 ctxt->wellFormed = 0;
3606 ctxt->disableSAX = 1;
3607 }
3608 SKIP_BLANKS;
3609 }
3610 ret = xmlParseAttValue(ctxt);
3611 ctxt->instate = XML_PARSER_DTD;
3612 if (ret == NULL) {
3613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3614 ctxt->sax->error(ctxt->userData,
3615 "Attribute default value declaration error\n");
3616 ctxt->wellFormed = 0;
3617 ctxt->disableSAX = 1;
3618 } else
3619 *value = ret;
3620 return(val);
3621}
3622
3623/**
3624 * xmlParseNotationType:
3625 * @ctxt: an XML parser context
3626 *
3627 * parse an Notation attribute type.
3628 *
3629 * Note: the leading 'NOTATION' S part has already being parsed...
3630 *
3631 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3632 *
3633 * [ VC: Notation Attributes ]
3634 * Values of this type must match one of the notation names included
3635 * in the declaration; all notation names in the declaration must be declared.
3636 *
3637 * Returns: the notation attribute tree built while parsing
3638 */
3639
3640xmlEnumerationPtr
3641xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3642 xmlChar *name;
3643 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3644
3645 if (RAW != '(') {
3646 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3647 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3648 ctxt->sax->error(ctxt->userData,
3649 "'(' required to start 'NOTATION'\n");
3650 ctxt->wellFormed = 0;
3651 ctxt->disableSAX = 1;
3652 return(NULL);
3653 }
3654 SHRINK;
3655 do {
3656 NEXT;
3657 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003658 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003659 if (name == NULL) {
3660 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3662 ctxt->sax->error(ctxt->userData,
3663 "Name expected in NOTATION declaration\n");
3664 ctxt->wellFormed = 0;
3665 ctxt->disableSAX = 1;
3666 return(ret);
3667 }
3668 cur = xmlCreateEnumeration(name);
3669 xmlFree(name);
3670 if (cur == NULL) return(ret);
3671 if (last == NULL) ret = last = cur;
3672 else {
3673 last->next = cur;
3674 last = cur;
3675 }
3676 SKIP_BLANKS;
3677 } while (RAW == '|');
3678 if (RAW != ')') {
3679 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3681 ctxt->sax->error(ctxt->userData,
3682 "')' required to finish NOTATION declaration\n");
3683 ctxt->wellFormed = 0;
3684 ctxt->disableSAX = 1;
3685 if ((last != NULL) && (last != ret))
3686 xmlFreeEnumeration(last);
3687 return(ret);
3688 }
3689 NEXT;
3690 return(ret);
3691}
3692
3693/**
3694 * xmlParseEnumerationType:
3695 * @ctxt: an XML parser context
3696 *
3697 * parse an Enumeration attribute type.
3698 *
3699 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3700 *
3701 * [ VC: Enumeration ]
3702 * Values of this type must match one of the Nmtoken tokens in
3703 * the declaration
3704 *
3705 * Returns: the enumeration attribute tree built while parsing
3706 */
3707
3708xmlEnumerationPtr
3709xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3710 xmlChar *name;
3711 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3712
3713 if (RAW != '(') {
3714 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3715 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3716 ctxt->sax->error(ctxt->userData,
3717 "'(' required to start ATTLIST enumeration\n");
3718 ctxt->wellFormed = 0;
3719 ctxt->disableSAX = 1;
3720 return(NULL);
3721 }
3722 SHRINK;
3723 do {
3724 NEXT;
3725 SKIP_BLANKS;
3726 name = xmlParseNmtoken(ctxt);
3727 if (name == NULL) {
3728 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3730 ctxt->sax->error(ctxt->userData,
3731 "NmToken expected in ATTLIST enumeration\n");
3732 ctxt->wellFormed = 0;
3733 ctxt->disableSAX = 1;
3734 return(ret);
3735 }
3736 cur = xmlCreateEnumeration(name);
3737 xmlFree(name);
3738 if (cur == NULL) return(ret);
3739 if (last == NULL) ret = last = cur;
3740 else {
3741 last->next = cur;
3742 last = cur;
3743 }
3744 SKIP_BLANKS;
3745 } while (RAW == '|');
3746 if (RAW != ')') {
3747 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3748 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3749 ctxt->sax->error(ctxt->userData,
3750 "')' required to finish ATTLIST enumeration\n");
3751 ctxt->wellFormed = 0;
3752 ctxt->disableSAX = 1;
3753 return(ret);
3754 }
3755 NEXT;
3756 return(ret);
3757}
3758
3759/**
3760 * xmlParseEnumeratedType:
3761 * @ctxt: an XML parser context
3762 * @tree: the enumeration tree built while parsing
3763 *
3764 * parse an Enumerated attribute type.
3765 *
3766 * [57] EnumeratedType ::= NotationType | Enumeration
3767 *
3768 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3769 *
3770 *
3771 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3772 */
3773
3774int
3775xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3776 if ((RAW == 'N') && (NXT(1) == 'O') &&
3777 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3778 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3779 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3780 SKIP(8);
3781 if (!IS_BLANK(CUR)) {
3782 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3784 ctxt->sax->error(ctxt->userData,
3785 "Space required after 'NOTATION'\n");
3786 ctxt->wellFormed = 0;
3787 ctxt->disableSAX = 1;
3788 return(0);
3789 }
3790 SKIP_BLANKS;
3791 *tree = xmlParseNotationType(ctxt);
3792 if (*tree == NULL) return(0);
3793 return(XML_ATTRIBUTE_NOTATION);
3794 }
3795 *tree = xmlParseEnumerationType(ctxt);
3796 if (*tree == NULL) return(0);
3797 return(XML_ATTRIBUTE_ENUMERATION);
3798}
3799
3800/**
3801 * xmlParseAttributeType:
3802 * @ctxt: an XML parser context
3803 * @tree: the enumeration tree built while parsing
3804 *
3805 * parse the Attribute list def for an element
3806 *
3807 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3808 *
3809 * [55] StringType ::= 'CDATA'
3810 *
3811 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3812 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3813 *
3814 * Validity constraints for attribute values syntax are checked in
3815 * xmlValidateAttributeValue()
3816 *
3817 * [ VC: ID ]
3818 * Values of type ID must match the Name production. A name must not
3819 * appear more than once in an XML document as a value of this type;
3820 * i.e., ID values must uniquely identify the elements which bear them.
3821 *
3822 * [ VC: One ID per Element Type ]
3823 * No element type may have more than one ID attribute specified.
3824 *
3825 * [ VC: ID Attribute Default ]
3826 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3827 *
3828 * [ VC: IDREF ]
3829 * Values of type IDREF must match the Name production, and values
3830 * of type IDREFS must match Names; each IDREF Name must match the value
3831 * of an ID attribute on some element in the XML document; i.e. IDREF
3832 * values must match the value of some ID attribute.
3833 *
3834 * [ VC: Entity Name ]
3835 * Values of type ENTITY must match the Name production, values
3836 * of type ENTITIES must match Names; each Entity Name must match the
3837 * name of an unparsed entity declared in the DTD.
3838 *
3839 * [ VC: Name Token ]
3840 * Values of type NMTOKEN must match the Nmtoken production; values
3841 * of type NMTOKENS must match Nmtokens.
3842 *
3843 * Returns the attribute type
3844 */
3845int
3846xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3847 SHRINK;
3848 if ((RAW == 'C') && (NXT(1) == 'D') &&
3849 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3850 (NXT(4) == 'A')) {
3851 SKIP(5);
3852 return(XML_ATTRIBUTE_CDATA);
3853 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3854 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3855 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3856 SKIP(6);
3857 return(XML_ATTRIBUTE_IDREFS);
3858 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3859 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3860 (NXT(4) == 'F')) {
3861 SKIP(5);
3862 return(XML_ATTRIBUTE_IDREF);
3863 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3864 SKIP(2);
3865 return(XML_ATTRIBUTE_ID);
3866 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3867 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3868 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3869 SKIP(6);
3870 return(XML_ATTRIBUTE_ENTITY);
3871 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3872 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3873 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3874 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3875 SKIP(8);
3876 return(XML_ATTRIBUTE_ENTITIES);
3877 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3878 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3879 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3880 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3881 SKIP(8);
3882 return(XML_ATTRIBUTE_NMTOKENS);
3883 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3884 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3885 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3886 (NXT(6) == 'N')) {
3887 SKIP(7);
3888 return(XML_ATTRIBUTE_NMTOKEN);
3889 }
3890 return(xmlParseEnumeratedType(ctxt, tree));
3891}
3892
3893/**
3894 * xmlParseAttributeListDecl:
3895 * @ctxt: an XML parser context
3896 *
3897 * : parse the Attribute list def for an element
3898 *
3899 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3900 *
3901 * [53] AttDef ::= S Name S AttType S DefaultDecl
3902 *
3903 */
3904void
3905xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3906 xmlChar *elemName;
3907 xmlChar *attrName;
3908 xmlEnumerationPtr tree;
3909
3910 if ((RAW == '<') && (NXT(1) == '!') &&
3911 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3912 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3913 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3914 (NXT(8) == 'T')) {
3915 xmlParserInputPtr input = ctxt->input;
3916
3917 SKIP(9);
3918 if (!IS_BLANK(CUR)) {
3919 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3921 ctxt->sax->error(ctxt->userData,
3922 "Space required after '<!ATTLIST'\n");
3923 ctxt->wellFormed = 0;
3924 ctxt->disableSAX = 1;
3925 }
3926 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003927 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003928 if (elemName == NULL) {
3929 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3931 ctxt->sax->error(ctxt->userData,
3932 "ATTLIST: no name for Element\n");
3933 ctxt->wellFormed = 0;
3934 ctxt->disableSAX = 1;
3935 return;
3936 }
3937 SKIP_BLANKS;
3938 GROW;
3939 while (RAW != '>') {
3940 const xmlChar *check = CUR_PTR;
3941 int type;
3942 int def;
3943 xmlChar *defaultValue = NULL;
3944
3945 GROW;
3946 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003947 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003948 if (attrName == NULL) {
3949 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3951 ctxt->sax->error(ctxt->userData,
3952 "ATTLIST: no name for Attribute\n");
3953 ctxt->wellFormed = 0;
3954 ctxt->disableSAX = 1;
3955 break;
3956 }
3957 GROW;
3958 if (!IS_BLANK(CUR)) {
3959 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3961 ctxt->sax->error(ctxt->userData,
3962 "Space required after the attribute name\n");
3963 ctxt->wellFormed = 0;
3964 ctxt->disableSAX = 1;
3965 if (attrName != NULL)
3966 xmlFree(attrName);
3967 if (defaultValue != NULL)
3968 xmlFree(defaultValue);
3969 break;
3970 }
3971 SKIP_BLANKS;
3972
3973 type = xmlParseAttributeType(ctxt, &tree);
3974 if (type <= 0) {
3975 if (attrName != NULL)
3976 xmlFree(attrName);
3977 if (defaultValue != NULL)
3978 xmlFree(defaultValue);
3979 break;
3980 }
3981
3982 GROW;
3983 if (!IS_BLANK(CUR)) {
3984 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3985 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3986 ctxt->sax->error(ctxt->userData,
3987 "Space required after the attribute type\n");
3988 ctxt->wellFormed = 0;
3989 ctxt->disableSAX = 1;
3990 if (attrName != NULL)
3991 xmlFree(attrName);
3992 if (defaultValue != NULL)
3993 xmlFree(defaultValue);
3994 if (tree != NULL)
3995 xmlFreeEnumeration(tree);
3996 break;
3997 }
3998 SKIP_BLANKS;
3999
4000 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4001 if (def <= 0) {
4002 if (attrName != NULL)
4003 xmlFree(attrName);
4004 if (defaultValue != NULL)
4005 xmlFree(defaultValue);
4006 if (tree != NULL)
4007 xmlFreeEnumeration(tree);
4008 break;
4009 }
4010
4011 GROW;
4012 if (RAW != '>') {
4013 if (!IS_BLANK(CUR)) {
4014 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4015 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4016 ctxt->sax->error(ctxt->userData,
4017 "Space required after the attribute default value\n");
4018 ctxt->wellFormed = 0;
4019 ctxt->disableSAX = 1;
4020 if (attrName != NULL)
4021 xmlFree(attrName);
4022 if (defaultValue != NULL)
4023 xmlFree(defaultValue);
4024 if (tree != NULL)
4025 xmlFreeEnumeration(tree);
4026 break;
4027 }
4028 SKIP_BLANKS;
4029 }
4030 if (check == CUR_PTR) {
4031 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4033 ctxt->sax->error(ctxt->userData,
4034 "xmlParseAttributeListDecl: detected internal error\n");
4035 if (attrName != NULL)
4036 xmlFree(attrName);
4037 if (defaultValue != NULL)
4038 xmlFree(defaultValue);
4039 if (tree != NULL)
4040 xmlFreeEnumeration(tree);
4041 break;
4042 }
4043 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4044 (ctxt->sax->attributeDecl != NULL))
4045 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4046 type, def, defaultValue, tree);
4047 if (attrName != NULL)
4048 xmlFree(attrName);
4049 if (defaultValue != NULL)
4050 xmlFree(defaultValue);
4051 GROW;
4052 }
4053 if (RAW == '>') {
4054 if (input != ctxt->input) {
4055 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4057 ctxt->sax->error(ctxt->userData,
4058"Attribute list declaration doesn't start and stop in the same entity\n");
4059 ctxt->wellFormed = 0;
4060 ctxt->disableSAX = 1;
4061 }
4062 NEXT;
4063 }
4064
4065 xmlFree(elemName);
4066 }
4067}
4068
4069/**
4070 * xmlParseElementMixedContentDecl:
4071 * @ctxt: an XML parser context
4072 *
4073 * parse the declaration for a Mixed Element content
4074 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4075 *
4076 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4077 * '(' S? '#PCDATA' S? ')'
4078 *
4079 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4080 *
4081 * [ VC: No Duplicate Types ]
4082 * The same name must not appear more than once in a single
4083 * mixed-content declaration.
4084 *
4085 * returns: the list of the xmlElementContentPtr describing the element choices
4086 */
4087xmlElementContentPtr
4088xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4089 xmlElementContentPtr ret = NULL, cur = NULL, n;
4090 xmlChar *elem = NULL;
4091
4092 GROW;
4093 if ((RAW == '#') && (NXT(1) == 'P') &&
4094 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4095 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4096 (NXT(6) == 'A')) {
4097 SKIP(7);
4098 SKIP_BLANKS;
4099 SHRINK;
4100 if (RAW == ')') {
4101 ctxt->entity = ctxt->input;
4102 NEXT;
4103 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4104 if (RAW == '*') {
4105 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4106 NEXT;
4107 }
4108 return(ret);
4109 }
4110 if ((RAW == '(') || (RAW == '|')) {
4111 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4112 if (ret == NULL) return(NULL);
4113 }
4114 while (RAW == '|') {
4115 NEXT;
4116 if (elem == NULL) {
4117 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4118 if (ret == NULL) return(NULL);
4119 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004120 if (cur != NULL)
4121 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004122 cur = ret;
4123 } else {
4124 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4125 if (n == NULL) return(NULL);
4126 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004127 if (n->c1 != NULL)
4128 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004129 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004130 if (n != NULL)
4131 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004132 cur = n;
4133 xmlFree(elem);
4134 }
4135 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004136 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004137 if (elem == NULL) {
4138 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4139 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4140 ctxt->sax->error(ctxt->userData,
4141 "xmlParseElementMixedContentDecl : Name expected\n");
4142 ctxt->wellFormed = 0;
4143 ctxt->disableSAX = 1;
4144 xmlFreeElementContent(cur);
4145 return(NULL);
4146 }
4147 SKIP_BLANKS;
4148 GROW;
4149 }
4150 if ((RAW == ')') && (NXT(1) == '*')) {
4151 if (elem != NULL) {
4152 cur->c2 = xmlNewElementContent(elem,
4153 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004154 if (cur->c2 != NULL)
4155 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004156 xmlFree(elem);
4157 }
4158 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4159 ctxt->entity = ctxt->input;
4160 SKIP(2);
4161 } else {
4162 if (elem != NULL) xmlFree(elem);
4163 xmlFreeElementContent(ret);
4164 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4166 ctxt->sax->error(ctxt->userData,
4167 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4168 ctxt->wellFormed = 0;
4169 ctxt->disableSAX = 1;
4170 return(NULL);
4171 }
4172
4173 } else {
4174 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4176 ctxt->sax->error(ctxt->userData,
4177 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4178 ctxt->wellFormed = 0;
4179 ctxt->disableSAX = 1;
4180 }
4181 return(ret);
4182}
4183
4184/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004185 * xmlParseElementChildrenContentD:
4186 * @ctxt: an XML parser context
4187 *
4188 * VMS version of xmlParseElementChildrenContentDecl()
4189 *
4190 * Returns the tree of xmlElementContentPtr describing the element
4191 * hierarchy.
4192 */
4193/**
Owen Taylor3473f882001-02-23 17:55:21 +00004194 * xmlParseElementChildrenContentDecl:
4195 * @ctxt: an XML parser context
4196 *
4197 * parse the declaration of the children content model of an element
4198 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4199 *
4200 *
4201 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4202 *
4203 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4204 *
4205 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4206 *
4207 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4208 *
4209 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4210 * TODO Parameter-entity replacement text must be properly nested
4211 * with parenthesized groups. That is to say, if either of the
4212 * opening or closing parentheses in a choice, seq, or Mixed
4213 * construct is contained in the replacement text for a parameter
4214 * entity, both must be contained in the same replacement text. For
4215 * interoperability, if a parameter-entity reference appears in a
4216 * choice, seq, or Mixed construct, its replacement text should not
4217 * be empty, and neither the first nor last non-blank character of
4218 * the replacement text should be a connector (| or ,).
4219 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004220 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004221 * hierarchy.
4222 */
4223xmlElementContentPtr
4224#ifdef VMS
4225xmlParseElementChildrenContentD
4226#else
4227xmlParseElementChildrenContentDecl
4228#endif
4229(xmlParserCtxtPtr ctxt) {
4230 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4231 xmlChar *elem;
4232 xmlChar type = 0;
4233
4234 SKIP_BLANKS;
4235 GROW;
4236 if (RAW == '(') {
4237 /* Recurse on first child */
4238 NEXT;
4239 SKIP_BLANKS;
4240 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4241 SKIP_BLANKS;
4242 GROW;
4243 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004244 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004245 if (elem == NULL) {
4246 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4248 ctxt->sax->error(ctxt->userData,
4249 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4250 ctxt->wellFormed = 0;
4251 ctxt->disableSAX = 1;
4252 return(NULL);
4253 }
4254 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4255 GROW;
4256 if (RAW == '?') {
4257 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4258 NEXT;
4259 } else if (RAW == '*') {
4260 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4261 NEXT;
4262 } else if (RAW == '+') {
4263 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4264 NEXT;
4265 } else {
4266 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4267 }
4268 xmlFree(elem);
4269 GROW;
4270 }
4271 SKIP_BLANKS;
4272 SHRINK;
4273 while (RAW != ')') {
4274 /*
4275 * Each loop we parse one separator and one element.
4276 */
4277 if (RAW == ',') {
4278 if (type == 0) type = CUR;
4279
4280 /*
4281 * Detect "Name | Name , Name" error
4282 */
4283 else if (type != CUR) {
4284 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4286 ctxt->sax->error(ctxt->userData,
4287 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4288 type);
4289 ctxt->wellFormed = 0;
4290 ctxt->disableSAX = 1;
4291 if ((op != NULL) && (op != ret))
4292 xmlFreeElementContent(op);
4293 if ((last != NULL) && (last != ret) &&
4294 (last != ret->c1) && (last != ret->c2))
4295 xmlFreeElementContent(last);
4296 if (ret != NULL)
4297 xmlFreeElementContent(ret);
4298 return(NULL);
4299 }
4300 NEXT;
4301
4302 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4303 if (op == NULL) {
4304 xmlFreeElementContent(ret);
4305 return(NULL);
4306 }
4307 if (last == NULL) {
4308 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004309 if (ret != NULL)
4310 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004311 ret = cur = op;
4312 } else {
4313 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004314 if (op != NULL)
4315 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004316 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004317 if (last != NULL)
4318 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004319 cur =op;
4320 last = NULL;
4321 }
4322 } else if (RAW == '|') {
4323 if (type == 0) type = CUR;
4324
4325 /*
4326 * Detect "Name , Name | Name" error
4327 */
4328 else if (type != CUR) {
4329 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4331 ctxt->sax->error(ctxt->userData,
4332 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4333 type);
4334 ctxt->wellFormed = 0;
4335 ctxt->disableSAX = 1;
4336 if ((op != NULL) && (op != ret) && (op != last))
4337 xmlFreeElementContent(op);
4338 if ((last != NULL) && (last != ret) &&
4339 (last != ret->c1) && (last != ret->c2))
4340 xmlFreeElementContent(last);
4341 if (ret != NULL)
4342 xmlFreeElementContent(ret);
4343 return(NULL);
4344 }
4345 NEXT;
4346
4347 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4348 if (op == NULL) {
4349 if ((op != NULL) && (op != ret))
4350 xmlFreeElementContent(op);
4351 if ((last != NULL) && (last != ret) &&
4352 (last != ret->c1) && (last != ret->c2))
4353 xmlFreeElementContent(last);
4354 if (ret != NULL)
4355 xmlFreeElementContent(ret);
4356 return(NULL);
4357 }
4358 if (last == NULL) {
4359 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004360 if (ret != NULL)
4361 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004362 ret = cur = op;
4363 } else {
4364 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004365 if (op != NULL)
4366 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004367 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004368 if (last != NULL)
4369 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004370 cur =op;
4371 last = NULL;
4372 }
4373 } else {
4374 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4376 ctxt->sax->error(ctxt->userData,
4377 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4378 ctxt->wellFormed = 0;
4379 ctxt->disableSAX = 1;
4380 if ((op != NULL) && (op != ret))
4381 xmlFreeElementContent(op);
4382 if ((last != NULL) && (last != ret) &&
4383 (last != ret->c1) && (last != ret->c2))
4384 xmlFreeElementContent(last);
4385 if (ret != NULL)
4386 xmlFreeElementContent(ret);
4387 return(NULL);
4388 }
4389 GROW;
4390 SKIP_BLANKS;
4391 GROW;
4392 if (RAW == '(') {
4393 /* Recurse on second child */
4394 NEXT;
4395 SKIP_BLANKS;
4396 last = xmlParseElementChildrenContentDecl(ctxt);
4397 SKIP_BLANKS;
4398 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004399 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004400 if (elem == NULL) {
4401 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4403 ctxt->sax->error(ctxt->userData,
4404 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4405 ctxt->wellFormed = 0;
4406 ctxt->disableSAX = 1;
4407 if ((op != NULL) && (op != ret))
4408 xmlFreeElementContent(op);
4409 if ((last != NULL) && (last != ret) &&
4410 (last != ret->c1) && (last != ret->c2))
4411 xmlFreeElementContent(last);
4412 if (ret != NULL)
4413 xmlFreeElementContent(ret);
4414 return(NULL);
4415 }
4416 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4417 xmlFree(elem);
4418 if (RAW == '?') {
4419 last->ocur = XML_ELEMENT_CONTENT_OPT;
4420 NEXT;
4421 } else if (RAW == '*') {
4422 last->ocur = XML_ELEMENT_CONTENT_MULT;
4423 NEXT;
4424 } else if (RAW == '+') {
4425 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4426 NEXT;
4427 } else {
4428 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4429 }
4430 }
4431 SKIP_BLANKS;
4432 GROW;
4433 }
4434 if ((cur != NULL) && (last != NULL)) {
4435 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004436 if (last != NULL)
4437 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004438 }
4439 ctxt->entity = ctxt->input;
4440 NEXT;
4441 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004442 if (ret != NULL)
4443 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004444 NEXT;
4445 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004446 if (ret != NULL)
4447 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004448 NEXT;
4449 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004450 if (ret != NULL)
4451 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004452 NEXT;
4453 }
4454 return(ret);
4455}
4456
4457/**
4458 * xmlParseElementContentDecl:
4459 * @ctxt: an XML parser context
4460 * @name: the name of the element being defined.
4461 * @result: the Element Content pointer will be stored here if any
4462 *
4463 * parse the declaration for an Element content either Mixed or Children,
4464 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4465 *
4466 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4467 *
4468 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4469 */
4470
4471int
4472xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4473 xmlElementContentPtr *result) {
4474
4475 xmlElementContentPtr tree = NULL;
4476 xmlParserInputPtr input = ctxt->input;
4477 int res;
4478
4479 *result = NULL;
4480
4481 if (RAW != '(') {
4482 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4483 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4484 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004485 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004486 ctxt->wellFormed = 0;
4487 ctxt->disableSAX = 1;
4488 return(-1);
4489 }
4490 NEXT;
4491 GROW;
4492 SKIP_BLANKS;
4493 if ((RAW == '#') && (NXT(1) == 'P') &&
4494 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4495 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4496 (NXT(6) == 'A')) {
4497 tree = xmlParseElementMixedContentDecl(ctxt);
4498 res = XML_ELEMENT_TYPE_MIXED;
4499 } else {
4500 tree = xmlParseElementChildrenContentDecl(ctxt);
4501 res = XML_ELEMENT_TYPE_ELEMENT;
4502 }
4503 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4504 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4506 ctxt->sax->error(ctxt->userData,
4507"Element content declaration doesn't start and stop in the same entity\n");
4508 ctxt->wellFormed = 0;
4509 ctxt->disableSAX = 1;
4510 }
4511 SKIP_BLANKS;
4512 *result = tree;
4513 return(res);
4514}
4515
4516/**
4517 * xmlParseElementDecl:
4518 * @ctxt: an XML parser context
4519 *
4520 * parse an Element declaration.
4521 *
4522 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4523 *
4524 * [ VC: Unique Element Type Declaration ]
4525 * No element type may be declared more than once
4526 *
4527 * Returns the type of the element, or -1 in case of error
4528 */
4529int
4530xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4531 xmlChar *name;
4532 int ret = -1;
4533 xmlElementContentPtr content = NULL;
4534
4535 GROW;
4536 if ((RAW == '<') && (NXT(1) == '!') &&
4537 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4538 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4539 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4540 (NXT(8) == 'T')) {
4541 xmlParserInputPtr input = ctxt->input;
4542
4543 SKIP(9);
4544 if (!IS_BLANK(CUR)) {
4545 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4547 ctxt->sax->error(ctxt->userData,
4548 "Space required after 'ELEMENT'\n");
4549 ctxt->wellFormed = 0;
4550 ctxt->disableSAX = 1;
4551 }
4552 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004553 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004554 if (name == NULL) {
4555 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4557 ctxt->sax->error(ctxt->userData,
4558 "xmlParseElementDecl: no name for Element\n");
4559 ctxt->wellFormed = 0;
4560 ctxt->disableSAX = 1;
4561 return(-1);
4562 }
4563 while ((RAW == 0) && (ctxt->inputNr > 1))
4564 xmlPopInput(ctxt);
4565 if (!IS_BLANK(CUR)) {
4566 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4568 ctxt->sax->error(ctxt->userData,
4569 "Space required after the element name\n");
4570 ctxt->wellFormed = 0;
4571 ctxt->disableSAX = 1;
4572 }
4573 SKIP_BLANKS;
4574 if ((RAW == 'E') && (NXT(1) == 'M') &&
4575 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4576 (NXT(4) == 'Y')) {
4577 SKIP(5);
4578 /*
4579 * Element must always be empty.
4580 */
4581 ret = XML_ELEMENT_TYPE_EMPTY;
4582 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4583 (NXT(2) == 'Y')) {
4584 SKIP(3);
4585 /*
4586 * Element is a generic container.
4587 */
4588 ret = XML_ELEMENT_TYPE_ANY;
4589 } else if (RAW == '(') {
4590 ret = xmlParseElementContentDecl(ctxt, name, &content);
4591 } else {
4592 /*
4593 * [ WFC: PEs in Internal Subset ] error handling.
4594 */
4595 if ((RAW == '%') && (ctxt->external == 0) &&
4596 (ctxt->inputNr == 1)) {
4597 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4599 ctxt->sax->error(ctxt->userData,
4600 "PEReference: forbidden within markup decl in internal subset\n");
4601 } else {
4602 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4604 ctxt->sax->error(ctxt->userData,
4605 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4606 }
4607 ctxt->wellFormed = 0;
4608 ctxt->disableSAX = 1;
4609 if (name != NULL) xmlFree(name);
4610 return(-1);
4611 }
4612
4613 SKIP_BLANKS;
4614 /*
4615 * Pop-up of finished entities.
4616 */
4617 while ((RAW == 0) && (ctxt->inputNr > 1))
4618 xmlPopInput(ctxt);
4619 SKIP_BLANKS;
4620
4621 if (RAW != '>') {
4622 ctxt->errNo = XML_ERR_GT_REQUIRED;
4623 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4624 ctxt->sax->error(ctxt->userData,
4625 "xmlParseElementDecl: expected '>' at the end\n");
4626 ctxt->wellFormed = 0;
4627 ctxt->disableSAX = 1;
4628 } else {
4629 if (input != ctxt->input) {
4630 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4632 ctxt->sax->error(ctxt->userData,
4633"Element declaration doesn't start and stop in the same entity\n");
4634 ctxt->wellFormed = 0;
4635 ctxt->disableSAX = 1;
4636 }
4637
4638 NEXT;
4639 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4640 (ctxt->sax->elementDecl != NULL))
4641 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4642 content);
4643 }
4644 if (content != NULL) {
4645 xmlFreeElementContent(content);
4646 }
4647 if (name != NULL) {
4648 xmlFree(name);
4649 }
4650 }
4651 return(ret);
4652}
4653
4654/**
Owen Taylor3473f882001-02-23 17:55:21 +00004655 * xmlParseConditionalSections
4656 * @ctxt: an XML parser context
4657 *
4658 * [61] conditionalSect ::= includeSect | ignoreSect
4659 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4660 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4661 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4662 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4663 */
4664
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004665static void
Owen Taylor3473f882001-02-23 17:55:21 +00004666xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4667 SKIP(3);
4668 SKIP_BLANKS;
4669 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4670 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4671 (NXT(6) == 'E')) {
4672 SKIP(7);
4673 SKIP_BLANKS;
4674 if (RAW != '[') {
4675 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4677 ctxt->sax->error(ctxt->userData,
4678 "XML conditional section '[' expected\n");
4679 ctxt->wellFormed = 0;
4680 ctxt->disableSAX = 1;
4681 } else {
4682 NEXT;
4683 }
4684 if (xmlParserDebugEntities) {
4685 if ((ctxt->input != NULL) && (ctxt->input->filename))
4686 xmlGenericError(xmlGenericErrorContext,
4687 "%s(%d): ", ctxt->input->filename,
4688 ctxt->input->line);
4689 xmlGenericError(xmlGenericErrorContext,
4690 "Entering INCLUDE Conditional Section\n");
4691 }
4692
4693 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4694 (NXT(2) != '>'))) {
4695 const xmlChar *check = CUR_PTR;
4696 int cons = ctxt->input->consumed;
4697 int tok = ctxt->token;
4698
4699 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4700 xmlParseConditionalSections(ctxt);
4701 } else if (IS_BLANK(CUR)) {
4702 NEXT;
4703 } else if (RAW == '%') {
4704 xmlParsePEReference(ctxt);
4705 } else
4706 xmlParseMarkupDecl(ctxt);
4707
4708 /*
4709 * Pop-up of finished entities.
4710 */
4711 while ((RAW == 0) && (ctxt->inputNr > 1))
4712 xmlPopInput(ctxt);
4713
4714 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4715 (tok == ctxt->token)) {
4716 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4717 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4718 ctxt->sax->error(ctxt->userData,
4719 "Content error in the external subset\n");
4720 ctxt->wellFormed = 0;
4721 ctxt->disableSAX = 1;
4722 break;
4723 }
4724 }
4725 if (xmlParserDebugEntities) {
4726 if ((ctxt->input != NULL) && (ctxt->input->filename))
4727 xmlGenericError(xmlGenericErrorContext,
4728 "%s(%d): ", ctxt->input->filename,
4729 ctxt->input->line);
4730 xmlGenericError(xmlGenericErrorContext,
4731 "Leaving INCLUDE Conditional Section\n");
4732 }
4733
4734 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4735 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4736 int state;
4737 int instate;
4738 int depth = 0;
4739
4740 SKIP(6);
4741 SKIP_BLANKS;
4742 if (RAW != '[') {
4743 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4745 ctxt->sax->error(ctxt->userData,
4746 "XML conditional section '[' expected\n");
4747 ctxt->wellFormed = 0;
4748 ctxt->disableSAX = 1;
4749 } else {
4750 NEXT;
4751 }
4752 if (xmlParserDebugEntities) {
4753 if ((ctxt->input != NULL) && (ctxt->input->filename))
4754 xmlGenericError(xmlGenericErrorContext,
4755 "%s(%d): ", ctxt->input->filename,
4756 ctxt->input->line);
4757 xmlGenericError(xmlGenericErrorContext,
4758 "Entering IGNORE Conditional Section\n");
4759 }
4760
4761 /*
4762 * Parse up to the end of the conditionnal section
4763 * But disable SAX event generating DTD building in the meantime
4764 */
4765 state = ctxt->disableSAX;
4766 instate = ctxt->instate;
4767 ctxt->disableSAX = 1;
4768 ctxt->instate = XML_PARSER_IGNORE;
4769
4770 while (depth >= 0) {
4771 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4772 depth++;
4773 SKIP(3);
4774 continue;
4775 }
4776 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4777 if (--depth >= 0) SKIP(3);
4778 continue;
4779 }
4780 NEXT;
4781 continue;
4782 }
4783
4784 ctxt->disableSAX = state;
4785 ctxt->instate = instate;
4786
4787 if (xmlParserDebugEntities) {
4788 if ((ctxt->input != NULL) && (ctxt->input->filename))
4789 xmlGenericError(xmlGenericErrorContext,
4790 "%s(%d): ", ctxt->input->filename,
4791 ctxt->input->line);
4792 xmlGenericError(xmlGenericErrorContext,
4793 "Leaving IGNORE Conditional Section\n");
4794 }
4795
4796 } else {
4797 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4799 ctxt->sax->error(ctxt->userData,
4800 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4801 ctxt->wellFormed = 0;
4802 ctxt->disableSAX = 1;
4803 }
4804
4805 if (RAW == 0)
4806 SHRINK;
4807
4808 if (RAW == 0) {
4809 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4810 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4811 ctxt->sax->error(ctxt->userData,
4812 "XML conditional section not closed\n");
4813 ctxt->wellFormed = 0;
4814 ctxt->disableSAX = 1;
4815 } else {
4816 SKIP(3);
4817 }
4818}
4819
4820/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004821 * xmlParseMarkupDecl:
4822 * @ctxt: an XML parser context
4823 *
4824 * parse Markup declarations
4825 *
4826 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4827 * NotationDecl | PI | Comment
4828 *
4829 * [ VC: Proper Declaration/PE Nesting ]
4830 * Parameter-entity replacement text must be properly nested with
4831 * markup declarations. That is to say, if either the first character
4832 * or the last character of a markup declaration (markupdecl above) is
4833 * contained in the replacement text for a parameter-entity reference,
4834 * both must be contained in the same replacement text.
4835 *
4836 * [ WFC: PEs in Internal Subset ]
4837 * In the internal DTD subset, parameter-entity references can occur
4838 * only where markup declarations can occur, not within markup declarations.
4839 * (This does not apply to references that occur in external parameter
4840 * entities or to the external subset.)
4841 */
4842void
4843xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4844 GROW;
4845 xmlParseElementDecl(ctxt);
4846 xmlParseAttributeListDecl(ctxt);
4847 xmlParseEntityDecl(ctxt);
4848 xmlParseNotationDecl(ctxt);
4849 xmlParsePI(ctxt);
4850 xmlParseComment(ctxt);
4851 /*
4852 * This is only for internal subset. On external entities,
4853 * the replacement is done before parsing stage
4854 */
4855 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4856 xmlParsePEReference(ctxt);
4857
4858 /*
4859 * Conditional sections are allowed from entities included
4860 * by PE References in the internal subset.
4861 */
4862 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4863 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4864 xmlParseConditionalSections(ctxt);
4865 }
4866 }
4867
4868 ctxt->instate = XML_PARSER_DTD;
4869}
4870
4871/**
4872 * xmlParseTextDecl:
4873 * @ctxt: an XML parser context
4874 *
4875 * parse an XML declaration header for external entities
4876 *
4877 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4878 *
4879 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4880 */
4881
4882void
4883xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4884 xmlChar *version;
4885
4886 /*
4887 * We know that '<?xml' is here.
4888 */
4889 if ((RAW == '<') && (NXT(1) == '?') &&
4890 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4891 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4892 SKIP(5);
4893 } else {
4894 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4896 ctxt->sax->error(ctxt->userData,
4897 "Text declaration '<?xml' required\n");
4898 ctxt->wellFormed = 0;
4899 ctxt->disableSAX = 1;
4900
4901 return;
4902 }
4903
4904 if (!IS_BLANK(CUR)) {
4905 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4907 ctxt->sax->error(ctxt->userData,
4908 "Space needed after '<?xml'\n");
4909 ctxt->wellFormed = 0;
4910 ctxt->disableSAX = 1;
4911 }
4912 SKIP_BLANKS;
4913
4914 /*
4915 * We may have the VersionInfo here.
4916 */
4917 version = xmlParseVersionInfo(ctxt);
4918 if (version == NULL)
4919 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4920 ctxt->input->version = version;
4921
4922 /*
4923 * We must have the encoding declaration
4924 */
4925 if (!IS_BLANK(CUR)) {
4926 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4928 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4929 ctxt->wellFormed = 0;
4930 ctxt->disableSAX = 1;
4931 }
4932 xmlParseEncodingDecl(ctxt);
4933 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4934 /*
4935 * The XML REC instructs us to stop parsing right here
4936 */
4937 return;
4938 }
4939
4940 SKIP_BLANKS;
4941 if ((RAW == '?') && (NXT(1) == '>')) {
4942 SKIP(2);
4943 } else if (RAW == '>') {
4944 /* Deprecated old WD ... */
4945 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4947 ctxt->sax->error(ctxt->userData,
4948 "XML declaration must end-up with '?>'\n");
4949 ctxt->wellFormed = 0;
4950 ctxt->disableSAX = 1;
4951 NEXT;
4952 } else {
4953 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4954 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4955 ctxt->sax->error(ctxt->userData,
4956 "parsing XML declaration: '?>' expected\n");
4957 ctxt->wellFormed = 0;
4958 ctxt->disableSAX = 1;
4959 MOVETO_ENDTAG(CUR_PTR);
4960 NEXT;
4961 }
4962}
4963
4964/**
Owen Taylor3473f882001-02-23 17:55:21 +00004965 * xmlParseExternalSubset:
4966 * @ctxt: an XML parser context
4967 * @ExternalID: the external identifier
4968 * @SystemID: the system identifier (or URL)
4969 *
4970 * parse Markup declarations from an external subset
4971 *
4972 * [30] extSubset ::= textDecl? extSubsetDecl
4973 *
4974 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4975 */
4976void
4977xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4978 const xmlChar *SystemID) {
4979 GROW;
4980 if ((RAW == '<') && (NXT(1) == '?') &&
4981 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4982 (NXT(4) == 'l')) {
4983 xmlParseTextDecl(ctxt);
4984 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4985 /*
4986 * The XML REC instructs us to stop parsing right here
4987 */
4988 ctxt->instate = XML_PARSER_EOF;
4989 return;
4990 }
4991 }
4992 if (ctxt->myDoc == NULL) {
4993 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4994 }
4995 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4996 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4997
4998 ctxt->instate = XML_PARSER_DTD;
4999 ctxt->external = 1;
5000 while (((RAW == '<') && (NXT(1) == '?')) ||
5001 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005002 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005003 const xmlChar *check = CUR_PTR;
5004 int cons = ctxt->input->consumed;
5005 int tok = ctxt->token;
5006
5007 GROW;
5008 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5009 xmlParseConditionalSections(ctxt);
5010 } else if (IS_BLANK(CUR)) {
5011 NEXT;
5012 } else if (RAW == '%') {
5013 xmlParsePEReference(ctxt);
5014 } else
5015 xmlParseMarkupDecl(ctxt);
5016
5017 /*
5018 * Pop-up of finished entities.
5019 */
5020 while ((RAW == 0) && (ctxt->inputNr > 1))
5021 xmlPopInput(ctxt);
5022
5023 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5024 (tok == ctxt->token)) {
5025 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5027 ctxt->sax->error(ctxt->userData,
5028 "Content error in the external subset\n");
5029 ctxt->wellFormed = 0;
5030 ctxt->disableSAX = 1;
5031 break;
5032 }
5033 }
5034
5035 if (RAW != 0) {
5036 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5038 ctxt->sax->error(ctxt->userData,
5039 "Extra content at the end of the document\n");
5040 ctxt->wellFormed = 0;
5041 ctxt->disableSAX = 1;
5042 }
5043
5044}
5045
5046/**
5047 * xmlParseReference:
5048 * @ctxt: an XML parser context
5049 *
5050 * parse and handle entity references in content, depending on the SAX
5051 * interface, this may end-up in a call to character() if this is a
5052 * CharRef, a predefined entity, if there is no reference() callback.
5053 * or if the parser was asked to switch to that mode.
5054 *
5055 * [67] Reference ::= EntityRef | CharRef
5056 */
5057void
5058xmlParseReference(xmlParserCtxtPtr ctxt) {
5059 xmlEntityPtr ent;
5060 xmlChar *val;
5061 if (RAW != '&') return;
5062
5063 if (NXT(1) == '#') {
5064 int i = 0;
5065 xmlChar out[10];
5066 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005067 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005068
5069 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5070 /*
5071 * So we are using non-UTF-8 buffers
5072 * Check that the char fit on 8bits, if not
5073 * generate a CharRef.
5074 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005075 if (value <= 0xFF) {
5076 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005077 out[1] = 0;
5078 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5079 (!ctxt->disableSAX))
5080 ctxt->sax->characters(ctxt->userData, out, 1);
5081 } else {
5082 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005083 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005084 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005085 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005086 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5087 (!ctxt->disableSAX))
5088 ctxt->sax->reference(ctxt->userData, out);
5089 }
5090 } else {
5091 /*
5092 * Just encode the value in UTF-8
5093 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005094 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005095 out[i] = 0;
5096 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5097 (!ctxt->disableSAX))
5098 ctxt->sax->characters(ctxt->userData, out, i);
5099 }
5100 } else {
5101 ent = xmlParseEntityRef(ctxt);
5102 if (ent == NULL) return;
5103 if ((ent->name != NULL) &&
5104 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5105 xmlNodePtr list = NULL;
5106 int ret;
5107
5108
5109 /*
5110 * The first reference to the entity trigger a parsing phase
5111 * where the ent->children is filled with the result from
5112 * the parsing.
5113 */
5114 if (ent->children == NULL) {
5115 xmlChar *value;
5116 value = ent->content;
5117
5118 /*
5119 * Check that this entity is well formed
5120 */
5121 if ((value != NULL) &&
5122 (value[1] == 0) && (value[0] == '<') &&
5123 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5124 /*
5125 * DONE: get definite answer on this !!!
5126 * Lots of entity decls are used to declare a single
5127 * char
5128 * <!ENTITY lt "<">
5129 * Which seems to be valid since
5130 * 2.4: The ampersand character (&) and the left angle
5131 * bracket (<) may appear in their literal form only
5132 * when used ... They are also legal within the literal
5133 * entity value of an internal entity declaration;i
5134 * see "4.3.2 Well-Formed Parsed Entities".
5135 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5136 * Looking at the OASIS test suite and James Clark
5137 * tests, this is broken. However the XML REC uses
5138 * it. Is the XML REC not well-formed ????
5139 * This is a hack to avoid this problem
5140 *
5141 * ANSWER: since lt gt amp .. are already defined,
5142 * this is a redefinition and hence the fact that the
5143 * contentis not well balanced is not a Wf error, this
5144 * is lousy but acceptable.
5145 */
5146 list = xmlNewDocText(ctxt->myDoc, value);
5147 if (list != NULL) {
5148 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5149 (ent->children == NULL)) {
5150 ent->children = list;
5151 ent->last = list;
5152 list->parent = (xmlNodePtr) ent;
5153 } else {
5154 xmlFreeNodeList(list);
5155 }
5156 } else if (list != NULL) {
5157 xmlFreeNodeList(list);
5158 }
5159 } else {
5160 /*
5161 * 4.3.2: An internal general parsed entity is well-formed
5162 * if its replacement text matches the production labeled
5163 * content.
5164 */
5165 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5166 ctxt->depth++;
5167 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5168 ctxt->sax, NULL, ctxt->depth,
5169 value, &list);
5170 ctxt->depth--;
5171 } else if (ent->etype ==
5172 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5173 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005174 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005175 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005176 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005177 ctxt->depth--;
5178 } else {
5179 ret = -1;
5180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5181 ctxt->sax->error(ctxt->userData,
5182 "Internal: invalid entity type\n");
5183 }
5184 if (ret == XML_ERR_ENTITY_LOOP) {
5185 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5187 ctxt->sax->error(ctxt->userData,
5188 "Detected entity reference loop\n");
5189 ctxt->wellFormed = 0;
5190 ctxt->disableSAX = 1;
5191 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005192 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5193 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005194 (ent->children == NULL)) {
5195 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005196 if (ctxt->replaceEntities) {
5197 /*
5198 * Prune it directly in the generated document
5199 * except for single text nodes.
5200 */
5201 if ((list->type == XML_TEXT_NODE) &&
5202 (list->next == NULL)) {
5203 list->parent = (xmlNodePtr) ent;
5204 list = NULL;
5205 } else {
5206 while (list != NULL) {
5207 list->parent = (xmlNodePtr) ctxt->node;
5208 if (list->next == NULL)
5209 ent->last = list;
5210 list = list->next;
5211 }
5212 list = ent->children;
5213 }
5214 } else {
5215 while (list != NULL) {
5216 list->parent = (xmlNodePtr) ent;
5217 if (list->next == NULL)
5218 ent->last = list;
5219 list = list->next;
5220 }
Owen Taylor3473f882001-02-23 17:55:21 +00005221 }
5222 } else {
5223 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005224 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005225 }
5226 } else if (ret > 0) {
5227 ctxt->errNo = ret;
5228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5229 ctxt->sax->error(ctxt->userData,
5230 "Entity value required\n");
5231 ctxt->wellFormed = 0;
5232 ctxt->disableSAX = 1;
5233 } else if (list != NULL) {
5234 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005235 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005236 }
5237 }
5238 }
5239 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5240 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5241 /*
5242 * Create a node.
5243 */
5244 ctxt->sax->reference(ctxt->userData, ent->name);
5245 return;
5246 } else if (ctxt->replaceEntities) {
5247 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5248 /*
5249 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005250 * a simple tree copy for all references except the first
5251 * In the first occurrence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005252 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005253 if (list == NULL) {
5254 xmlNodePtr new, cur;
5255 cur = ent->children;
5256 while (cur != NULL) {
5257 new = xmlCopyNode(cur, 1);
5258 xmlAddChild(ctxt->node, new);
5259 if (cur == ent->last)
5260 break;
5261 cur = cur->next;
5262 }
5263 } else {
5264 /*
5265 * the name change is to avoid coalescing of the
5266 * node with a possible previous text one which
5267 * would make ent->children a dangling pointer
5268 */
5269 if (ent->children->type == XML_TEXT_NODE)
5270 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5271 if ((ent->last != ent->children) &&
5272 (ent->last->type == XML_TEXT_NODE))
5273 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5274 xmlAddChildList(ctxt->node, ent->children);
5275 }
5276
Owen Taylor3473f882001-02-23 17:55:21 +00005277 /*
5278 * This is to avoid a nasty side effect, see
5279 * characters() in SAX.c
5280 */
5281 ctxt->nodemem = 0;
5282 ctxt->nodelen = 0;
5283 return;
5284 } else {
5285 /*
5286 * Probably running in SAX mode
5287 */
5288 xmlParserInputPtr input;
5289
5290 input = xmlNewEntityInputStream(ctxt, ent);
5291 xmlPushInput(ctxt, input);
5292 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5293 (RAW == '<') && (NXT(1) == '?') &&
5294 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5295 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5296 xmlParseTextDecl(ctxt);
5297 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5298 /*
5299 * The XML REC instructs us to stop parsing right here
5300 */
5301 ctxt->instate = XML_PARSER_EOF;
5302 return;
5303 }
5304 if (input->standalone == 1) {
5305 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5307 ctxt->sax->error(ctxt->userData,
5308 "external parsed entities cannot be standalone\n");
5309 ctxt->wellFormed = 0;
5310 ctxt->disableSAX = 1;
5311 }
5312 }
5313 return;
5314 }
5315 }
5316 } else {
5317 val = ent->content;
5318 if (val == NULL) return;
5319 /*
5320 * inline the entity.
5321 */
5322 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5323 (!ctxt->disableSAX))
5324 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5325 }
5326 }
5327}
5328
5329/**
5330 * xmlParseEntityRef:
5331 * @ctxt: an XML parser context
5332 *
5333 * parse ENTITY references declarations
5334 *
5335 * [68] EntityRef ::= '&' Name ';'
5336 *
5337 * [ WFC: Entity Declared ]
5338 * In a document without any DTD, a document with only an internal DTD
5339 * subset which contains no parameter entity references, or a document
5340 * with "standalone='yes'", the Name given in the entity reference
5341 * must match that in an entity declaration, except that well-formed
5342 * documents need not declare any of the following entities: amp, lt,
5343 * gt, apos, quot. The declaration of a parameter entity must precede
5344 * any reference to it. Similarly, the declaration of a general entity
5345 * must precede any reference to it which appears in a default value in an
5346 * attribute-list declaration. Note that if entities are declared in the
5347 * external subset or in external parameter entities, a non-validating
5348 * processor is not obligated to read and process their declarations;
5349 * for such documents, the rule that an entity must be declared is a
5350 * well-formedness constraint only if standalone='yes'.
5351 *
5352 * [ WFC: Parsed Entity ]
5353 * An entity reference must not contain the name of an unparsed entity
5354 *
5355 * Returns the xmlEntityPtr if found, or NULL otherwise.
5356 */
5357xmlEntityPtr
5358xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5359 xmlChar *name;
5360 xmlEntityPtr ent = NULL;
5361
5362 GROW;
5363
5364 if (RAW == '&') {
5365 NEXT;
5366 name = xmlParseName(ctxt);
5367 if (name == NULL) {
5368 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5370 ctxt->sax->error(ctxt->userData,
5371 "xmlParseEntityRef: no name\n");
5372 ctxt->wellFormed = 0;
5373 ctxt->disableSAX = 1;
5374 } else {
5375 if (RAW == ';') {
5376 NEXT;
5377 /*
5378 * Ask first SAX for entity resolution, otherwise try the
5379 * predefined set.
5380 */
5381 if (ctxt->sax != NULL) {
5382 if (ctxt->sax->getEntity != NULL)
5383 ent = ctxt->sax->getEntity(ctxt->userData, name);
5384 if (ent == NULL)
5385 ent = xmlGetPredefinedEntity(name);
5386 }
5387 /*
5388 * [ WFC: Entity Declared ]
5389 * In a document without any DTD, a document with only an
5390 * internal DTD subset which contains no parameter entity
5391 * references, or a document with "standalone='yes'", the
5392 * Name given in the entity reference must match that in an
5393 * entity declaration, except that well-formed documents
5394 * need not declare any of the following entities: amp, lt,
5395 * gt, apos, quot.
5396 * The declaration of a parameter entity must precede any
5397 * reference to it.
5398 * Similarly, the declaration of a general entity must
5399 * precede any reference to it which appears in a default
5400 * value in an attribute-list declaration. Note that if
5401 * entities are declared in the external subset or in
5402 * external parameter entities, a non-validating processor
5403 * is not obligated to read and process their declarations;
5404 * for such documents, the rule that an entity must be
5405 * declared is a well-formedness constraint only if
5406 * standalone='yes'.
5407 */
5408 if (ent == NULL) {
5409 if ((ctxt->standalone == 1) ||
5410 ((ctxt->hasExternalSubset == 0) &&
5411 (ctxt->hasPErefs == 0))) {
5412 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5414 ctxt->sax->error(ctxt->userData,
5415 "Entity '%s' not defined\n", name);
5416 ctxt->wellFormed = 0;
5417 ctxt->disableSAX = 1;
5418 } else {
5419 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005420 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005421 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005422 "Entity '%s' not defined\n", name);
5423 }
5424 }
5425
5426 /*
5427 * [ WFC: Parsed Entity ]
5428 * An entity reference must not contain the name of an
5429 * unparsed entity
5430 */
5431 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5432 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5434 ctxt->sax->error(ctxt->userData,
5435 "Entity reference to unparsed entity %s\n", name);
5436 ctxt->wellFormed = 0;
5437 ctxt->disableSAX = 1;
5438 }
5439
5440 /*
5441 * [ WFC: No External Entity References ]
5442 * Attribute values cannot contain direct or indirect
5443 * entity references to external entities.
5444 */
5445 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5446 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5447 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5448 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5449 ctxt->sax->error(ctxt->userData,
5450 "Attribute references external entity '%s'\n", name);
5451 ctxt->wellFormed = 0;
5452 ctxt->disableSAX = 1;
5453 }
5454 /*
5455 * [ WFC: No < in Attribute Values ]
5456 * The replacement text of any entity referred to directly or
5457 * indirectly in an attribute value (other than "&lt;") must
5458 * not contain a <.
5459 */
5460 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5461 (ent != NULL) &&
5462 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5463 (ent->content != NULL) &&
5464 (xmlStrchr(ent->content, '<'))) {
5465 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5467 ctxt->sax->error(ctxt->userData,
5468 "'<' in entity '%s' is not allowed in attributes values\n", name);
5469 ctxt->wellFormed = 0;
5470 ctxt->disableSAX = 1;
5471 }
5472
5473 /*
5474 * Internal check, no parameter entities here ...
5475 */
5476 else {
5477 switch (ent->etype) {
5478 case XML_INTERNAL_PARAMETER_ENTITY:
5479 case XML_EXTERNAL_PARAMETER_ENTITY:
5480 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5482 ctxt->sax->error(ctxt->userData,
5483 "Attempt to reference the parameter entity '%s'\n", name);
5484 ctxt->wellFormed = 0;
5485 ctxt->disableSAX = 1;
5486 break;
5487 default:
5488 break;
5489 }
5490 }
5491
5492 /*
5493 * [ WFC: No Recursion ]
5494 * A parsed entity must not contain a recursive reference
5495 * to itself, either directly or indirectly.
5496 * Done somewhere else
5497 */
5498
5499 } else {
5500 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5502 ctxt->sax->error(ctxt->userData,
5503 "xmlParseEntityRef: expecting ';'\n");
5504 ctxt->wellFormed = 0;
5505 ctxt->disableSAX = 1;
5506 }
5507 xmlFree(name);
5508 }
5509 }
5510 return(ent);
5511}
5512
5513/**
5514 * xmlParseStringEntityRef:
5515 * @ctxt: an XML parser context
5516 * @str: a pointer to an index in the string
5517 *
5518 * parse ENTITY references declarations, but this version parses it from
5519 * a string value.
5520 *
5521 * [68] EntityRef ::= '&' Name ';'
5522 *
5523 * [ WFC: Entity Declared ]
5524 * In a document without any DTD, a document with only an internal DTD
5525 * subset which contains no parameter entity references, or a document
5526 * with "standalone='yes'", the Name given in the entity reference
5527 * must match that in an entity declaration, except that well-formed
5528 * documents need not declare any of the following entities: amp, lt,
5529 * gt, apos, quot. The declaration of a parameter entity must precede
5530 * any reference to it. Similarly, the declaration of a general entity
5531 * must precede any reference to it which appears in a default value in an
5532 * attribute-list declaration. Note that if entities are declared in the
5533 * external subset or in external parameter entities, a non-validating
5534 * processor is not obligated to read and process their declarations;
5535 * for such documents, the rule that an entity must be declared is a
5536 * well-formedness constraint only if standalone='yes'.
5537 *
5538 * [ WFC: Parsed Entity ]
5539 * An entity reference must not contain the name of an unparsed entity
5540 *
5541 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5542 * is updated to the current location in the string.
5543 */
5544xmlEntityPtr
5545xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5546 xmlChar *name;
5547 const xmlChar *ptr;
5548 xmlChar cur;
5549 xmlEntityPtr ent = NULL;
5550
5551 if ((str == NULL) || (*str == NULL))
5552 return(NULL);
5553 ptr = *str;
5554 cur = *ptr;
5555 if (cur == '&') {
5556 ptr++;
5557 cur = *ptr;
5558 name = xmlParseStringName(ctxt, &ptr);
5559 if (name == NULL) {
5560 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5562 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005563 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005564 ctxt->wellFormed = 0;
5565 ctxt->disableSAX = 1;
5566 } else {
5567 if (*ptr == ';') {
5568 ptr++;
5569 /*
5570 * Ask first SAX for entity resolution, otherwise try the
5571 * predefined set.
5572 */
5573 if (ctxt->sax != NULL) {
5574 if (ctxt->sax->getEntity != NULL)
5575 ent = ctxt->sax->getEntity(ctxt->userData, name);
5576 if (ent == NULL)
5577 ent = xmlGetPredefinedEntity(name);
5578 }
5579 /*
5580 * [ WFC: Entity Declared ]
5581 * In a document without any DTD, a document with only an
5582 * internal DTD subset which contains no parameter entity
5583 * references, or a document with "standalone='yes'", the
5584 * Name given in the entity reference must match that in an
5585 * entity declaration, except that well-formed documents
5586 * need not declare any of the following entities: amp, lt,
5587 * gt, apos, quot.
5588 * The declaration of a parameter entity must precede any
5589 * reference to it.
5590 * Similarly, the declaration of a general entity must
5591 * precede any reference to it which appears in a default
5592 * value in an attribute-list declaration. Note that if
5593 * entities are declared in the external subset or in
5594 * external parameter entities, a non-validating processor
5595 * is not obligated to read and process their declarations;
5596 * for such documents, the rule that an entity must be
5597 * declared is a well-formedness constraint only if
5598 * standalone='yes'.
5599 */
5600 if (ent == NULL) {
5601 if ((ctxt->standalone == 1) ||
5602 ((ctxt->hasExternalSubset == 0) &&
5603 (ctxt->hasPErefs == 0))) {
5604 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5606 ctxt->sax->error(ctxt->userData,
5607 "Entity '%s' not defined\n", name);
5608 ctxt->wellFormed = 0;
5609 ctxt->disableSAX = 1;
5610 } else {
5611 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5612 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5613 ctxt->sax->warning(ctxt->userData,
5614 "Entity '%s' not defined\n", name);
5615 }
5616 }
5617
5618 /*
5619 * [ WFC: Parsed Entity ]
5620 * An entity reference must not contain the name of an
5621 * unparsed entity
5622 */
5623 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5624 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5626 ctxt->sax->error(ctxt->userData,
5627 "Entity reference to unparsed entity %s\n", name);
5628 ctxt->wellFormed = 0;
5629 ctxt->disableSAX = 1;
5630 }
5631
5632 /*
5633 * [ WFC: No External Entity References ]
5634 * Attribute values cannot contain direct or indirect
5635 * entity references to external entities.
5636 */
5637 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5638 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5639 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5641 ctxt->sax->error(ctxt->userData,
5642 "Attribute references external entity '%s'\n", name);
5643 ctxt->wellFormed = 0;
5644 ctxt->disableSAX = 1;
5645 }
5646 /*
5647 * [ WFC: No < in Attribute Values ]
5648 * The replacement text of any entity referred to directly or
5649 * indirectly in an attribute value (other than "&lt;") must
5650 * not contain a <.
5651 */
5652 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5653 (ent != NULL) &&
5654 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5655 (ent->content != NULL) &&
5656 (xmlStrchr(ent->content, '<'))) {
5657 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5659 ctxt->sax->error(ctxt->userData,
5660 "'<' in entity '%s' is not allowed in attributes values\n", name);
5661 ctxt->wellFormed = 0;
5662 ctxt->disableSAX = 1;
5663 }
5664
5665 /*
5666 * Internal check, no parameter entities here ...
5667 */
5668 else {
5669 switch (ent->etype) {
5670 case XML_INTERNAL_PARAMETER_ENTITY:
5671 case XML_EXTERNAL_PARAMETER_ENTITY:
5672 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5673 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5674 ctxt->sax->error(ctxt->userData,
5675 "Attempt to reference the parameter entity '%s'\n", name);
5676 ctxt->wellFormed = 0;
5677 ctxt->disableSAX = 1;
5678 break;
5679 default:
5680 break;
5681 }
5682 }
5683
5684 /*
5685 * [ WFC: No Recursion ]
5686 * A parsed entity must not contain a recursive reference
5687 * to itself, either directly or indirectly.
5688 * Done somewhwere else
5689 */
5690
5691 } else {
5692 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5694 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005695 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005696 ctxt->wellFormed = 0;
5697 ctxt->disableSAX = 1;
5698 }
5699 xmlFree(name);
5700 }
5701 }
5702 *str = ptr;
5703 return(ent);
5704}
5705
5706/**
5707 * xmlParsePEReference:
5708 * @ctxt: an XML parser context
5709 *
5710 * parse PEReference declarations
5711 * The entity content is handled directly by pushing it's content as
5712 * a new input stream.
5713 *
5714 * [69] PEReference ::= '%' Name ';'
5715 *
5716 * [ WFC: No Recursion ]
5717 * A parsed entity must not contain a recursive
5718 * reference to itself, either directly or indirectly.
5719 *
5720 * [ WFC: Entity Declared ]
5721 * In a document without any DTD, a document with only an internal DTD
5722 * subset which contains no parameter entity references, or a document
5723 * with "standalone='yes'", ... ... The declaration of a parameter
5724 * entity must precede any reference to it...
5725 *
5726 * [ VC: Entity Declared ]
5727 * In a document with an external subset or external parameter entities
5728 * with "standalone='no'", ... ... The declaration of a parameter entity
5729 * must precede any reference to it...
5730 *
5731 * [ WFC: In DTD ]
5732 * Parameter-entity references may only appear in the DTD.
5733 * NOTE: misleading but this is handled.
5734 */
5735void
5736xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5737 xmlChar *name;
5738 xmlEntityPtr entity = NULL;
5739 xmlParserInputPtr input;
5740
5741 if (RAW == '%') {
5742 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005743 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005744 if (name == NULL) {
5745 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5746 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5747 ctxt->sax->error(ctxt->userData,
5748 "xmlParsePEReference: no name\n");
5749 ctxt->wellFormed = 0;
5750 ctxt->disableSAX = 1;
5751 } else {
5752 if (RAW == ';') {
5753 NEXT;
5754 if ((ctxt->sax != NULL) &&
5755 (ctxt->sax->getParameterEntity != NULL))
5756 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5757 name);
5758 if (entity == NULL) {
5759 /*
5760 * [ WFC: Entity Declared ]
5761 * In a document without any DTD, a document with only an
5762 * internal DTD subset which contains no parameter entity
5763 * references, or a document with "standalone='yes'", ...
5764 * ... The declaration of a parameter entity must precede
5765 * any reference to it...
5766 */
5767 if ((ctxt->standalone == 1) ||
5768 ((ctxt->hasExternalSubset == 0) &&
5769 (ctxt->hasPErefs == 0))) {
5770 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5771 if ((!ctxt->disableSAX) &&
5772 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5773 ctxt->sax->error(ctxt->userData,
5774 "PEReference: %%%s; not found\n", name);
5775 ctxt->wellFormed = 0;
5776 ctxt->disableSAX = 1;
5777 } else {
5778 /*
5779 * [ VC: Entity Declared ]
5780 * In a document with an external subset or external
5781 * parameter entities with "standalone='no'", ...
5782 * ... The declaration of a parameter entity must precede
5783 * any reference to it...
5784 */
5785 if ((!ctxt->disableSAX) &&
5786 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5787 ctxt->sax->warning(ctxt->userData,
5788 "PEReference: %%%s; not found\n", name);
5789 ctxt->valid = 0;
5790 }
5791 } else {
5792 /*
5793 * Internal checking in case the entity quest barfed
5794 */
5795 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5796 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5797 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5798 ctxt->sax->warning(ctxt->userData,
5799 "Internal: %%%s; is not a parameter entity\n", name);
5800 } else {
5801 /*
5802 * TODO !!!
5803 * handle the extra spaces added before and after
5804 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5805 */
5806 input = xmlNewEntityInputStream(ctxt, entity);
5807 xmlPushInput(ctxt, input);
5808 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5809 (RAW == '<') && (NXT(1) == '?') &&
5810 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5811 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5812 xmlParseTextDecl(ctxt);
5813 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5814 /*
5815 * The XML REC instructs us to stop parsing
5816 * right here
5817 */
5818 ctxt->instate = XML_PARSER_EOF;
5819 xmlFree(name);
5820 return;
5821 }
5822 }
5823 if (ctxt->token == 0)
5824 ctxt->token = ' ';
5825 }
5826 }
5827 ctxt->hasPErefs = 1;
5828 } else {
5829 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5831 ctxt->sax->error(ctxt->userData,
5832 "xmlParsePEReference: expecting ';'\n");
5833 ctxt->wellFormed = 0;
5834 ctxt->disableSAX = 1;
5835 }
5836 xmlFree(name);
5837 }
5838 }
5839}
5840
5841/**
5842 * xmlParseStringPEReference:
5843 * @ctxt: an XML parser context
5844 * @str: a pointer to an index in the string
5845 *
5846 * parse PEReference declarations
5847 *
5848 * [69] PEReference ::= '%' Name ';'
5849 *
5850 * [ WFC: No Recursion ]
5851 * A parsed entity must not contain a recursive
5852 * reference to itself, either directly or indirectly.
5853 *
5854 * [ WFC: Entity Declared ]
5855 * In a document without any DTD, a document with only an internal DTD
5856 * subset which contains no parameter entity references, or a document
5857 * with "standalone='yes'", ... ... The declaration of a parameter
5858 * entity must precede any reference to it...
5859 *
5860 * [ VC: Entity Declared ]
5861 * In a document with an external subset or external parameter entities
5862 * with "standalone='no'", ... ... The declaration of a parameter entity
5863 * must precede any reference to it...
5864 *
5865 * [ WFC: In DTD ]
5866 * Parameter-entity references may only appear in the DTD.
5867 * NOTE: misleading but this is handled.
5868 *
5869 * Returns the string of the entity content.
5870 * str is updated to the current value of the index
5871 */
5872xmlEntityPtr
5873xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5874 const xmlChar *ptr;
5875 xmlChar cur;
5876 xmlChar *name;
5877 xmlEntityPtr entity = NULL;
5878
5879 if ((str == NULL) || (*str == NULL)) return(NULL);
5880 ptr = *str;
5881 cur = *ptr;
5882 if (cur == '%') {
5883 ptr++;
5884 cur = *ptr;
5885 name = xmlParseStringName(ctxt, &ptr);
5886 if (name == NULL) {
5887 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5888 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5889 ctxt->sax->error(ctxt->userData,
5890 "xmlParseStringPEReference: no name\n");
5891 ctxt->wellFormed = 0;
5892 ctxt->disableSAX = 1;
5893 } else {
5894 cur = *ptr;
5895 if (cur == ';') {
5896 ptr++;
5897 cur = *ptr;
5898 if ((ctxt->sax != NULL) &&
5899 (ctxt->sax->getParameterEntity != NULL))
5900 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5901 name);
5902 if (entity == NULL) {
5903 /*
5904 * [ WFC: Entity Declared ]
5905 * In a document without any DTD, a document with only an
5906 * internal DTD subset which contains no parameter entity
5907 * references, or a document with "standalone='yes'", ...
5908 * ... The declaration of a parameter entity must precede
5909 * any reference to it...
5910 */
5911 if ((ctxt->standalone == 1) ||
5912 ((ctxt->hasExternalSubset == 0) &&
5913 (ctxt->hasPErefs == 0))) {
5914 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5916 ctxt->sax->error(ctxt->userData,
5917 "PEReference: %%%s; not found\n", name);
5918 ctxt->wellFormed = 0;
5919 ctxt->disableSAX = 1;
5920 } else {
5921 /*
5922 * [ VC: Entity Declared ]
5923 * In a document with an external subset or external
5924 * parameter entities with "standalone='no'", ...
5925 * ... The declaration of a parameter entity must
5926 * precede any reference to it...
5927 */
5928 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5929 ctxt->sax->warning(ctxt->userData,
5930 "PEReference: %%%s; not found\n", name);
5931 ctxt->valid = 0;
5932 }
5933 } else {
5934 /*
5935 * Internal checking in case the entity quest barfed
5936 */
5937 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5938 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5939 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5940 ctxt->sax->warning(ctxt->userData,
5941 "Internal: %%%s; is not a parameter entity\n", name);
5942 }
5943 }
5944 ctxt->hasPErefs = 1;
5945 } else {
5946 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5948 ctxt->sax->error(ctxt->userData,
5949 "xmlParseStringPEReference: expecting ';'\n");
5950 ctxt->wellFormed = 0;
5951 ctxt->disableSAX = 1;
5952 }
5953 xmlFree(name);
5954 }
5955 }
5956 *str = ptr;
5957 return(entity);
5958}
5959
5960/**
5961 * xmlParseDocTypeDecl:
5962 * @ctxt: an XML parser context
5963 *
5964 * parse a DOCTYPE declaration
5965 *
5966 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5967 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5968 *
5969 * [ VC: Root Element Type ]
5970 * The Name in the document type declaration must match the element
5971 * type of the root element.
5972 */
5973
5974void
5975xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5976 xmlChar *name = NULL;
5977 xmlChar *ExternalID = NULL;
5978 xmlChar *URI = NULL;
5979
5980 /*
5981 * We know that '<!DOCTYPE' has been detected.
5982 */
5983 SKIP(9);
5984
5985 SKIP_BLANKS;
5986
5987 /*
5988 * Parse the DOCTYPE name.
5989 */
5990 name = xmlParseName(ctxt);
5991 if (name == NULL) {
5992 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5994 ctxt->sax->error(ctxt->userData,
5995 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5996 ctxt->wellFormed = 0;
5997 ctxt->disableSAX = 1;
5998 }
5999 ctxt->intSubName = name;
6000
6001 SKIP_BLANKS;
6002
6003 /*
6004 * Check for SystemID and ExternalID
6005 */
6006 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6007
6008 if ((URI != NULL) || (ExternalID != NULL)) {
6009 ctxt->hasExternalSubset = 1;
6010 }
6011 ctxt->extSubURI = URI;
6012 ctxt->extSubSystem = ExternalID;
6013
6014 SKIP_BLANKS;
6015
6016 /*
6017 * Create and update the internal subset.
6018 */
6019 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6020 (!ctxt->disableSAX))
6021 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6022
6023 /*
6024 * Is there any internal subset declarations ?
6025 * they are handled separately in xmlParseInternalSubset()
6026 */
6027 if (RAW == '[')
6028 return;
6029
6030 /*
6031 * We should be at the end of the DOCTYPE declaration.
6032 */
6033 if (RAW != '>') {
6034 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006036 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006037 ctxt->wellFormed = 0;
6038 ctxt->disableSAX = 1;
6039 }
6040 NEXT;
6041}
6042
6043/**
6044 * xmlParseInternalsubset:
6045 * @ctxt: an XML parser context
6046 *
6047 * parse the internal subset declaration
6048 *
6049 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6050 */
6051
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006052static void
Owen Taylor3473f882001-02-23 17:55:21 +00006053xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6054 /*
6055 * Is there any DTD definition ?
6056 */
6057 if (RAW == '[') {
6058 ctxt->instate = XML_PARSER_DTD;
6059 NEXT;
6060 /*
6061 * Parse the succession of Markup declarations and
6062 * PEReferences.
6063 * Subsequence (markupdecl | PEReference | S)*
6064 */
6065 while (RAW != ']') {
6066 const xmlChar *check = CUR_PTR;
6067 int cons = ctxt->input->consumed;
6068
6069 SKIP_BLANKS;
6070 xmlParseMarkupDecl(ctxt);
6071 xmlParsePEReference(ctxt);
6072
6073 /*
6074 * Pop-up of finished entities.
6075 */
6076 while ((RAW == 0) && (ctxt->inputNr > 1))
6077 xmlPopInput(ctxt);
6078
6079 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6080 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6082 ctxt->sax->error(ctxt->userData,
6083 "xmlParseInternalSubset: error detected in Markup declaration\n");
6084 ctxt->wellFormed = 0;
6085 ctxt->disableSAX = 1;
6086 break;
6087 }
6088 }
6089 if (RAW == ']') {
6090 NEXT;
6091 SKIP_BLANKS;
6092 }
6093 }
6094
6095 /*
6096 * We should be at the end of the DOCTYPE declaration.
6097 */
6098 if (RAW != '>') {
6099 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6100 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006101 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006102 ctxt->wellFormed = 0;
6103 ctxt->disableSAX = 1;
6104 }
6105 NEXT;
6106}
6107
6108/**
6109 * xmlParseAttribute:
6110 * @ctxt: an XML parser context
6111 * @value: a xmlChar ** used to store the value of the attribute
6112 *
6113 * parse an attribute
6114 *
6115 * [41] Attribute ::= Name Eq AttValue
6116 *
6117 * [ WFC: No External Entity References ]
6118 * Attribute values cannot contain direct or indirect entity references
6119 * to external entities.
6120 *
6121 * [ WFC: No < in Attribute Values ]
6122 * The replacement text of any entity referred to directly or indirectly in
6123 * an attribute value (other than "&lt;") must not contain a <.
6124 *
6125 * [ VC: Attribute Value Type ]
6126 * The attribute must have been declared; the value must be of the type
6127 * declared for it.
6128 *
6129 * [25] Eq ::= S? '=' S?
6130 *
6131 * With namespace:
6132 *
6133 * [NS 11] Attribute ::= QName Eq AttValue
6134 *
6135 * Also the case QName == xmlns:??? is handled independently as a namespace
6136 * definition.
6137 *
6138 * Returns the attribute name, and the value in *value.
6139 */
6140
6141xmlChar *
6142xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6143 xmlChar *name, *val;
6144
6145 *value = NULL;
6146 name = xmlParseName(ctxt);
6147 if (name == NULL) {
6148 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6150 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6151 ctxt->wellFormed = 0;
6152 ctxt->disableSAX = 1;
6153 return(NULL);
6154 }
6155
6156 /*
6157 * read the value
6158 */
6159 SKIP_BLANKS;
6160 if (RAW == '=') {
6161 NEXT;
6162 SKIP_BLANKS;
6163 val = xmlParseAttValue(ctxt);
6164 ctxt->instate = XML_PARSER_CONTENT;
6165 } else {
6166 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6167 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6168 ctxt->sax->error(ctxt->userData,
6169 "Specification mandate value for attribute %s\n", name);
6170 ctxt->wellFormed = 0;
6171 ctxt->disableSAX = 1;
6172 xmlFree(name);
6173 return(NULL);
6174 }
6175
6176 /*
6177 * Check that xml:lang conforms to the specification
6178 * No more registered as an error, just generate a warning now
6179 * since this was deprecated in XML second edition
6180 */
6181 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6182 if (!xmlCheckLanguageID(val)) {
6183 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6184 ctxt->sax->warning(ctxt->userData,
6185 "Malformed value for xml:lang : %s\n", val);
6186 }
6187 }
6188
6189 /*
6190 * Check that xml:space conforms to the specification
6191 */
6192 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6193 if (xmlStrEqual(val, BAD_CAST "default"))
6194 *(ctxt->space) = 0;
6195 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6196 *(ctxt->space) = 1;
6197 else {
6198 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6200 ctxt->sax->error(ctxt->userData,
6201"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6202 val);
6203 ctxt->wellFormed = 0;
6204 ctxt->disableSAX = 1;
6205 }
6206 }
6207
6208 *value = val;
6209 return(name);
6210}
6211
6212/**
6213 * xmlParseStartTag:
6214 * @ctxt: an XML parser context
6215 *
6216 * parse a start of tag either for rule element or
6217 * EmptyElement. In both case we don't parse the tag closing chars.
6218 *
6219 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6220 *
6221 * [ WFC: Unique Att Spec ]
6222 * No attribute name may appear more than once in the same start-tag or
6223 * empty-element tag.
6224 *
6225 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6226 *
6227 * [ WFC: Unique Att Spec ]
6228 * No attribute name may appear more than once in the same start-tag or
6229 * empty-element tag.
6230 *
6231 * With namespace:
6232 *
6233 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6234 *
6235 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6236 *
6237 * Returns the element name parsed
6238 */
6239
6240xmlChar *
6241xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6242 xmlChar *name;
6243 xmlChar *attname;
6244 xmlChar *attvalue;
6245 const xmlChar **atts = NULL;
6246 int nbatts = 0;
6247 int maxatts = 0;
6248 int i;
6249
6250 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006251 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006252
6253 name = xmlParseName(ctxt);
6254 if (name == NULL) {
6255 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6256 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6257 ctxt->sax->error(ctxt->userData,
6258 "xmlParseStartTag: invalid element name\n");
6259 ctxt->wellFormed = 0;
6260 ctxt->disableSAX = 1;
6261 return(NULL);
6262 }
6263
6264 /*
6265 * Now parse the attributes, it ends up with the ending
6266 *
6267 * (S Attribute)* S?
6268 */
6269 SKIP_BLANKS;
6270 GROW;
6271
Daniel Veillard21a0f912001-02-25 19:54:14 +00006272 while ((RAW != '>') &&
6273 ((RAW != '/') || (NXT(1) != '>')) &&
6274 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006275 const xmlChar *q = CUR_PTR;
6276 int cons = ctxt->input->consumed;
6277
6278 attname = xmlParseAttribute(ctxt, &attvalue);
6279 if ((attname != NULL) && (attvalue != NULL)) {
6280 /*
6281 * [ WFC: Unique Att Spec ]
6282 * No attribute name may appear more than once in the same
6283 * start-tag or empty-element tag.
6284 */
6285 for (i = 0; i < nbatts;i += 2) {
6286 if (xmlStrEqual(atts[i], attname)) {
6287 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6288 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6289 ctxt->sax->error(ctxt->userData,
6290 "Attribute %s redefined\n",
6291 attname);
6292 ctxt->wellFormed = 0;
6293 ctxt->disableSAX = 1;
6294 xmlFree(attname);
6295 xmlFree(attvalue);
6296 goto failed;
6297 }
6298 }
6299
6300 /*
6301 * Add the pair to atts
6302 */
6303 if (atts == NULL) {
6304 maxatts = 10;
6305 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6306 if (atts == NULL) {
6307 xmlGenericError(xmlGenericErrorContext,
6308 "malloc of %ld byte failed\n",
6309 maxatts * (long)sizeof(xmlChar *));
6310 return(NULL);
6311 }
6312 } else if (nbatts + 4 > maxatts) {
6313 maxatts *= 2;
6314 atts = (const xmlChar **) xmlRealloc((void *) atts,
6315 maxatts * sizeof(xmlChar *));
6316 if (atts == NULL) {
6317 xmlGenericError(xmlGenericErrorContext,
6318 "realloc of %ld byte failed\n",
6319 maxatts * (long)sizeof(xmlChar *));
6320 return(NULL);
6321 }
6322 }
6323 atts[nbatts++] = attname;
6324 atts[nbatts++] = attvalue;
6325 atts[nbatts] = NULL;
6326 atts[nbatts + 1] = NULL;
6327 } else {
6328 if (attname != NULL)
6329 xmlFree(attname);
6330 if (attvalue != NULL)
6331 xmlFree(attvalue);
6332 }
6333
6334failed:
6335
6336 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6337 break;
6338 if (!IS_BLANK(RAW)) {
6339 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6341 ctxt->sax->error(ctxt->userData,
6342 "attributes construct error\n");
6343 ctxt->wellFormed = 0;
6344 ctxt->disableSAX = 1;
6345 }
6346 SKIP_BLANKS;
6347 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6348 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6350 ctxt->sax->error(ctxt->userData,
6351 "xmlParseStartTag: problem parsing attributes\n");
6352 ctxt->wellFormed = 0;
6353 ctxt->disableSAX = 1;
6354 break;
6355 }
6356 GROW;
6357 }
6358
6359 /*
6360 * SAX: Start of Element !
6361 */
6362 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6363 (!ctxt->disableSAX))
6364 ctxt->sax->startElement(ctxt->userData, name, atts);
6365
6366 if (atts != NULL) {
6367 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6368 xmlFree((void *) atts);
6369 }
6370 return(name);
6371}
6372
6373/**
6374 * xmlParseEndTag:
6375 * @ctxt: an XML parser context
6376 *
6377 * parse an end of tag
6378 *
6379 * [42] ETag ::= '</' Name S? '>'
6380 *
6381 * With namespace
6382 *
6383 * [NS 9] ETag ::= '</' QName S? '>'
6384 */
6385
6386void
6387xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6388 xmlChar *name;
6389 xmlChar *oldname;
6390
6391 GROW;
6392 if ((RAW != '<') || (NXT(1) != '/')) {
6393 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6394 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6395 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6396 ctxt->wellFormed = 0;
6397 ctxt->disableSAX = 1;
6398 return;
6399 }
6400 SKIP(2);
6401
6402 name = xmlParseName(ctxt);
6403
6404 /*
6405 * We should definitely be at the ending "S? '>'" part
6406 */
6407 GROW;
6408 SKIP_BLANKS;
6409 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6410 ctxt->errNo = XML_ERR_GT_REQUIRED;
6411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6412 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6413 ctxt->wellFormed = 0;
6414 ctxt->disableSAX = 1;
6415 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006416 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006417
6418 /*
6419 * [ WFC: Element Type Match ]
6420 * The Name in an element's end-tag must match the element type in the
6421 * start-tag.
6422 *
6423 */
6424 if ((name == NULL) || (ctxt->name == NULL) ||
6425 (!xmlStrEqual(name, ctxt->name))) {
6426 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6427 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6428 if ((name != NULL) && (ctxt->name != NULL)) {
6429 ctxt->sax->error(ctxt->userData,
6430 "Opening and ending tag mismatch: %s and %s\n",
6431 ctxt->name, name);
6432 } else if (ctxt->name != NULL) {
6433 ctxt->sax->error(ctxt->userData,
6434 "Ending tag eror for: %s\n", ctxt->name);
6435 } else {
6436 ctxt->sax->error(ctxt->userData,
6437 "Ending tag error: internal error ???\n");
6438 }
6439
6440 }
6441 ctxt->wellFormed = 0;
6442 ctxt->disableSAX = 1;
6443 }
6444
6445 /*
6446 * SAX: End of Tag
6447 */
6448 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6449 (!ctxt->disableSAX))
6450 ctxt->sax->endElement(ctxt->userData, name);
6451
6452 if (name != NULL)
6453 xmlFree(name);
6454 oldname = namePop(ctxt);
6455 spacePop(ctxt);
6456 if (oldname != NULL) {
6457#ifdef DEBUG_STACK
6458 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6459#endif
6460 xmlFree(oldname);
6461 }
6462 return;
6463}
6464
6465/**
6466 * xmlParseCDSect:
6467 * @ctxt: an XML parser context
6468 *
6469 * Parse escaped pure raw content.
6470 *
6471 * [18] CDSect ::= CDStart CData CDEnd
6472 *
6473 * [19] CDStart ::= '<![CDATA['
6474 *
6475 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6476 *
6477 * [21] CDEnd ::= ']]>'
6478 */
6479void
6480xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6481 xmlChar *buf = NULL;
6482 int len = 0;
6483 int size = XML_PARSER_BUFFER_SIZE;
6484 int r, rl;
6485 int s, sl;
6486 int cur, l;
6487 int count = 0;
6488
6489 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6490 (NXT(2) == '[') && (NXT(3) == 'C') &&
6491 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6492 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6493 (NXT(8) == '[')) {
6494 SKIP(9);
6495 } else
6496 return;
6497
6498 ctxt->instate = XML_PARSER_CDATA_SECTION;
6499 r = CUR_CHAR(rl);
6500 if (!IS_CHAR(r)) {
6501 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6503 ctxt->sax->error(ctxt->userData,
6504 "CData section not finished\n");
6505 ctxt->wellFormed = 0;
6506 ctxt->disableSAX = 1;
6507 ctxt->instate = XML_PARSER_CONTENT;
6508 return;
6509 }
6510 NEXTL(rl);
6511 s = CUR_CHAR(sl);
6512 if (!IS_CHAR(s)) {
6513 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6515 ctxt->sax->error(ctxt->userData,
6516 "CData section not finished\n");
6517 ctxt->wellFormed = 0;
6518 ctxt->disableSAX = 1;
6519 ctxt->instate = XML_PARSER_CONTENT;
6520 return;
6521 }
6522 NEXTL(sl);
6523 cur = CUR_CHAR(l);
6524 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6525 if (buf == NULL) {
6526 xmlGenericError(xmlGenericErrorContext,
6527 "malloc of %d byte failed\n", size);
6528 return;
6529 }
6530 while (IS_CHAR(cur) &&
6531 ((r != ']') || (s != ']') || (cur != '>'))) {
6532 if (len + 5 >= size) {
6533 size *= 2;
6534 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6535 if (buf == NULL) {
6536 xmlGenericError(xmlGenericErrorContext,
6537 "realloc of %d byte failed\n", size);
6538 return;
6539 }
6540 }
6541 COPY_BUF(rl,buf,len,r);
6542 r = s;
6543 rl = sl;
6544 s = cur;
6545 sl = l;
6546 count++;
6547 if (count > 50) {
6548 GROW;
6549 count = 0;
6550 }
6551 NEXTL(l);
6552 cur = CUR_CHAR(l);
6553 }
6554 buf[len] = 0;
6555 ctxt->instate = XML_PARSER_CONTENT;
6556 if (cur != '>') {
6557 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6559 ctxt->sax->error(ctxt->userData,
6560 "CData section not finished\n%.50s\n", buf);
6561 ctxt->wellFormed = 0;
6562 ctxt->disableSAX = 1;
6563 xmlFree(buf);
6564 return;
6565 }
6566 NEXTL(l);
6567
6568 /*
6569 * Ok the buffer is to be consumed as cdata.
6570 */
6571 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6572 if (ctxt->sax->cdataBlock != NULL)
6573 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006574 else if (ctxt->sax->characters != NULL)
6575 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006576 }
6577 xmlFree(buf);
6578}
6579
6580/**
6581 * xmlParseContent:
6582 * @ctxt: an XML parser context
6583 *
6584 * Parse a content:
6585 *
6586 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6587 */
6588
6589void
6590xmlParseContent(xmlParserCtxtPtr ctxt) {
6591 GROW;
6592 while (((RAW != 0) || (ctxt->token != 0)) &&
6593 ((RAW != '<') || (NXT(1) != '/'))) {
6594 const xmlChar *test = CUR_PTR;
6595 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006596 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006597 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006598
6599 /*
6600 * Handle possible processed charrefs.
6601 */
6602 if (ctxt->token != 0) {
6603 xmlParseCharData(ctxt, 0);
6604 }
6605 /*
6606 * First case : a Processing Instruction.
6607 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006608 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006609 xmlParsePI(ctxt);
6610 }
6611
6612 /*
6613 * Second case : a CDSection
6614 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006615 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006616 (NXT(2) == '[') && (NXT(3) == 'C') &&
6617 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6618 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6619 (NXT(8) == '[')) {
6620 xmlParseCDSect(ctxt);
6621 }
6622
6623 /*
6624 * Third case : a comment
6625 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006626 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006627 (NXT(2) == '-') && (NXT(3) == '-')) {
6628 xmlParseComment(ctxt);
6629 ctxt->instate = XML_PARSER_CONTENT;
6630 }
6631
6632 /*
6633 * Fourth case : a sub-element.
6634 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006635 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006636 xmlParseElement(ctxt);
6637 }
6638
6639 /*
6640 * Fifth case : a reference. If if has not been resolved,
6641 * parsing returns it's Name, create the node
6642 */
6643
Daniel Veillard21a0f912001-02-25 19:54:14 +00006644 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006645 xmlParseReference(ctxt);
6646 }
6647
6648 /*
6649 * Last case, text. Note that References are handled directly.
6650 */
6651 else {
6652 xmlParseCharData(ctxt, 0);
6653 }
6654
6655 GROW;
6656 /*
6657 * Pop-up of finished entities.
6658 */
6659 while ((RAW == 0) && (ctxt->inputNr > 1))
6660 xmlPopInput(ctxt);
6661 SHRINK;
6662
6663 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6664 (tok == ctxt->token)) {
6665 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6667 ctxt->sax->error(ctxt->userData,
6668 "detected an error in element content\n");
6669 ctxt->wellFormed = 0;
6670 ctxt->disableSAX = 1;
6671 ctxt->instate = XML_PARSER_EOF;
6672 break;
6673 }
6674 }
6675}
6676
6677/**
6678 * xmlParseElement:
6679 * @ctxt: an XML parser context
6680 *
6681 * parse an XML element, this is highly recursive
6682 *
6683 * [39] element ::= EmptyElemTag | STag content ETag
6684 *
6685 * [ WFC: Element Type Match ]
6686 * The Name in an element's end-tag must match the element type in the
6687 * start-tag.
6688 *
6689 * [ VC: Element Valid ]
6690 * An element is valid if there is a declaration matching elementdecl
6691 * where the Name matches the element type and one of the following holds:
6692 * - The declaration matches EMPTY and the element has no content.
6693 * - The declaration matches children and the sequence of child elements
6694 * belongs to the language generated by the regular expression in the
6695 * content model, with optional white space (characters matching the
6696 * nonterminal S) between each pair of child elements.
6697 * - The declaration matches Mixed and the content consists of character
6698 * data and child elements whose types match names in the content model.
6699 * - The declaration matches ANY, and the types of any child elements have
6700 * been declared.
6701 */
6702
6703void
6704xmlParseElement(xmlParserCtxtPtr ctxt) {
6705 const xmlChar *openTag = CUR_PTR;
6706 xmlChar *name;
6707 xmlChar *oldname;
6708 xmlParserNodeInfo node_info;
6709 xmlNodePtr ret;
6710
6711 /* Capture start position */
6712 if (ctxt->record_info) {
6713 node_info.begin_pos = ctxt->input->consumed +
6714 (CUR_PTR - ctxt->input->base);
6715 node_info.begin_line = ctxt->input->line;
6716 }
6717
6718 if (ctxt->spaceNr == 0)
6719 spacePush(ctxt, -1);
6720 else
6721 spacePush(ctxt, *ctxt->space);
6722
6723 name = xmlParseStartTag(ctxt);
6724 if (name == NULL) {
6725 spacePop(ctxt);
6726 return;
6727 }
6728 namePush(ctxt, name);
6729 ret = ctxt->node;
6730
6731 /*
6732 * [ VC: Root Element Type ]
6733 * The Name in the document type declaration must match the element
6734 * type of the root element.
6735 */
6736 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6737 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6738 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6739
6740 /*
6741 * Check for an Empty Element.
6742 */
6743 if ((RAW == '/') && (NXT(1) == '>')) {
6744 SKIP(2);
6745 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6746 (!ctxt->disableSAX))
6747 ctxt->sax->endElement(ctxt->userData, name);
6748 oldname = namePop(ctxt);
6749 spacePop(ctxt);
6750 if (oldname != NULL) {
6751#ifdef DEBUG_STACK
6752 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6753#endif
6754 xmlFree(oldname);
6755 }
6756 if ( ret != NULL && ctxt->record_info ) {
6757 node_info.end_pos = ctxt->input->consumed +
6758 (CUR_PTR - ctxt->input->base);
6759 node_info.end_line = ctxt->input->line;
6760 node_info.node = ret;
6761 xmlParserAddNodeInfo(ctxt, &node_info);
6762 }
6763 return;
6764 }
6765 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006766 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006767 } else {
6768 ctxt->errNo = XML_ERR_GT_REQUIRED;
6769 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6770 ctxt->sax->error(ctxt->userData,
6771 "Couldn't find end of Start Tag\n%.30s\n",
6772 openTag);
6773 ctxt->wellFormed = 0;
6774 ctxt->disableSAX = 1;
6775
6776 /*
6777 * end of parsing of this node.
6778 */
6779 nodePop(ctxt);
6780 oldname = namePop(ctxt);
6781 spacePop(ctxt);
6782 if (oldname != NULL) {
6783#ifdef DEBUG_STACK
6784 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6785#endif
6786 xmlFree(oldname);
6787 }
6788
6789 /*
6790 * Capture end position and add node
6791 */
6792 if ( ret != NULL && ctxt->record_info ) {
6793 node_info.end_pos = ctxt->input->consumed +
6794 (CUR_PTR - ctxt->input->base);
6795 node_info.end_line = ctxt->input->line;
6796 node_info.node = ret;
6797 xmlParserAddNodeInfo(ctxt, &node_info);
6798 }
6799 return;
6800 }
6801
6802 /*
6803 * Parse the content of the element:
6804 */
6805 xmlParseContent(ctxt);
6806 if (!IS_CHAR(RAW)) {
6807 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6809 ctxt->sax->error(ctxt->userData,
6810 "Premature end of data in tag %.30s\n", openTag);
6811 ctxt->wellFormed = 0;
6812 ctxt->disableSAX = 1;
6813
6814 /*
6815 * end of parsing of this node.
6816 */
6817 nodePop(ctxt);
6818 oldname = namePop(ctxt);
6819 spacePop(ctxt);
6820 if (oldname != NULL) {
6821#ifdef DEBUG_STACK
6822 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6823#endif
6824 xmlFree(oldname);
6825 }
6826 return;
6827 }
6828
6829 /*
6830 * parse the end of tag: '</' should be here.
6831 */
6832 xmlParseEndTag(ctxt);
6833
6834 /*
6835 * Capture end position and add node
6836 */
6837 if ( ret != NULL && ctxt->record_info ) {
6838 node_info.end_pos = ctxt->input->consumed +
6839 (CUR_PTR - ctxt->input->base);
6840 node_info.end_line = ctxt->input->line;
6841 node_info.node = ret;
6842 xmlParserAddNodeInfo(ctxt, &node_info);
6843 }
6844}
6845
6846/**
6847 * xmlParseVersionNum:
6848 * @ctxt: an XML parser context
6849 *
6850 * parse the XML version value.
6851 *
6852 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6853 *
6854 * Returns the string giving the XML version number, or NULL
6855 */
6856xmlChar *
6857xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6858 xmlChar *buf = NULL;
6859 int len = 0;
6860 int size = 10;
6861 xmlChar cur;
6862
6863 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6864 if (buf == NULL) {
6865 xmlGenericError(xmlGenericErrorContext,
6866 "malloc of %d byte failed\n", size);
6867 return(NULL);
6868 }
6869 cur = CUR;
6870 while (((cur >= 'a') && (cur <= 'z')) ||
6871 ((cur >= 'A') && (cur <= 'Z')) ||
6872 ((cur >= '0') && (cur <= '9')) ||
6873 (cur == '_') || (cur == '.') ||
6874 (cur == ':') || (cur == '-')) {
6875 if (len + 1 >= size) {
6876 size *= 2;
6877 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6878 if (buf == NULL) {
6879 xmlGenericError(xmlGenericErrorContext,
6880 "realloc of %d byte failed\n", size);
6881 return(NULL);
6882 }
6883 }
6884 buf[len++] = cur;
6885 NEXT;
6886 cur=CUR;
6887 }
6888 buf[len] = 0;
6889 return(buf);
6890}
6891
6892/**
6893 * xmlParseVersionInfo:
6894 * @ctxt: an XML parser context
6895 *
6896 * parse the XML version.
6897 *
6898 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6899 *
6900 * [25] Eq ::= S? '=' S?
6901 *
6902 * Returns the version string, e.g. "1.0"
6903 */
6904
6905xmlChar *
6906xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6907 xmlChar *version = NULL;
6908 const xmlChar *q;
6909
6910 if ((RAW == 'v') && (NXT(1) == 'e') &&
6911 (NXT(2) == 'r') && (NXT(3) == 's') &&
6912 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6913 (NXT(6) == 'n')) {
6914 SKIP(7);
6915 SKIP_BLANKS;
6916 if (RAW != '=') {
6917 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6919 ctxt->sax->error(ctxt->userData,
6920 "xmlParseVersionInfo : expected '='\n");
6921 ctxt->wellFormed = 0;
6922 ctxt->disableSAX = 1;
6923 return(NULL);
6924 }
6925 NEXT;
6926 SKIP_BLANKS;
6927 if (RAW == '"') {
6928 NEXT;
6929 q = CUR_PTR;
6930 version = xmlParseVersionNum(ctxt);
6931 if (RAW != '"') {
6932 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6934 ctxt->sax->error(ctxt->userData,
6935 "String not closed\n%.50s\n", q);
6936 ctxt->wellFormed = 0;
6937 ctxt->disableSAX = 1;
6938 } else
6939 NEXT;
6940 } else if (RAW == '\''){
6941 NEXT;
6942 q = CUR_PTR;
6943 version = xmlParseVersionNum(ctxt);
6944 if (RAW != '\'') {
6945 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6947 ctxt->sax->error(ctxt->userData,
6948 "String not closed\n%.50s\n", q);
6949 ctxt->wellFormed = 0;
6950 ctxt->disableSAX = 1;
6951 } else
6952 NEXT;
6953 } else {
6954 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6956 ctxt->sax->error(ctxt->userData,
6957 "xmlParseVersionInfo : expected ' or \"\n");
6958 ctxt->wellFormed = 0;
6959 ctxt->disableSAX = 1;
6960 }
6961 }
6962 return(version);
6963}
6964
6965/**
6966 * xmlParseEncName:
6967 * @ctxt: an XML parser context
6968 *
6969 * parse the XML encoding name
6970 *
6971 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6972 *
6973 * Returns the encoding name value or NULL
6974 */
6975xmlChar *
6976xmlParseEncName(xmlParserCtxtPtr ctxt) {
6977 xmlChar *buf = NULL;
6978 int len = 0;
6979 int size = 10;
6980 xmlChar cur;
6981
6982 cur = CUR;
6983 if (((cur >= 'a') && (cur <= 'z')) ||
6984 ((cur >= 'A') && (cur <= 'Z'))) {
6985 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6986 if (buf == NULL) {
6987 xmlGenericError(xmlGenericErrorContext,
6988 "malloc of %d byte failed\n", size);
6989 return(NULL);
6990 }
6991
6992 buf[len++] = cur;
6993 NEXT;
6994 cur = CUR;
6995 while (((cur >= 'a') && (cur <= 'z')) ||
6996 ((cur >= 'A') && (cur <= 'Z')) ||
6997 ((cur >= '0') && (cur <= '9')) ||
6998 (cur == '.') || (cur == '_') ||
6999 (cur == '-')) {
7000 if (len + 1 >= size) {
7001 size *= 2;
7002 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7003 if (buf == NULL) {
7004 xmlGenericError(xmlGenericErrorContext,
7005 "realloc of %d byte failed\n", size);
7006 return(NULL);
7007 }
7008 }
7009 buf[len++] = cur;
7010 NEXT;
7011 cur = CUR;
7012 if (cur == 0) {
7013 SHRINK;
7014 GROW;
7015 cur = CUR;
7016 }
7017 }
7018 buf[len] = 0;
7019 } else {
7020 ctxt->errNo = XML_ERR_ENCODING_NAME;
7021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7022 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7023 ctxt->wellFormed = 0;
7024 ctxt->disableSAX = 1;
7025 }
7026 return(buf);
7027}
7028
7029/**
7030 * xmlParseEncodingDecl:
7031 * @ctxt: an XML parser context
7032 *
7033 * parse the XML encoding declaration
7034 *
7035 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7036 *
7037 * this setups the conversion filters.
7038 *
7039 * Returns the encoding value or NULL
7040 */
7041
7042xmlChar *
7043xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7044 xmlChar *encoding = NULL;
7045 const xmlChar *q;
7046
7047 SKIP_BLANKS;
7048 if ((RAW == 'e') && (NXT(1) == 'n') &&
7049 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7050 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7051 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7052 SKIP(8);
7053 SKIP_BLANKS;
7054 if (RAW != '=') {
7055 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7057 ctxt->sax->error(ctxt->userData,
7058 "xmlParseEncodingDecl : expected '='\n");
7059 ctxt->wellFormed = 0;
7060 ctxt->disableSAX = 1;
7061 return(NULL);
7062 }
7063 NEXT;
7064 SKIP_BLANKS;
7065 if (RAW == '"') {
7066 NEXT;
7067 q = CUR_PTR;
7068 encoding = xmlParseEncName(ctxt);
7069 if (RAW != '"') {
7070 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7072 ctxt->sax->error(ctxt->userData,
7073 "String not closed\n%.50s\n", q);
7074 ctxt->wellFormed = 0;
7075 ctxt->disableSAX = 1;
7076 } else
7077 NEXT;
7078 } else if (RAW == '\''){
7079 NEXT;
7080 q = CUR_PTR;
7081 encoding = xmlParseEncName(ctxt);
7082 if (RAW != '\'') {
7083 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7085 ctxt->sax->error(ctxt->userData,
7086 "String not closed\n%.50s\n", q);
7087 ctxt->wellFormed = 0;
7088 ctxt->disableSAX = 1;
7089 } else
7090 NEXT;
7091 } else if (RAW == '"'){
7092 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7094 ctxt->sax->error(ctxt->userData,
7095 "xmlParseEncodingDecl : expected ' or \"\n");
7096 ctxt->wellFormed = 0;
7097 ctxt->disableSAX = 1;
7098 }
7099 if (encoding != NULL) {
7100 xmlCharEncoding enc;
7101 xmlCharEncodingHandlerPtr handler;
7102
7103 if (ctxt->input->encoding != NULL)
7104 xmlFree((xmlChar *) ctxt->input->encoding);
7105 ctxt->input->encoding = encoding;
7106
7107 enc = xmlParseCharEncoding((const char *) encoding);
7108 /*
7109 * registered set of known encodings
7110 */
7111 if (enc != XML_CHAR_ENCODING_ERROR) {
7112 xmlSwitchEncoding(ctxt, enc);
7113 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7114 xmlFree(encoding);
7115 return(NULL);
7116 }
7117 } else {
7118 /*
7119 * fallback for unknown encodings
7120 */
7121 handler = xmlFindCharEncodingHandler((const char *) encoding);
7122 if (handler != NULL) {
7123 xmlSwitchToEncoding(ctxt, handler);
7124 } else {
7125 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7126 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7127 ctxt->sax->error(ctxt->userData,
7128 "Unsupported encoding %s\n", encoding);
7129 return(NULL);
7130 }
7131 }
7132 }
7133 }
7134 return(encoding);
7135}
7136
7137/**
7138 * xmlParseSDDecl:
7139 * @ctxt: an XML parser context
7140 *
7141 * parse the XML standalone declaration
7142 *
7143 * [32] SDDecl ::= S 'standalone' Eq
7144 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7145 *
7146 * [ VC: Standalone Document Declaration ]
7147 * TODO The standalone document declaration must have the value "no"
7148 * if any external markup declarations contain declarations of:
7149 * - attributes with default values, if elements to which these
7150 * attributes apply appear in the document without specifications
7151 * of values for these attributes, or
7152 * - entities (other than amp, lt, gt, apos, quot), if references
7153 * to those entities appear in the document, or
7154 * - attributes with values subject to normalization, where the
7155 * attribute appears in the document with a value which will change
7156 * as a result of normalization, or
7157 * - element types with element content, if white space occurs directly
7158 * within any instance of those types.
7159 *
7160 * Returns 1 if standalone, 0 otherwise
7161 */
7162
7163int
7164xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7165 int standalone = -1;
7166
7167 SKIP_BLANKS;
7168 if ((RAW == 's') && (NXT(1) == 't') &&
7169 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7170 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7171 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7172 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7173 SKIP(10);
7174 SKIP_BLANKS;
7175 if (RAW != '=') {
7176 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7178 ctxt->sax->error(ctxt->userData,
7179 "XML standalone declaration : expected '='\n");
7180 ctxt->wellFormed = 0;
7181 ctxt->disableSAX = 1;
7182 return(standalone);
7183 }
7184 NEXT;
7185 SKIP_BLANKS;
7186 if (RAW == '\''){
7187 NEXT;
7188 if ((RAW == 'n') && (NXT(1) == 'o')) {
7189 standalone = 0;
7190 SKIP(2);
7191 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7192 (NXT(2) == 's')) {
7193 standalone = 1;
7194 SKIP(3);
7195 } else {
7196 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7197 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7198 ctxt->sax->error(ctxt->userData,
7199 "standalone accepts only 'yes' or 'no'\n");
7200 ctxt->wellFormed = 0;
7201 ctxt->disableSAX = 1;
7202 }
7203 if (RAW != '\'') {
7204 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7206 ctxt->sax->error(ctxt->userData, "String not closed\n");
7207 ctxt->wellFormed = 0;
7208 ctxt->disableSAX = 1;
7209 } else
7210 NEXT;
7211 } else if (RAW == '"'){
7212 NEXT;
7213 if ((RAW == 'n') && (NXT(1) == 'o')) {
7214 standalone = 0;
7215 SKIP(2);
7216 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7217 (NXT(2) == 's')) {
7218 standalone = 1;
7219 SKIP(3);
7220 } else {
7221 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7223 ctxt->sax->error(ctxt->userData,
7224 "standalone accepts only 'yes' or 'no'\n");
7225 ctxt->wellFormed = 0;
7226 ctxt->disableSAX = 1;
7227 }
7228 if (RAW != '"') {
7229 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7230 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7231 ctxt->sax->error(ctxt->userData, "String not closed\n");
7232 ctxt->wellFormed = 0;
7233 ctxt->disableSAX = 1;
7234 } else
7235 NEXT;
7236 } else {
7237 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7239 ctxt->sax->error(ctxt->userData,
7240 "Standalone value not found\n");
7241 ctxt->wellFormed = 0;
7242 ctxt->disableSAX = 1;
7243 }
7244 }
7245 return(standalone);
7246}
7247
7248/**
7249 * xmlParseXMLDecl:
7250 * @ctxt: an XML parser context
7251 *
7252 * parse an XML declaration header
7253 *
7254 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7255 */
7256
7257void
7258xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7259 xmlChar *version;
7260
7261 /*
7262 * We know that '<?xml' is here.
7263 */
7264 SKIP(5);
7265
7266 if (!IS_BLANK(RAW)) {
7267 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7269 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7270 ctxt->wellFormed = 0;
7271 ctxt->disableSAX = 1;
7272 }
7273 SKIP_BLANKS;
7274
7275 /*
7276 * We should have the VersionInfo here.
7277 */
7278 version = xmlParseVersionInfo(ctxt);
7279 if (version == NULL)
7280 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillarda050d232001-09-05 15:51:05 +00007281 else if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7282 /*
7283 * TODO: Blueberry should be detected here
7284 */
7285 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7286 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7287 version);
7288 }
Owen Taylor3473f882001-02-23 17:55:21 +00007289 ctxt->version = xmlStrdup(version);
7290 xmlFree(version);
7291
7292 /*
7293 * We may have the encoding declaration
7294 */
7295 if (!IS_BLANK(RAW)) {
7296 if ((RAW == '?') && (NXT(1) == '>')) {
7297 SKIP(2);
7298 return;
7299 }
7300 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7301 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7302 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7303 ctxt->wellFormed = 0;
7304 ctxt->disableSAX = 1;
7305 }
7306 xmlParseEncodingDecl(ctxt);
7307 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7308 /*
7309 * The XML REC instructs us to stop parsing right here
7310 */
7311 return;
7312 }
7313
7314 /*
7315 * We may have the standalone status.
7316 */
7317 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7318 if ((RAW == '?') && (NXT(1) == '>')) {
7319 SKIP(2);
7320 return;
7321 }
7322 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7324 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7325 ctxt->wellFormed = 0;
7326 ctxt->disableSAX = 1;
7327 }
7328 SKIP_BLANKS;
7329 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7330
7331 SKIP_BLANKS;
7332 if ((RAW == '?') && (NXT(1) == '>')) {
7333 SKIP(2);
7334 } else if (RAW == '>') {
7335 /* Deprecated old WD ... */
7336 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7338 ctxt->sax->error(ctxt->userData,
7339 "XML declaration must end-up with '?>'\n");
7340 ctxt->wellFormed = 0;
7341 ctxt->disableSAX = 1;
7342 NEXT;
7343 } else {
7344 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7345 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7346 ctxt->sax->error(ctxt->userData,
7347 "parsing XML declaration: '?>' expected\n");
7348 ctxt->wellFormed = 0;
7349 ctxt->disableSAX = 1;
7350 MOVETO_ENDTAG(CUR_PTR);
7351 NEXT;
7352 }
7353}
7354
7355/**
7356 * xmlParseMisc:
7357 * @ctxt: an XML parser context
7358 *
7359 * parse an XML Misc* optionnal field.
7360 *
7361 * [27] Misc ::= Comment | PI | S
7362 */
7363
7364void
7365xmlParseMisc(xmlParserCtxtPtr ctxt) {
7366 while (((RAW == '<') && (NXT(1) == '?')) ||
7367 ((RAW == '<') && (NXT(1) == '!') &&
7368 (NXT(2) == '-') && (NXT(3) == '-')) ||
7369 IS_BLANK(CUR)) {
7370 if ((RAW == '<') && (NXT(1) == '?')) {
7371 xmlParsePI(ctxt);
7372 } else if (IS_BLANK(CUR)) {
7373 NEXT;
7374 } else
7375 xmlParseComment(ctxt);
7376 }
7377}
7378
7379/**
7380 * xmlParseDocument:
7381 * @ctxt: an XML parser context
7382 *
7383 * parse an XML document (and build a tree if using the standard SAX
7384 * interface).
7385 *
7386 * [1] document ::= prolog element Misc*
7387 *
7388 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7389 *
7390 * Returns 0, -1 in case of error. the parser context is augmented
7391 * as a result of the parsing.
7392 */
7393
7394int
7395xmlParseDocument(xmlParserCtxtPtr ctxt) {
7396 xmlChar start[4];
7397 xmlCharEncoding enc;
7398
7399 xmlInitParser();
7400
7401 GROW;
7402
7403 /*
7404 * SAX: beginning of the document processing.
7405 */
7406 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7407 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7408
Daniel Veillard50f34372001-08-03 12:06:36 +00007409 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007410 /*
7411 * Get the 4 first bytes and decode the charset
7412 * if enc != XML_CHAR_ENCODING_NONE
7413 * plug some encoding conversion routines.
7414 */
7415 start[0] = RAW;
7416 start[1] = NXT(1);
7417 start[2] = NXT(2);
7418 start[3] = NXT(3);
7419 enc = xmlDetectCharEncoding(start, 4);
7420 if (enc != XML_CHAR_ENCODING_NONE) {
7421 xmlSwitchEncoding(ctxt, enc);
7422 }
Owen Taylor3473f882001-02-23 17:55:21 +00007423 }
7424
7425
7426 if (CUR == 0) {
7427 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7429 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7430 ctxt->wellFormed = 0;
7431 ctxt->disableSAX = 1;
7432 }
7433
7434 /*
7435 * Check for the XMLDecl in the Prolog.
7436 */
7437 GROW;
7438 if ((RAW == '<') && (NXT(1) == '?') &&
7439 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7440 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7441
7442 /*
7443 * Note that we will switch encoding on the fly.
7444 */
7445 xmlParseXMLDecl(ctxt);
7446 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7447 /*
7448 * The XML REC instructs us to stop parsing right here
7449 */
7450 return(-1);
7451 }
7452 ctxt->standalone = ctxt->input->standalone;
7453 SKIP_BLANKS;
7454 } else {
7455 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7456 }
7457 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7458 ctxt->sax->startDocument(ctxt->userData);
7459
7460 /*
7461 * The Misc part of the Prolog
7462 */
7463 GROW;
7464 xmlParseMisc(ctxt);
7465
7466 /*
7467 * Then possibly doc type declaration(s) and more Misc
7468 * (doctypedecl Misc*)?
7469 */
7470 GROW;
7471 if ((RAW == '<') && (NXT(1) == '!') &&
7472 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7473 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7474 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7475 (NXT(8) == 'E')) {
7476
7477 ctxt->inSubset = 1;
7478 xmlParseDocTypeDecl(ctxt);
7479 if (RAW == '[') {
7480 ctxt->instate = XML_PARSER_DTD;
7481 xmlParseInternalSubset(ctxt);
7482 }
7483
7484 /*
7485 * Create and update the external subset.
7486 */
7487 ctxt->inSubset = 2;
7488 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7489 (!ctxt->disableSAX))
7490 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7491 ctxt->extSubSystem, ctxt->extSubURI);
7492 ctxt->inSubset = 0;
7493
7494
7495 ctxt->instate = XML_PARSER_PROLOG;
7496 xmlParseMisc(ctxt);
7497 }
7498
7499 /*
7500 * Time to start parsing the tree itself
7501 */
7502 GROW;
7503 if (RAW != '<') {
7504 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7506 ctxt->sax->error(ctxt->userData,
7507 "Start tag expected, '<' not found\n");
7508 ctxt->wellFormed = 0;
7509 ctxt->disableSAX = 1;
7510 ctxt->instate = XML_PARSER_EOF;
7511 } else {
7512 ctxt->instate = XML_PARSER_CONTENT;
7513 xmlParseElement(ctxt);
7514 ctxt->instate = XML_PARSER_EPILOG;
7515
7516
7517 /*
7518 * The Misc part at the end
7519 */
7520 xmlParseMisc(ctxt);
7521
7522 if (RAW != 0) {
7523 ctxt->errNo = XML_ERR_DOCUMENT_END;
7524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7525 ctxt->sax->error(ctxt->userData,
7526 "Extra content at the end of the document\n");
7527 ctxt->wellFormed = 0;
7528 ctxt->disableSAX = 1;
7529 }
7530 ctxt->instate = XML_PARSER_EOF;
7531 }
7532
7533 /*
7534 * SAX: end of the document processing.
7535 */
7536 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7537 (!ctxt->disableSAX))
7538 ctxt->sax->endDocument(ctxt->userData);
7539
7540 if (! ctxt->wellFormed) return(-1);
7541 return(0);
7542}
7543
7544/**
7545 * xmlParseExtParsedEnt:
7546 * @ctxt: an XML parser context
7547 *
7548 * parse a genreral parsed entity
7549 * An external general parsed entity is well-formed if it matches the
7550 * production labeled extParsedEnt.
7551 *
7552 * [78] extParsedEnt ::= TextDecl? content
7553 *
7554 * Returns 0, -1 in case of error. the parser context is augmented
7555 * as a result of the parsing.
7556 */
7557
7558int
7559xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7560 xmlChar start[4];
7561 xmlCharEncoding enc;
7562
7563 xmlDefaultSAXHandlerInit();
7564
7565 GROW;
7566
7567 /*
7568 * SAX: beginning of the document processing.
7569 */
7570 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7571 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7572
7573 /*
7574 * Get the 4 first bytes and decode the charset
7575 * if enc != XML_CHAR_ENCODING_NONE
7576 * plug some encoding conversion routines.
7577 */
7578 start[0] = RAW;
7579 start[1] = NXT(1);
7580 start[2] = NXT(2);
7581 start[3] = NXT(3);
7582 enc = xmlDetectCharEncoding(start, 4);
7583 if (enc != XML_CHAR_ENCODING_NONE) {
7584 xmlSwitchEncoding(ctxt, enc);
7585 }
7586
7587
7588 if (CUR == 0) {
7589 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7591 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7592 ctxt->wellFormed = 0;
7593 ctxt->disableSAX = 1;
7594 }
7595
7596 /*
7597 * Check for the XMLDecl in the Prolog.
7598 */
7599 GROW;
7600 if ((RAW == '<') && (NXT(1) == '?') &&
7601 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7602 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7603
7604 /*
7605 * Note that we will switch encoding on the fly.
7606 */
7607 xmlParseXMLDecl(ctxt);
7608 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7609 /*
7610 * The XML REC instructs us to stop parsing right here
7611 */
7612 return(-1);
7613 }
7614 SKIP_BLANKS;
7615 } else {
7616 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7617 }
7618 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7619 ctxt->sax->startDocument(ctxt->userData);
7620
7621 /*
7622 * Doing validity checking on chunk doesn't make sense
7623 */
7624 ctxt->instate = XML_PARSER_CONTENT;
7625 ctxt->validate = 0;
7626 ctxt->loadsubset = 0;
7627 ctxt->depth = 0;
7628
7629 xmlParseContent(ctxt);
7630
7631 if ((RAW == '<') && (NXT(1) == '/')) {
7632 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7634 ctxt->sax->error(ctxt->userData,
7635 "chunk is not well balanced\n");
7636 ctxt->wellFormed = 0;
7637 ctxt->disableSAX = 1;
7638 } else if (RAW != 0) {
7639 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7640 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7641 ctxt->sax->error(ctxt->userData,
7642 "extra content at the end of well balanced chunk\n");
7643 ctxt->wellFormed = 0;
7644 ctxt->disableSAX = 1;
7645 }
7646
7647 /*
7648 * SAX: end of the document processing.
7649 */
7650 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7651 (!ctxt->disableSAX))
7652 ctxt->sax->endDocument(ctxt->userData);
7653
7654 if (! ctxt->wellFormed) return(-1);
7655 return(0);
7656}
7657
/************************************************************************
 *                                                                      *
 *              Progressive parsing interfaces                          *
 *                                                                      *
 ************************************************************************/
7663
7664/**
7665 * xmlParseLookupSequence:
7666 * @ctxt: an XML parser context
7667 * @first: the first char to lookup
7668 * @next: the next char to lookup or zero
7669 * @third: the next char to lookup or zero
7670 *
7671 * Try to find if a sequence (first, next, third) or just (first next) or
7672 * (first) is available in the input stream.
7673 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7674 * to avoid rescanning sequences of bytes, it DOES change the state of the
7675 * parser, do not use liberally.
7676 *
7677 * Returns the index to the current parsing point if the full sequence
7678 * is available, -1 otherwise.
7679 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007680static int
Owen Taylor3473f882001-02-23 17:55:21 +00007681xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7682 xmlChar next, xmlChar third) {
7683 int base, len;
7684 xmlParserInputPtr in;
7685 const xmlChar *buf;
7686
7687 in = ctxt->input;
7688 if (in == NULL) return(-1);
7689 base = in->cur - in->base;
7690 if (base < 0) return(-1);
7691 if (ctxt->checkIndex > base)
7692 base = ctxt->checkIndex;
7693 if (in->buf == NULL) {
7694 buf = in->base;
7695 len = in->length;
7696 } else {
7697 buf = in->buf->buffer->content;
7698 len = in->buf->buffer->use;
7699 }
7700 /* take into account the sequence length */
7701 if (third) len -= 2;
7702 else if (next) len --;
7703 for (;base < len;base++) {
7704 if (buf[base] == first) {
7705 if (third != 0) {
7706 if ((buf[base + 1] != next) ||
7707 (buf[base + 2] != third)) continue;
7708 } else if (next != 0) {
7709 if (buf[base + 1] != next) continue;
7710 }
7711 ctxt->checkIndex = 0;
7712#ifdef DEBUG_PUSH
7713 if (next == 0)
7714 xmlGenericError(xmlGenericErrorContext,
7715 "PP: lookup '%c' found at %d\n",
7716 first, base);
7717 else if (third == 0)
7718 xmlGenericError(xmlGenericErrorContext,
7719 "PP: lookup '%c%c' found at %d\n",
7720 first, next, base);
7721 else
7722 xmlGenericError(xmlGenericErrorContext,
7723 "PP: lookup '%c%c%c' found at %d\n",
7724 first, next, third, base);
7725#endif
7726 return(base - (in->cur - in->base));
7727 }
7728 }
7729 ctxt->checkIndex = base;
7730#ifdef DEBUG_PUSH
7731 if (next == 0)
7732 xmlGenericError(xmlGenericErrorContext,
7733 "PP: lookup '%c' failed\n", first);
7734 else if (third == 0)
7735 xmlGenericError(xmlGenericErrorContext,
7736 "PP: lookup '%c%c' failed\n", first, next);
7737 else
7738 xmlGenericError(xmlGenericErrorContext,
7739 "PP: lookup '%c%c%c' failed\n", first, next, third);
7740#endif
7741 return(-1);
7742}
7743
7744/**
7745 * xmlParseTryOrFinish:
7746 * @ctxt: an XML parser context
7747 * @terminate: last chunk indicator
7748 *
7749 * Try to progress on parsing
7750 *
7751 * Returns zero if no parsing was possible
7752 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007753static int
Owen Taylor3473f882001-02-23 17:55:21 +00007754xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7755 int ret = 0;
7756 int avail;
7757 xmlChar cur, next;
7758
7759#ifdef DEBUG_PUSH
7760 switch (ctxt->instate) {
7761 case XML_PARSER_EOF:
7762 xmlGenericError(xmlGenericErrorContext,
7763 "PP: try EOF\n"); break;
7764 case XML_PARSER_START:
7765 xmlGenericError(xmlGenericErrorContext,
7766 "PP: try START\n"); break;
7767 case XML_PARSER_MISC:
7768 xmlGenericError(xmlGenericErrorContext,
7769 "PP: try MISC\n");break;
7770 case XML_PARSER_COMMENT:
7771 xmlGenericError(xmlGenericErrorContext,
7772 "PP: try COMMENT\n");break;
7773 case XML_PARSER_PROLOG:
7774 xmlGenericError(xmlGenericErrorContext,
7775 "PP: try PROLOG\n");break;
7776 case XML_PARSER_START_TAG:
7777 xmlGenericError(xmlGenericErrorContext,
7778 "PP: try START_TAG\n");break;
7779 case XML_PARSER_CONTENT:
7780 xmlGenericError(xmlGenericErrorContext,
7781 "PP: try CONTENT\n");break;
7782 case XML_PARSER_CDATA_SECTION:
7783 xmlGenericError(xmlGenericErrorContext,
7784 "PP: try CDATA_SECTION\n");break;
7785 case XML_PARSER_END_TAG:
7786 xmlGenericError(xmlGenericErrorContext,
7787 "PP: try END_TAG\n");break;
7788 case XML_PARSER_ENTITY_DECL:
7789 xmlGenericError(xmlGenericErrorContext,
7790 "PP: try ENTITY_DECL\n");break;
7791 case XML_PARSER_ENTITY_VALUE:
7792 xmlGenericError(xmlGenericErrorContext,
7793 "PP: try ENTITY_VALUE\n");break;
7794 case XML_PARSER_ATTRIBUTE_VALUE:
7795 xmlGenericError(xmlGenericErrorContext,
7796 "PP: try ATTRIBUTE_VALUE\n");break;
7797 case XML_PARSER_DTD:
7798 xmlGenericError(xmlGenericErrorContext,
7799 "PP: try DTD\n");break;
7800 case XML_PARSER_EPILOG:
7801 xmlGenericError(xmlGenericErrorContext,
7802 "PP: try EPILOG\n");break;
7803 case XML_PARSER_PI:
7804 xmlGenericError(xmlGenericErrorContext,
7805 "PP: try PI\n");break;
7806 case XML_PARSER_IGNORE:
7807 xmlGenericError(xmlGenericErrorContext,
7808 "PP: try IGNORE\n");break;
7809 }
7810#endif
7811
7812 while (1) {
7813 /*
7814 * Pop-up of finished entities.
7815 */
7816 while ((RAW == 0) && (ctxt->inputNr > 1))
7817 xmlPopInput(ctxt);
7818
7819 if (ctxt->input ==NULL) break;
7820 if (ctxt->input->buf == NULL)
7821 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7822 else
7823 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7824 if (avail < 1)
7825 goto done;
7826 switch (ctxt->instate) {
7827 case XML_PARSER_EOF:
7828 /*
7829 * Document parsing is done !
7830 */
7831 goto done;
7832 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007833 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7834 xmlChar start[4];
7835 xmlCharEncoding enc;
7836
7837 /*
7838 * Very first chars read from the document flow.
7839 */
7840 if (avail < 4)
7841 goto done;
7842
7843 /*
7844 * Get the 4 first bytes and decode the charset
7845 * if enc != XML_CHAR_ENCODING_NONE
7846 * plug some encoding conversion routines.
7847 */
7848 start[0] = RAW;
7849 start[1] = NXT(1);
7850 start[2] = NXT(2);
7851 start[3] = NXT(3);
7852 enc = xmlDetectCharEncoding(start, 4);
7853 if (enc != XML_CHAR_ENCODING_NONE) {
7854 xmlSwitchEncoding(ctxt, enc);
7855 }
7856 break;
7857 }
Owen Taylor3473f882001-02-23 17:55:21 +00007858
7859 cur = ctxt->input->cur[0];
7860 next = ctxt->input->cur[1];
7861 if (cur == 0) {
7862 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7863 ctxt->sax->setDocumentLocator(ctxt->userData,
7864 &xmlDefaultSAXLocator);
7865 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7867 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7868 ctxt->wellFormed = 0;
7869 ctxt->disableSAX = 1;
7870 ctxt->instate = XML_PARSER_EOF;
7871#ifdef DEBUG_PUSH
7872 xmlGenericError(xmlGenericErrorContext,
7873 "PP: entering EOF\n");
7874#endif
7875 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7876 ctxt->sax->endDocument(ctxt->userData);
7877 goto done;
7878 }
7879 if ((cur == '<') && (next == '?')) {
7880 /* PI or XML decl */
7881 if (avail < 5) return(ret);
7882 if ((!terminate) &&
7883 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7884 return(ret);
7885 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7886 ctxt->sax->setDocumentLocator(ctxt->userData,
7887 &xmlDefaultSAXLocator);
7888 if ((ctxt->input->cur[2] == 'x') &&
7889 (ctxt->input->cur[3] == 'm') &&
7890 (ctxt->input->cur[4] == 'l') &&
7891 (IS_BLANK(ctxt->input->cur[5]))) {
7892 ret += 5;
7893#ifdef DEBUG_PUSH
7894 xmlGenericError(xmlGenericErrorContext,
7895 "PP: Parsing XML Decl\n");
7896#endif
7897 xmlParseXMLDecl(ctxt);
7898 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7899 /*
7900 * The XML REC instructs us to stop parsing right
7901 * here
7902 */
7903 ctxt->instate = XML_PARSER_EOF;
7904 return(0);
7905 }
7906 ctxt->standalone = ctxt->input->standalone;
7907 if ((ctxt->encoding == NULL) &&
7908 (ctxt->input->encoding != NULL))
7909 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7910 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7911 (!ctxt->disableSAX))
7912 ctxt->sax->startDocument(ctxt->userData);
7913 ctxt->instate = XML_PARSER_MISC;
7914#ifdef DEBUG_PUSH
7915 xmlGenericError(xmlGenericErrorContext,
7916 "PP: entering MISC\n");
7917#endif
7918 } else {
7919 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7920 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7921 (!ctxt->disableSAX))
7922 ctxt->sax->startDocument(ctxt->userData);
7923 ctxt->instate = XML_PARSER_MISC;
7924#ifdef DEBUG_PUSH
7925 xmlGenericError(xmlGenericErrorContext,
7926 "PP: entering MISC\n");
7927#endif
7928 }
7929 } else {
7930 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7931 ctxt->sax->setDocumentLocator(ctxt->userData,
7932 &xmlDefaultSAXLocator);
7933 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7934 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7935 (!ctxt->disableSAX))
7936 ctxt->sax->startDocument(ctxt->userData);
7937 ctxt->instate = XML_PARSER_MISC;
7938#ifdef DEBUG_PUSH
7939 xmlGenericError(xmlGenericErrorContext,
7940 "PP: entering MISC\n");
7941#endif
7942 }
7943 break;
7944 case XML_PARSER_MISC:
7945 SKIP_BLANKS;
7946 if (ctxt->input->buf == NULL)
7947 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7948 else
7949 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7950 if (avail < 2)
7951 goto done;
7952 cur = ctxt->input->cur[0];
7953 next = ctxt->input->cur[1];
7954 if ((cur == '<') && (next == '?')) {
7955 if ((!terminate) &&
7956 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7957 goto done;
7958#ifdef DEBUG_PUSH
7959 xmlGenericError(xmlGenericErrorContext,
7960 "PP: Parsing PI\n");
7961#endif
7962 xmlParsePI(ctxt);
7963 } else if ((cur == '<') && (next == '!') &&
7964 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7965 if ((!terminate) &&
7966 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7967 goto done;
7968#ifdef DEBUG_PUSH
7969 xmlGenericError(xmlGenericErrorContext,
7970 "PP: Parsing Comment\n");
7971#endif
7972 xmlParseComment(ctxt);
7973 ctxt->instate = XML_PARSER_MISC;
7974 } else if ((cur == '<') && (next == '!') &&
7975 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7976 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7977 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7978 (ctxt->input->cur[8] == 'E')) {
7979 if ((!terminate) &&
7980 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7981 goto done;
7982#ifdef DEBUG_PUSH
7983 xmlGenericError(xmlGenericErrorContext,
7984 "PP: Parsing internal subset\n");
7985#endif
7986 ctxt->inSubset = 1;
7987 xmlParseDocTypeDecl(ctxt);
7988 if (RAW == '[') {
7989 ctxt->instate = XML_PARSER_DTD;
7990#ifdef DEBUG_PUSH
7991 xmlGenericError(xmlGenericErrorContext,
7992 "PP: entering DTD\n");
7993#endif
7994 } else {
7995 /*
7996 * Create and update the external subset.
7997 */
7998 ctxt->inSubset = 2;
7999 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8000 (ctxt->sax->externalSubset != NULL))
8001 ctxt->sax->externalSubset(ctxt->userData,
8002 ctxt->intSubName, ctxt->extSubSystem,
8003 ctxt->extSubURI);
8004 ctxt->inSubset = 0;
8005 ctxt->instate = XML_PARSER_PROLOG;
8006#ifdef DEBUG_PUSH
8007 xmlGenericError(xmlGenericErrorContext,
8008 "PP: entering PROLOG\n");
8009#endif
8010 }
8011 } else if ((cur == '<') && (next == '!') &&
8012 (avail < 9)) {
8013 goto done;
8014 } else {
8015 ctxt->instate = XML_PARSER_START_TAG;
8016#ifdef DEBUG_PUSH
8017 xmlGenericError(xmlGenericErrorContext,
8018 "PP: entering START_TAG\n");
8019#endif
8020 }
8021 break;
8022 case XML_PARSER_IGNORE:
8023 xmlGenericError(xmlGenericErrorContext,
8024 "PP: internal error, state == IGNORE");
8025 ctxt->instate = XML_PARSER_DTD;
8026#ifdef DEBUG_PUSH
8027 xmlGenericError(xmlGenericErrorContext,
8028 "PP: entering DTD\n");
8029#endif
8030 break;
8031 case XML_PARSER_PROLOG:
8032 SKIP_BLANKS;
8033 if (ctxt->input->buf == NULL)
8034 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8035 else
8036 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8037 if (avail < 2)
8038 goto done;
8039 cur = ctxt->input->cur[0];
8040 next = ctxt->input->cur[1];
8041 if ((cur == '<') && (next == '?')) {
8042 if ((!terminate) &&
8043 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8044 goto done;
8045#ifdef DEBUG_PUSH
8046 xmlGenericError(xmlGenericErrorContext,
8047 "PP: Parsing PI\n");
8048#endif
8049 xmlParsePI(ctxt);
8050 } else if ((cur == '<') && (next == '!') &&
8051 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8052 if ((!terminate) &&
8053 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8054 goto done;
8055#ifdef DEBUG_PUSH
8056 xmlGenericError(xmlGenericErrorContext,
8057 "PP: Parsing Comment\n");
8058#endif
8059 xmlParseComment(ctxt);
8060 ctxt->instate = XML_PARSER_PROLOG;
8061 } else if ((cur == '<') && (next == '!') &&
8062 (avail < 4)) {
8063 goto done;
8064 } else {
8065 ctxt->instate = XML_PARSER_START_TAG;
8066#ifdef DEBUG_PUSH
8067 xmlGenericError(xmlGenericErrorContext,
8068 "PP: entering START_TAG\n");
8069#endif
8070 }
8071 break;
8072 case XML_PARSER_EPILOG:
8073 SKIP_BLANKS;
8074 if (ctxt->input->buf == NULL)
8075 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8076 else
8077 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8078 if (avail < 2)
8079 goto done;
8080 cur = ctxt->input->cur[0];
8081 next = ctxt->input->cur[1];
8082 if ((cur == '<') && (next == '?')) {
8083 if ((!terminate) &&
8084 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8085 goto done;
8086#ifdef DEBUG_PUSH
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: Parsing PI\n");
8089#endif
8090 xmlParsePI(ctxt);
8091 ctxt->instate = XML_PARSER_EPILOG;
8092 } else if ((cur == '<') && (next == '!') &&
8093 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8094 if ((!terminate) &&
8095 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8096 goto done;
8097#ifdef DEBUG_PUSH
8098 xmlGenericError(xmlGenericErrorContext,
8099 "PP: Parsing Comment\n");
8100#endif
8101 xmlParseComment(ctxt);
8102 ctxt->instate = XML_PARSER_EPILOG;
8103 } else if ((cur == '<') && (next == '!') &&
8104 (avail < 4)) {
8105 goto done;
8106 } else {
8107 ctxt->errNo = XML_ERR_DOCUMENT_END;
8108 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8109 ctxt->sax->error(ctxt->userData,
8110 "Extra content at the end of the document\n");
8111 ctxt->wellFormed = 0;
8112 ctxt->disableSAX = 1;
8113 ctxt->instate = XML_PARSER_EOF;
8114#ifdef DEBUG_PUSH
8115 xmlGenericError(xmlGenericErrorContext,
8116 "PP: entering EOF\n");
8117#endif
8118 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8119 (!ctxt->disableSAX))
8120 ctxt->sax->endDocument(ctxt->userData);
8121 goto done;
8122 }
8123 break;
8124 case XML_PARSER_START_TAG: {
8125 xmlChar *name, *oldname;
8126
8127 if ((avail < 2) && (ctxt->inputNr == 1))
8128 goto done;
8129 cur = ctxt->input->cur[0];
8130 if (cur != '<') {
8131 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8133 ctxt->sax->error(ctxt->userData,
8134 "Start tag expect, '<' not found\n");
8135 ctxt->wellFormed = 0;
8136 ctxt->disableSAX = 1;
8137 ctxt->instate = XML_PARSER_EOF;
8138#ifdef DEBUG_PUSH
8139 xmlGenericError(xmlGenericErrorContext,
8140 "PP: entering EOF\n");
8141#endif
8142 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8143 (!ctxt->disableSAX))
8144 ctxt->sax->endDocument(ctxt->userData);
8145 goto done;
8146 }
8147 if ((!terminate) &&
8148 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8149 goto done;
8150 if (ctxt->spaceNr == 0)
8151 spacePush(ctxt, -1);
8152 else
8153 spacePush(ctxt, *ctxt->space);
8154 name = xmlParseStartTag(ctxt);
8155 if (name == NULL) {
8156 spacePop(ctxt);
8157 ctxt->instate = XML_PARSER_EOF;
8158#ifdef DEBUG_PUSH
8159 xmlGenericError(xmlGenericErrorContext,
8160 "PP: entering EOF\n");
8161#endif
8162 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8163 (!ctxt->disableSAX))
8164 ctxt->sax->endDocument(ctxt->userData);
8165 goto done;
8166 }
8167 namePush(ctxt, xmlStrdup(name));
8168
8169 /*
8170 * [ VC: Root Element Type ]
8171 * The Name in the document type declaration must match
8172 * the element type of the root element.
8173 */
8174 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8175 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8176 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8177
8178 /*
8179 * Check for an Empty Element.
8180 */
8181 if ((RAW == '/') && (NXT(1) == '>')) {
8182 SKIP(2);
8183 if ((ctxt->sax != NULL) &&
8184 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8185 ctxt->sax->endElement(ctxt->userData, name);
8186 xmlFree(name);
8187 oldname = namePop(ctxt);
8188 spacePop(ctxt);
8189 if (oldname != NULL) {
8190#ifdef DEBUG_STACK
8191 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8192#endif
8193 xmlFree(oldname);
8194 }
8195 if (ctxt->name == NULL) {
8196 ctxt->instate = XML_PARSER_EPILOG;
8197#ifdef DEBUG_PUSH
8198 xmlGenericError(xmlGenericErrorContext,
8199 "PP: entering EPILOG\n");
8200#endif
8201 } else {
8202 ctxt->instate = XML_PARSER_CONTENT;
8203#ifdef DEBUG_PUSH
8204 xmlGenericError(xmlGenericErrorContext,
8205 "PP: entering CONTENT\n");
8206#endif
8207 }
8208 break;
8209 }
8210 if (RAW == '>') {
8211 NEXT;
8212 } else {
8213 ctxt->errNo = XML_ERR_GT_REQUIRED;
8214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8215 ctxt->sax->error(ctxt->userData,
8216 "Couldn't find end of Start Tag %s\n",
8217 name);
8218 ctxt->wellFormed = 0;
8219 ctxt->disableSAX = 1;
8220
8221 /*
8222 * end of parsing of this node.
8223 */
8224 nodePop(ctxt);
8225 oldname = namePop(ctxt);
8226 spacePop(ctxt);
8227 if (oldname != NULL) {
8228#ifdef DEBUG_STACK
8229 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8230#endif
8231 xmlFree(oldname);
8232 }
8233 }
8234 xmlFree(name);
8235 ctxt->instate = XML_PARSER_CONTENT;
8236#ifdef DEBUG_PUSH
8237 xmlGenericError(xmlGenericErrorContext,
8238 "PP: entering CONTENT\n");
8239#endif
8240 break;
8241 }
8242 case XML_PARSER_CONTENT: {
8243 const xmlChar *test;
8244 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008245 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008246
8247 /*
8248 * Handle preparsed entities and charRef
8249 */
8250 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008251 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008252
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008253 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008254 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8255 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008256 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008257 ctxt->token = 0;
8258 }
8259 if ((avail < 2) && (ctxt->inputNr == 1))
8260 goto done;
8261 cur = ctxt->input->cur[0];
8262 next = ctxt->input->cur[1];
8263
8264 test = CUR_PTR;
8265 cons = ctxt->input->consumed;
8266 tok = ctxt->token;
8267 if ((cur == '<') && (next == '?')) {
8268 if ((!terminate) &&
8269 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8270 goto done;
8271#ifdef DEBUG_PUSH
8272 xmlGenericError(xmlGenericErrorContext,
8273 "PP: Parsing PI\n");
8274#endif
8275 xmlParsePI(ctxt);
8276 } else if ((cur == '<') && (next == '!') &&
8277 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8278 if ((!terminate) &&
8279 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8280 goto done;
8281#ifdef DEBUG_PUSH
8282 xmlGenericError(xmlGenericErrorContext,
8283 "PP: Parsing Comment\n");
8284#endif
8285 xmlParseComment(ctxt);
8286 ctxt->instate = XML_PARSER_CONTENT;
8287 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8288 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8289 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8290 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8291 (ctxt->input->cur[8] == '[')) {
8292 SKIP(9);
8293 ctxt->instate = XML_PARSER_CDATA_SECTION;
8294#ifdef DEBUG_PUSH
8295 xmlGenericError(xmlGenericErrorContext,
8296 "PP: entering CDATA_SECTION\n");
8297#endif
8298 break;
8299 } else if ((cur == '<') && (next == '!') &&
8300 (avail < 9)) {
8301 goto done;
8302 } else if ((cur == '<') && (next == '/')) {
8303 ctxt->instate = XML_PARSER_END_TAG;
8304#ifdef DEBUG_PUSH
8305 xmlGenericError(xmlGenericErrorContext,
8306 "PP: entering END_TAG\n");
8307#endif
8308 break;
8309 } else if (cur == '<') {
8310 ctxt->instate = XML_PARSER_START_TAG;
8311#ifdef DEBUG_PUSH
8312 xmlGenericError(xmlGenericErrorContext,
8313 "PP: entering START_TAG\n");
8314#endif
8315 break;
8316 } else if (cur == '&') {
8317 if ((!terminate) &&
8318 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8319 goto done;
8320#ifdef DEBUG_PUSH
8321 xmlGenericError(xmlGenericErrorContext,
8322 "PP: Parsing Reference\n");
8323#endif
8324 xmlParseReference(ctxt);
8325 } else {
8326 /* TODO Avoid the extra copy, handle directly !!! */
8327 /*
8328 * Goal of the following test is:
8329 * - minimize calls to the SAX 'character' callback
8330 * when they are mergeable
8331 * - handle an problem for isBlank when we only parse
8332 * a sequence of blank chars and the next one is
8333 * not available to check against '<' presence.
8334 * - tries to homogenize the differences in SAX
8335 * callbacks beween the push and pull versions
8336 * of the parser.
8337 */
8338 if ((ctxt->inputNr == 1) &&
8339 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8340 if ((!terminate) &&
8341 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8342 goto done;
8343 }
8344 ctxt->checkIndex = 0;
8345#ifdef DEBUG_PUSH
8346 xmlGenericError(xmlGenericErrorContext,
8347 "PP: Parsing char data\n");
8348#endif
8349 xmlParseCharData(ctxt, 0);
8350 }
8351 /*
8352 * Pop-up of finished entities.
8353 */
8354 while ((RAW == 0) && (ctxt->inputNr > 1))
8355 xmlPopInput(ctxt);
8356 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8357 (tok == ctxt->token)) {
8358 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8359 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8360 ctxt->sax->error(ctxt->userData,
8361 "detected an error in element content\n");
8362 ctxt->wellFormed = 0;
8363 ctxt->disableSAX = 1;
8364 ctxt->instate = XML_PARSER_EOF;
8365 break;
8366 }
8367 break;
8368 }
8369 case XML_PARSER_CDATA_SECTION: {
8370 /*
8371 * The Push mode need to have the SAX callback for
8372 * cdataBlock merge back contiguous callbacks.
8373 */
8374 int base;
8375
8376 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8377 if (base < 0) {
8378 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8379 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8380 if (ctxt->sax->cdataBlock != NULL)
8381 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8382 XML_PARSER_BIG_BUFFER_SIZE);
8383 }
8384 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8385 ctxt->checkIndex = 0;
8386 }
8387 goto done;
8388 } else {
8389 if ((ctxt->sax != NULL) && (base > 0) &&
8390 (!ctxt->disableSAX)) {
8391 if (ctxt->sax->cdataBlock != NULL)
8392 ctxt->sax->cdataBlock(ctxt->userData,
8393 ctxt->input->cur, base);
8394 }
8395 SKIP(base + 3);
8396 ctxt->checkIndex = 0;
8397 ctxt->instate = XML_PARSER_CONTENT;
8398#ifdef DEBUG_PUSH
8399 xmlGenericError(xmlGenericErrorContext,
8400 "PP: entering CONTENT\n");
8401#endif
8402 }
8403 break;
8404 }
8405 case XML_PARSER_END_TAG:
8406 if (avail < 2)
8407 goto done;
8408 if ((!terminate) &&
8409 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8410 goto done;
8411 xmlParseEndTag(ctxt);
8412 if (ctxt->name == NULL) {
8413 ctxt->instate = XML_PARSER_EPILOG;
8414#ifdef DEBUG_PUSH
8415 xmlGenericError(xmlGenericErrorContext,
8416 "PP: entering EPILOG\n");
8417#endif
8418 } else {
8419 ctxt->instate = XML_PARSER_CONTENT;
8420#ifdef DEBUG_PUSH
8421 xmlGenericError(xmlGenericErrorContext,
8422 "PP: entering CONTENT\n");
8423#endif
8424 }
8425 break;
8426 case XML_PARSER_DTD: {
8427 /*
8428 * Sorry but progressive parsing of the internal subset
8429 * is not expected to be supported. We first check that
8430 * the full content of the internal subset is available and
8431 * the parsing is launched only at that point.
8432 * Internal subset ends up with "']' S? '>'" in an unescaped
8433 * section and not in a ']]>' sequence which are conditional
8434 * sections (whoever argued to keep that crap in XML deserve
8435 * a place in hell !).
8436 */
8437 int base, i;
8438 xmlChar *buf;
8439 xmlChar quote = 0;
8440
8441 base = ctxt->input->cur - ctxt->input->base;
8442 if (base < 0) return(0);
8443 if (ctxt->checkIndex > base)
8444 base = ctxt->checkIndex;
8445 buf = ctxt->input->buf->buffer->content;
8446 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8447 base++) {
8448 if (quote != 0) {
8449 if (buf[base] == quote)
8450 quote = 0;
8451 continue;
8452 }
8453 if (buf[base] == '"') {
8454 quote = '"';
8455 continue;
8456 }
8457 if (buf[base] == '\'') {
8458 quote = '\'';
8459 continue;
8460 }
8461 if (buf[base] == ']') {
8462 if ((unsigned int) base +1 >=
8463 ctxt->input->buf->buffer->use)
8464 break;
8465 if (buf[base + 1] == ']') {
8466 /* conditional crap, skip both ']' ! */
8467 base++;
8468 continue;
8469 }
8470 for (i = 0;
8471 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8472 i++) {
8473 if (buf[base + i] == '>')
8474 goto found_end_int_subset;
8475 }
8476 break;
8477 }
8478 }
8479 /*
8480 * We didn't found the end of the Internal subset
8481 */
8482 if (quote == 0)
8483 ctxt->checkIndex = base;
8484#ifdef DEBUG_PUSH
8485 if (next == 0)
8486 xmlGenericError(xmlGenericErrorContext,
8487 "PP: lookup of int subset end filed\n");
8488#endif
8489 goto done;
8490
8491found_end_int_subset:
8492 xmlParseInternalSubset(ctxt);
8493 ctxt->inSubset = 2;
8494 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8495 (ctxt->sax->externalSubset != NULL))
8496 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8497 ctxt->extSubSystem, ctxt->extSubURI);
8498 ctxt->inSubset = 0;
8499 ctxt->instate = XML_PARSER_PROLOG;
8500 ctxt->checkIndex = 0;
8501#ifdef DEBUG_PUSH
8502 xmlGenericError(xmlGenericErrorContext,
8503 "PP: entering PROLOG\n");
8504#endif
8505 break;
8506 }
8507 case XML_PARSER_COMMENT:
8508 xmlGenericError(xmlGenericErrorContext,
8509 "PP: internal error, state == COMMENT\n");
8510 ctxt->instate = XML_PARSER_CONTENT;
8511#ifdef DEBUG_PUSH
8512 xmlGenericError(xmlGenericErrorContext,
8513 "PP: entering CONTENT\n");
8514#endif
8515 break;
8516 case XML_PARSER_PI:
8517 xmlGenericError(xmlGenericErrorContext,
8518 "PP: internal error, state == PI\n");
8519 ctxt->instate = XML_PARSER_CONTENT;
8520#ifdef DEBUG_PUSH
8521 xmlGenericError(xmlGenericErrorContext,
8522 "PP: entering CONTENT\n");
8523#endif
8524 break;
8525 case XML_PARSER_ENTITY_DECL:
8526 xmlGenericError(xmlGenericErrorContext,
8527 "PP: internal error, state == ENTITY_DECL\n");
8528 ctxt->instate = XML_PARSER_DTD;
8529#ifdef DEBUG_PUSH
8530 xmlGenericError(xmlGenericErrorContext,
8531 "PP: entering DTD\n");
8532#endif
8533 break;
8534 case XML_PARSER_ENTITY_VALUE:
8535 xmlGenericError(xmlGenericErrorContext,
8536 "PP: internal error, state == ENTITY_VALUE\n");
8537 ctxt->instate = XML_PARSER_CONTENT;
8538#ifdef DEBUG_PUSH
8539 xmlGenericError(xmlGenericErrorContext,
8540 "PP: entering DTD\n");
8541#endif
8542 break;
8543 case XML_PARSER_ATTRIBUTE_VALUE:
8544 xmlGenericError(xmlGenericErrorContext,
8545 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8546 ctxt->instate = XML_PARSER_START_TAG;
8547#ifdef DEBUG_PUSH
8548 xmlGenericError(xmlGenericErrorContext,
8549 "PP: entering START_TAG\n");
8550#endif
8551 break;
8552 case XML_PARSER_SYSTEM_LITERAL:
8553 xmlGenericError(xmlGenericErrorContext,
8554 "PP: internal error, state == SYSTEM_LITERAL\n");
8555 ctxt->instate = XML_PARSER_START_TAG;
8556#ifdef DEBUG_PUSH
8557 xmlGenericError(xmlGenericErrorContext,
8558 "PP: entering START_TAG\n");
8559#endif
8560 break;
8561 }
8562 }
8563done:
8564#ifdef DEBUG_PUSH
8565 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8566#endif
8567 return(ret);
8568}
8569
8570/**
Owen Taylor3473f882001-02-23 17:55:21 +00008571 * xmlParseChunk:
8572 * @ctxt: an XML parser context
8573 * @chunk: an char array
8574 * @size: the size in byte of the chunk
8575 * @terminate: last chunk indicator
8576 *
8577 * Parse a Chunk of memory
8578 *
8579 * Returns zero if no error, the xmlParserErrors otherwise.
8580 */
8581int
8582xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8583 int terminate) {
8584 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8585 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8586 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8587 int cur = ctxt->input->cur - ctxt->input->base;
8588
8589 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8590 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8591 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008592 ctxt->input->end =
8593 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008594#ifdef DEBUG_PUSH
8595 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8596#endif
8597
8598 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8599 xmlParseTryOrFinish(ctxt, terminate);
8600 } else if (ctxt->instate != XML_PARSER_EOF) {
8601 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8602 xmlParserInputBufferPtr in = ctxt->input->buf;
8603 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8604 (in->raw != NULL)) {
8605 int nbchars;
8606
8607 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8608 if (nbchars < 0) {
8609 xmlGenericError(xmlGenericErrorContext,
8610 "xmlParseChunk: encoder error\n");
8611 return(XML_ERR_INVALID_ENCODING);
8612 }
8613 }
8614 }
8615 }
8616 xmlParseTryOrFinish(ctxt, terminate);
8617 if (terminate) {
8618 /*
8619 * Check for termination
8620 */
8621 if ((ctxt->instate != XML_PARSER_EOF) &&
8622 (ctxt->instate != XML_PARSER_EPILOG)) {
8623 ctxt->errNo = XML_ERR_DOCUMENT_END;
8624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8625 ctxt->sax->error(ctxt->userData,
8626 "Extra content at the end of the document\n");
8627 ctxt->wellFormed = 0;
8628 ctxt->disableSAX = 1;
8629 }
8630 if (ctxt->instate != XML_PARSER_EOF) {
8631 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8632 (!ctxt->disableSAX))
8633 ctxt->sax->endDocument(ctxt->userData);
8634 }
8635 ctxt->instate = XML_PARSER_EOF;
8636 }
8637 return((xmlParserErrors) ctxt->errNo);
8638}
8639
8640/************************************************************************
8641 * *
8642 * I/O front end functions to the parser *
8643 * *
8644 ************************************************************************/
8645
8646/**
8647 * xmlStopParser:
8648 * @ctxt: an XML parser context
8649 *
8650 * Blocks further parser processing
8651 */
8652void
8653xmlStopParser(xmlParserCtxtPtr ctxt) {
8654 ctxt->instate = XML_PARSER_EOF;
8655 if (ctxt->input != NULL)
8656 ctxt->input->cur = BAD_CAST"";
8657}
8658
8659/**
8660 * xmlCreatePushParserCtxt:
8661 * @sax: a SAX handler
8662 * @user_data: The user data returned on SAX callbacks
8663 * @chunk: a pointer to an array of chars
8664 * @size: number of chars in the array
8665 * @filename: an optional file name or URI
8666 *
8667 * Create a parser context for using the XML parser in push mode
8668 * To allow content encoding detection, @size should be >= 4
8669 * The value of @filename is used for fetching external entities
8670 * and error/warning reports.
8671 *
8672 * Returns the new parser context or NULL
8673 */
8674xmlParserCtxtPtr
8675xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8676 const char *chunk, int size, const char *filename) {
8677 xmlParserCtxtPtr ctxt;
8678 xmlParserInputPtr inputStream;
8679 xmlParserInputBufferPtr buf;
8680 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8681
8682 /*
8683 * plug some encoding conversion routines
8684 */
8685 if ((chunk != NULL) && (size >= 4))
8686 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8687
8688 buf = xmlAllocParserInputBuffer(enc);
8689 if (buf == NULL) return(NULL);
8690
8691 ctxt = xmlNewParserCtxt();
8692 if (ctxt == NULL) {
8693 xmlFree(buf);
8694 return(NULL);
8695 }
8696 if (sax != NULL) {
8697 if (ctxt->sax != &xmlDefaultSAXHandler)
8698 xmlFree(ctxt->sax);
8699 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8700 if (ctxt->sax == NULL) {
8701 xmlFree(buf);
8702 xmlFree(ctxt);
8703 return(NULL);
8704 }
8705 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8706 if (user_data != NULL)
8707 ctxt->userData = user_data;
8708 }
8709 if (filename == NULL) {
8710 ctxt->directory = NULL;
8711 } else {
8712 ctxt->directory = xmlParserGetDirectory(filename);
8713 }
8714
8715 inputStream = xmlNewInputStream(ctxt);
8716 if (inputStream == NULL) {
8717 xmlFreeParserCtxt(ctxt);
8718 return(NULL);
8719 }
8720
8721 if (filename == NULL)
8722 inputStream->filename = NULL;
8723 else
8724 inputStream->filename = xmlMemStrdup(filename);
8725 inputStream->buf = buf;
8726 inputStream->base = inputStream->buf->buffer->content;
8727 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008728 inputStream->end =
8729 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008730
8731 inputPush(ctxt, inputStream);
8732
8733 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8734 (ctxt->input->buf != NULL)) {
8735 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8736#ifdef DEBUG_PUSH
8737 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8738#endif
8739 }
8740
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008741 if (enc != XML_CHAR_ENCODING_NONE) {
8742 xmlSwitchEncoding(ctxt, enc);
8743 }
8744
Owen Taylor3473f882001-02-23 17:55:21 +00008745 return(ctxt);
8746}
8747
8748/**
8749 * xmlCreateIOParserCtxt:
8750 * @sax: a SAX handler
8751 * @user_data: The user data returned on SAX callbacks
8752 * @ioread: an I/O read function
8753 * @ioclose: an I/O close function
8754 * @ioctx: an I/O handler
8755 * @enc: the charset encoding if known
8756 *
8757 * Create a parser context for using the XML parser with an existing
8758 * I/O stream
8759 *
8760 * Returns the new parser context or NULL
8761 */
8762xmlParserCtxtPtr
8763xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8764 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8765 void *ioctx, xmlCharEncoding enc) {
8766 xmlParserCtxtPtr ctxt;
8767 xmlParserInputPtr inputStream;
8768 xmlParserInputBufferPtr buf;
8769
8770 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8771 if (buf == NULL) return(NULL);
8772
8773 ctxt = xmlNewParserCtxt();
8774 if (ctxt == NULL) {
8775 xmlFree(buf);
8776 return(NULL);
8777 }
8778 if (sax != NULL) {
8779 if (ctxt->sax != &xmlDefaultSAXHandler)
8780 xmlFree(ctxt->sax);
8781 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8782 if (ctxt->sax == NULL) {
8783 xmlFree(buf);
8784 xmlFree(ctxt);
8785 return(NULL);
8786 }
8787 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8788 if (user_data != NULL)
8789 ctxt->userData = user_data;
8790 }
8791
8792 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8793 if (inputStream == NULL) {
8794 xmlFreeParserCtxt(ctxt);
8795 return(NULL);
8796 }
8797 inputPush(ctxt, inputStream);
8798
8799 return(ctxt);
8800}
8801
8802/************************************************************************
8803 * *
8804 * Front ends when parsing a Dtd *
8805 * *
8806 ************************************************************************/
8807
8808/**
8809 * xmlIOParseDTD:
8810 * @sax: the SAX handler block or NULL
8811 * @input: an Input Buffer
8812 * @enc: the charset encoding if known
8813 *
8814 * Load and parse a DTD
8815 *
8816 * Returns the resulting xmlDtdPtr or NULL in case of error.
8817 * @input will be freed at parsing end.
8818 */
8819
8820xmlDtdPtr
8821xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8822 xmlCharEncoding enc) {
8823 xmlDtdPtr ret = NULL;
8824 xmlParserCtxtPtr ctxt;
8825 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008826 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008827
8828 if (input == NULL)
8829 return(NULL);
8830
8831 ctxt = xmlNewParserCtxt();
8832 if (ctxt == NULL) {
8833 return(NULL);
8834 }
8835
8836 /*
8837 * Set-up the SAX context
8838 */
8839 if (sax != NULL) {
8840 if (ctxt->sax != NULL)
8841 xmlFree(ctxt->sax);
8842 ctxt->sax = sax;
8843 ctxt->userData = NULL;
8844 }
8845
8846 /*
8847 * generate a parser input from the I/O handler
8848 */
8849
8850 pinput = xmlNewIOInputStream(ctxt, input, enc);
8851 if (pinput == NULL) {
8852 if (sax != NULL) ctxt->sax = NULL;
8853 xmlFreeParserCtxt(ctxt);
8854 return(NULL);
8855 }
8856
8857 /*
8858 * plug some encoding conversion routines here.
8859 */
8860 xmlPushInput(ctxt, pinput);
8861
8862 pinput->filename = NULL;
8863 pinput->line = 1;
8864 pinput->col = 1;
8865 pinput->base = ctxt->input->cur;
8866 pinput->cur = ctxt->input->cur;
8867 pinput->free = NULL;
8868
8869 /*
8870 * let's parse that entity knowing it's an external subset.
8871 */
8872 ctxt->inSubset = 2;
8873 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8874 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8875 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008876
8877 if (enc == XML_CHAR_ENCODING_NONE) {
8878 /*
8879 * Get the 4 first bytes and decode the charset
8880 * if enc != XML_CHAR_ENCODING_NONE
8881 * plug some encoding conversion routines.
8882 */
8883 start[0] = RAW;
8884 start[1] = NXT(1);
8885 start[2] = NXT(2);
8886 start[3] = NXT(3);
8887 enc = xmlDetectCharEncoding(start, 4);
8888 if (enc != XML_CHAR_ENCODING_NONE) {
8889 xmlSwitchEncoding(ctxt, enc);
8890 }
8891 }
8892
Owen Taylor3473f882001-02-23 17:55:21 +00008893 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8894
8895 if (ctxt->myDoc != NULL) {
8896 if (ctxt->wellFormed) {
8897 ret = ctxt->myDoc->extSubset;
8898 ctxt->myDoc->extSubset = NULL;
8899 } else {
8900 ret = NULL;
8901 }
8902 xmlFreeDoc(ctxt->myDoc);
8903 ctxt->myDoc = NULL;
8904 }
8905 if (sax != NULL) ctxt->sax = NULL;
8906 xmlFreeParserCtxt(ctxt);
8907
8908 return(ret);
8909}
8910
8911/**
8912 * xmlSAXParseDTD:
8913 * @sax: the SAX handler block
8914 * @ExternalID: a NAME* containing the External ID of the DTD
8915 * @SystemID: a NAME* containing the URL to the DTD
8916 *
8917 * Load and parse an external subset.
8918 *
8919 * Returns the resulting xmlDtdPtr or NULL in case of error.
8920 */
8921
8922xmlDtdPtr
8923xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8924 const xmlChar *SystemID) {
8925 xmlDtdPtr ret = NULL;
8926 xmlParserCtxtPtr ctxt;
8927 xmlParserInputPtr input = NULL;
8928 xmlCharEncoding enc;
8929
8930 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8931
8932 ctxt = xmlNewParserCtxt();
8933 if (ctxt == NULL) {
8934 return(NULL);
8935 }
8936
8937 /*
8938 * Set-up the SAX context
8939 */
8940 if (sax != NULL) {
8941 if (ctxt->sax != NULL)
8942 xmlFree(ctxt->sax);
8943 ctxt->sax = sax;
8944 ctxt->userData = NULL;
8945 }
8946
8947 /*
8948 * Ask the Entity resolver to load the damn thing
8949 */
8950
8951 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8952 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8953 if (input == NULL) {
8954 if (sax != NULL) ctxt->sax = NULL;
8955 xmlFreeParserCtxt(ctxt);
8956 return(NULL);
8957 }
8958
8959 /*
8960 * plug some encoding conversion routines here.
8961 */
8962 xmlPushInput(ctxt, input);
8963 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8964 xmlSwitchEncoding(ctxt, enc);
8965
8966 if (input->filename == NULL)
8967 input->filename = (char *) xmlStrdup(SystemID);
8968 input->line = 1;
8969 input->col = 1;
8970 input->base = ctxt->input->cur;
8971 input->cur = ctxt->input->cur;
8972 input->free = NULL;
8973
8974 /*
8975 * let's parse that entity knowing it's an external subset.
8976 */
8977 ctxt->inSubset = 2;
8978 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8979 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8980 ExternalID, SystemID);
8981 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8982
8983 if (ctxt->myDoc != NULL) {
8984 if (ctxt->wellFormed) {
8985 ret = ctxt->myDoc->extSubset;
8986 ctxt->myDoc->extSubset = NULL;
8987 } else {
8988 ret = NULL;
8989 }
8990 xmlFreeDoc(ctxt->myDoc);
8991 ctxt->myDoc = NULL;
8992 }
8993 if (sax != NULL) ctxt->sax = NULL;
8994 xmlFreeParserCtxt(ctxt);
8995
8996 return(ret);
8997}
8998
8999/**
9000 * xmlParseDTD:
9001 * @ExternalID: a NAME* containing the External ID of the DTD
9002 * @SystemID: a NAME* containing the URL to the DTD
9003 *
9004 * Load and parse an external subset.
9005 *
9006 * Returns the resulting xmlDtdPtr or NULL in case of error.
9007 */
9008
9009xmlDtdPtr
9010xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9011 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9012}
9013
9014/************************************************************************
9015 * *
9016 * Front ends when parsing an Entity *
9017 * *
9018 ************************************************************************/
9019
9020/**
Owen Taylor3473f882001-02-23 17:55:21 +00009021 * xmlParseCtxtExternalEntity:
9022 * @ctx: the existing parsing context
9023 * @URL: the URL for the entity to load
9024 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009025 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009026 *
9027 * Parse an external general entity within an existing parsing context
9028 * An external general parsed entity is well-formed if it matches the
9029 * production labeled extParsedEnt.
9030 *
9031 * [78] extParsedEnt ::= TextDecl? content
9032 *
9033 * Returns 0 if the entity is well formed, -1 in case of args problem and
9034 * the parser error code otherwise
9035 */
9036
9037int
9038xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009039 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009040 xmlParserCtxtPtr ctxt;
9041 xmlDocPtr newDoc;
9042 xmlSAXHandlerPtr oldsax = NULL;
9043 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009044 xmlChar start[4];
9045 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009046
9047 if (ctx->depth > 40) {
9048 return(XML_ERR_ENTITY_LOOP);
9049 }
9050
Daniel Veillardcda96922001-08-21 10:56:31 +00009051 if (lst != NULL)
9052 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009053 if ((URL == NULL) && (ID == NULL))
9054 return(-1);
9055 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9056 return(-1);
9057
9058
9059 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9060 if (ctxt == NULL) return(-1);
9061 ctxt->userData = ctxt;
9062 oldsax = ctxt->sax;
9063 ctxt->sax = ctx->sax;
9064 newDoc = xmlNewDoc(BAD_CAST "1.0");
9065 if (newDoc == NULL) {
9066 xmlFreeParserCtxt(ctxt);
9067 return(-1);
9068 }
9069 if (ctx->myDoc != NULL) {
9070 newDoc->intSubset = ctx->myDoc->intSubset;
9071 newDoc->extSubset = ctx->myDoc->extSubset;
9072 }
9073 if (ctx->myDoc->URL != NULL) {
9074 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9075 }
9076 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9077 if (newDoc->children == NULL) {
9078 ctxt->sax = oldsax;
9079 xmlFreeParserCtxt(ctxt);
9080 newDoc->intSubset = NULL;
9081 newDoc->extSubset = NULL;
9082 xmlFreeDoc(newDoc);
9083 return(-1);
9084 }
9085 nodePush(ctxt, newDoc->children);
9086 if (ctx->myDoc == NULL) {
9087 ctxt->myDoc = newDoc;
9088 } else {
9089 ctxt->myDoc = ctx->myDoc;
9090 newDoc->children->doc = ctx->myDoc;
9091 }
9092
Daniel Veillard87a764e2001-06-20 17:41:10 +00009093 /*
9094 * Get the 4 first bytes and decode the charset
9095 * if enc != XML_CHAR_ENCODING_NONE
9096 * plug some encoding conversion routines.
9097 */
9098 GROW
9099 start[0] = RAW;
9100 start[1] = NXT(1);
9101 start[2] = NXT(2);
9102 start[3] = NXT(3);
9103 enc = xmlDetectCharEncoding(start, 4);
9104 if (enc != XML_CHAR_ENCODING_NONE) {
9105 xmlSwitchEncoding(ctxt, enc);
9106 }
9107
Owen Taylor3473f882001-02-23 17:55:21 +00009108 /*
9109 * Parse a possible text declaration first
9110 */
Owen Taylor3473f882001-02-23 17:55:21 +00009111 if ((RAW == '<') && (NXT(1) == '?') &&
9112 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9113 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9114 xmlParseTextDecl(ctxt);
9115 }
9116
9117 /*
9118 * Doing validity checking on chunk doesn't make sense
9119 */
9120 ctxt->instate = XML_PARSER_CONTENT;
9121 ctxt->validate = ctx->validate;
9122 ctxt->loadsubset = ctx->loadsubset;
9123 ctxt->depth = ctx->depth + 1;
9124 ctxt->replaceEntities = ctx->replaceEntities;
9125 if (ctxt->validate) {
9126 ctxt->vctxt.error = ctx->vctxt.error;
9127 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009128 } else {
9129 ctxt->vctxt.error = NULL;
9130 ctxt->vctxt.warning = NULL;
9131 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009132 ctxt->vctxt.nodeTab = NULL;
9133 ctxt->vctxt.nodeNr = 0;
9134 ctxt->vctxt.nodeMax = 0;
9135 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009136
9137 xmlParseContent(ctxt);
9138
9139 if ((RAW == '<') && (NXT(1) == '/')) {
9140 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9141 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9142 ctxt->sax->error(ctxt->userData,
9143 "chunk is not well balanced\n");
9144 ctxt->wellFormed = 0;
9145 ctxt->disableSAX = 1;
9146 } else if (RAW != 0) {
9147 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9149 ctxt->sax->error(ctxt->userData,
9150 "extra content at the end of well balanced chunk\n");
9151 ctxt->wellFormed = 0;
9152 ctxt->disableSAX = 1;
9153 }
9154 if (ctxt->node != newDoc->children) {
9155 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9157 ctxt->sax->error(ctxt->userData,
9158 "chunk is not well balanced\n");
9159 ctxt->wellFormed = 0;
9160 ctxt->disableSAX = 1;
9161 }
9162
9163 if (!ctxt->wellFormed) {
9164 if (ctxt->errNo == 0)
9165 ret = 1;
9166 else
9167 ret = ctxt->errNo;
9168 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009169 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009170 xmlNodePtr cur;
9171
9172 /*
9173 * Return the newly created nodeset after unlinking it from
9174 * they pseudo parent.
9175 */
9176 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009177 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009178 while (cur != NULL) {
9179 cur->parent = NULL;
9180 cur = cur->next;
9181 }
9182 newDoc->children->children = NULL;
9183 }
9184 ret = 0;
9185 }
9186 ctxt->sax = oldsax;
9187 xmlFreeParserCtxt(ctxt);
9188 newDoc->intSubset = NULL;
9189 newDoc->extSubset = NULL;
9190 xmlFreeDoc(newDoc);
9191
9192 return(ret);
9193}
9194
9195/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009196 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009197 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009198 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009199 * @sax: the SAX handler bloc (possibly NULL)
9200 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9201 * @depth: Used for loop detection, use 0
9202 * @URL: the URL for the entity to load
9203 * @ID: the System ID for the entity to load
9204 * @list: the return value for the set of parsed nodes
9205 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009206 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009207 *
9208 * Returns 0 if the entity is well formed, -1 in case of args problem and
9209 * the parser error code otherwise
9210 */
9211
Daniel Veillard257d9102001-05-08 10:41:44 +00009212static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009213xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9214 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009215 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009216 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009217 xmlParserCtxtPtr ctxt;
9218 xmlDocPtr newDoc;
9219 xmlSAXHandlerPtr oldsax = NULL;
9220 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009221 xmlChar start[4];
9222 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009223
9224 if (depth > 40) {
9225 return(XML_ERR_ENTITY_LOOP);
9226 }
9227
9228
9229
9230 if (list != NULL)
9231 *list = NULL;
9232 if ((URL == NULL) && (ID == NULL))
9233 return(-1);
9234 if (doc == NULL) /* @@ relax but check for dereferences */
9235 return(-1);
9236
9237
9238 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9239 if (ctxt == NULL) return(-1);
9240 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009241 if (oldctxt != NULL) {
9242 ctxt->_private = oldctxt->_private;
9243 ctxt->loadsubset = oldctxt->loadsubset;
9244 ctxt->validate = oldctxt->validate;
9245 ctxt->external = oldctxt->external;
9246 } else {
9247 /*
9248 * Doing validity checking on chunk without context
9249 * doesn't make sense
9250 */
9251 ctxt->_private = NULL;
9252 ctxt->validate = 0;
9253 ctxt->external = 2;
9254 ctxt->loadsubset = 0;
9255 }
Owen Taylor3473f882001-02-23 17:55:21 +00009256 if (sax != NULL) {
9257 oldsax = ctxt->sax;
9258 ctxt->sax = sax;
9259 if (user_data != NULL)
9260 ctxt->userData = user_data;
9261 }
9262 newDoc = xmlNewDoc(BAD_CAST "1.0");
9263 if (newDoc == NULL) {
9264 xmlFreeParserCtxt(ctxt);
9265 return(-1);
9266 }
9267 if (doc != NULL) {
9268 newDoc->intSubset = doc->intSubset;
9269 newDoc->extSubset = doc->extSubset;
9270 }
9271 if (doc->URL != NULL) {
9272 newDoc->URL = xmlStrdup(doc->URL);
9273 }
9274 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9275 if (newDoc->children == NULL) {
9276 if (sax != NULL)
9277 ctxt->sax = oldsax;
9278 xmlFreeParserCtxt(ctxt);
9279 newDoc->intSubset = NULL;
9280 newDoc->extSubset = NULL;
9281 xmlFreeDoc(newDoc);
9282 return(-1);
9283 }
9284 nodePush(ctxt, newDoc->children);
9285 if (doc == NULL) {
9286 ctxt->myDoc = newDoc;
9287 } else {
9288 ctxt->myDoc = doc;
9289 newDoc->children->doc = doc;
9290 }
9291
Daniel Veillard87a764e2001-06-20 17:41:10 +00009292 /*
9293 * Get the 4 first bytes and decode the charset
9294 * if enc != XML_CHAR_ENCODING_NONE
9295 * plug some encoding conversion routines.
9296 */
9297 GROW;
9298 start[0] = RAW;
9299 start[1] = NXT(1);
9300 start[2] = NXT(2);
9301 start[3] = NXT(3);
9302 enc = xmlDetectCharEncoding(start, 4);
9303 if (enc != XML_CHAR_ENCODING_NONE) {
9304 xmlSwitchEncoding(ctxt, enc);
9305 }
9306
Owen Taylor3473f882001-02-23 17:55:21 +00009307 /*
9308 * Parse a possible text declaration first
9309 */
Owen Taylor3473f882001-02-23 17:55:21 +00009310 if ((RAW == '<') && (NXT(1) == '?') &&
9311 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9312 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9313 xmlParseTextDecl(ctxt);
9314 }
9315
Owen Taylor3473f882001-02-23 17:55:21 +00009316 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009317 ctxt->depth = depth;
9318
9319 xmlParseContent(ctxt);
9320
9321 if ((RAW == '<') && (NXT(1) == '/')) {
9322 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9324 ctxt->sax->error(ctxt->userData,
9325 "chunk is not well balanced\n");
9326 ctxt->wellFormed = 0;
9327 ctxt->disableSAX = 1;
9328 } else if (RAW != 0) {
9329 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9331 ctxt->sax->error(ctxt->userData,
9332 "extra content at the end of well balanced chunk\n");
9333 ctxt->wellFormed = 0;
9334 ctxt->disableSAX = 1;
9335 }
9336 if (ctxt->node != newDoc->children) {
9337 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9338 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9339 ctxt->sax->error(ctxt->userData,
9340 "chunk is not well balanced\n");
9341 ctxt->wellFormed = 0;
9342 ctxt->disableSAX = 1;
9343 }
9344
9345 if (!ctxt->wellFormed) {
9346 if (ctxt->errNo == 0)
9347 ret = 1;
9348 else
9349 ret = ctxt->errNo;
9350 } else {
9351 if (list != NULL) {
9352 xmlNodePtr cur;
9353
9354 /*
9355 * Return the newly created nodeset after unlinking it from
9356 * they pseudo parent.
9357 */
9358 cur = newDoc->children->children;
9359 *list = cur;
9360 while (cur != NULL) {
9361 cur->parent = NULL;
9362 cur = cur->next;
9363 }
9364 newDoc->children->children = NULL;
9365 }
9366 ret = 0;
9367 }
9368 if (sax != NULL)
9369 ctxt->sax = oldsax;
9370 xmlFreeParserCtxt(ctxt);
9371 newDoc->intSubset = NULL;
9372 newDoc->extSubset = NULL;
9373 xmlFreeDoc(newDoc);
9374
9375 return(ret);
9376}
9377
9378/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009379 * xmlParseExternalEntity:
9380 * @doc: the document the chunk pertains to
9381 * @sax: the SAX handler bloc (possibly NULL)
9382 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9383 * @depth: Used for loop detection, use 0
9384 * @URL: the URL for the entity to load
9385 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009386 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009387 *
9388 * Parse an external general entity
9389 * An external general parsed entity is well-formed if it matches the
9390 * production labeled extParsedEnt.
9391 *
9392 * [78] extParsedEnt ::= TextDecl? content
9393 *
9394 * Returns 0 if the entity is well formed, -1 in case of args problem and
9395 * the parser error code otherwise
9396 */
9397
9398int
9399xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009400 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009401 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009402 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009403}
9404
9405/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009406 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009407 * @doc: the document the chunk pertains to
9408 * @sax: the SAX handler bloc (possibly NULL)
9409 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9410 * @depth: Used for loop detection, use 0
9411 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009412 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009413 *
9414 * Parse a well-balanced chunk of an XML document
9415 * called by the parser
9416 * The allowed sequence for the Well Balanced Chunk is the one defined by
9417 * the content production in the XML grammar:
9418 *
9419 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9420 *
9421 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9422 * the parser error code otherwise
9423 */
9424
9425int
9426xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009427 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009428 xmlParserCtxtPtr ctxt;
9429 xmlDocPtr newDoc;
9430 xmlSAXHandlerPtr oldsax = NULL;
9431 int size;
9432 int ret = 0;
9433
9434 if (depth > 40) {
9435 return(XML_ERR_ENTITY_LOOP);
9436 }
9437
9438
Daniel Veillardcda96922001-08-21 10:56:31 +00009439 if (lst != NULL)
9440 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009441 if (string == NULL)
9442 return(-1);
9443
9444 size = xmlStrlen(string);
9445
9446 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9447 if (ctxt == NULL) return(-1);
9448 ctxt->userData = ctxt;
9449 if (sax != NULL) {
9450 oldsax = ctxt->sax;
9451 ctxt->sax = sax;
9452 if (user_data != NULL)
9453 ctxt->userData = user_data;
9454 }
9455 newDoc = xmlNewDoc(BAD_CAST "1.0");
9456 if (newDoc == NULL) {
9457 xmlFreeParserCtxt(ctxt);
9458 return(-1);
9459 }
9460 if (doc != NULL) {
9461 newDoc->intSubset = doc->intSubset;
9462 newDoc->extSubset = doc->extSubset;
9463 }
9464 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9465 if (newDoc->children == NULL) {
9466 if (sax != NULL)
9467 ctxt->sax = oldsax;
9468 xmlFreeParserCtxt(ctxt);
9469 newDoc->intSubset = NULL;
9470 newDoc->extSubset = NULL;
9471 xmlFreeDoc(newDoc);
9472 return(-1);
9473 }
9474 nodePush(ctxt, newDoc->children);
9475 if (doc == NULL) {
9476 ctxt->myDoc = newDoc;
9477 } else {
9478 ctxt->myDoc = doc;
9479 newDoc->children->doc = doc;
9480 }
9481 ctxt->instate = XML_PARSER_CONTENT;
9482 ctxt->depth = depth;
9483
9484 /*
9485 * Doing validity checking on chunk doesn't make sense
9486 */
9487 ctxt->validate = 0;
9488 ctxt->loadsubset = 0;
9489
9490 xmlParseContent(ctxt);
9491
9492 if ((RAW == '<') && (NXT(1) == '/')) {
9493 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9495 ctxt->sax->error(ctxt->userData,
9496 "chunk is not well balanced\n");
9497 ctxt->wellFormed = 0;
9498 ctxt->disableSAX = 1;
9499 } else if (RAW != 0) {
9500 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9502 ctxt->sax->error(ctxt->userData,
9503 "extra content at the end of well balanced chunk\n");
9504 ctxt->wellFormed = 0;
9505 ctxt->disableSAX = 1;
9506 }
9507 if (ctxt->node != newDoc->children) {
9508 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9510 ctxt->sax->error(ctxt->userData,
9511 "chunk is not well balanced\n");
9512 ctxt->wellFormed = 0;
9513 ctxt->disableSAX = 1;
9514 }
9515
9516 if (!ctxt->wellFormed) {
9517 if (ctxt->errNo == 0)
9518 ret = 1;
9519 else
9520 ret = ctxt->errNo;
9521 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009522 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009523 xmlNodePtr cur;
9524
9525 /*
9526 * Return the newly created nodeset after unlinking it from
9527 * they pseudo parent.
9528 */
9529 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009530 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009531 while (cur != NULL) {
9532 cur->parent = NULL;
9533 cur = cur->next;
9534 }
9535 newDoc->children->children = NULL;
9536 }
9537 ret = 0;
9538 }
9539 if (sax != NULL)
9540 ctxt->sax = oldsax;
9541 xmlFreeParserCtxt(ctxt);
9542 newDoc->intSubset = NULL;
9543 newDoc->extSubset = NULL;
9544 xmlFreeDoc(newDoc);
9545
9546 return(ret);
9547}
9548
9549/**
9550 * xmlSAXParseEntity:
9551 * @sax: the SAX handler block
9552 * @filename: the filename
9553 *
9554 * parse an XML external entity out of context and build a tree.
9555 * It use the given SAX function block to handle the parsing callback.
9556 * If sax is NULL, fallback to the default DOM tree building routines.
9557 *
9558 * [78] extParsedEnt ::= TextDecl? content
9559 *
9560 * This correspond to a "Well Balanced" chunk
9561 *
9562 * Returns the resulting document tree
9563 */
9564
9565xmlDocPtr
9566xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9567 xmlDocPtr ret;
9568 xmlParserCtxtPtr ctxt;
9569 char *directory = NULL;
9570
9571 ctxt = xmlCreateFileParserCtxt(filename);
9572 if (ctxt == NULL) {
9573 return(NULL);
9574 }
9575 if (sax != NULL) {
9576 if (ctxt->sax != NULL)
9577 xmlFree(ctxt->sax);
9578 ctxt->sax = sax;
9579 ctxt->userData = NULL;
9580 }
9581
9582 if ((ctxt->directory == NULL) && (directory == NULL))
9583 directory = xmlParserGetDirectory(filename);
9584
9585 xmlParseExtParsedEnt(ctxt);
9586
9587 if (ctxt->wellFormed)
9588 ret = ctxt->myDoc;
9589 else {
9590 ret = NULL;
9591 xmlFreeDoc(ctxt->myDoc);
9592 ctxt->myDoc = NULL;
9593 }
9594 if (sax != NULL)
9595 ctxt->sax = NULL;
9596 xmlFreeParserCtxt(ctxt);
9597
9598 return(ret);
9599}
9600
9601/**
9602 * xmlParseEntity:
9603 * @filename: the filename
9604 *
9605 * parse an XML external entity out of context and build a tree.
9606 *
9607 * [78] extParsedEnt ::= TextDecl? content
9608 *
9609 * This correspond to a "Well Balanced" chunk
9610 *
9611 * Returns the resulting document tree
9612 */
9613
9614xmlDocPtr
9615xmlParseEntity(const char *filename) {
9616 return(xmlSAXParseEntity(NULL, filename));
9617}
9618
9619/**
9620 * xmlCreateEntityParserCtxt:
9621 * @URL: the entity URL
9622 * @ID: the entity PUBLIC ID
9623 * @base: a posible base for the target URI
9624 *
9625 * Create a parser context for an external entity
9626 * Automatic support for ZLIB/Compress compressed document is provided
9627 * by default if found at compile-time.
9628 *
9629 * Returns the new parser context or NULL
9630 */
9631xmlParserCtxtPtr
9632xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9633 const xmlChar *base) {
9634 xmlParserCtxtPtr ctxt;
9635 xmlParserInputPtr inputStream;
9636 char *directory = NULL;
9637 xmlChar *uri;
9638
9639 ctxt = xmlNewParserCtxt();
9640 if (ctxt == NULL) {
9641 return(NULL);
9642 }
9643
9644 uri = xmlBuildURI(URL, base);
9645
9646 if (uri == NULL) {
9647 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9648 if (inputStream == NULL) {
9649 xmlFreeParserCtxt(ctxt);
9650 return(NULL);
9651 }
9652
9653 inputPush(ctxt, inputStream);
9654
9655 if ((ctxt->directory == NULL) && (directory == NULL))
9656 directory = xmlParserGetDirectory((char *)URL);
9657 if ((ctxt->directory == NULL) && (directory != NULL))
9658 ctxt->directory = directory;
9659 } else {
9660 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9661 if (inputStream == NULL) {
9662 xmlFree(uri);
9663 xmlFreeParserCtxt(ctxt);
9664 return(NULL);
9665 }
9666
9667 inputPush(ctxt, inputStream);
9668
9669 if ((ctxt->directory == NULL) && (directory == NULL))
9670 directory = xmlParserGetDirectory((char *)uri);
9671 if ((ctxt->directory == NULL) && (directory != NULL))
9672 ctxt->directory = directory;
9673 xmlFree(uri);
9674 }
9675
9676 return(ctxt);
9677}
9678
9679/************************************************************************
9680 * *
9681 * Front ends when parsing from a file *
9682 * *
9683 ************************************************************************/
9684
9685/**
9686 * xmlCreateFileParserCtxt:
9687 * @filename: the filename
9688 *
9689 * Create a parser context for a file content.
9690 * Automatic support for ZLIB/Compress compressed document is provided
9691 * by default if found at compile-time.
9692 *
9693 * Returns the new parser context or NULL
9694 */
9695xmlParserCtxtPtr
9696xmlCreateFileParserCtxt(const char *filename)
9697{
9698 xmlParserCtxtPtr ctxt;
9699 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009700 char *directory = NULL;
9701
Owen Taylor3473f882001-02-23 17:55:21 +00009702 ctxt = xmlNewParserCtxt();
9703 if (ctxt == NULL) {
9704 if (xmlDefaultSAXHandler.error != NULL) {
9705 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9706 }
9707 return(NULL);
9708 }
9709
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009710 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009711 if (inputStream == NULL) {
9712 xmlFreeParserCtxt(ctxt);
9713 return(NULL);
9714 }
9715
Owen Taylor3473f882001-02-23 17:55:21 +00009716 inputPush(ctxt, inputStream);
9717 if ((ctxt->directory == NULL) && (directory == NULL))
9718 directory = xmlParserGetDirectory(filename);
9719 if ((ctxt->directory == NULL) && (directory != NULL))
9720 ctxt->directory = directory;
9721
9722 return(ctxt);
9723}
9724
9725/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009726 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009727 * @sax: the SAX handler block
9728 * @filename: the filename
9729 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9730 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009731 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009732 *
9733 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9734 * compressed document is provided by default if found at compile-time.
9735 * It use the given SAX function block to handle the parsing callback.
9736 * If sax is NULL, fallback to the default DOM tree building routines.
9737 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009738 * User data (void *) is stored within the parser context, so it is
9739 * available nearly everywhere in libxml.
9740 *
Owen Taylor3473f882001-02-23 17:55:21 +00009741 * Returns the resulting document tree
9742 */
9743
9744xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009745xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9746 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009747 xmlDocPtr ret;
9748 xmlParserCtxtPtr ctxt;
9749 char *directory = NULL;
9750
9751 ctxt = xmlCreateFileParserCtxt(filename);
9752 if (ctxt == NULL) {
9753 return(NULL);
9754 }
9755 if (sax != NULL) {
9756 if (ctxt->sax != NULL)
9757 xmlFree(ctxt->sax);
9758 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009759 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009760 if (data!=NULL) {
9761 ctxt->_private=data;
9762 }
Owen Taylor3473f882001-02-23 17:55:21 +00009763
9764 if ((ctxt->directory == NULL) && (directory == NULL))
9765 directory = xmlParserGetDirectory(filename);
9766 if ((ctxt->directory == NULL) && (directory != NULL))
9767 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9768
9769 xmlParseDocument(ctxt);
9770
9771 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9772 else {
9773 ret = NULL;
9774 xmlFreeDoc(ctxt->myDoc);
9775 ctxt->myDoc = NULL;
9776 }
9777 if (sax != NULL)
9778 ctxt->sax = NULL;
9779 xmlFreeParserCtxt(ctxt);
9780
9781 return(ret);
9782}
9783
9784/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009785 * xmlSAXParseFile:
9786 * @sax: the SAX handler block
9787 * @filename: the filename
9788 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9789 * documents
9790 *
9791 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9792 * compressed document is provided by default if found at compile-time.
9793 * It use the given SAX function block to handle the parsing callback.
9794 * If sax is NULL, fallback to the default DOM tree building routines.
9795 *
9796 * Returns the resulting document tree
9797 */
9798
9799xmlDocPtr
9800xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9801 int recovery) {
9802 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
9803}
9804
9805/**
Owen Taylor3473f882001-02-23 17:55:21 +00009806 * xmlRecoverDoc:
9807 * @cur: a pointer to an array of xmlChar
9808 *
9809 * parse an XML in-memory document and build a tree.
9810 * In the case the document is not Well Formed, a tree is built anyway
9811 *
9812 * Returns the resulting document tree
9813 */
9814
9815xmlDocPtr
9816xmlRecoverDoc(xmlChar *cur) {
9817 return(xmlSAXParseDoc(NULL, cur, 1));
9818}
9819
9820/**
9821 * xmlParseFile:
9822 * @filename: the filename
9823 *
9824 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9825 * compressed document is provided by default if found at compile-time.
9826 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009827 * Returns the resulting document tree if the file was wellformed,
9828 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009829 */
9830
9831xmlDocPtr
9832xmlParseFile(const char *filename) {
9833 return(xmlSAXParseFile(NULL, filename, 0));
9834}
9835
9836/**
9837 * xmlRecoverFile:
9838 * @filename: the filename
9839 *
9840 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9841 * compressed document is provided by default if found at compile-time.
9842 * In the case the document is not Well Formed, a tree is built anyway
9843 *
9844 * Returns the resulting document tree
9845 */
9846
9847xmlDocPtr
9848xmlRecoverFile(const char *filename) {
9849 return(xmlSAXParseFile(NULL, filename, 1));
9850}
9851
9852
9853/**
9854 * xmlSetupParserForBuffer:
9855 * @ctxt: an XML parser context
9856 * @buffer: a xmlChar * buffer
9857 * @filename: a file name
9858 *
9859 * Setup the parser context to parse a new buffer; Clears any prior
9860 * contents from the parser context. The buffer parameter must not be
9861 * NULL, but the filename parameter can be
9862 */
9863void
9864xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9865 const char* filename)
9866{
9867 xmlParserInputPtr input;
9868
9869 input = xmlNewInputStream(ctxt);
9870 if (input == NULL) {
9871 perror("malloc");
9872 xmlFree(ctxt);
9873 return;
9874 }
9875
9876 xmlClearParserCtxt(ctxt);
9877 if (filename != NULL)
9878 input->filename = xmlMemStrdup(filename);
9879 input->base = buffer;
9880 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009881 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009882 inputPush(ctxt, input);
9883}
9884
9885/**
9886 * xmlSAXUserParseFile:
9887 * @sax: a SAX handler
9888 * @user_data: The user data returned on SAX callbacks
9889 * @filename: a file name
9890 *
9891 * parse an XML file and call the given SAX handler routines.
9892 * Automatic support for ZLIB/Compress compressed document is provided
9893 *
9894 * Returns 0 in case of success or a error number otherwise
9895 */
9896int
9897xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9898 const char *filename) {
9899 int ret = 0;
9900 xmlParserCtxtPtr ctxt;
9901
9902 ctxt = xmlCreateFileParserCtxt(filename);
9903 if (ctxt == NULL) return -1;
9904 if (ctxt->sax != &xmlDefaultSAXHandler)
9905 xmlFree(ctxt->sax);
9906 ctxt->sax = sax;
9907 if (user_data != NULL)
9908 ctxt->userData = user_data;
9909
9910 xmlParseDocument(ctxt);
9911
9912 if (ctxt->wellFormed)
9913 ret = 0;
9914 else {
9915 if (ctxt->errNo != 0)
9916 ret = ctxt->errNo;
9917 else
9918 ret = -1;
9919 }
9920 if (sax != NULL)
9921 ctxt->sax = NULL;
9922 xmlFreeParserCtxt(ctxt);
9923
9924 return ret;
9925}
9926
9927/************************************************************************
9928 * *
9929 * Front ends when parsing from memory *
9930 * *
9931 ************************************************************************/
9932
9933/**
9934 * xmlCreateMemoryParserCtxt:
9935 * @buffer: a pointer to a char array
9936 * @size: the size of the array
9937 *
9938 * Create a parser context for an XML in-memory document.
9939 *
9940 * Returns the new parser context or NULL
9941 */
9942xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009943xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009944 xmlParserCtxtPtr ctxt;
9945 xmlParserInputPtr input;
9946 xmlParserInputBufferPtr buf;
9947
9948 if (buffer == NULL)
9949 return(NULL);
9950 if (size <= 0)
9951 return(NULL);
9952
9953 ctxt = xmlNewParserCtxt();
9954 if (ctxt == NULL)
9955 return(NULL);
9956
9957 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9958 if (buf == NULL) return(NULL);
9959
9960 input = xmlNewInputStream(ctxt);
9961 if (input == NULL) {
9962 xmlFreeParserCtxt(ctxt);
9963 return(NULL);
9964 }
9965
9966 input->filename = NULL;
9967 input->buf = buf;
9968 input->base = input->buf->buffer->content;
9969 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009970 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009971
9972 inputPush(ctxt, input);
9973 return(ctxt);
9974}
9975
9976/**
9977 * xmlSAXParseMemory:
9978 * @sax: the SAX handler block
9979 * @buffer: an pointer to a char array
9980 * @size: the size of the array
9981 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9982 * documents
9983 *
9984 * parse an XML in-memory block and use the given SAX function block
9985 * to handle the parsing callback. If sax is NULL, fallback to the default
9986 * DOM tree building routines.
9987 *
9988 * Returns the resulting document tree
9989 */
9990xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +00009991xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
9992 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +00009993 xmlDocPtr ret;
9994 xmlParserCtxtPtr ctxt;
9995
9996 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9997 if (ctxt == NULL) return(NULL);
9998 if (sax != NULL) {
9999 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010000 }
10001
10002 xmlParseDocument(ctxt);
10003
10004 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10005 else {
10006 ret = NULL;
10007 xmlFreeDoc(ctxt->myDoc);
10008 ctxt->myDoc = NULL;
10009 }
10010 if (sax != NULL)
10011 ctxt->sax = NULL;
10012 xmlFreeParserCtxt(ctxt);
10013
10014 return(ret);
10015}
10016
10017/**
10018 * xmlParseMemory:
10019 * @buffer: an pointer to a char array
10020 * @size: the size of the array
10021 *
10022 * parse an XML in-memory block and build a tree.
10023 *
10024 * Returns the resulting document tree
10025 */
10026
Daniel Veillard50822cb2001-07-26 20:05:51 +000010027xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010028 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10029}
10030
10031/**
10032 * xmlRecoverMemory:
10033 * @buffer: an pointer to a char array
10034 * @size: the size of the array
10035 *
10036 * parse an XML in-memory block and build a tree.
10037 * In the case the document is not Well Formed, a tree is built anyway
10038 *
10039 * Returns the resulting document tree
10040 */
10041
Daniel Veillard50822cb2001-07-26 20:05:51 +000010042xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010043 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10044}
10045
10046/**
10047 * xmlSAXUserParseMemory:
10048 * @sax: a SAX handler
10049 * @user_data: The user data returned on SAX callbacks
10050 * @buffer: an in-memory XML document input
10051 * @size: the length of the XML document in bytes
10052 *
10053 * A better SAX parsing routine.
10054 * parse an XML in-memory buffer and call the given SAX handler routines.
10055 *
10056 * Returns 0 in case of success or a error number otherwise
10057 */
10058int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010059 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010060 int ret = 0;
10061 xmlParserCtxtPtr ctxt;
10062 xmlSAXHandlerPtr oldsax = NULL;
10063
10064 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10065 if (ctxt == NULL) return -1;
10066 if (sax != NULL) {
10067 oldsax = ctxt->sax;
10068 ctxt->sax = sax;
10069 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010070 if (user_data != NULL)
10071 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010072
10073 xmlParseDocument(ctxt);
10074
10075 if (ctxt->wellFormed)
10076 ret = 0;
10077 else {
10078 if (ctxt->errNo != 0)
10079 ret = ctxt->errNo;
10080 else
10081 ret = -1;
10082 }
10083 if (sax != NULL) {
10084 ctxt->sax = oldsax;
10085 }
10086 xmlFreeParserCtxt(ctxt);
10087
10088 return ret;
10089}
10090
10091/**
10092 * xmlCreateDocParserCtxt:
10093 * @cur: a pointer to an array of xmlChar
10094 *
10095 * Creates a parser context for an XML in-memory document.
10096 *
10097 * Returns the new parser context or NULL
10098 */
10099xmlParserCtxtPtr
10100xmlCreateDocParserCtxt(xmlChar *cur) {
10101 int len;
10102
10103 if (cur == NULL)
10104 return(NULL);
10105 len = xmlStrlen(cur);
10106 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10107}
10108
10109/**
10110 * xmlSAXParseDoc:
10111 * @sax: the SAX handler block
10112 * @cur: a pointer to an array of xmlChar
10113 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10114 * documents
10115 *
10116 * parse an XML in-memory document and build a tree.
10117 * It use the given SAX function block to handle the parsing callback.
10118 * If sax is NULL, fallback to the default DOM tree building routines.
10119 *
10120 * Returns the resulting document tree
10121 */
10122
10123xmlDocPtr
10124xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10125 xmlDocPtr ret;
10126 xmlParserCtxtPtr ctxt;
10127
10128 if (cur == NULL) return(NULL);
10129
10130
10131 ctxt = xmlCreateDocParserCtxt(cur);
10132 if (ctxt == NULL) return(NULL);
10133 if (sax != NULL) {
10134 ctxt->sax = sax;
10135 ctxt->userData = NULL;
10136 }
10137
10138 xmlParseDocument(ctxt);
10139 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10140 else {
10141 ret = NULL;
10142 xmlFreeDoc(ctxt->myDoc);
10143 ctxt->myDoc = NULL;
10144 }
10145 if (sax != NULL)
10146 ctxt->sax = NULL;
10147 xmlFreeParserCtxt(ctxt);
10148
10149 return(ret);
10150}
10151
10152/**
10153 * xmlParseDoc:
10154 * @cur: a pointer to an array of xmlChar
10155 *
10156 * parse an XML in-memory document and build a tree.
10157 *
10158 * Returns the resulting document tree
10159 */
10160
10161xmlDocPtr
10162xmlParseDoc(xmlChar *cur) {
10163 return(xmlSAXParseDoc(NULL, cur, 0));
10164}
10165
10166
10167/************************************************************************
10168 * *
10169 * Miscellaneous *
10170 * *
10171 ************************************************************************/
10172
10173#ifdef LIBXML_XPATH_ENABLED
10174#include <libxml/xpath.h>
10175#endif
10176
10177static int xmlParserInitialized = 0;
10178
10179/**
10180 * xmlInitParser:
10181 *
10182 * Initialization function for the XML parser.
10183 * This is not reentrant. Call once before processing in case of
10184 * use in multithreaded programs.
10185 */
10186
10187void
10188xmlInitParser(void) {
10189 if (xmlParserInitialized) return;
10190
Daniel Veillard6f350292001-10-14 09:56:15 +000010191 initGenericErrorDefaultFunc(NULL);
Daniel Veillardd0463562001-10-13 09:15:48 +000010192 xmlInitThreads();
Daniel Veillard6f350292001-10-14 09:56:15 +000010193 xmlInitMemory();
Daniel Veillardd0463562001-10-13 09:15:48 +000010194 initGenericErrorDefaultFunc(NULL);
Owen Taylor3473f882001-02-23 17:55:21 +000010195 xmlInitCharEncodingHandlers();
10196 xmlInitializePredefinedEntities();
10197 xmlDefaultSAXHandlerInit();
10198 xmlRegisterDefaultInputCallbacks();
10199 xmlRegisterDefaultOutputCallbacks();
10200#ifdef LIBXML_HTML_ENABLED
10201 htmlInitAutoClose();
10202 htmlDefaultSAXHandlerInit();
10203#endif
10204#ifdef LIBXML_XPATH_ENABLED
10205 xmlXPathInit();
10206#endif
10207 xmlParserInitialized = 1;
10208}
10209
10210/**
10211 * xmlCleanupParser:
10212 *
10213 * Cleanup function for the XML parser. It tries to reclaim all
10214 * parsing related global memory allocated for the parser processing.
10215 * It doesn't deallocate any document related memory. Calling this
10216 * function should not prevent reusing the parser.
10217 */
10218
10219void
10220xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010221 xmlCleanupCharEncodingHandlers();
10222 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010223#ifdef LIBXML_CATALOG_ENABLED
10224 xmlCatalogCleanup();
10225#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010226 xmlCleanupThreads();
10227 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010228}