blob: dde6d1288aa1b0985c61d33b1398bd8eb338623d [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Owen Taylor3473f882001-02-23 17:55:21 +000035#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
Daniel Veillardd0463562001-10-13 09:15:48 +000044#include <libxml/threads.h>
45#include <libxml/globals.h>
Owen Taylor3473f882001-02-23 17:55:21 +000046#include <libxml/tree.h>
47#include <libxml/parser.h>
48#include <libxml/parserInternals.h>
49#include <libxml/valid.h>
50#include <libxml/entities.h>
51#include <libxml/xmlerror.h>
52#include <libxml/encoding.h>
53#include <libxml/xmlIO.h>
54#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000055#ifdef LIBXML_CATALOG_ENABLED
56#include <libxml/catalog.h>
57#endif
Owen Taylor3473f882001-02-23 17:55:21 +000058
59#ifdef HAVE_CTYPE_H
60#include <ctype.h>
61#endif
62#ifdef HAVE_STDLIB_H
63#include <stdlib.h>
64#endif
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68#ifdef HAVE_FCNTL_H
69#include <fcntl.h>
70#endif
71#ifdef HAVE_UNISTD_H
72#include <unistd.h>
73#endif
74#ifdef HAVE_ZLIB_H
75#include <zlib.h>
76#endif
77
78
Daniel Veillard21a0f912001-02-25 19:54:14 +000079#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000080#define XML_PARSER_BUFFER_SIZE 100
81
/*
 * List of XML prefixed PI allowed by W3C specs.
 *
 * The table is NULL-terminated and is only ever read, so both the strings
 * and the pointers to them are declared const.
 */

static const char * const xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
90
91/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
92void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
93xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
94 const xmlChar **str);
95
Daniel Veillard257d9102001-05-08 10:41:44 +000096static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +000097xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
98 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +000099 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000100 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000101
102/************************************************************************
103 * *
104 * Parser stacks related functions and macros *
105 * *
106 ************************************************************************/
107
108xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
109 const xmlChar ** str);
110
111/*
112 * Generic function for accessing stacks in the Parser Context
113 */
114
115#define PUSH_AND_POP(scope, type, name) \
116scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
117 if (ctxt->name##Nr >= ctxt->name##Max) { \
118 ctxt->name##Max *= 2; \
119 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
120 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
121 if (ctxt->name##Tab == NULL) { \
122 xmlGenericError(xmlGenericErrorContext, \
123 "realloc failed !\n"); \
124 return(0); \
125 } \
126 } \
127 ctxt->name##Tab[ctxt->name##Nr] = value; \
128 ctxt->name = value; \
129 return(ctxt->name##Nr++); \
130} \
131scope type name##Pop(xmlParserCtxtPtr ctxt) { \
132 type ret; \
133 if (ctxt->name##Nr <= 0) return(0); \
134 ctxt->name##Nr--; \
135 if (ctxt->name##Nr > 0) \
136 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
137 else \
138 ctxt->name = NULL; \
139 ret = ctxt->name##Tab[ctxt->name##Nr]; \
140 ctxt->name##Tab[ctxt->name##Nr] = 0; \
141 return(ret); \
142} \
143
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000144/**
145 * inputPop:
146 * @ctxt: an XML parser context
147 *
148 * Pops the top parser input from the input stack
149 *
150 * Returns the input just removed
151 */
152/**
153 * inputPush:
154 * @ctxt: an XML parser context
155 * @input: the parser input
156 *
157 * Pushes a new parser input on top of the input stack
158 */
159/**
160 * namePop:
161 * @ctxt: an XML parser context
162 *
163 * Pops the top element name from the name stack
164 *
165 * Returns the name just removed
166 */
167/**
168 * namePush:
169 * @ctxt: an XML parser context
170 * @name: the element name
171 *
172 * Pushes a new element name on top of the name stack
173 */
174/**
175 * nodePop:
176 * @ctxt: an XML parser context
177 *
178 * Pops the top element node from the node stack
179 *
180 * Returns the node just removed
181 */
182/**
183 * nodePush:
184 * @ctxt: an XML parser context
185 * @node: the element node
186 *
187 * Pushes a new element node on top of the node stack
188 */
Owen Taylor3473f882001-02-23 17:55:21 +0000189/*
190 * Those macros actually generate the functions
191 */
192PUSH_AND_POP(extern, xmlParserInputPtr, input)
193PUSH_AND_POP(extern, xmlNodePtr, node)
194PUSH_AND_POP(extern, xmlChar*, name)
195
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000196static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000197 if (ctxt->spaceNr >= ctxt->spaceMax) {
198 ctxt->spaceMax *= 2;
199 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
200 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
201 if (ctxt->spaceTab == NULL) {
202 xmlGenericError(xmlGenericErrorContext,
203 "realloc failed !\n");
204 return(0);
205 }
206 }
207 ctxt->spaceTab[ctxt->spaceNr] = val;
208 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
209 return(ctxt->spaceNr++);
210}
211
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000212static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000213 int ret;
214 if (ctxt->spaceNr <= 0) return(0);
215 ctxt->spaceNr--;
216 if (ctxt->spaceNr > 0)
217 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
218 else
219 ctxt->space = NULL;
220 ret = ctxt->spaceTab[ctxt->spaceNr];
221 ctxt->spaceTab[ctxt->spaceNr] = -1;
222 return(ret);
223}
224
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent on an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pops finished entities on the fly.
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operates on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
258
/* Byte at the input cursor, or -1 while ctxt->token holds a pending token. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* The pending token when there is one, else the byte at the input cursor. */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * Advance the cursor by val bytes, give PE reference handling a chance if
 * the cursor lands on '%', then refill or pop the input when it is exhausted.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  } while (0)

/*
 * NOTE: SHRINK, GROW and NEXT1 deliberately keep their bare "if { }" /
 * "{ }" shape: call sites in this file invoke them both with and without
 * a trailing semicolon, which a do { } while (0) wrapper would break.
 * Never use them as the unbraced body of an if that has an else branch.
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
            xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Advance by exactly one byte, refilling the input when it runs dry. */
#define NEXT1 {								\
        ctxt->input->cur++;						\
        ctxt->nbChars++;						\
        if (*ctxt->input->cur == 0)					\
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Advance by l bytes, updating line/column from the byte being left and
 * dropping any pending token. The argument is parenthesized so that an
 * expression can be passed safely.
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
        ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i: a plain byte store when its
 * encoded length l is 1, xmlCopyCharMultiByte otherwise. i is advanced by
 * the number of bytes written. Every argument is parenthesized so that
 * expressions such as "flags & 3" or "base + off" expand as intended.
 * The if/else form is kept, so the invocation must end with a semicolon.
 */
#define COPY_BUF(l,b,i,v)						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000311
312/**
313 * xmlSkipBlankChars:
314 * @ctxt: the XML parser context
315 *
316 * skip all blanks character found at that point in the input streams.
317 * It pops up finished entities in the process if allowable at that point.
318 *
319 * Returns the number of space chars skipped
320 */
321
322int
323xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000324 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000325
Daniel Veillard02141ea2001-04-30 11:46:40 +0000326 if (ctxt->token != 0) {
327 if (!IS_BLANK(ctxt->token))
328 return(0);
329 ctxt->token = 0;
330 res++;
331 }
Owen Taylor3473f882001-02-23 17:55:21 +0000332 /*
333 * It's Okay to use CUR/NEXT here since all the blanks are on
334 * the ASCII range.
335 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000336 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
337 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000338 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000339 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000340 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000341 cur = ctxt->input->cur;
342 while (IS_BLANK(*cur)) {
343 if (*cur == '\n') {
344 ctxt->input->line++; ctxt->input->col = 1;
345 }
346 cur++;
347 res++;
348 if (*cur == 0) {
349 ctxt->input->cur = cur;
350 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
351 cur = ctxt->input->cur;
352 }
353 }
354 ctxt->input->cur = cur;
355 } else {
356 int cur;
357 do {
358 cur = CUR;
359 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
360 NEXT;
361 cur = CUR;
362 res++;
363 }
364 while ((cur == 0) && (ctxt->inputNr > 1) &&
365 (ctxt->instate != XML_PARSER_COMMENT)) {
366 xmlPopInput(ctxt);
367 cur = CUR;
368 }
369 /*
370 * Need to handle support of entities branching here
371 */
372 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
373 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
374 }
Owen Taylor3473f882001-02-23 17:55:21 +0000375 return(res);
376}
377
378/************************************************************************
379 * *
380 * Commodity functions to handle entities *
381 * *
382 ************************************************************************/
383
384/**
385 * xmlPopInput:
386 * @ctxt: an XML parser context
387 *
388 * xmlPopInput: the current input pointed by ctxt->input came to an end
389 * pop it and return the next char.
390 *
391 * Returns the current xmlChar in the parser context
392 */
393xmlChar
394xmlPopInput(xmlParserCtxtPtr ctxt) {
395 if (ctxt->inputNr == 1) return(0); /* End of main Input */
396 if (xmlParserDebugEntities)
397 xmlGenericError(xmlGenericErrorContext,
398 "Popping input %d\n", ctxt->inputNr);
399 xmlFreeInputStream(inputPop(ctxt));
400 if ((*ctxt->input->cur == 0) &&
401 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
402 return(xmlPopInput(ctxt));
403 return(CUR);
404}
405
406/**
407 * xmlPushInput:
408 * @ctxt: an XML parser context
409 * @input: an XML parser input fragment (entity, XML fragment ...).
410 *
411 * xmlPushInput: switch to a new input stream which is stacked on top
412 * of the previous one(s).
413 */
414void
415xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
416 if (input == NULL) return;
417
418 if (xmlParserDebugEntities) {
419 if ((ctxt->input != NULL) && (ctxt->input->filename))
420 xmlGenericError(xmlGenericErrorContext,
421 "%s(%d): ", ctxt->input->filename,
422 ctxt->input->line);
423 xmlGenericError(xmlGenericErrorContext,
424 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
425 }
426 inputPush(ctxt, input);
427 GROW;
428}
429
430/**
431 * xmlParseCharRef:
432 * @ctxt: an XML parser context
433 *
434 * parse Reference declarations
435 *
436 * [66] CharRef ::= '&#' [0-9]+ ';' |
437 * '&#x' [0-9a-fA-F]+ ';'
438 *
439 * [ WFC: Legal Character ]
440 * Characters referred to using character references must match the
441 * production for Char.
442 *
443 * Returns the value parsed (as an int), 0 in case of error
444 */
445int
446xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000447 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000448 int count = 0;
449
450 if (ctxt->token != 0) {
451 val = ctxt->token;
452 ctxt->token = 0;
453 return(val);
454 }
455 /*
456 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
457 */
458 if ((RAW == '&') && (NXT(1) == '#') &&
459 (NXT(2) == 'x')) {
460 SKIP(3);
461 GROW;
462 while (RAW != ';') { /* loop blocked by count */
463 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
464 val = val * 16 + (CUR - '0');
465 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
466 val = val * 16 + (CUR - 'a') + 10;
467 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
468 val = val * 16 + (CUR - 'A') + 10;
469 else {
470 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
472 ctxt->sax->error(ctxt->userData,
473 "xmlParseCharRef: invalid hexadecimal value\n");
474 ctxt->wellFormed = 0;
475 ctxt->disableSAX = 1;
476 val = 0;
477 break;
478 }
479 NEXT;
480 count++;
481 }
482 if (RAW == ';') {
483 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
484 ctxt->nbChars ++;
485 ctxt->input->cur++;
486 }
487 } else if ((RAW == '&') && (NXT(1) == '#')) {
488 SKIP(2);
489 GROW;
490 while (RAW != ';') { /* loop blocked by count */
491 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
492 val = val * 10 + (CUR - '0');
493 else {
494 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
495 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
496 ctxt->sax->error(ctxt->userData,
497 "xmlParseCharRef: invalid decimal value\n");
498 ctxt->wellFormed = 0;
499 ctxt->disableSAX = 1;
500 val = 0;
501 break;
502 }
503 NEXT;
504 count++;
505 }
506 if (RAW == ';') {
507 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
508 ctxt->nbChars ++;
509 ctxt->input->cur++;
510 }
511 } else {
512 ctxt->errNo = XML_ERR_INVALID_CHARREF;
513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
514 ctxt->sax->error(ctxt->userData,
515 "xmlParseCharRef: invalid value\n");
516 ctxt->wellFormed = 0;
517 ctxt->disableSAX = 1;
518 }
519
520 /*
521 * [ WFC: Legal Character ]
522 * Characters referred to using character references must match the
523 * production for Char.
524 */
525 if (IS_CHAR(val)) {
526 return(val);
527 } else {
528 ctxt->errNo = XML_ERR_INVALID_CHAR;
529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
530 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
531 val);
532 ctxt->wellFormed = 0;
533 ctxt->disableSAX = 1;
534 }
535 return(0);
536}
537
538/**
539 * xmlParseStringCharRef:
540 * @ctxt: an XML parser context
541 * @str: a pointer to an index in the string
542 *
543 * parse Reference declarations, variant parsing from a string rather
544 * than an an input flow.
545 *
546 * [66] CharRef ::= '&#' [0-9]+ ';' |
547 * '&#x' [0-9a-fA-F]+ ';'
548 *
549 * [ WFC: Legal Character ]
550 * Characters referred to using character references must match the
551 * production for Char.
552 *
553 * Returns the value parsed (as an int), 0 in case of error, str will be
554 * updated to the current value of the index
555 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000556static int
Owen Taylor3473f882001-02-23 17:55:21 +0000557xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
558 const xmlChar *ptr;
559 xmlChar cur;
560 int val = 0;
561
562 if ((str == NULL) || (*str == NULL)) return(0);
563 ptr = *str;
564 cur = *ptr;
565 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
566 ptr += 3;
567 cur = *ptr;
568 while (cur != ';') { /* Non input consuming loop */
569 if ((cur >= '0') && (cur <= '9'))
570 val = val * 16 + (cur - '0');
571 else if ((cur >= 'a') && (cur <= 'f'))
572 val = val * 16 + (cur - 'a') + 10;
573 else if ((cur >= 'A') && (cur <= 'F'))
574 val = val * 16 + (cur - 'A') + 10;
575 else {
576 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
578 ctxt->sax->error(ctxt->userData,
579 "xmlParseStringCharRef: invalid hexadecimal value\n");
580 ctxt->wellFormed = 0;
581 ctxt->disableSAX = 1;
582 val = 0;
583 break;
584 }
585 ptr++;
586 cur = *ptr;
587 }
588 if (cur == ';')
589 ptr++;
590 } else if ((cur == '&') && (ptr[1] == '#')){
591 ptr += 2;
592 cur = *ptr;
593 while (cur != ';') { /* Non input consuming loops */
594 if ((cur >= '0') && (cur <= '9'))
595 val = val * 10 + (cur - '0');
596 else {
597 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
599 ctxt->sax->error(ctxt->userData,
600 "xmlParseStringCharRef: invalid decimal value\n");
601 ctxt->wellFormed = 0;
602 ctxt->disableSAX = 1;
603 val = 0;
604 break;
605 }
606 ptr++;
607 cur = *ptr;
608 }
609 if (cur == ';')
610 ptr++;
611 } else {
612 ctxt->errNo = XML_ERR_INVALID_CHARREF;
613 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
614 ctxt->sax->error(ctxt->userData,
615 "xmlParseCharRef: invalid value\n");
616 ctxt->wellFormed = 0;
617 ctxt->disableSAX = 1;
618 return(0);
619 }
620 *str = ptr;
621
622 /*
623 * [ WFC: Legal Character ]
624 * Characters referred to using character references must match the
625 * production for Char.
626 */
627 if (IS_CHAR(val)) {
628 return(val);
629 } else {
630 ctxt->errNo = XML_ERR_INVALID_CHAR;
631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
632 ctxt->sax->error(ctxt->userData,
633 "CharRef: invalid xmlChar value %d\n", val);
634 ctxt->wellFormed = 0;
635 ctxt->disableSAX = 1;
636 }
637 return(0);
638}
639
640/**
641 * xmlParserHandlePEReference:
642 * @ctxt: the parser context
643 *
644 * [69] PEReference ::= '%' Name ';'
645 *
646 * [ WFC: No Recursion ]
647 * A parsed entity must not contain a recursive
648 * reference to itself, either directly or indirectly.
649 *
650 * [ WFC: Entity Declared ]
651 * In a document without any DTD, a document with only an internal DTD
652 * subset which contains no parameter entity references, or a document
653 * with "standalone='yes'", ... ... The declaration of a parameter
654 * entity must precede any reference to it...
655 *
656 * [ VC: Entity Declared ]
657 * In a document with an external subset or external parameter entities
658 * with "standalone='no'", ... ... The declaration of a parameter entity
659 * must precede any reference to it...
660 *
661 * [ WFC: In DTD ]
662 * Parameter-entity references may only appear in the DTD.
663 * NOTE: misleading but this is handled.
664 *
665 * A PEReference may have been detected in the current input stream
666 * the handling is done accordingly to
667 * http://www.w3.org/TR/REC-xml#entproc
668 * i.e.
669 * - Included in literal in entity values
670 * - Included as Paraemeter Entity reference within DTDs
671 */
672void
673xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
674 xmlChar *name;
675 xmlEntityPtr entity = NULL;
676 xmlParserInputPtr input;
677
678 if (ctxt->token != 0) {
679 return;
680 }
681 if (RAW != '%') return;
682 switch(ctxt->instate) {
683 case XML_PARSER_CDATA_SECTION:
684 return;
685 case XML_PARSER_COMMENT:
686 return;
687 case XML_PARSER_START_TAG:
688 return;
689 case XML_PARSER_END_TAG:
690 return;
691 case XML_PARSER_EOF:
692 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
694 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
695 ctxt->wellFormed = 0;
696 ctxt->disableSAX = 1;
697 return;
698 case XML_PARSER_PROLOG:
699 case XML_PARSER_START:
700 case XML_PARSER_MISC:
701 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
703 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
704 ctxt->wellFormed = 0;
705 ctxt->disableSAX = 1;
706 return;
707 case XML_PARSER_ENTITY_DECL:
708 case XML_PARSER_CONTENT:
709 case XML_PARSER_ATTRIBUTE_VALUE:
710 case XML_PARSER_PI:
711 case XML_PARSER_SYSTEM_LITERAL:
712 /* we just ignore it there */
713 return;
714 case XML_PARSER_EPILOG:
715 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
717 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
718 ctxt->wellFormed = 0;
719 ctxt->disableSAX = 1;
720 return;
721 case XML_PARSER_ENTITY_VALUE:
722 /*
723 * NOTE: in the case of entity values, we don't do the
724 * substitution here since we need the literal
725 * entity value to be able to save the internal
726 * subset of the document.
727 * This will be handled by xmlStringDecodeEntities
728 */
729 return;
730 case XML_PARSER_DTD:
731 /*
732 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
733 * In the internal DTD subset, parameter-entity references
734 * can occur only where markup declarations can occur, not
735 * within markup declarations.
736 * In that case this is handled in xmlParseMarkupDecl
737 */
738 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
739 return;
740 break;
741 case XML_PARSER_IGNORE:
742 return;
743 }
744
745 NEXT;
746 name = xmlParseName(ctxt);
747 if (xmlParserDebugEntities)
748 xmlGenericError(xmlGenericErrorContext,
749 "PE Reference: %s\n", name);
750 if (name == NULL) {
751 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
753 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
754 ctxt->wellFormed = 0;
755 ctxt->disableSAX = 1;
756 } else {
757 if (RAW == ';') {
758 NEXT;
759 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
760 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
761 if (entity == NULL) {
762
763 /*
764 * [ WFC: Entity Declared ]
765 * In a document without any DTD, a document with only an
766 * internal DTD subset which contains no parameter entity
767 * references, or a document with "standalone='yes'", ...
768 * ... The declaration of a parameter entity must precede
769 * any reference to it...
770 */
771 if ((ctxt->standalone == 1) ||
772 ((ctxt->hasExternalSubset == 0) &&
773 (ctxt->hasPErefs == 0))) {
774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
775 ctxt->sax->error(ctxt->userData,
776 "PEReference: %%%s; not found\n", name);
777 ctxt->wellFormed = 0;
778 ctxt->disableSAX = 1;
779 } else {
780 /*
781 * [ VC: Entity Declared ]
782 * In a document with an external subset or external
783 * parameter entities with "standalone='no'", ...
784 * ... The declaration of a parameter entity must precede
785 * any reference to it...
786 */
787 if ((!ctxt->disableSAX) &&
788 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
789 ctxt->vctxt.error(ctxt->vctxt.userData,
790 "PEReference: %%%s; not found\n", name);
791 } else if ((!ctxt->disableSAX) &&
792 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
793 ctxt->sax->warning(ctxt->userData,
794 "PEReference: %%%s; not found\n", name);
795 ctxt->valid = 0;
796 }
797 } else {
798 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
799 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000800 xmlChar start[4];
801 xmlCharEncoding enc;
802
Owen Taylor3473f882001-02-23 17:55:21 +0000803 /*
804 * handle the extra spaces added before and after
805 * c.f. http://www.w3.org/TR/REC-xml#as-PE
806 * this is done independantly.
807 */
808 input = xmlNewEntityInputStream(ctxt, entity);
809 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000810
811 /*
812 * Get the 4 first bytes and decode the charset
813 * if enc != XML_CHAR_ENCODING_NONE
814 * plug some encoding conversion routines.
815 */
816 GROW
817 start[0] = RAW;
818 start[1] = NXT(1);
819 start[2] = NXT(2);
820 start[3] = NXT(3);
821 enc = xmlDetectCharEncoding(start, 4);
822 if (enc != XML_CHAR_ENCODING_NONE) {
823 xmlSwitchEncoding(ctxt, enc);
824 }
825
Owen Taylor3473f882001-02-23 17:55:21 +0000826 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
827 (RAW == '<') && (NXT(1) == '?') &&
828 (NXT(2) == 'x') && (NXT(3) == 'm') &&
829 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
830 xmlParseTextDecl(ctxt);
831 }
832 if (ctxt->token == 0)
833 ctxt->token = ' ';
834 } else {
835 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
836 ctxt->sax->error(ctxt->userData,
837 "xmlHandlePEReference: %s is not a parameter entity\n",
838 name);
839 ctxt->wellFormed = 0;
840 ctxt->disableSAX = 1;
841 }
842 }
843 } else {
844 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
846 ctxt->sax->error(ctxt->userData,
847 "xmlHandlePEReference: expecting ';'\n");
848 ctxt->wellFormed = 0;
849 ctxt->disableSAX = 1;
850 }
851 xmlFree(name);
852 }
853}
854
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer accordingly. The result of
 * xmlRealloc goes through a temporary: on failure the old buffer is
 * released (instead of being leaked by overwriting its only pointer with
 * NULL) and the enclosing function returns NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
867
868/**
869 * xmlStringDecodeEntities:
870 * @ctxt: the parser context
871 * @str: the input string
872 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
873 * @end: an end marker xmlChar, 0 if none
874 * @end2: an end marker xmlChar, 0 if none
875 * @end3: an end marker xmlChar, 0 if none
876 *
877 * Takes a entity string content and process to do the adequate subtitutions.
878 *
879 * [67] Reference ::= EntityRef | CharRef
880 *
881 * [69] PEReference ::= '%' Name ';'
882 *
883 * Returns A newly allocated string with the substitution done. The caller
884 * must deallocate it !
885 */
886xmlChar *
887xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
888 xmlChar end, xmlChar end2, xmlChar end3) {
889 xmlChar *buffer = NULL;
890 int buffer_size = 0;
891
892 xmlChar *current = NULL;
893 xmlEntityPtr ent;
894 int c,l;
895 int nbchars = 0;
896
897 if (str == NULL)
898 return(NULL);
899
900 if (ctxt->depth > 40) {
901 ctxt->errNo = XML_ERR_ENTITY_LOOP;
902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
903 ctxt->sax->error(ctxt->userData,
904 "Detected entity reference loop\n");
905 ctxt->wellFormed = 0;
906 ctxt->disableSAX = 1;
907 return(NULL);
908 }
909
910 /*
911 * allocate a translation buffer.
912 */
913 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
914 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
915 if (buffer == NULL) {
916 perror("xmlDecodeEntities: malloc failed");
917 return(NULL);
918 }
919
920 /*
921 * Ok loop until we reach one of the ending char or a size limit.
922 * we are operating on already parsed values.
923 */
924 c = CUR_SCHAR(str, l);
925 while ((c != 0) && (c != end) && /* non input consuming loop */
926 (c != end2) && (c != end3)) {
927
928 if (c == 0) break;
929 if ((c == '&') && (str[1] == '#')) {
930 int val = xmlParseStringCharRef(ctxt, &str);
931 if (val != 0) {
932 COPY_BUF(0,buffer,nbchars,val);
933 }
934 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
935 if (xmlParserDebugEntities)
936 xmlGenericError(xmlGenericErrorContext,
937 "String decoding Entity Reference: %.30s\n",
938 str);
939 ent = xmlParseStringEntityRef(ctxt, &str);
940 if ((ent != NULL) &&
941 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
942 if (ent->content != NULL) {
943 COPY_BUF(0,buffer,nbchars,ent->content[0]);
944 } else {
945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
946 ctxt->sax->error(ctxt->userData,
947 "internal error entity has no content\n");
948 }
949 } else if ((ent != NULL) && (ent->content != NULL)) {
950 xmlChar *rep;
951
952 ctxt->depth++;
953 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
954 0, 0, 0);
955 ctxt->depth--;
956 if (rep != NULL) {
957 current = rep;
958 while (*current != 0) { /* non input consuming loop */
959 buffer[nbchars++] = *current++;
960 if (nbchars >
961 buffer_size - XML_PARSER_BUFFER_SIZE) {
962 growBuffer(buffer);
963 }
964 }
965 xmlFree(rep);
966 }
967 } else if (ent != NULL) {
968 int i = xmlStrlen(ent->name);
969 const xmlChar *cur = ent->name;
970
971 buffer[nbchars++] = '&';
972 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
973 growBuffer(buffer);
974 }
975 for (;i > 0;i--)
976 buffer[nbchars++] = *cur++;
977 buffer[nbchars++] = ';';
978 }
979 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
980 if (xmlParserDebugEntities)
981 xmlGenericError(xmlGenericErrorContext,
982 "String decoding PE Reference: %.30s\n", str);
983 ent = xmlParseStringPEReference(ctxt, &str);
984 if (ent != NULL) {
985 xmlChar *rep;
986
987 ctxt->depth++;
988 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
989 0, 0, 0);
990 ctxt->depth--;
991 if (rep != NULL) {
992 current = rep;
993 while (*current != 0) { /* non input consuming loop */
994 buffer[nbchars++] = *current++;
995 if (nbchars >
996 buffer_size - XML_PARSER_BUFFER_SIZE) {
997 growBuffer(buffer);
998 }
999 }
1000 xmlFree(rep);
1001 }
1002 }
1003 } else {
1004 COPY_BUF(l,buffer,nbchars,c);
1005 str += l;
1006 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1007 growBuffer(buffer);
1008 }
1009 }
1010 c = CUR_SCHAR(str, l);
1011 }
1012 buffer[nbchars++] = 0;
1013 return(buffer);
1014}
1015
1016
1017/************************************************************************
1018 * *
1019 * Commodity functions to handle xmlChars *
1020 * *
1021 ************************************************************************/
1022
1023/**
1024 * xmlStrndup:
1025 * @cur: the input xmlChar *
1026 * @len: the len of @cur
1027 *
1028 * a strndup for array of xmlChar's
1029 *
1030 * Returns a new xmlChar * or NULL
1031 */
1032xmlChar *
1033xmlStrndup(const xmlChar *cur, int len) {
1034 xmlChar *ret;
1035
1036 if ((cur == NULL) || (len < 0)) return(NULL);
1037 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1038 if (ret == NULL) {
1039 xmlGenericError(xmlGenericErrorContext,
1040 "malloc of %ld byte failed\n",
1041 (len + 1) * (long)sizeof(xmlChar));
1042 return(NULL);
1043 }
1044 memcpy(ret, cur, len * sizeof(xmlChar));
1045 ret[len] = 0;
1046 return(ret);
1047}
1048
1049/**
1050 * xmlStrdup:
1051 * @cur: the input xmlChar *
1052 *
1053 * a strdup for array of xmlChar's. Since they are supposed to be
1054 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1055 * a termination mark of '0'.
1056 *
1057 * Returns a new xmlChar * or NULL
1058 */
1059xmlChar *
1060xmlStrdup(const xmlChar *cur) {
1061 const xmlChar *p = cur;
1062
1063 if (cur == NULL) return(NULL);
1064 while (*p != 0) p++; /* non input consuming */
1065 return(xmlStrndup(cur, p - cur));
1066}
1067
1068/**
1069 * xmlCharStrndup:
1070 * @cur: the input char *
1071 * @len: the len of @cur
1072 *
1073 * a strndup for char's to xmlChar's
1074 *
1075 * Returns a new xmlChar * or NULL
1076 */
1077
1078xmlChar *
1079xmlCharStrndup(const char *cur, int len) {
1080 int i;
1081 xmlChar *ret;
1082
1083 if ((cur == NULL) || (len < 0)) return(NULL);
1084 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1085 if (ret == NULL) {
1086 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1087 (len + 1) * (long)sizeof(xmlChar));
1088 return(NULL);
1089 }
1090 for (i = 0;i < len;i++)
1091 ret[i] = (xmlChar) cur[i];
1092 ret[len] = 0;
1093 return(ret);
1094}
1095
1096/**
1097 * xmlCharStrdup:
1098 * @cur: the input char *
1099 * @len: the len of @cur
1100 *
1101 * a strdup for char's to xmlChar's
1102 *
1103 * Returns a new xmlChar * or NULL
1104 */
1105
1106xmlChar *
1107xmlCharStrdup(const char *cur) {
1108 const char *p = cur;
1109
1110 if (cur == NULL) return(NULL);
1111 while (*p != '\0') p++; /* non input consuming */
1112 return(xmlCharStrndup(cur, p - cur));
1113}
1114
1115/**
1116 * xmlStrcmp:
1117 * @str1: the first xmlChar *
1118 * @str2: the second xmlChar *
1119 *
1120 * a strcmp for xmlChar's
1121 *
1122 * Returns the integer result of the comparison
1123 */
1124
1125int
1126xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1127 register int tmp;
1128
1129 if (str1 == str2) return(0);
1130 if (str1 == NULL) return(-1);
1131 if (str2 == NULL) return(1);
1132 do {
1133 tmp = *str1++ - *str2;
1134 if (tmp != 0) return(tmp);
1135 } while (*str2++ != 0);
1136 return 0;
1137}
1138
1139/**
1140 * xmlStrEqual:
1141 * @str1: the first xmlChar *
1142 * @str2: the second xmlChar *
1143 *
1144 * Check if both string are equal of have same content
1145 * Should be a bit more readable and faster than xmlStrEqual()
1146 *
1147 * Returns 1 if they are equal, 0 if they are different
1148 */
1149
1150int
1151xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1152 if (str1 == str2) return(1);
1153 if (str1 == NULL) return(0);
1154 if (str2 == NULL) return(0);
1155 do {
1156 if (*str1++ != *str2) return(0);
1157 } while (*str2++);
1158 return(1);
1159}
1160
1161/**
1162 * xmlStrncmp:
1163 * @str1: the first xmlChar *
1164 * @str2: the second xmlChar *
1165 * @len: the max comparison length
1166 *
1167 * a strncmp for xmlChar's
1168 *
1169 * Returns the integer result of the comparison
1170 */
1171
1172int
1173xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1174 register int tmp;
1175
1176 if (len <= 0) return(0);
1177 if (str1 == str2) return(0);
1178 if (str1 == NULL) return(-1);
1179 if (str2 == NULL) return(1);
1180 do {
1181 tmp = *str1++ - *str2;
1182 if (tmp != 0 || --len == 0) return(tmp);
1183 } while (*str2++ != 0);
1184 return 0;
1185}
1186
Daniel Veillardb44025c2001-10-11 22:55:55 +00001187static const xmlChar casemap[256] = {
Owen Taylor3473f882001-02-23 17:55:21 +00001188 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1189 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1190 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1191 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1192 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1193 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1194 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1195 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1196 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1197 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1198 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1199 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1200 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1201 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1202 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1203 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1204 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1205 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1206 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1207 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1208 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1209 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1210 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1211 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1212 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1213 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1214 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1215 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1216 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1217 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1218 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1219 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1220};
1221
1222/**
1223 * xmlStrcasecmp:
1224 * @str1: the first xmlChar *
1225 * @str2: the second xmlChar *
1226 *
1227 * a strcasecmp for xmlChar's
1228 *
1229 * Returns the integer result of the comparison
1230 */
1231
1232int
1233xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1234 register int tmp;
1235
1236 if (str1 == str2) return(0);
1237 if (str1 == NULL) return(-1);
1238 if (str2 == NULL) return(1);
1239 do {
1240 tmp = casemap[*str1++] - casemap[*str2];
1241 if (tmp != 0) return(tmp);
1242 } while (*str2++ != 0);
1243 return 0;
1244}
1245
1246/**
1247 * xmlStrncasecmp:
1248 * @str1: the first xmlChar *
1249 * @str2: the second xmlChar *
1250 * @len: the max comparison length
1251 *
1252 * a strncasecmp for xmlChar's
1253 *
1254 * Returns the integer result of the comparison
1255 */
1256
1257int
1258xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1259 register int tmp;
1260
1261 if (len <= 0) return(0);
1262 if (str1 == str2) return(0);
1263 if (str1 == NULL) return(-1);
1264 if (str2 == NULL) return(1);
1265 do {
1266 tmp = casemap[*str1++] - casemap[*str2];
1267 if (tmp != 0 || --len == 0) return(tmp);
1268 } while (*str2++ != 0);
1269 return 0;
1270}
1271
1272/**
1273 * xmlStrchr:
1274 * @str: the xmlChar * array
1275 * @val: the xmlChar to search
1276 *
1277 * a strchr for xmlChar's
1278 *
1279 * Returns the xmlChar * for the first occurence or NULL.
1280 */
1281
1282const xmlChar *
1283xmlStrchr(const xmlChar *str, xmlChar val) {
1284 if (str == NULL) return(NULL);
1285 while (*str != 0) { /* non input consuming */
1286 if (*str == val) return((xmlChar *) str);
1287 str++;
1288 }
1289 return(NULL);
1290}
1291
1292/**
1293 * xmlStrstr:
1294 * @str: the xmlChar * array (haystack)
1295 * @val: the xmlChar to search (needle)
1296 *
1297 * a strstr for xmlChar's
1298 *
1299 * Returns the xmlChar * for the first occurence or NULL.
1300 */
1301
1302const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001303xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001304 int n;
1305
1306 if (str == NULL) return(NULL);
1307 if (val == NULL) return(NULL);
1308 n = xmlStrlen(val);
1309
1310 if (n == 0) return(str);
1311 while (*str != 0) { /* non input consuming */
1312 if (*str == *val) {
1313 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1314 }
1315 str++;
1316 }
1317 return(NULL);
1318}
1319
1320/**
1321 * xmlStrcasestr:
1322 * @str: the xmlChar * array (haystack)
1323 * @val: the xmlChar to search (needle)
1324 *
1325 * a case-ignoring strstr for xmlChar's
1326 *
1327 * Returns the xmlChar * for the first occurence or NULL.
1328 */
1329
1330const xmlChar *
1331xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1332 int n;
1333
1334 if (str == NULL) return(NULL);
1335 if (val == NULL) return(NULL);
1336 n = xmlStrlen(val);
1337
1338 if (n == 0) return(str);
1339 while (*str != 0) { /* non input consuming */
1340 if (casemap[*str] == casemap[*val])
1341 if (!xmlStrncasecmp(str, val, n)) return(str);
1342 str++;
1343 }
1344 return(NULL);
1345}
1346
1347/**
1348 * xmlStrsub:
1349 * @str: the xmlChar * array (haystack)
1350 * @start: the index of the first char (zero based)
1351 * @len: the length of the substring
1352 *
1353 * Extract a substring of a given string
1354 *
1355 * Returns the xmlChar * for the first occurence or NULL.
1356 */
1357
1358xmlChar *
1359xmlStrsub(const xmlChar *str, int start, int len) {
1360 int i;
1361
1362 if (str == NULL) return(NULL);
1363 if (start < 0) return(NULL);
1364 if (len < 0) return(NULL);
1365
1366 for (i = 0;i < start;i++) {
1367 if (*str == 0) return(NULL);
1368 str++;
1369 }
1370 if (*str == 0) return(NULL);
1371 return(xmlStrndup(str, len));
1372}
1373
1374/**
1375 * xmlStrlen:
1376 * @str: the xmlChar * array
1377 *
1378 * length of a xmlChar's string
1379 *
1380 * Returns the number of xmlChar contained in the ARRAY.
1381 */
1382
1383int
1384xmlStrlen(const xmlChar *str) {
1385 int len = 0;
1386
1387 if (str == NULL) return(0);
1388 while (*str != 0) { /* non input consuming */
1389 str++;
1390 len++;
1391 }
1392 return(len);
1393}
1394
1395/**
1396 * xmlStrncat:
1397 * @cur: the original xmlChar * array
1398 * @add: the xmlChar * array added
1399 * @len: the length of @add
1400 *
1401 * a strncat for array of xmlChar's, it will extend cur with the len
1402 * first bytes of @add.
1403 *
1404 * Returns a new xmlChar *, the original @cur is reallocated if needed
1405 * and should not be freed
1406 */
1407
1408xmlChar *
1409xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1410 int size;
1411 xmlChar *ret;
1412
1413 if ((add == NULL) || (len == 0))
1414 return(cur);
1415 if (cur == NULL)
1416 return(xmlStrndup(add, len));
1417
1418 size = xmlStrlen(cur);
1419 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1420 if (ret == NULL) {
1421 xmlGenericError(xmlGenericErrorContext,
1422 "xmlStrncat: realloc of %ld byte failed\n",
1423 (size + len + 1) * (long)sizeof(xmlChar));
1424 return(cur);
1425 }
1426 memcpy(&ret[size], add, len * sizeof(xmlChar));
1427 ret[size + len] = 0;
1428 return(ret);
1429}
1430
1431/**
1432 * xmlStrcat:
1433 * @cur: the original xmlChar * array
1434 * @add: the xmlChar * array added
1435 *
1436 * a strcat for array of xmlChar's. Since they are supposed to be
1437 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1438 * a termination mark of '0'.
1439 *
1440 * Returns a new xmlChar * containing the concatenated string.
1441 */
1442xmlChar *
1443xmlStrcat(xmlChar *cur, const xmlChar *add) {
1444 const xmlChar *p = add;
1445
1446 if (add == NULL) return(cur);
1447 if (cur == NULL)
1448 return(xmlStrdup(add));
1449
1450 while (*p != 0) p++; /* non input consuming */
1451 return(xmlStrncat(cur, add, p - add));
1452}
1453
1454/************************************************************************
1455 * *
1456 * Commodity functions, cleanup needed ? *
1457 * *
1458 ************************************************************************/
1459
1460/**
1461 * areBlanks:
1462 * @ctxt: an XML parser context
1463 * @str: a xmlChar *
1464 * @len: the size of @str
1465 *
1466 * Is this a sequence of blank chars that one can ignore ?
1467 *
1468 * Returns 1 if ignorable 0 otherwise.
1469 */
1470
1471static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1472 int i, ret;
1473 xmlNodePtr lastChild;
1474
Daniel Veillard05c13a22001-09-09 08:38:09 +00001475 /*
1476 * Don't spend time trying to differentiate them, the same callback is
1477 * used !
1478 */
1479 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001480 return(0);
1481
Owen Taylor3473f882001-02-23 17:55:21 +00001482 /*
1483 * Check for xml:space value.
1484 */
1485 if (*(ctxt->space) == 1)
1486 return(0);
1487
1488 /*
1489 * Check that the string is made of blanks
1490 */
1491 for (i = 0;i < len;i++)
1492 if (!(IS_BLANK(str[i]))) return(0);
1493
1494 /*
1495 * Look if the element is mixed content in the Dtd if available
1496 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001497 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001498 if (ctxt->myDoc != NULL) {
1499 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1500 if (ret == 0) return(1);
1501 if (ret == 1) return(0);
1502 }
1503
1504 /*
1505 * Otherwise, heuristic :-\
1506 */
Owen Taylor3473f882001-02-23 17:55:21 +00001507 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001508 if ((ctxt->node->children == NULL) &&
1509 (RAW == '<') && (NXT(1) == '/')) return(0);
1510
1511 lastChild = xmlGetLastChild(ctxt->node);
1512 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001513 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1514 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001515 } else if (xmlNodeIsText(lastChild))
1516 return(0);
1517 else if ((ctxt->node->children != NULL) &&
1518 (xmlNodeIsText(ctxt->node->children)))
1519 return(0);
1520 return(1);
1521}
1522
1523/*
 * Forward declarations for recursive behaviour.
1525 */
1526void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1527void xmlParseReference(xmlParserCtxtPtr ctxt);
1528
1529/************************************************************************
1530 * *
1531 * Extra stuff for namespace support *
1532 * Relates to http://www.w3.org/TR/WD-xml-names *
1533 * *
1534 ************************************************************************/
1535
1536/**
1537 * xmlSplitQName:
1538 * @ctxt: an XML parser context
1539 * @name: an XML parser context
1540 * @prefix: a xmlChar **
1541 *
1542 * parse an UTF8 encoded XML qualified name string
1543 *
1544 * [NS 5] QName ::= (Prefix ':')? LocalPart
1545 *
1546 * [NS 6] Prefix ::= NCName
1547 *
1548 * [NS 7] LocalPart ::= NCName
1549 *
1550 * Returns the local part, and prefix is updated
1551 * to get the Prefix if any.
1552 */
1553
1554xmlChar *
1555xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1556 xmlChar buf[XML_MAX_NAMELEN + 5];
1557 xmlChar *buffer = NULL;
1558 int len = 0;
1559 int max = XML_MAX_NAMELEN;
1560 xmlChar *ret = NULL;
1561 const xmlChar *cur = name;
1562 int c;
1563
1564 *prefix = NULL;
1565
1566#ifndef XML_XML_NAMESPACE
1567 /* xml: prefix is not really a namespace */
1568 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1569 (cur[2] == 'l') && (cur[3] == ':'))
1570 return(xmlStrdup(name));
1571#endif
1572
1573 /* nasty but valid */
1574 if (cur[0] == ':')
1575 return(xmlStrdup(name));
1576
1577 c = *cur++;
1578 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1579 buf[len++] = c;
1580 c = *cur++;
1581 }
1582 if (len >= max) {
1583 /*
1584 * Okay someone managed to make a huge name, so he's ready to pay
1585 * for the processing speed.
1586 */
1587 max = len * 2;
1588
1589 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1590 if (buffer == NULL) {
1591 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1592 ctxt->sax->error(ctxt->userData,
1593 "xmlSplitQName: out of memory\n");
1594 return(NULL);
1595 }
1596 memcpy(buffer, buf, len);
1597 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1598 if (len + 10 > max) {
1599 max *= 2;
1600 buffer = (xmlChar *) xmlRealloc(buffer,
1601 max * sizeof(xmlChar));
1602 if (buffer == NULL) {
1603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1604 ctxt->sax->error(ctxt->userData,
1605 "xmlSplitQName: out of memory\n");
1606 return(NULL);
1607 }
1608 }
1609 buffer[len++] = c;
1610 c = *cur++;
1611 }
1612 buffer[len] = 0;
1613 }
1614
1615 if (buffer == NULL)
1616 ret = xmlStrndup(buf, len);
1617 else {
1618 ret = buffer;
1619 buffer = NULL;
1620 max = XML_MAX_NAMELEN;
1621 }
1622
1623
1624 if (c == ':') {
1625 c = *cur++;
1626 if (c == 0) return(ret);
1627 *prefix = ret;
1628 len = 0;
1629
1630 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1631 buf[len++] = c;
1632 c = *cur++;
1633 }
1634 if (len >= max) {
1635 /*
1636 * Okay someone managed to make a huge name, so he's ready to pay
1637 * for the processing speed.
1638 */
1639 max = len * 2;
1640
1641 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1642 if (buffer == NULL) {
1643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1644 ctxt->sax->error(ctxt->userData,
1645 "xmlSplitQName: out of memory\n");
1646 return(NULL);
1647 }
1648 memcpy(buffer, buf, len);
1649 while (c != 0) { /* tested bigname2.xml */
1650 if (len + 10 > max) {
1651 max *= 2;
1652 buffer = (xmlChar *) xmlRealloc(buffer,
1653 max * sizeof(xmlChar));
1654 if (buffer == NULL) {
1655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1656 ctxt->sax->error(ctxt->userData,
1657 "xmlSplitQName: out of memory\n");
1658 return(NULL);
1659 }
1660 }
1661 buffer[len++] = c;
1662 c = *cur++;
1663 }
1664 buffer[len] = 0;
1665 }
1666
1667 if (buffer == NULL)
1668 ret = xmlStrndup(buf, len);
1669 else {
1670 ret = buffer;
1671 }
1672 }
1673
1674 return(ret);
1675}
1676
1677/************************************************************************
1678 * *
1679 * The parser itself *
1680 * Relates to http://www.w3.org/TR/REC-xml *
1681 * *
1682 ************************************************************************/
1683
Daniel Veillard76d66f42001-05-16 21:05:17 +00001684static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001685/**
1686 * xmlParseName:
1687 * @ctxt: an XML parser context
1688 *
1689 * parse an XML name.
1690 *
1691 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1692 * CombiningChar | Extender
1693 *
1694 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1695 *
1696 * [6] Names ::= Name (S Name)*
1697 *
1698 * Returns the Name parsed or NULL
1699 */
1700
1701xmlChar *
1702xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001703 const xmlChar *in;
1704 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001705 int count = 0;
1706
1707 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001708
1709 /*
1710 * Accelerator for simple ASCII names
1711 */
1712 in = ctxt->input->cur;
1713 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1714 ((*in >= 0x41) && (*in <= 0x5A)) ||
1715 (*in == '_') || (*in == ':')) {
1716 in++;
1717 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1718 ((*in >= 0x41) && (*in <= 0x5A)) ||
1719 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001720 (*in == '_') || (*in == '-') ||
1721 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001722 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001723 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001724 count = in - ctxt->input->cur;
1725 ret = xmlStrndup(ctxt->input->cur, count);
1726 ctxt->input->cur = in;
1727 return(ret);
1728 }
1729 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001730 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001731}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001732
Daniel Veillard76d66f42001-05-16 21:05:17 +00001733static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001734xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1735 xmlChar buf[XML_MAX_NAMELEN + 5];
1736 int len = 0, l;
1737 int c;
1738 int count = 0;
1739
1740 /*
1741 * Handler for more complex cases
1742 */
1743 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001744 c = CUR_CHAR(l);
1745 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1746 (!IS_LETTER(c) && (c != '_') &&
1747 (c != ':'))) {
1748 return(NULL);
1749 }
1750
1751 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1752 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1753 (c == '.') || (c == '-') ||
1754 (c == '_') || (c == ':') ||
1755 (IS_COMBINING(c)) ||
1756 (IS_EXTENDER(c)))) {
1757 if (count++ > 100) {
1758 count = 0;
1759 GROW;
1760 }
1761 COPY_BUF(l,buf,len,c);
1762 NEXTL(l);
1763 c = CUR_CHAR(l);
1764 if (len >= XML_MAX_NAMELEN) {
1765 /*
1766 * Okay someone managed to make a huge name, so he's ready to pay
1767 * for the processing speed.
1768 */
1769 xmlChar *buffer;
1770 int max = len * 2;
1771
1772 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1773 if (buffer == NULL) {
1774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1775 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001776 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001777 return(NULL);
1778 }
1779 memcpy(buffer, buf, len);
1780 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1781 (c == '.') || (c == '-') ||
1782 (c == '_') || (c == ':') ||
1783 (IS_COMBINING(c)) ||
1784 (IS_EXTENDER(c))) {
1785 if (count++ > 100) {
1786 count = 0;
1787 GROW;
1788 }
1789 if (len + 10 > max) {
1790 max *= 2;
1791 buffer = (xmlChar *) xmlRealloc(buffer,
1792 max * sizeof(xmlChar));
1793 if (buffer == NULL) {
1794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1795 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001796 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001797 return(NULL);
1798 }
1799 }
1800 COPY_BUF(l,buffer,len,c);
1801 NEXTL(l);
1802 c = CUR_CHAR(l);
1803 }
1804 buffer[len] = 0;
1805 return(buffer);
1806 }
1807 }
1808 return(xmlStrndup(buf, len));
1809}
1810
1811/**
1812 * xmlParseStringName:
1813 * @ctxt: an XML parser context
1814 * @str: a pointer to the string pointer (IN/OUT)
1815 *
1816 * parse an XML name.
1817 *
1818 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1819 * CombiningChar | Extender
1820 *
1821 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1822 *
1823 * [6] Names ::= Name (S Name)*
1824 *
1825 * Returns the Name parsed or NULL. The str pointer
1826 * is updated to the current location in the string.
1827 */
1828
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001829static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001830xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1831 xmlChar buf[XML_MAX_NAMELEN + 5];
1832 const xmlChar *cur = *str;
1833 int len = 0, l;
1834 int c;
1835
1836 c = CUR_SCHAR(cur, l);
1837 if (!IS_LETTER(c) && (c != '_') &&
1838 (c != ':')) {
1839 return(NULL);
1840 }
1841
1842 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1843 (c == '.') || (c == '-') ||
1844 (c == '_') || (c == ':') ||
1845 (IS_COMBINING(c)) ||
1846 (IS_EXTENDER(c))) {
1847 COPY_BUF(l,buf,len,c);
1848 cur += l;
1849 c = CUR_SCHAR(cur, l);
1850 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1851 /*
1852 * Okay someone managed to make a huge name, so he's ready to pay
1853 * for the processing speed.
1854 */
1855 xmlChar *buffer;
1856 int max = len * 2;
1857
1858 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1859 if (buffer == NULL) {
1860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1861 ctxt->sax->error(ctxt->userData,
1862 "xmlParseStringName: out of memory\n");
1863 return(NULL);
1864 }
1865 memcpy(buffer, buf, len);
1866 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1867 (c == '.') || (c == '-') ||
1868 (c == '_') || (c == ':') ||
1869 (IS_COMBINING(c)) ||
1870 (IS_EXTENDER(c))) {
1871 if (len + 10 > max) {
1872 max *= 2;
1873 buffer = (xmlChar *) xmlRealloc(buffer,
1874 max * sizeof(xmlChar));
1875 if (buffer == NULL) {
1876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1877 ctxt->sax->error(ctxt->userData,
1878 "xmlParseStringName: out of memory\n");
1879 return(NULL);
1880 }
1881 }
1882 COPY_BUF(l,buffer,len,c);
1883 cur += l;
1884 c = CUR_SCHAR(cur, l);
1885 }
1886 buffer[len] = 0;
1887 *str = cur;
1888 return(buffer);
1889 }
1890 }
1891 *str = cur;
1892 return(xmlStrndup(buf, len));
1893}
1894
1895/**
1896 * xmlParseNmtoken:
1897 * @ctxt: an XML parser context
1898 *
1899 * parse an XML Nmtoken.
1900 *
1901 * [7] Nmtoken ::= (NameChar)+
1902 *
1903 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1904 *
1905 * Returns the Nmtoken parsed or NULL
1906 */
1907
1908xmlChar *
1909xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1910 xmlChar buf[XML_MAX_NAMELEN + 5];
1911 int len = 0, l;
1912 int c;
1913 int count = 0;
1914
1915 GROW;
1916 c = CUR_CHAR(l);
1917
1918 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1919 (c == '.') || (c == '-') ||
1920 (c == '_') || (c == ':') ||
1921 (IS_COMBINING(c)) ||
1922 (IS_EXTENDER(c))) {
1923 if (count++ > 100) {
1924 count = 0;
1925 GROW;
1926 }
1927 COPY_BUF(l,buf,len,c);
1928 NEXTL(l);
1929 c = CUR_CHAR(l);
1930 if (len >= XML_MAX_NAMELEN) {
1931 /*
1932 * Okay someone managed to make a huge token, so he's ready to pay
1933 * for the processing speed.
1934 */
1935 xmlChar *buffer;
1936 int max = len * 2;
1937
1938 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1939 if (buffer == NULL) {
1940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1941 ctxt->sax->error(ctxt->userData,
1942 "xmlParseNmtoken: out of memory\n");
1943 return(NULL);
1944 }
1945 memcpy(buffer, buf, len);
1946 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1947 (c == '.') || (c == '-') ||
1948 (c == '_') || (c == ':') ||
1949 (IS_COMBINING(c)) ||
1950 (IS_EXTENDER(c))) {
1951 if (count++ > 100) {
1952 count = 0;
1953 GROW;
1954 }
1955 if (len + 10 > max) {
1956 max *= 2;
1957 buffer = (xmlChar *) xmlRealloc(buffer,
1958 max * sizeof(xmlChar));
1959 if (buffer == NULL) {
1960 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1961 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001962 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001963 return(NULL);
1964 }
1965 }
1966 COPY_BUF(l,buffer,len,c);
1967 NEXTL(l);
1968 c = CUR_CHAR(l);
1969 }
1970 buffer[len] = 0;
1971 return(buffer);
1972 }
1973 }
1974 if (len == 0)
1975 return(NULL);
1976 return(xmlStrndup(buf, len));
1977}
1978
1979/**
1980 * xmlParseEntityValue:
1981 * @ctxt: an XML parser context
1982 * @orig: if non-NULL store a copy of the original entity value
1983 *
1984 * parse a value for ENTITY declarations
1985 *
1986 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1987 * "'" ([^%&'] | PEReference | Reference)* "'"
1988 *
1989 * Returns the EntityValue parsed with reference substitued or NULL
1990 */
1991
1992xmlChar *
1993xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1994 xmlChar *buf = NULL;
1995 int len = 0;
1996 int size = XML_PARSER_BUFFER_SIZE;
1997 int c, l;
1998 xmlChar stop;
1999 xmlChar *ret = NULL;
2000 const xmlChar *cur = NULL;
2001 xmlParserInputPtr input;
2002
2003 if (RAW == '"') stop = '"';
2004 else if (RAW == '\'') stop = '\'';
2005 else {
2006 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2008 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2009 ctxt->wellFormed = 0;
2010 ctxt->disableSAX = 1;
2011 return(NULL);
2012 }
2013 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2014 if (buf == NULL) {
2015 xmlGenericError(xmlGenericErrorContext,
2016 "malloc of %d byte failed\n", size);
2017 return(NULL);
2018 }
2019
2020 /*
2021 * The content of the entity definition is copied in a buffer.
2022 */
2023
2024 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2025 input = ctxt->input;
2026 GROW;
2027 NEXT;
2028 c = CUR_CHAR(l);
2029 /*
2030 * NOTE: 4.4.5 Included in Literal
2031 * When a parameter entity reference appears in a literal entity
2032 * value, ... a single or double quote character in the replacement
2033 * text is always treated as a normal data character and will not
2034 * terminate the literal.
2035 * In practice it means we stop the loop only when back at parsing
2036 * the initial entity and the quote is found
2037 */
2038 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2039 (ctxt->input != input))) {
2040 if (len + 5 >= size) {
2041 size *= 2;
2042 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2043 if (buf == NULL) {
2044 xmlGenericError(xmlGenericErrorContext,
2045 "realloc of %d byte failed\n", size);
2046 return(NULL);
2047 }
2048 }
2049 COPY_BUF(l,buf,len,c);
2050 NEXTL(l);
2051 /*
2052 * Pop-up of finished entities.
2053 */
2054 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2055 xmlPopInput(ctxt);
2056
2057 GROW;
2058 c = CUR_CHAR(l);
2059 if (c == 0) {
2060 GROW;
2061 c = CUR_CHAR(l);
2062 }
2063 }
2064 buf[len] = 0;
2065
2066 /*
2067 * Raise problem w.r.t. '&' and '%' being used in non-entities
2068 * reference constructs. Note Charref will be handled in
2069 * xmlStringDecodeEntities()
2070 */
2071 cur = buf;
2072 while (*cur != 0) { /* non input consuming */
2073 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2074 xmlChar *name;
2075 xmlChar tmp = *cur;
2076
2077 cur++;
2078 name = xmlParseStringName(ctxt, &cur);
2079 if ((name == NULL) || (*cur != ';')) {
2080 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2082 ctxt->sax->error(ctxt->userData,
2083 "EntityValue: '%c' forbidden except for entities references\n",
2084 tmp);
2085 ctxt->wellFormed = 0;
2086 ctxt->disableSAX = 1;
2087 }
Daniel Veillard5151c062001-10-23 13:10:19 +00002088 if ((tmp == '%') && (ctxt->inSubset == 1) &&
2089 (ctxt->inputNr == 1)) {
Owen Taylor3473f882001-02-23 17:55:21 +00002090 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2092 ctxt->sax->error(ctxt->userData,
2093 "EntityValue: PEReferences forbidden in internal subset\n",
2094 tmp);
2095 ctxt->wellFormed = 0;
2096 ctxt->disableSAX = 1;
2097 }
2098 if (name != NULL)
2099 xmlFree(name);
2100 }
2101 cur++;
2102 }
2103
2104 /*
2105 * Then PEReference entities are substituted.
2106 */
2107 if (c != stop) {
2108 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2110 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2111 ctxt->wellFormed = 0;
2112 ctxt->disableSAX = 1;
2113 xmlFree(buf);
2114 } else {
2115 NEXT;
2116 /*
2117 * NOTE: 4.4.7 Bypassed
2118 * When a general entity reference appears in the EntityValue in
2119 * an entity declaration, it is bypassed and left as is.
2120 * so XML_SUBSTITUTE_REF is not set here.
2121 */
2122 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2123 0, 0, 0);
2124 if (orig != NULL)
2125 *orig = buf;
2126 else
2127 xmlFree(buf);
2128 }
2129
2130 return(ret);
2131}
2132
2133/**
2134 * xmlParseAttValue:
2135 * @ctxt: an XML parser context
2136 *
2137 * parse a value for an attribute
2138 * Note: the parser won't do substitution of entities here, this
2139 * will be handled later in xmlStringGetNodeList
2140 *
2141 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2142 * "'" ([^<&'] | Reference)* "'"
2143 *
2144 * 3.3.3 Attribute-Value Normalization:
2145 * Before the value of an attribute is passed to the application or
2146 * checked for validity, the XML processor must normalize it as follows:
2147 * - a character reference is processed by appending the referenced
2148 * character to the attribute value
2149 * - an entity reference is processed by recursively processing the
2150 * replacement text of the entity
2151 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2152 * appending #x20 to the normalized value, except that only a single
2153 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2154 * parsed entity or the literal entity value of an internal parsed entity
2155 * - other characters are processed by appending them to the normalized value
2156 * If the declared value is not CDATA, then the XML processor must further
2157 * process the normalized attribute value by discarding any leading and
2158 * trailing space (#x20) characters, and by replacing sequences of space
2159 * (#x20) characters by a single space (#x20) character.
2160 * All attributes for which no declaration has been read should be treated
2161 * by a non-validating parser as if declared CDATA.
2162 *
2163 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2164 */
2165
2166xmlChar *
2167xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2168 xmlChar limit = 0;
2169 xmlChar *buf = NULL;
2170 int len = 0;
2171 int buf_size = 0;
2172 int c, l;
2173 xmlChar *current = NULL;
2174 xmlEntityPtr ent;
2175
2176
2177 SHRINK;
2178 if (NXT(0) == '"') {
2179 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2180 limit = '"';
2181 NEXT;
2182 } else if (NXT(0) == '\'') {
2183 limit = '\'';
2184 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2185 NEXT;
2186 } else {
2187 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2188 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2189 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2190 ctxt->wellFormed = 0;
2191 ctxt->disableSAX = 1;
2192 return(NULL);
2193 }
2194
2195 /*
2196 * allocate a translation buffer.
2197 */
2198 buf_size = XML_PARSER_BUFFER_SIZE;
2199 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2200 if (buf == NULL) {
2201 perror("xmlParseAttValue: malloc failed");
2202 return(NULL);
2203 }
2204
2205 /*
2206 * Ok loop until we reach one of the ending char or a size limit.
2207 */
2208 c = CUR_CHAR(l);
2209 while (((NXT(0) != limit) && /* checked */
2210 (c != '<')) || (ctxt->token != 0)) {
2211 if (c == 0) break;
2212 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002213 if (ctxt->replaceEntities) {
2214 if (len > buf_size - 10) {
2215 growBuffer(buf);
2216 }
2217 buf[len++] = '&';
2218 } else {
2219 /*
2220 * The reparsing will be done in xmlStringGetNodeList()
2221 * called by the attribute() function in SAX.c
2222 */
2223 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002224
Daniel Veillard319a7422001-09-11 09:27:09 +00002225 if (len > buf_size - 10) {
2226 growBuffer(buf);
2227 }
2228 current = &buffer[0];
2229 while (*current != 0) { /* non input consuming */
2230 buf[len++] = *current++;
2231 }
2232 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002233 }
Owen Taylor3473f882001-02-23 17:55:21 +00002234 } else if (c == '&') {
2235 if (NXT(1) == '#') {
2236 int val = xmlParseCharRef(ctxt);
2237 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002238 if (ctxt->replaceEntities) {
2239 if (len > buf_size - 10) {
2240 growBuffer(buf);
2241 }
2242 buf[len++] = '&';
2243 } else {
2244 /*
2245 * The reparsing will be done in xmlStringGetNodeList()
2246 * called by the attribute() function in SAX.c
2247 */
2248 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002249
Daniel Veillard319a7422001-09-11 09:27:09 +00002250 if (len > buf_size - 10) {
2251 growBuffer(buf);
2252 }
2253 current = &buffer[0];
2254 while (*current != 0) { /* non input consuming */
2255 buf[len++] = *current++;
2256 }
Owen Taylor3473f882001-02-23 17:55:21 +00002257 }
2258 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002259 if (len > buf_size - 10) {
2260 growBuffer(buf);
2261 }
Owen Taylor3473f882001-02-23 17:55:21 +00002262 len += xmlCopyChar(0, &buf[len], val);
2263 }
2264 } else {
2265 ent = xmlParseEntityRef(ctxt);
2266 if ((ent != NULL) &&
2267 (ctxt->replaceEntities != 0)) {
2268 xmlChar *rep;
2269
2270 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2271 rep = xmlStringDecodeEntities(ctxt, ent->content,
2272 XML_SUBSTITUTE_REF, 0, 0, 0);
2273 if (rep != NULL) {
2274 current = rep;
2275 while (*current != 0) { /* non input consuming */
2276 buf[len++] = *current++;
2277 if (len > buf_size - 10) {
2278 growBuffer(buf);
2279 }
2280 }
2281 xmlFree(rep);
2282 }
2283 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002284 if (len > buf_size - 10) {
2285 growBuffer(buf);
2286 }
Owen Taylor3473f882001-02-23 17:55:21 +00002287 if (ent->content != NULL)
2288 buf[len++] = ent->content[0];
2289 }
2290 } else if (ent != NULL) {
2291 int i = xmlStrlen(ent->name);
2292 const xmlChar *cur = ent->name;
2293
2294 /*
2295 * This may look absurd but is needed to detect
2296 * entities problems
2297 */
2298 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2299 (ent->content != NULL)) {
2300 xmlChar *rep;
2301 rep = xmlStringDecodeEntities(ctxt, ent->content,
2302 XML_SUBSTITUTE_REF, 0, 0, 0);
2303 if (rep != NULL)
2304 xmlFree(rep);
2305 }
2306
2307 /*
2308 * Just output the reference
2309 */
2310 buf[len++] = '&';
2311 if (len > buf_size - i - 10) {
2312 growBuffer(buf);
2313 }
2314 for (;i > 0;i--)
2315 buf[len++] = *cur++;
2316 buf[len++] = ';';
2317 }
2318 }
2319 } else {
2320 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2321 COPY_BUF(l,buf,len,0x20);
2322 if (len > buf_size - 10) {
2323 growBuffer(buf);
2324 }
2325 } else {
2326 COPY_BUF(l,buf,len,c);
2327 if (len > buf_size - 10) {
2328 growBuffer(buf);
2329 }
2330 }
2331 NEXTL(l);
2332 }
2333 GROW;
2334 c = CUR_CHAR(l);
2335 }
2336 buf[len++] = 0;
2337 if (RAW == '<') {
2338 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2339 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2340 ctxt->sax->error(ctxt->userData,
2341 "Unescaped '<' not allowed in attributes values\n");
2342 ctxt->wellFormed = 0;
2343 ctxt->disableSAX = 1;
2344 } else if (RAW != limit) {
2345 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2347 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2348 ctxt->wellFormed = 0;
2349 ctxt->disableSAX = 1;
2350 } else
2351 NEXT;
2352 return(buf);
2353}
2354
2355/**
2356 * xmlParseSystemLiteral:
2357 * @ctxt: an XML parser context
2358 *
2359 * parse an XML Literal
2360 *
2361 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2362 *
2363 * Returns the SystemLiteral parsed or NULL
2364 */
2365
2366xmlChar *
2367xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2368 xmlChar *buf = NULL;
2369 int len = 0;
2370 int size = XML_PARSER_BUFFER_SIZE;
2371 int cur, l;
2372 xmlChar stop;
2373 int state = ctxt->instate;
2374 int count = 0;
2375
2376 SHRINK;
2377 if (RAW == '"') {
2378 NEXT;
2379 stop = '"';
2380 } else if (RAW == '\'') {
2381 NEXT;
2382 stop = '\'';
2383 } else {
2384 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2386 ctxt->sax->error(ctxt->userData,
2387 "SystemLiteral \" or ' expected\n");
2388 ctxt->wellFormed = 0;
2389 ctxt->disableSAX = 1;
2390 return(NULL);
2391 }
2392
2393 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2394 if (buf == NULL) {
2395 xmlGenericError(xmlGenericErrorContext,
2396 "malloc of %d byte failed\n", size);
2397 return(NULL);
2398 }
2399 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2400 cur = CUR_CHAR(l);
2401 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2402 if (len + 5 >= size) {
2403 size *= 2;
2404 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2405 if (buf == NULL) {
2406 xmlGenericError(xmlGenericErrorContext,
2407 "realloc of %d byte failed\n", size);
2408 ctxt->instate = (xmlParserInputState) state;
2409 return(NULL);
2410 }
2411 }
2412 count++;
2413 if (count > 50) {
2414 GROW;
2415 count = 0;
2416 }
2417 COPY_BUF(l,buf,len,cur);
2418 NEXTL(l);
2419 cur = CUR_CHAR(l);
2420 if (cur == 0) {
2421 GROW;
2422 SHRINK;
2423 cur = CUR_CHAR(l);
2424 }
2425 }
2426 buf[len] = 0;
2427 ctxt->instate = (xmlParserInputState) state;
2428 if (!IS_CHAR(cur)) {
2429 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2431 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2432 ctxt->wellFormed = 0;
2433 ctxt->disableSAX = 1;
2434 } else {
2435 NEXT;
2436 }
2437 return(buf);
2438}
2439
2440/**
2441 * xmlParsePubidLiteral:
2442 * @ctxt: an XML parser context
2443 *
2444 * parse an XML public literal
2445 *
2446 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2447 *
2448 * Returns the PubidLiteral parsed or NULL.
2449 */
2450
2451xmlChar *
2452xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2453 xmlChar *buf = NULL;
2454 int len = 0;
2455 int size = XML_PARSER_BUFFER_SIZE;
2456 xmlChar cur;
2457 xmlChar stop;
2458 int count = 0;
2459
2460 SHRINK;
2461 if (RAW == '"') {
2462 NEXT;
2463 stop = '"';
2464 } else if (RAW == '\'') {
2465 NEXT;
2466 stop = '\'';
2467 } else {
2468 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2470 ctxt->sax->error(ctxt->userData,
2471 "SystemLiteral \" or ' expected\n");
2472 ctxt->wellFormed = 0;
2473 ctxt->disableSAX = 1;
2474 return(NULL);
2475 }
2476 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2477 if (buf == NULL) {
2478 xmlGenericError(xmlGenericErrorContext,
2479 "malloc of %d byte failed\n", size);
2480 return(NULL);
2481 }
2482 cur = CUR;
2483 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2484 if (len + 1 >= size) {
2485 size *= 2;
2486 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2487 if (buf == NULL) {
2488 xmlGenericError(xmlGenericErrorContext,
2489 "realloc of %d byte failed\n", size);
2490 return(NULL);
2491 }
2492 }
2493 buf[len++] = cur;
2494 count++;
2495 if (count > 50) {
2496 GROW;
2497 count = 0;
2498 }
2499 NEXT;
2500 cur = CUR;
2501 if (cur == 0) {
2502 GROW;
2503 SHRINK;
2504 cur = CUR;
2505 }
2506 }
2507 buf[len] = 0;
2508 if (cur != stop) {
2509 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2511 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2512 ctxt->wellFormed = 0;
2513 ctxt->disableSAX = 1;
2514 } else {
2515 NEXT;
2516 }
2517 return(buf);
2518}
2519
Daniel Veillard48b2f892001-02-25 16:11:03 +00002520void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002521/**
2522 * xmlParseCharData:
2523 * @ctxt: an XML parser context
2524 * @cdata: int indicating whether we are within a CDATA section
2525 *
2526 * parse a CharData section.
2527 * if we are within a CDATA section ']]>' marks an end of section.
2528 *
2529 * The right angle bracket (>) may be represented using the string "&gt;",
2530 * and must, for compatibility, be escaped using "&gt;" or a character
2531 * reference when it appears in the string "]]>" in content, when that
2532 * string is not marking the end of a CDATA section.
2533 *
2534 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2535 */
2536
2537void
2538xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002539 const xmlChar *in;
2540 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002541 int line = ctxt->input->line;
2542 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002543
2544 SHRINK;
2545 GROW;
2546 /*
2547 * Accelerated common case where input don't need to be
2548 * modified before passing it to the handler.
2549 */
2550 if ((ctxt->token == 0) && (!cdata)) {
2551 in = ctxt->input->cur;
2552 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002553get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002554 while (((*in >= 0x20) && (*in != '<') &&
2555 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2556 in++;
2557 if (*in == 0xA) {
2558 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002559 in++;
2560 while (*in == 0xA) {
2561 ctxt->input->line++;
2562 in++;
2563 }
2564 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002565 }
2566 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002567 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002568 if (IS_BLANK(*ctxt->input->cur)) {
2569 const xmlChar *tmp = ctxt->input->cur;
2570 ctxt->input->cur = in;
2571 if (areBlanks(ctxt, tmp, nbchar)) {
2572 if (ctxt->sax->ignorableWhitespace != NULL)
2573 ctxt->sax->ignorableWhitespace(ctxt->userData,
2574 tmp, nbchar);
2575 } else {
2576 if (ctxt->sax->characters != NULL)
2577 ctxt->sax->characters(ctxt->userData,
2578 tmp, nbchar);
2579 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002580 line = ctxt->input->line;
2581 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002582 } else {
2583 if (ctxt->sax->characters != NULL)
2584 ctxt->sax->characters(ctxt->userData,
2585 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002586 line = ctxt->input->line;
2587 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002588 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002589 }
2590 ctxt->input->cur = in;
2591 if (*in == 0xD) {
2592 in++;
2593 if (*in == 0xA) {
2594 ctxt->input->cur = in;
2595 in++;
2596 ctxt->input->line++;
2597 continue; /* while */
2598 }
2599 in--;
2600 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002601 if (*in == '<') {
2602 return;
2603 }
2604 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002605 return;
2606 }
2607 SHRINK;
2608 GROW;
2609 in = ctxt->input->cur;
2610 } while ((*in >= 0x20) && (*in <= 0x7F));
2611 nbchar = 0;
2612 }
Daniel Veillard50582112001-03-26 22:52:16 +00002613 ctxt->input->line = line;
2614 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002615 xmlParseCharDataComplex(ctxt, cdata);
2616}
2617
2618void
2619xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002620 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2621 int nbchar = 0;
2622 int cur, l;
2623 int count = 0;
2624
2625 SHRINK;
2626 GROW;
2627 cur = CUR_CHAR(l);
2628 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2629 ((cur != '&') || (ctxt->token == '&')) &&
2630 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2631 if ((cur == ']') && (NXT(1) == ']') &&
2632 (NXT(2) == '>')) {
2633 if (cdata) break;
2634 else {
2635 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2637 ctxt->sax->error(ctxt->userData,
2638 "Sequence ']]>' not allowed in content\n");
2639 /* Should this be relaxed ??? I see a "must here */
2640 ctxt->wellFormed = 0;
2641 ctxt->disableSAX = 1;
2642 }
2643 }
2644 COPY_BUF(l,buf,nbchar,cur);
2645 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2646 /*
2647 * Ok the segment is to be consumed as chars.
2648 */
2649 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2650 if (areBlanks(ctxt, buf, nbchar)) {
2651 if (ctxt->sax->ignorableWhitespace != NULL)
2652 ctxt->sax->ignorableWhitespace(ctxt->userData,
2653 buf, nbchar);
2654 } else {
2655 if (ctxt->sax->characters != NULL)
2656 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2657 }
2658 }
2659 nbchar = 0;
2660 }
2661 count++;
2662 if (count > 50) {
2663 GROW;
2664 count = 0;
2665 }
2666 NEXTL(l);
2667 cur = CUR_CHAR(l);
2668 }
2669 if (nbchar != 0) {
2670 /*
2671 * Ok the segment is to be consumed as chars.
2672 */
2673 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2674 if (areBlanks(ctxt, buf, nbchar)) {
2675 if (ctxt->sax->ignorableWhitespace != NULL)
2676 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2677 } else {
2678 if (ctxt->sax->characters != NULL)
2679 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2680 }
2681 }
2682 }
2683}
2684
2685/**
2686 * xmlParseExternalID:
2687 * @ctxt: an XML parser context
2688 * @publicID: a xmlChar** receiving PubidLiteral
2689 * @strict: indicate whether we should restrict parsing to only
2690 * production [75], see NOTE below
2691 *
2692 * Parse an External ID or a Public ID
2693 *
2694 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2695 * 'PUBLIC' S PubidLiteral S SystemLiteral
2696 *
2697 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2698 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2699 *
2700 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2701 *
2702 * Returns the function returns SystemLiteral and in the second
2703 * case publicID receives PubidLiteral, is strict is off
2704 * it is possible to return NULL and have publicID set.
2705 */
2706
2707xmlChar *
2708xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2709 xmlChar *URI = NULL;
2710
2711 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002712
2713 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002714 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2715 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2716 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2717 SKIP(6);
2718 if (!IS_BLANK(CUR)) {
2719 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2721 ctxt->sax->error(ctxt->userData,
2722 "Space required after 'SYSTEM'\n");
2723 ctxt->wellFormed = 0;
2724 ctxt->disableSAX = 1;
2725 }
2726 SKIP_BLANKS;
2727 URI = xmlParseSystemLiteral(ctxt);
2728 if (URI == NULL) {
2729 ctxt->errNo = XML_ERR_URI_REQUIRED;
2730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2731 ctxt->sax->error(ctxt->userData,
2732 "xmlParseExternalID: SYSTEM, no URI\n");
2733 ctxt->wellFormed = 0;
2734 ctxt->disableSAX = 1;
2735 }
2736 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2737 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2738 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2739 SKIP(6);
2740 if (!IS_BLANK(CUR)) {
2741 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2742 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2743 ctxt->sax->error(ctxt->userData,
2744 "Space required after 'PUBLIC'\n");
2745 ctxt->wellFormed = 0;
2746 ctxt->disableSAX = 1;
2747 }
2748 SKIP_BLANKS;
2749 *publicID = xmlParsePubidLiteral(ctxt);
2750 if (*publicID == NULL) {
2751 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2753 ctxt->sax->error(ctxt->userData,
2754 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2755 ctxt->wellFormed = 0;
2756 ctxt->disableSAX = 1;
2757 }
2758 if (strict) {
2759 /*
2760 * We don't handle [83] so "S SystemLiteral" is required.
2761 */
2762 if (!IS_BLANK(CUR)) {
2763 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2764 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2765 ctxt->sax->error(ctxt->userData,
2766 "Space required after the Public Identifier\n");
2767 ctxt->wellFormed = 0;
2768 ctxt->disableSAX = 1;
2769 }
2770 } else {
2771 /*
2772 * We handle [83] so we return immediately, if
2773 * "S SystemLiteral" is not detected. From a purely parsing
2774 * point of view that's a nice mess.
2775 */
2776 const xmlChar *ptr;
2777 GROW;
2778
2779 ptr = CUR_PTR;
2780 if (!IS_BLANK(*ptr)) return(NULL);
2781
2782 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2783 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2784 }
2785 SKIP_BLANKS;
2786 URI = xmlParseSystemLiteral(ctxt);
2787 if (URI == NULL) {
2788 ctxt->errNo = XML_ERR_URI_REQUIRED;
2789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2790 ctxt->sax->error(ctxt->userData,
2791 "xmlParseExternalID: PUBLIC, no URI\n");
2792 ctxt->wellFormed = 0;
2793 ctxt->disableSAX = 1;
2794 }
2795 }
2796 return(URI);
2797}
2798
2799/**
2800 * xmlParseComment:
2801 * @ctxt: an XML parser context
2802 *
2803 * Skip an XML (SGML) comment <!-- .... -->
2804 * The spec says that "For compatibility, the string "--" (double-hyphen)
2805 * must not occur within comments. "
2806 *
2807 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2808 */
2809void
2810xmlParseComment(xmlParserCtxtPtr ctxt) {
2811 xmlChar *buf = NULL;
2812 int len;
2813 int size = XML_PARSER_BUFFER_SIZE;
2814 int q, ql;
2815 int r, rl;
2816 int cur, l;
2817 xmlParserInputState state;
2818 xmlParserInputPtr input = ctxt->input;
2819 int count = 0;
2820
2821 /*
2822 * Check that there is a comment right here.
2823 */
2824 if ((RAW != '<') || (NXT(1) != '!') ||
2825 (NXT(2) != '-') || (NXT(3) != '-')) return;
2826
2827 state = ctxt->instate;
2828 ctxt->instate = XML_PARSER_COMMENT;
2829 SHRINK;
2830 SKIP(4);
2831 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2832 if (buf == NULL) {
2833 xmlGenericError(xmlGenericErrorContext,
2834 "malloc of %d byte failed\n", size);
2835 ctxt->instate = state;
2836 return;
2837 }
2838 q = CUR_CHAR(ql);
2839 NEXTL(ql);
2840 r = CUR_CHAR(rl);
2841 NEXTL(rl);
2842 cur = CUR_CHAR(l);
2843 len = 0;
2844 while (IS_CHAR(cur) && /* checked */
2845 ((cur != '>') ||
2846 (r != '-') || (q != '-'))) {
2847 if ((r == '-') && (q == '-') && (len > 1)) {
2848 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2849 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2850 ctxt->sax->error(ctxt->userData,
2851 "Comment must not contain '--' (double-hyphen)`\n");
2852 ctxt->wellFormed = 0;
2853 ctxt->disableSAX = 1;
2854 }
2855 if (len + 5 >= size) {
2856 size *= 2;
2857 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2858 if (buf == NULL) {
2859 xmlGenericError(xmlGenericErrorContext,
2860 "realloc of %d byte failed\n", size);
2861 ctxt->instate = state;
2862 return;
2863 }
2864 }
2865 COPY_BUF(ql,buf,len,q);
2866 q = r;
2867 ql = rl;
2868 r = cur;
2869 rl = l;
2870
2871 count++;
2872 if (count > 50) {
2873 GROW;
2874 count = 0;
2875 }
2876 NEXTL(l);
2877 cur = CUR_CHAR(l);
2878 if (cur == 0) {
2879 SHRINK;
2880 GROW;
2881 cur = CUR_CHAR(l);
2882 }
2883 }
2884 buf[len] = 0;
2885 if (!IS_CHAR(cur)) {
2886 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2887 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2888 ctxt->sax->error(ctxt->userData,
2889 "Comment not terminated \n<!--%.50s\n", buf);
2890 ctxt->wellFormed = 0;
2891 ctxt->disableSAX = 1;
2892 xmlFree(buf);
2893 } else {
2894 if (input != ctxt->input) {
2895 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2896 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2897 ctxt->sax->error(ctxt->userData,
2898"Comment doesn't start and stop in the same entity\n");
2899 ctxt->wellFormed = 0;
2900 ctxt->disableSAX = 1;
2901 }
2902 NEXT;
2903 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2904 (!ctxt->disableSAX))
2905 ctxt->sax->comment(ctxt->userData, buf);
2906 xmlFree(buf);
2907 }
2908 ctxt->instate = state;
2909}
2910
2911/**
2912 * xmlParsePITarget:
2913 * @ctxt: an XML parser context
2914 *
2915 * parse the name of a PI
2916 *
2917 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2918 *
2919 * Returns the PITarget name or NULL
2920 */
2921
2922xmlChar *
2923xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2924 xmlChar *name;
2925
2926 name = xmlParseName(ctxt);
2927 if ((name != NULL) &&
2928 ((name[0] == 'x') || (name[0] == 'X')) &&
2929 ((name[1] == 'm') || (name[1] == 'M')) &&
2930 ((name[2] == 'l') || (name[2] == 'L'))) {
2931 int i;
2932 if ((name[0] == 'x') && (name[1] == 'm') &&
2933 (name[2] == 'l') && (name[3] == 0)) {
2934 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2936 ctxt->sax->error(ctxt->userData,
2937 "XML declaration allowed only at the start of the document\n");
2938 ctxt->wellFormed = 0;
2939 ctxt->disableSAX = 1;
2940 return(name);
2941 } else if (name[3] == 0) {
2942 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2944 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2945 ctxt->wellFormed = 0;
2946 ctxt->disableSAX = 1;
2947 return(name);
2948 }
2949 for (i = 0;;i++) {
2950 if (xmlW3CPIs[i] == NULL) break;
2951 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2952 return(name);
2953 }
2954 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2955 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2956 ctxt->sax->warning(ctxt->userData,
2957 "xmlParsePItarget: invalid name prefix 'xml'\n");
2958 }
2959 }
2960 return(name);
2961}
2962
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse the content of an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * The expected content is the word "catalog", an '=' sign and a quoted
 * URL, with optional blanks around each part and nothing after the
 * closing quote. The URL is added to ctxt->catalogs through
 * xmlCatalogAddLocal(). On a syntax error a warning is raised and
 * ctxt->errNo is set to XML_WAR_CATALOG_PI.
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    p += 7;

    for (; IS_BLANK(*p); p++)
        ;
    /*
     * NOTE(review): a missing '=' is ignored silently while every other
     * syntax problem raises the warning below; confirm this is intended.
     */
    if (*p != '=')
        return;
    p++;

    for (; IS_BLANK(*p); p++)
        ;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    p++;

    /* locate the matching closing quote */
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto syntax_error;
    uri = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the quoted value */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto syntax_error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

syntax_error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
3025
Owen Taylor3473f882001-02-23 17:55:21 +00003026/**
3027 * xmlParsePI:
3028 * @ctxt: an XML parser context
3029 *
3030 * parse an XML Processing Instruction.
3031 *
3032 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3033 *
3034 * The processing is transfered to SAX once parsed.
3035 */
3036
3037void
3038xmlParsePI(xmlParserCtxtPtr ctxt) {
3039 xmlChar *buf = NULL;
3040 int len = 0;
3041 int size = XML_PARSER_BUFFER_SIZE;
3042 int cur, l;
3043 xmlChar *target;
3044 xmlParserInputState state;
3045 int count = 0;
3046
3047 if ((RAW == '<') && (NXT(1) == '?')) {
3048 xmlParserInputPtr input = ctxt->input;
3049 state = ctxt->instate;
3050 ctxt->instate = XML_PARSER_PI;
3051 /*
3052 * this is a Processing Instruction.
3053 */
3054 SKIP(2);
3055 SHRINK;
3056
3057 /*
3058 * Parse the target name and check for special support like
3059 * namespace.
3060 */
3061 target = xmlParsePITarget(ctxt);
3062 if (target != NULL) {
3063 if ((RAW == '?') && (NXT(1) == '>')) {
3064 if (input != ctxt->input) {
3065 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3067 ctxt->sax->error(ctxt->userData,
3068 "PI declaration doesn't start and stop in the same entity\n");
3069 ctxt->wellFormed = 0;
3070 ctxt->disableSAX = 1;
3071 }
3072 SKIP(2);
3073
3074 /*
3075 * SAX: PI detected.
3076 */
3077 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3078 (ctxt->sax->processingInstruction != NULL))
3079 ctxt->sax->processingInstruction(ctxt->userData,
3080 target, NULL);
3081 ctxt->instate = state;
3082 xmlFree(target);
3083 return;
3084 }
3085 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3086 if (buf == NULL) {
3087 xmlGenericError(xmlGenericErrorContext,
3088 "malloc of %d byte failed\n", size);
3089 ctxt->instate = state;
3090 return;
3091 }
3092 cur = CUR;
3093 if (!IS_BLANK(cur)) {
3094 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3096 ctxt->sax->error(ctxt->userData,
3097 "xmlParsePI: PI %s space expected\n", target);
3098 ctxt->wellFormed = 0;
3099 ctxt->disableSAX = 1;
3100 }
3101 SKIP_BLANKS;
3102 cur = CUR_CHAR(l);
3103 while (IS_CHAR(cur) && /* checked */
3104 ((cur != '?') || (NXT(1) != '>'))) {
3105 if (len + 5 >= size) {
3106 size *= 2;
3107 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3108 if (buf == NULL) {
3109 xmlGenericError(xmlGenericErrorContext,
3110 "realloc of %d byte failed\n", size);
3111 ctxt->instate = state;
3112 return;
3113 }
3114 }
3115 count++;
3116 if (count > 50) {
3117 GROW;
3118 count = 0;
3119 }
3120 COPY_BUF(l,buf,len,cur);
3121 NEXTL(l);
3122 cur = CUR_CHAR(l);
3123 if (cur == 0) {
3124 SHRINK;
3125 GROW;
3126 cur = CUR_CHAR(l);
3127 }
3128 }
3129 buf[len] = 0;
3130 if (cur != '?') {
3131 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3133 ctxt->sax->error(ctxt->userData,
3134 "xmlParsePI: PI %s never end ...\n", target);
3135 ctxt->wellFormed = 0;
3136 ctxt->disableSAX = 1;
3137 } else {
3138 if (input != ctxt->input) {
3139 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3141 ctxt->sax->error(ctxt->userData,
3142 "PI declaration doesn't start and stop in the same entity\n");
3143 ctxt->wellFormed = 0;
3144 ctxt->disableSAX = 1;
3145 }
3146 SKIP(2);
3147
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003148#ifdef LIBXML_CATALOG_ENABLED
3149 if (((state == XML_PARSER_MISC) ||
3150 (state == XML_PARSER_START)) &&
3151 (xmlStrEqual(target, XML_CATALOG_PI))) {
3152 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3153 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3154 (allow == XML_CATA_ALLOW_ALL))
3155 xmlParseCatalogPI(ctxt, buf);
3156 }
3157#endif
3158
3159
Owen Taylor3473f882001-02-23 17:55:21 +00003160 /*
3161 * SAX: PI detected.
3162 */
3163 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3164 (ctxt->sax->processingInstruction != NULL))
3165 ctxt->sax->processingInstruction(ctxt->userData,
3166 target, buf);
3167 }
3168 xmlFree(buf);
3169 xmlFree(target);
3170 } else {
3171 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3172 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3173 ctxt->sax->error(ctxt->userData,
3174 "xmlParsePI : no target name\n");
3175 ctxt->wellFormed = 0;
3176 ctxt->disableSAX = 1;
3177 }
3178 ctxt->instate = state;
3179 }
3180}
3181
3182/**
3183 * xmlParseNotationDecl:
3184 * @ctxt: an XML parser context
3185 *
3186 * parse a notation declaration
3187 *
3188 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3189 *
3190 * Hence there is actually 3 choices:
3191 * 'PUBLIC' S PubidLiteral
3192 * 'PUBLIC' S PubidLiteral S SystemLiteral
3193 * and 'SYSTEM' S SystemLiteral
3194 *
3195 * See the NOTE on xmlParseExternalID().
3196 */
3197
3198void
3199xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3200 xmlChar *name;
3201 xmlChar *Pubid;
3202 xmlChar *Systemid;
3203
3204 if ((RAW == '<') && (NXT(1) == '!') &&
3205 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3206 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3207 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3208 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3209 xmlParserInputPtr input = ctxt->input;
3210 SHRINK;
3211 SKIP(10);
3212 if (!IS_BLANK(CUR)) {
3213 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3215 ctxt->sax->error(ctxt->userData,
3216 "Space required after '<!NOTATION'\n");
3217 ctxt->wellFormed = 0;
3218 ctxt->disableSAX = 1;
3219 return;
3220 }
3221 SKIP_BLANKS;
3222
Daniel Veillard76d66f42001-05-16 21:05:17 +00003223 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003224 if (name == NULL) {
3225 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3227 ctxt->sax->error(ctxt->userData,
3228 "NOTATION: Name expected here\n");
3229 ctxt->wellFormed = 0;
3230 ctxt->disableSAX = 1;
3231 return;
3232 }
3233 if (!IS_BLANK(CUR)) {
3234 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3235 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3236 ctxt->sax->error(ctxt->userData,
3237 "Space required after the NOTATION name'\n");
3238 ctxt->wellFormed = 0;
3239 ctxt->disableSAX = 1;
3240 return;
3241 }
3242 SKIP_BLANKS;
3243
3244 /*
3245 * Parse the IDs.
3246 */
3247 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3248 SKIP_BLANKS;
3249
3250 if (RAW == '>') {
3251 if (input != ctxt->input) {
3252 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3254 ctxt->sax->error(ctxt->userData,
3255"Notation declaration doesn't start and stop in the same entity\n");
3256 ctxt->wellFormed = 0;
3257 ctxt->disableSAX = 1;
3258 }
3259 NEXT;
3260 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3261 (ctxt->sax->notationDecl != NULL))
3262 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3263 } else {
3264 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3266 ctxt->sax->error(ctxt->userData,
3267 "'>' required to close NOTATION declaration\n");
3268 ctxt->wellFormed = 0;
3269 ctxt->disableSAX = 1;
3270 }
3271 xmlFree(name);
3272 if (Systemid != NULL) xmlFree(Systemid);
3273 if (Pubid != NULL) xmlFree(Pubid);
3274 }
3275}
3276
3277/**
3278 * xmlParseEntityDecl:
3279 * @ctxt: an XML parser context
3280 *
3281 * parse <!ENTITY declarations
3282 *
3283 * [70] EntityDecl ::= GEDecl | PEDecl
3284 *
3285 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3286 *
3287 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3288 *
3289 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3290 *
3291 * [74] PEDef ::= EntityValue | ExternalID
3292 *
3293 * [76] NDataDecl ::= S 'NDATA' S Name
3294 *
3295 * [ VC: Notation Declared ]
3296 * The Name must match the declared name of a notation.
3297 */
3298
3299void
3300xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3301 xmlChar *name = NULL;
3302 xmlChar *value = NULL;
3303 xmlChar *URI = NULL, *literal = NULL;
3304 xmlChar *ndata = NULL;
3305 int isParameter = 0;
3306 xmlChar *orig = NULL;
3307
3308 GROW;
3309 if ((RAW == '<') && (NXT(1) == '!') &&
3310 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3311 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3312 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3313 xmlParserInputPtr input = ctxt->input;
3314 ctxt->instate = XML_PARSER_ENTITY_DECL;
3315 SHRINK;
3316 SKIP(8);
3317 if (!IS_BLANK(CUR)) {
3318 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3320 ctxt->sax->error(ctxt->userData,
3321 "Space required after '<!ENTITY'\n");
3322 ctxt->wellFormed = 0;
3323 ctxt->disableSAX = 1;
3324 }
3325 SKIP_BLANKS;
3326
3327 if (RAW == '%') {
3328 NEXT;
3329 if (!IS_BLANK(CUR)) {
3330 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3332 ctxt->sax->error(ctxt->userData,
3333 "Space required after '%'\n");
3334 ctxt->wellFormed = 0;
3335 ctxt->disableSAX = 1;
3336 }
3337 SKIP_BLANKS;
3338 isParameter = 1;
3339 }
3340
Daniel Veillard76d66f42001-05-16 21:05:17 +00003341 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003342 if (name == NULL) {
3343 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3345 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3346 ctxt->wellFormed = 0;
3347 ctxt->disableSAX = 1;
3348 return;
3349 }
3350 if (!IS_BLANK(CUR)) {
3351 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3353 ctxt->sax->error(ctxt->userData,
3354 "Space required after the entity name\n");
3355 ctxt->wellFormed = 0;
3356 ctxt->disableSAX = 1;
3357 }
3358 SKIP_BLANKS;
3359
3360 /*
3361 * handle the various case of definitions...
3362 */
3363 if (isParameter) {
3364 if ((RAW == '"') || (RAW == '\'')) {
3365 value = xmlParseEntityValue(ctxt, &orig);
3366 if (value) {
3367 if ((ctxt->sax != NULL) &&
3368 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3369 ctxt->sax->entityDecl(ctxt->userData, name,
3370 XML_INTERNAL_PARAMETER_ENTITY,
3371 NULL, NULL, value);
3372 }
3373 } else {
3374 URI = xmlParseExternalID(ctxt, &literal, 1);
3375 if ((URI == NULL) && (literal == NULL)) {
3376 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3378 ctxt->sax->error(ctxt->userData,
3379 "Entity value required\n");
3380 ctxt->wellFormed = 0;
3381 ctxt->disableSAX = 1;
3382 }
3383 if (URI) {
3384 xmlURIPtr uri;
3385
3386 uri = xmlParseURI((const char *) URI);
3387 if (uri == NULL) {
3388 ctxt->errNo = XML_ERR_INVALID_URI;
3389 if ((ctxt->sax != NULL) &&
3390 (!ctxt->disableSAX) &&
3391 (ctxt->sax->error != NULL))
3392 ctxt->sax->error(ctxt->userData,
3393 "Invalid URI: %s\n", URI);
3394 ctxt->wellFormed = 0;
3395 } else {
3396 if (uri->fragment != NULL) {
3397 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3398 if ((ctxt->sax != NULL) &&
3399 (!ctxt->disableSAX) &&
3400 (ctxt->sax->error != NULL))
3401 ctxt->sax->error(ctxt->userData,
3402 "Fragment not allowed: %s\n", URI);
3403 ctxt->wellFormed = 0;
3404 } else {
3405 if ((ctxt->sax != NULL) &&
3406 (!ctxt->disableSAX) &&
3407 (ctxt->sax->entityDecl != NULL))
3408 ctxt->sax->entityDecl(ctxt->userData, name,
3409 XML_EXTERNAL_PARAMETER_ENTITY,
3410 literal, URI, NULL);
3411 }
3412 xmlFreeURI(uri);
3413 }
3414 }
3415 }
3416 } else {
3417 if ((RAW == '"') || (RAW == '\'')) {
3418 value = xmlParseEntityValue(ctxt, &orig);
3419 if ((ctxt->sax != NULL) &&
3420 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3421 ctxt->sax->entityDecl(ctxt->userData, name,
3422 XML_INTERNAL_GENERAL_ENTITY,
3423 NULL, NULL, value);
3424 } else {
3425 URI = xmlParseExternalID(ctxt, &literal, 1);
3426 if ((URI == NULL) && (literal == NULL)) {
3427 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3429 ctxt->sax->error(ctxt->userData,
3430 "Entity value required\n");
3431 ctxt->wellFormed = 0;
3432 ctxt->disableSAX = 1;
3433 }
3434 if (URI) {
3435 xmlURIPtr uri;
3436
3437 uri = xmlParseURI((const char *)URI);
3438 if (uri == NULL) {
3439 ctxt->errNo = XML_ERR_INVALID_URI;
3440 if ((ctxt->sax != NULL) &&
3441 (!ctxt->disableSAX) &&
3442 (ctxt->sax->error != NULL))
3443 ctxt->sax->error(ctxt->userData,
3444 "Invalid URI: %s\n", URI);
3445 ctxt->wellFormed = 0;
3446 } else {
3447 if (uri->fragment != NULL) {
3448 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3449 if ((ctxt->sax != NULL) &&
3450 (!ctxt->disableSAX) &&
3451 (ctxt->sax->error != NULL))
3452 ctxt->sax->error(ctxt->userData,
3453 "Fragment not allowed: %s\n", URI);
3454 ctxt->wellFormed = 0;
3455 }
3456 xmlFreeURI(uri);
3457 }
3458 }
3459 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3460 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3461 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3462 ctxt->sax->error(ctxt->userData,
3463 "Space required before 'NDATA'\n");
3464 ctxt->wellFormed = 0;
3465 ctxt->disableSAX = 1;
3466 }
3467 SKIP_BLANKS;
3468 if ((RAW == 'N') && (NXT(1) == 'D') &&
3469 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3470 (NXT(4) == 'A')) {
3471 SKIP(5);
3472 if (!IS_BLANK(CUR)) {
3473 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3475 ctxt->sax->error(ctxt->userData,
3476 "Space required after 'NDATA'\n");
3477 ctxt->wellFormed = 0;
3478 ctxt->disableSAX = 1;
3479 }
3480 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003481 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003482 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3483 (ctxt->sax->unparsedEntityDecl != NULL))
3484 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3485 literal, URI, ndata);
3486 } else {
3487 if ((ctxt->sax != NULL) &&
3488 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3489 ctxt->sax->entityDecl(ctxt->userData, name,
3490 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3491 literal, URI, NULL);
3492 }
3493 }
3494 }
3495 SKIP_BLANKS;
3496 if (RAW != '>') {
3497 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3499 ctxt->sax->error(ctxt->userData,
3500 "xmlParseEntityDecl: entity %s not terminated\n", name);
3501 ctxt->wellFormed = 0;
3502 ctxt->disableSAX = 1;
3503 } else {
3504 if (input != ctxt->input) {
3505 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3507 ctxt->sax->error(ctxt->userData,
3508"Entity declaration doesn't start and stop in the same entity\n");
3509 ctxt->wellFormed = 0;
3510 ctxt->disableSAX = 1;
3511 }
3512 NEXT;
3513 }
3514 if (orig != NULL) {
3515 /*
3516 * Ugly mechanism to save the raw entity value.
3517 */
3518 xmlEntityPtr cur = NULL;
3519
3520 if (isParameter) {
3521 if ((ctxt->sax != NULL) &&
3522 (ctxt->sax->getParameterEntity != NULL))
3523 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3524 } else {
3525 if ((ctxt->sax != NULL) &&
3526 (ctxt->sax->getEntity != NULL))
3527 cur = ctxt->sax->getEntity(ctxt->userData, name);
3528 }
3529 if (cur != NULL) {
3530 if (cur->orig != NULL)
3531 xmlFree(orig);
3532 else
3533 cur->orig = orig;
3534 } else
3535 xmlFree(orig);
3536 }
3537 if (name != NULL) xmlFree(name);
3538 if (value != NULL) xmlFree(value);
3539 if (URI != NULL) xmlFree(URI);
3540 if (literal != NULL) xmlFree(literal);
3541 if (ndata != NULL) xmlFree(ndata);
3542 }
3543}
3544
3545/**
3546 * xmlParseDefaultDecl:
3547 * @ctxt: an XML parser context
3548 * @value: Receive a possible fixed default value for the attribute
3549 *
3550 * Parse an attribute default declaration
3551 *
3552 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3553 *
3554 * [ VC: Required Attribute ]
3555 * if the default declaration is the keyword #REQUIRED, then the
3556 * attribute must be specified for all elements of the type in the
3557 * attribute-list declaration.
3558 *
3559 * [ VC: Attribute Default Legal ]
3560 * The declared default value must meet the lexical constraints of
3561 * the declared attribute type c.f. xmlValidateAttributeDecl()
3562 *
3563 * [ VC: Fixed Attribute Default ]
3564 * if an attribute has a default value declared with the #FIXED
3565 * keyword, instances of that attribute must match the default value.
3566 *
3567 * [ WFC: No < in Attribute Values ]
3568 * handled in xmlParseAttValue()
3569 *
3570 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3571 * or XML_ATTRIBUTE_FIXED.
3572 */
3573
3574int
3575xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3576 int val;
3577 xmlChar *ret;
3578
3579 *value = NULL;
3580 if ((RAW == '#') && (NXT(1) == 'R') &&
3581 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3582 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3583 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3584 (NXT(8) == 'D')) {
3585 SKIP(9);
3586 return(XML_ATTRIBUTE_REQUIRED);
3587 }
3588 if ((RAW == '#') && (NXT(1) == 'I') &&
3589 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3590 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3591 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3592 SKIP(8);
3593 return(XML_ATTRIBUTE_IMPLIED);
3594 }
3595 val = XML_ATTRIBUTE_NONE;
3596 if ((RAW == '#') && (NXT(1) == 'F') &&
3597 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3598 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3599 SKIP(6);
3600 val = XML_ATTRIBUTE_FIXED;
3601 if (!IS_BLANK(CUR)) {
3602 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3604 ctxt->sax->error(ctxt->userData,
3605 "Space required after '#FIXED'\n");
3606 ctxt->wellFormed = 0;
3607 ctxt->disableSAX = 1;
3608 }
3609 SKIP_BLANKS;
3610 }
3611 ret = xmlParseAttValue(ctxt);
3612 ctxt->instate = XML_PARSER_DTD;
3613 if (ret == NULL) {
3614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3615 ctxt->sax->error(ctxt->userData,
3616 "Attribute default value declaration error\n");
3617 ctxt->wellFormed = 0;
3618 ctxt->disableSAX = 1;
3619 } else
3620 *value = ret;
3621 return(val);
3622}
3623
3624/**
3625 * xmlParseNotationType:
3626 * @ctxt: an XML parser context
3627 *
3628 * parse an Notation attribute type.
3629 *
3630 * Note: the leading 'NOTATION' S part has already being parsed...
3631 *
3632 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3633 *
3634 * [ VC: Notation Attributes ]
3635 * Values of this type must match one of the notation names included
3636 * in the declaration; all notation names in the declaration must be declared.
3637 *
3638 * Returns: the notation attribute tree built while parsing
3639 */
3640
3641xmlEnumerationPtr
3642xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3643 xmlChar *name;
3644 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3645
3646 if (RAW != '(') {
3647 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3649 ctxt->sax->error(ctxt->userData,
3650 "'(' required to start 'NOTATION'\n");
3651 ctxt->wellFormed = 0;
3652 ctxt->disableSAX = 1;
3653 return(NULL);
3654 }
3655 SHRINK;
3656 do {
3657 NEXT;
3658 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003659 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003660 if (name == NULL) {
3661 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3662 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3663 ctxt->sax->error(ctxt->userData,
3664 "Name expected in NOTATION declaration\n");
3665 ctxt->wellFormed = 0;
3666 ctxt->disableSAX = 1;
3667 return(ret);
3668 }
3669 cur = xmlCreateEnumeration(name);
3670 xmlFree(name);
3671 if (cur == NULL) return(ret);
3672 if (last == NULL) ret = last = cur;
3673 else {
3674 last->next = cur;
3675 last = cur;
3676 }
3677 SKIP_BLANKS;
3678 } while (RAW == '|');
3679 if (RAW != ')') {
3680 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3681 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3682 ctxt->sax->error(ctxt->userData,
3683 "')' required to finish NOTATION declaration\n");
3684 ctxt->wellFormed = 0;
3685 ctxt->disableSAX = 1;
3686 if ((last != NULL) && (last != ret))
3687 xmlFreeEnumeration(last);
3688 return(ret);
3689 }
3690 NEXT;
3691 return(ret);
3692}
3693
3694/**
3695 * xmlParseEnumerationType:
3696 * @ctxt: an XML parser context
3697 *
3698 * parse an Enumeration attribute type.
3699 *
3700 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3701 *
3702 * [ VC: Enumeration ]
3703 * Values of this type must match one of the Nmtoken tokens in
3704 * the declaration
3705 *
3706 * Returns: the enumeration attribute tree built while parsing
3707 */
3708
3709xmlEnumerationPtr
3710xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3711 xmlChar *name;
3712 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3713
3714 if (RAW != '(') {
3715 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3717 ctxt->sax->error(ctxt->userData,
3718 "'(' required to start ATTLIST enumeration\n");
3719 ctxt->wellFormed = 0;
3720 ctxt->disableSAX = 1;
3721 return(NULL);
3722 }
3723 SHRINK;
3724 do {
3725 NEXT;
3726 SKIP_BLANKS;
3727 name = xmlParseNmtoken(ctxt);
3728 if (name == NULL) {
3729 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3730 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3731 ctxt->sax->error(ctxt->userData,
3732 "NmToken expected in ATTLIST enumeration\n");
3733 ctxt->wellFormed = 0;
3734 ctxt->disableSAX = 1;
3735 return(ret);
3736 }
3737 cur = xmlCreateEnumeration(name);
3738 xmlFree(name);
3739 if (cur == NULL) return(ret);
3740 if (last == NULL) ret = last = cur;
3741 else {
3742 last->next = cur;
3743 last = cur;
3744 }
3745 SKIP_BLANKS;
3746 } while (RAW == '|');
3747 if (RAW != ')') {
3748 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3750 ctxt->sax->error(ctxt->userData,
3751 "')' required to finish ATTLIST enumeration\n");
3752 ctxt->wellFormed = 0;
3753 ctxt->disableSAX = 1;
3754 return(ret);
3755 }
3756 NEXT;
3757 return(ret);
3758}
3759
3760/**
3761 * xmlParseEnumeratedType:
3762 * @ctxt: an XML parser context
3763 * @tree: the enumeration tree built while parsing
3764 *
3765 * parse an Enumerated attribute type.
3766 *
3767 * [57] EnumeratedType ::= NotationType | Enumeration
3768 *
3769 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3770 *
3771 *
3772 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3773 */
3774
3775int
3776xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3777 if ((RAW == 'N') && (NXT(1) == 'O') &&
3778 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3779 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3780 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3781 SKIP(8);
3782 if (!IS_BLANK(CUR)) {
3783 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3785 ctxt->sax->error(ctxt->userData,
3786 "Space required after 'NOTATION'\n");
3787 ctxt->wellFormed = 0;
3788 ctxt->disableSAX = 1;
3789 return(0);
3790 }
3791 SKIP_BLANKS;
3792 *tree = xmlParseNotationType(ctxt);
3793 if (*tree == NULL) return(0);
3794 return(XML_ATTRIBUTE_NOTATION);
3795 }
3796 *tree = xmlParseEnumerationType(ctxt);
3797 if (*tree == NULL) return(0);
3798 return(XML_ATTRIBUTE_ENUMERATION);
3799}
3800
3801/**
3802 * xmlParseAttributeType:
3803 * @ctxt: an XML parser context
3804 * @tree: the enumeration tree built while parsing
3805 *
3806 * parse the Attribute list def for an element
3807 *
3808 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3809 *
3810 * [55] StringType ::= 'CDATA'
3811 *
3812 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3813 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3814 *
3815 * Validity constraints for attribute values syntax are checked in
3816 * xmlValidateAttributeValue()
3817 *
3818 * [ VC: ID ]
3819 * Values of type ID must match the Name production. A name must not
3820 * appear more than once in an XML document as a value of this type;
3821 * i.e., ID values must uniquely identify the elements which bear them.
3822 *
3823 * [ VC: One ID per Element Type ]
3824 * No element type may have more than one ID attribute specified.
3825 *
3826 * [ VC: ID Attribute Default ]
3827 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3828 *
3829 * [ VC: IDREF ]
3830 * Values of type IDREF must match the Name production, and values
3831 * of type IDREFS must match Names; each IDREF Name must match the value
3832 * of an ID attribute on some element in the XML document; i.e. IDREF
3833 * values must match the value of some ID attribute.
3834 *
3835 * [ VC: Entity Name ]
3836 * Values of type ENTITY must match the Name production, values
3837 * of type ENTITIES must match Names; each Entity Name must match the
3838 * name of an unparsed entity declared in the DTD.
3839 *
3840 * [ VC: Name Token ]
3841 * Values of type NMTOKEN must match the Nmtoken production; values
3842 * of type NMTOKENS must match Nmtokens.
3843 *
3844 * Returns the attribute type
3845 */
3846int
3847xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3848 SHRINK;
3849 if ((RAW == 'C') && (NXT(1) == 'D') &&
3850 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3851 (NXT(4) == 'A')) {
3852 SKIP(5);
3853 return(XML_ATTRIBUTE_CDATA);
3854 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3855 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3856 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3857 SKIP(6);
3858 return(XML_ATTRIBUTE_IDREFS);
3859 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3860 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3861 (NXT(4) == 'F')) {
3862 SKIP(5);
3863 return(XML_ATTRIBUTE_IDREF);
3864 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3865 SKIP(2);
3866 return(XML_ATTRIBUTE_ID);
3867 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3868 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3869 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3870 SKIP(6);
3871 return(XML_ATTRIBUTE_ENTITY);
3872 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3873 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3874 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3875 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3876 SKIP(8);
3877 return(XML_ATTRIBUTE_ENTITIES);
3878 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3879 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3880 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3881 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3882 SKIP(8);
3883 return(XML_ATTRIBUTE_NMTOKENS);
3884 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3885 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3886 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3887 (NXT(6) == 'N')) {
3888 SKIP(7);
3889 return(XML_ATTRIBUTE_NMTOKEN);
3890 }
3891 return(xmlParseEnumeratedType(ctxt, tree));
3892}
3893
3894/**
3895 * xmlParseAttributeListDecl:
3896 * @ctxt: an XML parser context
3897 *
3898 * : parse the Attribute list def for an element
3899 *
3900 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3901 *
3902 * [53] AttDef ::= S Name S AttType S DefaultDecl
3903 *
3904 */
3905void
3906xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3907 xmlChar *elemName;
3908 xmlChar *attrName;
3909 xmlEnumerationPtr tree;
3910
3911 if ((RAW == '<') && (NXT(1) == '!') &&
3912 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3913 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3914 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3915 (NXT(8) == 'T')) {
3916 xmlParserInputPtr input = ctxt->input;
3917
3918 SKIP(9);
3919 if (!IS_BLANK(CUR)) {
3920 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3922 ctxt->sax->error(ctxt->userData,
3923 "Space required after '<!ATTLIST'\n");
3924 ctxt->wellFormed = 0;
3925 ctxt->disableSAX = 1;
3926 }
3927 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003928 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003929 if (elemName == NULL) {
3930 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3932 ctxt->sax->error(ctxt->userData,
3933 "ATTLIST: no name for Element\n");
3934 ctxt->wellFormed = 0;
3935 ctxt->disableSAX = 1;
3936 return;
3937 }
3938 SKIP_BLANKS;
3939 GROW;
3940 while (RAW != '>') {
3941 const xmlChar *check = CUR_PTR;
3942 int type;
3943 int def;
3944 xmlChar *defaultValue = NULL;
3945
3946 GROW;
3947 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003948 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003949 if (attrName == NULL) {
3950 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3952 ctxt->sax->error(ctxt->userData,
3953 "ATTLIST: no name for Attribute\n");
3954 ctxt->wellFormed = 0;
3955 ctxt->disableSAX = 1;
3956 break;
3957 }
3958 GROW;
3959 if (!IS_BLANK(CUR)) {
3960 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3962 ctxt->sax->error(ctxt->userData,
3963 "Space required after the attribute name\n");
3964 ctxt->wellFormed = 0;
3965 ctxt->disableSAX = 1;
3966 if (attrName != NULL)
3967 xmlFree(attrName);
3968 if (defaultValue != NULL)
3969 xmlFree(defaultValue);
3970 break;
3971 }
3972 SKIP_BLANKS;
3973
3974 type = xmlParseAttributeType(ctxt, &tree);
3975 if (type <= 0) {
3976 if (attrName != NULL)
3977 xmlFree(attrName);
3978 if (defaultValue != NULL)
3979 xmlFree(defaultValue);
3980 break;
3981 }
3982
3983 GROW;
3984 if (!IS_BLANK(CUR)) {
3985 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3987 ctxt->sax->error(ctxt->userData,
3988 "Space required after the attribute type\n");
3989 ctxt->wellFormed = 0;
3990 ctxt->disableSAX = 1;
3991 if (attrName != NULL)
3992 xmlFree(attrName);
3993 if (defaultValue != NULL)
3994 xmlFree(defaultValue);
3995 if (tree != NULL)
3996 xmlFreeEnumeration(tree);
3997 break;
3998 }
3999 SKIP_BLANKS;
4000
4001 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4002 if (def <= 0) {
4003 if (attrName != NULL)
4004 xmlFree(attrName);
4005 if (defaultValue != NULL)
4006 xmlFree(defaultValue);
4007 if (tree != NULL)
4008 xmlFreeEnumeration(tree);
4009 break;
4010 }
4011
4012 GROW;
4013 if (RAW != '>') {
4014 if (!IS_BLANK(CUR)) {
4015 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4017 ctxt->sax->error(ctxt->userData,
4018 "Space required after the attribute default value\n");
4019 ctxt->wellFormed = 0;
4020 ctxt->disableSAX = 1;
4021 if (attrName != NULL)
4022 xmlFree(attrName);
4023 if (defaultValue != NULL)
4024 xmlFree(defaultValue);
4025 if (tree != NULL)
4026 xmlFreeEnumeration(tree);
4027 break;
4028 }
4029 SKIP_BLANKS;
4030 }
4031 if (check == CUR_PTR) {
4032 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4034 ctxt->sax->error(ctxt->userData,
4035 "xmlParseAttributeListDecl: detected internal error\n");
4036 if (attrName != NULL)
4037 xmlFree(attrName);
4038 if (defaultValue != NULL)
4039 xmlFree(defaultValue);
4040 if (tree != NULL)
4041 xmlFreeEnumeration(tree);
4042 break;
4043 }
4044 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4045 (ctxt->sax->attributeDecl != NULL))
4046 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4047 type, def, defaultValue, tree);
4048 if (attrName != NULL)
4049 xmlFree(attrName);
4050 if (defaultValue != NULL)
4051 xmlFree(defaultValue);
4052 GROW;
4053 }
4054 if (RAW == '>') {
4055 if (input != ctxt->input) {
4056 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4057 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4058 ctxt->sax->error(ctxt->userData,
4059"Attribute list declaration doesn't start and stop in the same entity\n");
4060 ctxt->wellFormed = 0;
4061 ctxt->disableSAX = 1;
4062 }
4063 NEXT;
4064 }
4065
4066 xmlFree(elemName);
4067 }
4068}
4069
4070/**
4071 * xmlParseElementMixedContentDecl:
4072 * @ctxt: an XML parser context
4073 *
4074 * parse the declaration for a Mixed Element content
4075 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4076 *
4077 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4078 * '(' S? '#PCDATA' S? ')'
4079 *
4080 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4081 *
4082 * [ VC: No Duplicate Types ]
4083 * The same name must not appear more than once in a single
4084 * mixed-content declaration.
4085 *
4086 * returns: the list of the xmlElementContentPtr describing the element choices
4087 */
4088xmlElementContentPtr
4089xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4090 xmlElementContentPtr ret = NULL, cur = NULL, n;
4091 xmlChar *elem = NULL;
4092
4093 GROW;
4094 if ((RAW == '#') && (NXT(1) == 'P') &&
4095 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4096 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4097 (NXT(6) == 'A')) {
4098 SKIP(7);
4099 SKIP_BLANKS;
4100 SHRINK;
4101 if (RAW == ')') {
4102 ctxt->entity = ctxt->input;
4103 NEXT;
4104 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4105 if (RAW == '*') {
4106 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4107 NEXT;
4108 }
4109 return(ret);
4110 }
4111 if ((RAW == '(') || (RAW == '|')) {
4112 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4113 if (ret == NULL) return(NULL);
4114 }
4115 while (RAW == '|') {
4116 NEXT;
4117 if (elem == NULL) {
4118 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4119 if (ret == NULL) return(NULL);
4120 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004121 if (cur != NULL)
4122 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004123 cur = ret;
4124 } else {
4125 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4126 if (n == NULL) return(NULL);
4127 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004128 if (n->c1 != NULL)
4129 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004130 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004131 if (n != NULL)
4132 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004133 cur = n;
4134 xmlFree(elem);
4135 }
4136 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004137 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004138 if (elem == NULL) {
4139 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4141 ctxt->sax->error(ctxt->userData,
4142 "xmlParseElementMixedContentDecl : Name expected\n");
4143 ctxt->wellFormed = 0;
4144 ctxt->disableSAX = 1;
4145 xmlFreeElementContent(cur);
4146 return(NULL);
4147 }
4148 SKIP_BLANKS;
4149 GROW;
4150 }
4151 if ((RAW == ')') && (NXT(1) == '*')) {
4152 if (elem != NULL) {
4153 cur->c2 = xmlNewElementContent(elem,
4154 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004155 if (cur->c2 != NULL)
4156 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004157 xmlFree(elem);
4158 }
4159 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4160 ctxt->entity = ctxt->input;
4161 SKIP(2);
4162 } else {
4163 if (elem != NULL) xmlFree(elem);
4164 xmlFreeElementContent(ret);
4165 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4167 ctxt->sax->error(ctxt->userData,
4168 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4169 ctxt->wellFormed = 0;
4170 ctxt->disableSAX = 1;
4171 return(NULL);
4172 }
4173
4174 } else {
4175 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4177 ctxt->sax->error(ctxt->userData,
4178 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4179 ctxt->wellFormed = 0;
4180 ctxt->disableSAX = 1;
4181 }
4182 return(ret);
4183}
4184
4185/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004186 * xmlParseElementChildrenContentD:
4187 * @ctxt: an XML parser context
4188 *
4189 * VMS version of xmlParseElementChildrenContentDecl()
4190 *
4191 * Returns the tree of xmlElementContentPtr describing the element
4192 * hierarchy.
4193 */
4194/**
Owen Taylor3473f882001-02-23 17:55:21 +00004195 * xmlParseElementChildrenContentDecl:
4196 * @ctxt: an XML parser context
4197 *
 * parse the declaration for an Element children content
4199 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4200 *
4201 *
4202 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4203 *
4204 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4205 *
4206 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4207 *
4208 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4209 *
4210 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4211 * TODO Parameter-entity replacement text must be properly nested
4212 * with parenthetized groups. That is to say, if either of the
4213 * opening or closing parentheses in a choice, seq, or Mixed
4214 * construct is contained in the replacement text for a parameter
4215 * entity, both must be contained in the same replacement text. For
4216 * interoperability, if a parameter-entity reference appears in a
4217 * choice, seq, or Mixed construct, its replacement text should not
4218 * be empty, and neither the first nor last non-blank character of
4219 * the replacement text should be a connector (| or ,).
4220 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004221 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004222 * hierarchy.
4223 */
4224xmlElementContentPtr
4225#ifdef VMS
4226xmlParseElementChildrenContentD
4227#else
4228xmlParseElementChildrenContentDecl
4229#endif
4230(xmlParserCtxtPtr ctxt) {
4231 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4232 xmlChar *elem;
4233 xmlChar type = 0;
4234
4235 SKIP_BLANKS;
4236 GROW;
4237 if (RAW == '(') {
4238 /* Recurse on first child */
4239 NEXT;
4240 SKIP_BLANKS;
4241 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4242 SKIP_BLANKS;
4243 GROW;
4244 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004245 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004246 if (elem == NULL) {
4247 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4249 ctxt->sax->error(ctxt->userData,
4250 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4251 ctxt->wellFormed = 0;
4252 ctxt->disableSAX = 1;
4253 return(NULL);
4254 }
4255 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4256 GROW;
4257 if (RAW == '?') {
4258 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4259 NEXT;
4260 } else if (RAW == '*') {
4261 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4262 NEXT;
4263 } else if (RAW == '+') {
4264 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4265 NEXT;
4266 } else {
4267 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4268 }
4269 xmlFree(elem);
4270 GROW;
4271 }
4272 SKIP_BLANKS;
4273 SHRINK;
4274 while (RAW != ')') {
4275 /*
4276 * Each loop we parse one separator and one element.
4277 */
4278 if (RAW == ',') {
4279 if (type == 0) type = CUR;
4280
4281 /*
4282 * Detect "Name | Name , Name" error
4283 */
4284 else if (type != CUR) {
4285 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4287 ctxt->sax->error(ctxt->userData,
4288 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4289 type);
4290 ctxt->wellFormed = 0;
4291 ctxt->disableSAX = 1;
4292 if ((op != NULL) && (op != ret))
4293 xmlFreeElementContent(op);
4294 if ((last != NULL) && (last != ret) &&
4295 (last != ret->c1) && (last != ret->c2))
4296 xmlFreeElementContent(last);
4297 if (ret != NULL)
4298 xmlFreeElementContent(ret);
4299 return(NULL);
4300 }
4301 NEXT;
4302
4303 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4304 if (op == NULL) {
4305 xmlFreeElementContent(ret);
4306 return(NULL);
4307 }
4308 if (last == NULL) {
4309 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004310 if (ret != NULL)
4311 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004312 ret = cur = op;
4313 } else {
4314 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004315 if (op != NULL)
4316 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004317 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004318 if (last != NULL)
4319 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004320 cur =op;
4321 last = NULL;
4322 }
4323 } else if (RAW == '|') {
4324 if (type == 0) type = CUR;
4325
4326 /*
4327 * Detect "Name , Name | Name" error
4328 */
4329 else if (type != CUR) {
4330 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4332 ctxt->sax->error(ctxt->userData,
4333 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4334 type);
4335 ctxt->wellFormed = 0;
4336 ctxt->disableSAX = 1;
4337 if ((op != NULL) && (op != ret) && (op != last))
4338 xmlFreeElementContent(op);
4339 if ((last != NULL) && (last != ret) &&
4340 (last != ret->c1) && (last != ret->c2))
4341 xmlFreeElementContent(last);
4342 if (ret != NULL)
4343 xmlFreeElementContent(ret);
4344 return(NULL);
4345 }
4346 NEXT;
4347
4348 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4349 if (op == NULL) {
4350 if ((op != NULL) && (op != ret))
4351 xmlFreeElementContent(op);
4352 if ((last != NULL) && (last != ret) &&
4353 (last != ret->c1) && (last != ret->c2))
4354 xmlFreeElementContent(last);
4355 if (ret != NULL)
4356 xmlFreeElementContent(ret);
4357 return(NULL);
4358 }
4359 if (last == NULL) {
4360 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004361 if (ret != NULL)
4362 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004363 ret = cur = op;
4364 } else {
4365 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004366 if (op != NULL)
4367 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004368 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004369 if (last != NULL)
4370 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004371 cur =op;
4372 last = NULL;
4373 }
4374 } else {
4375 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4377 ctxt->sax->error(ctxt->userData,
4378 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4379 ctxt->wellFormed = 0;
4380 ctxt->disableSAX = 1;
4381 if ((op != NULL) && (op != ret))
4382 xmlFreeElementContent(op);
4383 if ((last != NULL) && (last != ret) &&
4384 (last != ret->c1) && (last != ret->c2))
4385 xmlFreeElementContent(last);
4386 if (ret != NULL)
4387 xmlFreeElementContent(ret);
4388 return(NULL);
4389 }
4390 GROW;
4391 SKIP_BLANKS;
4392 GROW;
4393 if (RAW == '(') {
4394 /* Recurse on second child */
4395 NEXT;
4396 SKIP_BLANKS;
4397 last = xmlParseElementChildrenContentDecl(ctxt);
4398 SKIP_BLANKS;
4399 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004400 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004401 if (elem == NULL) {
4402 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4404 ctxt->sax->error(ctxt->userData,
4405 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4406 ctxt->wellFormed = 0;
4407 ctxt->disableSAX = 1;
4408 if ((op != NULL) && (op != ret))
4409 xmlFreeElementContent(op);
4410 if ((last != NULL) && (last != ret) &&
4411 (last != ret->c1) && (last != ret->c2))
4412 xmlFreeElementContent(last);
4413 if (ret != NULL)
4414 xmlFreeElementContent(ret);
4415 return(NULL);
4416 }
4417 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4418 xmlFree(elem);
4419 if (RAW == '?') {
4420 last->ocur = XML_ELEMENT_CONTENT_OPT;
4421 NEXT;
4422 } else if (RAW == '*') {
4423 last->ocur = XML_ELEMENT_CONTENT_MULT;
4424 NEXT;
4425 } else if (RAW == '+') {
4426 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4427 NEXT;
4428 } else {
4429 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4430 }
4431 }
4432 SKIP_BLANKS;
4433 GROW;
4434 }
4435 if ((cur != NULL) && (last != NULL)) {
4436 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004437 if (last != NULL)
4438 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004439 }
4440 ctxt->entity = ctxt->input;
4441 NEXT;
4442 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004443 if (ret != NULL)
4444 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004445 NEXT;
4446 } else if (RAW == '*') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004447 if (ret != NULL) {
Daniel Veillarde470df72001-04-18 21:41:07 +00004448 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004449 cur = ret;
4450 /*
4451 * Some normalization:
4452 * (a | b* | c?)* == (a | b | c)*
4453 */
4454 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4455 if ((cur->c1 != NULL) &&
4456 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4457 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
4458 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4459 if ((cur->c2 != NULL) &&
4460 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4461 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
4462 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4463 cur = cur->c2;
4464 }
4465 }
Owen Taylor3473f882001-02-23 17:55:21 +00004466 NEXT;
4467 } else if (RAW == '+') {
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004468 if (ret != NULL) {
4469 int found = 0;
4470
Daniel Veillarde470df72001-04-18 21:41:07 +00004471 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Daniel Veillardce2c2f02001-10-18 14:57:24 +00004472 /*
4473 * Some normalization:
4474 * (a | b*)+ == (a | b)*
4475 * (a | b?)+ == (a | b)*
4476 */
4477 while (cur->type == XML_ELEMENT_CONTENT_OR) {
4478 if ((cur->c1 != NULL) &&
4479 ((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
4480 (cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
4481 cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
4482 found = 1;
4483 }
4484 if ((cur->c2 != NULL) &&
4485 ((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
4486 (cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
4487 cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
4488 found = 1;
4489 }
4490 cur = cur->c2;
4491 }
4492 if (found)
4493 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4494 }
Owen Taylor3473f882001-02-23 17:55:21 +00004495 NEXT;
4496 }
4497 return(ret);
4498}
4499
4500/**
4501 * xmlParseElementContentDecl:
4502 * @ctxt: an XML parser context
4503 * @name: the name of the element being defined.
4504 * @result: the Element Content pointer will be stored here if any
4505 *
4506 * parse the declaration for an Element content either Mixed or Children,
4507 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4508 *
4509 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4510 *
4511 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4512 */
4513
4514int
4515xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4516 xmlElementContentPtr *result) {
4517
4518 xmlElementContentPtr tree = NULL;
4519 xmlParserInputPtr input = ctxt->input;
4520 int res;
4521
4522 *result = NULL;
4523
4524 if (RAW != '(') {
4525 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4527 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004528 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004529 ctxt->wellFormed = 0;
4530 ctxt->disableSAX = 1;
4531 return(-1);
4532 }
4533 NEXT;
4534 GROW;
4535 SKIP_BLANKS;
4536 if ((RAW == '#') && (NXT(1) == 'P') &&
4537 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4538 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4539 (NXT(6) == 'A')) {
4540 tree = xmlParseElementMixedContentDecl(ctxt);
4541 res = XML_ELEMENT_TYPE_MIXED;
4542 } else {
4543 tree = xmlParseElementChildrenContentDecl(ctxt);
4544 res = XML_ELEMENT_TYPE_ELEMENT;
4545 }
4546 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4547 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4549 ctxt->sax->error(ctxt->userData,
4550"Element content declaration doesn't start and stop in the same entity\n");
4551 ctxt->wellFormed = 0;
4552 ctxt->disableSAX = 1;
4553 }
4554 SKIP_BLANKS;
4555 *result = tree;
4556 return(res);
4557}
4558
4559/**
4560 * xmlParseElementDecl:
4561 * @ctxt: an XML parser context
4562 *
4563 * parse an Element declaration.
4564 *
4565 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4566 *
4567 * [ VC: Unique Element Type Declaration ]
4568 * No element type may be declared more than once
4569 *
4570 * Returns the type of the element, or -1 in case of error
4571 */
4572int
4573xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4574 xmlChar *name;
4575 int ret = -1;
4576 xmlElementContentPtr content = NULL;
4577
4578 GROW;
4579 if ((RAW == '<') && (NXT(1) == '!') &&
4580 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4581 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4582 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4583 (NXT(8) == 'T')) {
4584 xmlParserInputPtr input = ctxt->input;
4585
4586 SKIP(9);
4587 if (!IS_BLANK(CUR)) {
4588 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4590 ctxt->sax->error(ctxt->userData,
4591 "Space required after 'ELEMENT'\n");
4592 ctxt->wellFormed = 0;
4593 ctxt->disableSAX = 1;
4594 }
4595 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004596 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004597 if (name == NULL) {
4598 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4600 ctxt->sax->error(ctxt->userData,
4601 "xmlParseElementDecl: no name for Element\n");
4602 ctxt->wellFormed = 0;
4603 ctxt->disableSAX = 1;
4604 return(-1);
4605 }
4606 while ((RAW == 0) && (ctxt->inputNr > 1))
4607 xmlPopInput(ctxt);
4608 if (!IS_BLANK(CUR)) {
4609 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4611 ctxt->sax->error(ctxt->userData,
4612 "Space required after the element name\n");
4613 ctxt->wellFormed = 0;
4614 ctxt->disableSAX = 1;
4615 }
4616 SKIP_BLANKS;
4617 if ((RAW == 'E') && (NXT(1) == 'M') &&
4618 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4619 (NXT(4) == 'Y')) {
4620 SKIP(5);
4621 /*
4622 * Element must always be empty.
4623 */
4624 ret = XML_ELEMENT_TYPE_EMPTY;
4625 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4626 (NXT(2) == 'Y')) {
4627 SKIP(3);
4628 /*
4629 * Element is a generic container.
4630 */
4631 ret = XML_ELEMENT_TYPE_ANY;
4632 } else if (RAW == '(') {
4633 ret = xmlParseElementContentDecl(ctxt, name, &content);
4634 } else {
4635 /*
4636 * [ WFC: PEs in Internal Subset ] error handling.
4637 */
4638 if ((RAW == '%') && (ctxt->external == 0) &&
4639 (ctxt->inputNr == 1)) {
4640 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4642 ctxt->sax->error(ctxt->userData,
4643 "PEReference: forbidden within markup decl in internal subset\n");
4644 } else {
4645 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4647 ctxt->sax->error(ctxt->userData,
4648 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4649 }
4650 ctxt->wellFormed = 0;
4651 ctxt->disableSAX = 1;
4652 if (name != NULL) xmlFree(name);
4653 return(-1);
4654 }
4655
4656 SKIP_BLANKS;
4657 /*
4658 * Pop-up of finished entities.
4659 */
4660 while ((RAW == 0) && (ctxt->inputNr > 1))
4661 xmlPopInput(ctxt);
4662 SKIP_BLANKS;
4663
4664 if (RAW != '>') {
4665 ctxt->errNo = XML_ERR_GT_REQUIRED;
4666 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4667 ctxt->sax->error(ctxt->userData,
4668 "xmlParseElementDecl: expected '>' at the end\n");
4669 ctxt->wellFormed = 0;
4670 ctxt->disableSAX = 1;
4671 } else {
4672 if (input != ctxt->input) {
4673 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4675 ctxt->sax->error(ctxt->userData,
4676"Element declaration doesn't start and stop in the same entity\n");
4677 ctxt->wellFormed = 0;
4678 ctxt->disableSAX = 1;
4679 }
4680
4681 NEXT;
4682 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4683 (ctxt->sax->elementDecl != NULL))
4684 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4685 content);
4686 }
4687 if (content != NULL) {
4688 xmlFreeElementContent(content);
4689 }
4690 if (name != NULL) {
4691 xmlFree(name);
4692 }
4693 }
4694 return(ret);
4695}
4696
4697/**
Owen Taylor3473f882001-02-23 17:55:21 +00004698 * xmlParseConditionalSections
4699 * @ctxt: an XML parser context
4700 *
4701 * [61] conditionalSect ::= includeSect | ignoreSect
4702 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4703 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4704 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4705 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4706 */
4707
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004708static void
Owen Taylor3473f882001-02-23 17:55:21 +00004709xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4710 SKIP(3);
4711 SKIP_BLANKS;
4712 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4713 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4714 (NXT(6) == 'E')) {
4715 SKIP(7);
4716 SKIP_BLANKS;
4717 if (RAW != '[') {
4718 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4720 ctxt->sax->error(ctxt->userData,
4721 "XML conditional section '[' expected\n");
4722 ctxt->wellFormed = 0;
4723 ctxt->disableSAX = 1;
4724 } else {
4725 NEXT;
4726 }
4727 if (xmlParserDebugEntities) {
4728 if ((ctxt->input != NULL) && (ctxt->input->filename))
4729 xmlGenericError(xmlGenericErrorContext,
4730 "%s(%d): ", ctxt->input->filename,
4731 ctxt->input->line);
4732 xmlGenericError(xmlGenericErrorContext,
4733 "Entering INCLUDE Conditional Section\n");
4734 }
4735
4736 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4737 (NXT(2) != '>'))) {
4738 const xmlChar *check = CUR_PTR;
4739 int cons = ctxt->input->consumed;
4740 int tok = ctxt->token;
4741
4742 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4743 xmlParseConditionalSections(ctxt);
4744 } else if (IS_BLANK(CUR)) {
4745 NEXT;
4746 } else if (RAW == '%') {
4747 xmlParsePEReference(ctxt);
4748 } else
4749 xmlParseMarkupDecl(ctxt);
4750
4751 /*
4752 * Pop-up of finished entities.
4753 */
4754 while ((RAW == 0) && (ctxt->inputNr > 1))
4755 xmlPopInput(ctxt);
4756
4757 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4758 (tok == ctxt->token)) {
4759 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4761 ctxt->sax->error(ctxt->userData,
4762 "Content error in the external subset\n");
4763 ctxt->wellFormed = 0;
4764 ctxt->disableSAX = 1;
4765 break;
4766 }
4767 }
4768 if (xmlParserDebugEntities) {
4769 if ((ctxt->input != NULL) && (ctxt->input->filename))
4770 xmlGenericError(xmlGenericErrorContext,
4771 "%s(%d): ", ctxt->input->filename,
4772 ctxt->input->line);
4773 xmlGenericError(xmlGenericErrorContext,
4774 "Leaving INCLUDE Conditional Section\n");
4775 }
4776
4777 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4778 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4779 int state;
4780 int instate;
4781 int depth = 0;
4782
4783 SKIP(6);
4784 SKIP_BLANKS;
4785 if (RAW != '[') {
4786 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4788 ctxt->sax->error(ctxt->userData,
4789 "XML conditional section '[' expected\n");
4790 ctxt->wellFormed = 0;
4791 ctxt->disableSAX = 1;
4792 } else {
4793 NEXT;
4794 }
4795 if (xmlParserDebugEntities) {
4796 if ((ctxt->input != NULL) && (ctxt->input->filename))
4797 xmlGenericError(xmlGenericErrorContext,
4798 "%s(%d): ", ctxt->input->filename,
4799 ctxt->input->line);
4800 xmlGenericError(xmlGenericErrorContext,
4801 "Entering IGNORE Conditional Section\n");
4802 }
4803
4804 /*
4805 * Parse up to the end of the conditionnal section
4806 * But disable SAX event generating DTD building in the meantime
4807 */
4808 state = ctxt->disableSAX;
4809 instate = ctxt->instate;
4810 ctxt->disableSAX = 1;
4811 ctxt->instate = XML_PARSER_IGNORE;
4812
4813 while (depth >= 0) {
4814 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4815 depth++;
4816 SKIP(3);
4817 continue;
4818 }
4819 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4820 if (--depth >= 0) SKIP(3);
4821 continue;
4822 }
4823 NEXT;
4824 continue;
4825 }
4826
4827 ctxt->disableSAX = state;
4828 ctxt->instate = instate;
4829
4830 if (xmlParserDebugEntities) {
4831 if ((ctxt->input != NULL) && (ctxt->input->filename))
4832 xmlGenericError(xmlGenericErrorContext,
4833 "%s(%d): ", ctxt->input->filename,
4834 ctxt->input->line);
4835 xmlGenericError(xmlGenericErrorContext,
4836 "Leaving IGNORE Conditional Section\n");
4837 }
4838
4839 } else {
4840 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4842 ctxt->sax->error(ctxt->userData,
4843 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4844 ctxt->wellFormed = 0;
4845 ctxt->disableSAX = 1;
4846 }
4847
4848 if (RAW == 0)
4849 SHRINK;
4850
4851 if (RAW == 0) {
4852 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4854 ctxt->sax->error(ctxt->userData,
4855 "XML conditional section not closed\n");
4856 ctxt->wellFormed = 0;
4857 ctxt->disableSAX = 1;
4858 } else {
4859 SKIP(3);
4860 }
4861}
4862
4863/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004864 * xmlParseMarkupDecl:
4865 * @ctxt: an XML parser context
4866 *
4867 * parse Markup declarations
4868 *
4869 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4870 * NotationDecl | PI | Comment
4871 *
4872 * [ VC: Proper Declaration/PE Nesting ]
4873 * Parameter-entity replacement text must be properly nested with
4874 * markup declarations. That is to say, if either the first character
4875 * or the last character of a markup declaration (markupdecl above) is
4876 * contained in the replacement text for a parameter-entity reference,
4877 * both must be contained in the same replacement text.
4878 *
4879 * [ WFC: PEs in Internal Subset ]
4880 * In the internal DTD subset, parameter-entity references can occur
4881 * only where markup declarations can occur, not within markup declarations.
4882 * (This does not apply to references that occur in external parameter
4883 * entities or to the external subset.)
4884 */
4885void
4886xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4887 GROW;
4888 xmlParseElementDecl(ctxt);
4889 xmlParseAttributeListDecl(ctxt);
4890 xmlParseEntityDecl(ctxt);
4891 xmlParseNotationDecl(ctxt);
4892 xmlParsePI(ctxt);
4893 xmlParseComment(ctxt);
4894 /*
4895 * This is only for internal subset. On external entities,
4896 * the replacement is done before parsing stage
4897 */
4898 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4899 xmlParsePEReference(ctxt);
4900
4901 /*
4902 * Conditional sections are allowed from entities included
4903 * by PE References in the internal subset.
4904 */
4905 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4906 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4907 xmlParseConditionalSections(ctxt);
4908 }
4909 }
4910
4911 ctxt->instate = XML_PARSER_DTD;
4912}
4913
4914/**
4915 * xmlParseTextDecl:
4916 * @ctxt: an XML parser context
4917 *
4918 * parse an XML declaration header for external entities
4919 *
4920 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4921 *
4922 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4923 */
4924
4925void
4926xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4927 xmlChar *version;
4928
4929 /*
4930 * We know that '<?xml' is here.
4931 */
4932 if ((RAW == '<') && (NXT(1) == '?') &&
4933 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4934 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4935 SKIP(5);
4936 } else {
4937 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4939 ctxt->sax->error(ctxt->userData,
4940 "Text declaration '<?xml' required\n");
4941 ctxt->wellFormed = 0;
4942 ctxt->disableSAX = 1;
4943
4944 return;
4945 }
4946
4947 if (!IS_BLANK(CUR)) {
4948 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4950 ctxt->sax->error(ctxt->userData,
4951 "Space needed after '<?xml'\n");
4952 ctxt->wellFormed = 0;
4953 ctxt->disableSAX = 1;
4954 }
4955 SKIP_BLANKS;
4956
4957 /*
4958 * We may have the VersionInfo here.
4959 */
4960 version = xmlParseVersionInfo(ctxt);
4961 if (version == NULL)
4962 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4963 ctxt->input->version = version;
4964
4965 /*
4966 * We must have the encoding declaration
4967 */
4968 if (!IS_BLANK(CUR)) {
4969 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4971 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4972 ctxt->wellFormed = 0;
4973 ctxt->disableSAX = 1;
4974 }
4975 xmlParseEncodingDecl(ctxt);
4976 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4977 /*
4978 * The XML REC instructs us to stop parsing right here
4979 */
4980 return;
4981 }
4982
4983 SKIP_BLANKS;
4984 if ((RAW == '?') && (NXT(1) == '>')) {
4985 SKIP(2);
4986 } else if (RAW == '>') {
4987 /* Deprecated old WD ... */
4988 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4990 ctxt->sax->error(ctxt->userData,
4991 "XML declaration must end-up with '?>'\n");
4992 ctxt->wellFormed = 0;
4993 ctxt->disableSAX = 1;
4994 NEXT;
4995 } else {
4996 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4998 ctxt->sax->error(ctxt->userData,
4999 "parsing XML declaration: '?>' expected\n");
5000 ctxt->wellFormed = 0;
5001 ctxt->disableSAX = 1;
5002 MOVETO_ENDTAG(CUR_PTR);
5003 NEXT;
5004 }
5005}
5006
5007/**
Owen Taylor3473f882001-02-23 17:55:21 +00005008 * xmlParseExternalSubset:
5009 * @ctxt: an XML parser context
5010 * @ExternalID: the external identifier
5011 * @SystemID: the system identifier (or URL)
5012 *
5013 * parse Markup declarations from an external subset
5014 *
5015 * [30] extSubset ::= textDecl? extSubsetDecl
5016 *
5017 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
5018 */
5019void
5020xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
5021 const xmlChar *SystemID) {
5022 GROW;
5023 if ((RAW == '<') && (NXT(1) == '?') &&
5024 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5025 (NXT(4) == 'l')) {
5026 xmlParseTextDecl(ctxt);
5027 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5028 /*
5029 * The XML REC instructs us to stop parsing right here
5030 */
5031 ctxt->instate = XML_PARSER_EOF;
5032 return;
5033 }
5034 }
5035 if (ctxt->myDoc == NULL) {
5036 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
5037 }
5038 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
5039 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5040
5041 ctxt->instate = XML_PARSER_DTD;
5042 ctxt->external = 1;
5043 while (((RAW == '<') && (NXT(1) == '?')) ||
5044 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005045 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005046 const xmlChar *check = CUR_PTR;
5047 int cons = ctxt->input->consumed;
5048 int tok = ctxt->token;
5049
5050 GROW;
5051 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5052 xmlParseConditionalSections(ctxt);
5053 } else if (IS_BLANK(CUR)) {
5054 NEXT;
5055 } else if (RAW == '%') {
5056 xmlParsePEReference(ctxt);
5057 } else
5058 xmlParseMarkupDecl(ctxt);
5059
5060 /*
5061 * Pop-up of finished entities.
5062 */
5063 while ((RAW == 0) && (ctxt->inputNr > 1))
5064 xmlPopInput(ctxt);
5065
5066 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5067 (tok == ctxt->token)) {
5068 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5070 ctxt->sax->error(ctxt->userData,
5071 "Content error in the external subset\n");
5072 ctxt->wellFormed = 0;
5073 ctxt->disableSAX = 1;
5074 break;
5075 }
5076 }
5077
5078 if (RAW != 0) {
5079 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5081 ctxt->sax->error(ctxt->userData,
5082 "Extra content at the end of the document\n");
5083 ctxt->wellFormed = 0;
5084 ctxt->disableSAX = 1;
5085 }
5086
5087}
5088
5089/**
5090 * xmlParseReference:
5091 * @ctxt: an XML parser context
5092 *
5093 * parse and handle entity references in content, depending on the SAX
5094 * interface, this may end-up in a call to character() if this is a
5095 * CharRef, a predefined entity, if there is no reference() callback.
5096 * or if the parser was asked to switch to that mode.
5097 *
5098 * [67] Reference ::= EntityRef | CharRef
5099 */
5100void
5101xmlParseReference(xmlParserCtxtPtr ctxt) {
5102 xmlEntityPtr ent;
5103 xmlChar *val;
5104 if (RAW != '&') return;
5105
5106 if (NXT(1) == '#') {
5107 int i = 0;
5108 xmlChar out[10];
5109 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005110 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005111
5112 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5113 /*
5114 * So we are using non-UTF-8 buffers
5115 * Check that the char fit on 8bits, if not
5116 * generate a CharRef.
5117 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005118 if (value <= 0xFF) {
5119 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005120 out[1] = 0;
5121 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5122 (!ctxt->disableSAX))
5123 ctxt->sax->characters(ctxt->userData, out, 1);
5124 } else {
5125 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005126 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005127 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005128 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005129 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5130 (!ctxt->disableSAX))
5131 ctxt->sax->reference(ctxt->userData, out);
5132 }
5133 } else {
5134 /*
5135 * Just encode the value in UTF-8
5136 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005137 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005138 out[i] = 0;
5139 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5140 (!ctxt->disableSAX))
5141 ctxt->sax->characters(ctxt->userData, out, i);
5142 }
5143 } else {
5144 ent = xmlParseEntityRef(ctxt);
5145 if (ent == NULL) return;
5146 if ((ent->name != NULL) &&
5147 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5148 xmlNodePtr list = NULL;
5149 int ret;
5150
5151
5152 /*
5153 * The first reference to the entity trigger a parsing phase
5154 * where the ent->children is filled with the result from
5155 * the parsing.
5156 */
5157 if (ent->children == NULL) {
5158 xmlChar *value;
5159 value = ent->content;
5160
5161 /*
5162 * Check that this entity is well formed
5163 */
5164 if ((value != NULL) &&
5165 (value[1] == 0) && (value[0] == '<') &&
5166 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5167 /*
5168 * DONE: get definite answer on this !!!
5169 * Lots of entity decls are used to declare a single
5170 * char
5171 * <!ENTITY lt "<">
5172 * Which seems to be valid since
5173 * 2.4: The ampersand character (&) and the left angle
5174 * bracket (<) may appear in their literal form only
5175 * when used ... They are also legal within the literal
5176 * entity value of an internal entity declaration;i
5177 * see "4.3.2 Well-Formed Parsed Entities".
5178 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5179 * Looking at the OASIS test suite and James Clark
5180 * tests, this is broken. However the XML REC uses
5181 * it. Is the XML REC not well-formed ????
5182 * This is a hack to avoid this problem
5183 *
5184 * ANSWER: since lt gt amp .. are already defined,
5185 * this is a redefinition and hence the fact that the
5186 * contentis not well balanced is not a Wf error, this
5187 * is lousy but acceptable.
5188 */
5189 list = xmlNewDocText(ctxt->myDoc, value);
5190 if (list != NULL) {
5191 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5192 (ent->children == NULL)) {
5193 ent->children = list;
5194 ent->last = list;
5195 list->parent = (xmlNodePtr) ent;
5196 } else {
5197 xmlFreeNodeList(list);
5198 }
5199 } else if (list != NULL) {
5200 xmlFreeNodeList(list);
5201 }
5202 } else {
5203 /*
5204 * 4.3.2: An internal general parsed entity is well-formed
5205 * if its replacement text matches the production labeled
5206 * content.
5207 */
5208 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5209 ctxt->depth++;
5210 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5211 ctxt->sax, NULL, ctxt->depth,
5212 value, &list);
5213 ctxt->depth--;
5214 } else if (ent->etype ==
5215 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5216 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005217 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005218 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005219 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005220 ctxt->depth--;
5221 } else {
5222 ret = -1;
5223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5224 ctxt->sax->error(ctxt->userData,
5225 "Internal: invalid entity type\n");
5226 }
5227 if (ret == XML_ERR_ENTITY_LOOP) {
5228 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5230 ctxt->sax->error(ctxt->userData,
5231 "Detected entity reference loop\n");
5232 ctxt->wellFormed = 0;
5233 ctxt->disableSAX = 1;
5234 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005235 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5236 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005237 (ent->children == NULL)) {
5238 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005239 if (ctxt->replaceEntities) {
5240 /*
5241 * Prune it directly in the generated document
5242 * except for single text nodes.
5243 */
5244 if ((list->type == XML_TEXT_NODE) &&
5245 (list->next == NULL)) {
5246 list->parent = (xmlNodePtr) ent;
5247 list = NULL;
5248 } else {
5249 while (list != NULL) {
5250 list->parent = (xmlNodePtr) ctxt->node;
5251 if (list->next == NULL)
5252 ent->last = list;
5253 list = list->next;
5254 }
5255 list = ent->children;
5256 }
5257 } else {
5258 while (list != NULL) {
5259 list->parent = (xmlNodePtr) ent;
5260 if (list->next == NULL)
5261 ent->last = list;
5262 list = list->next;
5263 }
Owen Taylor3473f882001-02-23 17:55:21 +00005264 }
5265 } else {
5266 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005267 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005268 }
5269 } else if (ret > 0) {
5270 ctxt->errNo = ret;
5271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5272 ctxt->sax->error(ctxt->userData,
5273 "Entity value required\n");
5274 ctxt->wellFormed = 0;
5275 ctxt->disableSAX = 1;
5276 } else if (list != NULL) {
5277 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005278 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005279 }
5280 }
5281 }
5282 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5283 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5284 /*
5285 * Create a node.
5286 */
5287 ctxt->sax->reference(ctxt->userData, ent->name);
5288 return;
5289 } else if (ctxt->replaceEntities) {
5290 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5291 /*
5292 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005293 * a simple tree copy for all references except the first
5294 * In the first occurence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005295 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005296 if (list == NULL) {
5297 xmlNodePtr new, cur;
5298 cur = ent->children;
5299 while (cur != NULL) {
5300 new = xmlCopyNode(cur, 1);
5301 xmlAddChild(ctxt->node, new);
5302 if (cur == ent->last)
5303 break;
5304 cur = cur->next;
5305 }
5306 } else {
5307 /*
5308 * the name change is to avoid coalescing of the
5309 * node with a prossible previous text one which
5310 * would make ent->children a dandling pointer
5311 */
5312 if (ent->children->type == XML_TEXT_NODE)
5313 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5314 if ((ent->last != ent->children) &&
5315 (ent->last->type == XML_TEXT_NODE))
5316 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5317 xmlAddChildList(ctxt->node, ent->children);
5318 }
5319
Owen Taylor3473f882001-02-23 17:55:21 +00005320 /*
5321 * This is to avoid a nasty side effect, see
5322 * characters() in SAX.c
5323 */
5324 ctxt->nodemem = 0;
5325 ctxt->nodelen = 0;
5326 return;
5327 } else {
5328 /*
5329 * Probably running in SAX mode
5330 */
5331 xmlParserInputPtr input;
5332
5333 input = xmlNewEntityInputStream(ctxt, ent);
5334 xmlPushInput(ctxt, input);
5335 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5336 (RAW == '<') && (NXT(1) == '?') &&
5337 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5338 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5339 xmlParseTextDecl(ctxt);
5340 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5341 /*
5342 * The XML REC instructs us to stop parsing right here
5343 */
5344 ctxt->instate = XML_PARSER_EOF;
5345 return;
5346 }
5347 if (input->standalone == 1) {
5348 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5349 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5350 ctxt->sax->error(ctxt->userData,
5351 "external parsed entities cannot be standalone\n");
5352 ctxt->wellFormed = 0;
5353 ctxt->disableSAX = 1;
5354 }
5355 }
5356 return;
5357 }
5358 }
5359 } else {
5360 val = ent->content;
5361 if (val == NULL) return;
5362 /*
5363 * inline the entity.
5364 */
5365 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5366 (!ctxt->disableSAX))
5367 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5368 }
5369 }
5370}
5371
5372/**
5373 * xmlParseEntityRef:
5374 * @ctxt: an XML parser context
5375 *
5376 * parse ENTITY references declarations
5377 *
5378 * [68] EntityRef ::= '&' Name ';'
5379 *
5380 * [ WFC: Entity Declared ]
5381 * In a document without any DTD, a document with only an internal DTD
5382 * subset which contains no parameter entity references, or a document
5383 * with "standalone='yes'", the Name given in the entity reference
5384 * must match that in an entity declaration, except that well-formed
5385 * documents need not declare any of the following entities: amp, lt,
5386 * gt, apos, quot. The declaration of a parameter entity must precede
5387 * any reference to it. Similarly, the declaration of a general entity
5388 * must precede any reference to it which appears in a default value in an
5389 * attribute-list declaration. Note that if entities are declared in the
5390 * external subset or in external parameter entities, a non-validating
5391 * processor is not obligated to read and process their declarations;
5392 * for such documents, the rule that an entity must be declared is a
5393 * well-formedness constraint only if standalone='yes'.
5394 *
5395 * [ WFC: Parsed Entity ]
5396 * An entity reference must not contain the name of an unparsed entity
5397 *
5398 * Returns the xmlEntityPtr if found, or NULL otherwise.
5399 */
5400xmlEntityPtr
5401xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5402 xmlChar *name;
5403 xmlEntityPtr ent = NULL;
5404
5405 GROW;
5406
5407 if (RAW == '&') {
5408 NEXT;
5409 name = xmlParseName(ctxt);
5410 if (name == NULL) {
5411 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5413 ctxt->sax->error(ctxt->userData,
5414 "xmlParseEntityRef: no name\n");
5415 ctxt->wellFormed = 0;
5416 ctxt->disableSAX = 1;
5417 } else {
5418 if (RAW == ';') {
5419 NEXT;
5420 /*
5421 * Ask first SAX for entity resolution, otherwise try the
5422 * predefined set.
5423 */
5424 if (ctxt->sax != NULL) {
5425 if (ctxt->sax->getEntity != NULL)
5426 ent = ctxt->sax->getEntity(ctxt->userData, name);
5427 if (ent == NULL)
5428 ent = xmlGetPredefinedEntity(name);
5429 }
5430 /*
5431 * [ WFC: Entity Declared ]
5432 * In a document without any DTD, a document with only an
5433 * internal DTD subset which contains no parameter entity
5434 * references, or a document with "standalone='yes'", the
5435 * Name given in the entity reference must match that in an
5436 * entity declaration, except that well-formed documents
5437 * need not declare any of the following entities: amp, lt,
5438 * gt, apos, quot.
5439 * The declaration of a parameter entity must precede any
5440 * reference to it.
5441 * Similarly, the declaration of a general entity must
5442 * precede any reference to it which appears in a default
5443 * value in an attribute-list declaration. Note that if
5444 * entities are declared in the external subset or in
5445 * external parameter entities, a non-validating processor
5446 * is not obligated to read and process their declarations;
5447 * for such documents, the rule that an entity must be
5448 * declared is a well-formedness constraint only if
5449 * standalone='yes'.
5450 */
5451 if (ent == NULL) {
5452 if ((ctxt->standalone == 1) ||
5453 ((ctxt->hasExternalSubset == 0) &&
5454 (ctxt->hasPErefs == 0))) {
5455 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5457 ctxt->sax->error(ctxt->userData,
5458 "Entity '%s' not defined\n", name);
5459 ctxt->wellFormed = 0;
5460 ctxt->disableSAX = 1;
5461 } else {
5462 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005464 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005465 "Entity '%s' not defined\n", name);
5466 }
5467 }
5468
5469 /*
5470 * [ WFC: Parsed Entity ]
5471 * An entity reference must not contain the name of an
5472 * unparsed entity
5473 */
5474 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5475 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5477 ctxt->sax->error(ctxt->userData,
5478 "Entity reference to unparsed entity %s\n", name);
5479 ctxt->wellFormed = 0;
5480 ctxt->disableSAX = 1;
5481 }
5482
5483 /*
5484 * [ WFC: No External Entity References ]
5485 * Attribute values cannot contain direct or indirect
5486 * entity references to external entities.
5487 */
5488 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5489 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5490 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5491 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5492 ctxt->sax->error(ctxt->userData,
5493 "Attribute references external entity '%s'\n", name);
5494 ctxt->wellFormed = 0;
5495 ctxt->disableSAX = 1;
5496 }
5497 /*
5498 * [ WFC: No < in Attribute Values ]
5499 * The replacement text of any entity referred to directly or
5500 * indirectly in an attribute value (other than "&lt;") must
5501 * not contain a <.
5502 */
5503 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5504 (ent != NULL) &&
5505 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5506 (ent->content != NULL) &&
5507 (xmlStrchr(ent->content, '<'))) {
5508 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5509 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5510 ctxt->sax->error(ctxt->userData,
5511 "'<' in entity '%s' is not allowed in attributes values\n", name);
5512 ctxt->wellFormed = 0;
5513 ctxt->disableSAX = 1;
5514 }
5515
5516 /*
5517 * Internal check, no parameter entities here ...
5518 */
5519 else {
5520 switch (ent->etype) {
5521 case XML_INTERNAL_PARAMETER_ENTITY:
5522 case XML_EXTERNAL_PARAMETER_ENTITY:
5523 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5525 ctxt->sax->error(ctxt->userData,
5526 "Attempt to reference the parameter entity '%s'\n", name);
5527 ctxt->wellFormed = 0;
5528 ctxt->disableSAX = 1;
5529 break;
5530 default:
5531 break;
5532 }
5533 }
5534
5535 /*
5536 * [ WFC: No Recursion ]
5537 * A parsed entity must not contain a recursive reference
5538 * to itself, either directly or indirectly.
5539 * Done somewhere else
5540 */
5541
5542 } else {
5543 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5545 ctxt->sax->error(ctxt->userData,
5546 "xmlParseEntityRef: expecting ';'\n");
5547 ctxt->wellFormed = 0;
5548 ctxt->disableSAX = 1;
5549 }
5550 xmlFree(name);
5551 }
5552 }
5553 return(ent);
5554}
5555
5556/**
5557 * xmlParseStringEntityRef:
5558 * @ctxt: an XML parser context
5559 * @str: a pointer to an index in the string
5560 *
5561 * parse ENTITY references declarations, but this version parses it from
5562 * a string value.
5563 *
5564 * [68] EntityRef ::= '&' Name ';'
5565 *
5566 * [ WFC: Entity Declared ]
5567 * In a document without any DTD, a document with only an internal DTD
5568 * subset which contains no parameter entity references, or a document
5569 * with "standalone='yes'", the Name given in the entity reference
5570 * must match that in an entity declaration, except that well-formed
5571 * documents need not declare any of the following entities: amp, lt,
5572 * gt, apos, quot. The declaration of a parameter entity must precede
5573 * any reference to it. Similarly, the declaration of a general entity
5574 * must precede any reference to it which appears in a default value in an
5575 * attribute-list declaration. Note that if entities are declared in the
5576 * external subset or in external parameter entities, a non-validating
5577 * processor is not obligated to read and process their declarations;
5578 * for such documents, the rule that an entity must be declared is a
5579 * well-formedness constraint only if standalone='yes'.
5580 *
5581 * [ WFC: Parsed Entity ]
5582 * An entity reference must not contain the name of an unparsed entity
5583 *
5584 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5585 * is updated to the current location in the string.
5586 */
5587xmlEntityPtr
5588xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5589 xmlChar *name;
5590 const xmlChar *ptr;
5591 xmlChar cur;
5592 xmlEntityPtr ent = NULL;
5593
5594 if ((str == NULL) || (*str == NULL))
5595 return(NULL);
5596 ptr = *str;
5597 cur = *ptr;
5598 if (cur == '&') {
5599 ptr++;
5600 cur = *ptr;
5601 name = xmlParseStringName(ctxt, &ptr);
5602 if (name == NULL) {
5603 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5605 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005606 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005607 ctxt->wellFormed = 0;
5608 ctxt->disableSAX = 1;
5609 } else {
5610 if (*ptr == ';') {
5611 ptr++;
5612 /*
5613 * Ask first SAX for entity resolution, otherwise try the
5614 * predefined set.
5615 */
5616 if (ctxt->sax != NULL) {
5617 if (ctxt->sax->getEntity != NULL)
5618 ent = ctxt->sax->getEntity(ctxt->userData, name);
5619 if (ent == NULL)
5620 ent = xmlGetPredefinedEntity(name);
5621 }
5622 /*
5623 * [ WFC: Entity Declared ]
5624 * In a document without any DTD, a document with only an
5625 * internal DTD subset which contains no parameter entity
5626 * references, or a document with "standalone='yes'", the
5627 * Name given in the entity reference must match that in an
5628 * entity declaration, except that well-formed documents
5629 * need not declare any of the following entities: amp, lt,
5630 * gt, apos, quot.
5631 * The declaration of a parameter entity must precede any
5632 * reference to it.
5633 * Similarly, the declaration of a general entity must
5634 * precede any reference to it which appears in a default
5635 * value in an attribute-list declaration. Note that if
5636 * entities are declared in the external subset or in
5637 * external parameter entities, a non-validating processor
5638 * is not obligated to read and process their declarations;
5639 * for such documents, the rule that an entity must be
5640 * declared is a well-formedness constraint only if
5641 * standalone='yes'.
5642 */
5643 if (ent == NULL) {
5644 if ((ctxt->standalone == 1) ||
5645 ((ctxt->hasExternalSubset == 0) &&
5646 (ctxt->hasPErefs == 0))) {
5647 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5649 ctxt->sax->error(ctxt->userData,
5650 "Entity '%s' not defined\n", name);
5651 ctxt->wellFormed = 0;
5652 ctxt->disableSAX = 1;
5653 } else {
5654 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5655 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5656 ctxt->sax->warning(ctxt->userData,
5657 "Entity '%s' not defined\n", name);
5658 }
5659 }
5660
5661 /*
5662 * [ WFC: Parsed Entity ]
5663 * An entity reference must not contain the name of an
5664 * unparsed entity
5665 */
5666 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5667 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5669 ctxt->sax->error(ctxt->userData,
5670 "Entity reference to unparsed entity %s\n", name);
5671 ctxt->wellFormed = 0;
5672 ctxt->disableSAX = 1;
5673 }
5674
5675 /*
5676 * [ WFC: No External Entity References ]
5677 * Attribute values cannot contain direct or indirect
5678 * entity references to external entities.
5679 */
5680 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5681 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5682 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5684 ctxt->sax->error(ctxt->userData,
5685 "Attribute references external entity '%s'\n", name);
5686 ctxt->wellFormed = 0;
5687 ctxt->disableSAX = 1;
5688 }
5689 /*
5690 * [ WFC: No < in Attribute Values ]
5691 * The replacement text of any entity referred to directly or
5692 * indirectly in an attribute value (other than "&lt;") must
5693 * not contain a <.
5694 */
5695 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5696 (ent != NULL) &&
5697 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5698 (ent->content != NULL) &&
5699 (xmlStrchr(ent->content, '<'))) {
5700 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5701 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5702 ctxt->sax->error(ctxt->userData,
5703 "'<' in entity '%s' is not allowed in attributes values\n", name);
5704 ctxt->wellFormed = 0;
5705 ctxt->disableSAX = 1;
5706 }
5707
5708 /*
5709 * Internal check, no parameter entities here ...
5710 */
5711 else {
5712 switch (ent->etype) {
5713 case XML_INTERNAL_PARAMETER_ENTITY:
5714 case XML_EXTERNAL_PARAMETER_ENTITY:
5715 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5716 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5717 ctxt->sax->error(ctxt->userData,
5718 "Attempt to reference the parameter entity '%s'\n", name);
5719 ctxt->wellFormed = 0;
5720 ctxt->disableSAX = 1;
5721 break;
5722 default:
5723 break;
5724 }
5725 }
5726
5727 /*
5728 * [ WFC: No Recursion ]
5729 * A parsed entity must not contain a recursive reference
5730 * to itself, either directly or indirectly.
5731 * Done somewhwere else
5732 */
5733
5734 } else {
5735 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5737 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005738 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005739 ctxt->wellFormed = 0;
5740 ctxt->disableSAX = 1;
5741 }
5742 xmlFree(name);
5743 }
5744 }
5745 *str = ptr;
5746 return(ent);
5747}
5748
5749/**
5750 * xmlParsePEReference:
5751 * @ctxt: an XML parser context
5752 *
5753 * parse PEReference declarations
5754 * The entity content is handled directly by pushing it's content as
5755 * a new input stream.
5756 *
5757 * [69] PEReference ::= '%' Name ';'
5758 *
5759 * [ WFC: No Recursion ]
5760 * A parsed entity must not contain a recursive
5761 * reference to itself, either directly or indirectly.
5762 *
5763 * [ WFC: Entity Declared ]
5764 * In a document without any DTD, a document with only an internal DTD
5765 * subset which contains no parameter entity references, or a document
5766 * with "standalone='yes'", ... ... The declaration of a parameter
5767 * entity must precede any reference to it...
5768 *
5769 * [ VC: Entity Declared ]
5770 * In a document with an external subset or external parameter entities
5771 * with "standalone='no'", ... ... The declaration of a parameter entity
5772 * must precede any reference to it...
5773 *
5774 * [ WFC: In DTD ]
5775 * Parameter-entity references may only appear in the DTD.
5776 * NOTE: misleading but this is handled.
5777 */
5778void
5779xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5780 xmlChar *name;
5781 xmlEntityPtr entity = NULL;
5782 xmlParserInputPtr input;
5783
5784 if (RAW == '%') {
5785 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005786 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005787 if (name == NULL) {
5788 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5789 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5790 ctxt->sax->error(ctxt->userData,
5791 "xmlParsePEReference: no name\n");
5792 ctxt->wellFormed = 0;
5793 ctxt->disableSAX = 1;
5794 } else {
5795 if (RAW == ';') {
5796 NEXT;
5797 if ((ctxt->sax != NULL) &&
5798 (ctxt->sax->getParameterEntity != NULL))
5799 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5800 name);
5801 if (entity == NULL) {
5802 /*
5803 * [ WFC: Entity Declared ]
5804 * In a document without any DTD, a document with only an
5805 * internal DTD subset which contains no parameter entity
5806 * references, or a document with "standalone='yes'", ...
5807 * ... The declaration of a parameter entity must precede
5808 * any reference to it...
5809 */
5810 if ((ctxt->standalone == 1) ||
5811 ((ctxt->hasExternalSubset == 0) &&
5812 (ctxt->hasPErefs == 0))) {
5813 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5814 if ((!ctxt->disableSAX) &&
5815 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5816 ctxt->sax->error(ctxt->userData,
5817 "PEReference: %%%s; not found\n", name);
5818 ctxt->wellFormed = 0;
5819 ctxt->disableSAX = 1;
5820 } else {
5821 /*
5822 * [ VC: Entity Declared ]
5823 * In a document with an external subset or external
5824 * parameter entities with "standalone='no'", ...
5825 * ... The declaration of a parameter entity must precede
5826 * any reference to it...
5827 */
5828 if ((!ctxt->disableSAX) &&
5829 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5830 ctxt->sax->warning(ctxt->userData,
5831 "PEReference: %%%s; not found\n", name);
5832 ctxt->valid = 0;
5833 }
5834 } else {
5835 /*
5836 * Internal checking in case the entity quest barfed
5837 */
5838 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5839 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5840 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5841 ctxt->sax->warning(ctxt->userData,
5842 "Internal: %%%s; is not a parameter entity\n", name);
5843 } else {
5844 /*
5845 * TODO !!!
5846 * handle the extra spaces added before and after
5847 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5848 */
5849 input = xmlNewEntityInputStream(ctxt, entity);
5850 xmlPushInput(ctxt, input);
5851 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5852 (RAW == '<') && (NXT(1) == '?') &&
5853 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5854 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5855 xmlParseTextDecl(ctxt);
5856 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5857 /*
5858 * The XML REC instructs us to stop parsing
5859 * right here
5860 */
5861 ctxt->instate = XML_PARSER_EOF;
5862 xmlFree(name);
5863 return;
5864 }
5865 }
5866 if (ctxt->token == 0)
5867 ctxt->token = ' ';
5868 }
5869 }
5870 ctxt->hasPErefs = 1;
5871 } else {
5872 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5874 ctxt->sax->error(ctxt->userData,
5875 "xmlParsePEReference: expecting ';'\n");
5876 ctxt->wellFormed = 0;
5877 ctxt->disableSAX = 1;
5878 }
5879 xmlFree(name);
5880 }
5881 }
5882}
5883
5884/**
5885 * xmlParseStringPEReference:
5886 * @ctxt: an XML parser context
5887 * @str: a pointer to an index in the string
5888 *
5889 * parse PEReference declarations
5890 *
5891 * [69] PEReference ::= '%' Name ';'
5892 *
5893 * [ WFC: No Recursion ]
5894 * A parsed entity must not contain a recursive
5895 * reference to itself, either directly or indirectly.
5896 *
5897 * [ WFC: Entity Declared ]
5898 * In a document without any DTD, a document with only an internal DTD
5899 * subset which contains no parameter entity references, or a document
5900 * with "standalone='yes'", ... ... The declaration of a parameter
5901 * entity must precede any reference to it...
5902 *
5903 * [ VC: Entity Declared ]
5904 * In a document with an external subset or external parameter entities
5905 * with "standalone='no'", ... ... The declaration of a parameter entity
5906 * must precede any reference to it...
5907 *
5908 * [ WFC: In DTD ]
5909 * Parameter-entity references may only appear in the DTD.
5910 * NOTE: misleading but this is handled.
5911 *
5912 * Returns the string of the entity content.
5913 * str is updated to the current value of the index
5914 */
5915xmlEntityPtr
5916xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5917 const xmlChar *ptr;
5918 xmlChar cur;
5919 xmlChar *name;
5920 xmlEntityPtr entity = NULL;
5921
5922 if ((str == NULL) || (*str == NULL)) return(NULL);
5923 ptr = *str;
5924 cur = *ptr;
5925 if (cur == '%') {
5926 ptr++;
5927 cur = *ptr;
5928 name = xmlParseStringName(ctxt, &ptr);
5929 if (name == NULL) {
5930 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5932 ctxt->sax->error(ctxt->userData,
5933 "xmlParseStringPEReference: no name\n");
5934 ctxt->wellFormed = 0;
5935 ctxt->disableSAX = 1;
5936 } else {
5937 cur = *ptr;
5938 if (cur == ';') {
5939 ptr++;
5940 cur = *ptr;
5941 if ((ctxt->sax != NULL) &&
5942 (ctxt->sax->getParameterEntity != NULL))
5943 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5944 name);
5945 if (entity == NULL) {
5946 /*
5947 * [ WFC: Entity Declared ]
5948 * In a document without any DTD, a document with only an
5949 * internal DTD subset which contains no parameter entity
5950 * references, or a document with "standalone='yes'", ...
5951 * ... The declaration of a parameter entity must precede
5952 * any reference to it...
5953 */
5954 if ((ctxt->standalone == 1) ||
5955 ((ctxt->hasExternalSubset == 0) &&
5956 (ctxt->hasPErefs == 0))) {
5957 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5959 ctxt->sax->error(ctxt->userData,
5960 "PEReference: %%%s; not found\n", name);
5961 ctxt->wellFormed = 0;
5962 ctxt->disableSAX = 1;
5963 } else {
5964 /*
5965 * [ VC: Entity Declared ]
5966 * In a document with an external subset or external
5967 * parameter entities with "standalone='no'", ...
5968 * ... The declaration of a parameter entity must
5969 * precede any reference to it...
5970 */
5971 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5972 ctxt->sax->warning(ctxt->userData,
5973 "PEReference: %%%s; not found\n", name);
5974 ctxt->valid = 0;
5975 }
5976 } else {
5977 /*
5978 * Internal checking in case the entity quest barfed
5979 */
5980 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5981 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5982 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5983 ctxt->sax->warning(ctxt->userData,
5984 "Internal: %%%s; is not a parameter entity\n", name);
5985 }
5986 }
5987 ctxt->hasPErefs = 1;
5988 } else {
5989 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5991 ctxt->sax->error(ctxt->userData,
5992 "xmlParseStringPEReference: expecting ';'\n");
5993 ctxt->wellFormed = 0;
5994 ctxt->disableSAX = 1;
5995 }
5996 xmlFree(name);
5997 }
5998 }
5999 *str = ptr;
6000 return(entity);
6001}
6002
6003/**
6004 * xmlParseDocTypeDecl:
6005 * @ctxt: an XML parser context
6006 *
6007 * parse a DOCTYPE declaration
6008 *
6009 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
6010 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6011 *
6012 * [ VC: Root Element Type ]
6013 * The Name in the document type declaration must match the element
6014 * type of the root element.
6015 */
6016
6017void
6018xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
6019 xmlChar *name = NULL;
6020 xmlChar *ExternalID = NULL;
6021 xmlChar *URI = NULL;
6022
6023 /*
6024 * We know that '<!DOCTYPE' has been detected.
6025 */
6026 SKIP(9);
6027
6028 SKIP_BLANKS;
6029
6030 /*
6031 * Parse the DOCTYPE name.
6032 */
6033 name = xmlParseName(ctxt);
6034 if (name == NULL) {
6035 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6037 ctxt->sax->error(ctxt->userData,
6038 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
6039 ctxt->wellFormed = 0;
6040 ctxt->disableSAX = 1;
6041 }
6042 ctxt->intSubName = name;
6043
6044 SKIP_BLANKS;
6045
6046 /*
6047 * Check for SystemID and ExternalID
6048 */
6049 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6050
6051 if ((URI != NULL) || (ExternalID != NULL)) {
6052 ctxt->hasExternalSubset = 1;
6053 }
6054 ctxt->extSubURI = URI;
6055 ctxt->extSubSystem = ExternalID;
6056
6057 SKIP_BLANKS;
6058
6059 /*
6060 * Create and update the internal subset.
6061 */
6062 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6063 (!ctxt->disableSAX))
6064 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6065
6066 /*
6067 * Is there any internal subset declarations ?
6068 * they are handled separately in xmlParseInternalSubset()
6069 */
6070 if (RAW == '[')
6071 return;
6072
6073 /*
6074 * We should be at the end of the DOCTYPE declaration.
6075 */
6076 if (RAW != '>') {
6077 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006079 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006080 ctxt->wellFormed = 0;
6081 ctxt->disableSAX = 1;
6082 }
6083 NEXT;
6084}
6085
6086/**
6087 * xmlParseInternalsubset:
6088 * @ctxt: an XML parser context
6089 *
6090 * parse the internal subset declaration
6091 *
6092 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6093 */
6094
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006095static void
Owen Taylor3473f882001-02-23 17:55:21 +00006096xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6097 /*
6098 * Is there any DTD definition ?
6099 */
6100 if (RAW == '[') {
6101 ctxt->instate = XML_PARSER_DTD;
6102 NEXT;
6103 /*
6104 * Parse the succession of Markup declarations and
6105 * PEReferences.
6106 * Subsequence (markupdecl | PEReference | S)*
6107 */
6108 while (RAW != ']') {
6109 const xmlChar *check = CUR_PTR;
6110 int cons = ctxt->input->consumed;
6111
6112 SKIP_BLANKS;
6113 xmlParseMarkupDecl(ctxt);
6114 xmlParsePEReference(ctxt);
6115
6116 /*
6117 * Pop-up of finished entities.
6118 */
6119 while ((RAW == 0) && (ctxt->inputNr > 1))
6120 xmlPopInput(ctxt);
6121
6122 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6123 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6125 ctxt->sax->error(ctxt->userData,
6126 "xmlParseInternalSubset: error detected in Markup declaration\n");
6127 ctxt->wellFormed = 0;
6128 ctxt->disableSAX = 1;
6129 break;
6130 }
6131 }
6132 if (RAW == ']') {
6133 NEXT;
6134 SKIP_BLANKS;
6135 }
6136 }
6137
6138 /*
6139 * We should be at the end of the DOCTYPE declaration.
6140 */
6141 if (RAW != '>') {
6142 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6143 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006144 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006145 ctxt->wellFormed = 0;
6146 ctxt->disableSAX = 1;
6147 }
6148 NEXT;
6149}
6150
6151/**
6152 * xmlParseAttribute:
6153 * @ctxt: an XML parser context
6154 * @value: a xmlChar ** used to store the value of the attribute
6155 *
6156 * parse an attribute
6157 *
6158 * [41] Attribute ::= Name Eq AttValue
6159 *
6160 * [ WFC: No External Entity References ]
6161 * Attribute values cannot contain direct or indirect entity references
6162 * to external entities.
6163 *
6164 * [ WFC: No < in Attribute Values ]
6165 * The replacement text of any entity referred to directly or indirectly in
6166 * an attribute value (other than "&lt;") must not contain a <.
6167 *
6168 * [ VC: Attribute Value Type ]
6169 * The attribute must have been declared; the value must be of the type
6170 * declared for it.
6171 *
6172 * [25] Eq ::= S? '=' S?
6173 *
6174 * With namespace:
6175 *
6176 * [NS 11] Attribute ::= QName Eq AttValue
6177 *
6178 * Also the case QName == xmlns:??? is handled independently as a namespace
6179 * definition.
6180 *
6181 * Returns the attribute name, and the value in *value.
6182 */
6183
6184xmlChar *
6185xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6186 xmlChar *name, *val;
6187
6188 *value = NULL;
6189 name = xmlParseName(ctxt);
6190 if (name == NULL) {
6191 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6193 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6194 ctxt->wellFormed = 0;
6195 ctxt->disableSAX = 1;
6196 return(NULL);
6197 }
6198
6199 /*
6200 * read the value
6201 */
6202 SKIP_BLANKS;
6203 if (RAW == '=') {
6204 NEXT;
6205 SKIP_BLANKS;
6206 val = xmlParseAttValue(ctxt);
6207 ctxt->instate = XML_PARSER_CONTENT;
6208 } else {
6209 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6210 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6211 ctxt->sax->error(ctxt->userData,
6212 "Specification mandate value for attribute %s\n", name);
6213 ctxt->wellFormed = 0;
6214 ctxt->disableSAX = 1;
6215 xmlFree(name);
6216 return(NULL);
6217 }
6218
6219 /*
6220 * Check that xml:lang conforms to the specification
6221 * No more registered as an error, just generate a warning now
6222 * since this was deprecated in XML second edition
6223 */
6224 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6225 if (!xmlCheckLanguageID(val)) {
6226 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6227 ctxt->sax->warning(ctxt->userData,
6228 "Malformed value for xml:lang : %s\n", val);
6229 }
6230 }
6231
6232 /*
6233 * Check that xml:space conforms to the specification
6234 */
6235 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6236 if (xmlStrEqual(val, BAD_CAST "default"))
6237 *(ctxt->space) = 0;
6238 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6239 *(ctxt->space) = 1;
6240 else {
6241 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6242 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6243 ctxt->sax->error(ctxt->userData,
6244"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6245 val);
6246 ctxt->wellFormed = 0;
6247 ctxt->disableSAX = 1;
6248 }
6249 }
6250
6251 *value = val;
6252 return(name);
6253}
6254
6255/**
6256 * xmlParseStartTag:
6257 * @ctxt: an XML parser context
6258 *
6259 * parse a start of tag either for rule element or
6260 * EmptyElement. In both case we don't parse the tag closing chars.
6261 *
6262 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6263 *
6264 * [ WFC: Unique Att Spec ]
6265 * No attribute name may appear more than once in the same start-tag or
6266 * empty-element tag.
6267 *
6268 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6269 *
6270 * [ WFC: Unique Att Spec ]
6271 * No attribute name may appear more than once in the same start-tag or
6272 * empty-element tag.
6273 *
6274 * With namespace:
6275 *
6276 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6277 *
6278 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6279 *
6280 * Returns the element name parsed
6281 */
6282
6283xmlChar *
6284xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6285 xmlChar *name;
6286 xmlChar *attname;
6287 xmlChar *attvalue;
6288 const xmlChar **atts = NULL;
6289 int nbatts = 0;
6290 int maxatts = 0;
6291 int i;
6292
6293 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006294 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006295
6296 name = xmlParseName(ctxt);
6297 if (name == NULL) {
6298 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6300 ctxt->sax->error(ctxt->userData,
6301 "xmlParseStartTag: invalid element name\n");
6302 ctxt->wellFormed = 0;
6303 ctxt->disableSAX = 1;
6304 return(NULL);
6305 }
6306
6307 /*
6308 * Now parse the attributes, it ends up with the ending
6309 *
6310 * (S Attribute)* S?
6311 */
6312 SKIP_BLANKS;
6313 GROW;
6314
Daniel Veillard21a0f912001-02-25 19:54:14 +00006315 while ((RAW != '>') &&
6316 ((RAW != '/') || (NXT(1) != '>')) &&
6317 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006318 const xmlChar *q = CUR_PTR;
6319 int cons = ctxt->input->consumed;
6320
6321 attname = xmlParseAttribute(ctxt, &attvalue);
6322 if ((attname != NULL) && (attvalue != NULL)) {
6323 /*
6324 * [ WFC: Unique Att Spec ]
6325 * No attribute name may appear more than once in the same
6326 * start-tag or empty-element tag.
6327 */
6328 for (i = 0; i < nbatts;i += 2) {
6329 if (xmlStrEqual(atts[i], attname)) {
6330 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6331 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6332 ctxt->sax->error(ctxt->userData,
6333 "Attribute %s redefined\n",
6334 attname);
6335 ctxt->wellFormed = 0;
6336 ctxt->disableSAX = 1;
6337 xmlFree(attname);
6338 xmlFree(attvalue);
6339 goto failed;
6340 }
6341 }
6342
6343 /*
6344 * Add the pair to atts
6345 */
6346 if (atts == NULL) {
6347 maxatts = 10;
6348 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6349 if (atts == NULL) {
6350 xmlGenericError(xmlGenericErrorContext,
6351 "malloc of %ld byte failed\n",
6352 maxatts * (long)sizeof(xmlChar *));
6353 return(NULL);
6354 }
6355 } else if (nbatts + 4 > maxatts) {
6356 maxatts *= 2;
6357 atts = (const xmlChar **) xmlRealloc((void *) atts,
6358 maxatts * sizeof(xmlChar *));
6359 if (atts == NULL) {
6360 xmlGenericError(xmlGenericErrorContext,
6361 "realloc of %ld byte failed\n",
6362 maxatts * (long)sizeof(xmlChar *));
6363 return(NULL);
6364 }
6365 }
6366 atts[nbatts++] = attname;
6367 atts[nbatts++] = attvalue;
6368 atts[nbatts] = NULL;
6369 atts[nbatts + 1] = NULL;
6370 } else {
6371 if (attname != NULL)
6372 xmlFree(attname);
6373 if (attvalue != NULL)
6374 xmlFree(attvalue);
6375 }
6376
6377failed:
6378
6379 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6380 break;
6381 if (!IS_BLANK(RAW)) {
6382 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6384 ctxt->sax->error(ctxt->userData,
6385 "attributes construct error\n");
6386 ctxt->wellFormed = 0;
6387 ctxt->disableSAX = 1;
6388 }
6389 SKIP_BLANKS;
6390 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6391 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6393 ctxt->sax->error(ctxt->userData,
6394 "xmlParseStartTag: problem parsing attributes\n");
6395 ctxt->wellFormed = 0;
6396 ctxt->disableSAX = 1;
6397 break;
6398 }
6399 GROW;
6400 }
6401
6402 /*
6403 * SAX: Start of Element !
6404 */
6405 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6406 (!ctxt->disableSAX))
6407 ctxt->sax->startElement(ctxt->userData, name, atts);
6408
6409 if (atts != NULL) {
6410 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6411 xmlFree((void *) atts);
6412 }
6413 return(name);
6414}
6415
6416/**
6417 * xmlParseEndTag:
6418 * @ctxt: an XML parser context
6419 *
6420 * parse an end of tag
6421 *
6422 * [42] ETag ::= '</' Name S? '>'
6423 *
6424 * With namespace
6425 *
6426 * [NS 9] ETag ::= '</' QName S? '>'
6427 */
6428
6429void
6430xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6431 xmlChar *name;
6432 xmlChar *oldname;
6433
6434 GROW;
6435 if ((RAW != '<') || (NXT(1) != '/')) {
6436 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6437 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6438 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6439 ctxt->wellFormed = 0;
6440 ctxt->disableSAX = 1;
6441 return;
6442 }
6443 SKIP(2);
6444
6445 name = xmlParseName(ctxt);
6446
6447 /*
6448 * We should definitely be at the ending "S? '>'" part
6449 */
6450 GROW;
6451 SKIP_BLANKS;
6452 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6453 ctxt->errNo = XML_ERR_GT_REQUIRED;
6454 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6455 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6456 ctxt->wellFormed = 0;
6457 ctxt->disableSAX = 1;
6458 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006459 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006460
6461 /*
6462 * [ WFC: Element Type Match ]
6463 * The Name in an element's end-tag must match the element type in the
6464 * start-tag.
6465 *
6466 */
6467 if ((name == NULL) || (ctxt->name == NULL) ||
6468 (!xmlStrEqual(name, ctxt->name))) {
6469 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6471 if ((name != NULL) && (ctxt->name != NULL)) {
6472 ctxt->sax->error(ctxt->userData,
6473 "Opening and ending tag mismatch: %s and %s\n",
6474 ctxt->name, name);
6475 } else if (ctxt->name != NULL) {
6476 ctxt->sax->error(ctxt->userData,
6477 "Ending tag eror for: %s\n", ctxt->name);
6478 } else {
6479 ctxt->sax->error(ctxt->userData,
6480 "Ending tag error: internal error ???\n");
6481 }
6482
6483 }
6484 ctxt->wellFormed = 0;
6485 ctxt->disableSAX = 1;
6486 }
6487
6488 /*
6489 * SAX: End of Tag
6490 */
6491 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6492 (!ctxt->disableSAX))
6493 ctxt->sax->endElement(ctxt->userData, name);
6494
6495 if (name != NULL)
6496 xmlFree(name);
6497 oldname = namePop(ctxt);
6498 spacePop(ctxt);
6499 if (oldname != NULL) {
6500#ifdef DEBUG_STACK
6501 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6502#endif
6503 xmlFree(oldname);
6504 }
6505 return;
6506}
6507
6508/**
6509 * xmlParseCDSect:
6510 * @ctxt: an XML parser context
6511 *
6512 * Parse escaped pure raw content.
6513 *
6514 * [18] CDSect ::= CDStart CData CDEnd
6515 *
6516 * [19] CDStart ::= '<![CDATA['
6517 *
6518 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6519 *
6520 * [21] CDEnd ::= ']]>'
6521 */
6522void
6523xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6524 xmlChar *buf = NULL;
6525 int len = 0;
6526 int size = XML_PARSER_BUFFER_SIZE;
6527 int r, rl;
6528 int s, sl;
6529 int cur, l;
6530 int count = 0;
6531
6532 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6533 (NXT(2) == '[') && (NXT(3) == 'C') &&
6534 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6535 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6536 (NXT(8) == '[')) {
6537 SKIP(9);
6538 } else
6539 return;
6540
6541 ctxt->instate = XML_PARSER_CDATA_SECTION;
6542 r = CUR_CHAR(rl);
6543 if (!IS_CHAR(r)) {
6544 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6545 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6546 ctxt->sax->error(ctxt->userData,
6547 "CData section not finished\n");
6548 ctxt->wellFormed = 0;
6549 ctxt->disableSAX = 1;
6550 ctxt->instate = XML_PARSER_CONTENT;
6551 return;
6552 }
6553 NEXTL(rl);
6554 s = CUR_CHAR(sl);
6555 if (!IS_CHAR(s)) {
6556 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6558 ctxt->sax->error(ctxt->userData,
6559 "CData section not finished\n");
6560 ctxt->wellFormed = 0;
6561 ctxt->disableSAX = 1;
6562 ctxt->instate = XML_PARSER_CONTENT;
6563 return;
6564 }
6565 NEXTL(sl);
6566 cur = CUR_CHAR(l);
6567 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6568 if (buf == NULL) {
6569 xmlGenericError(xmlGenericErrorContext,
6570 "malloc of %d byte failed\n", size);
6571 return;
6572 }
6573 while (IS_CHAR(cur) &&
6574 ((r != ']') || (s != ']') || (cur != '>'))) {
6575 if (len + 5 >= size) {
6576 size *= 2;
6577 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6578 if (buf == NULL) {
6579 xmlGenericError(xmlGenericErrorContext,
6580 "realloc of %d byte failed\n", size);
6581 return;
6582 }
6583 }
6584 COPY_BUF(rl,buf,len,r);
6585 r = s;
6586 rl = sl;
6587 s = cur;
6588 sl = l;
6589 count++;
6590 if (count > 50) {
6591 GROW;
6592 count = 0;
6593 }
6594 NEXTL(l);
6595 cur = CUR_CHAR(l);
6596 }
6597 buf[len] = 0;
6598 ctxt->instate = XML_PARSER_CONTENT;
6599 if (cur != '>') {
6600 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6602 ctxt->sax->error(ctxt->userData,
6603 "CData section not finished\n%.50s\n", buf);
6604 ctxt->wellFormed = 0;
6605 ctxt->disableSAX = 1;
6606 xmlFree(buf);
6607 return;
6608 }
6609 NEXTL(l);
6610
6611 /*
6612 * Ok the buffer is to be consumed as cdata.
6613 */
6614 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6615 if (ctxt->sax->cdataBlock != NULL)
6616 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006617 else if (ctxt->sax->characters != NULL)
6618 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006619 }
6620 xmlFree(buf);
6621}
6622
6623/**
6624 * xmlParseContent:
6625 * @ctxt: an XML parser context
6626 *
6627 * Parse a content:
6628 *
6629 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6630 */
6631
6632void
6633xmlParseContent(xmlParserCtxtPtr ctxt) {
6634 GROW;
6635 while (((RAW != 0) || (ctxt->token != 0)) &&
6636 ((RAW != '<') || (NXT(1) != '/'))) {
6637 const xmlChar *test = CUR_PTR;
6638 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006639 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006640 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006641
6642 /*
6643 * Handle possible processed charrefs.
6644 */
6645 if (ctxt->token != 0) {
6646 xmlParseCharData(ctxt, 0);
6647 }
6648 /*
6649 * First case : a Processing Instruction.
6650 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006651 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006652 xmlParsePI(ctxt);
6653 }
6654
6655 /*
6656 * Second case : a CDSection
6657 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006658 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006659 (NXT(2) == '[') && (NXT(3) == 'C') &&
6660 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6661 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6662 (NXT(8) == '[')) {
6663 xmlParseCDSect(ctxt);
6664 }
6665
6666 /*
6667 * Third case : a comment
6668 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006669 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006670 (NXT(2) == '-') && (NXT(3) == '-')) {
6671 xmlParseComment(ctxt);
6672 ctxt->instate = XML_PARSER_CONTENT;
6673 }
6674
6675 /*
6676 * Fourth case : a sub-element.
6677 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006678 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006679 xmlParseElement(ctxt);
6680 }
6681
6682 /*
6683 * Fifth case : a reference. If if has not been resolved,
6684 * parsing returns it's Name, create the node
6685 */
6686
Daniel Veillard21a0f912001-02-25 19:54:14 +00006687 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006688 xmlParseReference(ctxt);
6689 }
6690
6691 /*
6692 * Last case, text. Note that References are handled directly.
6693 */
6694 else {
6695 xmlParseCharData(ctxt, 0);
6696 }
6697
6698 GROW;
6699 /*
6700 * Pop-up of finished entities.
6701 */
6702 while ((RAW == 0) && (ctxt->inputNr > 1))
6703 xmlPopInput(ctxt);
6704 SHRINK;
6705
6706 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6707 (tok == ctxt->token)) {
6708 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6710 ctxt->sax->error(ctxt->userData,
6711 "detected an error in element content\n");
6712 ctxt->wellFormed = 0;
6713 ctxt->disableSAX = 1;
6714 ctxt->instate = XML_PARSER_EOF;
6715 break;
6716 }
6717 }
6718}
6719
6720/**
6721 * xmlParseElement:
6722 * @ctxt: an XML parser context
6723 *
6724 * parse an XML element, this is highly recursive
6725 *
6726 * [39] element ::= EmptyElemTag | STag content ETag
6727 *
6728 * [ WFC: Element Type Match ]
6729 * The Name in an element's end-tag must match the element type in the
6730 * start-tag.
6731 *
6732 * [ VC: Element Valid ]
6733 * An element is valid if there is a declaration matching elementdecl
6734 * where the Name matches the element type and one of the following holds:
6735 * - The declaration matches EMPTY and the element has no content.
6736 * - The declaration matches children and the sequence of child elements
6737 * belongs to the language generated by the regular expression in the
6738 * content model, with optional white space (characters matching the
6739 * nonterminal S) between each pair of child elements.
6740 * - The declaration matches Mixed and the content consists of character
6741 * data and child elements whose types match names in the content model.
6742 * - The declaration matches ANY, and the types of any child elements have
6743 * been declared.
6744 */
6745
6746void
6747xmlParseElement(xmlParserCtxtPtr ctxt) {
6748 const xmlChar *openTag = CUR_PTR;
6749 xmlChar *name;
6750 xmlChar *oldname;
6751 xmlParserNodeInfo node_info;
6752 xmlNodePtr ret;
6753
6754 /* Capture start position */
6755 if (ctxt->record_info) {
6756 node_info.begin_pos = ctxt->input->consumed +
6757 (CUR_PTR - ctxt->input->base);
6758 node_info.begin_line = ctxt->input->line;
6759 }
6760
6761 if (ctxt->spaceNr == 0)
6762 spacePush(ctxt, -1);
6763 else
6764 spacePush(ctxt, *ctxt->space);
6765
6766 name = xmlParseStartTag(ctxt);
6767 if (name == NULL) {
6768 spacePop(ctxt);
6769 return;
6770 }
6771 namePush(ctxt, name);
6772 ret = ctxt->node;
6773
6774 /*
6775 * [ VC: Root Element Type ]
6776 * The Name in the document type declaration must match the element
6777 * type of the root element.
6778 */
6779 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6780 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6781 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6782
6783 /*
6784 * Check for an Empty Element.
6785 */
6786 if ((RAW == '/') && (NXT(1) == '>')) {
6787 SKIP(2);
6788 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6789 (!ctxt->disableSAX))
6790 ctxt->sax->endElement(ctxt->userData, name);
6791 oldname = namePop(ctxt);
6792 spacePop(ctxt);
6793 if (oldname != NULL) {
6794#ifdef DEBUG_STACK
6795 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6796#endif
6797 xmlFree(oldname);
6798 }
6799 if ( ret != NULL && ctxt->record_info ) {
6800 node_info.end_pos = ctxt->input->consumed +
6801 (CUR_PTR - ctxt->input->base);
6802 node_info.end_line = ctxt->input->line;
6803 node_info.node = ret;
6804 xmlParserAddNodeInfo(ctxt, &node_info);
6805 }
6806 return;
6807 }
6808 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006809 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006810 } else {
6811 ctxt->errNo = XML_ERR_GT_REQUIRED;
6812 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6813 ctxt->sax->error(ctxt->userData,
6814 "Couldn't find end of Start Tag\n%.30s\n",
6815 openTag);
6816 ctxt->wellFormed = 0;
6817 ctxt->disableSAX = 1;
6818
6819 /*
6820 * end of parsing of this node.
6821 */
6822 nodePop(ctxt);
6823 oldname = namePop(ctxt);
6824 spacePop(ctxt);
6825 if (oldname != NULL) {
6826#ifdef DEBUG_STACK
6827 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6828#endif
6829 xmlFree(oldname);
6830 }
6831
6832 /*
6833 * Capture end position and add node
6834 */
6835 if ( ret != NULL && ctxt->record_info ) {
6836 node_info.end_pos = ctxt->input->consumed +
6837 (CUR_PTR - ctxt->input->base);
6838 node_info.end_line = ctxt->input->line;
6839 node_info.node = ret;
6840 xmlParserAddNodeInfo(ctxt, &node_info);
6841 }
6842 return;
6843 }
6844
6845 /*
6846 * Parse the content of the element:
6847 */
6848 xmlParseContent(ctxt);
6849 if (!IS_CHAR(RAW)) {
6850 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6852 ctxt->sax->error(ctxt->userData,
6853 "Premature end of data in tag %.30s\n", openTag);
6854 ctxt->wellFormed = 0;
6855 ctxt->disableSAX = 1;
6856
6857 /*
6858 * end of parsing of this node.
6859 */
6860 nodePop(ctxt);
6861 oldname = namePop(ctxt);
6862 spacePop(ctxt);
6863 if (oldname != NULL) {
6864#ifdef DEBUG_STACK
6865 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6866#endif
6867 xmlFree(oldname);
6868 }
6869 return;
6870 }
6871
6872 /*
6873 * parse the end of tag: '</' should be here.
6874 */
6875 xmlParseEndTag(ctxt);
6876
6877 /*
6878 * Capture end position and add node
6879 */
6880 if ( ret != NULL && ctxt->record_info ) {
6881 node_info.end_pos = ctxt->input->consumed +
6882 (CUR_PTR - ctxt->input->base);
6883 node_info.end_line = ctxt->input->line;
6884 node_info.node = ret;
6885 xmlParserAddNodeInfo(ctxt, &node_info);
6886 }
6887}
6888
6889/**
6890 * xmlParseVersionNum:
6891 * @ctxt: an XML parser context
6892 *
6893 * parse the XML version value.
6894 *
6895 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6896 *
6897 * Returns the string giving the XML version number, or NULL
6898 */
6899xmlChar *
6900xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6901 xmlChar *buf = NULL;
6902 int len = 0;
6903 int size = 10;
6904 xmlChar cur;
6905
6906 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6907 if (buf == NULL) {
6908 xmlGenericError(xmlGenericErrorContext,
6909 "malloc of %d byte failed\n", size);
6910 return(NULL);
6911 }
6912 cur = CUR;
6913 while (((cur >= 'a') && (cur <= 'z')) ||
6914 ((cur >= 'A') && (cur <= 'Z')) ||
6915 ((cur >= '0') && (cur <= '9')) ||
6916 (cur == '_') || (cur == '.') ||
6917 (cur == ':') || (cur == '-')) {
6918 if (len + 1 >= size) {
6919 size *= 2;
6920 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6921 if (buf == NULL) {
6922 xmlGenericError(xmlGenericErrorContext,
6923 "realloc of %d byte failed\n", size);
6924 return(NULL);
6925 }
6926 }
6927 buf[len++] = cur;
6928 NEXT;
6929 cur=CUR;
6930 }
6931 buf[len] = 0;
6932 return(buf);
6933}
6934
6935/**
6936 * xmlParseVersionInfo:
6937 * @ctxt: an XML parser context
6938 *
6939 * parse the XML version.
6940 *
6941 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6942 *
6943 * [25] Eq ::= S? '=' S?
6944 *
6945 * Returns the version string, e.g. "1.0"
6946 */
6947
6948xmlChar *
6949xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6950 xmlChar *version = NULL;
6951 const xmlChar *q;
6952
6953 if ((RAW == 'v') && (NXT(1) == 'e') &&
6954 (NXT(2) == 'r') && (NXT(3) == 's') &&
6955 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6956 (NXT(6) == 'n')) {
6957 SKIP(7);
6958 SKIP_BLANKS;
6959 if (RAW != '=') {
6960 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6961 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6962 ctxt->sax->error(ctxt->userData,
6963 "xmlParseVersionInfo : expected '='\n");
6964 ctxt->wellFormed = 0;
6965 ctxt->disableSAX = 1;
6966 return(NULL);
6967 }
6968 NEXT;
6969 SKIP_BLANKS;
6970 if (RAW == '"') {
6971 NEXT;
6972 q = CUR_PTR;
6973 version = xmlParseVersionNum(ctxt);
6974 if (RAW != '"') {
6975 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6976 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6977 ctxt->sax->error(ctxt->userData,
6978 "String not closed\n%.50s\n", q);
6979 ctxt->wellFormed = 0;
6980 ctxt->disableSAX = 1;
6981 } else
6982 NEXT;
6983 } else if (RAW == '\''){
6984 NEXT;
6985 q = CUR_PTR;
6986 version = xmlParseVersionNum(ctxt);
6987 if (RAW != '\'') {
6988 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6989 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6990 ctxt->sax->error(ctxt->userData,
6991 "String not closed\n%.50s\n", q);
6992 ctxt->wellFormed = 0;
6993 ctxt->disableSAX = 1;
6994 } else
6995 NEXT;
6996 } else {
6997 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6999 ctxt->sax->error(ctxt->userData,
7000 "xmlParseVersionInfo : expected ' or \"\n");
7001 ctxt->wellFormed = 0;
7002 ctxt->disableSAX = 1;
7003 }
7004 }
7005 return(version);
7006}
7007
7008/**
7009 * xmlParseEncName:
7010 * @ctxt: an XML parser context
7011 *
7012 * parse the XML encoding name
7013 *
7014 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
7015 *
7016 * Returns the encoding name value or NULL
7017 */
7018xmlChar *
7019xmlParseEncName(xmlParserCtxtPtr ctxt) {
7020 xmlChar *buf = NULL;
7021 int len = 0;
7022 int size = 10;
7023 xmlChar cur;
7024
7025 cur = CUR;
7026 if (((cur >= 'a') && (cur <= 'z')) ||
7027 ((cur >= 'A') && (cur <= 'Z'))) {
7028 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
7029 if (buf == NULL) {
7030 xmlGenericError(xmlGenericErrorContext,
7031 "malloc of %d byte failed\n", size);
7032 return(NULL);
7033 }
7034
7035 buf[len++] = cur;
7036 NEXT;
7037 cur = CUR;
7038 while (((cur >= 'a') && (cur <= 'z')) ||
7039 ((cur >= 'A') && (cur <= 'Z')) ||
7040 ((cur >= '0') && (cur <= '9')) ||
7041 (cur == '.') || (cur == '_') ||
7042 (cur == '-')) {
7043 if (len + 1 >= size) {
7044 size *= 2;
7045 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7046 if (buf == NULL) {
7047 xmlGenericError(xmlGenericErrorContext,
7048 "realloc of %d byte failed\n", size);
7049 return(NULL);
7050 }
7051 }
7052 buf[len++] = cur;
7053 NEXT;
7054 cur = CUR;
7055 if (cur == 0) {
7056 SHRINK;
7057 GROW;
7058 cur = CUR;
7059 }
7060 }
7061 buf[len] = 0;
7062 } else {
7063 ctxt->errNo = XML_ERR_ENCODING_NAME;
7064 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7065 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7066 ctxt->wellFormed = 0;
7067 ctxt->disableSAX = 1;
7068 }
7069 return(buf);
7070}
7071
7072/**
7073 * xmlParseEncodingDecl:
7074 * @ctxt: an XML parser context
7075 *
7076 * parse the XML encoding declaration
7077 *
7078 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7079 *
7080 * this setups the conversion filters.
7081 *
7082 * Returns the encoding value or NULL
7083 */
7084
7085xmlChar *
7086xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7087 xmlChar *encoding = NULL;
7088 const xmlChar *q;
7089
7090 SKIP_BLANKS;
7091 if ((RAW == 'e') && (NXT(1) == 'n') &&
7092 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7093 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7094 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7095 SKIP(8);
7096 SKIP_BLANKS;
7097 if (RAW != '=') {
7098 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7100 ctxt->sax->error(ctxt->userData,
7101 "xmlParseEncodingDecl : expected '='\n");
7102 ctxt->wellFormed = 0;
7103 ctxt->disableSAX = 1;
7104 return(NULL);
7105 }
7106 NEXT;
7107 SKIP_BLANKS;
7108 if (RAW == '"') {
7109 NEXT;
7110 q = CUR_PTR;
7111 encoding = xmlParseEncName(ctxt);
7112 if (RAW != '"') {
7113 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7114 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7115 ctxt->sax->error(ctxt->userData,
7116 "String not closed\n%.50s\n", q);
7117 ctxt->wellFormed = 0;
7118 ctxt->disableSAX = 1;
7119 } else
7120 NEXT;
7121 } else if (RAW == '\''){
7122 NEXT;
7123 q = CUR_PTR;
7124 encoding = xmlParseEncName(ctxt);
7125 if (RAW != '\'') {
7126 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7128 ctxt->sax->error(ctxt->userData,
7129 "String not closed\n%.50s\n", q);
7130 ctxt->wellFormed = 0;
7131 ctxt->disableSAX = 1;
7132 } else
7133 NEXT;
7134 } else if (RAW == '"'){
7135 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7136 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7137 ctxt->sax->error(ctxt->userData,
7138 "xmlParseEncodingDecl : expected ' or \"\n");
7139 ctxt->wellFormed = 0;
7140 ctxt->disableSAX = 1;
7141 }
7142 if (encoding != NULL) {
7143 xmlCharEncoding enc;
7144 xmlCharEncodingHandlerPtr handler;
7145
7146 if (ctxt->input->encoding != NULL)
7147 xmlFree((xmlChar *) ctxt->input->encoding);
7148 ctxt->input->encoding = encoding;
7149
7150 enc = xmlParseCharEncoding((const char *) encoding);
7151 /*
7152 * registered set of known encodings
7153 */
7154 if (enc != XML_CHAR_ENCODING_ERROR) {
7155 xmlSwitchEncoding(ctxt, enc);
7156 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7157 xmlFree(encoding);
7158 return(NULL);
7159 }
7160 } else {
7161 /*
7162 * fallback for unknown encodings
7163 */
7164 handler = xmlFindCharEncodingHandler((const char *) encoding);
7165 if (handler != NULL) {
7166 xmlSwitchToEncoding(ctxt, handler);
7167 } else {
7168 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7169 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7170 ctxt->sax->error(ctxt->userData,
7171 "Unsupported encoding %s\n", encoding);
7172 return(NULL);
7173 }
7174 }
7175 }
7176 }
7177 return(encoding);
7178}
7179
7180/**
7181 * xmlParseSDDecl:
7182 * @ctxt: an XML parser context
7183 *
7184 * parse the XML standalone declaration
7185 *
7186 * [32] SDDecl ::= S 'standalone' Eq
7187 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7188 *
7189 * [ VC: Standalone Document Declaration ]
7190 * TODO The standalone document declaration must have the value "no"
7191 * if any external markup declarations contain declarations of:
7192 * - attributes with default values, if elements to which these
7193 * attributes apply appear in the document without specifications
7194 * of values for these attributes, or
7195 * - entities (other than amp, lt, gt, apos, quot), if references
7196 * to those entities appear in the document, or
7197 * - attributes with values subject to normalization, where the
7198 * attribute appears in the document with a value which will change
7199 * as a result of normalization, or
7200 * - element types with element content, if white space occurs directly
7201 * within any instance of those types.
7202 *
7203 * Returns 1 if standalone, 0 otherwise
7204 */
7205
7206int
7207xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7208 int standalone = -1;
7209
7210 SKIP_BLANKS;
7211 if ((RAW == 's') && (NXT(1) == 't') &&
7212 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7213 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7214 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7215 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7216 SKIP(10);
7217 SKIP_BLANKS;
7218 if (RAW != '=') {
7219 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7220 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7221 ctxt->sax->error(ctxt->userData,
7222 "XML standalone declaration : expected '='\n");
7223 ctxt->wellFormed = 0;
7224 ctxt->disableSAX = 1;
7225 return(standalone);
7226 }
7227 NEXT;
7228 SKIP_BLANKS;
7229 if (RAW == '\''){
7230 NEXT;
7231 if ((RAW == 'n') && (NXT(1) == 'o')) {
7232 standalone = 0;
7233 SKIP(2);
7234 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7235 (NXT(2) == 's')) {
7236 standalone = 1;
7237 SKIP(3);
7238 } else {
7239 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7241 ctxt->sax->error(ctxt->userData,
7242 "standalone accepts only 'yes' or 'no'\n");
7243 ctxt->wellFormed = 0;
7244 ctxt->disableSAX = 1;
7245 }
7246 if (RAW != '\'') {
7247 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7249 ctxt->sax->error(ctxt->userData, "String not closed\n");
7250 ctxt->wellFormed = 0;
7251 ctxt->disableSAX = 1;
7252 } else
7253 NEXT;
7254 } else if (RAW == '"'){
7255 NEXT;
7256 if ((RAW == 'n') && (NXT(1) == 'o')) {
7257 standalone = 0;
7258 SKIP(2);
7259 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7260 (NXT(2) == 's')) {
7261 standalone = 1;
7262 SKIP(3);
7263 } else {
7264 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7265 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7266 ctxt->sax->error(ctxt->userData,
7267 "standalone accepts only 'yes' or 'no'\n");
7268 ctxt->wellFormed = 0;
7269 ctxt->disableSAX = 1;
7270 }
7271 if (RAW != '"') {
7272 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7274 ctxt->sax->error(ctxt->userData, "String not closed\n");
7275 ctxt->wellFormed = 0;
7276 ctxt->disableSAX = 1;
7277 } else
7278 NEXT;
7279 } else {
7280 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7281 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7282 ctxt->sax->error(ctxt->userData,
7283 "Standalone value not found\n");
7284 ctxt->wellFormed = 0;
7285 ctxt->disableSAX = 1;
7286 }
7287 }
7288 return(standalone);
7289}
7290
7291/**
7292 * xmlParseXMLDecl:
7293 * @ctxt: an XML parser context
7294 *
7295 * parse an XML declaration header
7296 *
7297 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7298 */
7299
7300void
7301xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7302 xmlChar *version;
7303
7304 /*
7305 * We know that '<?xml' is here.
7306 */
7307 SKIP(5);
7308
7309 if (!IS_BLANK(RAW)) {
7310 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7312 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7313 ctxt->wellFormed = 0;
7314 ctxt->disableSAX = 1;
7315 }
7316 SKIP_BLANKS;
7317
7318 /*
7319 * We should have the VersionInfo here.
7320 */
7321 version = xmlParseVersionInfo(ctxt);
7322 if (version == NULL)
7323 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillarda050d232001-09-05 15:51:05 +00007324 else if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7325 /*
7326 * TODO: Blueberry should be detected here
7327 */
7328 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7329 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7330 version);
7331 }
Owen Taylor3473f882001-02-23 17:55:21 +00007332 ctxt->version = xmlStrdup(version);
7333 xmlFree(version);
7334
7335 /*
7336 * We may have the encoding declaration
7337 */
7338 if (!IS_BLANK(RAW)) {
7339 if ((RAW == '?') && (NXT(1) == '>')) {
7340 SKIP(2);
7341 return;
7342 }
7343 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7345 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7346 ctxt->wellFormed = 0;
7347 ctxt->disableSAX = 1;
7348 }
7349 xmlParseEncodingDecl(ctxt);
7350 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7351 /*
7352 * The XML REC instructs us to stop parsing right here
7353 */
7354 return;
7355 }
7356
7357 /*
7358 * We may have the standalone status.
7359 */
7360 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7361 if ((RAW == '?') && (NXT(1) == '>')) {
7362 SKIP(2);
7363 return;
7364 }
7365 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7367 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7368 ctxt->wellFormed = 0;
7369 ctxt->disableSAX = 1;
7370 }
7371 SKIP_BLANKS;
7372 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7373
7374 SKIP_BLANKS;
7375 if ((RAW == '?') && (NXT(1) == '>')) {
7376 SKIP(2);
7377 } else if (RAW == '>') {
7378 /* Deprecated old WD ... */
7379 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7381 ctxt->sax->error(ctxt->userData,
7382 "XML declaration must end-up with '?>'\n");
7383 ctxt->wellFormed = 0;
7384 ctxt->disableSAX = 1;
7385 NEXT;
7386 } else {
7387 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7389 ctxt->sax->error(ctxt->userData,
7390 "parsing XML declaration: '?>' expected\n");
7391 ctxt->wellFormed = 0;
7392 ctxt->disableSAX = 1;
7393 MOVETO_ENDTAG(CUR_PTR);
7394 NEXT;
7395 }
7396}
7397
7398/**
7399 * xmlParseMisc:
7400 * @ctxt: an XML parser context
7401 *
7402 * parse an XML Misc* optionnal field.
7403 *
7404 * [27] Misc ::= Comment | PI | S
7405 */
7406
7407void
7408xmlParseMisc(xmlParserCtxtPtr ctxt) {
7409 while (((RAW == '<') && (NXT(1) == '?')) ||
7410 ((RAW == '<') && (NXT(1) == '!') &&
7411 (NXT(2) == '-') && (NXT(3) == '-')) ||
7412 IS_BLANK(CUR)) {
7413 if ((RAW == '<') && (NXT(1) == '?')) {
7414 xmlParsePI(ctxt);
7415 } else if (IS_BLANK(CUR)) {
7416 NEXT;
7417 } else
7418 xmlParseComment(ctxt);
7419 }
7420}
7421
7422/**
7423 * xmlParseDocument:
7424 * @ctxt: an XML parser context
7425 *
7426 * parse an XML document (and build a tree if using the standard SAX
7427 * interface).
7428 *
7429 * [1] document ::= prolog element Misc*
7430 *
7431 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7432 *
7433 * Returns 0, -1 in case of error. the parser context is augmented
7434 * as a result of the parsing.
7435 */
7436
7437int
7438xmlParseDocument(xmlParserCtxtPtr ctxt) {
7439 xmlChar start[4];
7440 xmlCharEncoding enc;
7441
7442 xmlInitParser();
7443
7444 GROW;
7445
7446 /*
7447 * SAX: beginning of the document processing.
7448 */
7449 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7450 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7451
Daniel Veillard50f34372001-08-03 12:06:36 +00007452 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007453 /*
7454 * Get the 4 first bytes and decode the charset
7455 * if enc != XML_CHAR_ENCODING_NONE
7456 * plug some encoding conversion routines.
7457 */
7458 start[0] = RAW;
7459 start[1] = NXT(1);
7460 start[2] = NXT(2);
7461 start[3] = NXT(3);
7462 enc = xmlDetectCharEncoding(start, 4);
7463 if (enc != XML_CHAR_ENCODING_NONE) {
7464 xmlSwitchEncoding(ctxt, enc);
7465 }
Owen Taylor3473f882001-02-23 17:55:21 +00007466 }
7467
7468
7469 if (CUR == 0) {
7470 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7472 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7473 ctxt->wellFormed = 0;
7474 ctxt->disableSAX = 1;
7475 }
7476
7477 /*
7478 * Check for the XMLDecl in the Prolog.
7479 */
7480 GROW;
7481 if ((RAW == '<') && (NXT(1) == '?') &&
7482 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7483 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7484
7485 /*
7486 * Note that we will switch encoding on the fly.
7487 */
7488 xmlParseXMLDecl(ctxt);
7489 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7490 /*
7491 * The XML REC instructs us to stop parsing right here
7492 */
7493 return(-1);
7494 }
7495 ctxt->standalone = ctxt->input->standalone;
7496 SKIP_BLANKS;
7497 } else {
7498 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7499 }
7500 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7501 ctxt->sax->startDocument(ctxt->userData);
7502
7503 /*
7504 * The Misc part of the Prolog
7505 */
7506 GROW;
7507 xmlParseMisc(ctxt);
7508
7509 /*
7510 * Then possibly doc type declaration(s) and more Misc
7511 * (doctypedecl Misc*)?
7512 */
7513 GROW;
7514 if ((RAW == '<') && (NXT(1) == '!') &&
7515 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7516 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7517 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7518 (NXT(8) == 'E')) {
7519
7520 ctxt->inSubset = 1;
7521 xmlParseDocTypeDecl(ctxt);
7522 if (RAW == '[') {
7523 ctxt->instate = XML_PARSER_DTD;
7524 xmlParseInternalSubset(ctxt);
7525 }
7526
7527 /*
7528 * Create and update the external subset.
7529 */
7530 ctxt->inSubset = 2;
7531 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7532 (!ctxt->disableSAX))
7533 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7534 ctxt->extSubSystem, ctxt->extSubURI);
7535 ctxt->inSubset = 0;
7536
7537
7538 ctxt->instate = XML_PARSER_PROLOG;
7539 xmlParseMisc(ctxt);
7540 }
7541
7542 /*
7543 * Time to start parsing the tree itself
7544 */
7545 GROW;
7546 if (RAW != '<') {
7547 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7549 ctxt->sax->error(ctxt->userData,
7550 "Start tag expected, '<' not found\n");
7551 ctxt->wellFormed = 0;
7552 ctxt->disableSAX = 1;
7553 ctxt->instate = XML_PARSER_EOF;
7554 } else {
7555 ctxt->instate = XML_PARSER_CONTENT;
7556 xmlParseElement(ctxt);
7557 ctxt->instate = XML_PARSER_EPILOG;
7558
7559
7560 /*
7561 * The Misc part at the end
7562 */
7563 xmlParseMisc(ctxt);
7564
7565 if (RAW != 0) {
7566 ctxt->errNo = XML_ERR_DOCUMENT_END;
7567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7568 ctxt->sax->error(ctxt->userData,
7569 "Extra content at the end of the document\n");
7570 ctxt->wellFormed = 0;
7571 ctxt->disableSAX = 1;
7572 }
7573 ctxt->instate = XML_PARSER_EOF;
7574 }
7575
7576 /*
7577 * SAX: end of the document processing.
7578 */
7579 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7580 (!ctxt->disableSAX))
7581 ctxt->sax->endDocument(ctxt->userData);
7582
7583 if (! ctxt->wellFormed) return(-1);
7584 return(0);
7585}
7586
7587/**
7588 * xmlParseExtParsedEnt:
7589 * @ctxt: an XML parser context
7590 *
7591 * parse a genreral parsed entity
7592 * An external general parsed entity is well-formed if it matches the
7593 * production labeled extParsedEnt.
7594 *
7595 * [78] extParsedEnt ::= TextDecl? content
7596 *
7597 * Returns 0, -1 in case of error. the parser context is augmented
7598 * as a result of the parsing.
7599 */
7600
7601int
7602xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7603 xmlChar start[4];
7604 xmlCharEncoding enc;
7605
7606 xmlDefaultSAXHandlerInit();
7607
7608 GROW;
7609
7610 /*
7611 * SAX: beginning of the document processing.
7612 */
7613 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7614 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7615
7616 /*
7617 * Get the 4 first bytes and decode the charset
7618 * if enc != XML_CHAR_ENCODING_NONE
7619 * plug some encoding conversion routines.
7620 */
7621 start[0] = RAW;
7622 start[1] = NXT(1);
7623 start[2] = NXT(2);
7624 start[3] = NXT(3);
7625 enc = xmlDetectCharEncoding(start, 4);
7626 if (enc != XML_CHAR_ENCODING_NONE) {
7627 xmlSwitchEncoding(ctxt, enc);
7628 }
7629
7630
7631 if (CUR == 0) {
7632 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7634 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7635 ctxt->wellFormed = 0;
7636 ctxt->disableSAX = 1;
7637 }
7638
7639 /*
7640 * Check for the XMLDecl in the Prolog.
7641 */
7642 GROW;
7643 if ((RAW == '<') && (NXT(1) == '?') &&
7644 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7645 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7646
7647 /*
7648 * Note that we will switch encoding on the fly.
7649 */
7650 xmlParseXMLDecl(ctxt);
7651 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7652 /*
7653 * The XML REC instructs us to stop parsing right here
7654 */
7655 return(-1);
7656 }
7657 SKIP_BLANKS;
7658 } else {
7659 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7660 }
7661 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7662 ctxt->sax->startDocument(ctxt->userData);
7663
7664 /*
7665 * Doing validity checking on chunk doesn't make sense
7666 */
7667 ctxt->instate = XML_PARSER_CONTENT;
7668 ctxt->validate = 0;
7669 ctxt->loadsubset = 0;
7670 ctxt->depth = 0;
7671
7672 xmlParseContent(ctxt);
7673
7674 if ((RAW == '<') && (NXT(1) == '/')) {
7675 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7677 ctxt->sax->error(ctxt->userData,
7678 "chunk is not well balanced\n");
7679 ctxt->wellFormed = 0;
7680 ctxt->disableSAX = 1;
7681 } else if (RAW != 0) {
7682 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7684 ctxt->sax->error(ctxt->userData,
7685 "extra content at the end of well balanced chunk\n");
7686 ctxt->wellFormed = 0;
7687 ctxt->disableSAX = 1;
7688 }
7689
7690 /*
7691 * SAX: end of the document processing.
7692 */
7693 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7694 (!ctxt->disableSAX))
7695 ctxt->sax->endDocument(ctxt->userData);
7696
7697 if (! ctxt->wellFormed) return(-1);
7698 return(0);
7699}
7700
7701/************************************************************************
7702 * *
7703 * Progressive parsing interfaces *
7704 * *
7705 ************************************************************************/
7706
7707/**
7708 * xmlParseLookupSequence:
7709 * @ctxt: an XML parser context
7710 * @first: the first char to lookup
7711 * @next: the next char to lookup or zero
7712 * @third: the next char to lookup or zero
7713 *
7714 * Try to find if a sequence (first, next, third) or just (first next) or
7715 * (first) is available in the input stream.
7716 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7717 * to avoid rescanning sequences of bytes, it DOES change the state of the
7718 * parser, do not use liberally.
7719 *
7720 * Returns the index to the current parsing point if the full sequence
7721 * is available, -1 otherwise.
7722 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007723static int
Owen Taylor3473f882001-02-23 17:55:21 +00007724xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7725 xmlChar next, xmlChar third) {
7726 int base, len;
7727 xmlParserInputPtr in;
7728 const xmlChar *buf;
7729
7730 in = ctxt->input;
7731 if (in == NULL) return(-1);
7732 base = in->cur - in->base;
7733 if (base < 0) return(-1);
7734 if (ctxt->checkIndex > base)
7735 base = ctxt->checkIndex;
7736 if (in->buf == NULL) {
7737 buf = in->base;
7738 len = in->length;
7739 } else {
7740 buf = in->buf->buffer->content;
7741 len = in->buf->buffer->use;
7742 }
7743 /* take into account the sequence length */
7744 if (third) len -= 2;
7745 else if (next) len --;
7746 for (;base < len;base++) {
7747 if (buf[base] == first) {
7748 if (third != 0) {
7749 if ((buf[base + 1] != next) ||
7750 (buf[base + 2] != third)) continue;
7751 } else if (next != 0) {
7752 if (buf[base + 1] != next) continue;
7753 }
7754 ctxt->checkIndex = 0;
7755#ifdef DEBUG_PUSH
7756 if (next == 0)
7757 xmlGenericError(xmlGenericErrorContext,
7758 "PP: lookup '%c' found at %d\n",
7759 first, base);
7760 else if (third == 0)
7761 xmlGenericError(xmlGenericErrorContext,
7762 "PP: lookup '%c%c' found at %d\n",
7763 first, next, base);
7764 else
7765 xmlGenericError(xmlGenericErrorContext,
7766 "PP: lookup '%c%c%c' found at %d\n",
7767 first, next, third, base);
7768#endif
7769 return(base - (in->cur - in->base));
7770 }
7771 }
7772 ctxt->checkIndex = base;
7773#ifdef DEBUG_PUSH
7774 if (next == 0)
7775 xmlGenericError(xmlGenericErrorContext,
7776 "PP: lookup '%c' failed\n", first);
7777 else if (third == 0)
7778 xmlGenericError(xmlGenericErrorContext,
7779 "PP: lookup '%c%c' failed\n", first, next);
7780 else
7781 xmlGenericError(xmlGenericErrorContext,
7782 "PP: lookup '%c%c%c' failed\n", first, next, third);
7783#endif
7784 return(-1);
7785}
7786
7787/**
7788 * xmlParseTryOrFinish:
7789 * @ctxt: an XML parser context
7790 * @terminate: last chunk indicator
7791 *
7792 * Try to progress on parsing
7793 *
7794 * Returns zero if no parsing was possible
7795 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007796static int
Owen Taylor3473f882001-02-23 17:55:21 +00007797xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7798 int ret = 0;
7799 int avail;
7800 xmlChar cur, next;
7801
7802#ifdef DEBUG_PUSH
7803 switch (ctxt->instate) {
7804 case XML_PARSER_EOF:
7805 xmlGenericError(xmlGenericErrorContext,
7806 "PP: try EOF\n"); break;
7807 case XML_PARSER_START:
7808 xmlGenericError(xmlGenericErrorContext,
7809 "PP: try START\n"); break;
7810 case XML_PARSER_MISC:
7811 xmlGenericError(xmlGenericErrorContext,
7812 "PP: try MISC\n");break;
7813 case XML_PARSER_COMMENT:
7814 xmlGenericError(xmlGenericErrorContext,
7815 "PP: try COMMENT\n");break;
7816 case XML_PARSER_PROLOG:
7817 xmlGenericError(xmlGenericErrorContext,
7818 "PP: try PROLOG\n");break;
7819 case XML_PARSER_START_TAG:
7820 xmlGenericError(xmlGenericErrorContext,
7821 "PP: try START_TAG\n");break;
7822 case XML_PARSER_CONTENT:
7823 xmlGenericError(xmlGenericErrorContext,
7824 "PP: try CONTENT\n");break;
7825 case XML_PARSER_CDATA_SECTION:
7826 xmlGenericError(xmlGenericErrorContext,
7827 "PP: try CDATA_SECTION\n");break;
7828 case XML_PARSER_END_TAG:
7829 xmlGenericError(xmlGenericErrorContext,
7830 "PP: try END_TAG\n");break;
7831 case XML_PARSER_ENTITY_DECL:
7832 xmlGenericError(xmlGenericErrorContext,
7833 "PP: try ENTITY_DECL\n");break;
7834 case XML_PARSER_ENTITY_VALUE:
7835 xmlGenericError(xmlGenericErrorContext,
7836 "PP: try ENTITY_VALUE\n");break;
7837 case XML_PARSER_ATTRIBUTE_VALUE:
7838 xmlGenericError(xmlGenericErrorContext,
7839 "PP: try ATTRIBUTE_VALUE\n");break;
7840 case XML_PARSER_DTD:
7841 xmlGenericError(xmlGenericErrorContext,
7842 "PP: try DTD\n");break;
7843 case XML_PARSER_EPILOG:
7844 xmlGenericError(xmlGenericErrorContext,
7845 "PP: try EPILOG\n");break;
7846 case XML_PARSER_PI:
7847 xmlGenericError(xmlGenericErrorContext,
7848 "PP: try PI\n");break;
7849 case XML_PARSER_IGNORE:
7850 xmlGenericError(xmlGenericErrorContext,
7851 "PP: try IGNORE\n");break;
7852 }
7853#endif
7854
7855 while (1) {
7856 /*
7857 * Pop-up of finished entities.
7858 */
7859 while ((RAW == 0) && (ctxt->inputNr > 1))
7860 xmlPopInput(ctxt);
7861
7862 if (ctxt->input ==NULL) break;
7863 if (ctxt->input->buf == NULL)
7864 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7865 else
7866 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7867 if (avail < 1)
7868 goto done;
7869 switch (ctxt->instate) {
7870 case XML_PARSER_EOF:
7871 /*
7872 * Document parsing is done !
7873 */
7874 goto done;
7875 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007876 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7877 xmlChar start[4];
7878 xmlCharEncoding enc;
7879
7880 /*
7881 * Very first chars read from the document flow.
7882 */
7883 if (avail < 4)
7884 goto done;
7885
7886 /*
7887 * Get the 4 first bytes and decode the charset
7888 * if enc != XML_CHAR_ENCODING_NONE
7889 * plug some encoding conversion routines.
7890 */
7891 start[0] = RAW;
7892 start[1] = NXT(1);
7893 start[2] = NXT(2);
7894 start[3] = NXT(3);
7895 enc = xmlDetectCharEncoding(start, 4);
7896 if (enc != XML_CHAR_ENCODING_NONE) {
7897 xmlSwitchEncoding(ctxt, enc);
7898 }
7899 break;
7900 }
Owen Taylor3473f882001-02-23 17:55:21 +00007901
7902 cur = ctxt->input->cur[0];
7903 next = ctxt->input->cur[1];
7904 if (cur == 0) {
7905 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7906 ctxt->sax->setDocumentLocator(ctxt->userData,
7907 &xmlDefaultSAXLocator);
7908 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7910 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7911 ctxt->wellFormed = 0;
7912 ctxt->disableSAX = 1;
7913 ctxt->instate = XML_PARSER_EOF;
7914#ifdef DEBUG_PUSH
7915 xmlGenericError(xmlGenericErrorContext,
7916 "PP: entering EOF\n");
7917#endif
7918 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7919 ctxt->sax->endDocument(ctxt->userData);
7920 goto done;
7921 }
7922 if ((cur == '<') && (next == '?')) {
7923 /* PI or XML decl */
7924 if (avail < 5) return(ret);
7925 if ((!terminate) &&
7926 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7927 return(ret);
7928 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7929 ctxt->sax->setDocumentLocator(ctxt->userData,
7930 &xmlDefaultSAXLocator);
7931 if ((ctxt->input->cur[2] == 'x') &&
7932 (ctxt->input->cur[3] == 'm') &&
7933 (ctxt->input->cur[4] == 'l') &&
7934 (IS_BLANK(ctxt->input->cur[5]))) {
7935 ret += 5;
7936#ifdef DEBUG_PUSH
7937 xmlGenericError(xmlGenericErrorContext,
7938 "PP: Parsing XML Decl\n");
7939#endif
7940 xmlParseXMLDecl(ctxt);
7941 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7942 /*
7943 * The XML REC instructs us to stop parsing right
7944 * here
7945 */
7946 ctxt->instate = XML_PARSER_EOF;
7947 return(0);
7948 }
7949 ctxt->standalone = ctxt->input->standalone;
7950 if ((ctxt->encoding == NULL) &&
7951 (ctxt->input->encoding != NULL))
7952 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7953 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7954 (!ctxt->disableSAX))
7955 ctxt->sax->startDocument(ctxt->userData);
7956 ctxt->instate = XML_PARSER_MISC;
7957#ifdef DEBUG_PUSH
7958 xmlGenericError(xmlGenericErrorContext,
7959 "PP: entering MISC\n");
7960#endif
7961 } else {
7962 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7963 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7964 (!ctxt->disableSAX))
7965 ctxt->sax->startDocument(ctxt->userData);
7966 ctxt->instate = XML_PARSER_MISC;
7967#ifdef DEBUG_PUSH
7968 xmlGenericError(xmlGenericErrorContext,
7969 "PP: entering MISC\n");
7970#endif
7971 }
7972 } else {
7973 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7974 ctxt->sax->setDocumentLocator(ctxt->userData,
7975 &xmlDefaultSAXLocator);
7976 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7977 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7978 (!ctxt->disableSAX))
7979 ctxt->sax->startDocument(ctxt->userData);
7980 ctxt->instate = XML_PARSER_MISC;
7981#ifdef DEBUG_PUSH
7982 xmlGenericError(xmlGenericErrorContext,
7983 "PP: entering MISC\n");
7984#endif
7985 }
7986 break;
7987 case XML_PARSER_MISC:
7988 SKIP_BLANKS;
7989 if (ctxt->input->buf == NULL)
7990 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7991 else
7992 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7993 if (avail < 2)
7994 goto done;
7995 cur = ctxt->input->cur[0];
7996 next = ctxt->input->cur[1];
7997 if ((cur == '<') && (next == '?')) {
7998 if ((!terminate) &&
7999 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8000 goto done;
8001#ifdef DEBUG_PUSH
8002 xmlGenericError(xmlGenericErrorContext,
8003 "PP: Parsing PI\n");
8004#endif
8005 xmlParsePI(ctxt);
8006 } else if ((cur == '<') && (next == '!') &&
8007 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8008 if ((!terminate) &&
8009 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8010 goto done;
8011#ifdef DEBUG_PUSH
8012 xmlGenericError(xmlGenericErrorContext,
8013 "PP: Parsing Comment\n");
8014#endif
8015 xmlParseComment(ctxt);
8016 ctxt->instate = XML_PARSER_MISC;
8017 } else if ((cur == '<') && (next == '!') &&
8018 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
8019 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
8020 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
8021 (ctxt->input->cur[8] == 'E')) {
8022 if ((!terminate) &&
8023 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8024 goto done;
8025#ifdef DEBUG_PUSH
8026 xmlGenericError(xmlGenericErrorContext,
8027 "PP: Parsing internal subset\n");
8028#endif
8029 ctxt->inSubset = 1;
8030 xmlParseDocTypeDecl(ctxt);
8031 if (RAW == '[') {
8032 ctxt->instate = XML_PARSER_DTD;
8033#ifdef DEBUG_PUSH
8034 xmlGenericError(xmlGenericErrorContext,
8035 "PP: entering DTD\n");
8036#endif
8037 } else {
8038 /*
8039 * Create and update the external subset.
8040 */
8041 ctxt->inSubset = 2;
8042 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8043 (ctxt->sax->externalSubset != NULL))
8044 ctxt->sax->externalSubset(ctxt->userData,
8045 ctxt->intSubName, ctxt->extSubSystem,
8046 ctxt->extSubURI);
8047 ctxt->inSubset = 0;
8048 ctxt->instate = XML_PARSER_PROLOG;
8049#ifdef DEBUG_PUSH
8050 xmlGenericError(xmlGenericErrorContext,
8051 "PP: entering PROLOG\n");
8052#endif
8053 }
8054 } else if ((cur == '<') && (next == '!') &&
8055 (avail < 9)) {
8056 goto done;
8057 } else {
8058 ctxt->instate = XML_PARSER_START_TAG;
8059#ifdef DEBUG_PUSH
8060 xmlGenericError(xmlGenericErrorContext,
8061 "PP: entering START_TAG\n");
8062#endif
8063 }
8064 break;
8065 case XML_PARSER_IGNORE:
8066 xmlGenericError(xmlGenericErrorContext,
8067 "PP: internal error, state == IGNORE");
8068 ctxt->instate = XML_PARSER_DTD;
8069#ifdef DEBUG_PUSH
8070 xmlGenericError(xmlGenericErrorContext,
8071 "PP: entering DTD\n");
8072#endif
8073 break;
8074 case XML_PARSER_PROLOG:
8075 SKIP_BLANKS;
8076 if (ctxt->input->buf == NULL)
8077 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8078 else
8079 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8080 if (avail < 2)
8081 goto done;
8082 cur = ctxt->input->cur[0];
8083 next = ctxt->input->cur[1];
8084 if ((cur == '<') && (next == '?')) {
8085 if ((!terminate) &&
8086 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8087 goto done;
8088#ifdef DEBUG_PUSH
8089 xmlGenericError(xmlGenericErrorContext,
8090 "PP: Parsing PI\n");
8091#endif
8092 xmlParsePI(ctxt);
8093 } else if ((cur == '<') && (next == '!') &&
8094 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8095 if ((!terminate) &&
8096 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8097 goto done;
8098#ifdef DEBUG_PUSH
8099 xmlGenericError(xmlGenericErrorContext,
8100 "PP: Parsing Comment\n");
8101#endif
8102 xmlParseComment(ctxt);
8103 ctxt->instate = XML_PARSER_PROLOG;
8104 } else if ((cur == '<') && (next == '!') &&
8105 (avail < 4)) {
8106 goto done;
8107 } else {
8108 ctxt->instate = XML_PARSER_START_TAG;
8109#ifdef DEBUG_PUSH
8110 xmlGenericError(xmlGenericErrorContext,
8111 "PP: entering START_TAG\n");
8112#endif
8113 }
8114 break;
8115 case XML_PARSER_EPILOG:
8116 SKIP_BLANKS;
8117 if (ctxt->input->buf == NULL)
8118 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8119 else
8120 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8121 if (avail < 2)
8122 goto done;
8123 cur = ctxt->input->cur[0];
8124 next = ctxt->input->cur[1];
8125 if ((cur == '<') && (next == '?')) {
8126 if ((!terminate) &&
8127 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8128 goto done;
8129#ifdef DEBUG_PUSH
8130 xmlGenericError(xmlGenericErrorContext,
8131 "PP: Parsing PI\n");
8132#endif
8133 xmlParsePI(ctxt);
8134 ctxt->instate = XML_PARSER_EPILOG;
8135 } else if ((cur == '<') && (next == '!') &&
8136 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8137 if ((!terminate) &&
8138 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8139 goto done;
8140#ifdef DEBUG_PUSH
8141 xmlGenericError(xmlGenericErrorContext,
8142 "PP: Parsing Comment\n");
8143#endif
8144 xmlParseComment(ctxt);
8145 ctxt->instate = XML_PARSER_EPILOG;
8146 } else if ((cur == '<') && (next == '!') &&
8147 (avail < 4)) {
8148 goto done;
8149 } else {
8150 ctxt->errNo = XML_ERR_DOCUMENT_END;
8151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8152 ctxt->sax->error(ctxt->userData,
8153 "Extra content at the end of the document\n");
8154 ctxt->wellFormed = 0;
8155 ctxt->disableSAX = 1;
8156 ctxt->instate = XML_PARSER_EOF;
8157#ifdef DEBUG_PUSH
8158 xmlGenericError(xmlGenericErrorContext,
8159 "PP: entering EOF\n");
8160#endif
8161 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8162 (!ctxt->disableSAX))
8163 ctxt->sax->endDocument(ctxt->userData);
8164 goto done;
8165 }
8166 break;
8167 case XML_PARSER_START_TAG: {
8168 xmlChar *name, *oldname;
8169
8170 if ((avail < 2) && (ctxt->inputNr == 1))
8171 goto done;
8172 cur = ctxt->input->cur[0];
8173 if (cur != '<') {
8174 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8175 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8176 ctxt->sax->error(ctxt->userData,
8177 "Start tag expect, '<' not found\n");
8178 ctxt->wellFormed = 0;
8179 ctxt->disableSAX = 1;
8180 ctxt->instate = XML_PARSER_EOF;
8181#ifdef DEBUG_PUSH
8182 xmlGenericError(xmlGenericErrorContext,
8183 "PP: entering EOF\n");
8184#endif
8185 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8186 (!ctxt->disableSAX))
8187 ctxt->sax->endDocument(ctxt->userData);
8188 goto done;
8189 }
8190 if ((!terminate) &&
8191 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8192 goto done;
8193 if (ctxt->spaceNr == 0)
8194 spacePush(ctxt, -1);
8195 else
8196 spacePush(ctxt, *ctxt->space);
8197 name = xmlParseStartTag(ctxt);
8198 if (name == NULL) {
8199 spacePop(ctxt);
8200 ctxt->instate = XML_PARSER_EOF;
8201#ifdef DEBUG_PUSH
8202 xmlGenericError(xmlGenericErrorContext,
8203 "PP: entering EOF\n");
8204#endif
8205 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8206 (!ctxt->disableSAX))
8207 ctxt->sax->endDocument(ctxt->userData);
8208 goto done;
8209 }
8210 namePush(ctxt, xmlStrdup(name));
8211
8212 /*
8213 * [ VC: Root Element Type ]
8214 * The Name in the document type declaration must match
8215 * the element type of the root element.
8216 */
8217 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8218 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8219 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8220
8221 /*
8222 * Check for an Empty Element.
8223 */
8224 if ((RAW == '/') && (NXT(1) == '>')) {
8225 SKIP(2);
8226 if ((ctxt->sax != NULL) &&
8227 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8228 ctxt->sax->endElement(ctxt->userData, name);
8229 xmlFree(name);
8230 oldname = namePop(ctxt);
8231 spacePop(ctxt);
8232 if (oldname != NULL) {
8233#ifdef DEBUG_STACK
8234 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8235#endif
8236 xmlFree(oldname);
8237 }
8238 if (ctxt->name == NULL) {
8239 ctxt->instate = XML_PARSER_EPILOG;
8240#ifdef DEBUG_PUSH
8241 xmlGenericError(xmlGenericErrorContext,
8242 "PP: entering EPILOG\n");
8243#endif
8244 } else {
8245 ctxt->instate = XML_PARSER_CONTENT;
8246#ifdef DEBUG_PUSH
8247 xmlGenericError(xmlGenericErrorContext,
8248 "PP: entering CONTENT\n");
8249#endif
8250 }
8251 break;
8252 }
8253 if (RAW == '>') {
8254 NEXT;
8255 } else {
8256 ctxt->errNo = XML_ERR_GT_REQUIRED;
8257 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8258 ctxt->sax->error(ctxt->userData,
8259 "Couldn't find end of Start Tag %s\n",
8260 name);
8261 ctxt->wellFormed = 0;
8262 ctxt->disableSAX = 1;
8263
8264 /*
8265 * end of parsing of this node.
8266 */
8267 nodePop(ctxt);
8268 oldname = namePop(ctxt);
8269 spacePop(ctxt);
8270 if (oldname != NULL) {
8271#ifdef DEBUG_STACK
8272 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8273#endif
8274 xmlFree(oldname);
8275 }
8276 }
8277 xmlFree(name);
8278 ctxt->instate = XML_PARSER_CONTENT;
8279#ifdef DEBUG_PUSH
8280 xmlGenericError(xmlGenericErrorContext,
8281 "PP: entering CONTENT\n");
8282#endif
8283 break;
8284 }
8285 case XML_PARSER_CONTENT: {
8286 const xmlChar *test;
8287 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008288 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008289
8290 /*
8291 * Handle preparsed entities and charRef
8292 */
8293 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008294 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008295
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008296 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008297 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8298 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008299 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008300 ctxt->token = 0;
8301 }
8302 if ((avail < 2) && (ctxt->inputNr == 1))
8303 goto done;
8304 cur = ctxt->input->cur[0];
8305 next = ctxt->input->cur[1];
8306
8307 test = CUR_PTR;
8308 cons = ctxt->input->consumed;
8309 tok = ctxt->token;
8310 if ((cur == '<') && (next == '?')) {
8311 if ((!terminate) &&
8312 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8313 goto done;
8314#ifdef DEBUG_PUSH
8315 xmlGenericError(xmlGenericErrorContext,
8316 "PP: Parsing PI\n");
8317#endif
8318 xmlParsePI(ctxt);
8319 } else if ((cur == '<') && (next == '!') &&
8320 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8321 if ((!terminate) &&
8322 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8323 goto done;
8324#ifdef DEBUG_PUSH
8325 xmlGenericError(xmlGenericErrorContext,
8326 "PP: Parsing Comment\n");
8327#endif
8328 xmlParseComment(ctxt);
8329 ctxt->instate = XML_PARSER_CONTENT;
8330 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8331 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8332 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8333 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8334 (ctxt->input->cur[8] == '[')) {
8335 SKIP(9);
8336 ctxt->instate = XML_PARSER_CDATA_SECTION;
8337#ifdef DEBUG_PUSH
8338 xmlGenericError(xmlGenericErrorContext,
8339 "PP: entering CDATA_SECTION\n");
8340#endif
8341 break;
8342 } else if ((cur == '<') && (next == '!') &&
8343 (avail < 9)) {
8344 goto done;
8345 } else if ((cur == '<') && (next == '/')) {
8346 ctxt->instate = XML_PARSER_END_TAG;
8347#ifdef DEBUG_PUSH
8348 xmlGenericError(xmlGenericErrorContext,
8349 "PP: entering END_TAG\n");
8350#endif
8351 break;
8352 } else if (cur == '<') {
8353 ctxt->instate = XML_PARSER_START_TAG;
8354#ifdef DEBUG_PUSH
8355 xmlGenericError(xmlGenericErrorContext,
8356 "PP: entering START_TAG\n");
8357#endif
8358 break;
8359 } else if (cur == '&') {
8360 if ((!terminate) &&
8361 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8362 goto done;
8363#ifdef DEBUG_PUSH
8364 xmlGenericError(xmlGenericErrorContext,
8365 "PP: Parsing Reference\n");
8366#endif
8367 xmlParseReference(ctxt);
8368 } else {
8369 /* TODO Avoid the extra copy, handle directly !!! */
8370 /*
8371 * Goal of the following test is:
8372 * - minimize calls to the SAX 'character' callback
8373 * when they are mergeable
8374 * - handle an problem for isBlank when we only parse
8375 * a sequence of blank chars and the next one is
8376 * not available to check against '<' presence.
8377 * - tries to homogenize the differences in SAX
8378 * callbacks beween the push and pull versions
8379 * of the parser.
8380 */
8381 if ((ctxt->inputNr == 1) &&
8382 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8383 if ((!terminate) &&
8384 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8385 goto done;
8386 }
8387 ctxt->checkIndex = 0;
8388#ifdef DEBUG_PUSH
8389 xmlGenericError(xmlGenericErrorContext,
8390 "PP: Parsing char data\n");
8391#endif
8392 xmlParseCharData(ctxt, 0);
8393 }
8394 /*
8395 * Pop-up of finished entities.
8396 */
8397 while ((RAW == 0) && (ctxt->inputNr > 1))
8398 xmlPopInput(ctxt);
8399 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8400 (tok == ctxt->token)) {
8401 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8402 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8403 ctxt->sax->error(ctxt->userData,
8404 "detected an error in element content\n");
8405 ctxt->wellFormed = 0;
8406 ctxt->disableSAX = 1;
8407 ctxt->instate = XML_PARSER_EOF;
8408 break;
8409 }
8410 break;
8411 }
8412 case XML_PARSER_CDATA_SECTION: {
8413 /*
8414 * The Push mode need to have the SAX callback for
8415 * cdataBlock merge back contiguous callbacks.
8416 */
8417 int base;
8418
8419 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8420 if (base < 0) {
8421 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8422 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8423 if (ctxt->sax->cdataBlock != NULL)
8424 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8425 XML_PARSER_BIG_BUFFER_SIZE);
8426 }
8427 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8428 ctxt->checkIndex = 0;
8429 }
8430 goto done;
8431 } else {
8432 if ((ctxt->sax != NULL) && (base > 0) &&
8433 (!ctxt->disableSAX)) {
8434 if (ctxt->sax->cdataBlock != NULL)
8435 ctxt->sax->cdataBlock(ctxt->userData,
8436 ctxt->input->cur, base);
8437 }
8438 SKIP(base + 3);
8439 ctxt->checkIndex = 0;
8440 ctxt->instate = XML_PARSER_CONTENT;
8441#ifdef DEBUG_PUSH
8442 xmlGenericError(xmlGenericErrorContext,
8443 "PP: entering CONTENT\n");
8444#endif
8445 }
8446 break;
8447 }
8448 case XML_PARSER_END_TAG:
8449 if (avail < 2)
8450 goto done;
8451 if ((!terminate) &&
8452 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8453 goto done;
8454 xmlParseEndTag(ctxt);
8455 if (ctxt->name == NULL) {
8456 ctxt->instate = XML_PARSER_EPILOG;
8457#ifdef DEBUG_PUSH
8458 xmlGenericError(xmlGenericErrorContext,
8459 "PP: entering EPILOG\n");
8460#endif
8461 } else {
8462 ctxt->instate = XML_PARSER_CONTENT;
8463#ifdef DEBUG_PUSH
8464 xmlGenericError(xmlGenericErrorContext,
8465 "PP: entering CONTENT\n");
8466#endif
8467 }
8468 break;
8469 case XML_PARSER_DTD: {
8470 /*
8471 * Sorry but progressive parsing of the internal subset
8472 * is not expected to be supported. We first check that
8473 * the full content of the internal subset is available and
8474 * the parsing is launched only at that point.
8475 * Internal subset ends up with "']' S? '>'" in an unescaped
8476 * section and not in a ']]>' sequence which are conditional
8477 * sections (whoever argued to keep that crap in XML deserve
8478 * a place in hell !).
8479 */
8480 int base, i;
8481 xmlChar *buf;
8482 xmlChar quote = 0;
8483
8484 base = ctxt->input->cur - ctxt->input->base;
8485 if (base < 0) return(0);
8486 if (ctxt->checkIndex > base)
8487 base = ctxt->checkIndex;
8488 buf = ctxt->input->buf->buffer->content;
8489 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8490 base++) {
8491 if (quote != 0) {
8492 if (buf[base] == quote)
8493 quote = 0;
8494 continue;
8495 }
8496 if (buf[base] == '"') {
8497 quote = '"';
8498 continue;
8499 }
8500 if (buf[base] == '\'') {
8501 quote = '\'';
8502 continue;
8503 }
8504 if (buf[base] == ']') {
8505 if ((unsigned int) base +1 >=
8506 ctxt->input->buf->buffer->use)
8507 break;
8508 if (buf[base + 1] == ']') {
8509 /* conditional crap, skip both ']' ! */
8510 base++;
8511 continue;
8512 }
8513 for (i = 0;
8514 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8515 i++) {
8516 if (buf[base + i] == '>')
8517 goto found_end_int_subset;
8518 }
8519 break;
8520 }
8521 }
8522 /*
8523 * We didn't found the end of the Internal subset
8524 */
8525 if (quote == 0)
8526 ctxt->checkIndex = base;
8527#ifdef DEBUG_PUSH
8528 if (next == 0)
8529 xmlGenericError(xmlGenericErrorContext,
8530 "PP: lookup of int subset end filed\n");
8531#endif
8532 goto done;
8533
8534found_end_int_subset:
8535 xmlParseInternalSubset(ctxt);
8536 ctxt->inSubset = 2;
8537 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8538 (ctxt->sax->externalSubset != NULL))
8539 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8540 ctxt->extSubSystem, ctxt->extSubURI);
8541 ctxt->inSubset = 0;
8542 ctxt->instate = XML_PARSER_PROLOG;
8543 ctxt->checkIndex = 0;
8544#ifdef DEBUG_PUSH
8545 xmlGenericError(xmlGenericErrorContext,
8546 "PP: entering PROLOG\n");
8547#endif
8548 break;
8549 }
8550 case XML_PARSER_COMMENT:
8551 xmlGenericError(xmlGenericErrorContext,
8552 "PP: internal error, state == COMMENT\n");
8553 ctxt->instate = XML_PARSER_CONTENT;
8554#ifdef DEBUG_PUSH
8555 xmlGenericError(xmlGenericErrorContext,
8556 "PP: entering CONTENT\n");
8557#endif
8558 break;
8559 case XML_PARSER_PI:
8560 xmlGenericError(xmlGenericErrorContext,
8561 "PP: internal error, state == PI\n");
8562 ctxt->instate = XML_PARSER_CONTENT;
8563#ifdef DEBUG_PUSH
8564 xmlGenericError(xmlGenericErrorContext,
8565 "PP: entering CONTENT\n");
8566#endif
8567 break;
8568 case XML_PARSER_ENTITY_DECL:
8569 xmlGenericError(xmlGenericErrorContext,
8570 "PP: internal error, state == ENTITY_DECL\n");
8571 ctxt->instate = XML_PARSER_DTD;
8572#ifdef DEBUG_PUSH
8573 xmlGenericError(xmlGenericErrorContext,
8574 "PP: entering DTD\n");
8575#endif
8576 break;
8577 case XML_PARSER_ENTITY_VALUE:
8578 xmlGenericError(xmlGenericErrorContext,
8579 "PP: internal error, state == ENTITY_VALUE\n");
8580 ctxt->instate = XML_PARSER_CONTENT;
8581#ifdef DEBUG_PUSH
8582 xmlGenericError(xmlGenericErrorContext,
8583 "PP: entering DTD\n");
8584#endif
8585 break;
8586 case XML_PARSER_ATTRIBUTE_VALUE:
8587 xmlGenericError(xmlGenericErrorContext,
8588 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8589 ctxt->instate = XML_PARSER_START_TAG;
8590#ifdef DEBUG_PUSH
8591 xmlGenericError(xmlGenericErrorContext,
8592 "PP: entering START_TAG\n");
8593#endif
8594 break;
8595 case XML_PARSER_SYSTEM_LITERAL:
8596 xmlGenericError(xmlGenericErrorContext,
8597 "PP: internal error, state == SYSTEM_LITERAL\n");
8598 ctxt->instate = XML_PARSER_START_TAG;
8599#ifdef DEBUG_PUSH
8600 xmlGenericError(xmlGenericErrorContext,
8601 "PP: entering START_TAG\n");
8602#endif
8603 break;
8604 }
8605 }
8606done:
8607#ifdef DEBUG_PUSH
8608 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8609#endif
8610 return(ret);
8611}
8612
8613/**
Owen Taylor3473f882001-02-23 17:55:21 +00008614 * xmlParseChunk:
8615 * @ctxt: an XML parser context
8616 * @chunk: an char array
8617 * @size: the size in byte of the chunk
8618 * @terminate: last chunk indicator
8619 *
8620 * Parse a Chunk of memory
8621 *
8622 * Returns zero if no error, the xmlParserErrors otherwise.
8623 */
8624int
8625xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8626 int terminate) {
8627 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8628 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8629 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8630 int cur = ctxt->input->cur - ctxt->input->base;
8631
8632 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8633 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8634 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008635 ctxt->input->end =
8636 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008637#ifdef DEBUG_PUSH
8638 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8639#endif
8640
8641 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8642 xmlParseTryOrFinish(ctxt, terminate);
8643 } else if (ctxt->instate != XML_PARSER_EOF) {
8644 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8645 xmlParserInputBufferPtr in = ctxt->input->buf;
8646 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8647 (in->raw != NULL)) {
8648 int nbchars;
8649
8650 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8651 if (nbchars < 0) {
8652 xmlGenericError(xmlGenericErrorContext,
8653 "xmlParseChunk: encoder error\n");
8654 return(XML_ERR_INVALID_ENCODING);
8655 }
8656 }
8657 }
8658 }
8659 xmlParseTryOrFinish(ctxt, terminate);
8660 if (terminate) {
8661 /*
8662 * Check for termination
8663 */
8664 if ((ctxt->instate != XML_PARSER_EOF) &&
8665 (ctxt->instate != XML_PARSER_EPILOG)) {
8666 ctxt->errNo = XML_ERR_DOCUMENT_END;
8667 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8668 ctxt->sax->error(ctxt->userData,
8669 "Extra content at the end of the document\n");
8670 ctxt->wellFormed = 0;
8671 ctxt->disableSAX = 1;
8672 }
8673 if (ctxt->instate != XML_PARSER_EOF) {
8674 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8675 (!ctxt->disableSAX))
8676 ctxt->sax->endDocument(ctxt->userData);
8677 }
8678 ctxt->instate = XML_PARSER_EOF;
8679 }
8680 return((xmlParserErrors) ctxt->errNo);
8681}
8682
8683/************************************************************************
8684 * *
8685 * I/O front end functions to the parser *
8686 * *
8687 ************************************************************************/
8688
8689/**
8690 * xmlStopParser:
8691 * @ctxt: an XML parser context
8692 *
8693 * Blocks further parser processing
8694 */
8695void
8696xmlStopParser(xmlParserCtxtPtr ctxt) {
8697 ctxt->instate = XML_PARSER_EOF;
8698 if (ctxt->input != NULL)
8699 ctxt->input->cur = BAD_CAST"";
8700}
8701
8702/**
8703 * xmlCreatePushParserCtxt:
8704 * @sax: a SAX handler
8705 * @user_data: The user data returned on SAX callbacks
8706 * @chunk: a pointer to an array of chars
8707 * @size: number of chars in the array
8708 * @filename: an optional file name or URI
8709 *
8710 * Create a parser context for using the XML parser in push mode
8711 * To allow content encoding detection, @size should be >= 4
8712 * The value of @filename is used for fetching external entities
8713 * and error/warning reports.
8714 *
8715 * Returns the new parser context or NULL
8716 */
8717xmlParserCtxtPtr
8718xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8719 const char *chunk, int size, const char *filename) {
8720 xmlParserCtxtPtr ctxt;
8721 xmlParserInputPtr inputStream;
8722 xmlParserInputBufferPtr buf;
8723 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8724
8725 /*
8726 * plug some encoding conversion routines
8727 */
8728 if ((chunk != NULL) && (size >= 4))
8729 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8730
8731 buf = xmlAllocParserInputBuffer(enc);
8732 if (buf == NULL) return(NULL);
8733
8734 ctxt = xmlNewParserCtxt();
8735 if (ctxt == NULL) {
8736 xmlFree(buf);
8737 return(NULL);
8738 }
8739 if (sax != NULL) {
8740 if (ctxt->sax != &xmlDefaultSAXHandler)
8741 xmlFree(ctxt->sax);
8742 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8743 if (ctxt->sax == NULL) {
8744 xmlFree(buf);
8745 xmlFree(ctxt);
8746 return(NULL);
8747 }
8748 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8749 if (user_data != NULL)
8750 ctxt->userData = user_data;
8751 }
8752 if (filename == NULL) {
8753 ctxt->directory = NULL;
8754 } else {
8755 ctxt->directory = xmlParserGetDirectory(filename);
8756 }
8757
8758 inputStream = xmlNewInputStream(ctxt);
8759 if (inputStream == NULL) {
8760 xmlFreeParserCtxt(ctxt);
8761 return(NULL);
8762 }
8763
8764 if (filename == NULL)
8765 inputStream->filename = NULL;
8766 else
8767 inputStream->filename = xmlMemStrdup(filename);
8768 inputStream->buf = buf;
8769 inputStream->base = inputStream->buf->buffer->content;
8770 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008771 inputStream->end =
8772 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008773
8774 inputPush(ctxt, inputStream);
8775
8776 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8777 (ctxt->input->buf != NULL)) {
8778 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8779#ifdef DEBUG_PUSH
8780 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8781#endif
8782 }
8783
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008784 if (enc != XML_CHAR_ENCODING_NONE) {
8785 xmlSwitchEncoding(ctxt, enc);
8786 }
8787
Owen Taylor3473f882001-02-23 17:55:21 +00008788 return(ctxt);
8789}
8790
8791/**
8792 * xmlCreateIOParserCtxt:
8793 * @sax: a SAX handler
8794 * @user_data: The user data returned on SAX callbacks
8795 * @ioread: an I/O read function
8796 * @ioclose: an I/O close function
8797 * @ioctx: an I/O handler
8798 * @enc: the charset encoding if known
8799 *
8800 * Create a parser context for using the XML parser with an existing
8801 * I/O stream
8802 *
8803 * Returns the new parser context or NULL
8804 */
8805xmlParserCtxtPtr
8806xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8807 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8808 void *ioctx, xmlCharEncoding enc) {
8809 xmlParserCtxtPtr ctxt;
8810 xmlParserInputPtr inputStream;
8811 xmlParserInputBufferPtr buf;
8812
8813 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8814 if (buf == NULL) return(NULL);
8815
8816 ctxt = xmlNewParserCtxt();
8817 if (ctxt == NULL) {
8818 xmlFree(buf);
8819 return(NULL);
8820 }
8821 if (sax != NULL) {
8822 if (ctxt->sax != &xmlDefaultSAXHandler)
8823 xmlFree(ctxt->sax);
8824 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8825 if (ctxt->sax == NULL) {
8826 xmlFree(buf);
8827 xmlFree(ctxt);
8828 return(NULL);
8829 }
8830 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8831 if (user_data != NULL)
8832 ctxt->userData = user_data;
8833 }
8834
8835 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8836 if (inputStream == NULL) {
8837 xmlFreeParserCtxt(ctxt);
8838 return(NULL);
8839 }
8840 inputPush(ctxt, inputStream);
8841
8842 return(ctxt);
8843}
8844
8845/************************************************************************
8846 * *
8847 * Front ends when parsing a Dtd *
8848 * *
8849 ************************************************************************/
8850
8851/**
8852 * xmlIOParseDTD:
8853 * @sax: the SAX handler block or NULL
8854 * @input: an Input Buffer
8855 * @enc: the charset encoding if known
8856 *
8857 * Load and parse a DTD
8858 *
8859 * Returns the resulting xmlDtdPtr or NULL in case of error.
8860 * @input will be freed at parsing end.
8861 */
8862
8863xmlDtdPtr
8864xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8865 xmlCharEncoding enc) {
8866 xmlDtdPtr ret = NULL;
8867 xmlParserCtxtPtr ctxt;
8868 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008869 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008870
8871 if (input == NULL)
8872 return(NULL);
8873
8874 ctxt = xmlNewParserCtxt();
8875 if (ctxt == NULL) {
8876 return(NULL);
8877 }
8878
8879 /*
8880 * Set-up the SAX context
8881 */
8882 if (sax != NULL) {
8883 if (ctxt->sax != NULL)
8884 xmlFree(ctxt->sax);
8885 ctxt->sax = sax;
8886 ctxt->userData = NULL;
8887 }
8888
8889 /*
8890 * generate a parser input from the I/O handler
8891 */
8892
8893 pinput = xmlNewIOInputStream(ctxt, input, enc);
8894 if (pinput == NULL) {
8895 if (sax != NULL) ctxt->sax = NULL;
8896 xmlFreeParserCtxt(ctxt);
8897 return(NULL);
8898 }
8899
8900 /*
8901 * plug some encoding conversion routines here.
8902 */
8903 xmlPushInput(ctxt, pinput);
8904
8905 pinput->filename = NULL;
8906 pinput->line = 1;
8907 pinput->col = 1;
8908 pinput->base = ctxt->input->cur;
8909 pinput->cur = ctxt->input->cur;
8910 pinput->free = NULL;
8911
8912 /*
8913 * let's parse that entity knowing it's an external subset.
8914 */
8915 ctxt->inSubset = 2;
8916 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8917 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8918 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008919
8920 if (enc == XML_CHAR_ENCODING_NONE) {
8921 /*
8922 * Get the 4 first bytes and decode the charset
8923 * if enc != XML_CHAR_ENCODING_NONE
8924 * plug some encoding conversion routines.
8925 */
8926 start[0] = RAW;
8927 start[1] = NXT(1);
8928 start[2] = NXT(2);
8929 start[3] = NXT(3);
8930 enc = xmlDetectCharEncoding(start, 4);
8931 if (enc != XML_CHAR_ENCODING_NONE) {
8932 xmlSwitchEncoding(ctxt, enc);
8933 }
8934 }
8935
Owen Taylor3473f882001-02-23 17:55:21 +00008936 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8937
8938 if (ctxt->myDoc != NULL) {
8939 if (ctxt->wellFormed) {
8940 ret = ctxt->myDoc->extSubset;
8941 ctxt->myDoc->extSubset = NULL;
8942 } else {
8943 ret = NULL;
8944 }
8945 xmlFreeDoc(ctxt->myDoc);
8946 ctxt->myDoc = NULL;
8947 }
8948 if (sax != NULL) ctxt->sax = NULL;
8949 xmlFreeParserCtxt(ctxt);
8950
8951 return(ret);
8952}
8953
8954/**
8955 * xmlSAXParseDTD:
8956 * @sax: the SAX handler block
8957 * @ExternalID: a NAME* containing the External ID of the DTD
8958 * @SystemID: a NAME* containing the URL to the DTD
8959 *
8960 * Load and parse an external subset.
8961 *
8962 * Returns the resulting xmlDtdPtr or NULL in case of error.
8963 */
8964
8965xmlDtdPtr
8966xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8967 const xmlChar *SystemID) {
8968 xmlDtdPtr ret = NULL;
8969 xmlParserCtxtPtr ctxt;
8970 xmlParserInputPtr input = NULL;
8971 xmlCharEncoding enc;
8972
8973 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8974
8975 ctxt = xmlNewParserCtxt();
8976 if (ctxt == NULL) {
8977 return(NULL);
8978 }
8979
8980 /*
8981 * Set-up the SAX context
8982 */
8983 if (sax != NULL) {
8984 if (ctxt->sax != NULL)
8985 xmlFree(ctxt->sax);
8986 ctxt->sax = sax;
8987 ctxt->userData = NULL;
8988 }
8989
8990 /*
8991 * Ask the Entity resolver to load the damn thing
8992 */
8993
8994 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8995 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8996 if (input == NULL) {
8997 if (sax != NULL) ctxt->sax = NULL;
8998 xmlFreeParserCtxt(ctxt);
8999 return(NULL);
9000 }
9001
9002 /*
9003 * plug some encoding conversion routines here.
9004 */
9005 xmlPushInput(ctxt, input);
9006 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
9007 xmlSwitchEncoding(ctxt, enc);
9008
9009 if (input->filename == NULL)
9010 input->filename = (char *) xmlStrdup(SystemID);
9011 input->line = 1;
9012 input->col = 1;
9013 input->base = ctxt->input->cur;
9014 input->cur = ctxt->input->cur;
9015 input->free = NULL;
9016
9017 /*
9018 * let's parse that entity knowing it's an external subset.
9019 */
9020 ctxt->inSubset = 2;
9021 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
9022 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
9023 ExternalID, SystemID);
9024 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
9025
9026 if (ctxt->myDoc != NULL) {
9027 if (ctxt->wellFormed) {
9028 ret = ctxt->myDoc->extSubset;
9029 ctxt->myDoc->extSubset = NULL;
9030 } else {
9031 ret = NULL;
9032 }
9033 xmlFreeDoc(ctxt->myDoc);
9034 ctxt->myDoc = NULL;
9035 }
9036 if (sax != NULL) ctxt->sax = NULL;
9037 xmlFreeParserCtxt(ctxt);
9038
9039 return(ret);
9040}
9041
9042/**
9043 * xmlParseDTD:
9044 * @ExternalID: a NAME* containing the External ID of the DTD
9045 * @SystemID: a NAME* containing the URL to the DTD
9046 *
9047 * Load and parse an external subset.
9048 *
9049 * Returns the resulting xmlDtdPtr or NULL in case of error.
9050 */
9051
9052xmlDtdPtr
9053xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9054 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9055}
9056
9057/************************************************************************
9058 * *
9059 * Front ends when parsing an Entity *
9060 * *
9061 ************************************************************************/
9062
9063/**
Owen Taylor3473f882001-02-23 17:55:21 +00009064 * xmlParseCtxtExternalEntity:
9065 * @ctx: the existing parsing context
9066 * @URL: the URL for the entity to load
9067 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009068 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009069 *
9070 * Parse an external general entity within an existing parsing context
9071 * An external general parsed entity is well-formed if it matches the
9072 * production labeled extParsedEnt.
9073 *
9074 * [78] extParsedEnt ::= TextDecl? content
9075 *
9076 * Returns 0 if the entity is well formed, -1 in case of args problem and
9077 * the parser error code otherwise
9078 */
9079
9080int
9081xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009082 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009083 xmlParserCtxtPtr ctxt;
9084 xmlDocPtr newDoc;
9085 xmlSAXHandlerPtr oldsax = NULL;
9086 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009087 xmlChar start[4];
9088 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009089
9090 if (ctx->depth > 40) {
9091 return(XML_ERR_ENTITY_LOOP);
9092 }
9093
Daniel Veillardcda96922001-08-21 10:56:31 +00009094 if (lst != NULL)
9095 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009096 if ((URL == NULL) && (ID == NULL))
9097 return(-1);
9098 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9099 return(-1);
9100
9101
9102 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9103 if (ctxt == NULL) return(-1);
9104 ctxt->userData = ctxt;
9105 oldsax = ctxt->sax;
9106 ctxt->sax = ctx->sax;
9107 newDoc = xmlNewDoc(BAD_CAST "1.0");
9108 if (newDoc == NULL) {
9109 xmlFreeParserCtxt(ctxt);
9110 return(-1);
9111 }
9112 if (ctx->myDoc != NULL) {
9113 newDoc->intSubset = ctx->myDoc->intSubset;
9114 newDoc->extSubset = ctx->myDoc->extSubset;
9115 }
9116 if (ctx->myDoc->URL != NULL) {
9117 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9118 }
9119 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9120 if (newDoc->children == NULL) {
9121 ctxt->sax = oldsax;
9122 xmlFreeParserCtxt(ctxt);
9123 newDoc->intSubset = NULL;
9124 newDoc->extSubset = NULL;
9125 xmlFreeDoc(newDoc);
9126 return(-1);
9127 }
9128 nodePush(ctxt, newDoc->children);
9129 if (ctx->myDoc == NULL) {
9130 ctxt->myDoc = newDoc;
9131 } else {
9132 ctxt->myDoc = ctx->myDoc;
9133 newDoc->children->doc = ctx->myDoc;
9134 }
9135
Daniel Veillard87a764e2001-06-20 17:41:10 +00009136 /*
9137 * Get the 4 first bytes and decode the charset
9138 * if enc != XML_CHAR_ENCODING_NONE
9139 * plug some encoding conversion routines.
9140 */
9141 GROW
9142 start[0] = RAW;
9143 start[1] = NXT(1);
9144 start[2] = NXT(2);
9145 start[3] = NXT(3);
9146 enc = xmlDetectCharEncoding(start, 4);
9147 if (enc != XML_CHAR_ENCODING_NONE) {
9148 xmlSwitchEncoding(ctxt, enc);
9149 }
9150
Owen Taylor3473f882001-02-23 17:55:21 +00009151 /*
9152 * Parse a possible text declaration first
9153 */
Owen Taylor3473f882001-02-23 17:55:21 +00009154 if ((RAW == '<') && (NXT(1) == '?') &&
9155 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9156 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9157 xmlParseTextDecl(ctxt);
9158 }
9159
9160 /*
9161 * Doing validity checking on chunk doesn't make sense
9162 */
9163 ctxt->instate = XML_PARSER_CONTENT;
9164 ctxt->validate = ctx->validate;
9165 ctxt->loadsubset = ctx->loadsubset;
9166 ctxt->depth = ctx->depth + 1;
9167 ctxt->replaceEntities = ctx->replaceEntities;
9168 if (ctxt->validate) {
9169 ctxt->vctxt.error = ctx->vctxt.error;
9170 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009171 } else {
9172 ctxt->vctxt.error = NULL;
9173 ctxt->vctxt.warning = NULL;
9174 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009175 ctxt->vctxt.nodeTab = NULL;
9176 ctxt->vctxt.nodeNr = 0;
9177 ctxt->vctxt.nodeMax = 0;
9178 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009179
9180 xmlParseContent(ctxt);
9181
9182 if ((RAW == '<') && (NXT(1) == '/')) {
9183 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9185 ctxt->sax->error(ctxt->userData,
9186 "chunk is not well balanced\n");
9187 ctxt->wellFormed = 0;
9188 ctxt->disableSAX = 1;
9189 } else if (RAW != 0) {
9190 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9192 ctxt->sax->error(ctxt->userData,
9193 "extra content at the end of well balanced chunk\n");
9194 ctxt->wellFormed = 0;
9195 ctxt->disableSAX = 1;
9196 }
9197 if (ctxt->node != newDoc->children) {
9198 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9200 ctxt->sax->error(ctxt->userData,
9201 "chunk is not well balanced\n");
9202 ctxt->wellFormed = 0;
9203 ctxt->disableSAX = 1;
9204 }
9205
9206 if (!ctxt->wellFormed) {
9207 if (ctxt->errNo == 0)
9208 ret = 1;
9209 else
9210 ret = ctxt->errNo;
9211 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009212 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009213 xmlNodePtr cur;
9214
9215 /*
9216 * Return the newly created nodeset after unlinking it from
9217 * they pseudo parent.
9218 */
9219 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009220 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009221 while (cur != NULL) {
9222 cur->parent = NULL;
9223 cur = cur->next;
9224 }
9225 newDoc->children->children = NULL;
9226 }
9227 ret = 0;
9228 }
9229 ctxt->sax = oldsax;
9230 xmlFreeParserCtxt(ctxt);
9231 newDoc->intSubset = NULL;
9232 newDoc->extSubset = NULL;
9233 xmlFreeDoc(newDoc);
9234
9235 return(ret);
9236}
9237
9238/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009239 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009240 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009241 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009242 * @sax: the SAX handler bloc (possibly NULL)
9243 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9244 * @depth: Used for loop detection, use 0
9245 * @URL: the URL for the entity to load
9246 * @ID: the System ID for the entity to load
9247 * @list: the return value for the set of parsed nodes
9248 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009249 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009250 *
9251 * Returns 0 if the entity is well formed, -1 in case of args problem and
9252 * the parser error code otherwise
9253 */
9254
Daniel Veillard257d9102001-05-08 10:41:44 +00009255static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009256xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9257 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009258 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009259 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009260 xmlParserCtxtPtr ctxt;
9261 xmlDocPtr newDoc;
9262 xmlSAXHandlerPtr oldsax = NULL;
9263 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009264 xmlChar start[4];
9265 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009266
9267 if (depth > 40) {
9268 return(XML_ERR_ENTITY_LOOP);
9269 }
9270
9271
9272
9273 if (list != NULL)
9274 *list = NULL;
9275 if ((URL == NULL) && (ID == NULL))
9276 return(-1);
9277 if (doc == NULL) /* @@ relax but check for dereferences */
9278 return(-1);
9279
9280
9281 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9282 if (ctxt == NULL) return(-1);
9283 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009284 if (oldctxt != NULL) {
9285 ctxt->_private = oldctxt->_private;
9286 ctxt->loadsubset = oldctxt->loadsubset;
9287 ctxt->validate = oldctxt->validate;
9288 ctxt->external = oldctxt->external;
9289 } else {
9290 /*
9291 * Doing validity checking on chunk without context
9292 * doesn't make sense
9293 */
9294 ctxt->_private = NULL;
9295 ctxt->validate = 0;
9296 ctxt->external = 2;
9297 ctxt->loadsubset = 0;
9298 }
Owen Taylor3473f882001-02-23 17:55:21 +00009299 if (sax != NULL) {
9300 oldsax = ctxt->sax;
9301 ctxt->sax = sax;
9302 if (user_data != NULL)
9303 ctxt->userData = user_data;
9304 }
9305 newDoc = xmlNewDoc(BAD_CAST "1.0");
9306 if (newDoc == NULL) {
9307 xmlFreeParserCtxt(ctxt);
9308 return(-1);
9309 }
9310 if (doc != NULL) {
9311 newDoc->intSubset = doc->intSubset;
9312 newDoc->extSubset = doc->extSubset;
9313 }
9314 if (doc->URL != NULL) {
9315 newDoc->URL = xmlStrdup(doc->URL);
9316 }
9317 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9318 if (newDoc->children == NULL) {
9319 if (sax != NULL)
9320 ctxt->sax = oldsax;
9321 xmlFreeParserCtxt(ctxt);
9322 newDoc->intSubset = NULL;
9323 newDoc->extSubset = NULL;
9324 xmlFreeDoc(newDoc);
9325 return(-1);
9326 }
9327 nodePush(ctxt, newDoc->children);
9328 if (doc == NULL) {
9329 ctxt->myDoc = newDoc;
9330 } else {
9331 ctxt->myDoc = doc;
9332 newDoc->children->doc = doc;
9333 }
9334
Daniel Veillard87a764e2001-06-20 17:41:10 +00009335 /*
9336 * Get the 4 first bytes and decode the charset
9337 * if enc != XML_CHAR_ENCODING_NONE
9338 * plug some encoding conversion routines.
9339 */
9340 GROW;
9341 start[0] = RAW;
9342 start[1] = NXT(1);
9343 start[2] = NXT(2);
9344 start[3] = NXT(3);
9345 enc = xmlDetectCharEncoding(start, 4);
9346 if (enc != XML_CHAR_ENCODING_NONE) {
9347 xmlSwitchEncoding(ctxt, enc);
9348 }
9349
Owen Taylor3473f882001-02-23 17:55:21 +00009350 /*
9351 * Parse a possible text declaration first
9352 */
Owen Taylor3473f882001-02-23 17:55:21 +00009353 if ((RAW == '<') && (NXT(1) == '?') &&
9354 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9355 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9356 xmlParseTextDecl(ctxt);
9357 }
9358
Owen Taylor3473f882001-02-23 17:55:21 +00009359 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009360 ctxt->depth = depth;
9361
9362 xmlParseContent(ctxt);
9363
9364 if ((RAW == '<') && (NXT(1) == '/')) {
9365 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9367 ctxt->sax->error(ctxt->userData,
9368 "chunk is not well balanced\n");
9369 ctxt->wellFormed = 0;
9370 ctxt->disableSAX = 1;
9371 } else if (RAW != 0) {
9372 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9373 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9374 ctxt->sax->error(ctxt->userData,
9375 "extra content at the end of well balanced chunk\n");
9376 ctxt->wellFormed = 0;
9377 ctxt->disableSAX = 1;
9378 }
9379 if (ctxt->node != newDoc->children) {
9380 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9381 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9382 ctxt->sax->error(ctxt->userData,
9383 "chunk is not well balanced\n");
9384 ctxt->wellFormed = 0;
9385 ctxt->disableSAX = 1;
9386 }
9387
9388 if (!ctxt->wellFormed) {
9389 if (ctxt->errNo == 0)
9390 ret = 1;
9391 else
9392 ret = ctxt->errNo;
9393 } else {
9394 if (list != NULL) {
9395 xmlNodePtr cur;
9396
9397 /*
9398 * Return the newly created nodeset after unlinking it from
9399 * they pseudo parent.
9400 */
9401 cur = newDoc->children->children;
9402 *list = cur;
9403 while (cur != NULL) {
9404 cur->parent = NULL;
9405 cur = cur->next;
9406 }
9407 newDoc->children->children = NULL;
9408 }
9409 ret = 0;
9410 }
9411 if (sax != NULL)
9412 ctxt->sax = oldsax;
9413 xmlFreeParserCtxt(ctxt);
9414 newDoc->intSubset = NULL;
9415 newDoc->extSubset = NULL;
9416 xmlFreeDoc(newDoc);
9417
9418 return(ret);
9419}
9420
9421/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009422 * xmlParseExternalEntity:
9423 * @doc: the document the chunk pertains to
9424 * @sax: the SAX handler bloc (possibly NULL)
9425 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9426 * @depth: Used for loop detection, use 0
9427 * @URL: the URL for the entity to load
9428 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009429 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009430 *
9431 * Parse an external general entity
9432 * An external general parsed entity is well-formed if it matches the
9433 * production labeled extParsedEnt.
9434 *
9435 * [78] extParsedEnt ::= TextDecl? content
9436 *
9437 * Returns 0 if the entity is well formed, -1 in case of args problem and
9438 * the parser error code otherwise
9439 */
9440
9441int
9442xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009443 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009444 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009445 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009446}
9447
9448/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009449 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009450 * @doc: the document the chunk pertains to
9451 * @sax: the SAX handler bloc (possibly NULL)
9452 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9453 * @depth: Used for loop detection, use 0
9454 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009455 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009456 *
9457 * Parse a well-balanced chunk of an XML document
9458 * called by the parser
9459 * The allowed sequence for the Well Balanced Chunk is the one defined by
9460 * the content production in the XML grammar:
9461 *
9462 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9463 *
9464 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9465 * the parser error code otherwise
9466 */
9467
9468int
9469xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009470 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009471 xmlParserCtxtPtr ctxt;
9472 xmlDocPtr newDoc;
9473 xmlSAXHandlerPtr oldsax = NULL;
9474 int size;
9475 int ret = 0;
9476
9477 if (depth > 40) {
9478 return(XML_ERR_ENTITY_LOOP);
9479 }
9480
9481
Daniel Veillardcda96922001-08-21 10:56:31 +00009482 if (lst != NULL)
9483 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009484 if (string == NULL)
9485 return(-1);
9486
9487 size = xmlStrlen(string);
9488
9489 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9490 if (ctxt == NULL) return(-1);
9491 ctxt->userData = ctxt;
9492 if (sax != NULL) {
9493 oldsax = ctxt->sax;
9494 ctxt->sax = sax;
9495 if (user_data != NULL)
9496 ctxt->userData = user_data;
9497 }
9498 newDoc = xmlNewDoc(BAD_CAST "1.0");
9499 if (newDoc == NULL) {
9500 xmlFreeParserCtxt(ctxt);
9501 return(-1);
9502 }
9503 if (doc != NULL) {
9504 newDoc->intSubset = doc->intSubset;
9505 newDoc->extSubset = doc->extSubset;
9506 }
9507 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9508 if (newDoc->children == NULL) {
9509 if (sax != NULL)
9510 ctxt->sax = oldsax;
9511 xmlFreeParserCtxt(ctxt);
9512 newDoc->intSubset = NULL;
9513 newDoc->extSubset = NULL;
9514 xmlFreeDoc(newDoc);
9515 return(-1);
9516 }
9517 nodePush(ctxt, newDoc->children);
9518 if (doc == NULL) {
9519 ctxt->myDoc = newDoc;
9520 } else {
9521 ctxt->myDoc = doc;
9522 newDoc->children->doc = doc;
9523 }
9524 ctxt->instate = XML_PARSER_CONTENT;
9525 ctxt->depth = depth;
9526
9527 /*
9528 * Doing validity checking on chunk doesn't make sense
9529 */
9530 ctxt->validate = 0;
9531 ctxt->loadsubset = 0;
9532
9533 xmlParseContent(ctxt);
9534
9535 if ((RAW == '<') && (NXT(1) == '/')) {
9536 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9537 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9538 ctxt->sax->error(ctxt->userData,
9539 "chunk is not well balanced\n");
9540 ctxt->wellFormed = 0;
9541 ctxt->disableSAX = 1;
9542 } else if (RAW != 0) {
9543 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9544 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9545 ctxt->sax->error(ctxt->userData,
9546 "extra content at the end of well balanced chunk\n");
9547 ctxt->wellFormed = 0;
9548 ctxt->disableSAX = 1;
9549 }
9550 if (ctxt->node != newDoc->children) {
9551 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9553 ctxt->sax->error(ctxt->userData,
9554 "chunk is not well balanced\n");
9555 ctxt->wellFormed = 0;
9556 ctxt->disableSAX = 1;
9557 }
9558
9559 if (!ctxt->wellFormed) {
9560 if (ctxt->errNo == 0)
9561 ret = 1;
9562 else
9563 ret = ctxt->errNo;
9564 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009565 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009566 xmlNodePtr cur;
9567
9568 /*
9569 * Return the newly created nodeset after unlinking it from
9570 * they pseudo parent.
9571 */
9572 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009573 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009574 while (cur != NULL) {
9575 cur->parent = NULL;
9576 cur = cur->next;
9577 }
9578 newDoc->children->children = NULL;
9579 }
9580 ret = 0;
9581 }
9582 if (sax != NULL)
9583 ctxt->sax = oldsax;
9584 xmlFreeParserCtxt(ctxt);
9585 newDoc->intSubset = NULL;
9586 newDoc->extSubset = NULL;
9587 xmlFreeDoc(newDoc);
9588
9589 return(ret);
9590}
9591
9592/**
9593 * xmlSAXParseEntity:
9594 * @sax: the SAX handler block
9595 * @filename: the filename
9596 *
9597 * parse an XML external entity out of context and build a tree.
9598 * It use the given SAX function block to handle the parsing callback.
9599 * If sax is NULL, fallback to the default DOM tree building routines.
9600 *
9601 * [78] extParsedEnt ::= TextDecl? content
9602 *
9603 * This correspond to a "Well Balanced" chunk
9604 *
9605 * Returns the resulting document tree
9606 */
9607
9608xmlDocPtr
9609xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9610 xmlDocPtr ret;
9611 xmlParserCtxtPtr ctxt;
9612 char *directory = NULL;
9613
9614 ctxt = xmlCreateFileParserCtxt(filename);
9615 if (ctxt == NULL) {
9616 return(NULL);
9617 }
9618 if (sax != NULL) {
9619 if (ctxt->sax != NULL)
9620 xmlFree(ctxt->sax);
9621 ctxt->sax = sax;
9622 ctxt->userData = NULL;
9623 }
9624
9625 if ((ctxt->directory == NULL) && (directory == NULL))
9626 directory = xmlParserGetDirectory(filename);
9627
9628 xmlParseExtParsedEnt(ctxt);
9629
9630 if (ctxt->wellFormed)
9631 ret = ctxt->myDoc;
9632 else {
9633 ret = NULL;
9634 xmlFreeDoc(ctxt->myDoc);
9635 ctxt->myDoc = NULL;
9636 }
9637 if (sax != NULL)
9638 ctxt->sax = NULL;
9639 xmlFreeParserCtxt(ctxt);
9640
9641 return(ret);
9642}
9643
9644/**
9645 * xmlParseEntity:
9646 * @filename: the filename
9647 *
9648 * parse an XML external entity out of context and build a tree.
9649 *
9650 * [78] extParsedEnt ::= TextDecl? content
9651 *
9652 * This correspond to a "Well Balanced" chunk
9653 *
9654 * Returns the resulting document tree
9655 */
9656
9657xmlDocPtr
9658xmlParseEntity(const char *filename) {
9659 return(xmlSAXParseEntity(NULL, filename));
9660}
9661
9662/**
9663 * xmlCreateEntityParserCtxt:
9664 * @URL: the entity URL
9665 * @ID: the entity PUBLIC ID
9666 * @base: a posible base for the target URI
9667 *
9668 * Create a parser context for an external entity
9669 * Automatic support for ZLIB/Compress compressed document is provided
9670 * by default if found at compile-time.
9671 *
9672 * Returns the new parser context or NULL
9673 */
9674xmlParserCtxtPtr
9675xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9676 const xmlChar *base) {
9677 xmlParserCtxtPtr ctxt;
9678 xmlParserInputPtr inputStream;
9679 char *directory = NULL;
9680 xmlChar *uri;
9681
9682 ctxt = xmlNewParserCtxt();
9683 if (ctxt == NULL) {
9684 return(NULL);
9685 }
9686
9687 uri = xmlBuildURI(URL, base);
9688
9689 if (uri == NULL) {
9690 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9691 if (inputStream == NULL) {
9692 xmlFreeParserCtxt(ctxt);
9693 return(NULL);
9694 }
9695
9696 inputPush(ctxt, inputStream);
9697
9698 if ((ctxt->directory == NULL) && (directory == NULL))
9699 directory = xmlParserGetDirectory((char *)URL);
9700 if ((ctxt->directory == NULL) && (directory != NULL))
9701 ctxt->directory = directory;
9702 } else {
9703 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9704 if (inputStream == NULL) {
9705 xmlFree(uri);
9706 xmlFreeParserCtxt(ctxt);
9707 return(NULL);
9708 }
9709
9710 inputPush(ctxt, inputStream);
9711
9712 if ((ctxt->directory == NULL) && (directory == NULL))
9713 directory = xmlParserGetDirectory((char *)uri);
9714 if ((ctxt->directory == NULL) && (directory != NULL))
9715 ctxt->directory = directory;
9716 xmlFree(uri);
9717 }
9718
9719 return(ctxt);
9720}
9721
9722/************************************************************************
9723 * *
9724 * Front ends when parsing from a file *
9725 * *
9726 ************************************************************************/
9727
9728/**
9729 * xmlCreateFileParserCtxt:
9730 * @filename: the filename
9731 *
9732 * Create a parser context for a file content.
9733 * Automatic support for ZLIB/Compress compressed document is provided
9734 * by default if found at compile-time.
9735 *
9736 * Returns the new parser context or NULL
9737 */
9738xmlParserCtxtPtr
9739xmlCreateFileParserCtxt(const char *filename)
9740{
9741 xmlParserCtxtPtr ctxt;
9742 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009743 char *directory = NULL;
9744
Owen Taylor3473f882001-02-23 17:55:21 +00009745 ctxt = xmlNewParserCtxt();
9746 if (ctxt == NULL) {
9747 if (xmlDefaultSAXHandler.error != NULL) {
9748 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9749 }
9750 return(NULL);
9751 }
9752
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009753 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009754 if (inputStream == NULL) {
9755 xmlFreeParserCtxt(ctxt);
9756 return(NULL);
9757 }
9758
Owen Taylor3473f882001-02-23 17:55:21 +00009759 inputPush(ctxt, inputStream);
9760 if ((ctxt->directory == NULL) && (directory == NULL))
9761 directory = xmlParserGetDirectory(filename);
9762 if ((ctxt->directory == NULL) && (directory != NULL))
9763 ctxt->directory = directory;
9764
9765 return(ctxt);
9766}
9767
9768/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009769 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009770 * @sax: the SAX handler block
9771 * @filename: the filename
9772 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9773 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009774 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009775 *
9776 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9777 * compressed document is provided by default if found at compile-time.
9778 * It use the given SAX function block to handle the parsing callback.
9779 * If sax is NULL, fallback to the default DOM tree building routines.
9780 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009781 * User data (void *) is stored within the parser context, so it is
9782 * available nearly everywhere in libxml.
9783 *
Owen Taylor3473f882001-02-23 17:55:21 +00009784 * Returns the resulting document tree
9785 */
9786
9787xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009788xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9789 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009790 xmlDocPtr ret;
9791 xmlParserCtxtPtr ctxt;
9792 char *directory = NULL;
9793
Daniel Veillard635ef722001-10-29 11:48:19 +00009794 xmlInitParser();
9795
Owen Taylor3473f882001-02-23 17:55:21 +00009796 ctxt = xmlCreateFileParserCtxt(filename);
9797 if (ctxt == NULL) {
9798 return(NULL);
9799 }
9800 if (sax != NULL) {
9801 if (ctxt->sax != NULL)
9802 xmlFree(ctxt->sax);
9803 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009804 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009805 if (data!=NULL) {
9806 ctxt->_private=data;
9807 }
Owen Taylor3473f882001-02-23 17:55:21 +00009808
9809 if ((ctxt->directory == NULL) && (directory == NULL))
9810 directory = xmlParserGetDirectory(filename);
9811 if ((ctxt->directory == NULL) && (directory != NULL))
9812 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9813
9814 xmlParseDocument(ctxt);
9815
9816 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9817 else {
9818 ret = NULL;
9819 xmlFreeDoc(ctxt->myDoc);
9820 ctxt->myDoc = NULL;
9821 }
9822 if (sax != NULL)
9823 ctxt->sax = NULL;
9824 xmlFreeParserCtxt(ctxt);
9825
9826 return(ret);
9827}
9828
9829/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009830 * xmlSAXParseFile:
9831 * @sax: the SAX handler block
9832 * @filename: the filename
9833 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9834 * documents
9835 *
9836 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9837 * compressed document is provided by default if found at compile-time.
9838 * It use the given SAX function block to handle the parsing callback.
9839 * If sax is NULL, fallback to the default DOM tree building routines.
9840 *
9841 * Returns the resulting document tree
9842 */
9843
9844xmlDocPtr
9845xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9846 int recovery) {
9847 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
9848}
9849
9850/**
Owen Taylor3473f882001-02-23 17:55:21 +00009851 * xmlRecoverDoc:
9852 * @cur: a pointer to an array of xmlChar
9853 *
9854 * parse an XML in-memory document and build a tree.
9855 * In the case the document is not Well Formed, a tree is built anyway
9856 *
9857 * Returns the resulting document tree
9858 */
9859
9860xmlDocPtr
9861xmlRecoverDoc(xmlChar *cur) {
9862 return(xmlSAXParseDoc(NULL, cur, 1));
9863}
9864
9865/**
9866 * xmlParseFile:
9867 * @filename: the filename
9868 *
9869 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9870 * compressed document is provided by default if found at compile-time.
9871 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009872 * Returns the resulting document tree if the file was wellformed,
9873 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009874 */
9875
9876xmlDocPtr
9877xmlParseFile(const char *filename) {
9878 return(xmlSAXParseFile(NULL, filename, 0));
9879}
9880
9881/**
9882 * xmlRecoverFile:
9883 * @filename: the filename
9884 *
9885 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9886 * compressed document is provided by default if found at compile-time.
9887 * In the case the document is not Well Formed, a tree is built anyway
9888 *
9889 * Returns the resulting document tree
9890 */
9891
9892xmlDocPtr
9893xmlRecoverFile(const char *filename) {
9894 return(xmlSAXParseFile(NULL, filename, 1));
9895}
9896
9897
9898/**
9899 * xmlSetupParserForBuffer:
9900 * @ctxt: an XML parser context
9901 * @buffer: a xmlChar * buffer
9902 * @filename: a file name
9903 *
9904 * Setup the parser context to parse a new buffer; Clears any prior
9905 * contents from the parser context. The buffer parameter must not be
9906 * NULL, but the filename parameter can be
9907 */
9908void
9909xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9910 const char* filename)
9911{
9912 xmlParserInputPtr input;
9913
9914 input = xmlNewInputStream(ctxt);
9915 if (input == NULL) {
9916 perror("malloc");
9917 xmlFree(ctxt);
9918 return;
9919 }
9920
9921 xmlClearParserCtxt(ctxt);
9922 if (filename != NULL)
9923 input->filename = xmlMemStrdup(filename);
9924 input->base = buffer;
9925 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009926 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009927 inputPush(ctxt, input);
9928}
9929
9930/**
9931 * xmlSAXUserParseFile:
9932 * @sax: a SAX handler
9933 * @user_data: The user data returned on SAX callbacks
9934 * @filename: a file name
9935 *
9936 * parse an XML file and call the given SAX handler routines.
9937 * Automatic support for ZLIB/Compress compressed document is provided
9938 *
9939 * Returns 0 in case of success or a error number otherwise
9940 */
9941int
9942xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9943 const char *filename) {
9944 int ret = 0;
9945 xmlParserCtxtPtr ctxt;
9946
9947 ctxt = xmlCreateFileParserCtxt(filename);
9948 if (ctxt == NULL) return -1;
9949 if (ctxt->sax != &xmlDefaultSAXHandler)
9950 xmlFree(ctxt->sax);
9951 ctxt->sax = sax;
9952 if (user_data != NULL)
9953 ctxt->userData = user_data;
9954
9955 xmlParseDocument(ctxt);
9956
9957 if (ctxt->wellFormed)
9958 ret = 0;
9959 else {
9960 if (ctxt->errNo != 0)
9961 ret = ctxt->errNo;
9962 else
9963 ret = -1;
9964 }
9965 if (sax != NULL)
9966 ctxt->sax = NULL;
9967 xmlFreeParserCtxt(ctxt);
9968
9969 return ret;
9970}
9971
9972/************************************************************************
9973 * *
9974 * Front ends when parsing from memory *
9975 * *
9976 ************************************************************************/
9977
9978/**
9979 * xmlCreateMemoryParserCtxt:
9980 * @buffer: a pointer to a char array
9981 * @size: the size of the array
9982 *
9983 * Create a parser context for an XML in-memory document.
9984 *
9985 * Returns the new parser context or NULL
9986 */
9987xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009988xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009989 xmlParserCtxtPtr ctxt;
9990 xmlParserInputPtr input;
9991 xmlParserInputBufferPtr buf;
9992
9993 if (buffer == NULL)
9994 return(NULL);
9995 if (size <= 0)
9996 return(NULL);
9997
9998 ctxt = xmlNewParserCtxt();
9999 if (ctxt == NULL)
10000 return(NULL);
10001
10002 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
10003 if (buf == NULL) return(NULL);
10004
10005 input = xmlNewInputStream(ctxt);
10006 if (input == NULL) {
10007 xmlFreeParserCtxt(ctxt);
10008 return(NULL);
10009 }
10010
10011 input->filename = NULL;
10012 input->buf = buf;
10013 input->base = input->buf->buffer->content;
10014 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +000010015 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +000010016
10017 inputPush(ctxt, input);
10018 return(ctxt);
10019}
10020
10021/**
10022 * xmlSAXParseMemory:
10023 * @sax: the SAX handler block
10024 * @buffer: an pointer to a char array
10025 * @size: the size of the array
10026 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
10027 * documents
10028 *
10029 * parse an XML in-memory block and use the given SAX function block
10030 * to handle the parsing callback. If sax is NULL, fallback to the default
10031 * DOM tree building routines.
10032 *
10033 * Returns the resulting document tree
10034 */
10035xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +000010036xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
10037 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +000010038 xmlDocPtr ret;
10039 xmlParserCtxtPtr ctxt;
10040
10041 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10042 if (ctxt == NULL) return(NULL);
10043 if (sax != NULL) {
10044 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010045 }
10046
10047 xmlParseDocument(ctxt);
10048
10049 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10050 else {
10051 ret = NULL;
10052 xmlFreeDoc(ctxt->myDoc);
10053 ctxt->myDoc = NULL;
10054 }
10055 if (sax != NULL)
10056 ctxt->sax = NULL;
10057 xmlFreeParserCtxt(ctxt);
10058
10059 return(ret);
10060}
10061
10062/**
10063 * xmlParseMemory:
10064 * @buffer: an pointer to a char array
10065 * @size: the size of the array
10066 *
10067 * parse an XML in-memory block and build a tree.
10068 *
10069 * Returns the resulting document tree
10070 */
10071
Daniel Veillard50822cb2001-07-26 20:05:51 +000010072xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010073 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10074}
10075
10076/**
10077 * xmlRecoverMemory:
10078 * @buffer: an pointer to a char array
10079 * @size: the size of the array
10080 *
10081 * parse an XML in-memory block and build a tree.
10082 * In the case the document is not Well Formed, a tree is built anyway
10083 *
10084 * Returns the resulting document tree
10085 */
10086
Daniel Veillard50822cb2001-07-26 20:05:51 +000010087xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010088 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10089}
10090
10091/**
10092 * xmlSAXUserParseMemory:
10093 * @sax: a SAX handler
10094 * @user_data: The user data returned on SAX callbacks
10095 * @buffer: an in-memory XML document input
10096 * @size: the length of the XML document in bytes
10097 *
10098 * A better SAX parsing routine.
10099 * parse an XML in-memory buffer and call the given SAX handler routines.
10100 *
10101 * Returns 0 in case of success or a error number otherwise
10102 */
10103int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010104 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010105 int ret = 0;
10106 xmlParserCtxtPtr ctxt;
10107 xmlSAXHandlerPtr oldsax = NULL;
10108
10109 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10110 if (ctxt == NULL) return -1;
10111 if (sax != NULL) {
10112 oldsax = ctxt->sax;
10113 ctxt->sax = sax;
10114 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010115 if (user_data != NULL)
10116 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010117
10118 xmlParseDocument(ctxt);
10119
10120 if (ctxt->wellFormed)
10121 ret = 0;
10122 else {
10123 if (ctxt->errNo != 0)
10124 ret = ctxt->errNo;
10125 else
10126 ret = -1;
10127 }
10128 if (sax != NULL) {
10129 ctxt->sax = oldsax;
10130 }
10131 xmlFreeParserCtxt(ctxt);
10132
10133 return ret;
10134}
10135
10136/**
10137 * xmlCreateDocParserCtxt:
10138 * @cur: a pointer to an array of xmlChar
10139 *
10140 * Creates a parser context for an XML in-memory document.
10141 *
10142 * Returns the new parser context or NULL
10143 */
10144xmlParserCtxtPtr
10145xmlCreateDocParserCtxt(xmlChar *cur) {
10146 int len;
10147
10148 if (cur == NULL)
10149 return(NULL);
10150 len = xmlStrlen(cur);
10151 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10152}
10153
10154/**
10155 * xmlSAXParseDoc:
10156 * @sax: the SAX handler block
10157 * @cur: a pointer to an array of xmlChar
10158 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10159 * documents
10160 *
10161 * parse an XML in-memory document and build a tree.
10162 * It use the given SAX function block to handle the parsing callback.
10163 * If sax is NULL, fallback to the default DOM tree building routines.
10164 *
10165 * Returns the resulting document tree
10166 */
10167
10168xmlDocPtr
10169xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10170 xmlDocPtr ret;
10171 xmlParserCtxtPtr ctxt;
10172
10173 if (cur == NULL) return(NULL);
10174
10175
10176 ctxt = xmlCreateDocParserCtxt(cur);
10177 if (ctxt == NULL) return(NULL);
10178 if (sax != NULL) {
10179 ctxt->sax = sax;
10180 ctxt->userData = NULL;
10181 }
10182
10183 xmlParseDocument(ctxt);
10184 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10185 else {
10186 ret = NULL;
10187 xmlFreeDoc(ctxt->myDoc);
10188 ctxt->myDoc = NULL;
10189 }
10190 if (sax != NULL)
10191 ctxt->sax = NULL;
10192 xmlFreeParserCtxt(ctxt);
10193
10194 return(ret);
10195}
10196
10197/**
10198 * xmlParseDoc:
10199 * @cur: a pointer to an array of xmlChar
10200 *
10201 * parse an XML in-memory document and build a tree.
10202 *
10203 * Returns the resulting document tree
10204 */
10205
10206xmlDocPtr
10207xmlParseDoc(xmlChar *cur) {
10208 return(xmlSAXParseDoc(NULL, cur, 0));
10209}
10210
10211
10212/************************************************************************
10213 * *
10214 * Miscellaneous *
10215 * *
10216 ************************************************************************/
10217
10218#ifdef LIBXML_XPATH_ENABLED
10219#include <libxml/xpath.h>
10220#endif
10221
/* Set to 1 once xmlInitParser() has run, back to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * The global initializers are run once; later calls return immediately
 * until xmlCleanupParser() resets the flag.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */

void
xmlInitParser(void) {
    if (xmlParserInitialized == 0) {
        initGenericErrorDefaultFunc(NULL);
        xmlInitThreads();
        xmlInitMemory();
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
10254
10255/**
10256 * xmlCleanupParser:
10257 *
10258 * Cleanup function for the XML parser. It tries to reclaim all
10259 * parsing related global memory allocated for the parser processing.
10260 * It doesn't deallocate any document related memory. Calling this
10261 * function should not prevent reusing the parser.
10262 */
10263
10264void
10265xmlCleanupParser(void) {
Owen Taylor3473f882001-02-23 17:55:21 +000010266 xmlCleanupCharEncodingHandlers();
10267 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010268#ifdef LIBXML_CATALOG_ENABLED
10269 xmlCatalogCleanup();
10270#endif
Daniel Veillardd0463562001-10-13 09:15:48 +000010271 xmlCleanupThreads();
10272 xmlParserInitialized = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000010273}