blob: c52529bf512cfe6ff13183ce9708142d96079d6b [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
Owen Taylor3473f882001-02-23 17:55:21 +000026 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
/*
 * XML_DIR_SEP: directory separator character for the host platform,
 * a backslash when WIN32 is defined and a forward slash otherwise.
 */
#ifdef WIN32
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
44#include <libxml/tree.h>
45#include <libxml/parser.h>
46#include <libxml/parserInternals.h>
47#include <libxml/valid.h>
48#include <libxml/entities.h>
49#include <libxml/xmlerror.h>
50#include <libxml/encoding.h>
51#include <libxml/xmlIO.h>
52#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000053#ifdef LIBXML_CATALOG_ENABLED
54#include <libxml/catalog.h>
55#endif
Owen Taylor3473f882001-02-23 17:55:21 +000056
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Buffer sizing constants. In xmlStringDecodeEntities() below,
 * XML_PARSER_BIG_BUFFER_SIZE is the initial size of the translation buffer
 * and XML_PARSER_BUFFER_SIZE is the amount of free room kept at the end of
 * that buffer before it gets grown.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Various global defaults for parsing
 */
/* When non-zero, entity push/pop/decoding is traced via xmlGenericError() */
int xmlParserDebugEntities = 0;

/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};

/*
 * Forward declarations of routines used in this file before (or without)
 * a visible definition.
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static int
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
                      xmlSAXHandlerPtr sax,
                      void *user_data, int depth, const xmlChar *URL,
                      const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000104
105/************************************************************************
106 * *
107 * Parser stacks related functions and macros *
108 * *
109 ************************************************************************/
110
/*
 * Forward declaration: xmlParseStringEntityRef() is called by
 * xmlStringDecodeEntities() further down in this file.
 */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
113
/*
 * Generic function generator for accessing stacks in the Parser Context.
 *
 * PUSH_AND_POP(scope, type, name) expands to two functions operating on the
 * context fields name##Tab (the array), name##Nr (number of entries in use),
 * name##Max (allocated capacity) and name (cached copy of the top entry):
 *
 *   scope int  name##Push(ctxt, value)
 *       Appends @value, doubling the array first when it is full.
 *       Returns the index at which the value was stored.  If the array
 *       cannot be grown an error is reported and 0 is returned; note that
 *       0 is also the index returned for the very first push, so callers
 *       cannot tell the two apart from the return value alone.
 *
 *   scope type name##Pop(ctxt)
 *       Removes and returns the top entry, or 0 if the stack is empty.
 *       The cached top is refreshed (NULL when the stack becomes empty)
 *       and the vacated slot is cleared.
 *
 * The reallocation goes through a temporary pointer and the capacity is
 * only updated once it succeeded, so that on failure the existing array is
 * neither leaked nor replaced by NULL while name##Nr is still non-zero.
 */

#define PUSH_AND_POP(scope, type, name)                                 \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {               \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        type *tmp;                                                      \
        tmp = (type *) xmlRealloc(ctxt->name##Tab,                      \
             ctxt->name##Max * 2 * sizeof(ctxt->name##Tab[0]));         \
        if (tmp == NULL) {                                              \
            xmlGenericError(xmlGenericErrorContext,                     \
                    "realloc failed !\n");                              \
            return(0);                                                  \
        }                                                               \
        ctxt->name##Tab = tmp;                                          \
        ctxt->name##Max *= 2;                                           \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
scope type name##Pop(xmlParserCtxtPtr ctxt) {                           \
    type ret;                                                           \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = NULL;                                              \
    ret = ctxt->name##Tab[ctxt->name##Nr];                              \
    ctxt->name##Tab[ctxt->name##Nr] = 0;                                \
    return(ret);                                                        \
}
146
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
148 * inputPop:
149 * @ctxt: an XML parser context
150 *
151 * Pops the top parser input from the input stack
152 *
153 * Returns the input just removed
154 */
155/**
156 * inputPush:
157 * @ctxt: an XML parser context
158 * @input: the parser input
159 *
160 * Pushes a new parser input on top of the input stack
161 */
162/**
163 * namePop:
164 * @ctxt: an XML parser context
165 *
166 * Pops the top element name from the name stack
167 *
168 * Returns the name just removed
169 */
170/**
171 * namePush:
172 * @ctxt: an XML parser context
173 * @name: the element name
174 *
175 * Pushes a new element name on top of the name stack
176 */
177/**
178 * nodePop:
179 * @ctxt: an XML parser context
180 *
181 * Pops the top element node from the node stack
182 *
183 * Returns the node just removed
184 */
185/**
186 * nodePush:
187 * @ctxt: an XML parser context
188 * @node: the element node
189 *
190 * Pushes a new element node on top of the node stack
191 */
/*
 * Those macros actually generate the functions: each line below expands
 * to a <name>Push()/<name>Pop() pair (inputPush/inputPop, nodePush/nodePop,
 * namePush/namePop) declared with the "extern" storage class.
 */
PUSH_AND_POP(extern, xmlParserInputPtr, input)
PUSH_AND_POP(extern, xmlNodePtr, node)
PUSH_AND_POP(extern, xmlChar*, name)
198
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000199static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000200 if (ctxt->spaceNr >= ctxt->spaceMax) {
201 ctxt->spaceMax *= 2;
202 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
203 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
204 if (ctxt->spaceTab == NULL) {
205 xmlGenericError(xmlGenericErrorContext,
206 "realloc failed !\n");
207 return(0);
208 }
209 }
210 ctxt->spaceTab[ctxt->spaceNr] = val;
211 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
212 return(ctxt->spaceNr++);
213}
214
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000215static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000216 int ret;
217 if (ctxt->spaceNr <= 0) return(0);
218 ctxt->spaceNr--;
219 if (ctxt->spaceNr > 0)
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
221 else
222 ctxt->space = NULL;
223 ret = ctxt->spaceTab[ctxt->spaceNr];
224 ctxt->spaceTab[ctxt->spaceNr] = -1;
225 return(ret);
226}
227
228/*
229 * Macros for accessing the content. Those should be used only by the parser,
230 * and not exported.
231 *
232 * Dirty macros, i.e. one often need to make assumption on the context to
233 * use them
234 *
235 * CUR_PTR return the current pointer to the xmlChar to be parsed.
236 * To be used with extreme caution since operations consuming
237 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values, otherwise it would break when
 *           running with UTF-8 encoding.
242 * RAW same as CUR but in the input buffer, bypass any token
243 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. As with CUR, it should be used
 *           only to compare against ASCII based substrings.
246 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
247 * strings within the parser.
248 *
249 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
250 *
251 * NEXT Skip to the next character, this does the proper decoding
252 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
253 * NEXTL(l) Skip l xmlChars in the input buffer
254 * CUR_CHAR(l) returns the current unicode character (int), set l
255 * to the number of xmlChars used for the encoding [0-5].
256 * CUR_SCHAR same but operate on a string instead of the context
257 * COPY_BUF copy the current unicode char to the target buffer, increment
258 * the index
259 * GROW, SHRINK handling of input buffers
260 */
261
/*
 * All the macros below expect a variable named "ctxt" (the parser context)
 * to be in scope at the point of use.
 */

/* Current raw byte of the input, or -1 while a pending token is set */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* Pending token if there is one, else the current byte of the input */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* Byte located (val) positions after the current one */
#define NXT(val) ctxt->input->cur[(val)]
/* Current read pointer in the input (usable as an lvalue) */
#define CUR_PTR ctxt->input->cur

/*
 * Advance by (val) bytes, then handle a parameter entity reference or the
 * end of the current input if one shows up.  (val) is evaluated twice, so
 * it must not have side effects.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/*
 * SHRINK and GROW are deliberately kept as bare "if" statements rather than
 * do { } while (0) blocks: this file also uses GROW without a trailing
 * semicolon.  Do not place them directly before an "else".
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Advance by exactly one byte, refilling the input when a 0 is reached */
#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * Advance by (l) bytes after updating the line/column bookkeeping from the
 * byte being left; any pending token is dropped.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Append character (v) to buffer (b) at index (i), advancing (i): a plain
 * byte store when the encoded length (l) is 1, a multi-byte copy otherwise.
 * This is an if/else statement; use it followed by ';' inside braces.
 * Parameters are parenthesized so that expression arguments keep their
 * intended grouping.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[i],(v))
315/**
316 * xmlSkipBlankChars:
317 * @ctxt: the XML parser context
318 *
319 * skip all blanks character found at that point in the input streams.
320 * It pops up finished entities in the process if allowable at that point.
321 *
322 * Returns the number of space chars skipped
323 */
324
325int
326xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000327 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000328
Daniel Veillard02141ea2001-04-30 11:46:40 +0000329 if (ctxt->token != 0) {
330 if (!IS_BLANK(ctxt->token))
331 return(0);
332 ctxt->token = 0;
333 res++;
334 }
Owen Taylor3473f882001-02-23 17:55:21 +0000335 /*
336 * It's Okay to use CUR/NEXT here since all the blanks are on
337 * the ASCII range.
338 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000339 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
340 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000341 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000343 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000344 cur = ctxt->input->cur;
345 while (IS_BLANK(*cur)) {
346 if (*cur == '\n') {
347 ctxt->input->line++; ctxt->input->col = 1;
348 }
349 cur++;
350 res++;
351 if (*cur == 0) {
352 ctxt->input->cur = cur;
353 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
354 cur = ctxt->input->cur;
355 }
356 }
357 ctxt->input->cur = cur;
358 } else {
359 int cur;
360 do {
361 cur = CUR;
362 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
363 NEXT;
364 cur = CUR;
365 res++;
366 }
367 while ((cur == 0) && (ctxt->inputNr > 1) &&
368 (ctxt->instate != XML_PARSER_COMMENT)) {
369 xmlPopInput(ctxt);
370 cur = CUR;
371 }
372 /*
373 * Need to handle support of entities branching here
374 */
375 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
376 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
377 }
Owen Taylor3473f882001-02-23 17:55:21 +0000378 return(res);
379}
380
381/************************************************************************
382 * *
383 * Commodity functions to handle entities *
384 * *
385 ************************************************************************/
386
387/**
388 * xmlPopInput:
389 * @ctxt: an XML parser context
390 *
391 * xmlPopInput: the current input pointed by ctxt->input came to an end
392 * pop it and return the next char.
393 *
394 * Returns the current xmlChar in the parser context
395 */
396xmlChar
397xmlPopInput(xmlParserCtxtPtr ctxt) {
398 if (ctxt->inputNr == 1) return(0); /* End of main Input */
399 if (xmlParserDebugEntities)
400 xmlGenericError(xmlGenericErrorContext,
401 "Popping input %d\n", ctxt->inputNr);
402 xmlFreeInputStream(inputPop(ctxt));
403 if ((*ctxt->input->cur == 0) &&
404 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
405 return(xmlPopInput(ctxt));
406 return(CUR);
407}
408
409/**
410 * xmlPushInput:
411 * @ctxt: an XML parser context
412 * @input: an XML parser input fragment (entity, XML fragment ...).
413 *
414 * xmlPushInput: switch to a new input stream which is stacked on top
415 * of the previous one(s).
416 */
417void
418xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
419 if (input == NULL) return;
420
421 if (xmlParserDebugEntities) {
422 if ((ctxt->input != NULL) && (ctxt->input->filename))
423 xmlGenericError(xmlGenericErrorContext,
424 "%s(%d): ", ctxt->input->filename,
425 ctxt->input->line);
426 xmlGenericError(xmlGenericErrorContext,
427 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
428 }
429 inputPush(ctxt, input);
430 GROW;
431}
432
433/**
434 * xmlParseCharRef:
435 * @ctxt: an XML parser context
436 *
437 * parse Reference declarations
438 *
439 * [66] CharRef ::= '&#' [0-9]+ ';' |
440 * '&#x' [0-9a-fA-F]+ ';'
441 *
442 * [ WFC: Legal Character ]
443 * Characters referred to using character references must match the
444 * production for Char.
445 *
446 * Returns the value parsed (as an int), 0 in case of error
447 */
448int
449xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000450 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000451 int count = 0;
452
453 if (ctxt->token != 0) {
454 val = ctxt->token;
455 ctxt->token = 0;
456 return(val);
457 }
458 /*
459 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
460 */
461 if ((RAW == '&') && (NXT(1) == '#') &&
462 (NXT(2) == 'x')) {
463 SKIP(3);
464 GROW;
465 while (RAW != ';') { /* loop blocked by count */
466 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
467 val = val * 16 + (CUR - '0');
468 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
469 val = val * 16 + (CUR - 'a') + 10;
470 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
471 val = val * 16 + (CUR - 'A') + 10;
472 else {
473 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
475 ctxt->sax->error(ctxt->userData,
476 "xmlParseCharRef: invalid hexadecimal value\n");
477 ctxt->wellFormed = 0;
478 ctxt->disableSAX = 1;
479 val = 0;
480 break;
481 }
482 NEXT;
483 count++;
484 }
485 if (RAW == ';') {
486 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
487 ctxt->nbChars ++;
488 ctxt->input->cur++;
489 }
490 } else if ((RAW == '&') && (NXT(1) == '#')) {
491 SKIP(2);
492 GROW;
493 while (RAW != ';') { /* loop blocked by count */
494 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
495 val = val * 10 + (CUR - '0');
496 else {
497 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
499 ctxt->sax->error(ctxt->userData,
500 "xmlParseCharRef: invalid decimal value\n");
501 ctxt->wellFormed = 0;
502 ctxt->disableSAX = 1;
503 val = 0;
504 break;
505 }
506 NEXT;
507 count++;
508 }
509 if (RAW == ';') {
510 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
511 ctxt->nbChars ++;
512 ctxt->input->cur++;
513 }
514 } else {
515 ctxt->errNo = XML_ERR_INVALID_CHARREF;
516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
517 ctxt->sax->error(ctxt->userData,
518 "xmlParseCharRef: invalid value\n");
519 ctxt->wellFormed = 0;
520 ctxt->disableSAX = 1;
521 }
522
523 /*
524 * [ WFC: Legal Character ]
525 * Characters referred to using character references must match the
526 * production for Char.
527 */
528 if (IS_CHAR(val)) {
529 return(val);
530 } else {
531 ctxt->errNo = XML_ERR_INVALID_CHAR;
532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
533 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
534 val);
535 ctxt->wellFormed = 0;
536 ctxt->disableSAX = 1;
537 }
538 return(0);
539}
540
541/**
542 * xmlParseStringCharRef:
543 * @ctxt: an XML parser context
544 * @str: a pointer to an index in the string
545 *
546 * parse Reference declarations, variant parsing from a string rather
547 * than an an input flow.
548 *
549 * [66] CharRef ::= '&#' [0-9]+ ';' |
550 * '&#x' [0-9a-fA-F]+ ';'
551 *
552 * [ WFC: Legal Character ]
553 * Characters referred to using character references must match the
554 * production for Char.
555 *
556 * Returns the value parsed (as an int), 0 in case of error, str will be
557 * updated to the current value of the index
558 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000559static int
Owen Taylor3473f882001-02-23 17:55:21 +0000560xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
561 const xmlChar *ptr;
562 xmlChar cur;
563 int val = 0;
564
565 if ((str == NULL) || (*str == NULL)) return(0);
566 ptr = *str;
567 cur = *ptr;
568 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
569 ptr += 3;
570 cur = *ptr;
571 while (cur != ';') { /* Non input consuming loop */
572 if ((cur >= '0') && (cur <= '9'))
573 val = val * 16 + (cur - '0');
574 else if ((cur >= 'a') && (cur <= 'f'))
575 val = val * 16 + (cur - 'a') + 10;
576 else if ((cur >= 'A') && (cur <= 'F'))
577 val = val * 16 + (cur - 'A') + 10;
578 else {
579 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
581 ctxt->sax->error(ctxt->userData,
582 "xmlParseStringCharRef: invalid hexadecimal value\n");
583 ctxt->wellFormed = 0;
584 ctxt->disableSAX = 1;
585 val = 0;
586 break;
587 }
588 ptr++;
589 cur = *ptr;
590 }
591 if (cur == ';')
592 ptr++;
593 } else if ((cur == '&') && (ptr[1] == '#')){
594 ptr += 2;
595 cur = *ptr;
596 while (cur != ';') { /* Non input consuming loops */
597 if ((cur >= '0') && (cur <= '9'))
598 val = val * 10 + (cur - '0');
599 else {
600 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
602 ctxt->sax->error(ctxt->userData,
603 "xmlParseStringCharRef: invalid decimal value\n");
604 ctxt->wellFormed = 0;
605 ctxt->disableSAX = 1;
606 val = 0;
607 break;
608 }
609 ptr++;
610 cur = *ptr;
611 }
612 if (cur == ';')
613 ptr++;
614 } else {
615 ctxt->errNo = XML_ERR_INVALID_CHARREF;
616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
617 ctxt->sax->error(ctxt->userData,
618 "xmlParseCharRef: invalid value\n");
619 ctxt->wellFormed = 0;
620 ctxt->disableSAX = 1;
621 return(0);
622 }
623 *str = ptr;
624
625 /*
626 * [ WFC: Legal Character ]
627 * Characters referred to using character references must match the
628 * production for Char.
629 */
630 if (IS_CHAR(val)) {
631 return(val);
632 } else {
633 ctxt->errNo = XML_ERR_INVALID_CHAR;
634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
635 ctxt->sax->error(ctxt->userData,
636 "CharRef: invalid xmlChar value %d\n", val);
637 ctxt->wellFormed = 0;
638 ctxt->disableSAX = 1;
639 }
640 return(0);
641}
642
643/**
644 * xmlParserHandlePEReference:
645 * @ctxt: the parser context
646 *
647 * [69] PEReference ::= '%' Name ';'
648 *
649 * [ WFC: No Recursion ]
650 * A parsed entity must not contain a recursive
651 * reference to itself, either directly or indirectly.
652 *
653 * [ WFC: Entity Declared ]
654 * In a document without any DTD, a document with only an internal DTD
655 * subset which contains no parameter entity references, or a document
656 * with "standalone='yes'", ... ... The declaration of a parameter
657 * entity must precede any reference to it...
658 *
659 * [ VC: Entity Declared ]
660 * In a document with an external subset or external parameter entities
661 * with "standalone='no'", ... ... The declaration of a parameter entity
662 * must precede any reference to it...
663 *
664 * [ WFC: In DTD ]
665 * Parameter-entity references may only appear in the DTD.
666 * NOTE: misleading but this is handled.
667 *
668 * A PEReference may have been detected in the current input stream
669 * the handling is done accordingly to
670 * http://www.w3.org/TR/REC-xml#entproc
671 * i.e.
672 * - Included in literal in entity values
673 * - Included as Paraemeter Entity reference within DTDs
674 */
675void
676xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
677 xmlChar *name;
678 xmlEntityPtr entity = NULL;
679 xmlParserInputPtr input;
680
681 if (ctxt->token != 0) {
682 return;
683 }
684 if (RAW != '%') return;
685 switch(ctxt->instate) {
686 case XML_PARSER_CDATA_SECTION:
687 return;
688 case XML_PARSER_COMMENT:
689 return;
690 case XML_PARSER_START_TAG:
691 return;
692 case XML_PARSER_END_TAG:
693 return;
694 case XML_PARSER_EOF:
695 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
697 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
698 ctxt->wellFormed = 0;
699 ctxt->disableSAX = 1;
700 return;
701 case XML_PARSER_PROLOG:
702 case XML_PARSER_START:
703 case XML_PARSER_MISC:
704 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
706 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
707 ctxt->wellFormed = 0;
708 ctxt->disableSAX = 1;
709 return;
710 case XML_PARSER_ENTITY_DECL:
711 case XML_PARSER_CONTENT:
712 case XML_PARSER_ATTRIBUTE_VALUE:
713 case XML_PARSER_PI:
714 case XML_PARSER_SYSTEM_LITERAL:
715 /* we just ignore it there */
716 return;
717 case XML_PARSER_EPILOG:
718 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
720 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
721 ctxt->wellFormed = 0;
722 ctxt->disableSAX = 1;
723 return;
724 case XML_PARSER_ENTITY_VALUE:
725 /*
726 * NOTE: in the case of entity values, we don't do the
727 * substitution here since we need the literal
728 * entity value to be able to save the internal
729 * subset of the document.
730 * This will be handled by xmlStringDecodeEntities
731 */
732 return;
733 case XML_PARSER_DTD:
734 /*
735 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
736 * In the internal DTD subset, parameter-entity references
737 * can occur only where markup declarations can occur, not
738 * within markup declarations.
739 * In that case this is handled in xmlParseMarkupDecl
740 */
741 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
742 return;
743 break;
744 case XML_PARSER_IGNORE:
745 return;
746 }
747
748 NEXT;
749 name = xmlParseName(ctxt);
750 if (xmlParserDebugEntities)
751 xmlGenericError(xmlGenericErrorContext,
752 "PE Reference: %s\n", name);
753 if (name == NULL) {
754 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
756 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
757 ctxt->wellFormed = 0;
758 ctxt->disableSAX = 1;
759 } else {
760 if (RAW == ';') {
761 NEXT;
762 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
763 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
764 if (entity == NULL) {
765
766 /*
767 * [ WFC: Entity Declared ]
768 * In a document without any DTD, a document with only an
769 * internal DTD subset which contains no parameter entity
770 * references, or a document with "standalone='yes'", ...
771 * ... The declaration of a parameter entity must precede
772 * any reference to it...
773 */
774 if ((ctxt->standalone == 1) ||
775 ((ctxt->hasExternalSubset == 0) &&
776 (ctxt->hasPErefs == 0))) {
777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
778 ctxt->sax->error(ctxt->userData,
779 "PEReference: %%%s; not found\n", name);
780 ctxt->wellFormed = 0;
781 ctxt->disableSAX = 1;
782 } else {
783 /*
784 * [ VC: Entity Declared ]
785 * In a document with an external subset or external
786 * parameter entities with "standalone='no'", ...
787 * ... The declaration of a parameter entity must precede
788 * any reference to it...
789 */
790 if ((!ctxt->disableSAX) &&
791 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
792 ctxt->vctxt.error(ctxt->vctxt.userData,
793 "PEReference: %%%s; not found\n", name);
794 } else if ((!ctxt->disableSAX) &&
795 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
796 ctxt->sax->warning(ctxt->userData,
797 "PEReference: %%%s; not found\n", name);
798 ctxt->valid = 0;
799 }
800 } else {
801 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
802 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000803 xmlChar start[4];
804 xmlCharEncoding enc;
805
Owen Taylor3473f882001-02-23 17:55:21 +0000806 /*
807 * handle the extra spaces added before and after
808 * c.f. http://www.w3.org/TR/REC-xml#as-PE
809 * this is done independantly.
810 */
811 input = xmlNewEntityInputStream(ctxt, entity);
812 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000813
814 /*
815 * Get the 4 first bytes and decode the charset
816 * if enc != XML_CHAR_ENCODING_NONE
817 * plug some encoding conversion routines.
818 */
819 GROW
820 start[0] = RAW;
821 start[1] = NXT(1);
822 start[2] = NXT(2);
823 start[3] = NXT(3);
824 enc = xmlDetectCharEncoding(start, 4);
825 if (enc != XML_CHAR_ENCODING_NONE) {
826 xmlSwitchEncoding(ctxt, enc);
827 }
828
Owen Taylor3473f882001-02-23 17:55:21 +0000829 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
830 (RAW == '<') && (NXT(1) == '?') &&
831 (NXT(2) == 'x') && (NXT(3) == 'm') &&
832 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
833 xmlParseTextDecl(ctxt);
834 }
835 if (ctxt->token == 0)
836 ctxt->token = ' ';
837 } else {
838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
839 ctxt->sax->error(ctxt->userData,
840 "xmlHandlePEReference: %s is not a parameter entity\n",
841 name);
842 ctxt->wellFormed = 0;
843 ctxt->disableSAX = 1;
844 }
845 }
846 } else {
847 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData,
850 "xmlHandlePEReference: expecting ';'\n");
851 ctxt->wellFormed = 0;
852 ctxt->disableSAX = 1;
853 }
854 xmlFree(name);
855 }
856}
857
/*
 * Macro used to grow the current buffer.
 *
 * @buffer must be the name of an xmlChar * variable accompanied by an
 * integer variable named <buffer>_size holding its allocated size; the
 * size is doubled.  If the reallocation fails the error is reported, the
 * old buffer is released (it would otherwise be leaked), the variable is
 * reset to NULL and the ENCLOSING FUNCTION RETURNS NULL, so the macro can
 * only be used in functions returning a pointer.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *growBuffer_tmp;                                            \
    buffer##_size *= 2;                                                 \
    growBuffer_tmp = (xmlChar *)                                        \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (growBuffer_tmp == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        buffer = NULL;                                                  \
        return(NULL);                                                   \
    }                                                                   \
    buffer = growBuffer_tmp;                                            \
}
870
871/**
872 * xmlStringDecodeEntities:
873 * @ctxt: the parser context
874 * @str: the input string
875 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
876 * @end: an end marker xmlChar, 0 if none
877 * @end2: an end marker xmlChar, 0 if none
878 * @end3: an end marker xmlChar, 0 if none
879 *
880 * Takes a entity string content and process to do the adequate subtitutions.
881 *
882 * [67] Reference ::= EntityRef | CharRef
883 *
884 * [69] PEReference ::= '%' Name ';'
885 *
886 * Returns A newly allocated string with the substitution done. The caller
887 * must deallocate it !
888 */
889xmlChar *
890xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
891 xmlChar end, xmlChar end2, xmlChar end3) {
892 xmlChar *buffer = NULL;
893 int buffer_size = 0;
894
895 xmlChar *current = NULL;
896 xmlEntityPtr ent;
897 int c,l;
898 int nbchars = 0;
899
900 if (str == NULL)
901 return(NULL);
902
903 if (ctxt->depth > 40) {
904 ctxt->errNo = XML_ERR_ENTITY_LOOP;
905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
906 ctxt->sax->error(ctxt->userData,
907 "Detected entity reference loop\n");
908 ctxt->wellFormed = 0;
909 ctxt->disableSAX = 1;
910 return(NULL);
911 }
912
913 /*
914 * allocate a translation buffer.
915 */
916 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
917 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
918 if (buffer == NULL) {
919 perror("xmlDecodeEntities: malloc failed");
920 return(NULL);
921 }
922
923 /*
924 * Ok loop until we reach one of the ending char or a size limit.
925 * we are operating on already parsed values.
926 */
927 c = CUR_SCHAR(str, l);
928 while ((c != 0) && (c != end) && /* non input consuming loop */
929 (c != end2) && (c != end3)) {
930
931 if (c == 0) break;
932 if ((c == '&') && (str[1] == '#')) {
933 int val = xmlParseStringCharRef(ctxt, &str);
934 if (val != 0) {
935 COPY_BUF(0,buffer,nbchars,val);
936 }
937 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
938 if (xmlParserDebugEntities)
939 xmlGenericError(xmlGenericErrorContext,
940 "String decoding Entity Reference: %.30s\n",
941 str);
942 ent = xmlParseStringEntityRef(ctxt, &str);
943 if ((ent != NULL) &&
944 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
945 if (ent->content != NULL) {
946 COPY_BUF(0,buffer,nbchars,ent->content[0]);
947 } else {
948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
949 ctxt->sax->error(ctxt->userData,
950 "internal error entity has no content\n");
951 }
952 } else if ((ent != NULL) && (ent->content != NULL)) {
953 xmlChar *rep;
954
955 ctxt->depth++;
956 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
957 0, 0, 0);
958 ctxt->depth--;
959 if (rep != NULL) {
960 current = rep;
961 while (*current != 0) { /* non input consuming loop */
962 buffer[nbchars++] = *current++;
963 if (nbchars >
964 buffer_size - XML_PARSER_BUFFER_SIZE) {
965 growBuffer(buffer);
966 }
967 }
968 xmlFree(rep);
969 }
970 } else if (ent != NULL) {
971 int i = xmlStrlen(ent->name);
972 const xmlChar *cur = ent->name;
973
974 buffer[nbchars++] = '&';
975 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
976 growBuffer(buffer);
977 }
978 for (;i > 0;i--)
979 buffer[nbchars++] = *cur++;
980 buffer[nbchars++] = ';';
981 }
982 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
983 if (xmlParserDebugEntities)
984 xmlGenericError(xmlGenericErrorContext,
985 "String decoding PE Reference: %.30s\n", str);
986 ent = xmlParseStringPEReference(ctxt, &str);
987 if (ent != NULL) {
988 xmlChar *rep;
989
990 ctxt->depth++;
991 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
992 0, 0, 0);
993 ctxt->depth--;
994 if (rep != NULL) {
995 current = rep;
996 while (*current != 0) { /* non input consuming loop */
997 buffer[nbchars++] = *current++;
998 if (nbchars >
999 buffer_size - XML_PARSER_BUFFER_SIZE) {
1000 growBuffer(buffer);
1001 }
1002 }
1003 xmlFree(rep);
1004 }
1005 }
1006 } else {
1007 COPY_BUF(l,buffer,nbchars,c);
1008 str += l;
1009 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1010 growBuffer(buffer);
1011 }
1012 }
1013 c = CUR_SCHAR(str, l);
1014 }
1015 buffer[nbchars++] = 0;
1016 return(buffer);
1017}
1018
1019
1020/************************************************************************
1021 * *
1022 * Commodity functions to handle xmlChars *
1023 * *
1024 ************************************************************************/
1025
1026/**
1027 * xmlStrndup:
1028 * @cur: the input xmlChar *
1029 * @len: the len of @cur
1030 *
1031 * a strndup for array of xmlChar's
1032 *
1033 * Returns a new xmlChar * or NULL
1034 */
1035xmlChar *
1036xmlStrndup(const xmlChar *cur, int len) {
1037 xmlChar *ret;
1038
1039 if ((cur == NULL) || (len < 0)) return(NULL);
1040 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1041 if (ret == NULL) {
1042 xmlGenericError(xmlGenericErrorContext,
1043 "malloc of %ld byte failed\n",
1044 (len + 1) * (long)sizeof(xmlChar));
1045 return(NULL);
1046 }
1047 memcpy(ret, cur, len * sizeof(xmlChar));
1048 ret[len] = 0;
1049 return(ret);
1050}
1051
1052/**
1053 * xmlStrdup:
1054 * @cur: the input xmlChar *
1055 *
1056 * a strdup for array of xmlChar's. Since they are supposed to be
1057 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1058 * a termination mark of '0'.
1059 *
1060 * Returns a new xmlChar * or NULL
1061 */
1062xmlChar *
1063xmlStrdup(const xmlChar *cur) {
1064 const xmlChar *p = cur;
1065
1066 if (cur == NULL) return(NULL);
1067 while (*p != 0) p++; /* non input consuming */
1068 return(xmlStrndup(cur, p - cur));
1069}
1070
1071/**
1072 * xmlCharStrndup:
1073 * @cur: the input char *
1074 * @len: the len of @cur
1075 *
1076 * a strndup for char's to xmlChar's
1077 *
1078 * Returns a new xmlChar * or NULL
1079 */
1080
1081xmlChar *
1082xmlCharStrndup(const char *cur, int len) {
1083 int i;
1084 xmlChar *ret;
1085
1086 if ((cur == NULL) || (len < 0)) return(NULL);
1087 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1088 if (ret == NULL) {
1089 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1090 (len + 1) * (long)sizeof(xmlChar));
1091 return(NULL);
1092 }
1093 for (i = 0;i < len;i++)
1094 ret[i] = (xmlChar) cur[i];
1095 ret[len] = 0;
1096 return(ret);
1097}
1098
1099/**
1100 * xmlCharStrdup:
1101 * @cur: the input char *
1102 * @len: the len of @cur
1103 *
1104 * a strdup for char's to xmlChar's
1105 *
1106 * Returns a new xmlChar * or NULL
1107 */
1108
1109xmlChar *
1110xmlCharStrdup(const char *cur) {
1111 const char *p = cur;
1112
1113 if (cur == NULL) return(NULL);
1114 while (*p != '\0') p++; /* non input consuming */
1115 return(xmlCharStrndup(cur, p - cur));
1116}
1117
1118/**
1119 * xmlStrcmp:
1120 * @str1: the first xmlChar *
1121 * @str2: the second xmlChar *
1122 *
1123 * a strcmp for xmlChar's
1124 *
1125 * Returns the integer result of the comparison
1126 */
1127
1128int
1129xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1130 register int tmp;
1131
1132 if (str1 == str2) return(0);
1133 if (str1 == NULL) return(-1);
1134 if (str2 == NULL) return(1);
1135 do {
1136 tmp = *str1++ - *str2;
1137 if (tmp != 0) return(tmp);
1138 } while (*str2++ != 0);
1139 return 0;
1140}
1141
1142/**
1143 * xmlStrEqual:
1144 * @str1: the first xmlChar *
1145 * @str2: the second xmlChar *
1146 *
1147 * Check if both string are equal of have same content
1148 * Should be a bit more readable and faster than xmlStrEqual()
1149 *
1150 * Returns 1 if they are equal, 0 if they are different
1151 */
1152
1153int
1154xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1155 if (str1 == str2) return(1);
1156 if (str1 == NULL) return(0);
1157 if (str2 == NULL) return(0);
1158 do {
1159 if (*str1++ != *str2) return(0);
1160 } while (*str2++);
1161 return(1);
1162}
1163
1164/**
1165 * xmlStrncmp:
1166 * @str1: the first xmlChar *
1167 * @str2: the second xmlChar *
1168 * @len: the max comparison length
1169 *
1170 * a strncmp for xmlChar's
1171 *
1172 * Returns the integer result of the comparison
1173 */
1174
1175int
1176xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1177 register int tmp;
1178
1179 if (len <= 0) return(0);
1180 if (str1 == str2) return(0);
1181 if (str1 == NULL) return(-1);
1182 if (str2 == NULL) return(1);
1183 do {
1184 tmp = *str1++ - *str2;
1185 if (tmp != 0 || --len == 0) return(tmp);
1186 } while (*str2++ != 0);
1187 return 0;
1188}
1189
1190static xmlChar casemap[256] = {
1191 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1192 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1193 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1194 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1195 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1196 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1197 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1198 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1199 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1200 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1201 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1202 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1203 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1204 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1205 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1206 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1207 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1208 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1209 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1210 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1211 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1212 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1213 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1214 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1215 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1216 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1217 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1218 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1219 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1220 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1221 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1222 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1223};
1224
1225/**
1226 * xmlStrcasecmp:
1227 * @str1: the first xmlChar *
1228 * @str2: the second xmlChar *
1229 *
1230 * a strcasecmp for xmlChar's
1231 *
1232 * Returns the integer result of the comparison
1233 */
1234
1235int
1236xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1237 register int tmp;
1238
1239 if (str1 == str2) return(0);
1240 if (str1 == NULL) return(-1);
1241 if (str2 == NULL) return(1);
1242 do {
1243 tmp = casemap[*str1++] - casemap[*str2];
1244 if (tmp != 0) return(tmp);
1245 } while (*str2++ != 0);
1246 return 0;
1247}
1248
1249/**
1250 * xmlStrncasecmp:
1251 * @str1: the first xmlChar *
1252 * @str2: the second xmlChar *
1253 * @len: the max comparison length
1254 *
1255 * a strncasecmp for xmlChar's
1256 *
1257 * Returns the integer result of the comparison
1258 */
1259
1260int
1261xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1262 register int tmp;
1263
1264 if (len <= 0) return(0);
1265 if (str1 == str2) return(0);
1266 if (str1 == NULL) return(-1);
1267 if (str2 == NULL) return(1);
1268 do {
1269 tmp = casemap[*str1++] - casemap[*str2];
1270 if (tmp != 0 || --len == 0) return(tmp);
1271 } while (*str2++ != 0);
1272 return 0;
1273}
1274
1275/**
1276 * xmlStrchr:
1277 * @str: the xmlChar * array
1278 * @val: the xmlChar to search
1279 *
1280 * a strchr for xmlChar's
1281 *
1282 * Returns the xmlChar * for the first occurence or NULL.
1283 */
1284
1285const xmlChar *
1286xmlStrchr(const xmlChar *str, xmlChar val) {
1287 if (str == NULL) return(NULL);
1288 while (*str != 0) { /* non input consuming */
1289 if (*str == val) return((xmlChar *) str);
1290 str++;
1291 }
1292 return(NULL);
1293}
1294
1295/**
1296 * xmlStrstr:
1297 * @str: the xmlChar * array (haystack)
1298 * @val: the xmlChar to search (needle)
1299 *
1300 * a strstr for xmlChar's
1301 *
1302 * Returns the xmlChar * for the first occurence or NULL.
1303 */
1304
1305const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001306xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001307 int n;
1308
1309 if (str == NULL) return(NULL);
1310 if (val == NULL) return(NULL);
1311 n = xmlStrlen(val);
1312
1313 if (n == 0) return(str);
1314 while (*str != 0) { /* non input consuming */
1315 if (*str == *val) {
1316 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1317 }
1318 str++;
1319 }
1320 return(NULL);
1321}
1322
1323/**
1324 * xmlStrcasestr:
1325 * @str: the xmlChar * array (haystack)
1326 * @val: the xmlChar to search (needle)
1327 *
1328 * a case-ignoring strstr for xmlChar's
1329 *
1330 * Returns the xmlChar * for the first occurence or NULL.
1331 */
1332
1333const xmlChar *
1334xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1335 int n;
1336
1337 if (str == NULL) return(NULL);
1338 if (val == NULL) return(NULL);
1339 n = xmlStrlen(val);
1340
1341 if (n == 0) return(str);
1342 while (*str != 0) { /* non input consuming */
1343 if (casemap[*str] == casemap[*val])
1344 if (!xmlStrncasecmp(str, val, n)) return(str);
1345 str++;
1346 }
1347 return(NULL);
1348}
1349
1350/**
1351 * xmlStrsub:
1352 * @str: the xmlChar * array (haystack)
1353 * @start: the index of the first char (zero based)
1354 * @len: the length of the substring
1355 *
1356 * Extract a substring of a given string
1357 *
1358 * Returns the xmlChar * for the first occurence or NULL.
1359 */
1360
1361xmlChar *
1362xmlStrsub(const xmlChar *str, int start, int len) {
1363 int i;
1364
1365 if (str == NULL) return(NULL);
1366 if (start < 0) return(NULL);
1367 if (len < 0) return(NULL);
1368
1369 for (i = 0;i < start;i++) {
1370 if (*str == 0) return(NULL);
1371 str++;
1372 }
1373 if (*str == 0) return(NULL);
1374 return(xmlStrndup(str, len));
1375}
1376
1377/**
1378 * xmlStrlen:
1379 * @str: the xmlChar * array
1380 *
1381 * length of a xmlChar's string
1382 *
1383 * Returns the number of xmlChar contained in the ARRAY.
1384 */
1385
1386int
1387xmlStrlen(const xmlChar *str) {
1388 int len = 0;
1389
1390 if (str == NULL) return(0);
1391 while (*str != 0) { /* non input consuming */
1392 str++;
1393 len++;
1394 }
1395 return(len);
1396}
1397
1398/**
1399 * xmlStrncat:
1400 * @cur: the original xmlChar * array
1401 * @add: the xmlChar * array added
1402 * @len: the length of @add
1403 *
1404 * a strncat for array of xmlChar's, it will extend cur with the len
1405 * first bytes of @add.
1406 *
1407 * Returns a new xmlChar *, the original @cur is reallocated if needed
1408 * and should not be freed
1409 */
1410
1411xmlChar *
1412xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1413 int size;
1414 xmlChar *ret;
1415
1416 if ((add == NULL) || (len == 0))
1417 return(cur);
1418 if (cur == NULL)
1419 return(xmlStrndup(add, len));
1420
1421 size = xmlStrlen(cur);
1422 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1423 if (ret == NULL) {
1424 xmlGenericError(xmlGenericErrorContext,
1425 "xmlStrncat: realloc of %ld byte failed\n",
1426 (size + len + 1) * (long)sizeof(xmlChar));
1427 return(cur);
1428 }
1429 memcpy(&ret[size], add, len * sizeof(xmlChar));
1430 ret[size + len] = 0;
1431 return(ret);
1432}
1433
1434/**
1435 * xmlStrcat:
1436 * @cur: the original xmlChar * array
1437 * @add: the xmlChar * array added
1438 *
1439 * a strcat for array of xmlChar's. Since they are supposed to be
1440 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1441 * a termination mark of '0'.
1442 *
1443 * Returns a new xmlChar * containing the concatenated string.
1444 */
1445xmlChar *
1446xmlStrcat(xmlChar *cur, const xmlChar *add) {
1447 const xmlChar *p = add;
1448
1449 if (add == NULL) return(cur);
1450 if (cur == NULL)
1451 return(xmlStrdup(add));
1452
1453 while (*p != 0) p++; /* non input consuming */
1454 return(xmlStrncat(cur, add, p - add));
1455}
1456
1457/************************************************************************
1458 * *
1459 * Commodity functions, cleanup needed ? *
1460 * *
1461 ************************************************************************/
1462
1463/**
1464 * areBlanks:
1465 * @ctxt: an XML parser context
1466 * @str: a xmlChar *
1467 * @len: the size of @str
1468 *
1469 * Is this a sequence of blank chars that one can ignore ?
1470 *
1471 * Returns 1 if ignorable 0 otherwise.
1472 */
1473
1474static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1475 int i, ret;
1476 xmlNodePtr lastChild;
1477
Daniel Veillard05c13a22001-09-09 08:38:09 +00001478 /*
1479 * Don't spend time trying to differentiate them, the same callback is
1480 * used !
1481 */
1482 if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
Daniel Veillard2f362242001-03-02 17:36:21 +00001483 return(0);
1484
Owen Taylor3473f882001-02-23 17:55:21 +00001485 /*
1486 * Check for xml:space value.
1487 */
1488 if (*(ctxt->space) == 1)
1489 return(0);
1490
1491 /*
1492 * Check that the string is made of blanks
1493 */
1494 for (i = 0;i < len;i++)
1495 if (!(IS_BLANK(str[i]))) return(0);
1496
1497 /*
1498 * Look if the element is mixed content in the Dtd if available
1499 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001500 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001501 if (ctxt->myDoc != NULL) {
1502 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1503 if (ret == 0) return(1);
1504 if (ret == 1) return(0);
1505 }
1506
1507 /*
1508 * Otherwise, heuristic :-\
1509 */
Owen Taylor3473f882001-02-23 17:55:21 +00001510 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001511 if ((ctxt->node->children == NULL) &&
1512 (RAW == '<') && (NXT(1) == '/')) return(0);
1513
1514 lastChild = xmlGetLastChild(ctxt->node);
1515 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001516 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1517 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001518 } else if (xmlNodeIsText(lastChild))
1519 return(0);
1520 else if ((ctxt->node->children != NULL) &&
1521 (xmlNodeIsText(ctxt->node->children)))
1522 return(0);
1523 return(1);
1524}
1525
/*
 * Forward definitions for recursive behaviour.
 */
1529void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1530void xmlParseReference(xmlParserCtxtPtr ctxt);
1531
1532/************************************************************************
1533 * *
1534 * Extra stuff for namespace support *
1535 * Relates to http://www.w3.org/TR/WD-xml-names *
1536 * *
1537 ************************************************************************/
1538
1539/**
1540 * xmlSplitQName:
1541 * @ctxt: an XML parser context
1542 * @name: an XML parser context
1543 * @prefix: a xmlChar **
1544 *
1545 * parse an UTF8 encoded XML qualified name string
1546 *
1547 * [NS 5] QName ::= (Prefix ':')? LocalPart
1548 *
1549 * [NS 6] Prefix ::= NCName
1550 *
1551 * [NS 7] LocalPart ::= NCName
1552 *
1553 * Returns the local part, and prefix is updated
1554 * to get the Prefix if any.
1555 */
1556
1557xmlChar *
1558xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1559 xmlChar buf[XML_MAX_NAMELEN + 5];
1560 xmlChar *buffer = NULL;
1561 int len = 0;
1562 int max = XML_MAX_NAMELEN;
1563 xmlChar *ret = NULL;
1564 const xmlChar *cur = name;
1565 int c;
1566
1567 *prefix = NULL;
1568
1569#ifndef XML_XML_NAMESPACE
1570 /* xml: prefix is not really a namespace */
1571 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1572 (cur[2] == 'l') && (cur[3] == ':'))
1573 return(xmlStrdup(name));
1574#endif
1575
1576 /* nasty but valid */
1577 if (cur[0] == ':')
1578 return(xmlStrdup(name));
1579
1580 c = *cur++;
1581 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1582 buf[len++] = c;
1583 c = *cur++;
1584 }
1585 if (len >= max) {
1586 /*
1587 * Okay someone managed to make a huge name, so he's ready to pay
1588 * for the processing speed.
1589 */
1590 max = len * 2;
1591
1592 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1593 if (buffer == NULL) {
1594 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1595 ctxt->sax->error(ctxt->userData,
1596 "xmlSplitQName: out of memory\n");
1597 return(NULL);
1598 }
1599 memcpy(buffer, buf, len);
1600 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1601 if (len + 10 > max) {
1602 max *= 2;
1603 buffer = (xmlChar *) xmlRealloc(buffer,
1604 max * sizeof(xmlChar));
1605 if (buffer == NULL) {
1606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1607 ctxt->sax->error(ctxt->userData,
1608 "xmlSplitQName: out of memory\n");
1609 return(NULL);
1610 }
1611 }
1612 buffer[len++] = c;
1613 c = *cur++;
1614 }
1615 buffer[len] = 0;
1616 }
1617
1618 if (buffer == NULL)
1619 ret = xmlStrndup(buf, len);
1620 else {
1621 ret = buffer;
1622 buffer = NULL;
1623 max = XML_MAX_NAMELEN;
1624 }
1625
1626
1627 if (c == ':') {
1628 c = *cur++;
1629 if (c == 0) return(ret);
1630 *prefix = ret;
1631 len = 0;
1632
1633 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1634 buf[len++] = c;
1635 c = *cur++;
1636 }
1637 if (len >= max) {
1638 /*
1639 * Okay someone managed to make a huge name, so he's ready to pay
1640 * for the processing speed.
1641 */
1642 max = len * 2;
1643
1644 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1645 if (buffer == NULL) {
1646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1647 ctxt->sax->error(ctxt->userData,
1648 "xmlSplitQName: out of memory\n");
1649 return(NULL);
1650 }
1651 memcpy(buffer, buf, len);
1652 while (c != 0) { /* tested bigname2.xml */
1653 if (len + 10 > max) {
1654 max *= 2;
1655 buffer = (xmlChar *) xmlRealloc(buffer,
1656 max * sizeof(xmlChar));
1657 if (buffer == NULL) {
1658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1659 ctxt->sax->error(ctxt->userData,
1660 "xmlSplitQName: out of memory\n");
1661 return(NULL);
1662 }
1663 }
1664 buffer[len++] = c;
1665 c = *cur++;
1666 }
1667 buffer[len] = 0;
1668 }
1669
1670 if (buffer == NULL)
1671 ret = xmlStrndup(buf, len);
1672 else {
1673 ret = buffer;
1674 }
1675 }
1676
1677 return(ret);
1678}
1679
1680/************************************************************************
1681 * *
1682 * The parser itself *
1683 * Relates to http://www.w3.org/TR/REC-xml *
1684 * *
1685 ************************************************************************/
1686
Daniel Veillard76d66f42001-05-16 21:05:17 +00001687static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001688/**
1689 * xmlParseName:
1690 * @ctxt: an XML parser context
1691 *
1692 * parse an XML name.
1693 *
1694 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1695 * CombiningChar | Extender
1696 *
1697 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1698 *
1699 * [6] Names ::= Name (S Name)*
1700 *
1701 * Returns the Name parsed or NULL
1702 */
1703
1704xmlChar *
1705xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001706 const xmlChar *in;
1707 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001708 int count = 0;
1709
1710 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001711
1712 /*
1713 * Accelerator for simple ASCII names
1714 */
1715 in = ctxt->input->cur;
1716 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1717 ((*in >= 0x41) && (*in <= 0x5A)) ||
1718 (*in == '_') || (*in == ':')) {
1719 in++;
1720 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1721 ((*in >= 0x41) && (*in <= 0x5A)) ||
1722 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001723 (*in == '_') || (*in == '-') ||
1724 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001725 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001726 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001727 count = in - ctxt->input->cur;
1728 ret = xmlStrndup(ctxt->input->cur, count);
1729 ctxt->input->cur = in;
1730 return(ret);
1731 }
1732 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001733 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001734}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001735
Daniel Veillard76d66f42001-05-16 21:05:17 +00001736static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001737xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1738 xmlChar buf[XML_MAX_NAMELEN + 5];
1739 int len = 0, l;
1740 int c;
1741 int count = 0;
1742
1743 /*
1744 * Handler for more complex cases
1745 */
1746 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001747 c = CUR_CHAR(l);
1748 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1749 (!IS_LETTER(c) && (c != '_') &&
1750 (c != ':'))) {
1751 return(NULL);
1752 }
1753
1754 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1755 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1756 (c == '.') || (c == '-') ||
1757 (c == '_') || (c == ':') ||
1758 (IS_COMBINING(c)) ||
1759 (IS_EXTENDER(c)))) {
1760 if (count++ > 100) {
1761 count = 0;
1762 GROW;
1763 }
1764 COPY_BUF(l,buf,len,c);
1765 NEXTL(l);
1766 c = CUR_CHAR(l);
1767 if (len >= XML_MAX_NAMELEN) {
1768 /*
1769 * Okay someone managed to make a huge name, so he's ready to pay
1770 * for the processing speed.
1771 */
1772 xmlChar *buffer;
1773 int max = len * 2;
1774
1775 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1776 if (buffer == NULL) {
1777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1778 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001779 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001780 return(NULL);
1781 }
1782 memcpy(buffer, buf, len);
1783 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1784 (c == '.') || (c == '-') ||
1785 (c == '_') || (c == ':') ||
1786 (IS_COMBINING(c)) ||
1787 (IS_EXTENDER(c))) {
1788 if (count++ > 100) {
1789 count = 0;
1790 GROW;
1791 }
1792 if (len + 10 > max) {
1793 max *= 2;
1794 buffer = (xmlChar *) xmlRealloc(buffer,
1795 max * sizeof(xmlChar));
1796 if (buffer == NULL) {
1797 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1798 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001799 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001800 return(NULL);
1801 }
1802 }
1803 COPY_BUF(l,buffer,len,c);
1804 NEXTL(l);
1805 c = CUR_CHAR(l);
1806 }
1807 buffer[len] = 0;
1808 return(buffer);
1809 }
1810 }
1811 return(xmlStrndup(buf, len));
1812}
1813
1814/**
1815 * xmlParseStringName:
1816 * @ctxt: an XML parser context
1817 * @str: a pointer to the string pointer (IN/OUT)
1818 *
1819 * parse an XML name.
1820 *
1821 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1822 * CombiningChar | Extender
1823 *
1824 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1825 *
1826 * [6] Names ::= Name (S Name)*
1827 *
1828 * Returns the Name parsed or NULL. The str pointer
1829 * is updated to the current location in the string.
1830 */
1831
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001832static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001833xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1834 xmlChar buf[XML_MAX_NAMELEN + 5];
1835 const xmlChar *cur = *str;
1836 int len = 0, l;
1837 int c;
1838
1839 c = CUR_SCHAR(cur, l);
1840 if (!IS_LETTER(c) && (c != '_') &&
1841 (c != ':')) {
1842 return(NULL);
1843 }
1844
1845 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1846 (c == '.') || (c == '-') ||
1847 (c == '_') || (c == ':') ||
1848 (IS_COMBINING(c)) ||
1849 (IS_EXTENDER(c))) {
1850 COPY_BUF(l,buf,len,c);
1851 cur += l;
1852 c = CUR_SCHAR(cur, l);
1853 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1854 /*
1855 * Okay someone managed to make a huge name, so he's ready to pay
1856 * for the processing speed.
1857 */
1858 xmlChar *buffer;
1859 int max = len * 2;
1860
1861 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1862 if (buffer == NULL) {
1863 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1864 ctxt->sax->error(ctxt->userData,
1865 "xmlParseStringName: out of memory\n");
1866 return(NULL);
1867 }
1868 memcpy(buffer, buf, len);
1869 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1870 (c == '.') || (c == '-') ||
1871 (c == '_') || (c == ':') ||
1872 (IS_COMBINING(c)) ||
1873 (IS_EXTENDER(c))) {
1874 if (len + 10 > max) {
1875 max *= 2;
1876 buffer = (xmlChar *) xmlRealloc(buffer,
1877 max * sizeof(xmlChar));
1878 if (buffer == NULL) {
1879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1880 ctxt->sax->error(ctxt->userData,
1881 "xmlParseStringName: out of memory\n");
1882 return(NULL);
1883 }
1884 }
1885 COPY_BUF(l,buffer,len,c);
1886 cur += l;
1887 c = CUR_SCHAR(cur, l);
1888 }
1889 buffer[len] = 0;
1890 *str = cur;
1891 return(buffer);
1892 }
1893 }
1894 *str = cur;
1895 return(xmlStrndup(buf, len));
1896}
1897
1898/**
1899 * xmlParseNmtoken:
1900 * @ctxt: an XML parser context
1901 *
1902 * parse an XML Nmtoken.
1903 *
1904 * [7] Nmtoken ::= (NameChar)+
1905 *
1906 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1907 *
1908 * Returns the Nmtoken parsed or NULL
1909 */
1910
1911xmlChar *
1912xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1913 xmlChar buf[XML_MAX_NAMELEN + 5];
1914 int len = 0, l;
1915 int c;
1916 int count = 0;
1917
1918 GROW;
1919 c = CUR_CHAR(l);
1920
1921 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1922 (c == '.') || (c == '-') ||
1923 (c == '_') || (c == ':') ||
1924 (IS_COMBINING(c)) ||
1925 (IS_EXTENDER(c))) {
1926 if (count++ > 100) {
1927 count = 0;
1928 GROW;
1929 }
1930 COPY_BUF(l,buf,len,c);
1931 NEXTL(l);
1932 c = CUR_CHAR(l);
1933 if (len >= XML_MAX_NAMELEN) {
1934 /*
1935 * Okay someone managed to make a huge token, so he's ready to pay
1936 * for the processing speed.
1937 */
1938 xmlChar *buffer;
1939 int max = len * 2;
1940
1941 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1942 if (buffer == NULL) {
1943 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1944 ctxt->sax->error(ctxt->userData,
1945 "xmlParseNmtoken: out of memory\n");
1946 return(NULL);
1947 }
1948 memcpy(buffer, buf, len);
1949 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1950 (c == '.') || (c == '-') ||
1951 (c == '_') || (c == ':') ||
1952 (IS_COMBINING(c)) ||
1953 (IS_EXTENDER(c))) {
1954 if (count++ > 100) {
1955 count = 0;
1956 GROW;
1957 }
1958 if (len + 10 > max) {
1959 max *= 2;
1960 buffer = (xmlChar *) xmlRealloc(buffer,
1961 max * sizeof(xmlChar));
1962 if (buffer == NULL) {
1963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1964 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001965 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001966 return(NULL);
1967 }
1968 }
1969 COPY_BUF(l,buffer,len,c);
1970 NEXTL(l);
1971 c = CUR_CHAR(l);
1972 }
1973 buffer[len] = 0;
1974 return(buffer);
1975 }
1976 }
1977 if (len == 0)
1978 return(NULL);
1979 return(xmlStrndup(buf, len));
1980}
1981
1982/**
1983 * xmlParseEntityValue:
1984 * @ctxt: an XML parser context
1985 * @orig: if non-NULL store a copy of the original entity value
1986 *
1987 * parse a value for ENTITY declarations
1988 *
1989 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1990 * "'" ([^%&'] | PEReference | Reference)* "'"
1991 *
1992 * Returns the EntityValue parsed with reference substitued or NULL
1993 */
1994
1995xmlChar *
1996xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1997 xmlChar *buf = NULL;
1998 int len = 0;
1999 int size = XML_PARSER_BUFFER_SIZE;
2000 int c, l;
2001 xmlChar stop;
2002 xmlChar *ret = NULL;
2003 const xmlChar *cur = NULL;
2004 xmlParserInputPtr input;
2005
2006 if (RAW == '"') stop = '"';
2007 else if (RAW == '\'') stop = '\'';
2008 else {
2009 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2011 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2012 ctxt->wellFormed = 0;
2013 ctxt->disableSAX = 1;
2014 return(NULL);
2015 }
2016 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2017 if (buf == NULL) {
2018 xmlGenericError(xmlGenericErrorContext,
2019 "malloc of %d byte failed\n", size);
2020 return(NULL);
2021 }
2022
2023 /*
2024 * The content of the entity definition is copied in a buffer.
2025 */
2026
2027 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2028 input = ctxt->input;
2029 GROW;
2030 NEXT;
2031 c = CUR_CHAR(l);
2032 /*
2033 * NOTE: 4.4.5 Included in Literal
2034 * When a parameter entity reference appears in a literal entity
2035 * value, ... a single or double quote character in the replacement
2036 * text is always treated as a normal data character and will not
2037 * terminate the literal.
2038 * In practice it means we stop the loop only when back at parsing
2039 * the initial entity and the quote is found
2040 */
2041 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2042 (ctxt->input != input))) {
2043 if (len + 5 >= size) {
2044 size *= 2;
2045 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2046 if (buf == NULL) {
2047 xmlGenericError(xmlGenericErrorContext,
2048 "realloc of %d byte failed\n", size);
2049 return(NULL);
2050 }
2051 }
2052 COPY_BUF(l,buf,len,c);
2053 NEXTL(l);
2054 /*
2055 * Pop-up of finished entities.
2056 */
2057 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2058 xmlPopInput(ctxt);
2059
2060 GROW;
2061 c = CUR_CHAR(l);
2062 if (c == 0) {
2063 GROW;
2064 c = CUR_CHAR(l);
2065 }
2066 }
2067 buf[len] = 0;
2068
2069 /*
2070 * Raise problem w.r.t. '&' and '%' being used in non-entities
2071 * reference constructs. Note Charref will be handled in
2072 * xmlStringDecodeEntities()
2073 */
2074 cur = buf;
2075 while (*cur != 0) { /* non input consuming */
2076 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2077 xmlChar *name;
2078 xmlChar tmp = *cur;
2079
2080 cur++;
2081 name = xmlParseStringName(ctxt, &cur);
2082 if ((name == NULL) || (*cur != ';')) {
2083 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2085 ctxt->sax->error(ctxt->userData,
2086 "EntityValue: '%c' forbidden except for entities references\n",
2087 tmp);
2088 ctxt->wellFormed = 0;
2089 ctxt->disableSAX = 1;
2090 }
2091 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2092 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2093 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2094 ctxt->sax->error(ctxt->userData,
2095 "EntityValue: PEReferences forbidden in internal subset\n",
2096 tmp);
2097 ctxt->wellFormed = 0;
2098 ctxt->disableSAX = 1;
2099 }
2100 if (name != NULL)
2101 xmlFree(name);
2102 }
2103 cur++;
2104 }
2105
2106 /*
2107 * Then PEReference entities are substituted.
2108 */
2109 if (c != stop) {
2110 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2112 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2113 ctxt->wellFormed = 0;
2114 ctxt->disableSAX = 1;
2115 xmlFree(buf);
2116 } else {
2117 NEXT;
2118 /*
2119 * NOTE: 4.4.7 Bypassed
2120 * When a general entity reference appears in the EntityValue in
2121 * an entity declaration, it is bypassed and left as is.
2122 * so XML_SUBSTITUTE_REF is not set here.
2123 */
2124 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2125 0, 0, 0);
2126 if (orig != NULL)
2127 *orig = buf;
2128 else
2129 xmlFree(buf);
2130 }
2131
2132 return(ret);
2133}
2134
2135/**
2136 * xmlParseAttValue:
2137 * @ctxt: an XML parser context
2138 *
2139 * parse a value for an attribute
2140 * Note: the parser won't do substitution of entities here, this
2141 * will be handled later in xmlStringGetNodeList
2142 *
2143 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2144 * "'" ([^<&'] | Reference)* "'"
2145 *
2146 * 3.3.3 Attribute-Value Normalization:
2147 * Before the value of an attribute is passed to the application or
2148 * checked for validity, the XML processor must normalize it as follows:
2149 * - a character reference is processed by appending the referenced
2150 * character to the attribute value
2151 * - an entity reference is processed by recursively processing the
2152 * replacement text of the entity
2153 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2154 * appending #x20 to the normalized value, except that only a single
2155 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2156 * parsed entity or the literal entity value of an internal parsed entity
2157 * - other characters are processed by appending them to the normalized value
2158 * If the declared value is not CDATA, then the XML processor must further
2159 * process the normalized attribute value by discarding any leading and
2160 * trailing space (#x20) characters, and by replacing sequences of space
2161 * (#x20) characters by a single space (#x20) character.
2162 * All attributes for which no declaration has been read should be treated
2163 * by a non-validating parser as if declared CDATA.
2164 *
2165 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2166 */
2167
2168xmlChar *
2169xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2170 xmlChar limit = 0;
2171 xmlChar *buf = NULL;
2172 int len = 0;
2173 int buf_size = 0;
2174 int c, l;
2175 xmlChar *current = NULL;
2176 xmlEntityPtr ent;
2177
2178
2179 SHRINK;
2180 if (NXT(0) == '"') {
2181 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2182 limit = '"';
2183 NEXT;
2184 } else if (NXT(0) == '\'') {
2185 limit = '\'';
2186 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2187 NEXT;
2188 } else {
2189 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2191 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2192 ctxt->wellFormed = 0;
2193 ctxt->disableSAX = 1;
2194 return(NULL);
2195 }
2196
2197 /*
2198 * allocate a translation buffer.
2199 */
2200 buf_size = XML_PARSER_BUFFER_SIZE;
2201 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2202 if (buf == NULL) {
2203 perror("xmlParseAttValue: malloc failed");
2204 return(NULL);
2205 }
2206
2207 /*
2208 * Ok loop until we reach one of the ending char or a size limit.
2209 */
2210 c = CUR_CHAR(l);
2211 while (((NXT(0) != limit) && /* checked */
2212 (c != '<')) || (ctxt->token != 0)) {
2213 if (c == 0) break;
2214 if (ctxt->token == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002215 if (ctxt->replaceEntities) {
2216 if (len > buf_size - 10) {
2217 growBuffer(buf);
2218 }
2219 buf[len++] = '&';
2220 } else {
2221 /*
2222 * The reparsing will be done in xmlStringGetNodeList()
2223 * called by the attribute() function in SAX.c
2224 */
2225 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002226
Daniel Veillard319a7422001-09-11 09:27:09 +00002227 if (len > buf_size - 10) {
2228 growBuffer(buf);
2229 }
2230 current = &buffer[0];
2231 while (*current != 0) { /* non input consuming */
2232 buf[len++] = *current++;
2233 }
2234 ctxt->token = 0;
Owen Taylor3473f882001-02-23 17:55:21 +00002235 }
Owen Taylor3473f882001-02-23 17:55:21 +00002236 } else if (c == '&') {
2237 if (NXT(1) == '#') {
2238 int val = xmlParseCharRef(ctxt);
2239 if (val == '&') {
Daniel Veillard319a7422001-09-11 09:27:09 +00002240 if (ctxt->replaceEntities) {
2241 if (len > buf_size - 10) {
2242 growBuffer(buf);
2243 }
2244 buf[len++] = '&';
2245 } else {
2246 /*
2247 * The reparsing will be done in xmlStringGetNodeList()
2248 * called by the attribute() function in SAX.c
2249 */
2250 static xmlChar buffer[6] = "&#38;";
Owen Taylor3473f882001-02-23 17:55:21 +00002251
Daniel Veillard319a7422001-09-11 09:27:09 +00002252 if (len > buf_size - 10) {
2253 growBuffer(buf);
2254 }
2255 current = &buffer[0];
2256 while (*current != 0) { /* non input consuming */
2257 buf[len++] = *current++;
2258 }
Owen Taylor3473f882001-02-23 17:55:21 +00002259 }
2260 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002261 if (len > buf_size - 10) {
2262 growBuffer(buf);
2263 }
Owen Taylor3473f882001-02-23 17:55:21 +00002264 len += xmlCopyChar(0, &buf[len], val);
2265 }
2266 } else {
2267 ent = xmlParseEntityRef(ctxt);
2268 if ((ent != NULL) &&
2269 (ctxt->replaceEntities != 0)) {
2270 xmlChar *rep;
2271
2272 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2273 rep = xmlStringDecodeEntities(ctxt, ent->content,
2274 XML_SUBSTITUTE_REF, 0, 0, 0);
2275 if (rep != NULL) {
2276 current = rep;
2277 while (*current != 0) { /* non input consuming */
2278 buf[len++] = *current++;
2279 if (len > buf_size - 10) {
2280 growBuffer(buf);
2281 }
2282 }
2283 xmlFree(rep);
2284 }
2285 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002286 if (len > buf_size - 10) {
2287 growBuffer(buf);
2288 }
Owen Taylor3473f882001-02-23 17:55:21 +00002289 if (ent->content != NULL)
2290 buf[len++] = ent->content[0];
2291 }
2292 } else if (ent != NULL) {
2293 int i = xmlStrlen(ent->name);
2294 const xmlChar *cur = ent->name;
2295
2296 /*
2297 * This may look absurd but is needed to detect
2298 * entities problems
2299 */
2300 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2301 (ent->content != NULL)) {
2302 xmlChar *rep;
2303 rep = xmlStringDecodeEntities(ctxt, ent->content,
2304 XML_SUBSTITUTE_REF, 0, 0, 0);
2305 if (rep != NULL)
2306 xmlFree(rep);
2307 }
2308
2309 /*
2310 * Just output the reference
2311 */
2312 buf[len++] = '&';
2313 if (len > buf_size - i - 10) {
2314 growBuffer(buf);
2315 }
2316 for (;i > 0;i--)
2317 buf[len++] = *cur++;
2318 buf[len++] = ';';
2319 }
2320 }
2321 } else {
2322 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2323 COPY_BUF(l,buf,len,0x20);
2324 if (len > buf_size - 10) {
2325 growBuffer(buf);
2326 }
2327 } else {
2328 COPY_BUF(l,buf,len,c);
2329 if (len > buf_size - 10) {
2330 growBuffer(buf);
2331 }
2332 }
2333 NEXTL(l);
2334 }
2335 GROW;
2336 c = CUR_CHAR(l);
2337 }
2338 buf[len++] = 0;
2339 if (RAW == '<') {
2340 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2342 ctxt->sax->error(ctxt->userData,
2343 "Unescaped '<' not allowed in attributes values\n");
2344 ctxt->wellFormed = 0;
2345 ctxt->disableSAX = 1;
2346 } else if (RAW != limit) {
2347 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2349 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2350 ctxt->wellFormed = 0;
2351 ctxt->disableSAX = 1;
2352 } else
2353 NEXT;
2354 return(buf);
2355}
2356
2357/**
2358 * xmlParseSystemLiteral:
2359 * @ctxt: an XML parser context
2360 *
2361 * parse an XML Literal
2362 *
2363 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2364 *
2365 * Returns the SystemLiteral parsed or NULL
2366 */
2367
2368xmlChar *
2369xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2370 xmlChar *buf = NULL;
2371 int len = 0;
2372 int size = XML_PARSER_BUFFER_SIZE;
2373 int cur, l;
2374 xmlChar stop;
2375 int state = ctxt->instate;
2376 int count = 0;
2377
2378 SHRINK;
2379 if (RAW == '"') {
2380 NEXT;
2381 stop = '"';
2382 } else if (RAW == '\'') {
2383 NEXT;
2384 stop = '\'';
2385 } else {
2386 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2388 ctxt->sax->error(ctxt->userData,
2389 "SystemLiteral \" or ' expected\n");
2390 ctxt->wellFormed = 0;
2391 ctxt->disableSAX = 1;
2392 return(NULL);
2393 }
2394
2395 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2396 if (buf == NULL) {
2397 xmlGenericError(xmlGenericErrorContext,
2398 "malloc of %d byte failed\n", size);
2399 return(NULL);
2400 }
2401 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2402 cur = CUR_CHAR(l);
2403 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2404 if (len + 5 >= size) {
2405 size *= 2;
2406 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2407 if (buf == NULL) {
2408 xmlGenericError(xmlGenericErrorContext,
2409 "realloc of %d byte failed\n", size);
2410 ctxt->instate = (xmlParserInputState) state;
2411 return(NULL);
2412 }
2413 }
2414 count++;
2415 if (count > 50) {
2416 GROW;
2417 count = 0;
2418 }
2419 COPY_BUF(l,buf,len,cur);
2420 NEXTL(l);
2421 cur = CUR_CHAR(l);
2422 if (cur == 0) {
2423 GROW;
2424 SHRINK;
2425 cur = CUR_CHAR(l);
2426 }
2427 }
2428 buf[len] = 0;
2429 ctxt->instate = (xmlParserInputState) state;
2430 if (!IS_CHAR(cur)) {
2431 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2432 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2433 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2434 ctxt->wellFormed = 0;
2435 ctxt->disableSAX = 1;
2436 } else {
2437 NEXT;
2438 }
2439 return(buf);
2440}
2441
2442/**
2443 * xmlParsePubidLiteral:
2444 * @ctxt: an XML parser context
2445 *
2446 * parse an XML public literal
2447 *
2448 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2449 *
2450 * Returns the PubidLiteral parsed or NULL.
2451 */
2452
2453xmlChar *
2454xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2455 xmlChar *buf = NULL;
2456 int len = 0;
2457 int size = XML_PARSER_BUFFER_SIZE;
2458 xmlChar cur;
2459 xmlChar stop;
2460 int count = 0;
2461
2462 SHRINK;
2463 if (RAW == '"') {
2464 NEXT;
2465 stop = '"';
2466 } else if (RAW == '\'') {
2467 NEXT;
2468 stop = '\'';
2469 } else {
2470 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2472 ctxt->sax->error(ctxt->userData,
2473 "SystemLiteral \" or ' expected\n");
2474 ctxt->wellFormed = 0;
2475 ctxt->disableSAX = 1;
2476 return(NULL);
2477 }
2478 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2479 if (buf == NULL) {
2480 xmlGenericError(xmlGenericErrorContext,
2481 "malloc of %d byte failed\n", size);
2482 return(NULL);
2483 }
2484 cur = CUR;
2485 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2486 if (len + 1 >= size) {
2487 size *= 2;
2488 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2489 if (buf == NULL) {
2490 xmlGenericError(xmlGenericErrorContext,
2491 "realloc of %d byte failed\n", size);
2492 return(NULL);
2493 }
2494 }
2495 buf[len++] = cur;
2496 count++;
2497 if (count > 50) {
2498 GROW;
2499 count = 0;
2500 }
2501 NEXT;
2502 cur = CUR;
2503 if (cur == 0) {
2504 GROW;
2505 SHRINK;
2506 cur = CUR;
2507 }
2508 }
2509 buf[len] = 0;
2510 if (cur != stop) {
2511 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2513 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2514 ctxt->wellFormed = 0;
2515 ctxt->disableSAX = 1;
2516 } else {
2517 NEXT;
2518 }
2519 return(buf);
2520}
2521
Daniel Veillard48b2f892001-02-25 16:11:03 +00002522void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002523/**
2524 * xmlParseCharData:
2525 * @ctxt: an XML parser context
2526 * @cdata: int indicating whether we are within a CDATA section
2527 *
2528 * parse a CharData section.
2529 * if we are within a CDATA section ']]>' marks an end of section.
2530 *
2531 * The right angle bracket (>) may be represented using the string "&gt;",
2532 * and must, for compatibility, be escaped using "&gt;" or a character
2533 * reference when it appears in the string "]]>" in content, when that
2534 * string is not marking the end of a CDATA section.
2535 *
2536 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2537 */
2538
2539void
2540xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002541 const xmlChar *in;
2542 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002543 int line = ctxt->input->line;
2544 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002545
2546 SHRINK;
2547 GROW;
2548 /*
2549 * Accelerated common case where input don't need to be
2550 * modified before passing it to the handler.
2551 */
2552 if ((ctxt->token == 0) && (!cdata)) {
2553 in = ctxt->input->cur;
2554 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002555get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002556 while (((*in >= 0x20) && (*in != '<') &&
2557 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2558 in++;
2559 if (*in == 0xA) {
2560 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002561 in++;
2562 while (*in == 0xA) {
2563 ctxt->input->line++;
2564 in++;
2565 }
2566 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002567 }
2568 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002569 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002570 if (IS_BLANK(*ctxt->input->cur)) {
2571 const xmlChar *tmp = ctxt->input->cur;
2572 ctxt->input->cur = in;
2573 if (areBlanks(ctxt, tmp, nbchar)) {
2574 if (ctxt->sax->ignorableWhitespace != NULL)
2575 ctxt->sax->ignorableWhitespace(ctxt->userData,
2576 tmp, nbchar);
2577 } else {
2578 if (ctxt->sax->characters != NULL)
2579 ctxt->sax->characters(ctxt->userData,
2580 tmp, nbchar);
2581 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002582 line = ctxt->input->line;
2583 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002584 } else {
2585 if (ctxt->sax->characters != NULL)
2586 ctxt->sax->characters(ctxt->userData,
2587 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002588 line = ctxt->input->line;
2589 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002590 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002591 }
2592 ctxt->input->cur = in;
2593 if (*in == 0xD) {
2594 in++;
2595 if (*in == 0xA) {
2596 ctxt->input->cur = in;
2597 in++;
2598 ctxt->input->line++;
2599 continue; /* while */
2600 }
2601 in--;
2602 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002603 if (*in == '<') {
2604 return;
2605 }
2606 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002607 return;
2608 }
2609 SHRINK;
2610 GROW;
2611 in = ctxt->input->cur;
2612 } while ((*in >= 0x20) && (*in <= 0x7F));
2613 nbchar = 0;
2614 }
Daniel Veillard50582112001-03-26 22:52:16 +00002615 ctxt->input->line = line;
2616 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002617 xmlParseCharDataComplex(ctxt, cdata);
2618}
2619
2620void
2621xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002622 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2623 int nbchar = 0;
2624 int cur, l;
2625 int count = 0;
2626
2627 SHRINK;
2628 GROW;
2629 cur = CUR_CHAR(l);
2630 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2631 ((cur != '&') || (ctxt->token == '&')) &&
2632 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2633 if ((cur == ']') && (NXT(1) == ']') &&
2634 (NXT(2) == '>')) {
2635 if (cdata) break;
2636 else {
2637 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2638 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2639 ctxt->sax->error(ctxt->userData,
2640 "Sequence ']]>' not allowed in content\n");
2641 /* Should this be relaxed ??? I see a "must here */
2642 ctxt->wellFormed = 0;
2643 ctxt->disableSAX = 1;
2644 }
2645 }
2646 COPY_BUF(l,buf,nbchar,cur);
2647 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2648 /*
2649 * Ok the segment is to be consumed as chars.
2650 */
2651 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2652 if (areBlanks(ctxt, buf, nbchar)) {
2653 if (ctxt->sax->ignorableWhitespace != NULL)
2654 ctxt->sax->ignorableWhitespace(ctxt->userData,
2655 buf, nbchar);
2656 } else {
2657 if (ctxt->sax->characters != NULL)
2658 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2659 }
2660 }
2661 nbchar = 0;
2662 }
2663 count++;
2664 if (count > 50) {
2665 GROW;
2666 count = 0;
2667 }
2668 NEXTL(l);
2669 cur = CUR_CHAR(l);
2670 }
2671 if (nbchar != 0) {
2672 /*
2673 * Ok the segment is to be consumed as chars.
2674 */
2675 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2676 if (areBlanks(ctxt, buf, nbchar)) {
2677 if (ctxt->sax->ignorableWhitespace != NULL)
2678 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2679 } else {
2680 if (ctxt->sax->characters != NULL)
2681 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2682 }
2683 }
2684 }
2685}
2686
2687/**
2688 * xmlParseExternalID:
2689 * @ctxt: an XML parser context
2690 * @publicID: a xmlChar** receiving PubidLiteral
2691 * @strict: indicate whether we should restrict parsing to only
2692 * production [75], see NOTE below
2693 *
2694 * Parse an External ID or a Public ID
2695 *
2696 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2697 * 'PUBLIC' S PubidLiteral S SystemLiteral
2698 *
2699 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2700 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2701 *
2702 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2703 *
2704 * Returns the function returns SystemLiteral and in the second
2705 * case publicID receives PubidLiteral, is strict is off
2706 * it is possible to return NULL and have publicID set.
2707 */
2708
2709xmlChar *
2710xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2711 xmlChar *URI = NULL;
2712
2713 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002714
2715 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002716 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2717 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2718 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2719 SKIP(6);
2720 if (!IS_BLANK(CUR)) {
2721 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2722 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2723 ctxt->sax->error(ctxt->userData,
2724 "Space required after 'SYSTEM'\n");
2725 ctxt->wellFormed = 0;
2726 ctxt->disableSAX = 1;
2727 }
2728 SKIP_BLANKS;
2729 URI = xmlParseSystemLiteral(ctxt);
2730 if (URI == NULL) {
2731 ctxt->errNo = XML_ERR_URI_REQUIRED;
2732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2733 ctxt->sax->error(ctxt->userData,
2734 "xmlParseExternalID: SYSTEM, no URI\n");
2735 ctxt->wellFormed = 0;
2736 ctxt->disableSAX = 1;
2737 }
2738 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2739 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2740 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2741 SKIP(6);
2742 if (!IS_BLANK(CUR)) {
2743 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2744 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2745 ctxt->sax->error(ctxt->userData,
2746 "Space required after 'PUBLIC'\n");
2747 ctxt->wellFormed = 0;
2748 ctxt->disableSAX = 1;
2749 }
2750 SKIP_BLANKS;
2751 *publicID = xmlParsePubidLiteral(ctxt);
2752 if (*publicID == NULL) {
2753 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2755 ctxt->sax->error(ctxt->userData,
2756 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2757 ctxt->wellFormed = 0;
2758 ctxt->disableSAX = 1;
2759 }
2760 if (strict) {
2761 /*
2762 * We don't handle [83] so "S SystemLiteral" is required.
2763 */
2764 if (!IS_BLANK(CUR)) {
2765 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2767 ctxt->sax->error(ctxt->userData,
2768 "Space required after the Public Identifier\n");
2769 ctxt->wellFormed = 0;
2770 ctxt->disableSAX = 1;
2771 }
2772 } else {
2773 /*
2774 * We handle [83] so we return immediately, if
2775 * "S SystemLiteral" is not detected. From a purely parsing
2776 * point of view that's a nice mess.
2777 */
2778 const xmlChar *ptr;
2779 GROW;
2780
2781 ptr = CUR_PTR;
2782 if (!IS_BLANK(*ptr)) return(NULL);
2783
2784 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2785 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2786 }
2787 SKIP_BLANKS;
2788 URI = xmlParseSystemLiteral(ctxt);
2789 if (URI == NULL) {
2790 ctxt->errNo = XML_ERR_URI_REQUIRED;
2791 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2792 ctxt->sax->error(ctxt->userData,
2793 "xmlParseExternalID: PUBLIC, no URI\n");
2794 ctxt->wellFormed = 0;
2795 ctxt->disableSAX = 1;
2796 }
2797 }
2798 return(URI);
2799}
2800
2801/**
2802 * xmlParseComment:
2803 * @ctxt: an XML parser context
2804 *
2805 * Skip an XML (SGML) comment <!-- .... -->
2806 * The spec says that "For compatibility, the string "--" (double-hyphen)
2807 * must not occur within comments. "
2808 *
2809 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2810 */
2811void
2812xmlParseComment(xmlParserCtxtPtr ctxt) {
2813 xmlChar *buf = NULL;
2814 int len;
2815 int size = XML_PARSER_BUFFER_SIZE;
2816 int q, ql;
2817 int r, rl;
2818 int cur, l;
2819 xmlParserInputState state;
2820 xmlParserInputPtr input = ctxt->input;
2821 int count = 0;
2822
2823 /*
2824 * Check that there is a comment right here.
2825 */
2826 if ((RAW != '<') || (NXT(1) != '!') ||
2827 (NXT(2) != '-') || (NXT(3) != '-')) return;
2828
2829 state = ctxt->instate;
2830 ctxt->instate = XML_PARSER_COMMENT;
2831 SHRINK;
2832 SKIP(4);
2833 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2834 if (buf == NULL) {
2835 xmlGenericError(xmlGenericErrorContext,
2836 "malloc of %d byte failed\n", size);
2837 ctxt->instate = state;
2838 return;
2839 }
2840 q = CUR_CHAR(ql);
2841 NEXTL(ql);
2842 r = CUR_CHAR(rl);
2843 NEXTL(rl);
2844 cur = CUR_CHAR(l);
2845 len = 0;
2846 while (IS_CHAR(cur) && /* checked */
2847 ((cur != '>') ||
2848 (r != '-') || (q != '-'))) {
2849 if ((r == '-') && (q == '-') && (len > 1)) {
2850 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2852 ctxt->sax->error(ctxt->userData,
2853 "Comment must not contain '--' (double-hyphen)`\n");
2854 ctxt->wellFormed = 0;
2855 ctxt->disableSAX = 1;
2856 }
2857 if (len + 5 >= size) {
2858 size *= 2;
2859 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2860 if (buf == NULL) {
2861 xmlGenericError(xmlGenericErrorContext,
2862 "realloc of %d byte failed\n", size);
2863 ctxt->instate = state;
2864 return;
2865 }
2866 }
2867 COPY_BUF(ql,buf,len,q);
2868 q = r;
2869 ql = rl;
2870 r = cur;
2871 rl = l;
2872
2873 count++;
2874 if (count > 50) {
2875 GROW;
2876 count = 0;
2877 }
2878 NEXTL(l);
2879 cur = CUR_CHAR(l);
2880 if (cur == 0) {
2881 SHRINK;
2882 GROW;
2883 cur = CUR_CHAR(l);
2884 }
2885 }
2886 buf[len] = 0;
2887 if (!IS_CHAR(cur)) {
2888 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2889 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2890 ctxt->sax->error(ctxt->userData,
2891 "Comment not terminated \n<!--%.50s\n", buf);
2892 ctxt->wellFormed = 0;
2893 ctxt->disableSAX = 1;
2894 xmlFree(buf);
2895 } else {
2896 if (input != ctxt->input) {
2897 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2899 ctxt->sax->error(ctxt->userData,
2900"Comment doesn't start and stop in the same entity\n");
2901 ctxt->wellFormed = 0;
2902 ctxt->disableSAX = 1;
2903 }
2904 NEXT;
2905 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2906 (!ctxt->disableSAX))
2907 ctxt->sax->comment(ctxt->userData, buf);
2908 xmlFree(buf);
2909 }
2910 ctxt->instate = state;
2911}
2912
2913/**
2914 * xmlParsePITarget:
2915 * @ctxt: an XML parser context
2916 *
2917 * parse the name of a PI
2918 *
2919 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2920 *
2921 * Returns the PITarget name or NULL
2922 */
2923
2924xmlChar *
2925xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2926 xmlChar *name;
2927
2928 name = xmlParseName(ctxt);
2929 if ((name != NULL) &&
2930 ((name[0] == 'x') || (name[0] == 'X')) &&
2931 ((name[1] == 'm') || (name[1] == 'M')) &&
2932 ((name[2] == 'l') || (name[2] == 'L'))) {
2933 int i;
2934 if ((name[0] == 'x') && (name[1] == 'm') &&
2935 (name[2] == 'l') && (name[3] == 0)) {
2936 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2937 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2938 ctxt->sax->error(ctxt->userData,
2939 "XML declaration allowed only at the start of the document\n");
2940 ctxt->wellFormed = 0;
2941 ctxt->disableSAX = 1;
2942 return(name);
2943 } else if (name[3] == 0) {
2944 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2946 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2947 ctxt->wellFormed = 0;
2948 ctxt->disableSAX = 1;
2949 return(name);
2950 }
2951 for (i = 0;;i++) {
2952 if (xmlW3CPIs[i] == NULL) break;
2953 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2954 return(name);
2955 }
2956 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2957 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2958 ctxt->sax->warning(ctxt->userData,
2959 "xmlParsePItarget: invalid name prefix 'xml'\n");
2960 }
2961 }
2962 return(name);
2963}
2964
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information.
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document.
 *
 * The expected content is: catalog S? '=' S? quoted-URL S?
 * A warning is raised on a syntax error, except when the '=' is missing
 * in which case the PI is silently ignored.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *uri = NULL;
    const xmlChar *cur = catalog;
    const xmlChar *start;
    xmlChar quote;

    /* the pseudo attribute name */
    while (IS_BLANK(*cur))
        cur++;
    if (xmlStrncmp(cur, BAD_CAST"catalog", 7) != 0)
        goto syntax_error;
    cur += 7;

    /* the equal sign */
    while (IS_BLANK(*cur))
        cur++;
    if (*cur != '=')
        return;
    cur++;

    /* the opening quote, either ' or " */
    while (IS_BLANK(*cur))
        cur++;
    quote = *cur;
    if ((quote != '\'') && (quote != '"'))
        goto syntax_error;
    cur++;

    /* the URL runs up to the matching quote */
    start = cur;
    while ((*cur != 0) && (*cur != quote))
        cur++;
    if (*cur == 0)
        goto syntax_error;
    uri = xmlStrndup(start, cur - start);
    cur++;

    /* only blanks are allowed after the closing quote */
    while (IS_BLANK(*cur))
        cur++;
    if (*cur != 0)
        goto syntax_error;

    if (uri != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, uri);
        xmlFree(uri);
    }
    return;

syntax_error:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (uri != NULL)
        xmlFree(uri);
}
#endif
3027
Owen Taylor3473f882001-02-23 17:55:21 +00003028/**
3029 * xmlParsePI:
3030 * @ctxt: an XML parser context
3031 *
3032 * parse an XML Processing Instruction.
3033 *
3034 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3035 *
3036 * The processing is transfered to SAX once parsed.
3037 */
3038
3039void
3040xmlParsePI(xmlParserCtxtPtr ctxt) {
3041 xmlChar *buf = NULL;
3042 int len = 0;
3043 int size = XML_PARSER_BUFFER_SIZE;
3044 int cur, l;
3045 xmlChar *target;
3046 xmlParserInputState state;
3047 int count = 0;
3048
3049 if ((RAW == '<') && (NXT(1) == '?')) {
3050 xmlParserInputPtr input = ctxt->input;
3051 state = ctxt->instate;
3052 ctxt->instate = XML_PARSER_PI;
3053 /*
3054 * this is a Processing Instruction.
3055 */
3056 SKIP(2);
3057 SHRINK;
3058
3059 /*
3060 * Parse the target name and check for special support like
3061 * namespace.
3062 */
3063 target = xmlParsePITarget(ctxt);
3064 if (target != NULL) {
3065 if ((RAW == '?') && (NXT(1) == '>')) {
3066 if (input != ctxt->input) {
3067 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3069 ctxt->sax->error(ctxt->userData,
3070 "PI declaration doesn't start and stop in the same entity\n");
3071 ctxt->wellFormed = 0;
3072 ctxt->disableSAX = 1;
3073 }
3074 SKIP(2);
3075
3076 /*
3077 * SAX: PI detected.
3078 */
3079 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3080 (ctxt->sax->processingInstruction != NULL))
3081 ctxt->sax->processingInstruction(ctxt->userData,
3082 target, NULL);
3083 ctxt->instate = state;
3084 xmlFree(target);
3085 return;
3086 }
3087 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3088 if (buf == NULL) {
3089 xmlGenericError(xmlGenericErrorContext,
3090 "malloc of %d byte failed\n", size);
3091 ctxt->instate = state;
3092 return;
3093 }
3094 cur = CUR;
3095 if (!IS_BLANK(cur)) {
3096 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3098 ctxt->sax->error(ctxt->userData,
3099 "xmlParsePI: PI %s space expected\n", target);
3100 ctxt->wellFormed = 0;
3101 ctxt->disableSAX = 1;
3102 }
3103 SKIP_BLANKS;
3104 cur = CUR_CHAR(l);
3105 while (IS_CHAR(cur) && /* checked */
3106 ((cur != '?') || (NXT(1) != '>'))) {
3107 if (len + 5 >= size) {
3108 size *= 2;
3109 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3110 if (buf == NULL) {
3111 xmlGenericError(xmlGenericErrorContext,
3112 "realloc of %d byte failed\n", size);
3113 ctxt->instate = state;
3114 return;
3115 }
3116 }
3117 count++;
3118 if (count > 50) {
3119 GROW;
3120 count = 0;
3121 }
3122 COPY_BUF(l,buf,len,cur);
3123 NEXTL(l);
3124 cur = CUR_CHAR(l);
3125 if (cur == 0) {
3126 SHRINK;
3127 GROW;
3128 cur = CUR_CHAR(l);
3129 }
3130 }
3131 buf[len] = 0;
3132 if (cur != '?') {
3133 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3135 ctxt->sax->error(ctxt->userData,
3136 "xmlParsePI: PI %s never end ...\n", target);
3137 ctxt->wellFormed = 0;
3138 ctxt->disableSAX = 1;
3139 } else {
3140 if (input != ctxt->input) {
3141 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3143 ctxt->sax->error(ctxt->userData,
3144 "PI declaration doesn't start and stop in the same entity\n");
3145 ctxt->wellFormed = 0;
3146 ctxt->disableSAX = 1;
3147 }
3148 SKIP(2);
3149
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003150#ifdef LIBXML_CATALOG_ENABLED
3151 if (((state == XML_PARSER_MISC) ||
3152 (state == XML_PARSER_START)) &&
3153 (xmlStrEqual(target, XML_CATALOG_PI))) {
3154 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3155 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3156 (allow == XML_CATA_ALLOW_ALL))
3157 xmlParseCatalogPI(ctxt, buf);
3158 }
3159#endif
3160
3161
Owen Taylor3473f882001-02-23 17:55:21 +00003162 /*
3163 * SAX: PI detected.
3164 */
3165 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3166 (ctxt->sax->processingInstruction != NULL))
3167 ctxt->sax->processingInstruction(ctxt->userData,
3168 target, buf);
3169 }
3170 xmlFree(buf);
3171 xmlFree(target);
3172 } else {
3173 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3175 ctxt->sax->error(ctxt->userData,
3176 "xmlParsePI : no target name\n");
3177 ctxt->wellFormed = 0;
3178 ctxt->disableSAX = 1;
3179 }
3180 ctxt->instate = state;
3181 }
3182}
3183
3184/**
3185 * xmlParseNotationDecl:
3186 * @ctxt: an XML parser context
3187 *
3188 * parse a notation declaration
3189 *
3190 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3191 *
3192 * Hence there is actually 3 choices:
3193 * 'PUBLIC' S PubidLiteral
3194 * 'PUBLIC' S PubidLiteral S SystemLiteral
3195 * and 'SYSTEM' S SystemLiteral
3196 *
3197 * See the NOTE on xmlParseExternalID().
3198 */
3199
3200void
3201xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3202 xmlChar *name;
3203 xmlChar *Pubid;
3204 xmlChar *Systemid;
3205
3206 if ((RAW == '<') && (NXT(1) == '!') &&
3207 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3208 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3209 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3210 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3211 xmlParserInputPtr input = ctxt->input;
3212 SHRINK;
3213 SKIP(10);
3214 if (!IS_BLANK(CUR)) {
3215 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3216 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3217 ctxt->sax->error(ctxt->userData,
3218 "Space required after '<!NOTATION'\n");
3219 ctxt->wellFormed = 0;
3220 ctxt->disableSAX = 1;
3221 return;
3222 }
3223 SKIP_BLANKS;
3224
Daniel Veillard76d66f42001-05-16 21:05:17 +00003225 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003226 if (name == NULL) {
3227 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3229 ctxt->sax->error(ctxt->userData,
3230 "NOTATION: Name expected here\n");
3231 ctxt->wellFormed = 0;
3232 ctxt->disableSAX = 1;
3233 return;
3234 }
3235 if (!IS_BLANK(CUR)) {
3236 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3238 ctxt->sax->error(ctxt->userData,
3239 "Space required after the NOTATION name'\n");
3240 ctxt->wellFormed = 0;
3241 ctxt->disableSAX = 1;
3242 return;
3243 }
3244 SKIP_BLANKS;
3245
3246 /*
3247 * Parse the IDs.
3248 */
3249 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3250 SKIP_BLANKS;
3251
3252 if (RAW == '>') {
3253 if (input != ctxt->input) {
3254 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3255 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3256 ctxt->sax->error(ctxt->userData,
3257"Notation declaration doesn't start and stop in the same entity\n");
3258 ctxt->wellFormed = 0;
3259 ctxt->disableSAX = 1;
3260 }
3261 NEXT;
3262 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3263 (ctxt->sax->notationDecl != NULL))
3264 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3265 } else {
3266 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3267 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3268 ctxt->sax->error(ctxt->userData,
3269 "'>' required to close NOTATION declaration\n");
3270 ctxt->wellFormed = 0;
3271 ctxt->disableSAX = 1;
3272 }
3273 xmlFree(name);
3274 if (Systemid != NULL) xmlFree(Systemid);
3275 if (Pubid != NULL) xmlFree(Pubid);
3276 }
3277}
3278
3279/**
3280 * xmlParseEntityDecl:
3281 * @ctxt: an XML parser context
3282 *
3283 * parse <!ENTITY declarations
3284 *
3285 * [70] EntityDecl ::= GEDecl | PEDecl
3286 *
3287 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3288 *
3289 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3290 *
3291 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3292 *
3293 * [74] PEDef ::= EntityValue | ExternalID
3294 *
3295 * [76] NDataDecl ::= S 'NDATA' S Name
3296 *
3297 * [ VC: Notation Declared ]
3298 * The Name must match the declared name of a notation.
3299 */
3300
3301void
3302xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3303 xmlChar *name = NULL;
3304 xmlChar *value = NULL;
3305 xmlChar *URI = NULL, *literal = NULL;
3306 xmlChar *ndata = NULL;
3307 int isParameter = 0;
3308 xmlChar *orig = NULL;
3309
3310 GROW;
3311 if ((RAW == '<') && (NXT(1) == '!') &&
3312 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3313 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3314 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3315 xmlParserInputPtr input = ctxt->input;
3316 ctxt->instate = XML_PARSER_ENTITY_DECL;
3317 SHRINK;
3318 SKIP(8);
3319 if (!IS_BLANK(CUR)) {
3320 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3322 ctxt->sax->error(ctxt->userData,
3323 "Space required after '<!ENTITY'\n");
3324 ctxt->wellFormed = 0;
3325 ctxt->disableSAX = 1;
3326 }
3327 SKIP_BLANKS;
3328
3329 if (RAW == '%') {
3330 NEXT;
3331 if (!IS_BLANK(CUR)) {
3332 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3334 ctxt->sax->error(ctxt->userData,
3335 "Space required after '%'\n");
3336 ctxt->wellFormed = 0;
3337 ctxt->disableSAX = 1;
3338 }
3339 SKIP_BLANKS;
3340 isParameter = 1;
3341 }
3342
Daniel Veillard76d66f42001-05-16 21:05:17 +00003343 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003344 if (name == NULL) {
3345 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3346 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3347 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3348 ctxt->wellFormed = 0;
3349 ctxt->disableSAX = 1;
3350 return;
3351 }
3352 if (!IS_BLANK(CUR)) {
3353 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3355 ctxt->sax->error(ctxt->userData,
3356 "Space required after the entity name\n");
3357 ctxt->wellFormed = 0;
3358 ctxt->disableSAX = 1;
3359 }
3360 SKIP_BLANKS;
3361
3362 /*
3363 * handle the various case of definitions...
3364 */
3365 if (isParameter) {
3366 if ((RAW == '"') || (RAW == '\'')) {
3367 value = xmlParseEntityValue(ctxt, &orig);
3368 if (value) {
3369 if ((ctxt->sax != NULL) &&
3370 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3371 ctxt->sax->entityDecl(ctxt->userData, name,
3372 XML_INTERNAL_PARAMETER_ENTITY,
3373 NULL, NULL, value);
3374 }
3375 } else {
3376 URI = xmlParseExternalID(ctxt, &literal, 1);
3377 if ((URI == NULL) && (literal == NULL)) {
3378 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3379 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3380 ctxt->sax->error(ctxt->userData,
3381 "Entity value required\n");
3382 ctxt->wellFormed = 0;
3383 ctxt->disableSAX = 1;
3384 }
3385 if (URI) {
3386 xmlURIPtr uri;
3387
3388 uri = xmlParseURI((const char *) URI);
3389 if (uri == NULL) {
3390 ctxt->errNo = XML_ERR_INVALID_URI;
3391 if ((ctxt->sax != NULL) &&
3392 (!ctxt->disableSAX) &&
3393 (ctxt->sax->error != NULL))
3394 ctxt->sax->error(ctxt->userData,
3395 "Invalid URI: %s\n", URI);
3396 ctxt->wellFormed = 0;
3397 } else {
3398 if (uri->fragment != NULL) {
3399 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3400 if ((ctxt->sax != NULL) &&
3401 (!ctxt->disableSAX) &&
3402 (ctxt->sax->error != NULL))
3403 ctxt->sax->error(ctxt->userData,
3404 "Fragment not allowed: %s\n", URI);
3405 ctxt->wellFormed = 0;
3406 } else {
3407 if ((ctxt->sax != NULL) &&
3408 (!ctxt->disableSAX) &&
3409 (ctxt->sax->entityDecl != NULL))
3410 ctxt->sax->entityDecl(ctxt->userData, name,
3411 XML_EXTERNAL_PARAMETER_ENTITY,
3412 literal, URI, NULL);
3413 }
3414 xmlFreeURI(uri);
3415 }
3416 }
3417 }
3418 } else {
3419 if ((RAW == '"') || (RAW == '\'')) {
3420 value = xmlParseEntityValue(ctxt, &orig);
3421 if ((ctxt->sax != NULL) &&
3422 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3423 ctxt->sax->entityDecl(ctxt->userData, name,
3424 XML_INTERNAL_GENERAL_ENTITY,
3425 NULL, NULL, value);
3426 } else {
3427 URI = xmlParseExternalID(ctxt, &literal, 1);
3428 if ((URI == NULL) && (literal == NULL)) {
3429 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3431 ctxt->sax->error(ctxt->userData,
3432 "Entity value required\n");
3433 ctxt->wellFormed = 0;
3434 ctxt->disableSAX = 1;
3435 }
3436 if (URI) {
3437 xmlURIPtr uri;
3438
3439 uri = xmlParseURI((const char *)URI);
3440 if (uri == NULL) {
3441 ctxt->errNo = XML_ERR_INVALID_URI;
3442 if ((ctxt->sax != NULL) &&
3443 (!ctxt->disableSAX) &&
3444 (ctxt->sax->error != NULL))
3445 ctxt->sax->error(ctxt->userData,
3446 "Invalid URI: %s\n", URI);
3447 ctxt->wellFormed = 0;
3448 } else {
3449 if (uri->fragment != NULL) {
3450 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3451 if ((ctxt->sax != NULL) &&
3452 (!ctxt->disableSAX) &&
3453 (ctxt->sax->error != NULL))
3454 ctxt->sax->error(ctxt->userData,
3455 "Fragment not allowed: %s\n", URI);
3456 ctxt->wellFormed = 0;
3457 }
3458 xmlFreeURI(uri);
3459 }
3460 }
3461 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3462 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3463 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3464 ctxt->sax->error(ctxt->userData,
3465 "Space required before 'NDATA'\n");
3466 ctxt->wellFormed = 0;
3467 ctxt->disableSAX = 1;
3468 }
3469 SKIP_BLANKS;
3470 if ((RAW == 'N') && (NXT(1) == 'D') &&
3471 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3472 (NXT(4) == 'A')) {
3473 SKIP(5);
3474 if (!IS_BLANK(CUR)) {
3475 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3476 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3477 ctxt->sax->error(ctxt->userData,
3478 "Space required after 'NDATA'\n");
3479 ctxt->wellFormed = 0;
3480 ctxt->disableSAX = 1;
3481 }
3482 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003483 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003484 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3485 (ctxt->sax->unparsedEntityDecl != NULL))
3486 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3487 literal, URI, ndata);
3488 } else {
3489 if ((ctxt->sax != NULL) &&
3490 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3491 ctxt->sax->entityDecl(ctxt->userData, name,
3492 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3493 literal, URI, NULL);
3494 }
3495 }
3496 }
3497 SKIP_BLANKS;
3498 if (RAW != '>') {
3499 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3500 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3501 ctxt->sax->error(ctxt->userData,
3502 "xmlParseEntityDecl: entity %s not terminated\n", name);
3503 ctxt->wellFormed = 0;
3504 ctxt->disableSAX = 1;
3505 } else {
3506 if (input != ctxt->input) {
3507 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3509 ctxt->sax->error(ctxt->userData,
3510"Entity declaration doesn't start and stop in the same entity\n");
3511 ctxt->wellFormed = 0;
3512 ctxt->disableSAX = 1;
3513 }
3514 NEXT;
3515 }
3516 if (orig != NULL) {
3517 /*
3518 * Ugly mechanism to save the raw entity value.
3519 */
3520 xmlEntityPtr cur = NULL;
3521
3522 if (isParameter) {
3523 if ((ctxt->sax != NULL) &&
3524 (ctxt->sax->getParameterEntity != NULL))
3525 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3526 } else {
3527 if ((ctxt->sax != NULL) &&
3528 (ctxt->sax->getEntity != NULL))
3529 cur = ctxt->sax->getEntity(ctxt->userData, name);
3530 }
3531 if (cur != NULL) {
3532 if (cur->orig != NULL)
3533 xmlFree(orig);
3534 else
3535 cur->orig = orig;
3536 } else
3537 xmlFree(orig);
3538 }
3539 if (name != NULL) xmlFree(name);
3540 if (value != NULL) xmlFree(value);
3541 if (URI != NULL) xmlFree(URI);
3542 if (literal != NULL) xmlFree(literal);
3543 if (ndata != NULL) xmlFree(ndata);
3544 }
3545}
3546
3547/**
3548 * xmlParseDefaultDecl:
3549 * @ctxt: an XML parser context
3550 * @value: Receive a possible fixed default value for the attribute
3551 *
3552 * Parse an attribute default declaration
3553 *
3554 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3555 *
3556 * [ VC: Required Attribute ]
3557 * if the default declaration is the keyword #REQUIRED, then the
3558 * attribute must be specified for all elements of the type in the
3559 * attribute-list declaration.
3560 *
3561 * [ VC: Attribute Default Legal ]
3562 * The declared default value must meet the lexical constraints of
3563 * the declared attribute type c.f. xmlValidateAttributeDecl()
3564 *
3565 * [ VC: Fixed Attribute Default ]
3566 * if an attribute has a default value declared with the #FIXED
3567 * keyword, instances of that attribute must match the default value.
3568 *
3569 * [ WFC: No < in Attribute Values ]
3570 * handled in xmlParseAttValue()
3571 *
3572 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3573 * or XML_ATTRIBUTE_FIXED.
3574 */
3575
3576int
3577xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3578 int val;
3579 xmlChar *ret;
3580
3581 *value = NULL;
3582 if ((RAW == '#') && (NXT(1) == 'R') &&
3583 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3584 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3585 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3586 (NXT(8) == 'D')) {
3587 SKIP(9);
3588 return(XML_ATTRIBUTE_REQUIRED);
3589 }
3590 if ((RAW == '#') && (NXT(1) == 'I') &&
3591 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3592 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3593 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3594 SKIP(8);
3595 return(XML_ATTRIBUTE_IMPLIED);
3596 }
3597 val = XML_ATTRIBUTE_NONE;
3598 if ((RAW == '#') && (NXT(1) == 'F') &&
3599 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3600 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3601 SKIP(6);
3602 val = XML_ATTRIBUTE_FIXED;
3603 if (!IS_BLANK(CUR)) {
3604 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3605 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3606 ctxt->sax->error(ctxt->userData,
3607 "Space required after '#FIXED'\n");
3608 ctxt->wellFormed = 0;
3609 ctxt->disableSAX = 1;
3610 }
3611 SKIP_BLANKS;
3612 }
3613 ret = xmlParseAttValue(ctxt);
3614 ctxt->instate = XML_PARSER_DTD;
3615 if (ret == NULL) {
3616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3617 ctxt->sax->error(ctxt->userData,
3618 "Attribute default value declaration error\n");
3619 ctxt->wellFormed = 0;
3620 ctxt->disableSAX = 1;
3621 } else
3622 *value = ret;
3623 return(val);
3624}
3625
3626/**
3627 * xmlParseNotationType:
3628 * @ctxt: an XML parser context
3629 *
3630 * parse an Notation attribute type.
3631 *
3632 * Note: the leading 'NOTATION' S part has already being parsed...
3633 *
3634 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3635 *
3636 * [ VC: Notation Attributes ]
3637 * Values of this type must match one of the notation names included
3638 * in the declaration; all notation names in the declaration must be declared.
3639 *
3640 * Returns: the notation attribute tree built while parsing
3641 */
3642
3643xmlEnumerationPtr
3644xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3645 xmlChar *name;
3646 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3647
3648 if (RAW != '(') {
3649 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3650 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3651 ctxt->sax->error(ctxt->userData,
3652 "'(' required to start 'NOTATION'\n");
3653 ctxt->wellFormed = 0;
3654 ctxt->disableSAX = 1;
3655 return(NULL);
3656 }
3657 SHRINK;
3658 do {
3659 NEXT;
3660 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003661 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003662 if (name == NULL) {
3663 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3664 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3665 ctxt->sax->error(ctxt->userData,
3666 "Name expected in NOTATION declaration\n");
3667 ctxt->wellFormed = 0;
3668 ctxt->disableSAX = 1;
3669 return(ret);
3670 }
3671 cur = xmlCreateEnumeration(name);
3672 xmlFree(name);
3673 if (cur == NULL) return(ret);
3674 if (last == NULL) ret = last = cur;
3675 else {
3676 last->next = cur;
3677 last = cur;
3678 }
3679 SKIP_BLANKS;
3680 } while (RAW == '|');
3681 if (RAW != ')') {
3682 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3683 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3684 ctxt->sax->error(ctxt->userData,
3685 "')' required to finish NOTATION declaration\n");
3686 ctxt->wellFormed = 0;
3687 ctxt->disableSAX = 1;
3688 if ((last != NULL) && (last != ret))
3689 xmlFreeEnumeration(last);
3690 return(ret);
3691 }
3692 NEXT;
3693 return(ret);
3694}
3695
3696/**
3697 * xmlParseEnumerationType:
3698 * @ctxt: an XML parser context
3699 *
3700 * parse an Enumeration attribute type.
3701 *
3702 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3703 *
3704 * [ VC: Enumeration ]
3705 * Values of this type must match one of the Nmtoken tokens in
3706 * the declaration
3707 *
3708 * Returns: the enumeration attribute tree built while parsing
3709 */
3710
3711xmlEnumerationPtr
3712xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3713 xmlChar *name;
3714 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3715
3716 if (RAW != '(') {
3717 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3719 ctxt->sax->error(ctxt->userData,
3720 "'(' required to start ATTLIST enumeration\n");
3721 ctxt->wellFormed = 0;
3722 ctxt->disableSAX = 1;
3723 return(NULL);
3724 }
3725 SHRINK;
3726 do {
3727 NEXT;
3728 SKIP_BLANKS;
3729 name = xmlParseNmtoken(ctxt);
3730 if (name == NULL) {
3731 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3732 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3733 ctxt->sax->error(ctxt->userData,
3734 "NmToken expected in ATTLIST enumeration\n");
3735 ctxt->wellFormed = 0;
3736 ctxt->disableSAX = 1;
3737 return(ret);
3738 }
3739 cur = xmlCreateEnumeration(name);
3740 xmlFree(name);
3741 if (cur == NULL) return(ret);
3742 if (last == NULL) ret = last = cur;
3743 else {
3744 last->next = cur;
3745 last = cur;
3746 }
3747 SKIP_BLANKS;
3748 } while (RAW == '|');
3749 if (RAW != ')') {
3750 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3752 ctxt->sax->error(ctxt->userData,
3753 "')' required to finish ATTLIST enumeration\n");
3754 ctxt->wellFormed = 0;
3755 ctxt->disableSAX = 1;
3756 return(ret);
3757 }
3758 NEXT;
3759 return(ret);
3760}
3761
3762/**
3763 * xmlParseEnumeratedType:
3764 * @ctxt: an XML parser context
3765 * @tree: the enumeration tree built while parsing
3766 *
3767 * parse an Enumerated attribute type.
3768 *
3769 * [57] EnumeratedType ::= NotationType | Enumeration
3770 *
3771 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3772 *
3773 *
3774 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3775 */
3776
3777int
3778xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3779 if ((RAW == 'N') && (NXT(1) == 'O') &&
3780 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3781 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3782 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3783 SKIP(8);
3784 if (!IS_BLANK(CUR)) {
3785 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3786 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3787 ctxt->sax->error(ctxt->userData,
3788 "Space required after 'NOTATION'\n");
3789 ctxt->wellFormed = 0;
3790 ctxt->disableSAX = 1;
3791 return(0);
3792 }
3793 SKIP_BLANKS;
3794 *tree = xmlParseNotationType(ctxt);
3795 if (*tree == NULL) return(0);
3796 return(XML_ATTRIBUTE_NOTATION);
3797 }
3798 *tree = xmlParseEnumerationType(ctxt);
3799 if (*tree == NULL) return(0);
3800 return(XML_ATTRIBUTE_ENUMERATION);
3801}
3802
3803/**
3804 * xmlParseAttributeType:
3805 * @ctxt: an XML parser context
3806 * @tree: the enumeration tree built while parsing
3807 *
3808 * parse the Attribute list def for an element
3809 *
3810 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3811 *
3812 * [55] StringType ::= 'CDATA'
3813 *
3814 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3815 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3816 *
3817 * Validity constraints for attribute values syntax are checked in
3818 * xmlValidateAttributeValue()
3819 *
3820 * [ VC: ID ]
3821 * Values of type ID must match the Name production. A name must not
3822 * appear more than once in an XML document as a value of this type;
3823 * i.e., ID values must uniquely identify the elements which bear them.
3824 *
3825 * [ VC: One ID per Element Type ]
3826 * No element type may have more than one ID attribute specified.
3827 *
3828 * [ VC: ID Attribute Default ]
3829 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3830 *
3831 * [ VC: IDREF ]
3832 * Values of type IDREF must match the Name production, and values
3833 * of type IDREFS must match Names; each IDREF Name must match the value
3834 * of an ID attribute on some element in the XML document; i.e. IDREF
3835 * values must match the value of some ID attribute.
3836 *
3837 * [ VC: Entity Name ]
3838 * Values of type ENTITY must match the Name production, values
3839 * of type ENTITIES must match Names; each Entity Name must match the
3840 * name of an unparsed entity declared in the DTD.
3841 *
3842 * [ VC: Name Token ]
3843 * Values of type NMTOKEN must match the Nmtoken production; values
3844 * of type NMTOKENS must match Nmtokens.
3845 *
3846 * Returns the attribute type
3847 */
3848int
3849xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3850 SHRINK;
3851 if ((RAW == 'C') && (NXT(1) == 'D') &&
3852 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3853 (NXT(4) == 'A')) {
3854 SKIP(5);
3855 return(XML_ATTRIBUTE_CDATA);
3856 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3857 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3858 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3859 SKIP(6);
3860 return(XML_ATTRIBUTE_IDREFS);
3861 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3862 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3863 (NXT(4) == 'F')) {
3864 SKIP(5);
3865 return(XML_ATTRIBUTE_IDREF);
3866 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3867 SKIP(2);
3868 return(XML_ATTRIBUTE_ID);
3869 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3870 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3871 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3872 SKIP(6);
3873 return(XML_ATTRIBUTE_ENTITY);
3874 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3875 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3876 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3877 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3878 SKIP(8);
3879 return(XML_ATTRIBUTE_ENTITIES);
3880 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3881 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3882 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3883 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3884 SKIP(8);
3885 return(XML_ATTRIBUTE_NMTOKENS);
3886 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3887 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3888 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3889 (NXT(6) == 'N')) {
3890 SKIP(7);
3891 return(XML_ATTRIBUTE_NMTOKEN);
3892 }
3893 return(xmlParseEnumeratedType(ctxt, tree));
3894}
3895
3896/**
3897 * xmlParseAttributeListDecl:
3898 * @ctxt: an XML parser context
3899 *
3900 * : parse the Attribute list def for an element
3901 *
3902 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3903 *
3904 * [53] AttDef ::= S Name S AttType S DefaultDecl
3905 *
3906 */
3907void
3908xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3909 xmlChar *elemName;
3910 xmlChar *attrName;
3911 xmlEnumerationPtr tree;
3912
3913 if ((RAW == '<') && (NXT(1) == '!') &&
3914 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3915 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3916 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3917 (NXT(8) == 'T')) {
3918 xmlParserInputPtr input = ctxt->input;
3919
3920 SKIP(9);
3921 if (!IS_BLANK(CUR)) {
3922 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3923 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3924 ctxt->sax->error(ctxt->userData,
3925 "Space required after '<!ATTLIST'\n");
3926 ctxt->wellFormed = 0;
3927 ctxt->disableSAX = 1;
3928 }
3929 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003930 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003931 if (elemName == NULL) {
3932 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3933 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3934 ctxt->sax->error(ctxt->userData,
3935 "ATTLIST: no name for Element\n");
3936 ctxt->wellFormed = 0;
3937 ctxt->disableSAX = 1;
3938 return;
3939 }
3940 SKIP_BLANKS;
3941 GROW;
3942 while (RAW != '>') {
3943 const xmlChar *check = CUR_PTR;
3944 int type;
3945 int def;
3946 xmlChar *defaultValue = NULL;
3947
3948 GROW;
3949 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003950 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003951 if (attrName == NULL) {
3952 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3953 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3954 ctxt->sax->error(ctxt->userData,
3955 "ATTLIST: no name for Attribute\n");
3956 ctxt->wellFormed = 0;
3957 ctxt->disableSAX = 1;
3958 break;
3959 }
3960 GROW;
3961 if (!IS_BLANK(CUR)) {
3962 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3964 ctxt->sax->error(ctxt->userData,
3965 "Space required after the attribute name\n");
3966 ctxt->wellFormed = 0;
3967 ctxt->disableSAX = 1;
3968 if (attrName != NULL)
3969 xmlFree(attrName);
3970 if (defaultValue != NULL)
3971 xmlFree(defaultValue);
3972 break;
3973 }
3974 SKIP_BLANKS;
3975
3976 type = xmlParseAttributeType(ctxt, &tree);
3977 if (type <= 0) {
3978 if (attrName != NULL)
3979 xmlFree(attrName);
3980 if (defaultValue != NULL)
3981 xmlFree(defaultValue);
3982 break;
3983 }
3984
3985 GROW;
3986 if (!IS_BLANK(CUR)) {
3987 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3988 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3989 ctxt->sax->error(ctxt->userData,
3990 "Space required after the attribute type\n");
3991 ctxt->wellFormed = 0;
3992 ctxt->disableSAX = 1;
3993 if (attrName != NULL)
3994 xmlFree(attrName);
3995 if (defaultValue != NULL)
3996 xmlFree(defaultValue);
3997 if (tree != NULL)
3998 xmlFreeEnumeration(tree);
3999 break;
4000 }
4001 SKIP_BLANKS;
4002
4003 def = xmlParseDefaultDecl(ctxt, &defaultValue);
4004 if (def <= 0) {
4005 if (attrName != NULL)
4006 xmlFree(attrName);
4007 if (defaultValue != NULL)
4008 xmlFree(defaultValue);
4009 if (tree != NULL)
4010 xmlFreeEnumeration(tree);
4011 break;
4012 }
4013
4014 GROW;
4015 if (RAW != '>') {
4016 if (!IS_BLANK(CUR)) {
4017 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4019 ctxt->sax->error(ctxt->userData,
4020 "Space required after the attribute default value\n");
4021 ctxt->wellFormed = 0;
4022 ctxt->disableSAX = 1;
4023 if (attrName != NULL)
4024 xmlFree(attrName);
4025 if (defaultValue != NULL)
4026 xmlFree(defaultValue);
4027 if (tree != NULL)
4028 xmlFreeEnumeration(tree);
4029 break;
4030 }
4031 SKIP_BLANKS;
4032 }
4033 if (check == CUR_PTR) {
4034 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4035 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4036 ctxt->sax->error(ctxt->userData,
4037 "xmlParseAttributeListDecl: detected internal error\n");
4038 if (attrName != NULL)
4039 xmlFree(attrName);
4040 if (defaultValue != NULL)
4041 xmlFree(defaultValue);
4042 if (tree != NULL)
4043 xmlFreeEnumeration(tree);
4044 break;
4045 }
4046 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4047 (ctxt->sax->attributeDecl != NULL))
4048 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4049 type, def, defaultValue, tree);
4050 if (attrName != NULL)
4051 xmlFree(attrName);
4052 if (defaultValue != NULL)
4053 xmlFree(defaultValue);
4054 GROW;
4055 }
4056 if (RAW == '>') {
4057 if (input != ctxt->input) {
4058 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4060 ctxt->sax->error(ctxt->userData,
4061"Attribute list declaration doesn't start and stop in the same entity\n");
4062 ctxt->wellFormed = 0;
4063 ctxt->disableSAX = 1;
4064 }
4065 NEXT;
4066 }
4067
4068 xmlFree(elemName);
4069 }
4070}
4071
4072/**
4073 * xmlParseElementMixedContentDecl:
4074 * @ctxt: an XML parser context
4075 *
4076 * parse the declaration for a Mixed Element content
4077 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4078 *
4079 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4080 * '(' S? '#PCDATA' S? ')'
4081 *
4082 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4083 *
4084 * [ VC: No Duplicate Types ]
4085 * The same name must not appear more than once in a single
4086 * mixed-content declaration.
4087 *
4088 * returns: the list of the xmlElementContentPtr describing the element choices
4089 */
4090xmlElementContentPtr
4091xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4092 xmlElementContentPtr ret = NULL, cur = NULL, n;
4093 xmlChar *elem = NULL;
4094
4095 GROW;
4096 if ((RAW == '#') && (NXT(1) == 'P') &&
4097 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4098 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4099 (NXT(6) == 'A')) {
4100 SKIP(7);
4101 SKIP_BLANKS;
4102 SHRINK;
4103 if (RAW == ')') {
4104 ctxt->entity = ctxt->input;
4105 NEXT;
4106 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4107 if (RAW == '*') {
4108 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4109 NEXT;
4110 }
4111 return(ret);
4112 }
4113 if ((RAW == '(') || (RAW == '|')) {
4114 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4115 if (ret == NULL) return(NULL);
4116 }
4117 while (RAW == '|') {
4118 NEXT;
4119 if (elem == NULL) {
4120 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4121 if (ret == NULL) return(NULL);
4122 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004123 if (cur != NULL)
4124 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004125 cur = ret;
4126 } else {
4127 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4128 if (n == NULL) return(NULL);
4129 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004130 if (n->c1 != NULL)
4131 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004132 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004133 if (n != NULL)
4134 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004135 cur = n;
4136 xmlFree(elem);
4137 }
4138 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004139 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004140 if (elem == NULL) {
4141 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4143 ctxt->sax->error(ctxt->userData,
4144 "xmlParseElementMixedContentDecl : Name expected\n");
4145 ctxt->wellFormed = 0;
4146 ctxt->disableSAX = 1;
4147 xmlFreeElementContent(cur);
4148 return(NULL);
4149 }
4150 SKIP_BLANKS;
4151 GROW;
4152 }
4153 if ((RAW == ')') && (NXT(1) == '*')) {
4154 if (elem != NULL) {
4155 cur->c2 = xmlNewElementContent(elem,
4156 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004157 if (cur->c2 != NULL)
4158 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004159 xmlFree(elem);
4160 }
4161 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4162 ctxt->entity = ctxt->input;
4163 SKIP(2);
4164 } else {
4165 if (elem != NULL) xmlFree(elem);
4166 xmlFreeElementContent(ret);
4167 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4168 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4169 ctxt->sax->error(ctxt->userData,
4170 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4171 ctxt->wellFormed = 0;
4172 ctxt->disableSAX = 1;
4173 return(NULL);
4174 }
4175
4176 } else {
4177 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4179 ctxt->sax->error(ctxt->userData,
4180 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4181 ctxt->wellFormed = 0;
4182 ctxt->disableSAX = 1;
4183 }
4184 return(ret);
4185}
4186
4187/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004188 * xmlParseElementChildrenContentD:
4189 * @ctxt: an XML parser context
4190 *
4191 * VMS version of xmlParseElementChildrenContentDecl()
4192 *
4193 * Returns the tree of xmlElementContentPtr describing the element
4194 * hierarchy.
4195 */
4196/**
Owen Taylor3473f882001-02-23 17:55:21 +00004197 * xmlParseElementChildrenContentDecl:
4198 * @ctxt: an XML parser context
4199 *
4200 * parse the declaration for an element children content model
4201 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4202 *
4203 *
4204 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4205 *
4206 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4207 *
4208 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4209 *
4210 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4211 *
4212 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4213 * TODO Parameter-entity replacement text must be properly nested
4214 * with parenthetized groups. That is to say, if either of the
4215 * opening or closing parentheses in a choice, seq, or Mixed
4216 * construct is contained in the replacement text for a parameter
4217 * entity, both must be contained in the same replacement text. For
4218 * interoperability, if a parameter-entity reference appears in a
4219 * choice, seq, or Mixed construct, its replacement text should not
4220 * be empty, and neither the first nor last non-blank character of
4221 * the replacement text should be a connector (| or ,).
4222 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004223 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004224 * hierarchy.
4225 */
4226xmlElementContentPtr
4227#ifdef VMS
4228xmlParseElementChildrenContentD
4229#else
4230xmlParseElementChildrenContentDecl
4231#endif
4232(xmlParserCtxtPtr ctxt) {
4233 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4234 xmlChar *elem;
4235 xmlChar type = 0;
4236
4237 SKIP_BLANKS;
4238 GROW;
4239 if (RAW == '(') {
4240 /* Recurse on first child */
4241 NEXT;
4242 SKIP_BLANKS;
4243 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4244 SKIP_BLANKS;
4245 GROW;
4246 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004247 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004248 if (elem == NULL) {
4249 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4251 ctxt->sax->error(ctxt->userData,
4252 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4253 ctxt->wellFormed = 0;
4254 ctxt->disableSAX = 1;
4255 return(NULL);
4256 }
4257 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4258 GROW;
4259 if (RAW == '?') {
4260 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4261 NEXT;
4262 } else if (RAW == '*') {
4263 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4264 NEXT;
4265 } else if (RAW == '+') {
4266 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4267 NEXT;
4268 } else {
4269 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4270 }
4271 xmlFree(elem);
4272 GROW;
4273 }
4274 SKIP_BLANKS;
4275 SHRINK;
4276 while (RAW != ')') {
4277 /*
4278 * Each loop we parse one separator and one element.
4279 */
4280 if (RAW == ',') {
4281 if (type == 0) type = CUR;
4282
4283 /*
4284 * Detect "Name | Name , Name" error
4285 */
4286 else if (type != CUR) {
4287 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4288 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4289 ctxt->sax->error(ctxt->userData,
4290 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4291 type);
4292 ctxt->wellFormed = 0;
4293 ctxt->disableSAX = 1;
4294 if ((op != NULL) && (op != ret))
4295 xmlFreeElementContent(op);
4296 if ((last != NULL) && (last != ret) &&
4297 (last != ret->c1) && (last != ret->c2))
4298 xmlFreeElementContent(last);
4299 if (ret != NULL)
4300 xmlFreeElementContent(ret);
4301 return(NULL);
4302 }
4303 NEXT;
4304
4305 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4306 if (op == NULL) {
4307 xmlFreeElementContent(ret);
4308 return(NULL);
4309 }
4310 if (last == NULL) {
4311 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004312 if (ret != NULL)
4313 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004314 ret = cur = op;
4315 } else {
4316 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004317 if (op != NULL)
4318 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004319 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004320 if (last != NULL)
4321 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004322 cur =op;
4323 last = NULL;
4324 }
4325 } else if (RAW == '|') {
4326 if (type == 0) type = CUR;
4327
4328 /*
4329 * Detect "Name , Name | Name" error
4330 */
4331 else if (type != CUR) {
4332 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4334 ctxt->sax->error(ctxt->userData,
4335 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4336 type);
4337 ctxt->wellFormed = 0;
4338 ctxt->disableSAX = 1;
4339 if ((op != NULL) && (op != ret) && (op != last))
4340 xmlFreeElementContent(op);
4341 if ((last != NULL) && (last != ret) &&
4342 (last != ret->c1) && (last != ret->c2))
4343 xmlFreeElementContent(last);
4344 if (ret != NULL)
4345 xmlFreeElementContent(ret);
4346 return(NULL);
4347 }
4348 NEXT;
4349
4350 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4351 if (op == NULL) {
4352 if ((op != NULL) && (op != ret))
4353 xmlFreeElementContent(op);
4354 if ((last != NULL) && (last != ret) &&
4355 (last != ret->c1) && (last != ret->c2))
4356 xmlFreeElementContent(last);
4357 if (ret != NULL)
4358 xmlFreeElementContent(ret);
4359 return(NULL);
4360 }
4361 if (last == NULL) {
4362 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004363 if (ret != NULL)
4364 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004365 ret = cur = op;
4366 } else {
4367 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004368 if (op != NULL)
4369 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004370 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004371 if (last != NULL)
4372 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004373 cur =op;
4374 last = NULL;
4375 }
4376 } else {
4377 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4378 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4379 ctxt->sax->error(ctxt->userData,
4380 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4381 ctxt->wellFormed = 0;
4382 ctxt->disableSAX = 1;
4383 if ((op != NULL) && (op != ret))
4384 xmlFreeElementContent(op);
4385 if ((last != NULL) && (last != ret) &&
4386 (last != ret->c1) && (last != ret->c2))
4387 xmlFreeElementContent(last);
4388 if (ret != NULL)
4389 xmlFreeElementContent(ret);
4390 return(NULL);
4391 }
4392 GROW;
4393 SKIP_BLANKS;
4394 GROW;
4395 if (RAW == '(') {
4396 /* Recurse on second child */
4397 NEXT;
4398 SKIP_BLANKS;
4399 last = xmlParseElementChildrenContentDecl(ctxt);
4400 SKIP_BLANKS;
4401 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004402 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004403 if (elem == NULL) {
4404 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4406 ctxt->sax->error(ctxt->userData,
4407 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4408 ctxt->wellFormed = 0;
4409 ctxt->disableSAX = 1;
4410 if ((op != NULL) && (op != ret))
4411 xmlFreeElementContent(op);
4412 if ((last != NULL) && (last != ret) &&
4413 (last != ret->c1) && (last != ret->c2))
4414 xmlFreeElementContent(last);
4415 if (ret != NULL)
4416 xmlFreeElementContent(ret);
4417 return(NULL);
4418 }
4419 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4420 xmlFree(elem);
4421 if (RAW == '?') {
4422 last->ocur = XML_ELEMENT_CONTENT_OPT;
4423 NEXT;
4424 } else if (RAW == '*') {
4425 last->ocur = XML_ELEMENT_CONTENT_MULT;
4426 NEXT;
4427 } else if (RAW == '+') {
4428 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4429 NEXT;
4430 } else {
4431 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4432 }
4433 }
4434 SKIP_BLANKS;
4435 GROW;
4436 }
4437 if ((cur != NULL) && (last != NULL)) {
4438 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004439 if (last != NULL)
4440 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004441 }
4442 ctxt->entity = ctxt->input;
4443 NEXT;
4444 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004445 if (ret != NULL)
4446 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004447 NEXT;
4448 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004449 if (ret != NULL)
4450 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004451 NEXT;
4452 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004453 if (ret != NULL)
4454 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004455 NEXT;
4456 }
4457 return(ret);
4458}
4459
4460/**
4461 * xmlParseElementContentDecl:
4462 * @ctxt: an XML parser context
4463 * @name: the name of the element being defined.
4464 * @result: the Element Content pointer will be stored here if any
4465 *
4466 * parse the declaration for an Element content either Mixed or Children,
4467 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4468 *
4469 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4470 *
4471 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4472 */
4473
4474int
4475xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4476 xmlElementContentPtr *result) {
4477
4478 xmlElementContentPtr tree = NULL;
4479 xmlParserInputPtr input = ctxt->input;
4480 int res;
4481
4482 *result = NULL;
4483
4484 if (RAW != '(') {
4485 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4487 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004488 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004489 ctxt->wellFormed = 0;
4490 ctxt->disableSAX = 1;
4491 return(-1);
4492 }
4493 NEXT;
4494 GROW;
4495 SKIP_BLANKS;
4496 if ((RAW == '#') && (NXT(1) == 'P') &&
4497 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4498 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4499 (NXT(6) == 'A')) {
4500 tree = xmlParseElementMixedContentDecl(ctxt);
4501 res = XML_ELEMENT_TYPE_MIXED;
4502 } else {
4503 tree = xmlParseElementChildrenContentDecl(ctxt);
4504 res = XML_ELEMENT_TYPE_ELEMENT;
4505 }
4506 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4507 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4509 ctxt->sax->error(ctxt->userData,
4510"Element content declaration doesn't start and stop in the same entity\n");
4511 ctxt->wellFormed = 0;
4512 ctxt->disableSAX = 1;
4513 }
4514 SKIP_BLANKS;
4515 *result = tree;
4516 return(res);
4517}
4518
4519/**
4520 * xmlParseElementDecl:
4521 * @ctxt: an XML parser context
4522 *
4523 * parse an Element declaration.
4524 *
4525 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4526 *
4527 * [ VC: Unique Element Type Declaration ]
4528 * No element type may be declared more than once
4529 *
4530 * Returns the type of the element, or -1 in case of error
4531 */
4532int
4533xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4534 xmlChar *name;
4535 int ret = -1;
4536 xmlElementContentPtr content = NULL;
4537
4538 GROW;
4539 if ((RAW == '<') && (NXT(1) == '!') &&
4540 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4541 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4542 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4543 (NXT(8) == 'T')) {
4544 xmlParserInputPtr input = ctxt->input;
4545
4546 SKIP(9);
4547 if (!IS_BLANK(CUR)) {
4548 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4550 ctxt->sax->error(ctxt->userData,
4551 "Space required after 'ELEMENT'\n");
4552 ctxt->wellFormed = 0;
4553 ctxt->disableSAX = 1;
4554 }
4555 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004556 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004557 if (name == NULL) {
4558 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4559 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4560 ctxt->sax->error(ctxt->userData,
4561 "xmlParseElementDecl: no name for Element\n");
4562 ctxt->wellFormed = 0;
4563 ctxt->disableSAX = 1;
4564 return(-1);
4565 }
4566 while ((RAW == 0) && (ctxt->inputNr > 1))
4567 xmlPopInput(ctxt);
4568 if (!IS_BLANK(CUR)) {
4569 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4571 ctxt->sax->error(ctxt->userData,
4572 "Space required after the element name\n");
4573 ctxt->wellFormed = 0;
4574 ctxt->disableSAX = 1;
4575 }
4576 SKIP_BLANKS;
4577 if ((RAW == 'E') && (NXT(1) == 'M') &&
4578 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4579 (NXT(4) == 'Y')) {
4580 SKIP(5);
4581 /*
4582 * Element must always be empty.
4583 */
4584 ret = XML_ELEMENT_TYPE_EMPTY;
4585 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4586 (NXT(2) == 'Y')) {
4587 SKIP(3);
4588 /*
4589 * Element is a generic container.
4590 */
4591 ret = XML_ELEMENT_TYPE_ANY;
4592 } else if (RAW == '(') {
4593 ret = xmlParseElementContentDecl(ctxt, name, &content);
4594 } else {
4595 /*
4596 * [ WFC: PEs in Internal Subset ] error handling.
4597 */
4598 if ((RAW == '%') && (ctxt->external == 0) &&
4599 (ctxt->inputNr == 1)) {
4600 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4602 ctxt->sax->error(ctxt->userData,
4603 "PEReference: forbidden within markup decl in internal subset\n");
4604 } else {
4605 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4606 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4607 ctxt->sax->error(ctxt->userData,
4608 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4609 }
4610 ctxt->wellFormed = 0;
4611 ctxt->disableSAX = 1;
4612 if (name != NULL) xmlFree(name);
4613 return(-1);
4614 }
4615
4616 SKIP_BLANKS;
4617 /*
4618 * Pop-up of finished entities.
4619 */
4620 while ((RAW == 0) && (ctxt->inputNr > 1))
4621 xmlPopInput(ctxt);
4622 SKIP_BLANKS;
4623
4624 if (RAW != '>') {
4625 ctxt->errNo = XML_ERR_GT_REQUIRED;
4626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4627 ctxt->sax->error(ctxt->userData,
4628 "xmlParseElementDecl: expected '>' at the end\n");
4629 ctxt->wellFormed = 0;
4630 ctxt->disableSAX = 1;
4631 } else {
4632 if (input != ctxt->input) {
4633 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4635 ctxt->sax->error(ctxt->userData,
4636"Element declaration doesn't start and stop in the same entity\n");
4637 ctxt->wellFormed = 0;
4638 ctxt->disableSAX = 1;
4639 }
4640
4641 NEXT;
4642 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4643 (ctxt->sax->elementDecl != NULL))
4644 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4645 content);
4646 }
4647 if (content != NULL) {
4648 xmlFreeElementContent(content);
4649 }
4650 if (name != NULL) {
4651 xmlFree(name);
4652 }
4653 }
4654 return(ret);
4655}
4656
4657/**
Owen Taylor3473f882001-02-23 17:55:21 +00004658 * xmlParseConditionalSections
4659 * @ctxt: an XML parser context
4660 *
4661 * [61] conditionalSect ::= includeSect | ignoreSect
4662 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4663 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4664 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4665 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4666 */
4667
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004668static void
Owen Taylor3473f882001-02-23 17:55:21 +00004669xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4670 SKIP(3);
4671 SKIP_BLANKS;
4672 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4673 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4674 (NXT(6) == 'E')) {
4675 SKIP(7);
4676 SKIP_BLANKS;
4677 if (RAW != '[') {
4678 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4680 ctxt->sax->error(ctxt->userData,
4681 "XML conditional section '[' expected\n");
4682 ctxt->wellFormed = 0;
4683 ctxt->disableSAX = 1;
4684 } else {
4685 NEXT;
4686 }
4687 if (xmlParserDebugEntities) {
4688 if ((ctxt->input != NULL) && (ctxt->input->filename))
4689 xmlGenericError(xmlGenericErrorContext,
4690 "%s(%d): ", ctxt->input->filename,
4691 ctxt->input->line);
4692 xmlGenericError(xmlGenericErrorContext,
4693 "Entering INCLUDE Conditional Section\n");
4694 }
4695
4696 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4697 (NXT(2) != '>'))) {
4698 const xmlChar *check = CUR_PTR;
4699 int cons = ctxt->input->consumed;
4700 int tok = ctxt->token;
4701
4702 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4703 xmlParseConditionalSections(ctxt);
4704 } else if (IS_BLANK(CUR)) {
4705 NEXT;
4706 } else if (RAW == '%') {
4707 xmlParsePEReference(ctxt);
4708 } else
4709 xmlParseMarkupDecl(ctxt);
4710
4711 /*
4712 * Pop-up of finished entities.
4713 */
4714 while ((RAW == 0) && (ctxt->inputNr > 1))
4715 xmlPopInput(ctxt);
4716
4717 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4718 (tok == ctxt->token)) {
4719 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4721 ctxt->sax->error(ctxt->userData,
4722 "Content error in the external subset\n");
4723 ctxt->wellFormed = 0;
4724 ctxt->disableSAX = 1;
4725 break;
4726 }
4727 }
4728 if (xmlParserDebugEntities) {
4729 if ((ctxt->input != NULL) && (ctxt->input->filename))
4730 xmlGenericError(xmlGenericErrorContext,
4731 "%s(%d): ", ctxt->input->filename,
4732 ctxt->input->line);
4733 xmlGenericError(xmlGenericErrorContext,
4734 "Leaving INCLUDE Conditional Section\n");
4735 }
4736
4737 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4738 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4739 int state;
4740 int instate;
4741 int depth = 0;
4742
4743 SKIP(6);
4744 SKIP_BLANKS;
4745 if (RAW != '[') {
4746 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4747 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4748 ctxt->sax->error(ctxt->userData,
4749 "XML conditional section '[' expected\n");
4750 ctxt->wellFormed = 0;
4751 ctxt->disableSAX = 1;
4752 } else {
4753 NEXT;
4754 }
4755 if (xmlParserDebugEntities) {
4756 if ((ctxt->input != NULL) && (ctxt->input->filename))
4757 xmlGenericError(xmlGenericErrorContext,
4758 "%s(%d): ", ctxt->input->filename,
4759 ctxt->input->line);
4760 xmlGenericError(xmlGenericErrorContext,
4761 "Entering IGNORE Conditional Section\n");
4762 }
4763
4764 /*
4765 * Parse up to the end of the conditionnal section
4766 * But disable SAX event generating DTD building in the meantime
4767 */
4768 state = ctxt->disableSAX;
4769 instate = ctxt->instate;
4770 ctxt->disableSAX = 1;
4771 ctxt->instate = XML_PARSER_IGNORE;
4772
4773 while (depth >= 0) {
4774 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4775 depth++;
4776 SKIP(3);
4777 continue;
4778 }
4779 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4780 if (--depth >= 0) SKIP(3);
4781 continue;
4782 }
4783 NEXT;
4784 continue;
4785 }
4786
4787 ctxt->disableSAX = state;
4788 ctxt->instate = instate;
4789
4790 if (xmlParserDebugEntities) {
4791 if ((ctxt->input != NULL) && (ctxt->input->filename))
4792 xmlGenericError(xmlGenericErrorContext,
4793 "%s(%d): ", ctxt->input->filename,
4794 ctxt->input->line);
4795 xmlGenericError(xmlGenericErrorContext,
4796 "Leaving IGNORE Conditional Section\n");
4797 }
4798
4799 } else {
4800 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4802 ctxt->sax->error(ctxt->userData,
4803 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4804 ctxt->wellFormed = 0;
4805 ctxt->disableSAX = 1;
4806 }
4807
4808 if (RAW == 0)
4809 SHRINK;
4810
4811 if (RAW == 0) {
4812 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4813 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4814 ctxt->sax->error(ctxt->userData,
4815 "XML conditional section not closed\n");
4816 ctxt->wellFormed = 0;
4817 ctxt->disableSAX = 1;
4818 } else {
4819 SKIP(3);
4820 }
4821}
4822
4823/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004824 * xmlParseMarkupDecl:
4825 * @ctxt: an XML parser context
4826 *
4827 * parse Markup declarations
4828 *
4829 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4830 * NotationDecl | PI | Comment
4831 *
4832 * [ VC: Proper Declaration/PE Nesting ]
4833 * Parameter-entity replacement text must be properly nested with
4834 * markup declarations. That is to say, if either the first character
4835 * or the last character of a markup declaration (markupdecl above) is
4836 * contained in the replacement text for a parameter-entity reference,
4837 * both must be contained in the same replacement text.
4838 *
4839 * [ WFC: PEs in Internal Subset ]
4840 * In the internal DTD subset, parameter-entity references can occur
4841 * only where markup declarations can occur, not within markup declarations.
4842 * (This does not apply to references that occur in external parameter
4843 * entities or to the external subset.)
4844 */
4845void
4846xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4847 GROW;
4848 xmlParseElementDecl(ctxt);
4849 xmlParseAttributeListDecl(ctxt);
4850 xmlParseEntityDecl(ctxt);
4851 xmlParseNotationDecl(ctxt);
4852 xmlParsePI(ctxt);
4853 xmlParseComment(ctxt);
4854 /*
4855 * This is only for internal subset. On external entities,
4856 * the replacement is done before parsing stage
4857 */
4858 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4859 xmlParsePEReference(ctxt);
4860
4861 /*
4862 * Conditional sections are allowed from entities included
4863 * by PE References in the internal subset.
4864 */
4865 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4866 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4867 xmlParseConditionalSections(ctxt);
4868 }
4869 }
4870
4871 ctxt->instate = XML_PARSER_DTD;
4872}
4873
4874/**
4875 * xmlParseTextDecl:
4876 * @ctxt: an XML parser context
4877 *
4878 * parse an XML declaration header for external entities
4879 *
4880 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4881 *
4882 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4883 */
4884
4885void
4886xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4887 xmlChar *version;
4888
4889 /*
4890 * We know that '<?xml' is here.
4891 */
4892 if ((RAW == '<') && (NXT(1) == '?') &&
4893 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4894 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4895 SKIP(5);
4896 } else {
4897 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4898 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4899 ctxt->sax->error(ctxt->userData,
4900 "Text declaration '<?xml' required\n");
4901 ctxt->wellFormed = 0;
4902 ctxt->disableSAX = 1;
4903
4904 return;
4905 }
4906
4907 if (!IS_BLANK(CUR)) {
4908 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4909 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4910 ctxt->sax->error(ctxt->userData,
4911 "Space needed after '<?xml'\n");
4912 ctxt->wellFormed = 0;
4913 ctxt->disableSAX = 1;
4914 }
4915 SKIP_BLANKS;
4916
4917 /*
4918 * We may have the VersionInfo here.
4919 */
4920 version = xmlParseVersionInfo(ctxt);
4921 if (version == NULL)
4922 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4923 ctxt->input->version = version;
4924
4925 /*
4926 * We must have the encoding declaration
4927 */
4928 if (!IS_BLANK(CUR)) {
4929 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4931 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4932 ctxt->wellFormed = 0;
4933 ctxt->disableSAX = 1;
4934 }
4935 xmlParseEncodingDecl(ctxt);
4936 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4937 /*
4938 * The XML REC instructs us to stop parsing right here
4939 */
4940 return;
4941 }
4942
4943 SKIP_BLANKS;
4944 if ((RAW == '?') && (NXT(1) == '>')) {
4945 SKIP(2);
4946 } else if (RAW == '>') {
4947 /* Deprecated old WD ... */
4948 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4950 ctxt->sax->error(ctxt->userData,
4951 "XML declaration must end-up with '?>'\n");
4952 ctxt->wellFormed = 0;
4953 ctxt->disableSAX = 1;
4954 NEXT;
4955 } else {
4956 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4957 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4958 ctxt->sax->error(ctxt->userData,
4959 "parsing XML declaration: '?>' expected\n");
4960 ctxt->wellFormed = 0;
4961 ctxt->disableSAX = 1;
4962 MOVETO_ENDTAG(CUR_PTR);
4963 NEXT;
4964 }
4965}
4966
4967/**
Owen Taylor3473f882001-02-23 17:55:21 +00004968 * xmlParseExternalSubset:
4969 * @ctxt: an XML parser context
4970 * @ExternalID: the external identifier
4971 * @SystemID: the system identifier (or URL)
4972 *
4973 * parse Markup declarations from an external subset
4974 *
4975 * [30] extSubset ::= textDecl? extSubsetDecl
4976 *
4977 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4978 */
4979void
4980xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4981 const xmlChar *SystemID) {
4982 GROW;
4983 if ((RAW == '<') && (NXT(1) == '?') &&
4984 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4985 (NXT(4) == 'l')) {
4986 xmlParseTextDecl(ctxt);
4987 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4988 /*
4989 * The XML REC instructs us to stop parsing right here
4990 */
4991 ctxt->instate = XML_PARSER_EOF;
4992 return;
4993 }
4994 }
4995 if (ctxt->myDoc == NULL) {
4996 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4997 }
4998 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4999 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
5000
5001 ctxt->instate = XML_PARSER_DTD;
5002 ctxt->external = 1;
5003 while (((RAW == '<') && (NXT(1) == '?')) ||
5004 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00005005 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00005006 const xmlChar *check = CUR_PTR;
5007 int cons = ctxt->input->consumed;
5008 int tok = ctxt->token;
5009
5010 GROW;
5011 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
5012 xmlParseConditionalSections(ctxt);
5013 } else if (IS_BLANK(CUR)) {
5014 NEXT;
5015 } else if (RAW == '%') {
5016 xmlParsePEReference(ctxt);
5017 } else
5018 xmlParseMarkupDecl(ctxt);
5019
5020 /*
5021 * Pop-up of finished entities.
5022 */
5023 while ((RAW == 0) && (ctxt->inputNr > 1))
5024 xmlPopInput(ctxt);
5025
5026 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5027 (tok == ctxt->token)) {
5028 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5030 ctxt->sax->error(ctxt->userData,
5031 "Content error in the external subset\n");
5032 ctxt->wellFormed = 0;
5033 ctxt->disableSAX = 1;
5034 break;
5035 }
5036 }
5037
5038 if (RAW != 0) {
5039 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5040 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5041 ctxt->sax->error(ctxt->userData,
5042 "Extra content at the end of the document\n");
5043 ctxt->wellFormed = 0;
5044 ctxt->disableSAX = 1;
5045 }
5046
5047}
5048
5049/**
5050 * xmlParseReference:
5051 * @ctxt: an XML parser context
5052 *
5053 * parse and handle entity references in content, depending on the SAX
5054 * interface, this may end-up in a call to character() if this is a
5055 * CharRef, a predefined entity, if there is no reference() callback.
5056 * or if the parser was asked to switch to that mode.
5057 *
5058 * [67] Reference ::= EntityRef | CharRef
5059 */
5060void
5061xmlParseReference(xmlParserCtxtPtr ctxt) {
5062 xmlEntityPtr ent;
5063 xmlChar *val;
5064 if (RAW != '&') return;
5065
5066 if (NXT(1) == '#') {
5067 int i = 0;
5068 xmlChar out[10];
5069 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005070 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005071
5072 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5073 /*
5074 * So we are using non-UTF-8 buffers
5075 * Check that the char fit on 8bits, if not
5076 * generate a CharRef.
5077 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005078 if (value <= 0xFF) {
5079 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005080 out[1] = 0;
5081 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5082 (!ctxt->disableSAX))
5083 ctxt->sax->characters(ctxt->userData, out, 1);
5084 } else {
5085 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005086 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005087 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005088 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005089 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5090 (!ctxt->disableSAX))
5091 ctxt->sax->reference(ctxt->userData, out);
5092 }
5093 } else {
5094 /*
5095 * Just encode the value in UTF-8
5096 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005097 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005098 out[i] = 0;
5099 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5100 (!ctxt->disableSAX))
5101 ctxt->sax->characters(ctxt->userData, out, i);
5102 }
5103 } else {
5104 ent = xmlParseEntityRef(ctxt);
5105 if (ent == NULL) return;
5106 if ((ent->name != NULL) &&
5107 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5108 xmlNodePtr list = NULL;
5109 int ret;
5110
5111
5112 /*
5113 * The first reference to the entity trigger a parsing phase
5114 * where the ent->children is filled with the result from
5115 * the parsing.
5116 */
5117 if (ent->children == NULL) {
5118 xmlChar *value;
5119 value = ent->content;
5120
5121 /*
5122 * Check that this entity is well formed
5123 */
5124 if ((value != NULL) &&
5125 (value[1] == 0) && (value[0] == '<') &&
5126 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5127 /*
5128 * DONE: get definite answer on this !!!
5129 * Lots of entity decls are used to declare a single
5130 * char
5131 * <!ENTITY lt "<">
5132 * Which seems to be valid since
5133 * 2.4: The ampersand character (&) and the left angle
5134 * bracket (<) may appear in their literal form only
5135 * when used ... They are also legal within the literal
5136 * entity value of an internal entity declaration;i
5137 * see "4.3.2 Well-Formed Parsed Entities".
5138 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5139 * Looking at the OASIS test suite and James Clark
5140 * tests, this is broken. However the XML REC uses
5141 * it. Is the XML REC not well-formed ????
5142 * This is a hack to avoid this problem
5143 *
5144 * ANSWER: since lt gt amp .. are already defined,
5145 * this is a redefinition and hence the fact that the
5146 * contentis not well balanced is not a Wf error, this
5147 * is lousy but acceptable.
5148 */
5149 list = xmlNewDocText(ctxt->myDoc, value);
5150 if (list != NULL) {
5151 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5152 (ent->children == NULL)) {
5153 ent->children = list;
5154 ent->last = list;
5155 list->parent = (xmlNodePtr) ent;
5156 } else {
5157 xmlFreeNodeList(list);
5158 }
5159 } else if (list != NULL) {
5160 xmlFreeNodeList(list);
5161 }
5162 } else {
5163 /*
5164 * 4.3.2: An internal general parsed entity is well-formed
5165 * if its replacement text matches the production labeled
5166 * content.
5167 */
5168 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5169 ctxt->depth++;
5170 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5171 ctxt->sax, NULL, ctxt->depth,
5172 value, &list);
5173 ctxt->depth--;
5174 } else if (ent->etype ==
5175 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5176 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005177 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005178 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005179 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005180 ctxt->depth--;
5181 } else {
5182 ret = -1;
5183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5184 ctxt->sax->error(ctxt->userData,
5185 "Internal: invalid entity type\n");
5186 }
5187 if (ret == XML_ERR_ENTITY_LOOP) {
5188 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5190 ctxt->sax->error(ctxt->userData,
5191 "Detected entity reference loop\n");
5192 ctxt->wellFormed = 0;
5193 ctxt->disableSAX = 1;
5194 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005195 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5196 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005197 (ent->children == NULL)) {
5198 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005199 if (ctxt->replaceEntities) {
5200 /*
5201 * Prune it directly in the generated document
5202 * except for single text nodes.
5203 */
5204 if ((list->type == XML_TEXT_NODE) &&
5205 (list->next == NULL)) {
5206 list->parent = (xmlNodePtr) ent;
5207 list = NULL;
5208 } else {
5209 while (list != NULL) {
5210 list->parent = (xmlNodePtr) ctxt->node;
5211 if (list->next == NULL)
5212 ent->last = list;
5213 list = list->next;
5214 }
5215 list = ent->children;
5216 }
5217 } else {
5218 while (list != NULL) {
5219 list->parent = (xmlNodePtr) ent;
5220 if (list->next == NULL)
5221 ent->last = list;
5222 list = list->next;
5223 }
Owen Taylor3473f882001-02-23 17:55:21 +00005224 }
5225 } else {
5226 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005227 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005228 }
5229 } else if (ret > 0) {
5230 ctxt->errNo = ret;
5231 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5232 ctxt->sax->error(ctxt->userData,
5233 "Entity value required\n");
5234 ctxt->wellFormed = 0;
5235 ctxt->disableSAX = 1;
5236 } else if (list != NULL) {
5237 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005238 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005239 }
5240 }
5241 }
5242 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5243 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5244 /*
5245 * Create a node.
5246 */
5247 ctxt->sax->reference(ctxt->userData, ent->name);
5248 return;
5249 } else if (ctxt->replaceEntities) {
5250 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5251 /*
5252 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005253 * a simple tree copy for all references except the first
5254 * In the first occurence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005255 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005256 if (list == NULL) {
5257 xmlNodePtr new, cur;
5258 cur = ent->children;
5259 while (cur != NULL) {
5260 new = xmlCopyNode(cur, 1);
5261 xmlAddChild(ctxt->node, new);
5262 if (cur == ent->last)
5263 break;
5264 cur = cur->next;
5265 }
5266 } else {
5267 /*
5268 * the name change is to avoid coalescing of the
5269 * node with a prossible previous text one which
5270 * would make ent->children a dandling pointer
5271 */
5272 if (ent->children->type == XML_TEXT_NODE)
5273 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5274 if ((ent->last != ent->children) &&
5275 (ent->last->type == XML_TEXT_NODE))
5276 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5277 xmlAddChildList(ctxt->node, ent->children);
5278 }
5279
Owen Taylor3473f882001-02-23 17:55:21 +00005280 /*
5281 * This is to avoid a nasty side effect, see
5282 * characters() in SAX.c
5283 */
5284 ctxt->nodemem = 0;
5285 ctxt->nodelen = 0;
5286 return;
5287 } else {
5288 /*
5289 * Probably running in SAX mode
5290 */
5291 xmlParserInputPtr input;
5292
5293 input = xmlNewEntityInputStream(ctxt, ent);
5294 xmlPushInput(ctxt, input);
5295 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5296 (RAW == '<') && (NXT(1) == '?') &&
5297 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5298 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5299 xmlParseTextDecl(ctxt);
5300 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5301 /*
5302 * The XML REC instructs us to stop parsing right here
5303 */
5304 ctxt->instate = XML_PARSER_EOF;
5305 return;
5306 }
5307 if (input->standalone == 1) {
5308 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5310 ctxt->sax->error(ctxt->userData,
5311 "external parsed entities cannot be standalone\n");
5312 ctxt->wellFormed = 0;
5313 ctxt->disableSAX = 1;
5314 }
5315 }
5316 return;
5317 }
5318 }
5319 } else {
5320 val = ent->content;
5321 if (val == NULL) return;
5322 /*
5323 * inline the entity.
5324 */
5325 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5326 (!ctxt->disableSAX))
5327 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5328 }
5329 }
5330}
5331
5332/**
5333 * xmlParseEntityRef:
5334 * @ctxt: an XML parser context
5335 *
5336 * parse ENTITY references declarations
5337 *
5338 * [68] EntityRef ::= '&' Name ';'
5339 *
5340 * [ WFC: Entity Declared ]
5341 * In a document without any DTD, a document with only an internal DTD
5342 * subset which contains no parameter entity references, or a document
5343 * with "standalone='yes'", the Name given in the entity reference
5344 * must match that in an entity declaration, except that well-formed
5345 * documents need not declare any of the following entities: amp, lt,
5346 * gt, apos, quot. The declaration of a parameter entity must precede
5347 * any reference to it. Similarly, the declaration of a general entity
5348 * must precede any reference to it which appears in a default value in an
5349 * attribute-list declaration. Note that if entities are declared in the
5350 * external subset or in external parameter entities, a non-validating
5351 * processor is not obligated to read and process their declarations;
5352 * for such documents, the rule that an entity must be declared is a
5353 * well-formedness constraint only if standalone='yes'.
5354 *
5355 * [ WFC: Parsed Entity ]
5356 * An entity reference must not contain the name of an unparsed entity
5357 *
5358 * Returns the xmlEntityPtr if found, or NULL otherwise.
5359 */
5360xmlEntityPtr
5361xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5362 xmlChar *name;
5363 xmlEntityPtr ent = NULL;
5364
5365 GROW;
5366
5367 if (RAW == '&') {
5368 NEXT;
5369 name = xmlParseName(ctxt);
5370 if (name == NULL) {
5371 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5373 ctxt->sax->error(ctxt->userData,
5374 "xmlParseEntityRef: no name\n");
5375 ctxt->wellFormed = 0;
5376 ctxt->disableSAX = 1;
5377 } else {
5378 if (RAW == ';') {
5379 NEXT;
5380 /*
5381 * Ask first SAX for entity resolution, otherwise try the
5382 * predefined set.
5383 */
5384 if (ctxt->sax != NULL) {
5385 if (ctxt->sax->getEntity != NULL)
5386 ent = ctxt->sax->getEntity(ctxt->userData, name);
5387 if (ent == NULL)
5388 ent = xmlGetPredefinedEntity(name);
5389 }
5390 /*
5391 * [ WFC: Entity Declared ]
5392 * In a document without any DTD, a document with only an
5393 * internal DTD subset which contains no parameter entity
5394 * references, or a document with "standalone='yes'", the
5395 * Name given in the entity reference must match that in an
5396 * entity declaration, except that well-formed documents
5397 * need not declare any of the following entities: amp, lt,
5398 * gt, apos, quot.
5399 * The declaration of a parameter entity must precede any
5400 * reference to it.
5401 * Similarly, the declaration of a general entity must
5402 * precede any reference to it which appears in a default
5403 * value in an attribute-list declaration. Note that if
5404 * entities are declared in the external subset or in
5405 * external parameter entities, a non-validating processor
5406 * is not obligated to read and process their declarations;
5407 * for such documents, the rule that an entity must be
5408 * declared is a well-formedness constraint only if
5409 * standalone='yes'.
5410 */
5411 if (ent == NULL) {
5412 if ((ctxt->standalone == 1) ||
5413 ((ctxt->hasExternalSubset == 0) &&
5414 (ctxt->hasPErefs == 0))) {
5415 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5416 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5417 ctxt->sax->error(ctxt->userData,
5418 "Entity '%s' not defined\n", name);
5419 ctxt->wellFormed = 0;
5420 ctxt->disableSAX = 1;
5421 } else {
5422 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005423 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005424 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005425 "Entity '%s' not defined\n", name);
5426 }
5427 }
5428
5429 /*
5430 * [ WFC: Parsed Entity ]
5431 * An entity reference must not contain the name of an
5432 * unparsed entity
5433 */
5434 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5435 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5437 ctxt->sax->error(ctxt->userData,
5438 "Entity reference to unparsed entity %s\n", name);
5439 ctxt->wellFormed = 0;
5440 ctxt->disableSAX = 1;
5441 }
5442
5443 /*
5444 * [ WFC: No External Entity References ]
5445 * Attribute values cannot contain direct or indirect
5446 * entity references to external entities.
5447 */
5448 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5449 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5450 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5451 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5452 ctxt->sax->error(ctxt->userData,
5453 "Attribute references external entity '%s'\n", name);
5454 ctxt->wellFormed = 0;
5455 ctxt->disableSAX = 1;
5456 }
5457 /*
5458 * [ WFC: No < in Attribute Values ]
5459 * The replacement text of any entity referred to directly or
5460 * indirectly in an attribute value (other than "&lt;") must
5461 * not contain a <.
5462 */
5463 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5464 (ent != NULL) &&
5465 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5466 (ent->content != NULL) &&
5467 (xmlStrchr(ent->content, '<'))) {
5468 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5470 ctxt->sax->error(ctxt->userData,
5471 "'<' in entity '%s' is not allowed in attributes values\n", name);
5472 ctxt->wellFormed = 0;
5473 ctxt->disableSAX = 1;
5474 }
5475
5476 /*
5477 * Internal check, no parameter entities here ...
5478 */
5479 else {
5480 switch (ent->etype) {
5481 case XML_INTERNAL_PARAMETER_ENTITY:
5482 case XML_EXTERNAL_PARAMETER_ENTITY:
5483 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5484 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5485 ctxt->sax->error(ctxt->userData,
5486 "Attempt to reference the parameter entity '%s'\n", name);
5487 ctxt->wellFormed = 0;
5488 ctxt->disableSAX = 1;
5489 break;
5490 default:
5491 break;
5492 }
5493 }
5494
5495 /*
5496 * [ WFC: No Recursion ]
5497 * A parsed entity must not contain a recursive reference
5498 * to itself, either directly or indirectly.
5499 * Done somewhere else
5500 */
5501
5502 } else {
5503 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5505 ctxt->sax->error(ctxt->userData,
5506 "xmlParseEntityRef: expecting ';'\n");
5507 ctxt->wellFormed = 0;
5508 ctxt->disableSAX = 1;
5509 }
5510 xmlFree(name);
5511 }
5512 }
5513 return(ent);
5514}
5515
5516/**
5517 * xmlParseStringEntityRef:
5518 * @ctxt: an XML parser context
5519 * @str: a pointer to an index in the string
5520 *
5521 * parse ENTITY references declarations, but this version parses it from
5522 * a string value.
5523 *
5524 * [68] EntityRef ::= '&' Name ';'
5525 *
5526 * [ WFC: Entity Declared ]
5527 * In a document without any DTD, a document with only an internal DTD
5528 * subset which contains no parameter entity references, or a document
5529 * with "standalone='yes'", the Name given in the entity reference
5530 * must match that in an entity declaration, except that well-formed
5531 * documents need not declare any of the following entities: amp, lt,
5532 * gt, apos, quot. The declaration of a parameter entity must precede
5533 * any reference to it. Similarly, the declaration of a general entity
5534 * must precede any reference to it which appears in a default value in an
5535 * attribute-list declaration. Note that if entities are declared in the
5536 * external subset or in external parameter entities, a non-validating
5537 * processor is not obligated to read and process their declarations;
5538 * for such documents, the rule that an entity must be declared is a
5539 * well-formedness constraint only if standalone='yes'.
5540 *
5541 * [ WFC: Parsed Entity ]
5542 * An entity reference must not contain the name of an unparsed entity
5543 *
5544 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5545 * is updated to the current location in the string.
5546 */
5547xmlEntityPtr
5548xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5549 xmlChar *name;
5550 const xmlChar *ptr;
5551 xmlChar cur;
5552 xmlEntityPtr ent = NULL;
5553
5554 if ((str == NULL) || (*str == NULL))
5555 return(NULL);
5556 ptr = *str;
5557 cur = *ptr;
5558 if (cur == '&') {
5559 ptr++;
5560 cur = *ptr;
5561 name = xmlParseStringName(ctxt, &ptr);
5562 if (name == NULL) {
5563 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5565 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005566 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005567 ctxt->wellFormed = 0;
5568 ctxt->disableSAX = 1;
5569 } else {
5570 if (*ptr == ';') {
5571 ptr++;
5572 /*
5573 * Ask first SAX for entity resolution, otherwise try the
5574 * predefined set.
5575 */
5576 if (ctxt->sax != NULL) {
5577 if (ctxt->sax->getEntity != NULL)
5578 ent = ctxt->sax->getEntity(ctxt->userData, name);
5579 if (ent == NULL)
5580 ent = xmlGetPredefinedEntity(name);
5581 }
5582 /*
5583 * [ WFC: Entity Declared ]
5584 * In a document without any DTD, a document with only an
5585 * internal DTD subset which contains no parameter entity
5586 * references, or a document with "standalone='yes'", the
5587 * Name given in the entity reference must match that in an
5588 * entity declaration, except that well-formed documents
5589 * need not declare any of the following entities: amp, lt,
5590 * gt, apos, quot.
5591 * The declaration of a parameter entity must precede any
5592 * reference to it.
5593 * Similarly, the declaration of a general entity must
5594 * precede any reference to it which appears in a default
5595 * value in an attribute-list declaration. Note that if
5596 * entities are declared in the external subset or in
5597 * external parameter entities, a non-validating processor
5598 * is not obligated to read and process their declarations;
5599 * for such documents, the rule that an entity must be
5600 * declared is a well-formedness constraint only if
5601 * standalone='yes'.
5602 */
5603 if (ent == NULL) {
5604 if ((ctxt->standalone == 1) ||
5605 ((ctxt->hasExternalSubset == 0) &&
5606 (ctxt->hasPErefs == 0))) {
5607 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5609 ctxt->sax->error(ctxt->userData,
5610 "Entity '%s' not defined\n", name);
5611 ctxt->wellFormed = 0;
5612 ctxt->disableSAX = 1;
5613 } else {
5614 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5615 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5616 ctxt->sax->warning(ctxt->userData,
5617 "Entity '%s' not defined\n", name);
5618 }
5619 }
5620
5621 /*
5622 * [ WFC: Parsed Entity ]
5623 * An entity reference must not contain the name of an
5624 * unparsed entity
5625 */
5626 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5627 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5629 ctxt->sax->error(ctxt->userData,
5630 "Entity reference to unparsed entity %s\n", name);
5631 ctxt->wellFormed = 0;
5632 ctxt->disableSAX = 1;
5633 }
5634
5635 /*
5636 * [ WFC: No External Entity References ]
5637 * Attribute values cannot contain direct or indirect
5638 * entity references to external entities.
5639 */
5640 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5641 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5642 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5644 ctxt->sax->error(ctxt->userData,
5645 "Attribute references external entity '%s'\n", name);
5646 ctxt->wellFormed = 0;
5647 ctxt->disableSAX = 1;
5648 }
5649 /*
5650 * [ WFC: No < in Attribute Values ]
5651 * The replacement text of any entity referred to directly or
5652 * indirectly in an attribute value (other than "&lt;") must
5653 * not contain a <.
5654 */
5655 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5656 (ent != NULL) &&
5657 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5658 (ent->content != NULL) &&
5659 (xmlStrchr(ent->content, '<'))) {
5660 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5662 ctxt->sax->error(ctxt->userData,
5663 "'<' in entity '%s' is not allowed in attributes values\n", name);
5664 ctxt->wellFormed = 0;
5665 ctxt->disableSAX = 1;
5666 }
5667
5668 /*
5669 * Internal check, no parameter entities here ...
5670 */
5671 else {
5672 switch (ent->etype) {
5673 case XML_INTERNAL_PARAMETER_ENTITY:
5674 case XML_EXTERNAL_PARAMETER_ENTITY:
5675 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5677 ctxt->sax->error(ctxt->userData,
5678 "Attempt to reference the parameter entity '%s'\n", name);
5679 ctxt->wellFormed = 0;
5680 ctxt->disableSAX = 1;
5681 break;
5682 default:
5683 break;
5684 }
5685 }
5686
5687 /*
5688 * [ WFC: No Recursion ]
5689 * A parsed entity must not contain a recursive reference
5690 * to itself, either directly or indirectly.
5691 * Done somewhwere else
5692 */
5693
5694 } else {
5695 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5697 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005698 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005699 ctxt->wellFormed = 0;
5700 ctxt->disableSAX = 1;
5701 }
5702 xmlFree(name);
5703 }
5704 }
5705 *str = ptr;
5706 return(ent);
5707}
5708
5709/**
5710 * xmlParsePEReference:
5711 * @ctxt: an XML parser context
5712 *
5713 * parse PEReference declarations
5714 * The entity content is handled directly by pushing it's content as
5715 * a new input stream.
5716 *
5717 * [69] PEReference ::= '%' Name ';'
5718 *
5719 * [ WFC: No Recursion ]
5720 * A parsed entity must not contain a recursive
5721 * reference to itself, either directly or indirectly.
5722 *
5723 * [ WFC: Entity Declared ]
5724 * In a document without any DTD, a document with only an internal DTD
5725 * subset which contains no parameter entity references, or a document
5726 * with "standalone='yes'", ... ... The declaration of a parameter
5727 * entity must precede any reference to it...
5728 *
5729 * [ VC: Entity Declared ]
5730 * In a document with an external subset or external parameter entities
5731 * with "standalone='no'", ... ... The declaration of a parameter entity
5732 * must precede any reference to it...
5733 *
5734 * [ WFC: In DTD ]
5735 * Parameter-entity references may only appear in the DTD.
5736 * NOTE: misleading but this is handled.
5737 */
5738void
5739xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5740 xmlChar *name;
5741 xmlEntityPtr entity = NULL;
5742 xmlParserInputPtr input;
5743
5744 if (RAW == '%') {
5745 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005746 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005747 if (name == NULL) {
5748 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5750 ctxt->sax->error(ctxt->userData,
5751 "xmlParsePEReference: no name\n");
5752 ctxt->wellFormed = 0;
5753 ctxt->disableSAX = 1;
5754 } else {
5755 if (RAW == ';') {
5756 NEXT;
5757 if ((ctxt->sax != NULL) &&
5758 (ctxt->sax->getParameterEntity != NULL))
5759 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5760 name);
5761 if (entity == NULL) {
5762 /*
5763 * [ WFC: Entity Declared ]
5764 * In a document without any DTD, a document with only an
5765 * internal DTD subset which contains no parameter entity
5766 * references, or a document with "standalone='yes'", ...
5767 * ... The declaration of a parameter entity must precede
5768 * any reference to it...
5769 */
5770 if ((ctxt->standalone == 1) ||
5771 ((ctxt->hasExternalSubset == 0) &&
5772 (ctxt->hasPErefs == 0))) {
5773 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5774 if ((!ctxt->disableSAX) &&
5775 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5776 ctxt->sax->error(ctxt->userData,
5777 "PEReference: %%%s; not found\n", name);
5778 ctxt->wellFormed = 0;
5779 ctxt->disableSAX = 1;
5780 } else {
5781 /*
5782 * [ VC: Entity Declared ]
5783 * In a document with an external subset or external
5784 * parameter entities with "standalone='no'", ...
5785 * ... The declaration of a parameter entity must precede
5786 * any reference to it...
5787 */
5788 if ((!ctxt->disableSAX) &&
5789 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5790 ctxt->sax->warning(ctxt->userData,
5791 "PEReference: %%%s; not found\n", name);
5792 ctxt->valid = 0;
5793 }
5794 } else {
5795 /*
5796 * Internal checking in case the entity quest barfed
5797 */
5798 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5799 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5800 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5801 ctxt->sax->warning(ctxt->userData,
5802 "Internal: %%%s; is not a parameter entity\n", name);
5803 } else {
5804 /*
5805 * TODO !!!
5806 * handle the extra spaces added before and after
5807 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5808 */
5809 input = xmlNewEntityInputStream(ctxt, entity);
5810 xmlPushInput(ctxt, input);
5811 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5812 (RAW == '<') && (NXT(1) == '?') &&
5813 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5814 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5815 xmlParseTextDecl(ctxt);
5816 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5817 /*
5818 * The XML REC instructs us to stop parsing
5819 * right here
5820 */
5821 ctxt->instate = XML_PARSER_EOF;
5822 xmlFree(name);
5823 return;
5824 }
5825 }
5826 if (ctxt->token == 0)
5827 ctxt->token = ' ';
5828 }
5829 }
5830 ctxt->hasPErefs = 1;
5831 } else {
5832 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5834 ctxt->sax->error(ctxt->userData,
5835 "xmlParsePEReference: expecting ';'\n");
5836 ctxt->wellFormed = 0;
5837 ctxt->disableSAX = 1;
5838 }
5839 xmlFree(name);
5840 }
5841 }
5842}
5843
5844/**
5845 * xmlParseStringPEReference:
5846 * @ctxt: an XML parser context
5847 * @str: a pointer to an index in the string
5848 *
5849 * parse PEReference declarations
5850 *
5851 * [69] PEReference ::= '%' Name ';'
5852 *
5853 * [ WFC: No Recursion ]
5854 * A parsed entity must not contain a recursive
5855 * reference to itself, either directly or indirectly.
5856 *
5857 * [ WFC: Entity Declared ]
5858 * In a document without any DTD, a document with only an internal DTD
5859 * subset which contains no parameter entity references, or a document
5860 * with "standalone='yes'", ... ... The declaration of a parameter
5861 * entity must precede any reference to it...
5862 *
5863 * [ VC: Entity Declared ]
5864 * In a document with an external subset or external parameter entities
5865 * with "standalone='no'", ... ... The declaration of a parameter entity
5866 * must precede any reference to it...
5867 *
5868 * [ WFC: In DTD ]
5869 * Parameter-entity references may only appear in the DTD.
5870 * NOTE: misleading but this is handled.
5871 *
5872 * Returns the string of the entity content.
5873 * str is updated to the current value of the index
5874 */
5875xmlEntityPtr
5876xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5877 const xmlChar *ptr;
5878 xmlChar cur;
5879 xmlChar *name;
5880 xmlEntityPtr entity = NULL;
5881
5882 if ((str == NULL) || (*str == NULL)) return(NULL);
5883 ptr = *str;
5884 cur = *ptr;
5885 if (cur == '%') {
5886 ptr++;
5887 cur = *ptr;
5888 name = xmlParseStringName(ctxt, &ptr);
5889 if (name == NULL) {
5890 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5892 ctxt->sax->error(ctxt->userData,
5893 "xmlParseStringPEReference: no name\n");
5894 ctxt->wellFormed = 0;
5895 ctxt->disableSAX = 1;
5896 } else {
5897 cur = *ptr;
5898 if (cur == ';') {
5899 ptr++;
5900 cur = *ptr;
5901 if ((ctxt->sax != NULL) &&
5902 (ctxt->sax->getParameterEntity != NULL))
5903 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5904 name);
5905 if (entity == NULL) {
5906 /*
5907 * [ WFC: Entity Declared ]
5908 * In a document without any DTD, a document with only an
5909 * internal DTD subset which contains no parameter entity
5910 * references, or a document with "standalone='yes'", ...
5911 * ... The declaration of a parameter entity must precede
5912 * any reference to it...
5913 */
5914 if ((ctxt->standalone == 1) ||
5915 ((ctxt->hasExternalSubset == 0) &&
5916 (ctxt->hasPErefs == 0))) {
5917 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5919 ctxt->sax->error(ctxt->userData,
5920 "PEReference: %%%s; not found\n", name);
5921 ctxt->wellFormed = 0;
5922 ctxt->disableSAX = 1;
5923 } else {
5924 /*
5925 * [ VC: Entity Declared ]
5926 * In a document with an external subset or external
5927 * parameter entities with "standalone='no'", ...
5928 * ... The declaration of a parameter entity must
5929 * precede any reference to it...
5930 */
5931 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5932 ctxt->sax->warning(ctxt->userData,
5933 "PEReference: %%%s; not found\n", name);
5934 ctxt->valid = 0;
5935 }
5936 } else {
5937 /*
5938 * Internal checking in case the entity quest barfed
5939 */
5940 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5941 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5942 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5943 ctxt->sax->warning(ctxt->userData,
5944 "Internal: %%%s; is not a parameter entity\n", name);
5945 }
5946 }
5947 ctxt->hasPErefs = 1;
5948 } else {
5949 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5951 ctxt->sax->error(ctxt->userData,
5952 "xmlParseStringPEReference: expecting ';'\n");
5953 ctxt->wellFormed = 0;
5954 ctxt->disableSAX = 1;
5955 }
5956 xmlFree(name);
5957 }
5958 }
5959 *str = ptr;
5960 return(entity);
5961}
5962
5963/**
5964 * xmlParseDocTypeDecl:
5965 * @ctxt: an XML parser context
5966 *
5967 * parse a DOCTYPE declaration
5968 *
5969 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5970 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5971 *
5972 * [ VC: Root Element Type ]
5973 * The Name in the document type declaration must match the element
5974 * type of the root element.
5975 */
5976
5977void
5978xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5979 xmlChar *name = NULL;
5980 xmlChar *ExternalID = NULL;
5981 xmlChar *URI = NULL;
5982
5983 /*
5984 * We know that '<!DOCTYPE' has been detected.
5985 */
5986 SKIP(9);
5987
5988 SKIP_BLANKS;
5989
5990 /*
5991 * Parse the DOCTYPE name.
5992 */
5993 name = xmlParseName(ctxt);
5994 if (name == NULL) {
5995 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5997 ctxt->sax->error(ctxt->userData,
5998 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5999 ctxt->wellFormed = 0;
6000 ctxt->disableSAX = 1;
6001 }
6002 ctxt->intSubName = name;
6003
6004 SKIP_BLANKS;
6005
6006 /*
6007 * Check for SystemID and ExternalID
6008 */
6009 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
6010
6011 if ((URI != NULL) || (ExternalID != NULL)) {
6012 ctxt->hasExternalSubset = 1;
6013 }
6014 ctxt->extSubURI = URI;
6015 ctxt->extSubSystem = ExternalID;
6016
6017 SKIP_BLANKS;
6018
6019 /*
6020 * Create and update the internal subset.
6021 */
6022 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6023 (!ctxt->disableSAX))
6024 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6025
6026 /*
6027 * Is there any internal subset declarations ?
6028 * they are handled separately in xmlParseInternalSubset()
6029 */
6030 if (RAW == '[')
6031 return;
6032
6033 /*
6034 * We should be at the end of the DOCTYPE declaration.
6035 */
6036 if (RAW != '>') {
6037 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006039 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006040 ctxt->wellFormed = 0;
6041 ctxt->disableSAX = 1;
6042 }
6043 NEXT;
6044}
6045
6046/**
6047 * xmlParseInternalsubset:
6048 * @ctxt: an XML parser context
6049 *
6050 * parse the internal subset declaration
6051 *
6052 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6053 */
6054
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006055static void
Owen Taylor3473f882001-02-23 17:55:21 +00006056xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6057 /*
6058 * Is there any DTD definition ?
6059 */
6060 if (RAW == '[') {
6061 ctxt->instate = XML_PARSER_DTD;
6062 NEXT;
6063 /*
6064 * Parse the succession of Markup declarations and
6065 * PEReferences.
6066 * Subsequence (markupdecl | PEReference | S)*
6067 */
6068 while (RAW != ']') {
6069 const xmlChar *check = CUR_PTR;
6070 int cons = ctxt->input->consumed;
6071
6072 SKIP_BLANKS;
6073 xmlParseMarkupDecl(ctxt);
6074 xmlParsePEReference(ctxt);
6075
6076 /*
6077 * Pop-up of finished entities.
6078 */
6079 while ((RAW == 0) && (ctxt->inputNr > 1))
6080 xmlPopInput(ctxt);
6081
6082 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6083 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6084 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6085 ctxt->sax->error(ctxt->userData,
6086 "xmlParseInternalSubset: error detected in Markup declaration\n");
6087 ctxt->wellFormed = 0;
6088 ctxt->disableSAX = 1;
6089 break;
6090 }
6091 }
6092 if (RAW == ']') {
6093 NEXT;
6094 SKIP_BLANKS;
6095 }
6096 }
6097
6098 /*
6099 * We should be at the end of the DOCTYPE declaration.
6100 */
6101 if (RAW != '>') {
6102 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillardf6ed8bc2001-10-02 09:22:47 +00006104 ctxt->sax->error(ctxt->userData, "DOCTYPE improperly terminated\n");
Owen Taylor3473f882001-02-23 17:55:21 +00006105 ctxt->wellFormed = 0;
6106 ctxt->disableSAX = 1;
6107 }
6108 NEXT;
6109}
6110
6111/**
6112 * xmlParseAttribute:
6113 * @ctxt: an XML parser context
6114 * @value: a xmlChar ** used to store the value of the attribute
6115 *
6116 * parse an attribute
6117 *
6118 * [41] Attribute ::= Name Eq AttValue
6119 *
6120 * [ WFC: No External Entity References ]
6121 * Attribute values cannot contain direct or indirect entity references
6122 * to external entities.
6123 *
6124 * [ WFC: No < in Attribute Values ]
6125 * The replacement text of any entity referred to directly or indirectly in
6126 * an attribute value (other than "&lt;") must not contain a <.
6127 *
6128 * [ VC: Attribute Value Type ]
6129 * The attribute must have been declared; the value must be of the type
6130 * declared for it.
6131 *
6132 * [25] Eq ::= S? '=' S?
6133 *
6134 * With namespace:
6135 *
6136 * [NS 11] Attribute ::= QName Eq AttValue
6137 *
6138 * Also the case QName == xmlns:??? is handled independently as a namespace
6139 * definition.
6140 *
6141 * Returns the attribute name, and the value in *value.
6142 */
6143
6144xmlChar *
6145xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6146 xmlChar *name, *val;
6147
6148 *value = NULL;
6149 name = xmlParseName(ctxt);
6150 if (name == NULL) {
6151 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6153 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6154 ctxt->wellFormed = 0;
6155 ctxt->disableSAX = 1;
6156 return(NULL);
6157 }
6158
6159 /*
6160 * read the value
6161 */
6162 SKIP_BLANKS;
6163 if (RAW == '=') {
6164 NEXT;
6165 SKIP_BLANKS;
6166 val = xmlParseAttValue(ctxt);
6167 ctxt->instate = XML_PARSER_CONTENT;
6168 } else {
6169 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6171 ctxt->sax->error(ctxt->userData,
6172 "Specification mandate value for attribute %s\n", name);
6173 ctxt->wellFormed = 0;
6174 ctxt->disableSAX = 1;
6175 xmlFree(name);
6176 return(NULL);
6177 }
6178
6179 /*
6180 * Check that xml:lang conforms to the specification
6181 * No more registered as an error, just generate a warning now
6182 * since this was deprecated in XML second edition
6183 */
6184 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6185 if (!xmlCheckLanguageID(val)) {
6186 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6187 ctxt->sax->warning(ctxt->userData,
6188 "Malformed value for xml:lang : %s\n", val);
6189 }
6190 }
6191
6192 /*
6193 * Check that xml:space conforms to the specification
6194 */
6195 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6196 if (xmlStrEqual(val, BAD_CAST "default"))
6197 *(ctxt->space) = 0;
6198 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6199 *(ctxt->space) = 1;
6200 else {
6201 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6202 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6203 ctxt->sax->error(ctxt->userData,
6204"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6205 val);
6206 ctxt->wellFormed = 0;
6207 ctxt->disableSAX = 1;
6208 }
6209 }
6210
6211 *value = val;
6212 return(name);
6213}
6214
6215/**
6216 * xmlParseStartTag:
6217 * @ctxt: an XML parser context
6218 *
6219 * parse a start of tag either for rule element or
6220 * EmptyElement. In both case we don't parse the tag closing chars.
6221 *
6222 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6223 *
6224 * [ WFC: Unique Att Spec ]
6225 * No attribute name may appear more than once in the same start-tag or
6226 * empty-element tag.
6227 *
6228 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6229 *
6230 * [ WFC: Unique Att Spec ]
6231 * No attribute name may appear more than once in the same start-tag or
6232 * empty-element tag.
6233 *
6234 * With namespace:
6235 *
6236 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6237 *
6238 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6239 *
6240 * Returns the element name parsed
6241 */
6242
6243xmlChar *
6244xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6245 xmlChar *name;
6246 xmlChar *attname;
6247 xmlChar *attvalue;
6248 const xmlChar **atts = NULL;
6249 int nbatts = 0;
6250 int maxatts = 0;
6251 int i;
6252
6253 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006254 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006255
6256 name = xmlParseName(ctxt);
6257 if (name == NULL) {
6258 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6259 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6260 ctxt->sax->error(ctxt->userData,
6261 "xmlParseStartTag: invalid element name\n");
6262 ctxt->wellFormed = 0;
6263 ctxt->disableSAX = 1;
6264 return(NULL);
6265 }
6266
6267 /*
6268 * Now parse the attributes, it ends up with the ending
6269 *
6270 * (S Attribute)* S?
6271 */
6272 SKIP_BLANKS;
6273 GROW;
6274
Daniel Veillard21a0f912001-02-25 19:54:14 +00006275 while ((RAW != '>') &&
6276 ((RAW != '/') || (NXT(1) != '>')) &&
6277 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006278 const xmlChar *q = CUR_PTR;
6279 int cons = ctxt->input->consumed;
6280
6281 attname = xmlParseAttribute(ctxt, &attvalue);
6282 if ((attname != NULL) && (attvalue != NULL)) {
6283 /*
6284 * [ WFC: Unique Att Spec ]
6285 * No attribute name may appear more than once in the same
6286 * start-tag or empty-element tag.
6287 */
6288 for (i = 0; i < nbatts;i += 2) {
6289 if (xmlStrEqual(atts[i], attname)) {
6290 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6292 ctxt->sax->error(ctxt->userData,
6293 "Attribute %s redefined\n",
6294 attname);
6295 ctxt->wellFormed = 0;
6296 ctxt->disableSAX = 1;
6297 xmlFree(attname);
6298 xmlFree(attvalue);
6299 goto failed;
6300 }
6301 }
6302
6303 /*
6304 * Add the pair to atts
6305 */
6306 if (atts == NULL) {
6307 maxatts = 10;
6308 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6309 if (atts == NULL) {
6310 xmlGenericError(xmlGenericErrorContext,
6311 "malloc of %ld byte failed\n",
6312 maxatts * (long)sizeof(xmlChar *));
6313 return(NULL);
6314 }
6315 } else if (nbatts + 4 > maxatts) {
6316 maxatts *= 2;
6317 atts = (const xmlChar **) xmlRealloc((void *) atts,
6318 maxatts * sizeof(xmlChar *));
6319 if (atts == NULL) {
6320 xmlGenericError(xmlGenericErrorContext,
6321 "realloc of %ld byte failed\n",
6322 maxatts * (long)sizeof(xmlChar *));
6323 return(NULL);
6324 }
6325 }
6326 atts[nbatts++] = attname;
6327 atts[nbatts++] = attvalue;
6328 atts[nbatts] = NULL;
6329 atts[nbatts + 1] = NULL;
6330 } else {
6331 if (attname != NULL)
6332 xmlFree(attname);
6333 if (attvalue != NULL)
6334 xmlFree(attvalue);
6335 }
6336
6337failed:
6338
6339 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6340 break;
6341 if (!IS_BLANK(RAW)) {
6342 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6344 ctxt->sax->error(ctxt->userData,
6345 "attributes construct error\n");
6346 ctxt->wellFormed = 0;
6347 ctxt->disableSAX = 1;
6348 }
6349 SKIP_BLANKS;
6350 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6351 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6353 ctxt->sax->error(ctxt->userData,
6354 "xmlParseStartTag: problem parsing attributes\n");
6355 ctxt->wellFormed = 0;
6356 ctxt->disableSAX = 1;
6357 break;
6358 }
6359 GROW;
6360 }
6361
6362 /*
6363 * SAX: Start of Element !
6364 */
6365 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6366 (!ctxt->disableSAX))
6367 ctxt->sax->startElement(ctxt->userData, name, atts);
6368
6369 if (atts != NULL) {
6370 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6371 xmlFree((void *) atts);
6372 }
6373 return(name);
6374}
6375
6376/**
6377 * xmlParseEndTag:
6378 * @ctxt: an XML parser context
6379 *
6380 * parse an end of tag
6381 *
6382 * [42] ETag ::= '</' Name S? '>'
6383 *
6384 * With namespace
6385 *
6386 * [NS 9] ETag ::= '</' QName S? '>'
6387 */
6388
6389void
6390xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6391 xmlChar *name;
6392 xmlChar *oldname;
6393
6394 GROW;
6395 if ((RAW != '<') || (NXT(1) != '/')) {
6396 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6397 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6398 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6399 ctxt->wellFormed = 0;
6400 ctxt->disableSAX = 1;
6401 return;
6402 }
6403 SKIP(2);
6404
6405 name = xmlParseName(ctxt);
6406
6407 /*
6408 * We should definitely be at the ending "S? '>'" part
6409 */
6410 GROW;
6411 SKIP_BLANKS;
6412 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6413 ctxt->errNo = XML_ERR_GT_REQUIRED;
6414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6415 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6416 ctxt->wellFormed = 0;
6417 ctxt->disableSAX = 1;
6418 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006419 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006420
6421 /*
6422 * [ WFC: Element Type Match ]
6423 * The Name in an element's end-tag must match the element type in the
6424 * start-tag.
6425 *
6426 */
6427 if ((name == NULL) || (ctxt->name == NULL) ||
6428 (!xmlStrEqual(name, ctxt->name))) {
6429 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6431 if ((name != NULL) && (ctxt->name != NULL)) {
6432 ctxt->sax->error(ctxt->userData,
6433 "Opening and ending tag mismatch: %s and %s\n",
6434 ctxt->name, name);
6435 } else if (ctxt->name != NULL) {
6436 ctxt->sax->error(ctxt->userData,
6437 "Ending tag eror for: %s\n", ctxt->name);
6438 } else {
6439 ctxt->sax->error(ctxt->userData,
6440 "Ending tag error: internal error ???\n");
6441 }
6442
6443 }
6444 ctxt->wellFormed = 0;
6445 ctxt->disableSAX = 1;
6446 }
6447
6448 /*
6449 * SAX: End of Tag
6450 */
6451 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6452 (!ctxt->disableSAX))
6453 ctxt->sax->endElement(ctxt->userData, name);
6454
6455 if (name != NULL)
6456 xmlFree(name);
6457 oldname = namePop(ctxt);
6458 spacePop(ctxt);
6459 if (oldname != NULL) {
6460#ifdef DEBUG_STACK
6461 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6462#endif
6463 xmlFree(oldname);
6464 }
6465 return;
6466}
6467
6468/**
6469 * xmlParseCDSect:
6470 * @ctxt: an XML parser context
6471 *
6472 * Parse escaped pure raw content.
6473 *
6474 * [18] CDSect ::= CDStart CData CDEnd
6475 *
6476 * [19] CDStart ::= '<![CDATA['
6477 *
6478 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6479 *
6480 * [21] CDEnd ::= ']]>'
6481 */
6482void
6483xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6484 xmlChar *buf = NULL;
6485 int len = 0;
6486 int size = XML_PARSER_BUFFER_SIZE;
6487 int r, rl;
6488 int s, sl;
6489 int cur, l;
6490 int count = 0;
6491
6492 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6493 (NXT(2) == '[') && (NXT(3) == 'C') &&
6494 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6495 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6496 (NXT(8) == '[')) {
6497 SKIP(9);
6498 } else
6499 return;
6500
6501 ctxt->instate = XML_PARSER_CDATA_SECTION;
6502 r = CUR_CHAR(rl);
6503 if (!IS_CHAR(r)) {
6504 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6505 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6506 ctxt->sax->error(ctxt->userData,
6507 "CData section not finished\n");
6508 ctxt->wellFormed = 0;
6509 ctxt->disableSAX = 1;
6510 ctxt->instate = XML_PARSER_CONTENT;
6511 return;
6512 }
6513 NEXTL(rl);
6514 s = CUR_CHAR(sl);
6515 if (!IS_CHAR(s)) {
6516 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6518 ctxt->sax->error(ctxt->userData,
6519 "CData section not finished\n");
6520 ctxt->wellFormed = 0;
6521 ctxt->disableSAX = 1;
6522 ctxt->instate = XML_PARSER_CONTENT;
6523 return;
6524 }
6525 NEXTL(sl);
6526 cur = CUR_CHAR(l);
6527 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6528 if (buf == NULL) {
6529 xmlGenericError(xmlGenericErrorContext,
6530 "malloc of %d byte failed\n", size);
6531 return;
6532 }
6533 while (IS_CHAR(cur) &&
6534 ((r != ']') || (s != ']') || (cur != '>'))) {
6535 if (len + 5 >= size) {
6536 size *= 2;
6537 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6538 if (buf == NULL) {
6539 xmlGenericError(xmlGenericErrorContext,
6540 "realloc of %d byte failed\n", size);
6541 return;
6542 }
6543 }
6544 COPY_BUF(rl,buf,len,r);
6545 r = s;
6546 rl = sl;
6547 s = cur;
6548 sl = l;
6549 count++;
6550 if (count > 50) {
6551 GROW;
6552 count = 0;
6553 }
6554 NEXTL(l);
6555 cur = CUR_CHAR(l);
6556 }
6557 buf[len] = 0;
6558 ctxt->instate = XML_PARSER_CONTENT;
6559 if (cur != '>') {
6560 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6561 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6562 ctxt->sax->error(ctxt->userData,
6563 "CData section not finished\n%.50s\n", buf);
6564 ctxt->wellFormed = 0;
6565 ctxt->disableSAX = 1;
6566 xmlFree(buf);
6567 return;
6568 }
6569 NEXTL(l);
6570
6571 /*
6572 * Ok the buffer is to be consumed as cdata.
6573 */
6574 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6575 if (ctxt->sax->cdataBlock != NULL)
6576 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006577 else if (ctxt->sax->characters != NULL)
6578 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006579 }
6580 xmlFree(buf);
6581}
6582
6583/**
6584 * xmlParseContent:
6585 * @ctxt: an XML parser context
6586 *
6587 * Parse a content:
6588 *
6589 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6590 */
6591
6592void
6593xmlParseContent(xmlParserCtxtPtr ctxt) {
6594 GROW;
6595 while (((RAW != 0) || (ctxt->token != 0)) &&
6596 ((RAW != '<') || (NXT(1) != '/'))) {
6597 const xmlChar *test = CUR_PTR;
6598 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006599 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006600 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006601
6602 /*
6603 * Handle possible processed charrefs.
6604 */
6605 if (ctxt->token != 0) {
6606 xmlParseCharData(ctxt, 0);
6607 }
6608 /*
6609 * First case : a Processing Instruction.
6610 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006611 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006612 xmlParsePI(ctxt);
6613 }
6614
6615 /*
6616 * Second case : a CDSection
6617 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006618 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006619 (NXT(2) == '[') && (NXT(3) == 'C') &&
6620 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6621 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6622 (NXT(8) == '[')) {
6623 xmlParseCDSect(ctxt);
6624 }
6625
6626 /*
6627 * Third case : a comment
6628 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006629 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006630 (NXT(2) == '-') && (NXT(3) == '-')) {
6631 xmlParseComment(ctxt);
6632 ctxt->instate = XML_PARSER_CONTENT;
6633 }
6634
6635 /*
6636 * Fourth case : a sub-element.
6637 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006638 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006639 xmlParseElement(ctxt);
6640 }
6641
6642 /*
6643 * Fifth case : a reference. If if has not been resolved,
6644 * parsing returns it's Name, create the node
6645 */
6646
Daniel Veillard21a0f912001-02-25 19:54:14 +00006647 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006648 xmlParseReference(ctxt);
6649 }
6650
6651 /*
6652 * Last case, text. Note that References are handled directly.
6653 */
6654 else {
6655 xmlParseCharData(ctxt, 0);
6656 }
6657
6658 GROW;
6659 /*
6660 * Pop-up of finished entities.
6661 */
6662 while ((RAW == 0) && (ctxt->inputNr > 1))
6663 xmlPopInput(ctxt);
6664 SHRINK;
6665
6666 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6667 (tok == ctxt->token)) {
6668 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6669 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6670 ctxt->sax->error(ctxt->userData,
6671 "detected an error in element content\n");
6672 ctxt->wellFormed = 0;
6673 ctxt->disableSAX = 1;
6674 ctxt->instate = XML_PARSER_EOF;
6675 break;
6676 }
6677 }
6678}
6679
6680/**
6681 * xmlParseElement:
6682 * @ctxt: an XML parser context
6683 *
6684 * parse an XML element, this is highly recursive
6685 *
6686 * [39] element ::= EmptyElemTag | STag content ETag
6687 *
6688 * [ WFC: Element Type Match ]
6689 * The Name in an element's end-tag must match the element type in the
6690 * start-tag.
6691 *
6692 * [ VC: Element Valid ]
6693 * An element is valid if there is a declaration matching elementdecl
6694 * where the Name matches the element type and one of the following holds:
6695 * - The declaration matches EMPTY and the element has no content.
6696 * - The declaration matches children and the sequence of child elements
6697 * belongs to the language generated by the regular expression in the
6698 * content model, with optional white space (characters matching the
6699 * nonterminal S) between each pair of child elements.
6700 * - The declaration matches Mixed and the content consists of character
6701 * data and child elements whose types match names in the content model.
6702 * - The declaration matches ANY, and the types of any child elements have
6703 * been declared.
6704 */
6705
6706void
6707xmlParseElement(xmlParserCtxtPtr ctxt) {
6708 const xmlChar *openTag = CUR_PTR;
6709 xmlChar *name;
6710 xmlChar *oldname;
6711 xmlParserNodeInfo node_info;
6712 xmlNodePtr ret;
6713
6714 /* Capture start position */
6715 if (ctxt->record_info) {
6716 node_info.begin_pos = ctxt->input->consumed +
6717 (CUR_PTR - ctxt->input->base);
6718 node_info.begin_line = ctxt->input->line;
6719 }
6720
6721 if (ctxt->spaceNr == 0)
6722 spacePush(ctxt, -1);
6723 else
6724 spacePush(ctxt, *ctxt->space);
6725
6726 name = xmlParseStartTag(ctxt);
6727 if (name == NULL) {
6728 spacePop(ctxt);
6729 return;
6730 }
6731 namePush(ctxt, name);
6732 ret = ctxt->node;
6733
6734 /*
6735 * [ VC: Root Element Type ]
6736 * The Name in the document type declaration must match the element
6737 * type of the root element.
6738 */
6739 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6740 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6741 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6742
6743 /*
6744 * Check for an Empty Element.
6745 */
6746 if ((RAW == '/') && (NXT(1) == '>')) {
6747 SKIP(2);
6748 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6749 (!ctxt->disableSAX))
6750 ctxt->sax->endElement(ctxt->userData, name);
6751 oldname = namePop(ctxt);
6752 spacePop(ctxt);
6753 if (oldname != NULL) {
6754#ifdef DEBUG_STACK
6755 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6756#endif
6757 xmlFree(oldname);
6758 }
6759 if ( ret != NULL && ctxt->record_info ) {
6760 node_info.end_pos = ctxt->input->consumed +
6761 (CUR_PTR - ctxt->input->base);
6762 node_info.end_line = ctxt->input->line;
6763 node_info.node = ret;
6764 xmlParserAddNodeInfo(ctxt, &node_info);
6765 }
6766 return;
6767 }
6768 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006769 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006770 } else {
6771 ctxt->errNo = XML_ERR_GT_REQUIRED;
6772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6773 ctxt->sax->error(ctxt->userData,
6774 "Couldn't find end of Start Tag\n%.30s\n",
6775 openTag);
6776 ctxt->wellFormed = 0;
6777 ctxt->disableSAX = 1;
6778
6779 /*
6780 * end of parsing of this node.
6781 */
6782 nodePop(ctxt);
6783 oldname = namePop(ctxt);
6784 spacePop(ctxt);
6785 if (oldname != NULL) {
6786#ifdef DEBUG_STACK
6787 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6788#endif
6789 xmlFree(oldname);
6790 }
6791
6792 /*
6793 * Capture end position and add node
6794 */
6795 if ( ret != NULL && ctxt->record_info ) {
6796 node_info.end_pos = ctxt->input->consumed +
6797 (CUR_PTR - ctxt->input->base);
6798 node_info.end_line = ctxt->input->line;
6799 node_info.node = ret;
6800 xmlParserAddNodeInfo(ctxt, &node_info);
6801 }
6802 return;
6803 }
6804
6805 /*
6806 * Parse the content of the element:
6807 */
6808 xmlParseContent(ctxt);
6809 if (!IS_CHAR(RAW)) {
6810 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6811 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6812 ctxt->sax->error(ctxt->userData,
6813 "Premature end of data in tag %.30s\n", openTag);
6814 ctxt->wellFormed = 0;
6815 ctxt->disableSAX = 1;
6816
6817 /*
6818 * end of parsing of this node.
6819 */
6820 nodePop(ctxt);
6821 oldname = namePop(ctxt);
6822 spacePop(ctxt);
6823 if (oldname != NULL) {
6824#ifdef DEBUG_STACK
6825 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6826#endif
6827 xmlFree(oldname);
6828 }
6829 return;
6830 }
6831
6832 /*
6833 * parse the end of tag: '</' should be here.
6834 */
6835 xmlParseEndTag(ctxt);
6836
6837 /*
6838 * Capture end position and add node
6839 */
6840 if ( ret != NULL && ctxt->record_info ) {
6841 node_info.end_pos = ctxt->input->consumed +
6842 (CUR_PTR - ctxt->input->base);
6843 node_info.end_line = ctxt->input->line;
6844 node_info.node = ret;
6845 xmlParserAddNodeInfo(ctxt, &node_info);
6846 }
6847}
6848
6849/**
6850 * xmlParseVersionNum:
6851 * @ctxt: an XML parser context
6852 *
6853 * parse the XML version value.
6854 *
6855 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6856 *
6857 * Returns the string giving the XML version number, or NULL
6858 */
6859xmlChar *
6860xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6861 xmlChar *buf = NULL;
6862 int len = 0;
6863 int size = 10;
6864 xmlChar cur;
6865
6866 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6867 if (buf == NULL) {
6868 xmlGenericError(xmlGenericErrorContext,
6869 "malloc of %d byte failed\n", size);
6870 return(NULL);
6871 }
6872 cur = CUR;
6873 while (((cur >= 'a') && (cur <= 'z')) ||
6874 ((cur >= 'A') && (cur <= 'Z')) ||
6875 ((cur >= '0') && (cur <= '9')) ||
6876 (cur == '_') || (cur == '.') ||
6877 (cur == ':') || (cur == '-')) {
6878 if (len + 1 >= size) {
6879 size *= 2;
6880 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6881 if (buf == NULL) {
6882 xmlGenericError(xmlGenericErrorContext,
6883 "realloc of %d byte failed\n", size);
6884 return(NULL);
6885 }
6886 }
6887 buf[len++] = cur;
6888 NEXT;
6889 cur=CUR;
6890 }
6891 buf[len] = 0;
6892 return(buf);
6893}
6894
6895/**
6896 * xmlParseVersionInfo:
6897 * @ctxt: an XML parser context
6898 *
6899 * parse the XML version.
6900 *
6901 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6902 *
6903 * [25] Eq ::= S? '=' S?
6904 *
6905 * Returns the version string, e.g. "1.0"
6906 */
6907
6908xmlChar *
6909xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6910 xmlChar *version = NULL;
6911 const xmlChar *q;
6912
6913 if ((RAW == 'v') && (NXT(1) == 'e') &&
6914 (NXT(2) == 'r') && (NXT(3) == 's') &&
6915 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6916 (NXT(6) == 'n')) {
6917 SKIP(7);
6918 SKIP_BLANKS;
6919 if (RAW != '=') {
6920 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6922 ctxt->sax->error(ctxt->userData,
6923 "xmlParseVersionInfo : expected '='\n");
6924 ctxt->wellFormed = 0;
6925 ctxt->disableSAX = 1;
6926 return(NULL);
6927 }
6928 NEXT;
6929 SKIP_BLANKS;
6930 if (RAW == '"') {
6931 NEXT;
6932 q = CUR_PTR;
6933 version = xmlParseVersionNum(ctxt);
6934 if (RAW != '"') {
6935 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6937 ctxt->sax->error(ctxt->userData,
6938 "String not closed\n%.50s\n", q);
6939 ctxt->wellFormed = 0;
6940 ctxt->disableSAX = 1;
6941 } else
6942 NEXT;
6943 } else if (RAW == '\''){
6944 NEXT;
6945 q = CUR_PTR;
6946 version = xmlParseVersionNum(ctxt);
6947 if (RAW != '\'') {
6948 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6950 ctxt->sax->error(ctxt->userData,
6951 "String not closed\n%.50s\n", q);
6952 ctxt->wellFormed = 0;
6953 ctxt->disableSAX = 1;
6954 } else
6955 NEXT;
6956 } else {
6957 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6959 ctxt->sax->error(ctxt->userData,
6960 "xmlParseVersionInfo : expected ' or \"\n");
6961 ctxt->wellFormed = 0;
6962 ctxt->disableSAX = 1;
6963 }
6964 }
6965 return(version);
6966}
6967
6968/**
6969 * xmlParseEncName:
6970 * @ctxt: an XML parser context
6971 *
6972 * parse the XML encoding name
6973 *
6974 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6975 *
6976 * Returns the encoding name value or NULL
6977 */
6978xmlChar *
6979xmlParseEncName(xmlParserCtxtPtr ctxt) {
6980 xmlChar *buf = NULL;
6981 int len = 0;
6982 int size = 10;
6983 xmlChar cur;
6984
6985 cur = CUR;
6986 if (((cur >= 'a') && (cur <= 'z')) ||
6987 ((cur >= 'A') && (cur <= 'Z'))) {
6988 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6989 if (buf == NULL) {
6990 xmlGenericError(xmlGenericErrorContext,
6991 "malloc of %d byte failed\n", size);
6992 return(NULL);
6993 }
6994
6995 buf[len++] = cur;
6996 NEXT;
6997 cur = CUR;
6998 while (((cur >= 'a') && (cur <= 'z')) ||
6999 ((cur >= 'A') && (cur <= 'Z')) ||
7000 ((cur >= '0') && (cur <= '9')) ||
7001 (cur == '.') || (cur == '_') ||
7002 (cur == '-')) {
7003 if (len + 1 >= size) {
7004 size *= 2;
7005 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
7006 if (buf == NULL) {
7007 xmlGenericError(xmlGenericErrorContext,
7008 "realloc of %d byte failed\n", size);
7009 return(NULL);
7010 }
7011 }
7012 buf[len++] = cur;
7013 NEXT;
7014 cur = CUR;
7015 if (cur == 0) {
7016 SHRINK;
7017 GROW;
7018 cur = CUR;
7019 }
7020 }
7021 buf[len] = 0;
7022 } else {
7023 ctxt->errNo = XML_ERR_ENCODING_NAME;
7024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7025 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7026 ctxt->wellFormed = 0;
7027 ctxt->disableSAX = 1;
7028 }
7029 return(buf);
7030}
7031
7032/**
7033 * xmlParseEncodingDecl:
7034 * @ctxt: an XML parser context
7035 *
7036 * parse the XML encoding declaration
7037 *
7038 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7039 *
7040 * this setups the conversion filters.
7041 *
7042 * Returns the encoding value or NULL
7043 */
7044
7045xmlChar *
7046xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7047 xmlChar *encoding = NULL;
7048 const xmlChar *q;
7049
7050 SKIP_BLANKS;
7051 if ((RAW == 'e') && (NXT(1) == 'n') &&
7052 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7053 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7054 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7055 SKIP(8);
7056 SKIP_BLANKS;
7057 if (RAW != '=') {
7058 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7060 ctxt->sax->error(ctxt->userData,
7061 "xmlParseEncodingDecl : expected '='\n");
7062 ctxt->wellFormed = 0;
7063 ctxt->disableSAX = 1;
7064 return(NULL);
7065 }
7066 NEXT;
7067 SKIP_BLANKS;
7068 if (RAW == '"') {
7069 NEXT;
7070 q = CUR_PTR;
7071 encoding = xmlParseEncName(ctxt);
7072 if (RAW != '"') {
7073 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7074 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7075 ctxt->sax->error(ctxt->userData,
7076 "String not closed\n%.50s\n", q);
7077 ctxt->wellFormed = 0;
7078 ctxt->disableSAX = 1;
7079 } else
7080 NEXT;
7081 } else if (RAW == '\''){
7082 NEXT;
7083 q = CUR_PTR;
7084 encoding = xmlParseEncName(ctxt);
7085 if (RAW != '\'') {
7086 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7088 ctxt->sax->error(ctxt->userData,
7089 "String not closed\n%.50s\n", q);
7090 ctxt->wellFormed = 0;
7091 ctxt->disableSAX = 1;
7092 } else
7093 NEXT;
7094 } else if (RAW == '"'){
7095 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7097 ctxt->sax->error(ctxt->userData,
7098 "xmlParseEncodingDecl : expected ' or \"\n");
7099 ctxt->wellFormed = 0;
7100 ctxt->disableSAX = 1;
7101 }
7102 if (encoding != NULL) {
7103 xmlCharEncoding enc;
7104 xmlCharEncodingHandlerPtr handler;
7105
7106 if (ctxt->input->encoding != NULL)
7107 xmlFree((xmlChar *) ctxt->input->encoding);
7108 ctxt->input->encoding = encoding;
7109
7110 enc = xmlParseCharEncoding((const char *) encoding);
7111 /*
7112 * registered set of known encodings
7113 */
7114 if (enc != XML_CHAR_ENCODING_ERROR) {
7115 xmlSwitchEncoding(ctxt, enc);
7116 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7117 xmlFree(encoding);
7118 return(NULL);
7119 }
7120 } else {
7121 /*
7122 * fallback for unknown encodings
7123 */
7124 handler = xmlFindCharEncodingHandler((const char *) encoding);
7125 if (handler != NULL) {
7126 xmlSwitchToEncoding(ctxt, handler);
7127 } else {
7128 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7130 ctxt->sax->error(ctxt->userData,
7131 "Unsupported encoding %s\n", encoding);
7132 return(NULL);
7133 }
7134 }
7135 }
7136 }
7137 return(encoding);
7138}
7139
7140/**
7141 * xmlParseSDDecl:
7142 * @ctxt: an XML parser context
7143 *
7144 * parse the XML standalone declaration
7145 *
7146 * [32] SDDecl ::= S 'standalone' Eq
7147 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7148 *
7149 * [ VC: Standalone Document Declaration ]
7150 * TODO The standalone document declaration must have the value "no"
7151 * if any external markup declarations contain declarations of:
7152 * - attributes with default values, if elements to which these
7153 * attributes apply appear in the document without specifications
7154 * of values for these attributes, or
7155 * - entities (other than amp, lt, gt, apos, quot), if references
7156 * to those entities appear in the document, or
7157 * - attributes with values subject to normalization, where the
7158 * attribute appears in the document with a value which will change
7159 * as a result of normalization, or
7160 * - element types with element content, if white space occurs directly
7161 * within any instance of those types.
7162 *
7163 * Returns 1 if standalone, 0 otherwise
7164 */
7165
7166int
7167xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7168 int standalone = -1;
7169
7170 SKIP_BLANKS;
7171 if ((RAW == 's') && (NXT(1) == 't') &&
7172 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7173 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7174 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7175 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7176 SKIP(10);
7177 SKIP_BLANKS;
7178 if (RAW != '=') {
7179 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7180 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7181 ctxt->sax->error(ctxt->userData,
7182 "XML standalone declaration : expected '='\n");
7183 ctxt->wellFormed = 0;
7184 ctxt->disableSAX = 1;
7185 return(standalone);
7186 }
7187 NEXT;
7188 SKIP_BLANKS;
7189 if (RAW == '\''){
7190 NEXT;
7191 if ((RAW == 'n') && (NXT(1) == 'o')) {
7192 standalone = 0;
7193 SKIP(2);
7194 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7195 (NXT(2) == 's')) {
7196 standalone = 1;
7197 SKIP(3);
7198 } else {
7199 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7201 ctxt->sax->error(ctxt->userData,
7202 "standalone accepts only 'yes' or 'no'\n");
7203 ctxt->wellFormed = 0;
7204 ctxt->disableSAX = 1;
7205 }
7206 if (RAW != '\'') {
7207 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7208 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7209 ctxt->sax->error(ctxt->userData, "String not closed\n");
7210 ctxt->wellFormed = 0;
7211 ctxt->disableSAX = 1;
7212 } else
7213 NEXT;
7214 } else if (RAW == '"'){
7215 NEXT;
7216 if ((RAW == 'n') && (NXT(1) == 'o')) {
7217 standalone = 0;
7218 SKIP(2);
7219 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7220 (NXT(2) == 's')) {
7221 standalone = 1;
7222 SKIP(3);
7223 } else {
7224 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7226 ctxt->sax->error(ctxt->userData,
7227 "standalone accepts only 'yes' or 'no'\n");
7228 ctxt->wellFormed = 0;
7229 ctxt->disableSAX = 1;
7230 }
7231 if (RAW != '"') {
7232 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7234 ctxt->sax->error(ctxt->userData, "String not closed\n");
7235 ctxt->wellFormed = 0;
7236 ctxt->disableSAX = 1;
7237 } else
7238 NEXT;
7239 } else {
7240 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7242 ctxt->sax->error(ctxt->userData,
7243 "Standalone value not found\n");
7244 ctxt->wellFormed = 0;
7245 ctxt->disableSAX = 1;
7246 }
7247 }
7248 return(standalone);
7249}
7250
7251/**
7252 * xmlParseXMLDecl:
7253 * @ctxt: an XML parser context
7254 *
7255 * parse an XML declaration header
7256 *
7257 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7258 */
7259
7260void
7261xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7262 xmlChar *version;
7263
7264 /*
7265 * We know that '<?xml' is here.
7266 */
7267 SKIP(5);
7268
7269 if (!IS_BLANK(RAW)) {
7270 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7271 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7272 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7273 ctxt->wellFormed = 0;
7274 ctxt->disableSAX = 1;
7275 }
7276 SKIP_BLANKS;
7277
7278 /*
7279 * We should have the VersionInfo here.
7280 */
7281 version = xmlParseVersionInfo(ctxt);
7282 if (version == NULL)
7283 version = xmlCharStrdup(XML_DEFAULT_VERSION);
Daniel Veillarda050d232001-09-05 15:51:05 +00007284 else if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
7285 /*
7286 * TODO: Blueberry should be detected here
7287 */
7288 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
7289 ctxt->sax->warning(ctxt->userData, "Unsupported version '%s'\n",
7290 version);
7291 }
Owen Taylor3473f882001-02-23 17:55:21 +00007292 ctxt->version = xmlStrdup(version);
7293 xmlFree(version);
7294
7295 /*
7296 * We may have the encoding declaration
7297 */
7298 if (!IS_BLANK(RAW)) {
7299 if ((RAW == '?') && (NXT(1) == '>')) {
7300 SKIP(2);
7301 return;
7302 }
7303 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7305 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7306 ctxt->wellFormed = 0;
7307 ctxt->disableSAX = 1;
7308 }
7309 xmlParseEncodingDecl(ctxt);
7310 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7311 /*
7312 * The XML REC instructs us to stop parsing right here
7313 */
7314 return;
7315 }
7316
7317 /*
7318 * We may have the standalone status.
7319 */
7320 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7321 if ((RAW == '?') && (NXT(1) == '>')) {
7322 SKIP(2);
7323 return;
7324 }
7325 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7327 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7328 ctxt->wellFormed = 0;
7329 ctxt->disableSAX = 1;
7330 }
7331 SKIP_BLANKS;
7332 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7333
7334 SKIP_BLANKS;
7335 if ((RAW == '?') && (NXT(1) == '>')) {
7336 SKIP(2);
7337 } else if (RAW == '>') {
7338 /* Deprecated old WD ... */
7339 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7340 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7341 ctxt->sax->error(ctxt->userData,
7342 "XML declaration must end-up with '?>'\n");
7343 ctxt->wellFormed = 0;
7344 ctxt->disableSAX = 1;
7345 NEXT;
7346 } else {
7347 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7348 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7349 ctxt->sax->error(ctxt->userData,
7350 "parsing XML declaration: '?>' expected\n");
7351 ctxt->wellFormed = 0;
7352 ctxt->disableSAX = 1;
7353 MOVETO_ENDTAG(CUR_PTR);
7354 NEXT;
7355 }
7356}
7357
7358/**
7359 * xmlParseMisc:
7360 * @ctxt: an XML parser context
7361 *
7362 * parse an XML Misc* optionnal field.
7363 *
7364 * [27] Misc ::= Comment | PI | S
7365 */
7366
7367void
7368xmlParseMisc(xmlParserCtxtPtr ctxt) {
7369 while (((RAW == '<') && (NXT(1) == '?')) ||
7370 ((RAW == '<') && (NXT(1) == '!') &&
7371 (NXT(2) == '-') && (NXT(3) == '-')) ||
7372 IS_BLANK(CUR)) {
7373 if ((RAW == '<') && (NXT(1) == '?')) {
7374 xmlParsePI(ctxt);
7375 } else if (IS_BLANK(CUR)) {
7376 NEXT;
7377 } else
7378 xmlParseComment(ctxt);
7379 }
7380}
7381
7382/**
7383 * xmlParseDocument:
7384 * @ctxt: an XML parser context
7385 *
7386 * parse an XML document (and build a tree if using the standard SAX
7387 * interface).
7388 *
7389 * [1] document ::= prolog element Misc*
7390 *
7391 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7392 *
7393 * Returns 0, -1 in case of error. the parser context is augmented
7394 * as a result of the parsing.
7395 */
7396
7397int
7398xmlParseDocument(xmlParserCtxtPtr ctxt) {
7399 xmlChar start[4];
7400 xmlCharEncoding enc;
7401
7402 xmlInitParser();
7403
7404 GROW;
7405
7406 /*
7407 * SAX: beginning of the document processing.
7408 */
7409 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7410 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7411
Daniel Veillard50f34372001-08-03 12:06:36 +00007412 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007413 /*
7414 * Get the 4 first bytes and decode the charset
7415 * if enc != XML_CHAR_ENCODING_NONE
7416 * plug some encoding conversion routines.
7417 */
7418 start[0] = RAW;
7419 start[1] = NXT(1);
7420 start[2] = NXT(2);
7421 start[3] = NXT(3);
7422 enc = xmlDetectCharEncoding(start, 4);
7423 if (enc != XML_CHAR_ENCODING_NONE) {
7424 xmlSwitchEncoding(ctxt, enc);
7425 }
Owen Taylor3473f882001-02-23 17:55:21 +00007426 }
7427
7428
7429 if (CUR == 0) {
7430 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7431 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7432 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7433 ctxt->wellFormed = 0;
7434 ctxt->disableSAX = 1;
7435 }
7436
7437 /*
7438 * Check for the XMLDecl in the Prolog.
7439 */
7440 GROW;
7441 if ((RAW == '<') && (NXT(1) == '?') &&
7442 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7443 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7444
7445 /*
7446 * Note that we will switch encoding on the fly.
7447 */
7448 xmlParseXMLDecl(ctxt);
7449 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7450 /*
7451 * The XML REC instructs us to stop parsing right here
7452 */
7453 return(-1);
7454 }
7455 ctxt->standalone = ctxt->input->standalone;
7456 SKIP_BLANKS;
7457 } else {
7458 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7459 }
7460 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7461 ctxt->sax->startDocument(ctxt->userData);
7462
7463 /*
7464 * The Misc part of the Prolog
7465 */
7466 GROW;
7467 xmlParseMisc(ctxt);
7468
7469 /*
7470 * Then possibly doc type declaration(s) and more Misc
7471 * (doctypedecl Misc*)?
7472 */
7473 GROW;
7474 if ((RAW == '<') && (NXT(1) == '!') &&
7475 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7476 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7477 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7478 (NXT(8) == 'E')) {
7479
7480 ctxt->inSubset = 1;
7481 xmlParseDocTypeDecl(ctxt);
7482 if (RAW == '[') {
7483 ctxt->instate = XML_PARSER_DTD;
7484 xmlParseInternalSubset(ctxt);
7485 }
7486
7487 /*
7488 * Create and update the external subset.
7489 */
7490 ctxt->inSubset = 2;
7491 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7492 (!ctxt->disableSAX))
7493 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7494 ctxt->extSubSystem, ctxt->extSubURI);
7495 ctxt->inSubset = 0;
7496
7497
7498 ctxt->instate = XML_PARSER_PROLOG;
7499 xmlParseMisc(ctxt);
7500 }
7501
7502 /*
7503 * Time to start parsing the tree itself
7504 */
7505 GROW;
7506 if (RAW != '<') {
7507 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7509 ctxt->sax->error(ctxt->userData,
7510 "Start tag expected, '<' not found\n");
7511 ctxt->wellFormed = 0;
7512 ctxt->disableSAX = 1;
7513 ctxt->instate = XML_PARSER_EOF;
7514 } else {
7515 ctxt->instate = XML_PARSER_CONTENT;
7516 xmlParseElement(ctxt);
7517 ctxt->instate = XML_PARSER_EPILOG;
7518
7519
7520 /*
7521 * The Misc part at the end
7522 */
7523 xmlParseMisc(ctxt);
7524
7525 if (RAW != 0) {
7526 ctxt->errNo = XML_ERR_DOCUMENT_END;
7527 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7528 ctxt->sax->error(ctxt->userData,
7529 "Extra content at the end of the document\n");
7530 ctxt->wellFormed = 0;
7531 ctxt->disableSAX = 1;
7532 }
7533 ctxt->instate = XML_PARSER_EOF;
7534 }
7535
7536 /*
7537 * SAX: end of the document processing.
7538 */
7539 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7540 (!ctxt->disableSAX))
7541 ctxt->sax->endDocument(ctxt->userData);
7542
7543 if (! ctxt->wellFormed) return(-1);
7544 return(0);
7545}
7546
7547/**
7548 * xmlParseExtParsedEnt:
7549 * @ctxt: an XML parser context
7550 *
7551 * parse a genreral parsed entity
7552 * An external general parsed entity is well-formed if it matches the
7553 * production labeled extParsedEnt.
7554 *
7555 * [78] extParsedEnt ::= TextDecl? content
7556 *
7557 * Returns 0, -1 in case of error. the parser context is augmented
7558 * as a result of the parsing.
7559 */
7560
7561int
7562xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7563 xmlChar start[4];
7564 xmlCharEncoding enc;
7565
7566 xmlDefaultSAXHandlerInit();
7567
7568 GROW;
7569
7570 /*
7571 * SAX: beginning of the document processing.
7572 */
7573 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7574 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7575
7576 /*
7577 * Get the 4 first bytes and decode the charset
7578 * if enc != XML_CHAR_ENCODING_NONE
7579 * plug some encoding conversion routines.
7580 */
7581 start[0] = RAW;
7582 start[1] = NXT(1);
7583 start[2] = NXT(2);
7584 start[3] = NXT(3);
7585 enc = xmlDetectCharEncoding(start, 4);
7586 if (enc != XML_CHAR_ENCODING_NONE) {
7587 xmlSwitchEncoding(ctxt, enc);
7588 }
7589
7590
7591 if (CUR == 0) {
7592 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7593 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7594 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7595 ctxt->wellFormed = 0;
7596 ctxt->disableSAX = 1;
7597 }
7598
7599 /*
7600 * Check for the XMLDecl in the Prolog.
7601 */
7602 GROW;
7603 if ((RAW == '<') && (NXT(1) == '?') &&
7604 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7605 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7606
7607 /*
7608 * Note that we will switch encoding on the fly.
7609 */
7610 xmlParseXMLDecl(ctxt);
7611 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7612 /*
7613 * The XML REC instructs us to stop parsing right here
7614 */
7615 return(-1);
7616 }
7617 SKIP_BLANKS;
7618 } else {
7619 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7620 }
7621 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7622 ctxt->sax->startDocument(ctxt->userData);
7623
7624 /*
7625 * Doing validity checking on chunk doesn't make sense
7626 */
7627 ctxt->instate = XML_PARSER_CONTENT;
7628 ctxt->validate = 0;
7629 ctxt->loadsubset = 0;
7630 ctxt->depth = 0;
7631
7632 xmlParseContent(ctxt);
7633
7634 if ((RAW == '<') && (NXT(1) == '/')) {
7635 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7637 ctxt->sax->error(ctxt->userData,
7638 "chunk is not well balanced\n");
7639 ctxt->wellFormed = 0;
7640 ctxt->disableSAX = 1;
7641 } else if (RAW != 0) {
7642 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7644 ctxt->sax->error(ctxt->userData,
7645 "extra content at the end of well balanced chunk\n");
7646 ctxt->wellFormed = 0;
7647 ctxt->disableSAX = 1;
7648 }
7649
7650 /*
7651 * SAX: end of the document processing.
7652 */
7653 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7654 (!ctxt->disableSAX))
7655 ctxt->sax->endDocument(ctxt->userData);
7656
7657 if (! ctxt->wellFormed) return(-1);
7658 return(0);
7659}
7660
7661/************************************************************************
7662 * *
7663 * Progressive parsing interfaces *
7664 * *
7665 ************************************************************************/
7666
7667/**
7668 * xmlParseLookupSequence:
7669 * @ctxt: an XML parser context
7670 * @first: the first char to lookup
7671 * @next: the next char to lookup or zero
7672 * @third: the next char to lookup or zero
7673 *
7674 * Try to find if a sequence (first, next, third) or just (first next) or
7675 * (first) is available in the input stream.
7676 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7677 * to avoid rescanning sequences of bytes, it DOES change the state of the
7678 * parser, do not use liberally.
7679 *
7680 * Returns the index to the current parsing point if the full sequence
7681 * is available, -1 otherwise.
7682 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007683static int
Owen Taylor3473f882001-02-23 17:55:21 +00007684xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7685 xmlChar next, xmlChar third) {
7686 int base, len;
7687 xmlParserInputPtr in;
7688 const xmlChar *buf;
7689
7690 in = ctxt->input;
7691 if (in == NULL) return(-1);
7692 base = in->cur - in->base;
7693 if (base < 0) return(-1);
7694 if (ctxt->checkIndex > base)
7695 base = ctxt->checkIndex;
7696 if (in->buf == NULL) {
7697 buf = in->base;
7698 len = in->length;
7699 } else {
7700 buf = in->buf->buffer->content;
7701 len = in->buf->buffer->use;
7702 }
7703 /* take into account the sequence length */
7704 if (third) len -= 2;
7705 else if (next) len --;
7706 for (;base < len;base++) {
7707 if (buf[base] == first) {
7708 if (third != 0) {
7709 if ((buf[base + 1] != next) ||
7710 (buf[base + 2] != third)) continue;
7711 } else if (next != 0) {
7712 if (buf[base + 1] != next) continue;
7713 }
7714 ctxt->checkIndex = 0;
7715#ifdef DEBUG_PUSH
7716 if (next == 0)
7717 xmlGenericError(xmlGenericErrorContext,
7718 "PP: lookup '%c' found at %d\n",
7719 first, base);
7720 else if (third == 0)
7721 xmlGenericError(xmlGenericErrorContext,
7722 "PP: lookup '%c%c' found at %d\n",
7723 first, next, base);
7724 else
7725 xmlGenericError(xmlGenericErrorContext,
7726 "PP: lookup '%c%c%c' found at %d\n",
7727 first, next, third, base);
7728#endif
7729 return(base - (in->cur - in->base));
7730 }
7731 }
7732 ctxt->checkIndex = base;
7733#ifdef DEBUG_PUSH
7734 if (next == 0)
7735 xmlGenericError(xmlGenericErrorContext,
7736 "PP: lookup '%c' failed\n", first);
7737 else if (third == 0)
7738 xmlGenericError(xmlGenericErrorContext,
7739 "PP: lookup '%c%c' failed\n", first, next);
7740 else
7741 xmlGenericError(xmlGenericErrorContext,
7742 "PP: lookup '%c%c%c' failed\n", first, next, third);
7743#endif
7744 return(-1);
7745}
7746
7747/**
7748 * xmlParseTryOrFinish:
7749 * @ctxt: an XML parser context
7750 * @terminate: last chunk indicator
7751 *
7752 * Try to progress on parsing
7753 *
7754 * Returns zero if no parsing was possible
7755 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007756static int
Owen Taylor3473f882001-02-23 17:55:21 +00007757xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7758 int ret = 0;
7759 int avail;
7760 xmlChar cur, next;
7761
7762#ifdef DEBUG_PUSH
7763 switch (ctxt->instate) {
7764 case XML_PARSER_EOF:
7765 xmlGenericError(xmlGenericErrorContext,
7766 "PP: try EOF\n"); break;
7767 case XML_PARSER_START:
7768 xmlGenericError(xmlGenericErrorContext,
7769 "PP: try START\n"); break;
7770 case XML_PARSER_MISC:
7771 xmlGenericError(xmlGenericErrorContext,
7772 "PP: try MISC\n");break;
7773 case XML_PARSER_COMMENT:
7774 xmlGenericError(xmlGenericErrorContext,
7775 "PP: try COMMENT\n");break;
7776 case XML_PARSER_PROLOG:
7777 xmlGenericError(xmlGenericErrorContext,
7778 "PP: try PROLOG\n");break;
7779 case XML_PARSER_START_TAG:
7780 xmlGenericError(xmlGenericErrorContext,
7781 "PP: try START_TAG\n");break;
7782 case XML_PARSER_CONTENT:
7783 xmlGenericError(xmlGenericErrorContext,
7784 "PP: try CONTENT\n");break;
7785 case XML_PARSER_CDATA_SECTION:
7786 xmlGenericError(xmlGenericErrorContext,
7787 "PP: try CDATA_SECTION\n");break;
7788 case XML_PARSER_END_TAG:
7789 xmlGenericError(xmlGenericErrorContext,
7790 "PP: try END_TAG\n");break;
7791 case XML_PARSER_ENTITY_DECL:
7792 xmlGenericError(xmlGenericErrorContext,
7793 "PP: try ENTITY_DECL\n");break;
7794 case XML_PARSER_ENTITY_VALUE:
7795 xmlGenericError(xmlGenericErrorContext,
7796 "PP: try ENTITY_VALUE\n");break;
7797 case XML_PARSER_ATTRIBUTE_VALUE:
7798 xmlGenericError(xmlGenericErrorContext,
7799 "PP: try ATTRIBUTE_VALUE\n");break;
7800 case XML_PARSER_DTD:
7801 xmlGenericError(xmlGenericErrorContext,
7802 "PP: try DTD\n");break;
7803 case XML_PARSER_EPILOG:
7804 xmlGenericError(xmlGenericErrorContext,
7805 "PP: try EPILOG\n");break;
7806 case XML_PARSER_PI:
7807 xmlGenericError(xmlGenericErrorContext,
7808 "PP: try PI\n");break;
7809 case XML_PARSER_IGNORE:
7810 xmlGenericError(xmlGenericErrorContext,
7811 "PP: try IGNORE\n");break;
7812 }
7813#endif
7814
7815 while (1) {
7816 /*
7817 * Pop-up of finished entities.
7818 */
7819 while ((RAW == 0) && (ctxt->inputNr > 1))
7820 xmlPopInput(ctxt);
7821
7822 if (ctxt->input ==NULL) break;
7823 if (ctxt->input->buf == NULL)
7824 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7825 else
7826 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7827 if (avail < 1)
7828 goto done;
7829 switch (ctxt->instate) {
7830 case XML_PARSER_EOF:
7831 /*
7832 * Document parsing is done !
7833 */
7834 goto done;
7835 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007836 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7837 xmlChar start[4];
7838 xmlCharEncoding enc;
7839
7840 /*
7841 * Very first chars read from the document flow.
7842 */
7843 if (avail < 4)
7844 goto done;
7845
7846 /*
7847 * Get the 4 first bytes and decode the charset
7848 * if enc != XML_CHAR_ENCODING_NONE
7849 * plug some encoding conversion routines.
7850 */
7851 start[0] = RAW;
7852 start[1] = NXT(1);
7853 start[2] = NXT(2);
7854 start[3] = NXT(3);
7855 enc = xmlDetectCharEncoding(start, 4);
7856 if (enc != XML_CHAR_ENCODING_NONE) {
7857 xmlSwitchEncoding(ctxt, enc);
7858 }
7859 break;
7860 }
Owen Taylor3473f882001-02-23 17:55:21 +00007861
7862 cur = ctxt->input->cur[0];
7863 next = ctxt->input->cur[1];
7864 if (cur == 0) {
7865 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7866 ctxt->sax->setDocumentLocator(ctxt->userData,
7867 &xmlDefaultSAXLocator);
7868 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7870 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7871 ctxt->wellFormed = 0;
7872 ctxt->disableSAX = 1;
7873 ctxt->instate = XML_PARSER_EOF;
7874#ifdef DEBUG_PUSH
7875 xmlGenericError(xmlGenericErrorContext,
7876 "PP: entering EOF\n");
7877#endif
7878 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7879 ctxt->sax->endDocument(ctxt->userData);
7880 goto done;
7881 }
7882 if ((cur == '<') && (next == '?')) {
7883 /* PI or XML decl */
7884 if (avail < 5) return(ret);
7885 if ((!terminate) &&
7886 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7887 return(ret);
7888 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7889 ctxt->sax->setDocumentLocator(ctxt->userData,
7890 &xmlDefaultSAXLocator);
7891 if ((ctxt->input->cur[2] == 'x') &&
7892 (ctxt->input->cur[3] == 'm') &&
7893 (ctxt->input->cur[4] == 'l') &&
7894 (IS_BLANK(ctxt->input->cur[5]))) {
7895 ret += 5;
7896#ifdef DEBUG_PUSH
7897 xmlGenericError(xmlGenericErrorContext,
7898 "PP: Parsing XML Decl\n");
7899#endif
7900 xmlParseXMLDecl(ctxt);
7901 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7902 /*
7903 * The XML REC instructs us to stop parsing right
7904 * here
7905 */
7906 ctxt->instate = XML_PARSER_EOF;
7907 return(0);
7908 }
7909 ctxt->standalone = ctxt->input->standalone;
7910 if ((ctxt->encoding == NULL) &&
7911 (ctxt->input->encoding != NULL))
7912 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7913 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7914 (!ctxt->disableSAX))
7915 ctxt->sax->startDocument(ctxt->userData);
7916 ctxt->instate = XML_PARSER_MISC;
7917#ifdef DEBUG_PUSH
7918 xmlGenericError(xmlGenericErrorContext,
7919 "PP: entering MISC\n");
7920#endif
7921 } else {
7922 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7923 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7924 (!ctxt->disableSAX))
7925 ctxt->sax->startDocument(ctxt->userData);
7926 ctxt->instate = XML_PARSER_MISC;
7927#ifdef DEBUG_PUSH
7928 xmlGenericError(xmlGenericErrorContext,
7929 "PP: entering MISC\n");
7930#endif
7931 }
7932 } else {
7933 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7934 ctxt->sax->setDocumentLocator(ctxt->userData,
7935 &xmlDefaultSAXLocator);
7936 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7937 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7938 (!ctxt->disableSAX))
7939 ctxt->sax->startDocument(ctxt->userData);
7940 ctxt->instate = XML_PARSER_MISC;
7941#ifdef DEBUG_PUSH
7942 xmlGenericError(xmlGenericErrorContext,
7943 "PP: entering MISC\n");
7944#endif
7945 }
7946 break;
7947 case XML_PARSER_MISC:
7948 SKIP_BLANKS;
7949 if (ctxt->input->buf == NULL)
7950 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7951 else
7952 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7953 if (avail < 2)
7954 goto done;
7955 cur = ctxt->input->cur[0];
7956 next = ctxt->input->cur[1];
7957 if ((cur == '<') && (next == '?')) {
7958 if ((!terminate) &&
7959 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7960 goto done;
7961#ifdef DEBUG_PUSH
7962 xmlGenericError(xmlGenericErrorContext,
7963 "PP: Parsing PI\n");
7964#endif
7965 xmlParsePI(ctxt);
7966 } else if ((cur == '<') && (next == '!') &&
7967 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7968 if ((!terminate) &&
7969 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7970 goto done;
7971#ifdef DEBUG_PUSH
7972 xmlGenericError(xmlGenericErrorContext,
7973 "PP: Parsing Comment\n");
7974#endif
7975 xmlParseComment(ctxt);
7976 ctxt->instate = XML_PARSER_MISC;
7977 } else if ((cur == '<') && (next == '!') &&
7978 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7979 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7980 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7981 (ctxt->input->cur[8] == 'E')) {
7982 if ((!terminate) &&
7983 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7984 goto done;
7985#ifdef DEBUG_PUSH
7986 xmlGenericError(xmlGenericErrorContext,
7987 "PP: Parsing internal subset\n");
7988#endif
7989 ctxt->inSubset = 1;
7990 xmlParseDocTypeDecl(ctxt);
7991 if (RAW == '[') {
7992 ctxt->instate = XML_PARSER_DTD;
7993#ifdef DEBUG_PUSH
7994 xmlGenericError(xmlGenericErrorContext,
7995 "PP: entering DTD\n");
7996#endif
7997 } else {
7998 /*
7999 * Create and update the external subset.
8000 */
8001 ctxt->inSubset = 2;
8002 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8003 (ctxt->sax->externalSubset != NULL))
8004 ctxt->sax->externalSubset(ctxt->userData,
8005 ctxt->intSubName, ctxt->extSubSystem,
8006 ctxt->extSubURI);
8007 ctxt->inSubset = 0;
8008 ctxt->instate = XML_PARSER_PROLOG;
8009#ifdef DEBUG_PUSH
8010 xmlGenericError(xmlGenericErrorContext,
8011 "PP: entering PROLOG\n");
8012#endif
8013 }
8014 } else if ((cur == '<') && (next == '!') &&
8015 (avail < 9)) {
8016 goto done;
8017 } else {
8018 ctxt->instate = XML_PARSER_START_TAG;
8019#ifdef DEBUG_PUSH
8020 xmlGenericError(xmlGenericErrorContext,
8021 "PP: entering START_TAG\n");
8022#endif
8023 }
8024 break;
8025 case XML_PARSER_IGNORE:
8026 xmlGenericError(xmlGenericErrorContext,
8027 "PP: internal error, state == IGNORE");
8028 ctxt->instate = XML_PARSER_DTD;
8029#ifdef DEBUG_PUSH
8030 xmlGenericError(xmlGenericErrorContext,
8031 "PP: entering DTD\n");
8032#endif
8033 break;
8034 case XML_PARSER_PROLOG:
8035 SKIP_BLANKS;
8036 if (ctxt->input->buf == NULL)
8037 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8038 else
8039 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8040 if (avail < 2)
8041 goto done;
8042 cur = ctxt->input->cur[0];
8043 next = ctxt->input->cur[1];
8044 if ((cur == '<') && (next == '?')) {
8045 if ((!terminate) &&
8046 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8047 goto done;
8048#ifdef DEBUG_PUSH
8049 xmlGenericError(xmlGenericErrorContext,
8050 "PP: Parsing PI\n");
8051#endif
8052 xmlParsePI(ctxt);
8053 } else if ((cur == '<') && (next == '!') &&
8054 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8055 if ((!terminate) &&
8056 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8057 goto done;
8058#ifdef DEBUG_PUSH
8059 xmlGenericError(xmlGenericErrorContext,
8060 "PP: Parsing Comment\n");
8061#endif
8062 xmlParseComment(ctxt);
8063 ctxt->instate = XML_PARSER_PROLOG;
8064 } else if ((cur == '<') && (next == '!') &&
8065 (avail < 4)) {
8066 goto done;
8067 } else {
8068 ctxt->instate = XML_PARSER_START_TAG;
8069#ifdef DEBUG_PUSH
8070 xmlGenericError(xmlGenericErrorContext,
8071 "PP: entering START_TAG\n");
8072#endif
8073 }
8074 break;
8075 case XML_PARSER_EPILOG:
8076 SKIP_BLANKS;
8077 if (ctxt->input->buf == NULL)
8078 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8079 else
8080 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8081 if (avail < 2)
8082 goto done;
8083 cur = ctxt->input->cur[0];
8084 next = ctxt->input->cur[1];
8085 if ((cur == '<') && (next == '?')) {
8086 if ((!terminate) &&
8087 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8088 goto done;
8089#ifdef DEBUG_PUSH
8090 xmlGenericError(xmlGenericErrorContext,
8091 "PP: Parsing PI\n");
8092#endif
8093 xmlParsePI(ctxt);
8094 ctxt->instate = XML_PARSER_EPILOG;
8095 } else if ((cur == '<') && (next == '!') &&
8096 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8097 if ((!terminate) &&
8098 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8099 goto done;
8100#ifdef DEBUG_PUSH
8101 xmlGenericError(xmlGenericErrorContext,
8102 "PP: Parsing Comment\n");
8103#endif
8104 xmlParseComment(ctxt);
8105 ctxt->instate = XML_PARSER_EPILOG;
8106 } else if ((cur == '<') && (next == '!') &&
8107 (avail < 4)) {
8108 goto done;
8109 } else {
8110 ctxt->errNo = XML_ERR_DOCUMENT_END;
8111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8112 ctxt->sax->error(ctxt->userData,
8113 "Extra content at the end of the document\n");
8114 ctxt->wellFormed = 0;
8115 ctxt->disableSAX = 1;
8116 ctxt->instate = XML_PARSER_EOF;
8117#ifdef DEBUG_PUSH
8118 xmlGenericError(xmlGenericErrorContext,
8119 "PP: entering EOF\n");
8120#endif
8121 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8122 (!ctxt->disableSAX))
8123 ctxt->sax->endDocument(ctxt->userData);
8124 goto done;
8125 }
8126 break;
8127 case XML_PARSER_START_TAG: {
8128 xmlChar *name, *oldname;
8129
8130 if ((avail < 2) && (ctxt->inputNr == 1))
8131 goto done;
8132 cur = ctxt->input->cur[0];
8133 if (cur != '<') {
8134 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8136 ctxt->sax->error(ctxt->userData,
8137 "Start tag expect, '<' not found\n");
8138 ctxt->wellFormed = 0;
8139 ctxt->disableSAX = 1;
8140 ctxt->instate = XML_PARSER_EOF;
8141#ifdef DEBUG_PUSH
8142 xmlGenericError(xmlGenericErrorContext,
8143 "PP: entering EOF\n");
8144#endif
8145 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8146 (!ctxt->disableSAX))
8147 ctxt->sax->endDocument(ctxt->userData);
8148 goto done;
8149 }
8150 if ((!terminate) &&
8151 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8152 goto done;
8153 if (ctxt->spaceNr == 0)
8154 spacePush(ctxt, -1);
8155 else
8156 spacePush(ctxt, *ctxt->space);
8157 name = xmlParseStartTag(ctxt);
8158 if (name == NULL) {
8159 spacePop(ctxt);
8160 ctxt->instate = XML_PARSER_EOF;
8161#ifdef DEBUG_PUSH
8162 xmlGenericError(xmlGenericErrorContext,
8163 "PP: entering EOF\n");
8164#endif
8165 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8166 (!ctxt->disableSAX))
8167 ctxt->sax->endDocument(ctxt->userData);
8168 goto done;
8169 }
8170 namePush(ctxt, xmlStrdup(name));
8171
8172 /*
8173 * [ VC: Root Element Type ]
8174 * The Name in the document type declaration must match
8175 * the element type of the root element.
8176 */
8177 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8178 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8179 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8180
8181 /*
8182 * Check for an Empty Element.
8183 */
8184 if ((RAW == '/') && (NXT(1) == '>')) {
8185 SKIP(2);
8186 if ((ctxt->sax != NULL) &&
8187 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8188 ctxt->sax->endElement(ctxt->userData, name);
8189 xmlFree(name);
8190 oldname = namePop(ctxt);
8191 spacePop(ctxt);
8192 if (oldname != NULL) {
8193#ifdef DEBUG_STACK
8194 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8195#endif
8196 xmlFree(oldname);
8197 }
8198 if (ctxt->name == NULL) {
8199 ctxt->instate = XML_PARSER_EPILOG;
8200#ifdef DEBUG_PUSH
8201 xmlGenericError(xmlGenericErrorContext,
8202 "PP: entering EPILOG\n");
8203#endif
8204 } else {
8205 ctxt->instate = XML_PARSER_CONTENT;
8206#ifdef DEBUG_PUSH
8207 xmlGenericError(xmlGenericErrorContext,
8208 "PP: entering CONTENT\n");
8209#endif
8210 }
8211 break;
8212 }
8213 if (RAW == '>') {
8214 NEXT;
8215 } else {
8216 ctxt->errNo = XML_ERR_GT_REQUIRED;
8217 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8218 ctxt->sax->error(ctxt->userData,
8219 "Couldn't find end of Start Tag %s\n",
8220 name);
8221 ctxt->wellFormed = 0;
8222 ctxt->disableSAX = 1;
8223
8224 /*
8225 * end of parsing of this node.
8226 */
8227 nodePop(ctxt);
8228 oldname = namePop(ctxt);
8229 spacePop(ctxt);
8230 if (oldname != NULL) {
8231#ifdef DEBUG_STACK
8232 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8233#endif
8234 xmlFree(oldname);
8235 }
8236 }
8237 xmlFree(name);
8238 ctxt->instate = XML_PARSER_CONTENT;
8239#ifdef DEBUG_PUSH
8240 xmlGenericError(xmlGenericErrorContext,
8241 "PP: entering CONTENT\n");
8242#endif
8243 break;
8244 }
8245 case XML_PARSER_CONTENT: {
8246 const xmlChar *test;
8247 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008248 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008249
8250 /*
8251 * Handle preparsed entities and charRef
8252 */
8253 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008254 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008255
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008256 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008257 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8258 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008259 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008260 ctxt->token = 0;
8261 }
8262 if ((avail < 2) && (ctxt->inputNr == 1))
8263 goto done;
8264 cur = ctxt->input->cur[0];
8265 next = ctxt->input->cur[1];
8266
8267 test = CUR_PTR;
8268 cons = ctxt->input->consumed;
8269 tok = ctxt->token;
8270 if ((cur == '<') && (next == '?')) {
8271 if ((!terminate) &&
8272 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8273 goto done;
8274#ifdef DEBUG_PUSH
8275 xmlGenericError(xmlGenericErrorContext,
8276 "PP: Parsing PI\n");
8277#endif
8278 xmlParsePI(ctxt);
8279 } else if ((cur == '<') && (next == '!') &&
8280 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8281 if ((!terminate) &&
8282 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8283 goto done;
8284#ifdef DEBUG_PUSH
8285 xmlGenericError(xmlGenericErrorContext,
8286 "PP: Parsing Comment\n");
8287#endif
8288 xmlParseComment(ctxt);
8289 ctxt->instate = XML_PARSER_CONTENT;
8290 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8291 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8292 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8293 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8294 (ctxt->input->cur[8] == '[')) {
8295 SKIP(9);
8296 ctxt->instate = XML_PARSER_CDATA_SECTION;
8297#ifdef DEBUG_PUSH
8298 xmlGenericError(xmlGenericErrorContext,
8299 "PP: entering CDATA_SECTION\n");
8300#endif
8301 break;
8302 } else if ((cur == '<') && (next == '!') &&
8303 (avail < 9)) {
8304 goto done;
8305 } else if ((cur == '<') && (next == '/')) {
8306 ctxt->instate = XML_PARSER_END_TAG;
8307#ifdef DEBUG_PUSH
8308 xmlGenericError(xmlGenericErrorContext,
8309 "PP: entering END_TAG\n");
8310#endif
8311 break;
8312 } else if (cur == '<') {
8313 ctxt->instate = XML_PARSER_START_TAG;
8314#ifdef DEBUG_PUSH
8315 xmlGenericError(xmlGenericErrorContext,
8316 "PP: entering START_TAG\n");
8317#endif
8318 break;
8319 } else if (cur == '&') {
8320 if ((!terminate) &&
8321 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8322 goto done;
8323#ifdef DEBUG_PUSH
8324 xmlGenericError(xmlGenericErrorContext,
8325 "PP: Parsing Reference\n");
8326#endif
8327 xmlParseReference(ctxt);
8328 } else {
8329 /* TODO Avoid the extra copy, handle directly !!! */
8330 /*
8331 * Goal of the following test is:
8332 * - minimize calls to the SAX 'character' callback
8333 * when they are mergeable
8334 * - handle an problem for isBlank when we only parse
8335 * a sequence of blank chars and the next one is
8336 * not available to check against '<' presence.
8337 * - tries to homogenize the differences in SAX
8338 * callbacks beween the push and pull versions
8339 * of the parser.
8340 */
8341 if ((ctxt->inputNr == 1) &&
8342 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8343 if ((!terminate) &&
8344 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8345 goto done;
8346 }
8347 ctxt->checkIndex = 0;
8348#ifdef DEBUG_PUSH
8349 xmlGenericError(xmlGenericErrorContext,
8350 "PP: Parsing char data\n");
8351#endif
8352 xmlParseCharData(ctxt, 0);
8353 }
8354 /*
8355 * Pop-up of finished entities.
8356 */
8357 while ((RAW == 0) && (ctxt->inputNr > 1))
8358 xmlPopInput(ctxt);
8359 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8360 (tok == ctxt->token)) {
8361 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8363 ctxt->sax->error(ctxt->userData,
8364 "detected an error in element content\n");
8365 ctxt->wellFormed = 0;
8366 ctxt->disableSAX = 1;
8367 ctxt->instate = XML_PARSER_EOF;
8368 break;
8369 }
8370 break;
8371 }
8372 case XML_PARSER_CDATA_SECTION: {
8373 /*
8374 * The Push mode need to have the SAX callback for
8375 * cdataBlock merge back contiguous callbacks.
8376 */
8377 int base;
8378
8379 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8380 if (base < 0) {
8381 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8382 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8383 if (ctxt->sax->cdataBlock != NULL)
8384 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8385 XML_PARSER_BIG_BUFFER_SIZE);
8386 }
8387 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8388 ctxt->checkIndex = 0;
8389 }
8390 goto done;
8391 } else {
8392 if ((ctxt->sax != NULL) && (base > 0) &&
8393 (!ctxt->disableSAX)) {
8394 if (ctxt->sax->cdataBlock != NULL)
8395 ctxt->sax->cdataBlock(ctxt->userData,
8396 ctxt->input->cur, base);
8397 }
8398 SKIP(base + 3);
8399 ctxt->checkIndex = 0;
8400 ctxt->instate = XML_PARSER_CONTENT;
8401#ifdef DEBUG_PUSH
8402 xmlGenericError(xmlGenericErrorContext,
8403 "PP: entering CONTENT\n");
8404#endif
8405 }
8406 break;
8407 }
8408 case XML_PARSER_END_TAG:
8409 if (avail < 2)
8410 goto done;
8411 if ((!terminate) &&
8412 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8413 goto done;
8414 xmlParseEndTag(ctxt);
8415 if (ctxt->name == NULL) {
8416 ctxt->instate = XML_PARSER_EPILOG;
8417#ifdef DEBUG_PUSH
8418 xmlGenericError(xmlGenericErrorContext,
8419 "PP: entering EPILOG\n");
8420#endif
8421 } else {
8422 ctxt->instate = XML_PARSER_CONTENT;
8423#ifdef DEBUG_PUSH
8424 xmlGenericError(xmlGenericErrorContext,
8425 "PP: entering CONTENT\n");
8426#endif
8427 }
8428 break;
8429 case XML_PARSER_DTD: {
8430 /*
8431 * Sorry but progressive parsing of the internal subset
8432 * is not expected to be supported. We first check that
8433 * the full content of the internal subset is available and
8434 * the parsing is launched only at that point.
8435 * Internal subset ends up with "']' S? '>'" in an unescaped
8436 * section and not in a ']]>' sequence which are conditional
8437 * sections (whoever argued to keep that crap in XML deserve
8438 * a place in hell !).
8439 */
8440 int base, i;
8441 xmlChar *buf;
8442 xmlChar quote = 0;
8443
8444 base = ctxt->input->cur - ctxt->input->base;
8445 if (base < 0) return(0);
8446 if (ctxt->checkIndex > base)
8447 base = ctxt->checkIndex;
8448 buf = ctxt->input->buf->buffer->content;
8449 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8450 base++) {
8451 if (quote != 0) {
8452 if (buf[base] == quote)
8453 quote = 0;
8454 continue;
8455 }
8456 if (buf[base] == '"') {
8457 quote = '"';
8458 continue;
8459 }
8460 if (buf[base] == '\'') {
8461 quote = '\'';
8462 continue;
8463 }
8464 if (buf[base] == ']') {
8465 if ((unsigned int) base +1 >=
8466 ctxt->input->buf->buffer->use)
8467 break;
8468 if (buf[base + 1] == ']') {
8469 /* conditional crap, skip both ']' ! */
8470 base++;
8471 continue;
8472 }
8473 for (i = 0;
8474 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8475 i++) {
8476 if (buf[base + i] == '>')
8477 goto found_end_int_subset;
8478 }
8479 break;
8480 }
8481 }
8482 /*
8483 * We didn't found the end of the Internal subset
8484 */
8485 if (quote == 0)
8486 ctxt->checkIndex = base;
8487#ifdef DEBUG_PUSH
8488 if (next == 0)
8489 xmlGenericError(xmlGenericErrorContext,
8490 "PP: lookup of int subset end filed\n");
8491#endif
8492 goto done;
8493
8494found_end_int_subset:
8495 xmlParseInternalSubset(ctxt);
8496 ctxt->inSubset = 2;
8497 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8498 (ctxt->sax->externalSubset != NULL))
8499 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8500 ctxt->extSubSystem, ctxt->extSubURI);
8501 ctxt->inSubset = 0;
8502 ctxt->instate = XML_PARSER_PROLOG;
8503 ctxt->checkIndex = 0;
8504#ifdef DEBUG_PUSH
8505 xmlGenericError(xmlGenericErrorContext,
8506 "PP: entering PROLOG\n");
8507#endif
8508 break;
8509 }
8510 case XML_PARSER_COMMENT:
8511 xmlGenericError(xmlGenericErrorContext,
8512 "PP: internal error, state == COMMENT\n");
8513 ctxt->instate = XML_PARSER_CONTENT;
8514#ifdef DEBUG_PUSH
8515 xmlGenericError(xmlGenericErrorContext,
8516 "PP: entering CONTENT\n");
8517#endif
8518 break;
8519 case XML_PARSER_PI:
8520 xmlGenericError(xmlGenericErrorContext,
8521 "PP: internal error, state == PI\n");
8522 ctxt->instate = XML_PARSER_CONTENT;
8523#ifdef DEBUG_PUSH
8524 xmlGenericError(xmlGenericErrorContext,
8525 "PP: entering CONTENT\n");
8526#endif
8527 break;
8528 case XML_PARSER_ENTITY_DECL:
8529 xmlGenericError(xmlGenericErrorContext,
8530 "PP: internal error, state == ENTITY_DECL\n");
8531 ctxt->instate = XML_PARSER_DTD;
8532#ifdef DEBUG_PUSH
8533 xmlGenericError(xmlGenericErrorContext,
8534 "PP: entering DTD\n");
8535#endif
8536 break;
8537 case XML_PARSER_ENTITY_VALUE:
8538 xmlGenericError(xmlGenericErrorContext,
8539 "PP: internal error, state == ENTITY_VALUE\n");
8540 ctxt->instate = XML_PARSER_CONTENT;
8541#ifdef DEBUG_PUSH
8542 xmlGenericError(xmlGenericErrorContext,
8543 "PP: entering DTD\n");
8544#endif
8545 break;
8546 case XML_PARSER_ATTRIBUTE_VALUE:
8547 xmlGenericError(xmlGenericErrorContext,
8548 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8549 ctxt->instate = XML_PARSER_START_TAG;
8550#ifdef DEBUG_PUSH
8551 xmlGenericError(xmlGenericErrorContext,
8552 "PP: entering START_TAG\n");
8553#endif
8554 break;
8555 case XML_PARSER_SYSTEM_LITERAL:
8556 xmlGenericError(xmlGenericErrorContext,
8557 "PP: internal error, state == SYSTEM_LITERAL\n");
8558 ctxt->instate = XML_PARSER_START_TAG;
8559#ifdef DEBUG_PUSH
8560 xmlGenericError(xmlGenericErrorContext,
8561 "PP: entering START_TAG\n");
8562#endif
8563 break;
8564 }
8565 }
8566done:
8567#ifdef DEBUG_PUSH
8568 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8569#endif
8570 return(ret);
8571}
8572
8573/**
Owen Taylor3473f882001-02-23 17:55:21 +00008574 * xmlParseChunk:
8575 * @ctxt: an XML parser context
8576 * @chunk: an char array
8577 * @size: the size in byte of the chunk
8578 * @terminate: last chunk indicator
8579 *
8580 * Parse a Chunk of memory
8581 *
8582 * Returns zero if no error, the xmlParserErrors otherwise.
8583 */
8584int
8585xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8586 int terminate) {
8587 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8588 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8589 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8590 int cur = ctxt->input->cur - ctxt->input->base;
8591
8592 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8593 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8594 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008595 ctxt->input->end =
8596 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008597#ifdef DEBUG_PUSH
8598 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8599#endif
8600
8601 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8602 xmlParseTryOrFinish(ctxt, terminate);
8603 } else if (ctxt->instate != XML_PARSER_EOF) {
8604 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8605 xmlParserInputBufferPtr in = ctxt->input->buf;
8606 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8607 (in->raw != NULL)) {
8608 int nbchars;
8609
8610 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8611 if (nbchars < 0) {
8612 xmlGenericError(xmlGenericErrorContext,
8613 "xmlParseChunk: encoder error\n");
8614 return(XML_ERR_INVALID_ENCODING);
8615 }
8616 }
8617 }
8618 }
8619 xmlParseTryOrFinish(ctxt, terminate);
8620 if (terminate) {
8621 /*
8622 * Check for termination
8623 */
8624 if ((ctxt->instate != XML_PARSER_EOF) &&
8625 (ctxt->instate != XML_PARSER_EPILOG)) {
8626 ctxt->errNo = XML_ERR_DOCUMENT_END;
8627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8628 ctxt->sax->error(ctxt->userData,
8629 "Extra content at the end of the document\n");
8630 ctxt->wellFormed = 0;
8631 ctxt->disableSAX = 1;
8632 }
8633 if (ctxt->instate != XML_PARSER_EOF) {
8634 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8635 (!ctxt->disableSAX))
8636 ctxt->sax->endDocument(ctxt->userData);
8637 }
8638 ctxt->instate = XML_PARSER_EOF;
8639 }
8640 return((xmlParserErrors) ctxt->errNo);
8641}
8642
8643/************************************************************************
8644 * *
8645 * I/O front end functions to the parser *
8646 * *
8647 ************************************************************************/
8648
8649/**
8650 * xmlStopParser:
8651 * @ctxt: an XML parser context
8652 *
8653 * Blocks further parser processing
8654 */
8655void
8656xmlStopParser(xmlParserCtxtPtr ctxt) {
8657 ctxt->instate = XML_PARSER_EOF;
8658 if (ctxt->input != NULL)
8659 ctxt->input->cur = BAD_CAST"";
8660}
8661
8662/**
8663 * xmlCreatePushParserCtxt:
8664 * @sax: a SAX handler
8665 * @user_data: The user data returned on SAX callbacks
8666 * @chunk: a pointer to an array of chars
8667 * @size: number of chars in the array
8668 * @filename: an optional file name or URI
8669 *
8670 * Create a parser context for using the XML parser in push mode
8671 * To allow content encoding detection, @size should be >= 4
8672 * The value of @filename is used for fetching external entities
8673 * and error/warning reports.
8674 *
8675 * Returns the new parser context or NULL
8676 */
8677xmlParserCtxtPtr
8678xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8679 const char *chunk, int size, const char *filename) {
8680 xmlParserCtxtPtr ctxt;
8681 xmlParserInputPtr inputStream;
8682 xmlParserInputBufferPtr buf;
8683 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8684
8685 /*
8686 * plug some encoding conversion routines
8687 */
8688 if ((chunk != NULL) && (size >= 4))
8689 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8690
8691 buf = xmlAllocParserInputBuffer(enc);
8692 if (buf == NULL) return(NULL);
8693
8694 ctxt = xmlNewParserCtxt();
8695 if (ctxt == NULL) {
8696 xmlFree(buf);
8697 return(NULL);
8698 }
8699 if (sax != NULL) {
8700 if (ctxt->sax != &xmlDefaultSAXHandler)
8701 xmlFree(ctxt->sax);
8702 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8703 if (ctxt->sax == NULL) {
8704 xmlFree(buf);
8705 xmlFree(ctxt);
8706 return(NULL);
8707 }
8708 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8709 if (user_data != NULL)
8710 ctxt->userData = user_data;
8711 }
8712 if (filename == NULL) {
8713 ctxt->directory = NULL;
8714 } else {
8715 ctxt->directory = xmlParserGetDirectory(filename);
8716 }
8717
8718 inputStream = xmlNewInputStream(ctxt);
8719 if (inputStream == NULL) {
8720 xmlFreeParserCtxt(ctxt);
8721 return(NULL);
8722 }
8723
8724 if (filename == NULL)
8725 inputStream->filename = NULL;
8726 else
8727 inputStream->filename = xmlMemStrdup(filename);
8728 inputStream->buf = buf;
8729 inputStream->base = inputStream->buf->buffer->content;
8730 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008731 inputStream->end =
8732 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008733
8734 inputPush(ctxt, inputStream);
8735
8736 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8737 (ctxt->input->buf != NULL)) {
8738 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8739#ifdef DEBUG_PUSH
8740 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8741#endif
8742 }
8743
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008744 if (enc != XML_CHAR_ENCODING_NONE) {
8745 xmlSwitchEncoding(ctxt, enc);
8746 }
8747
Owen Taylor3473f882001-02-23 17:55:21 +00008748 return(ctxt);
8749}
8750
8751/**
8752 * xmlCreateIOParserCtxt:
8753 * @sax: a SAX handler
8754 * @user_data: The user data returned on SAX callbacks
8755 * @ioread: an I/O read function
8756 * @ioclose: an I/O close function
8757 * @ioctx: an I/O handler
8758 * @enc: the charset encoding if known
8759 *
8760 * Create a parser context for using the XML parser with an existing
8761 * I/O stream
8762 *
8763 * Returns the new parser context or NULL
8764 */
8765xmlParserCtxtPtr
8766xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8767 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8768 void *ioctx, xmlCharEncoding enc) {
8769 xmlParserCtxtPtr ctxt;
8770 xmlParserInputPtr inputStream;
8771 xmlParserInputBufferPtr buf;
8772
8773 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8774 if (buf == NULL) return(NULL);
8775
8776 ctxt = xmlNewParserCtxt();
8777 if (ctxt == NULL) {
8778 xmlFree(buf);
8779 return(NULL);
8780 }
8781 if (sax != NULL) {
8782 if (ctxt->sax != &xmlDefaultSAXHandler)
8783 xmlFree(ctxt->sax);
8784 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8785 if (ctxt->sax == NULL) {
8786 xmlFree(buf);
8787 xmlFree(ctxt);
8788 return(NULL);
8789 }
8790 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8791 if (user_data != NULL)
8792 ctxt->userData = user_data;
8793 }
8794
8795 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8796 if (inputStream == NULL) {
8797 xmlFreeParserCtxt(ctxt);
8798 return(NULL);
8799 }
8800 inputPush(ctxt, inputStream);
8801
8802 return(ctxt);
8803}
8804
8805/************************************************************************
8806 * *
8807 * Front ends when parsing a Dtd *
8808 * *
8809 ************************************************************************/
8810
8811/**
8812 * xmlIOParseDTD:
8813 * @sax: the SAX handler block or NULL
8814 * @input: an Input Buffer
8815 * @enc: the charset encoding if known
8816 *
8817 * Load and parse a DTD
8818 *
8819 * Returns the resulting xmlDtdPtr or NULL in case of error.
8820 * @input will be freed at parsing end.
8821 */
8822
8823xmlDtdPtr
8824xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8825 xmlCharEncoding enc) {
8826 xmlDtdPtr ret = NULL;
8827 xmlParserCtxtPtr ctxt;
8828 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008829 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008830
8831 if (input == NULL)
8832 return(NULL);
8833
8834 ctxt = xmlNewParserCtxt();
8835 if (ctxt == NULL) {
8836 return(NULL);
8837 }
8838
8839 /*
8840 * Set-up the SAX context
8841 */
8842 if (sax != NULL) {
8843 if (ctxt->sax != NULL)
8844 xmlFree(ctxt->sax);
8845 ctxt->sax = sax;
8846 ctxt->userData = NULL;
8847 }
8848
8849 /*
8850 * generate a parser input from the I/O handler
8851 */
8852
8853 pinput = xmlNewIOInputStream(ctxt, input, enc);
8854 if (pinput == NULL) {
8855 if (sax != NULL) ctxt->sax = NULL;
8856 xmlFreeParserCtxt(ctxt);
8857 return(NULL);
8858 }
8859
8860 /*
8861 * plug some encoding conversion routines here.
8862 */
8863 xmlPushInput(ctxt, pinput);
8864
8865 pinput->filename = NULL;
8866 pinput->line = 1;
8867 pinput->col = 1;
8868 pinput->base = ctxt->input->cur;
8869 pinput->cur = ctxt->input->cur;
8870 pinput->free = NULL;
8871
8872 /*
8873 * let's parse that entity knowing it's an external subset.
8874 */
8875 ctxt->inSubset = 2;
8876 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8877 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8878 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008879
8880 if (enc == XML_CHAR_ENCODING_NONE) {
8881 /*
8882 * Get the 4 first bytes and decode the charset
8883 * if enc != XML_CHAR_ENCODING_NONE
8884 * plug some encoding conversion routines.
8885 */
8886 start[0] = RAW;
8887 start[1] = NXT(1);
8888 start[2] = NXT(2);
8889 start[3] = NXT(3);
8890 enc = xmlDetectCharEncoding(start, 4);
8891 if (enc != XML_CHAR_ENCODING_NONE) {
8892 xmlSwitchEncoding(ctxt, enc);
8893 }
8894 }
8895
Owen Taylor3473f882001-02-23 17:55:21 +00008896 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8897
8898 if (ctxt->myDoc != NULL) {
8899 if (ctxt->wellFormed) {
8900 ret = ctxt->myDoc->extSubset;
8901 ctxt->myDoc->extSubset = NULL;
8902 } else {
8903 ret = NULL;
8904 }
8905 xmlFreeDoc(ctxt->myDoc);
8906 ctxt->myDoc = NULL;
8907 }
8908 if (sax != NULL) ctxt->sax = NULL;
8909 xmlFreeParserCtxt(ctxt);
8910
8911 return(ret);
8912}
8913
8914/**
8915 * xmlSAXParseDTD:
8916 * @sax: the SAX handler block
8917 * @ExternalID: a NAME* containing the External ID of the DTD
8918 * @SystemID: a NAME* containing the URL to the DTD
8919 *
8920 * Load and parse an external subset.
8921 *
8922 * Returns the resulting xmlDtdPtr or NULL in case of error.
8923 */
8924
8925xmlDtdPtr
8926xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8927 const xmlChar *SystemID) {
8928 xmlDtdPtr ret = NULL;
8929 xmlParserCtxtPtr ctxt;
8930 xmlParserInputPtr input = NULL;
8931 xmlCharEncoding enc;
8932
8933 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8934
8935 ctxt = xmlNewParserCtxt();
8936 if (ctxt == NULL) {
8937 return(NULL);
8938 }
8939
8940 /*
8941 * Set-up the SAX context
8942 */
8943 if (sax != NULL) {
8944 if (ctxt->sax != NULL)
8945 xmlFree(ctxt->sax);
8946 ctxt->sax = sax;
8947 ctxt->userData = NULL;
8948 }
8949
8950 /*
8951 * Ask the Entity resolver to load the damn thing
8952 */
8953
8954 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8955 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8956 if (input == NULL) {
8957 if (sax != NULL) ctxt->sax = NULL;
8958 xmlFreeParserCtxt(ctxt);
8959 return(NULL);
8960 }
8961
8962 /*
8963 * plug some encoding conversion routines here.
8964 */
8965 xmlPushInput(ctxt, input);
8966 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8967 xmlSwitchEncoding(ctxt, enc);
8968
8969 if (input->filename == NULL)
8970 input->filename = (char *) xmlStrdup(SystemID);
8971 input->line = 1;
8972 input->col = 1;
8973 input->base = ctxt->input->cur;
8974 input->cur = ctxt->input->cur;
8975 input->free = NULL;
8976
8977 /*
8978 * let's parse that entity knowing it's an external subset.
8979 */
8980 ctxt->inSubset = 2;
8981 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8982 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8983 ExternalID, SystemID);
8984 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8985
8986 if (ctxt->myDoc != NULL) {
8987 if (ctxt->wellFormed) {
8988 ret = ctxt->myDoc->extSubset;
8989 ctxt->myDoc->extSubset = NULL;
8990 } else {
8991 ret = NULL;
8992 }
8993 xmlFreeDoc(ctxt->myDoc);
8994 ctxt->myDoc = NULL;
8995 }
8996 if (sax != NULL) ctxt->sax = NULL;
8997 xmlFreeParserCtxt(ctxt);
8998
8999 return(ret);
9000}
9001
9002/**
9003 * xmlParseDTD:
9004 * @ExternalID: a NAME* containing the External ID of the DTD
9005 * @SystemID: a NAME* containing the URL to the DTD
9006 *
9007 * Load and parse an external subset.
9008 *
9009 * Returns the resulting xmlDtdPtr or NULL in case of error.
9010 */
9011
9012xmlDtdPtr
9013xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
9014 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
9015}
9016
9017/************************************************************************
9018 * *
9019 * Front ends when parsing an Entity *
9020 * *
9021 ************************************************************************/
9022
9023/**
Owen Taylor3473f882001-02-23 17:55:21 +00009024 * xmlParseCtxtExternalEntity:
9025 * @ctx: the existing parsing context
9026 * @URL: the URL for the entity to load
9027 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009028 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009029 *
9030 * Parse an external general entity within an existing parsing context
9031 * An external general parsed entity is well-formed if it matches the
9032 * production labeled extParsedEnt.
9033 *
9034 * [78] extParsedEnt ::= TextDecl? content
9035 *
9036 * Returns 0 if the entity is well formed, -1 in case of args problem and
9037 * the parser error code otherwise
9038 */
9039
9040int
9041xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009042 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009043 xmlParserCtxtPtr ctxt;
9044 xmlDocPtr newDoc;
9045 xmlSAXHandlerPtr oldsax = NULL;
9046 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009047 xmlChar start[4];
9048 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009049
9050 if (ctx->depth > 40) {
9051 return(XML_ERR_ENTITY_LOOP);
9052 }
9053
Daniel Veillardcda96922001-08-21 10:56:31 +00009054 if (lst != NULL)
9055 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009056 if ((URL == NULL) && (ID == NULL))
9057 return(-1);
9058 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9059 return(-1);
9060
9061
9062 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9063 if (ctxt == NULL) return(-1);
9064 ctxt->userData = ctxt;
9065 oldsax = ctxt->sax;
9066 ctxt->sax = ctx->sax;
9067 newDoc = xmlNewDoc(BAD_CAST "1.0");
9068 if (newDoc == NULL) {
9069 xmlFreeParserCtxt(ctxt);
9070 return(-1);
9071 }
9072 if (ctx->myDoc != NULL) {
9073 newDoc->intSubset = ctx->myDoc->intSubset;
9074 newDoc->extSubset = ctx->myDoc->extSubset;
9075 }
9076 if (ctx->myDoc->URL != NULL) {
9077 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9078 }
9079 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9080 if (newDoc->children == NULL) {
9081 ctxt->sax = oldsax;
9082 xmlFreeParserCtxt(ctxt);
9083 newDoc->intSubset = NULL;
9084 newDoc->extSubset = NULL;
9085 xmlFreeDoc(newDoc);
9086 return(-1);
9087 }
9088 nodePush(ctxt, newDoc->children);
9089 if (ctx->myDoc == NULL) {
9090 ctxt->myDoc = newDoc;
9091 } else {
9092 ctxt->myDoc = ctx->myDoc;
9093 newDoc->children->doc = ctx->myDoc;
9094 }
9095
Daniel Veillard87a764e2001-06-20 17:41:10 +00009096 /*
9097 * Get the 4 first bytes and decode the charset
9098 * if enc != XML_CHAR_ENCODING_NONE
9099 * plug some encoding conversion routines.
9100 */
9101 GROW
9102 start[0] = RAW;
9103 start[1] = NXT(1);
9104 start[2] = NXT(2);
9105 start[3] = NXT(3);
9106 enc = xmlDetectCharEncoding(start, 4);
9107 if (enc != XML_CHAR_ENCODING_NONE) {
9108 xmlSwitchEncoding(ctxt, enc);
9109 }
9110
Owen Taylor3473f882001-02-23 17:55:21 +00009111 /*
9112 * Parse a possible text declaration first
9113 */
Owen Taylor3473f882001-02-23 17:55:21 +00009114 if ((RAW == '<') && (NXT(1) == '?') &&
9115 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9116 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9117 xmlParseTextDecl(ctxt);
9118 }
9119
9120 /*
9121 * Doing validity checking on chunk doesn't make sense
9122 */
9123 ctxt->instate = XML_PARSER_CONTENT;
9124 ctxt->validate = ctx->validate;
9125 ctxt->loadsubset = ctx->loadsubset;
9126 ctxt->depth = ctx->depth + 1;
9127 ctxt->replaceEntities = ctx->replaceEntities;
9128 if (ctxt->validate) {
9129 ctxt->vctxt.error = ctx->vctxt.error;
9130 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009131 } else {
9132 ctxt->vctxt.error = NULL;
9133 ctxt->vctxt.warning = NULL;
9134 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009135 ctxt->vctxt.nodeTab = NULL;
9136 ctxt->vctxt.nodeNr = 0;
9137 ctxt->vctxt.nodeMax = 0;
9138 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009139
9140 xmlParseContent(ctxt);
9141
9142 if ((RAW == '<') && (NXT(1) == '/')) {
9143 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9145 ctxt->sax->error(ctxt->userData,
9146 "chunk is not well balanced\n");
9147 ctxt->wellFormed = 0;
9148 ctxt->disableSAX = 1;
9149 } else if (RAW != 0) {
9150 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9151 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9152 ctxt->sax->error(ctxt->userData,
9153 "extra content at the end of well balanced chunk\n");
9154 ctxt->wellFormed = 0;
9155 ctxt->disableSAX = 1;
9156 }
9157 if (ctxt->node != newDoc->children) {
9158 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9159 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9160 ctxt->sax->error(ctxt->userData,
9161 "chunk is not well balanced\n");
9162 ctxt->wellFormed = 0;
9163 ctxt->disableSAX = 1;
9164 }
9165
9166 if (!ctxt->wellFormed) {
9167 if (ctxt->errNo == 0)
9168 ret = 1;
9169 else
9170 ret = ctxt->errNo;
9171 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009172 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009173 xmlNodePtr cur;
9174
9175 /*
9176 * Return the newly created nodeset after unlinking it from
9177 * they pseudo parent.
9178 */
9179 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009180 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009181 while (cur != NULL) {
9182 cur->parent = NULL;
9183 cur = cur->next;
9184 }
9185 newDoc->children->children = NULL;
9186 }
9187 ret = 0;
9188 }
9189 ctxt->sax = oldsax;
9190 xmlFreeParserCtxt(ctxt);
9191 newDoc->intSubset = NULL;
9192 newDoc->extSubset = NULL;
9193 xmlFreeDoc(newDoc);
9194
9195 return(ret);
9196}
9197
9198/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009199 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009200 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009201 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009202 * @sax: the SAX handler bloc (possibly NULL)
9203 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9204 * @depth: Used for loop detection, use 0
9205 * @URL: the URL for the entity to load
9206 * @ID: the System ID for the entity to load
9207 * @list: the return value for the set of parsed nodes
9208 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009209 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009210 *
9211 * Returns 0 if the entity is well formed, -1 in case of args problem and
9212 * the parser error code otherwise
9213 */
9214
Daniel Veillard257d9102001-05-08 10:41:44 +00009215static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009216xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9217 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009218 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009219 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009220 xmlParserCtxtPtr ctxt;
9221 xmlDocPtr newDoc;
9222 xmlSAXHandlerPtr oldsax = NULL;
9223 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009224 xmlChar start[4];
9225 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009226
9227 if (depth > 40) {
9228 return(XML_ERR_ENTITY_LOOP);
9229 }
9230
9231
9232
9233 if (list != NULL)
9234 *list = NULL;
9235 if ((URL == NULL) && (ID == NULL))
9236 return(-1);
9237 if (doc == NULL) /* @@ relax but check for dereferences */
9238 return(-1);
9239
9240
9241 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9242 if (ctxt == NULL) return(-1);
9243 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009244 if (oldctxt != NULL) {
9245 ctxt->_private = oldctxt->_private;
9246 ctxt->loadsubset = oldctxt->loadsubset;
9247 ctxt->validate = oldctxt->validate;
9248 ctxt->external = oldctxt->external;
9249 } else {
9250 /*
9251 * Doing validity checking on chunk without context
9252 * doesn't make sense
9253 */
9254 ctxt->_private = NULL;
9255 ctxt->validate = 0;
9256 ctxt->external = 2;
9257 ctxt->loadsubset = 0;
9258 }
Owen Taylor3473f882001-02-23 17:55:21 +00009259 if (sax != NULL) {
9260 oldsax = ctxt->sax;
9261 ctxt->sax = sax;
9262 if (user_data != NULL)
9263 ctxt->userData = user_data;
9264 }
9265 newDoc = xmlNewDoc(BAD_CAST "1.0");
9266 if (newDoc == NULL) {
9267 xmlFreeParserCtxt(ctxt);
9268 return(-1);
9269 }
9270 if (doc != NULL) {
9271 newDoc->intSubset = doc->intSubset;
9272 newDoc->extSubset = doc->extSubset;
9273 }
9274 if (doc->URL != NULL) {
9275 newDoc->URL = xmlStrdup(doc->URL);
9276 }
9277 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9278 if (newDoc->children == NULL) {
9279 if (sax != NULL)
9280 ctxt->sax = oldsax;
9281 xmlFreeParserCtxt(ctxt);
9282 newDoc->intSubset = NULL;
9283 newDoc->extSubset = NULL;
9284 xmlFreeDoc(newDoc);
9285 return(-1);
9286 }
9287 nodePush(ctxt, newDoc->children);
9288 if (doc == NULL) {
9289 ctxt->myDoc = newDoc;
9290 } else {
9291 ctxt->myDoc = doc;
9292 newDoc->children->doc = doc;
9293 }
9294
Daniel Veillard87a764e2001-06-20 17:41:10 +00009295 /*
9296 * Get the 4 first bytes and decode the charset
9297 * if enc != XML_CHAR_ENCODING_NONE
9298 * plug some encoding conversion routines.
9299 */
9300 GROW;
9301 start[0] = RAW;
9302 start[1] = NXT(1);
9303 start[2] = NXT(2);
9304 start[3] = NXT(3);
9305 enc = xmlDetectCharEncoding(start, 4);
9306 if (enc != XML_CHAR_ENCODING_NONE) {
9307 xmlSwitchEncoding(ctxt, enc);
9308 }
9309
Owen Taylor3473f882001-02-23 17:55:21 +00009310 /*
9311 * Parse a possible text declaration first
9312 */
Owen Taylor3473f882001-02-23 17:55:21 +00009313 if ((RAW == '<') && (NXT(1) == '?') &&
9314 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9315 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9316 xmlParseTextDecl(ctxt);
9317 }
9318
Owen Taylor3473f882001-02-23 17:55:21 +00009319 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009320 ctxt->depth = depth;
9321
9322 xmlParseContent(ctxt);
9323
9324 if ((RAW == '<') && (NXT(1) == '/')) {
9325 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9327 ctxt->sax->error(ctxt->userData,
9328 "chunk is not well balanced\n");
9329 ctxt->wellFormed = 0;
9330 ctxt->disableSAX = 1;
9331 } else if (RAW != 0) {
9332 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9333 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9334 ctxt->sax->error(ctxt->userData,
9335 "extra content at the end of well balanced chunk\n");
9336 ctxt->wellFormed = 0;
9337 ctxt->disableSAX = 1;
9338 }
9339 if (ctxt->node != newDoc->children) {
9340 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9341 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9342 ctxt->sax->error(ctxt->userData,
9343 "chunk is not well balanced\n");
9344 ctxt->wellFormed = 0;
9345 ctxt->disableSAX = 1;
9346 }
9347
9348 if (!ctxt->wellFormed) {
9349 if (ctxt->errNo == 0)
9350 ret = 1;
9351 else
9352 ret = ctxt->errNo;
9353 } else {
9354 if (list != NULL) {
9355 xmlNodePtr cur;
9356
9357 /*
9358 * Return the newly created nodeset after unlinking it from
9359 * they pseudo parent.
9360 */
9361 cur = newDoc->children->children;
9362 *list = cur;
9363 while (cur != NULL) {
9364 cur->parent = NULL;
9365 cur = cur->next;
9366 }
9367 newDoc->children->children = NULL;
9368 }
9369 ret = 0;
9370 }
9371 if (sax != NULL)
9372 ctxt->sax = oldsax;
9373 xmlFreeParserCtxt(ctxt);
9374 newDoc->intSubset = NULL;
9375 newDoc->extSubset = NULL;
9376 xmlFreeDoc(newDoc);
9377
9378 return(ret);
9379}
9380
9381/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009382 * xmlParseExternalEntity:
9383 * @doc: the document the chunk pertains to
9384 * @sax: the SAX handler bloc (possibly NULL)
9385 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9386 * @depth: Used for loop detection, use 0
9387 * @URL: the URL for the entity to load
9388 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009389 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009390 *
9391 * Parse an external general entity
9392 * An external general parsed entity is well-formed if it matches the
9393 * production labeled extParsedEnt.
9394 *
9395 * [78] extParsedEnt ::= TextDecl? content
9396 *
9397 * Returns 0 if the entity is well formed, -1 in case of args problem and
9398 * the parser error code otherwise
9399 */
9400
9401int
9402xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009403 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009404 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009405 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009406}
9407
9408/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009409 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009410 * @doc: the document the chunk pertains to
9411 * @sax: the SAX handler bloc (possibly NULL)
9412 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9413 * @depth: Used for loop detection, use 0
9414 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009415 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009416 *
9417 * Parse a well-balanced chunk of an XML document
9418 * called by the parser
9419 * The allowed sequence for the Well Balanced Chunk is the one defined by
9420 * the content production in the XML grammar:
9421 *
9422 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9423 *
9424 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9425 * the parser error code otherwise
9426 */
9427
9428int
9429xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009430 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009431 xmlParserCtxtPtr ctxt;
9432 xmlDocPtr newDoc;
9433 xmlSAXHandlerPtr oldsax = NULL;
9434 int size;
9435 int ret = 0;
9436
9437 if (depth > 40) {
9438 return(XML_ERR_ENTITY_LOOP);
9439 }
9440
9441
Daniel Veillardcda96922001-08-21 10:56:31 +00009442 if (lst != NULL)
9443 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009444 if (string == NULL)
9445 return(-1);
9446
9447 size = xmlStrlen(string);
9448
9449 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9450 if (ctxt == NULL) return(-1);
9451 ctxt->userData = ctxt;
9452 if (sax != NULL) {
9453 oldsax = ctxt->sax;
9454 ctxt->sax = sax;
9455 if (user_data != NULL)
9456 ctxt->userData = user_data;
9457 }
9458 newDoc = xmlNewDoc(BAD_CAST "1.0");
9459 if (newDoc == NULL) {
9460 xmlFreeParserCtxt(ctxt);
9461 return(-1);
9462 }
9463 if (doc != NULL) {
9464 newDoc->intSubset = doc->intSubset;
9465 newDoc->extSubset = doc->extSubset;
9466 }
9467 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9468 if (newDoc->children == NULL) {
9469 if (sax != NULL)
9470 ctxt->sax = oldsax;
9471 xmlFreeParserCtxt(ctxt);
9472 newDoc->intSubset = NULL;
9473 newDoc->extSubset = NULL;
9474 xmlFreeDoc(newDoc);
9475 return(-1);
9476 }
9477 nodePush(ctxt, newDoc->children);
9478 if (doc == NULL) {
9479 ctxt->myDoc = newDoc;
9480 } else {
9481 ctxt->myDoc = doc;
9482 newDoc->children->doc = doc;
9483 }
9484 ctxt->instate = XML_PARSER_CONTENT;
9485 ctxt->depth = depth;
9486
9487 /*
9488 * Doing validity checking on chunk doesn't make sense
9489 */
9490 ctxt->validate = 0;
9491 ctxt->loadsubset = 0;
9492
9493 xmlParseContent(ctxt);
9494
9495 if ((RAW == '<') && (NXT(1) == '/')) {
9496 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9497 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9498 ctxt->sax->error(ctxt->userData,
9499 "chunk is not well balanced\n");
9500 ctxt->wellFormed = 0;
9501 ctxt->disableSAX = 1;
9502 } else if (RAW != 0) {
9503 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9505 ctxt->sax->error(ctxt->userData,
9506 "extra content at the end of well balanced chunk\n");
9507 ctxt->wellFormed = 0;
9508 ctxt->disableSAX = 1;
9509 }
9510 if (ctxt->node != newDoc->children) {
9511 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9513 ctxt->sax->error(ctxt->userData,
9514 "chunk is not well balanced\n");
9515 ctxt->wellFormed = 0;
9516 ctxt->disableSAX = 1;
9517 }
9518
9519 if (!ctxt->wellFormed) {
9520 if (ctxt->errNo == 0)
9521 ret = 1;
9522 else
9523 ret = ctxt->errNo;
9524 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009525 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009526 xmlNodePtr cur;
9527
9528 /*
9529 * Return the newly created nodeset after unlinking it from
9530 * they pseudo parent.
9531 */
9532 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009533 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009534 while (cur != NULL) {
9535 cur->parent = NULL;
9536 cur = cur->next;
9537 }
9538 newDoc->children->children = NULL;
9539 }
9540 ret = 0;
9541 }
9542 if (sax != NULL)
9543 ctxt->sax = oldsax;
9544 xmlFreeParserCtxt(ctxt);
9545 newDoc->intSubset = NULL;
9546 newDoc->extSubset = NULL;
9547 xmlFreeDoc(newDoc);
9548
9549 return(ret);
9550}
9551
9552/**
9553 * xmlSAXParseEntity:
9554 * @sax: the SAX handler block
9555 * @filename: the filename
9556 *
9557 * parse an XML external entity out of context and build a tree.
9558 * It use the given SAX function block to handle the parsing callback.
9559 * If sax is NULL, fallback to the default DOM tree building routines.
9560 *
9561 * [78] extParsedEnt ::= TextDecl? content
9562 *
9563 * This correspond to a "Well Balanced" chunk
9564 *
9565 * Returns the resulting document tree
9566 */
9567
9568xmlDocPtr
9569xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9570 xmlDocPtr ret;
9571 xmlParserCtxtPtr ctxt;
9572 char *directory = NULL;
9573
9574 ctxt = xmlCreateFileParserCtxt(filename);
9575 if (ctxt == NULL) {
9576 return(NULL);
9577 }
9578 if (sax != NULL) {
9579 if (ctxt->sax != NULL)
9580 xmlFree(ctxt->sax);
9581 ctxt->sax = sax;
9582 ctxt->userData = NULL;
9583 }
9584
9585 if ((ctxt->directory == NULL) && (directory == NULL))
9586 directory = xmlParserGetDirectory(filename);
9587
9588 xmlParseExtParsedEnt(ctxt);
9589
9590 if (ctxt->wellFormed)
9591 ret = ctxt->myDoc;
9592 else {
9593 ret = NULL;
9594 xmlFreeDoc(ctxt->myDoc);
9595 ctxt->myDoc = NULL;
9596 }
9597 if (sax != NULL)
9598 ctxt->sax = NULL;
9599 xmlFreeParserCtxt(ctxt);
9600
9601 return(ret);
9602}
9603
9604/**
9605 * xmlParseEntity:
9606 * @filename: the filename
9607 *
9608 * parse an XML external entity out of context and build a tree.
9609 *
9610 * [78] extParsedEnt ::= TextDecl? content
9611 *
9612 * This correspond to a "Well Balanced" chunk
9613 *
9614 * Returns the resulting document tree
9615 */
9616
9617xmlDocPtr
9618xmlParseEntity(const char *filename) {
9619 return(xmlSAXParseEntity(NULL, filename));
9620}
9621
9622/**
9623 * xmlCreateEntityParserCtxt:
9624 * @URL: the entity URL
9625 * @ID: the entity PUBLIC ID
9626 * @base: a posible base for the target URI
9627 *
9628 * Create a parser context for an external entity
9629 * Automatic support for ZLIB/Compress compressed document is provided
9630 * by default if found at compile-time.
9631 *
9632 * Returns the new parser context or NULL
9633 */
9634xmlParserCtxtPtr
9635xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9636 const xmlChar *base) {
9637 xmlParserCtxtPtr ctxt;
9638 xmlParserInputPtr inputStream;
9639 char *directory = NULL;
9640 xmlChar *uri;
9641
9642 ctxt = xmlNewParserCtxt();
9643 if (ctxt == NULL) {
9644 return(NULL);
9645 }
9646
9647 uri = xmlBuildURI(URL, base);
9648
9649 if (uri == NULL) {
9650 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9651 if (inputStream == NULL) {
9652 xmlFreeParserCtxt(ctxt);
9653 return(NULL);
9654 }
9655
9656 inputPush(ctxt, inputStream);
9657
9658 if ((ctxt->directory == NULL) && (directory == NULL))
9659 directory = xmlParserGetDirectory((char *)URL);
9660 if ((ctxt->directory == NULL) && (directory != NULL))
9661 ctxt->directory = directory;
9662 } else {
9663 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9664 if (inputStream == NULL) {
9665 xmlFree(uri);
9666 xmlFreeParserCtxt(ctxt);
9667 return(NULL);
9668 }
9669
9670 inputPush(ctxt, inputStream);
9671
9672 if ((ctxt->directory == NULL) && (directory == NULL))
9673 directory = xmlParserGetDirectory((char *)uri);
9674 if ((ctxt->directory == NULL) && (directory != NULL))
9675 ctxt->directory = directory;
9676 xmlFree(uri);
9677 }
9678
9679 return(ctxt);
9680}
9681
9682/************************************************************************
9683 * *
9684 * Front ends when parsing from a file *
9685 * *
9686 ************************************************************************/
9687
9688/**
9689 * xmlCreateFileParserCtxt:
9690 * @filename: the filename
9691 *
9692 * Create a parser context for a file content.
9693 * Automatic support for ZLIB/Compress compressed document is provided
9694 * by default if found at compile-time.
9695 *
9696 * Returns the new parser context or NULL
9697 */
9698xmlParserCtxtPtr
9699xmlCreateFileParserCtxt(const char *filename)
9700{
9701 xmlParserCtxtPtr ctxt;
9702 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009703 char *directory = NULL;
9704
Owen Taylor3473f882001-02-23 17:55:21 +00009705 ctxt = xmlNewParserCtxt();
9706 if (ctxt == NULL) {
9707 if (xmlDefaultSAXHandler.error != NULL) {
9708 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9709 }
9710 return(NULL);
9711 }
9712
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009713 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009714 if (inputStream == NULL) {
9715 xmlFreeParserCtxt(ctxt);
9716 return(NULL);
9717 }
9718
Owen Taylor3473f882001-02-23 17:55:21 +00009719 inputPush(ctxt, inputStream);
9720 if ((ctxt->directory == NULL) && (directory == NULL))
9721 directory = xmlParserGetDirectory(filename);
9722 if ((ctxt->directory == NULL) && (directory != NULL))
9723 ctxt->directory = directory;
9724
9725 return(ctxt);
9726}
9727
9728/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009729 * xmlSAXParseFileWithData:
Owen Taylor3473f882001-02-23 17:55:21 +00009730 * @sax: the SAX handler block
9731 * @filename: the filename
9732 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9733 * documents
Daniel Veillarda293c322001-10-02 13:54:14 +00009734 * @data: the userdata
Owen Taylor3473f882001-02-23 17:55:21 +00009735 *
9736 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9737 * compressed document is provided by default if found at compile-time.
9738 * It use the given SAX function block to handle the parsing callback.
9739 * If sax is NULL, fallback to the default DOM tree building routines.
9740 *
Daniel Veillarda293c322001-10-02 13:54:14 +00009741 * User data (void *) is stored within the parser context, so it is
9742 * available nearly everywhere in libxml.
9743 *
Owen Taylor3473f882001-02-23 17:55:21 +00009744 * Returns the resulting document tree
9745 */
9746
9747xmlDocPtr
Daniel Veillarda293c322001-10-02 13:54:14 +00009748xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
9749 int recovery, void *data) {
Owen Taylor3473f882001-02-23 17:55:21 +00009750 xmlDocPtr ret;
9751 xmlParserCtxtPtr ctxt;
9752 char *directory = NULL;
9753
9754 ctxt = xmlCreateFileParserCtxt(filename);
9755 if (ctxt == NULL) {
9756 return(NULL);
9757 }
9758 if (sax != NULL) {
9759 if (ctxt->sax != NULL)
9760 xmlFree(ctxt->sax);
9761 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009762 }
Daniel Veillarda293c322001-10-02 13:54:14 +00009763 if (data!=NULL) {
9764 ctxt->_private=data;
9765 }
Owen Taylor3473f882001-02-23 17:55:21 +00009766
9767 if ((ctxt->directory == NULL) && (directory == NULL))
9768 directory = xmlParserGetDirectory(filename);
9769 if ((ctxt->directory == NULL) && (directory != NULL))
9770 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9771
9772 xmlParseDocument(ctxt);
9773
9774 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9775 else {
9776 ret = NULL;
9777 xmlFreeDoc(ctxt->myDoc);
9778 ctxt->myDoc = NULL;
9779 }
9780 if (sax != NULL)
9781 ctxt->sax = NULL;
9782 xmlFreeParserCtxt(ctxt);
9783
9784 return(ret);
9785}
9786
9787/**
Daniel Veillarda293c322001-10-02 13:54:14 +00009788 * xmlSAXParseFile:
9789 * @sax: the SAX handler block
9790 * @filename: the filename
9791 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9792 * documents
9793 *
9794 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9795 * compressed document is provided by default if found at compile-time.
9796 * It use the given SAX function block to handle the parsing callback.
9797 * If sax is NULL, fallback to the default DOM tree building routines.
9798 *
9799 * Returns the resulting document tree
9800 */
9801
9802xmlDocPtr
9803xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9804 int recovery) {
9805 return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
9806}
9807
9808/**
Owen Taylor3473f882001-02-23 17:55:21 +00009809 * xmlRecoverDoc:
9810 * @cur: a pointer to an array of xmlChar
9811 *
9812 * parse an XML in-memory document and build a tree.
9813 * In the case the document is not Well Formed, a tree is built anyway
9814 *
9815 * Returns the resulting document tree
9816 */
9817
9818xmlDocPtr
9819xmlRecoverDoc(xmlChar *cur) {
9820 return(xmlSAXParseDoc(NULL, cur, 1));
9821}
9822
9823/**
9824 * xmlParseFile:
9825 * @filename: the filename
9826 *
9827 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9828 * compressed document is provided by default if found at compile-time.
9829 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009830 * Returns the resulting document tree if the file was wellformed,
9831 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009832 */
9833
9834xmlDocPtr
9835xmlParseFile(const char *filename) {
9836 return(xmlSAXParseFile(NULL, filename, 0));
9837}
9838
9839/**
9840 * xmlRecoverFile:
9841 * @filename: the filename
9842 *
9843 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9844 * compressed document is provided by default if found at compile-time.
9845 * In the case the document is not Well Formed, a tree is built anyway
9846 *
9847 * Returns the resulting document tree
9848 */
9849
9850xmlDocPtr
9851xmlRecoverFile(const char *filename) {
9852 return(xmlSAXParseFile(NULL, filename, 1));
9853}
9854
9855
9856/**
9857 * xmlSetupParserForBuffer:
9858 * @ctxt: an XML parser context
9859 * @buffer: a xmlChar * buffer
9860 * @filename: a file name
9861 *
9862 * Setup the parser context to parse a new buffer; Clears any prior
9863 * contents from the parser context. The buffer parameter must not be
9864 * NULL, but the filename parameter can be
9865 */
9866void
9867xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9868 const char* filename)
9869{
9870 xmlParserInputPtr input;
9871
9872 input = xmlNewInputStream(ctxt);
9873 if (input == NULL) {
9874 perror("malloc");
9875 xmlFree(ctxt);
9876 return;
9877 }
9878
9879 xmlClearParserCtxt(ctxt);
9880 if (filename != NULL)
9881 input->filename = xmlMemStrdup(filename);
9882 input->base = buffer;
9883 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009884 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009885 inputPush(ctxt, input);
9886}
9887
9888/**
9889 * xmlSAXUserParseFile:
9890 * @sax: a SAX handler
9891 * @user_data: The user data returned on SAX callbacks
9892 * @filename: a file name
9893 *
9894 * parse an XML file and call the given SAX handler routines.
9895 * Automatic support for ZLIB/Compress compressed document is provided
9896 *
9897 * Returns 0 in case of success or a error number otherwise
9898 */
9899int
9900xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9901 const char *filename) {
9902 int ret = 0;
9903 xmlParserCtxtPtr ctxt;
9904
9905 ctxt = xmlCreateFileParserCtxt(filename);
9906 if (ctxt == NULL) return -1;
9907 if (ctxt->sax != &xmlDefaultSAXHandler)
9908 xmlFree(ctxt->sax);
9909 ctxt->sax = sax;
9910 if (user_data != NULL)
9911 ctxt->userData = user_data;
9912
9913 xmlParseDocument(ctxt);
9914
9915 if (ctxt->wellFormed)
9916 ret = 0;
9917 else {
9918 if (ctxt->errNo != 0)
9919 ret = ctxt->errNo;
9920 else
9921 ret = -1;
9922 }
9923 if (sax != NULL)
9924 ctxt->sax = NULL;
9925 xmlFreeParserCtxt(ctxt);
9926
9927 return ret;
9928}
9929
9930/************************************************************************
9931 * *
9932 * Front ends when parsing from memory *
9933 * *
9934 ************************************************************************/
9935
9936/**
9937 * xmlCreateMemoryParserCtxt:
9938 * @buffer: a pointer to a char array
9939 * @size: the size of the array
9940 *
9941 * Create a parser context for an XML in-memory document.
9942 *
9943 * Returns the new parser context or NULL
9944 */
9945xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009946xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009947 xmlParserCtxtPtr ctxt;
9948 xmlParserInputPtr input;
9949 xmlParserInputBufferPtr buf;
9950
9951 if (buffer == NULL)
9952 return(NULL);
9953 if (size <= 0)
9954 return(NULL);
9955
9956 ctxt = xmlNewParserCtxt();
9957 if (ctxt == NULL)
9958 return(NULL);
9959
9960 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9961 if (buf == NULL) return(NULL);
9962
9963 input = xmlNewInputStream(ctxt);
9964 if (input == NULL) {
9965 xmlFreeParserCtxt(ctxt);
9966 return(NULL);
9967 }
9968
9969 input->filename = NULL;
9970 input->buf = buf;
9971 input->base = input->buf->buffer->content;
9972 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009973 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009974
9975 inputPush(ctxt, input);
9976 return(ctxt);
9977}
9978
9979/**
9980 * xmlSAXParseMemory:
9981 * @sax: the SAX handler block
9982 * @buffer: an pointer to a char array
9983 * @size: the size of the array
9984 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9985 * documents
9986 *
9987 * parse an XML in-memory block and use the given SAX function block
9988 * to handle the parsing callback. If sax is NULL, fallback to the default
9989 * DOM tree building routines.
9990 *
9991 * Returns the resulting document tree
9992 */
9993xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +00009994xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
9995 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +00009996 xmlDocPtr ret;
9997 xmlParserCtxtPtr ctxt;
9998
9999 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10000 if (ctxt == NULL) return(NULL);
10001 if (sax != NULL) {
10002 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +000010003 }
10004
10005 xmlParseDocument(ctxt);
10006
10007 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10008 else {
10009 ret = NULL;
10010 xmlFreeDoc(ctxt->myDoc);
10011 ctxt->myDoc = NULL;
10012 }
10013 if (sax != NULL)
10014 ctxt->sax = NULL;
10015 xmlFreeParserCtxt(ctxt);
10016
10017 return(ret);
10018}
10019
10020/**
10021 * xmlParseMemory:
10022 * @buffer: an pointer to a char array
10023 * @size: the size of the array
10024 *
10025 * parse an XML in-memory block and build a tree.
10026 *
10027 * Returns the resulting document tree
10028 */
10029
Daniel Veillard50822cb2001-07-26 20:05:51 +000010030xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010031 return(xmlSAXParseMemory(NULL, buffer, size, 0));
10032}
10033
10034/**
10035 * xmlRecoverMemory:
10036 * @buffer: an pointer to a char array
10037 * @size: the size of the array
10038 *
10039 * parse an XML in-memory block and build a tree.
10040 * In the case the document is not Well Formed, a tree is built anyway
10041 *
10042 * Returns the resulting document tree
10043 */
10044
Daniel Veillard50822cb2001-07-26 20:05:51 +000010045xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010046 return(xmlSAXParseMemory(NULL, buffer, size, 1));
10047}
10048
10049/**
10050 * xmlSAXUserParseMemory:
10051 * @sax: a SAX handler
10052 * @user_data: The user data returned on SAX callbacks
10053 * @buffer: an in-memory XML document input
10054 * @size: the length of the XML document in bytes
10055 *
10056 * A better SAX parsing routine.
10057 * parse an XML in-memory buffer and call the given SAX handler routines.
10058 *
10059 * Returns 0 in case of success or a error number otherwise
10060 */
10061int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010062 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010063 int ret = 0;
10064 xmlParserCtxtPtr ctxt;
10065 xmlSAXHandlerPtr oldsax = NULL;
10066
10067 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10068 if (ctxt == NULL) return -1;
10069 if (sax != NULL) {
10070 oldsax = ctxt->sax;
10071 ctxt->sax = sax;
10072 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010073 if (user_data != NULL)
10074 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010075
10076 xmlParseDocument(ctxt);
10077
10078 if (ctxt->wellFormed)
10079 ret = 0;
10080 else {
10081 if (ctxt->errNo != 0)
10082 ret = ctxt->errNo;
10083 else
10084 ret = -1;
10085 }
10086 if (sax != NULL) {
10087 ctxt->sax = oldsax;
10088 }
10089 xmlFreeParserCtxt(ctxt);
10090
10091 return ret;
10092}
10093
10094/**
10095 * xmlCreateDocParserCtxt:
10096 * @cur: a pointer to an array of xmlChar
10097 *
10098 * Creates a parser context for an XML in-memory document.
10099 *
10100 * Returns the new parser context or NULL
10101 */
10102xmlParserCtxtPtr
10103xmlCreateDocParserCtxt(xmlChar *cur) {
10104 int len;
10105
10106 if (cur == NULL)
10107 return(NULL);
10108 len = xmlStrlen(cur);
10109 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10110}
10111
10112/**
10113 * xmlSAXParseDoc:
10114 * @sax: the SAX handler block
10115 * @cur: a pointer to an array of xmlChar
10116 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10117 * documents
10118 *
10119 * parse an XML in-memory document and build a tree.
10120 * It use the given SAX function block to handle the parsing callback.
10121 * If sax is NULL, fallback to the default DOM tree building routines.
10122 *
10123 * Returns the resulting document tree
10124 */
10125
10126xmlDocPtr
10127xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10128 xmlDocPtr ret;
10129 xmlParserCtxtPtr ctxt;
10130
10131 if (cur == NULL) return(NULL);
10132
10133
10134 ctxt = xmlCreateDocParserCtxt(cur);
10135 if (ctxt == NULL) return(NULL);
10136 if (sax != NULL) {
10137 ctxt->sax = sax;
10138 ctxt->userData = NULL;
10139 }
10140
10141 xmlParseDocument(ctxt);
10142 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10143 else {
10144 ret = NULL;
10145 xmlFreeDoc(ctxt->myDoc);
10146 ctxt->myDoc = NULL;
10147 }
10148 if (sax != NULL)
10149 ctxt->sax = NULL;
10150 xmlFreeParserCtxt(ctxt);
10151
10152 return(ret);
10153}
10154
10155/**
10156 * xmlParseDoc:
10157 * @cur: a pointer to an array of xmlChar
10158 *
10159 * parse an XML in-memory document and build a tree.
10160 *
10161 * Returns the resulting document tree
10162 */
10163
10164xmlDocPtr
10165xmlParseDoc(xmlChar *cur) {
10166 return(xmlSAXParseDoc(NULL, cur, 0));
10167}
10168
10169
10170/************************************************************************
10171 * *
10172 * Miscellaneous *
10173 * *
10174 ************************************************************************/
10175
10176#ifdef LIBXML_XPATH_ENABLED
10177#include <libxml/xpath.h>
10178#endif
10179
/* Set once xmlInitParser() has run, reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 *
 * Calling it again after a first successful call does nothing.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
10208
10209/**
10210 * xmlCleanupParser:
10211 *
10212 * Cleanup function for the XML parser. It tries to reclaim all
10213 * parsing related global memory allocated for the parser processing.
10214 * It doesn't deallocate any document related memory. Calling this
10215 * function should not prevent reusing the parser.
10216 */
10217
10218void
10219xmlCleanupParser(void) {
10220 xmlParserInitialized = 0;
10221 xmlCleanupCharEncodingHandlers();
10222 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010223#ifdef LIBXML_CATALOG_ENABLED
10224 xmlCatalogCleanup();
10225#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010226}
10227