blob: 8aa5ab1b6a30922bc72445318c5432d9b291ff71 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 */
32
Bjorn Reese70a9da52001-04-21 16:57:29 +000033#include "libxml.h"
34
Owen Taylor3473f882001-02-23 17:55:21 +000035#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000036#define XML_DIR_SEP '\\'
37#else
Owen Taylor3473f882001-02-23 17:55:21 +000038#define XML_DIR_SEP '/'
39#endif
40
Owen Taylor3473f882001-02-23 17:55:21 +000041#include <stdlib.h>
42#include <string.h>
43#include <libxml/xmlmemory.h>
44#include <libxml/tree.h>
45#include <libxml/parser.h>
46#include <libxml/parserInternals.h>
47#include <libxml/valid.h>
48#include <libxml/entities.h>
49#include <libxml/xmlerror.h>
50#include <libxml/encoding.h>
51#include <libxml/xmlIO.h>
52#include <libxml/uri.h>
Daniel Veillard5d90b6c2001-08-22 14:29:45 +000053#ifdef LIBXML_CATALOG_ENABLED
54#include <libxml/catalog.h>
55#endif
Owen Taylor3473f882001-02-23 17:55:21 +000056
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Buffer sizing constants, counted in xmlChars: the initial size of a
 * translation buffer and the free margin kept before growing one.
 */
#define XML_PARSER_BUFFER_SIZE 100
#define XML_PARSER_BIG_BUFFER_SIZE 300

/*
 * Various global defaults for parsing.
 * When non-zero, entity handling emits traces through xmlGenericError.
 */
int xmlParserDebugEntities = 0;

/*
 * NULL terminated list of the XML prefixed PI allowed by W3C specs
 */
const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
95void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
96xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
97 const xmlChar **str);
98
Daniel Veillard257d9102001-05-08 10:41:44 +000099static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000100xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
101 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000102 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000103 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000104
105/************************************************************************
106 * *
107 * Parser stacks related functions and macros *
108 * *
109 ************************************************************************/
110
111xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
112 const xmlChar ** str);
113
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) generates two functions working on the
 * ctxt->name##Tab array (ctxt->name##Nr slots used out of ctxt->name##Max
 * allocated) and on ctxt->name, which caches the value on top of the stack:
 *
 *   name##Push  stores @value on top, doubling the array when it is full.
 *               Returns the index used for the new entry, or 0 when the
 *               array could not be grown; in that case the stack is left
 *               exactly as it was.
 *   name##Pop   removes and returns the top value, or returns 0 when the
 *               stack is empty. The released slot is reset to 0.
 */
#define PUSH_AND_POP(scope, type, name)                                 \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {               \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        /* go through a temporary so that a failed realloc neither */   \
        /* loses the existing table nor leaves Max out of sync     */   \
        int name##NewMax = ctxt->name##Max * 2;                         \
        type *name##Grown = (type *) xmlRealloc(ctxt->name##Tab,        \
                     name##NewMax * sizeof(ctxt->name##Tab[0]));        \
        if (name##Grown == NULL) {                                      \
            xmlGenericError(xmlGenericErrorContext,                     \
                    "realloc failed !\n");                              \
            return(0);                                                  \
        }                                                               \
        ctxt->name##Tab = name##Grown;                                  \
        ctxt->name##Max = name##NewMax;                                 \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
scope type name##Pop(xmlParserCtxtPtr ctxt) {                           \
    type ret;                                                           \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = NULL;                                              \
    ret = ctxt->name##Tab[ctxt->name##Nr];                              \
    ctxt->name##Tab[ctxt->name##Nr] = 0;                                \
    return(ret);                                                        \
}
146
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
148 * inputPop:
149 * @ctxt: an XML parser context
150 *
151 * Pops the top parser input from the input stack
152 *
153 * Returns the input just removed
154 */
155/**
156 * inputPush:
157 * @ctxt: an XML parser context
158 * @input: the parser input
159 *
160 * Pushes a new parser input on top of the input stack
161 */
162/**
163 * namePop:
164 * @ctxt: an XML parser context
165 *
166 * Pops the top element name from the name stack
167 *
168 * Returns the name just removed
169 */
170/**
171 * namePush:
172 * @ctxt: an XML parser context
173 * @name: the element name
174 *
175 * Pushes a new element name on top of the name stack
176 */
177/**
178 * nodePop:
179 * @ctxt: an XML parser context
180 *
181 * Pops the top element node from the node stack
182 *
183 * Returns the node just removed
184 */
185/**
186 * nodePush:
187 * @ctxt: an XML parser context
188 * @node: the element node
189 *
190 * Pushes a new element node on top of the node stack
191 */
Owen Taylor3473f882001-02-23 17:55:21 +0000192/*
193 * Those macros actually generate the functions
194 */
195PUSH_AND_POP(extern, xmlParserInputPtr, input)
196PUSH_AND_POP(extern, xmlNodePtr, node)
197PUSH_AND_POP(extern, xmlChar*, name)
198
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000199static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000200 if (ctxt->spaceNr >= ctxt->spaceMax) {
201 ctxt->spaceMax *= 2;
202 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
203 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
204 if (ctxt->spaceTab == NULL) {
205 xmlGenericError(xmlGenericErrorContext,
206 "realloc failed !\n");
207 return(0);
208 }
209 }
210 ctxt->spaceTab[ctxt->spaceNr] = val;
211 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
212 return(ctxt->spaceNr++);
213}
214
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000215static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000216 int ret;
217 if (ctxt->spaceNr <= 0) return(0);
218 ctxt->spaceNr--;
219 if (ctxt->spaceNr > 0)
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
221 else
222 ctxt->space = NULL;
223 ret = ctxt->spaceTab[ctxt->spaceNr];
224 ctxt->spaceTab[ctxt->spaceNr] = -1;
225 return(ret);
226}
227
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   CUR      the pending token if there is one, otherwise the current
 *            xmlChar value. This should be used internally by the parser
 *            only to compare to ASCII values otherwise it would break when
 *            running with UTF-8 encoding.
 *   RAW      same as CUR but looking only at the input buffer: it yields -1
 *            whenever a token is pending.
 *   NXT(n)   the n'th next xmlChar. Same as CUR it should be used only
 *            to compare on ASCII based substring.
 *   SKIP(n)  Skip n xmlChar, and must also be used only to skip ASCII defined
 *            strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT        Skip to the next character through xmlNextChar().
 *   NEXT1       Skip exactly one xmlChar, growing the input when the
 *               terminating 0 is reached.
 *   NEXTL(l)    Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *               to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR   same but operate on a string instead of the context
 *   COPY_BUF    copy the current unicode char to the target buffer, increment
 *               the index
 *   GROW, SHRINK  handling of input buffers
 *
 * GROW, SHRINK and NEXT1 expand to a bare block and are used in this file
 * without a trailing semicolon; do not wrap them in do { } while (0).
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    if (*ctxt->input->cur == '%')                                       \
        xmlParserHandlePEReference(ctxt);                               \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
        xmlPopInput(ctxt);                                              \
  } while (0)

#define SHRINK                                                          \
    if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {           \
        xmlParserInputShrink(ctxt->input);                              \
        if ((*ctxt->input->cur == 0) &&                                 \
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))        \
            xmlPopInput(ctxt);                                          \
    }

#define GROW                                                            \
    if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {            \
        xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                   \
        if ((*ctxt->input->cur == 0) &&                                 \
            (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))        \
            xmlPopInput(ctxt);                                          \
    }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += l;                             \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

#define COPY_BUF(l,b,i,v)                                               \
    if (l == 1) b[i++] = (xmlChar) v;                                   \
    else i += xmlCopyCharMultiByte(&b[i],v)
Owen Taylor3473f882001-02-23 17:55:21 +0000314
315/**
316 * xmlSkipBlankChars:
317 * @ctxt: the XML parser context
318 *
319 * skip all blanks character found at that point in the input streams.
320 * It pops up finished entities in the process if allowable at that point.
321 *
322 * Returns the number of space chars skipped
323 */
324
325int
326xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000327 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000328
Daniel Veillard02141ea2001-04-30 11:46:40 +0000329 if (ctxt->token != 0) {
330 if (!IS_BLANK(ctxt->token))
331 return(0);
332 ctxt->token = 0;
333 res++;
334 }
Owen Taylor3473f882001-02-23 17:55:21 +0000335 /*
336 * It's Okay to use CUR/NEXT here since all the blanks are on
337 * the ASCII range.
338 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000339 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
340 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000341 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000343 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000344 cur = ctxt->input->cur;
345 while (IS_BLANK(*cur)) {
346 if (*cur == '\n') {
347 ctxt->input->line++; ctxt->input->col = 1;
348 }
349 cur++;
350 res++;
351 if (*cur == 0) {
352 ctxt->input->cur = cur;
353 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
354 cur = ctxt->input->cur;
355 }
356 }
357 ctxt->input->cur = cur;
358 } else {
359 int cur;
360 do {
361 cur = CUR;
362 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
363 NEXT;
364 cur = CUR;
365 res++;
366 }
367 while ((cur == 0) && (ctxt->inputNr > 1) &&
368 (ctxt->instate != XML_PARSER_COMMENT)) {
369 xmlPopInput(ctxt);
370 cur = CUR;
371 }
372 /*
373 * Need to handle support of entities branching here
374 */
375 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
376 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
377 }
Owen Taylor3473f882001-02-23 17:55:21 +0000378 return(res);
379}
380
381/************************************************************************
382 * *
383 * Commodity functions to handle entities *
384 * *
385 ************************************************************************/
386
387/**
388 * xmlPopInput:
389 * @ctxt: an XML parser context
390 *
391 * xmlPopInput: the current input pointed by ctxt->input came to an end
392 * pop it and return the next char.
393 *
394 * Returns the current xmlChar in the parser context
395 */
396xmlChar
397xmlPopInput(xmlParserCtxtPtr ctxt) {
398 if (ctxt->inputNr == 1) return(0); /* End of main Input */
399 if (xmlParserDebugEntities)
400 xmlGenericError(xmlGenericErrorContext,
401 "Popping input %d\n", ctxt->inputNr);
402 xmlFreeInputStream(inputPop(ctxt));
403 if ((*ctxt->input->cur == 0) &&
404 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
405 return(xmlPopInput(ctxt));
406 return(CUR);
407}
408
409/**
410 * xmlPushInput:
411 * @ctxt: an XML parser context
412 * @input: an XML parser input fragment (entity, XML fragment ...).
413 *
414 * xmlPushInput: switch to a new input stream which is stacked on top
415 * of the previous one(s).
416 */
417void
418xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
419 if (input == NULL) return;
420
421 if (xmlParserDebugEntities) {
422 if ((ctxt->input != NULL) && (ctxt->input->filename))
423 xmlGenericError(xmlGenericErrorContext,
424 "%s(%d): ", ctxt->input->filename,
425 ctxt->input->line);
426 xmlGenericError(xmlGenericErrorContext,
427 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
428 }
429 inputPush(ctxt, input);
430 GROW;
431}
432
433/**
434 * xmlParseCharRef:
435 * @ctxt: an XML parser context
436 *
437 * parse Reference declarations
438 *
439 * [66] CharRef ::= '&#' [0-9]+ ';' |
440 * '&#x' [0-9a-fA-F]+ ';'
441 *
442 * [ WFC: Legal Character ]
443 * Characters referred to using character references must match the
444 * production for Char.
445 *
446 * Returns the value parsed (as an int), 0 in case of error
447 */
448int
449xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000450 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000451 int count = 0;
452
453 if (ctxt->token != 0) {
454 val = ctxt->token;
455 ctxt->token = 0;
456 return(val);
457 }
458 /*
459 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
460 */
461 if ((RAW == '&') && (NXT(1) == '#') &&
462 (NXT(2) == 'x')) {
463 SKIP(3);
464 GROW;
465 while (RAW != ';') { /* loop blocked by count */
466 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
467 val = val * 16 + (CUR - '0');
468 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
469 val = val * 16 + (CUR - 'a') + 10;
470 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
471 val = val * 16 + (CUR - 'A') + 10;
472 else {
473 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
475 ctxt->sax->error(ctxt->userData,
476 "xmlParseCharRef: invalid hexadecimal value\n");
477 ctxt->wellFormed = 0;
478 ctxt->disableSAX = 1;
479 val = 0;
480 break;
481 }
482 NEXT;
483 count++;
484 }
485 if (RAW == ';') {
486 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
487 ctxt->nbChars ++;
488 ctxt->input->cur++;
489 }
490 } else if ((RAW == '&') && (NXT(1) == '#')) {
491 SKIP(2);
492 GROW;
493 while (RAW != ';') { /* loop blocked by count */
494 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
495 val = val * 10 + (CUR - '0');
496 else {
497 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
499 ctxt->sax->error(ctxt->userData,
500 "xmlParseCharRef: invalid decimal value\n");
501 ctxt->wellFormed = 0;
502 ctxt->disableSAX = 1;
503 val = 0;
504 break;
505 }
506 NEXT;
507 count++;
508 }
509 if (RAW == ';') {
510 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
511 ctxt->nbChars ++;
512 ctxt->input->cur++;
513 }
514 } else {
515 ctxt->errNo = XML_ERR_INVALID_CHARREF;
516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
517 ctxt->sax->error(ctxt->userData,
518 "xmlParseCharRef: invalid value\n");
519 ctxt->wellFormed = 0;
520 ctxt->disableSAX = 1;
521 }
522
523 /*
524 * [ WFC: Legal Character ]
525 * Characters referred to using character references must match the
526 * production for Char.
527 */
528 if (IS_CHAR(val)) {
529 return(val);
530 } else {
531 ctxt->errNo = XML_ERR_INVALID_CHAR;
532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
533 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
534 val);
535 ctxt->wellFormed = 0;
536 ctxt->disableSAX = 1;
537 }
538 return(0);
539}
540
541/**
542 * xmlParseStringCharRef:
543 * @ctxt: an XML parser context
544 * @str: a pointer to an index in the string
545 *
546 * parse Reference declarations, variant parsing from a string rather
547 * than an an input flow.
548 *
549 * [66] CharRef ::= '&#' [0-9]+ ';' |
550 * '&#x' [0-9a-fA-F]+ ';'
551 *
552 * [ WFC: Legal Character ]
553 * Characters referred to using character references must match the
554 * production for Char.
555 *
556 * Returns the value parsed (as an int), 0 in case of error, str will be
557 * updated to the current value of the index
558 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000559static int
Owen Taylor3473f882001-02-23 17:55:21 +0000560xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
561 const xmlChar *ptr;
562 xmlChar cur;
563 int val = 0;
564
565 if ((str == NULL) || (*str == NULL)) return(0);
566 ptr = *str;
567 cur = *ptr;
568 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
569 ptr += 3;
570 cur = *ptr;
571 while (cur != ';') { /* Non input consuming loop */
572 if ((cur >= '0') && (cur <= '9'))
573 val = val * 16 + (cur - '0');
574 else if ((cur >= 'a') && (cur <= 'f'))
575 val = val * 16 + (cur - 'a') + 10;
576 else if ((cur >= 'A') && (cur <= 'F'))
577 val = val * 16 + (cur - 'A') + 10;
578 else {
579 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
581 ctxt->sax->error(ctxt->userData,
582 "xmlParseStringCharRef: invalid hexadecimal value\n");
583 ctxt->wellFormed = 0;
584 ctxt->disableSAX = 1;
585 val = 0;
586 break;
587 }
588 ptr++;
589 cur = *ptr;
590 }
591 if (cur == ';')
592 ptr++;
593 } else if ((cur == '&') && (ptr[1] == '#')){
594 ptr += 2;
595 cur = *ptr;
596 while (cur != ';') { /* Non input consuming loops */
597 if ((cur >= '0') && (cur <= '9'))
598 val = val * 10 + (cur - '0');
599 else {
600 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
602 ctxt->sax->error(ctxt->userData,
603 "xmlParseStringCharRef: invalid decimal value\n");
604 ctxt->wellFormed = 0;
605 ctxt->disableSAX = 1;
606 val = 0;
607 break;
608 }
609 ptr++;
610 cur = *ptr;
611 }
612 if (cur == ';')
613 ptr++;
614 } else {
615 ctxt->errNo = XML_ERR_INVALID_CHARREF;
616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
617 ctxt->sax->error(ctxt->userData,
618 "xmlParseCharRef: invalid value\n");
619 ctxt->wellFormed = 0;
620 ctxt->disableSAX = 1;
621 return(0);
622 }
623 *str = ptr;
624
625 /*
626 * [ WFC: Legal Character ]
627 * Characters referred to using character references must match the
628 * production for Char.
629 */
630 if (IS_CHAR(val)) {
631 return(val);
632 } else {
633 ctxt->errNo = XML_ERR_INVALID_CHAR;
634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
635 ctxt->sax->error(ctxt->userData,
636 "CharRef: invalid xmlChar value %d\n", val);
637 ctxt->wellFormed = 0;
638 ctxt->disableSAX = 1;
639 }
640 return(0);
641}
642
643/**
644 * xmlParserHandlePEReference:
645 * @ctxt: the parser context
646 *
647 * [69] PEReference ::= '%' Name ';'
648 *
649 * [ WFC: No Recursion ]
650 * A parsed entity must not contain a recursive
651 * reference to itself, either directly or indirectly.
652 *
653 * [ WFC: Entity Declared ]
654 * In a document without any DTD, a document with only an internal DTD
655 * subset which contains no parameter entity references, or a document
656 * with "standalone='yes'", ... ... The declaration of a parameter
657 * entity must precede any reference to it...
658 *
659 * [ VC: Entity Declared ]
660 * In a document with an external subset or external parameter entities
661 * with "standalone='no'", ... ... The declaration of a parameter entity
662 * must precede any reference to it...
663 *
664 * [ WFC: In DTD ]
665 * Parameter-entity references may only appear in the DTD.
666 * NOTE: misleading but this is handled.
667 *
668 * A PEReference may have been detected in the current input stream
669 * the handling is done accordingly to
670 * http://www.w3.org/TR/REC-xml#entproc
671 * i.e.
672 * - Included in literal in entity values
673 * - Included as Paraemeter Entity reference within DTDs
674 */
675void
676xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
677 xmlChar *name;
678 xmlEntityPtr entity = NULL;
679 xmlParserInputPtr input;
680
681 if (ctxt->token != 0) {
682 return;
683 }
684 if (RAW != '%') return;
685 switch(ctxt->instate) {
686 case XML_PARSER_CDATA_SECTION:
687 return;
688 case XML_PARSER_COMMENT:
689 return;
690 case XML_PARSER_START_TAG:
691 return;
692 case XML_PARSER_END_TAG:
693 return;
694 case XML_PARSER_EOF:
695 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
697 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
698 ctxt->wellFormed = 0;
699 ctxt->disableSAX = 1;
700 return;
701 case XML_PARSER_PROLOG:
702 case XML_PARSER_START:
703 case XML_PARSER_MISC:
704 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
706 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
707 ctxt->wellFormed = 0;
708 ctxt->disableSAX = 1;
709 return;
710 case XML_PARSER_ENTITY_DECL:
711 case XML_PARSER_CONTENT:
712 case XML_PARSER_ATTRIBUTE_VALUE:
713 case XML_PARSER_PI:
714 case XML_PARSER_SYSTEM_LITERAL:
715 /* we just ignore it there */
716 return;
717 case XML_PARSER_EPILOG:
718 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
720 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
721 ctxt->wellFormed = 0;
722 ctxt->disableSAX = 1;
723 return;
724 case XML_PARSER_ENTITY_VALUE:
725 /*
726 * NOTE: in the case of entity values, we don't do the
727 * substitution here since we need the literal
728 * entity value to be able to save the internal
729 * subset of the document.
730 * This will be handled by xmlStringDecodeEntities
731 */
732 return;
733 case XML_PARSER_DTD:
734 /*
735 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
736 * In the internal DTD subset, parameter-entity references
737 * can occur only where markup declarations can occur, not
738 * within markup declarations.
739 * In that case this is handled in xmlParseMarkupDecl
740 */
741 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
742 return;
743 break;
744 case XML_PARSER_IGNORE:
745 return;
746 }
747
748 NEXT;
749 name = xmlParseName(ctxt);
750 if (xmlParserDebugEntities)
751 xmlGenericError(xmlGenericErrorContext,
752 "PE Reference: %s\n", name);
753 if (name == NULL) {
754 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
756 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
757 ctxt->wellFormed = 0;
758 ctxt->disableSAX = 1;
759 } else {
760 if (RAW == ';') {
761 NEXT;
762 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
763 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
764 if (entity == NULL) {
765
766 /*
767 * [ WFC: Entity Declared ]
768 * In a document without any DTD, a document with only an
769 * internal DTD subset which contains no parameter entity
770 * references, or a document with "standalone='yes'", ...
771 * ... The declaration of a parameter entity must precede
772 * any reference to it...
773 */
774 if ((ctxt->standalone == 1) ||
775 ((ctxt->hasExternalSubset == 0) &&
776 (ctxt->hasPErefs == 0))) {
777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
778 ctxt->sax->error(ctxt->userData,
779 "PEReference: %%%s; not found\n", name);
780 ctxt->wellFormed = 0;
781 ctxt->disableSAX = 1;
782 } else {
783 /*
784 * [ VC: Entity Declared ]
785 * In a document with an external subset or external
786 * parameter entities with "standalone='no'", ...
787 * ... The declaration of a parameter entity must precede
788 * any reference to it...
789 */
790 if ((!ctxt->disableSAX) &&
791 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
792 ctxt->vctxt.error(ctxt->vctxt.userData,
793 "PEReference: %%%s; not found\n", name);
794 } else if ((!ctxt->disableSAX) &&
795 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
796 ctxt->sax->warning(ctxt->userData,
797 "PEReference: %%%s; not found\n", name);
798 ctxt->valid = 0;
799 }
800 } else {
801 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
802 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000803 xmlChar start[4];
804 xmlCharEncoding enc;
805
Owen Taylor3473f882001-02-23 17:55:21 +0000806 /*
807 * handle the extra spaces added before and after
808 * c.f. http://www.w3.org/TR/REC-xml#as-PE
809 * this is done independantly.
810 */
811 input = xmlNewEntityInputStream(ctxt, entity);
812 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000813
814 /*
815 * Get the 4 first bytes and decode the charset
816 * if enc != XML_CHAR_ENCODING_NONE
817 * plug some encoding conversion routines.
818 */
819 GROW
820 start[0] = RAW;
821 start[1] = NXT(1);
822 start[2] = NXT(2);
823 start[3] = NXT(3);
824 enc = xmlDetectCharEncoding(start, 4);
825 if (enc != XML_CHAR_ENCODING_NONE) {
826 xmlSwitchEncoding(ctxt, enc);
827 }
828
Owen Taylor3473f882001-02-23 17:55:21 +0000829 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
830 (RAW == '<') && (NXT(1) == '?') &&
831 (NXT(2) == 'x') && (NXT(3) == 'm') &&
832 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
833 xmlParseTextDecl(ctxt);
834 }
835 if (ctxt->token == 0)
836 ctxt->token = ' ';
837 } else {
838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
839 ctxt->sax->error(ctxt->userData,
840 "xmlHandlePEReference: %s is not a parameter entity\n",
841 name);
842 ctxt->wellFormed = 0;
843 ctxt->disableSAX = 1;
844 }
845 }
846 } else {
847 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData,
850 "xmlHandlePEReference: expecting ';'\n");
851 ctxt->wellFormed = 0;
852 ctxt->disableSAX = 1;
853 }
854 xmlFree(name);
855 }
856}
857
/*
 * Macro used to grow the current buffer.
 *
 * Expects an xmlChar pointer named @buffer together with an integer named
 * buffer_size (i.e. <buffer>_size) in scope, and doubles the allocation.
 * It must be used inside a function returning a pointer: when the
 * reallocation fails the old buffer is released and the enclosing function
 * returns NULL. The reallocation goes through a temporary so the old block
 * is not lost on failure.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *buffer##_grown;                                            \
    buffer##_size *= 2;                                                 \
    buffer##_grown = (xmlChar *)                                        \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (buffer##_grown == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = buffer##_grown;                                            \
}
870
871/**
872 * xmlStringDecodeEntities:
873 * @ctxt: the parser context
874 * @str: the input string
875 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
876 * @end: an end marker xmlChar, 0 if none
877 * @end2: an end marker xmlChar, 0 if none
878 * @end3: an end marker xmlChar, 0 if none
879 *
880 * Takes a entity string content and process to do the adequate subtitutions.
881 *
882 * [67] Reference ::= EntityRef | CharRef
883 *
884 * [69] PEReference ::= '%' Name ';'
885 *
886 * Returns A newly allocated string with the substitution done. The caller
887 * must deallocate it !
888 */
889xmlChar *
890xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
891 xmlChar end, xmlChar end2, xmlChar end3) {
892 xmlChar *buffer = NULL;
893 int buffer_size = 0;
894
895 xmlChar *current = NULL;
896 xmlEntityPtr ent;
897 int c,l;
898 int nbchars = 0;
899
900 if (str == NULL)
901 return(NULL);
902
903 if (ctxt->depth > 40) {
904 ctxt->errNo = XML_ERR_ENTITY_LOOP;
905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
906 ctxt->sax->error(ctxt->userData,
907 "Detected entity reference loop\n");
908 ctxt->wellFormed = 0;
909 ctxt->disableSAX = 1;
910 return(NULL);
911 }
912
913 /*
914 * allocate a translation buffer.
915 */
916 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
917 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
918 if (buffer == NULL) {
919 perror("xmlDecodeEntities: malloc failed");
920 return(NULL);
921 }
922
923 /*
924 * Ok loop until we reach one of the ending char or a size limit.
925 * we are operating on already parsed values.
926 */
927 c = CUR_SCHAR(str, l);
928 while ((c != 0) && (c != end) && /* non input consuming loop */
929 (c != end2) && (c != end3)) {
930
931 if (c == 0) break;
932 if ((c == '&') && (str[1] == '#')) {
933 int val = xmlParseStringCharRef(ctxt, &str);
934 if (val != 0) {
935 COPY_BUF(0,buffer,nbchars,val);
936 }
937 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
938 if (xmlParserDebugEntities)
939 xmlGenericError(xmlGenericErrorContext,
940 "String decoding Entity Reference: %.30s\n",
941 str);
942 ent = xmlParseStringEntityRef(ctxt, &str);
943 if ((ent != NULL) &&
944 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
945 if (ent->content != NULL) {
946 COPY_BUF(0,buffer,nbchars,ent->content[0]);
947 } else {
948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
949 ctxt->sax->error(ctxt->userData,
950 "internal error entity has no content\n");
951 }
952 } else if ((ent != NULL) && (ent->content != NULL)) {
953 xmlChar *rep;
954
955 ctxt->depth++;
956 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
957 0, 0, 0);
958 ctxt->depth--;
959 if (rep != NULL) {
960 current = rep;
961 while (*current != 0) { /* non input consuming loop */
962 buffer[nbchars++] = *current++;
963 if (nbchars >
964 buffer_size - XML_PARSER_BUFFER_SIZE) {
965 growBuffer(buffer);
966 }
967 }
968 xmlFree(rep);
969 }
970 } else if (ent != NULL) {
971 int i = xmlStrlen(ent->name);
972 const xmlChar *cur = ent->name;
973
974 buffer[nbchars++] = '&';
975 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
976 growBuffer(buffer);
977 }
978 for (;i > 0;i--)
979 buffer[nbchars++] = *cur++;
980 buffer[nbchars++] = ';';
981 }
982 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
983 if (xmlParserDebugEntities)
984 xmlGenericError(xmlGenericErrorContext,
985 "String decoding PE Reference: %.30s\n", str);
986 ent = xmlParseStringPEReference(ctxt, &str);
987 if (ent != NULL) {
988 xmlChar *rep;
989
990 ctxt->depth++;
991 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
992 0, 0, 0);
993 ctxt->depth--;
994 if (rep != NULL) {
995 current = rep;
996 while (*current != 0) { /* non input consuming loop */
997 buffer[nbchars++] = *current++;
998 if (nbchars >
999 buffer_size - XML_PARSER_BUFFER_SIZE) {
1000 growBuffer(buffer);
1001 }
1002 }
1003 xmlFree(rep);
1004 }
1005 }
1006 } else {
1007 COPY_BUF(l,buffer,nbchars,c);
1008 str += l;
1009 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1010 growBuffer(buffer);
1011 }
1012 }
1013 c = CUR_SCHAR(str, l);
1014 }
1015 buffer[nbchars++] = 0;
1016 return(buffer);
1017}
1018
1019
1020/************************************************************************
1021 * *
1022 * Commodity functions to handle xmlChars *
1023 * *
1024 ************************************************************************/
1025
1026/**
1027 * xmlStrndup:
1028 * @cur: the input xmlChar *
1029 * @len: the len of @cur
1030 *
1031 * a strndup for array of xmlChar's
1032 *
1033 * Returns a new xmlChar * or NULL
1034 */
1035xmlChar *
1036xmlStrndup(const xmlChar *cur, int len) {
1037 xmlChar *ret;
1038
1039 if ((cur == NULL) || (len < 0)) return(NULL);
1040 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1041 if (ret == NULL) {
1042 xmlGenericError(xmlGenericErrorContext,
1043 "malloc of %ld byte failed\n",
1044 (len + 1) * (long)sizeof(xmlChar));
1045 return(NULL);
1046 }
1047 memcpy(ret, cur, len * sizeof(xmlChar));
1048 ret[len] = 0;
1049 return(ret);
1050}
1051
1052/**
1053 * xmlStrdup:
1054 * @cur: the input xmlChar *
1055 *
1056 * a strdup for array of xmlChar's. Since they are supposed to be
1057 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1058 * a termination mark of '0'.
1059 *
1060 * Returns a new xmlChar * or NULL
1061 */
1062xmlChar *
1063xmlStrdup(const xmlChar *cur) {
1064 const xmlChar *p = cur;
1065
1066 if (cur == NULL) return(NULL);
1067 while (*p != 0) p++; /* non input consuming */
1068 return(xmlStrndup(cur, p - cur));
1069}
1070
1071/**
1072 * xmlCharStrndup:
1073 * @cur: the input char *
1074 * @len: the len of @cur
1075 *
1076 * a strndup for char's to xmlChar's
1077 *
1078 * Returns a new xmlChar * or NULL
1079 */
1080
1081xmlChar *
1082xmlCharStrndup(const char *cur, int len) {
1083 int i;
1084 xmlChar *ret;
1085
1086 if ((cur == NULL) || (len < 0)) return(NULL);
1087 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1088 if (ret == NULL) {
1089 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1090 (len + 1) * (long)sizeof(xmlChar));
1091 return(NULL);
1092 }
1093 for (i = 0;i < len;i++)
1094 ret[i] = (xmlChar) cur[i];
1095 ret[len] = 0;
1096 return(ret);
1097}
1098
1099/**
1100 * xmlCharStrdup:
1101 * @cur: the input char *
1102 * @len: the len of @cur
1103 *
1104 * a strdup for char's to xmlChar's
1105 *
1106 * Returns a new xmlChar * or NULL
1107 */
1108
1109xmlChar *
1110xmlCharStrdup(const char *cur) {
1111 const char *p = cur;
1112
1113 if (cur == NULL) return(NULL);
1114 while (*p != '\0') p++; /* non input consuming */
1115 return(xmlCharStrndup(cur, p - cur));
1116}
1117
1118/**
1119 * xmlStrcmp:
1120 * @str1: the first xmlChar *
1121 * @str2: the second xmlChar *
1122 *
1123 * a strcmp for xmlChar's
1124 *
1125 * Returns the integer result of the comparison
1126 */
1127
1128int
1129xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1130 register int tmp;
1131
1132 if (str1 == str2) return(0);
1133 if (str1 == NULL) return(-1);
1134 if (str2 == NULL) return(1);
1135 do {
1136 tmp = *str1++ - *str2;
1137 if (tmp != 0) return(tmp);
1138 } while (*str2++ != 0);
1139 return 0;
1140}
1141
1142/**
1143 * xmlStrEqual:
1144 * @str1: the first xmlChar *
1145 * @str2: the second xmlChar *
1146 *
1147 * Check if both string are equal of have same content
1148 * Should be a bit more readable and faster than xmlStrEqual()
1149 *
1150 * Returns 1 if they are equal, 0 if they are different
1151 */
1152
1153int
1154xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1155 if (str1 == str2) return(1);
1156 if (str1 == NULL) return(0);
1157 if (str2 == NULL) return(0);
1158 do {
1159 if (*str1++ != *str2) return(0);
1160 } while (*str2++);
1161 return(1);
1162}
1163
1164/**
1165 * xmlStrncmp:
1166 * @str1: the first xmlChar *
1167 * @str2: the second xmlChar *
1168 * @len: the max comparison length
1169 *
1170 * a strncmp for xmlChar's
1171 *
1172 * Returns the integer result of the comparison
1173 */
1174
1175int
1176xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1177 register int tmp;
1178
1179 if (len <= 0) return(0);
1180 if (str1 == str2) return(0);
1181 if (str1 == NULL) return(-1);
1182 if (str2 == NULL) return(1);
1183 do {
1184 tmp = *str1++ - *str2;
1185 if (tmp != 0 || --len == 0) return(tmp);
1186 } while (*str2++ != 0);
1187 return 0;
1188}
1189
1190static xmlChar casemap[256] = {
1191 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1192 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1193 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1194 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1195 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1196 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1197 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1198 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1199 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1200 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1201 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1202 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1203 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1204 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1205 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1206 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1207 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1208 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1209 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1210 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1211 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1212 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1213 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1214 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1215 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1216 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1217 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1218 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1219 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1220 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1221 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1222 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1223};
1224
1225/**
1226 * xmlStrcasecmp:
1227 * @str1: the first xmlChar *
1228 * @str2: the second xmlChar *
1229 *
1230 * a strcasecmp for xmlChar's
1231 *
1232 * Returns the integer result of the comparison
1233 */
1234
1235int
1236xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1237 register int tmp;
1238
1239 if (str1 == str2) return(0);
1240 if (str1 == NULL) return(-1);
1241 if (str2 == NULL) return(1);
1242 do {
1243 tmp = casemap[*str1++] - casemap[*str2];
1244 if (tmp != 0) return(tmp);
1245 } while (*str2++ != 0);
1246 return 0;
1247}
1248
1249/**
1250 * xmlStrncasecmp:
1251 * @str1: the first xmlChar *
1252 * @str2: the second xmlChar *
1253 * @len: the max comparison length
1254 *
1255 * a strncasecmp for xmlChar's
1256 *
1257 * Returns the integer result of the comparison
1258 */
1259
1260int
1261xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1262 register int tmp;
1263
1264 if (len <= 0) return(0);
1265 if (str1 == str2) return(0);
1266 if (str1 == NULL) return(-1);
1267 if (str2 == NULL) return(1);
1268 do {
1269 tmp = casemap[*str1++] - casemap[*str2];
1270 if (tmp != 0 || --len == 0) return(tmp);
1271 } while (*str2++ != 0);
1272 return 0;
1273}
1274
1275/**
1276 * xmlStrchr:
1277 * @str: the xmlChar * array
1278 * @val: the xmlChar to search
1279 *
1280 * a strchr for xmlChar's
1281 *
1282 * Returns the xmlChar * for the first occurence or NULL.
1283 */
1284
1285const xmlChar *
1286xmlStrchr(const xmlChar *str, xmlChar val) {
1287 if (str == NULL) return(NULL);
1288 while (*str != 0) { /* non input consuming */
1289 if (*str == val) return((xmlChar *) str);
1290 str++;
1291 }
1292 return(NULL);
1293}
1294
1295/**
1296 * xmlStrstr:
1297 * @str: the xmlChar * array (haystack)
1298 * @val: the xmlChar to search (needle)
1299 *
1300 * a strstr for xmlChar's
1301 *
1302 * Returns the xmlChar * for the first occurence or NULL.
1303 */
1304
1305const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001306xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001307 int n;
1308
1309 if (str == NULL) return(NULL);
1310 if (val == NULL) return(NULL);
1311 n = xmlStrlen(val);
1312
1313 if (n == 0) return(str);
1314 while (*str != 0) { /* non input consuming */
1315 if (*str == *val) {
1316 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1317 }
1318 str++;
1319 }
1320 return(NULL);
1321}
1322
1323/**
1324 * xmlStrcasestr:
1325 * @str: the xmlChar * array (haystack)
1326 * @val: the xmlChar to search (needle)
1327 *
1328 * a case-ignoring strstr for xmlChar's
1329 *
1330 * Returns the xmlChar * for the first occurence or NULL.
1331 */
1332
1333const xmlChar *
1334xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1335 int n;
1336
1337 if (str == NULL) return(NULL);
1338 if (val == NULL) return(NULL);
1339 n = xmlStrlen(val);
1340
1341 if (n == 0) return(str);
1342 while (*str != 0) { /* non input consuming */
1343 if (casemap[*str] == casemap[*val])
1344 if (!xmlStrncasecmp(str, val, n)) return(str);
1345 str++;
1346 }
1347 return(NULL);
1348}
1349
1350/**
1351 * xmlStrsub:
1352 * @str: the xmlChar * array (haystack)
1353 * @start: the index of the first char (zero based)
1354 * @len: the length of the substring
1355 *
1356 * Extract a substring of a given string
1357 *
1358 * Returns the xmlChar * for the first occurence or NULL.
1359 */
1360
1361xmlChar *
1362xmlStrsub(const xmlChar *str, int start, int len) {
1363 int i;
1364
1365 if (str == NULL) return(NULL);
1366 if (start < 0) return(NULL);
1367 if (len < 0) return(NULL);
1368
1369 for (i = 0;i < start;i++) {
1370 if (*str == 0) return(NULL);
1371 str++;
1372 }
1373 if (*str == 0) return(NULL);
1374 return(xmlStrndup(str, len));
1375}
1376
1377/**
1378 * xmlStrlen:
1379 * @str: the xmlChar * array
1380 *
1381 * length of a xmlChar's string
1382 *
1383 * Returns the number of xmlChar contained in the ARRAY.
1384 */
1385
1386int
1387xmlStrlen(const xmlChar *str) {
1388 int len = 0;
1389
1390 if (str == NULL) return(0);
1391 while (*str != 0) { /* non input consuming */
1392 str++;
1393 len++;
1394 }
1395 return(len);
1396}
1397
1398/**
1399 * xmlStrncat:
1400 * @cur: the original xmlChar * array
1401 * @add: the xmlChar * array added
1402 * @len: the length of @add
1403 *
1404 * a strncat for array of xmlChar's, it will extend cur with the len
1405 * first bytes of @add.
1406 *
1407 * Returns a new xmlChar *, the original @cur is reallocated if needed
1408 * and should not be freed
1409 */
1410
1411xmlChar *
1412xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1413 int size;
1414 xmlChar *ret;
1415
1416 if ((add == NULL) || (len == 0))
1417 return(cur);
1418 if (cur == NULL)
1419 return(xmlStrndup(add, len));
1420
1421 size = xmlStrlen(cur);
1422 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1423 if (ret == NULL) {
1424 xmlGenericError(xmlGenericErrorContext,
1425 "xmlStrncat: realloc of %ld byte failed\n",
1426 (size + len + 1) * (long)sizeof(xmlChar));
1427 return(cur);
1428 }
1429 memcpy(&ret[size], add, len * sizeof(xmlChar));
1430 ret[size + len] = 0;
1431 return(ret);
1432}
1433
1434/**
1435 * xmlStrcat:
1436 * @cur: the original xmlChar * array
1437 * @add: the xmlChar * array added
1438 *
1439 * a strcat for array of xmlChar's. Since they are supposed to be
1440 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1441 * a termination mark of '0'.
1442 *
1443 * Returns a new xmlChar * containing the concatenated string.
1444 */
1445xmlChar *
1446xmlStrcat(xmlChar *cur, const xmlChar *add) {
1447 const xmlChar *p = add;
1448
1449 if (add == NULL) return(cur);
1450 if (cur == NULL)
1451 return(xmlStrdup(add));
1452
1453 while (*p != 0) p++; /* non input consuming */
1454 return(xmlStrncat(cur, add, p - add));
1455}
1456
1457/************************************************************************
1458 * *
1459 * Commodity functions, cleanup needed ? *
1460 * *
1461 ************************************************************************/
1462
1463/**
1464 * areBlanks:
1465 * @ctxt: an XML parser context
1466 * @str: a xmlChar *
1467 * @len: the size of @str
1468 *
1469 * Is this a sequence of blank chars that one can ignore ?
1470 *
1471 * Returns 1 if ignorable 0 otherwise.
1472 */
1473
1474static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1475 int i, ret;
1476 xmlNodePtr lastChild;
1477
Daniel Veillard2f362242001-03-02 17:36:21 +00001478 if (ctxt->keepBlanks)
1479 return(0);
1480
Owen Taylor3473f882001-02-23 17:55:21 +00001481 /*
1482 * Check for xml:space value.
1483 */
1484 if (*(ctxt->space) == 1)
1485 return(0);
1486
1487 /*
1488 * Check that the string is made of blanks
1489 */
1490 for (i = 0;i < len;i++)
1491 if (!(IS_BLANK(str[i]))) return(0);
1492
1493 /*
1494 * Look if the element is mixed content in the Dtd if available
1495 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001496 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001497 if (ctxt->myDoc != NULL) {
1498 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1499 if (ret == 0) return(1);
1500 if (ret == 1) return(0);
1501 }
1502
1503 /*
1504 * Otherwise, heuristic :-\
1505 */
Owen Taylor3473f882001-02-23 17:55:21 +00001506 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001507 if ((ctxt->node->children == NULL) &&
1508 (RAW == '<') && (NXT(1) == '/')) return(0);
1509
1510 lastChild = xmlGetLastChild(ctxt->node);
1511 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001512 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1513 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001514 } else if (xmlNodeIsText(lastChild))
1515 return(0);
1516 else if ((ctxt->node->children != NULL) &&
1517 (xmlNodeIsText(ctxt->node->children)))
1518 return(0);
1519 return(1);
1520}
1521
/*
 * Forward declarations for recursive behaviour.
 */
1525void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1526void xmlParseReference(xmlParserCtxtPtr ctxt);
1527
1528/************************************************************************
1529 * *
1530 * Extra stuff for namespace support *
1531 * Relates to http://www.w3.org/TR/WD-xml-names *
1532 * *
1533 ************************************************************************/
1534
1535/**
1536 * xmlSplitQName:
1537 * @ctxt: an XML parser context
1538 * @name: an XML parser context
1539 * @prefix: a xmlChar **
1540 *
1541 * parse an UTF8 encoded XML qualified name string
1542 *
1543 * [NS 5] QName ::= (Prefix ':')? LocalPart
1544 *
1545 * [NS 6] Prefix ::= NCName
1546 *
1547 * [NS 7] LocalPart ::= NCName
1548 *
1549 * Returns the local part, and prefix is updated
1550 * to get the Prefix if any.
1551 */
1552
1553xmlChar *
1554xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1555 xmlChar buf[XML_MAX_NAMELEN + 5];
1556 xmlChar *buffer = NULL;
1557 int len = 0;
1558 int max = XML_MAX_NAMELEN;
1559 xmlChar *ret = NULL;
1560 const xmlChar *cur = name;
1561 int c;
1562
1563 *prefix = NULL;
1564
1565#ifndef XML_XML_NAMESPACE
1566 /* xml: prefix is not really a namespace */
1567 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1568 (cur[2] == 'l') && (cur[3] == ':'))
1569 return(xmlStrdup(name));
1570#endif
1571
1572 /* nasty but valid */
1573 if (cur[0] == ':')
1574 return(xmlStrdup(name));
1575
1576 c = *cur++;
1577 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1578 buf[len++] = c;
1579 c = *cur++;
1580 }
1581 if (len >= max) {
1582 /*
1583 * Okay someone managed to make a huge name, so he's ready to pay
1584 * for the processing speed.
1585 */
1586 max = len * 2;
1587
1588 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1589 if (buffer == NULL) {
1590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1591 ctxt->sax->error(ctxt->userData,
1592 "xmlSplitQName: out of memory\n");
1593 return(NULL);
1594 }
1595 memcpy(buffer, buf, len);
1596 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1597 if (len + 10 > max) {
1598 max *= 2;
1599 buffer = (xmlChar *) xmlRealloc(buffer,
1600 max * sizeof(xmlChar));
1601 if (buffer == NULL) {
1602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1603 ctxt->sax->error(ctxt->userData,
1604 "xmlSplitQName: out of memory\n");
1605 return(NULL);
1606 }
1607 }
1608 buffer[len++] = c;
1609 c = *cur++;
1610 }
1611 buffer[len] = 0;
1612 }
1613
1614 if (buffer == NULL)
1615 ret = xmlStrndup(buf, len);
1616 else {
1617 ret = buffer;
1618 buffer = NULL;
1619 max = XML_MAX_NAMELEN;
1620 }
1621
1622
1623 if (c == ':') {
1624 c = *cur++;
1625 if (c == 0) return(ret);
1626 *prefix = ret;
1627 len = 0;
1628
1629 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1630 buf[len++] = c;
1631 c = *cur++;
1632 }
1633 if (len >= max) {
1634 /*
1635 * Okay someone managed to make a huge name, so he's ready to pay
1636 * for the processing speed.
1637 */
1638 max = len * 2;
1639
1640 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1641 if (buffer == NULL) {
1642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1643 ctxt->sax->error(ctxt->userData,
1644 "xmlSplitQName: out of memory\n");
1645 return(NULL);
1646 }
1647 memcpy(buffer, buf, len);
1648 while (c != 0) { /* tested bigname2.xml */
1649 if (len + 10 > max) {
1650 max *= 2;
1651 buffer = (xmlChar *) xmlRealloc(buffer,
1652 max * sizeof(xmlChar));
1653 if (buffer == NULL) {
1654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1655 ctxt->sax->error(ctxt->userData,
1656 "xmlSplitQName: out of memory\n");
1657 return(NULL);
1658 }
1659 }
1660 buffer[len++] = c;
1661 c = *cur++;
1662 }
1663 buffer[len] = 0;
1664 }
1665
1666 if (buffer == NULL)
1667 ret = xmlStrndup(buf, len);
1668 else {
1669 ret = buffer;
1670 }
1671 }
1672
1673 return(ret);
1674}
1675
1676/************************************************************************
1677 * *
1678 * The parser itself *
1679 * Relates to http://www.w3.org/TR/REC-xml *
1680 * *
1681 ************************************************************************/
1682
Daniel Veillard76d66f42001-05-16 21:05:17 +00001683static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001684/**
1685 * xmlParseName:
1686 * @ctxt: an XML parser context
1687 *
1688 * parse an XML name.
1689 *
1690 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1691 * CombiningChar | Extender
1692 *
1693 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1694 *
1695 * [6] Names ::= Name (S Name)*
1696 *
1697 * Returns the Name parsed or NULL
1698 */
1699
1700xmlChar *
1701xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001702 const xmlChar *in;
1703 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001704 int count = 0;
1705
1706 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001707
1708 /*
1709 * Accelerator for simple ASCII names
1710 */
1711 in = ctxt->input->cur;
1712 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1713 ((*in >= 0x41) && (*in <= 0x5A)) ||
1714 (*in == '_') || (*in == ':')) {
1715 in++;
1716 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1717 ((*in >= 0x41) && (*in <= 0x5A)) ||
1718 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001719 (*in == '_') || (*in == '-') ||
1720 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001721 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001722 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001723 count = in - ctxt->input->cur;
1724 ret = xmlStrndup(ctxt->input->cur, count);
1725 ctxt->input->cur = in;
1726 return(ret);
1727 }
1728 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001729 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001730}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001731
Daniel Veillard76d66f42001-05-16 21:05:17 +00001732static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001733xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1734 xmlChar buf[XML_MAX_NAMELEN + 5];
1735 int len = 0, l;
1736 int c;
1737 int count = 0;
1738
1739 /*
1740 * Handler for more complex cases
1741 */
1742 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001743 c = CUR_CHAR(l);
1744 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1745 (!IS_LETTER(c) && (c != '_') &&
1746 (c != ':'))) {
1747 return(NULL);
1748 }
1749
1750 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1751 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1752 (c == '.') || (c == '-') ||
1753 (c == '_') || (c == ':') ||
1754 (IS_COMBINING(c)) ||
1755 (IS_EXTENDER(c)))) {
1756 if (count++ > 100) {
1757 count = 0;
1758 GROW;
1759 }
1760 COPY_BUF(l,buf,len,c);
1761 NEXTL(l);
1762 c = CUR_CHAR(l);
1763 if (len >= XML_MAX_NAMELEN) {
1764 /*
1765 * Okay someone managed to make a huge name, so he's ready to pay
1766 * for the processing speed.
1767 */
1768 xmlChar *buffer;
1769 int max = len * 2;
1770
1771 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1772 if (buffer == NULL) {
1773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1774 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001775 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001776 return(NULL);
1777 }
1778 memcpy(buffer, buf, len);
1779 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1780 (c == '.') || (c == '-') ||
1781 (c == '_') || (c == ':') ||
1782 (IS_COMBINING(c)) ||
1783 (IS_EXTENDER(c))) {
1784 if (count++ > 100) {
1785 count = 0;
1786 GROW;
1787 }
1788 if (len + 10 > max) {
1789 max *= 2;
1790 buffer = (xmlChar *) xmlRealloc(buffer,
1791 max * sizeof(xmlChar));
1792 if (buffer == NULL) {
1793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1794 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001795 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001796 return(NULL);
1797 }
1798 }
1799 COPY_BUF(l,buffer,len,c);
1800 NEXTL(l);
1801 c = CUR_CHAR(l);
1802 }
1803 buffer[len] = 0;
1804 return(buffer);
1805 }
1806 }
1807 return(xmlStrndup(buf, len));
1808}
1809
1810/**
1811 * xmlParseStringName:
1812 * @ctxt: an XML parser context
1813 * @str: a pointer to the string pointer (IN/OUT)
1814 *
1815 * parse an XML name.
1816 *
1817 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1818 * CombiningChar | Extender
1819 *
1820 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1821 *
1822 * [6] Names ::= Name (S Name)*
1823 *
1824 * Returns the Name parsed or NULL. The str pointer
1825 * is updated to the current location in the string.
1826 */
1827
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001828static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001829xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1830 xmlChar buf[XML_MAX_NAMELEN + 5];
1831 const xmlChar *cur = *str;
1832 int len = 0, l;
1833 int c;
1834
1835 c = CUR_SCHAR(cur, l);
1836 if (!IS_LETTER(c) && (c != '_') &&
1837 (c != ':')) {
1838 return(NULL);
1839 }
1840
1841 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1842 (c == '.') || (c == '-') ||
1843 (c == '_') || (c == ':') ||
1844 (IS_COMBINING(c)) ||
1845 (IS_EXTENDER(c))) {
1846 COPY_BUF(l,buf,len,c);
1847 cur += l;
1848 c = CUR_SCHAR(cur, l);
1849 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1850 /*
1851 * Okay someone managed to make a huge name, so he's ready to pay
1852 * for the processing speed.
1853 */
1854 xmlChar *buffer;
1855 int max = len * 2;
1856
1857 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1858 if (buffer == NULL) {
1859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1860 ctxt->sax->error(ctxt->userData,
1861 "xmlParseStringName: out of memory\n");
1862 return(NULL);
1863 }
1864 memcpy(buffer, buf, len);
1865 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1866 (c == '.') || (c == '-') ||
1867 (c == '_') || (c == ':') ||
1868 (IS_COMBINING(c)) ||
1869 (IS_EXTENDER(c))) {
1870 if (len + 10 > max) {
1871 max *= 2;
1872 buffer = (xmlChar *) xmlRealloc(buffer,
1873 max * sizeof(xmlChar));
1874 if (buffer == NULL) {
1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1876 ctxt->sax->error(ctxt->userData,
1877 "xmlParseStringName: out of memory\n");
1878 return(NULL);
1879 }
1880 }
1881 COPY_BUF(l,buffer,len,c);
1882 cur += l;
1883 c = CUR_SCHAR(cur, l);
1884 }
1885 buffer[len] = 0;
1886 *str = cur;
1887 return(buffer);
1888 }
1889 }
1890 *str = cur;
1891 return(xmlStrndup(buf, len));
1892}
1893
1894/**
1895 * xmlParseNmtoken:
1896 * @ctxt: an XML parser context
1897 *
1898 * parse an XML Nmtoken.
1899 *
1900 * [7] Nmtoken ::= (NameChar)+
1901 *
1902 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1903 *
1904 * Returns the Nmtoken parsed or NULL
1905 */
1906
1907xmlChar *
1908xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1909 xmlChar buf[XML_MAX_NAMELEN + 5];
1910 int len = 0, l;
1911 int c;
1912 int count = 0;
1913
1914 GROW;
1915 c = CUR_CHAR(l);
1916
1917 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1918 (c == '.') || (c == '-') ||
1919 (c == '_') || (c == ':') ||
1920 (IS_COMBINING(c)) ||
1921 (IS_EXTENDER(c))) {
1922 if (count++ > 100) {
1923 count = 0;
1924 GROW;
1925 }
1926 COPY_BUF(l,buf,len,c);
1927 NEXTL(l);
1928 c = CUR_CHAR(l);
1929 if (len >= XML_MAX_NAMELEN) {
1930 /*
1931 * Okay someone managed to make a huge token, so he's ready to pay
1932 * for the processing speed.
1933 */
1934 xmlChar *buffer;
1935 int max = len * 2;
1936
1937 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1938 if (buffer == NULL) {
1939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1940 ctxt->sax->error(ctxt->userData,
1941 "xmlParseNmtoken: out of memory\n");
1942 return(NULL);
1943 }
1944 memcpy(buffer, buf, len);
1945 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1946 (c == '.') || (c == '-') ||
1947 (c == '_') || (c == ':') ||
1948 (IS_COMBINING(c)) ||
1949 (IS_EXTENDER(c))) {
1950 if (count++ > 100) {
1951 count = 0;
1952 GROW;
1953 }
1954 if (len + 10 > max) {
1955 max *= 2;
1956 buffer = (xmlChar *) xmlRealloc(buffer,
1957 max * sizeof(xmlChar));
1958 if (buffer == NULL) {
1959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1960 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001961 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001962 return(NULL);
1963 }
1964 }
1965 COPY_BUF(l,buffer,len,c);
1966 NEXTL(l);
1967 c = CUR_CHAR(l);
1968 }
1969 buffer[len] = 0;
1970 return(buffer);
1971 }
1972 }
1973 if (len == 0)
1974 return(NULL);
1975 return(xmlStrndup(buf, len));
1976}
1977
1978/**
1979 * xmlParseEntityValue:
1980 * @ctxt: an XML parser context
1981 * @orig: if non-NULL store a copy of the original entity value
1982 *
1983 * parse a value for ENTITY declarations
1984 *
1985 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1986 * "'" ([^%&'] | PEReference | Reference)* "'"
1987 *
1988 * Returns the EntityValue parsed with reference substitued or NULL
1989 */
1990
1991xmlChar *
1992xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1993 xmlChar *buf = NULL;
1994 int len = 0;
1995 int size = XML_PARSER_BUFFER_SIZE;
1996 int c, l;
1997 xmlChar stop;
1998 xmlChar *ret = NULL;
1999 const xmlChar *cur = NULL;
2000 xmlParserInputPtr input;
2001
2002 if (RAW == '"') stop = '"';
2003 else if (RAW == '\'') stop = '\'';
2004 else {
2005 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2007 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2008 ctxt->wellFormed = 0;
2009 ctxt->disableSAX = 1;
2010 return(NULL);
2011 }
2012 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2013 if (buf == NULL) {
2014 xmlGenericError(xmlGenericErrorContext,
2015 "malloc of %d byte failed\n", size);
2016 return(NULL);
2017 }
2018
2019 /*
2020 * The content of the entity definition is copied in a buffer.
2021 */
2022
2023 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2024 input = ctxt->input;
2025 GROW;
2026 NEXT;
2027 c = CUR_CHAR(l);
2028 /*
2029 * NOTE: 4.4.5 Included in Literal
2030 * When a parameter entity reference appears in a literal entity
2031 * value, ... a single or double quote character in the replacement
2032 * text is always treated as a normal data character and will not
2033 * terminate the literal.
2034 * In practice it means we stop the loop only when back at parsing
2035 * the initial entity and the quote is found
2036 */
2037 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2038 (ctxt->input != input))) {
2039 if (len + 5 >= size) {
2040 size *= 2;
2041 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2042 if (buf == NULL) {
2043 xmlGenericError(xmlGenericErrorContext,
2044 "realloc of %d byte failed\n", size);
2045 return(NULL);
2046 }
2047 }
2048 COPY_BUF(l,buf,len,c);
2049 NEXTL(l);
2050 /*
2051 * Pop-up of finished entities.
2052 */
2053 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2054 xmlPopInput(ctxt);
2055
2056 GROW;
2057 c = CUR_CHAR(l);
2058 if (c == 0) {
2059 GROW;
2060 c = CUR_CHAR(l);
2061 }
2062 }
2063 buf[len] = 0;
2064
2065 /*
2066 * Raise problem w.r.t. '&' and '%' being used in non-entities
2067 * reference constructs. Note Charref will be handled in
2068 * xmlStringDecodeEntities()
2069 */
2070 cur = buf;
2071 while (*cur != 0) { /* non input consuming */
2072 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2073 xmlChar *name;
2074 xmlChar tmp = *cur;
2075
2076 cur++;
2077 name = xmlParseStringName(ctxt, &cur);
2078 if ((name == NULL) || (*cur != ';')) {
2079 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2081 ctxt->sax->error(ctxt->userData,
2082 "EntityValue: '%c' forbidden except for entities references\n",
2083 tmp);
2084 ctxt->wellFormed = 0;
2085 ctxt->disableSAX = 1;
2086 }
2087 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2088 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2090 ctxt->sax->error(ctxt->userData,
2091 "EntityValue: PEReferences forbidden in internal subset\n",
2092 tmp);
2093 ctxt->wellFormed = 0;
2094 ctxt->disableSAX = 1;
2095 }
2096 if (name != NULL)
2097 xmlFree(name);
2098 }
2099 cur++;
2100 }
2101
2102 /*
2103 * Then PEReference entities are substituted.
2104 */
2105 if (c != stop) {
2106 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2108 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2109 ctxt->wellFormed = 0;
2110 ctxt->disableSAX = 1;
2111 xmlFree(buf);
2112 } else {
2113 NEXT;
2114 /*
2115 * NOTE: 4.4.7 Bypassed
2116 * When a general entity reference appears in the EntityValue in
2117 * an entity declaration, it is bypassed and left as is.
2118 * so XML_SUBSTITUTE_REF is not set here.
2119 */
2120 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2121 0, 0, 0);
2122 if (orig != NULL)
2123 *orig = buf;
2124 else
2125 xmlFree(buf);
2126 }
2127
2128 return(ret);
2129}
2130
2131/**
2132 * xmlParseAttValue:
2133 * @ctxt: an XML parser context
2134 *
2135 * parse a value for an attribute
2136 * Note: the parser won't do substitution of entities here, this
2137 * will be handled later in xmlStringGetNodeList
2138 *
2139 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2140 * "'" ([^<&'] | Reference)* "'"
2141 *
2142 * 3.3.3 Attribute-Value Normalization:
2143 * Before the value of an attribute is passed to the application or
2144 * checked for validity, the XML processor must normalize it as follows:
2145 * - a character reference is processed by appending the referenced
2146 * character to the attribute value
2147 * - an entity reference is processed by recursively processing the
2148 * replacement text of the entity
2149 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2150 * appending #x20 to the normalized value, except that only a single
2151 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2152 * parsed entity or the literal entity value of an internal parsed entity
2153 * - other characters are processed by appending them to the normalized value
2154 * If the declared value is not CDATA, then the XML processor must further
2155 * process the normalized attribute value by discarding any leading and
2156 * trailing space (#x20) characters, and by replacing sequences of space
2157 * (#x20) characters by a single space (#x20) character.
2158 * All attributes for which no declaration has been read should be treated
2159 * by a non-validating parser as if declared CDATA.
2160 *
2161 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2162 */
2163
2164xmlChar *
2165xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2166 xmlChar limit = 0;
2167 xmlChar *buf = NULL;
2168 int len = 0;
2169 int buf_size = 0;
2170 int c, l;
2171 xmlChar *current = NULL;
2172 xmlEntityPtr ent;
2173
2174
2175 SHRINK;
2176 if (NXT(0) == '"') {
2177 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2178 limit = '"';
2179 NEXT;
2180 } else if (NXT(0) == '\'') {
2181 limit = '\'';
2182 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2183 NEXT;
2184 } else {
2185 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2187 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2188 ctxt->wellFormed = 0;
2189 ctxt->disableSAX = 1;
2190 return(NULL);
2191 }
2192
2193 /*
2194 * allocate a translation buffer.
2195 */
2196 buf_size = XML_PARSER_BUFFER_SIZE;
2197 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2198 if (buf == NULL) {
2199 perror("xmlParseAttValue: malloc failed");
2200 return(NULL);
2201 }
2202
2203 /*
2204 * Ok loop until we reach one of the ending char or a size limit.
2205 */
2206 c = CUR_CHAR(l);
2207 while (((NXT(0) != limit) && /* checked */
2208 (c != '<')) || (ctxt->token != 0)) {
2209 if (c == 0) break;
2210 if (ctxt->token == '&') {
2211 /*
2212 * The reparsing will be done in xmlStringGetNodeList()
2213 * called by the attribute() function in SAX.c
2214 */
2215 static xmlChar buffer[6] = "&#38;";
2216
2217 if (len > buf_size - 10) {
2218 growBuffer(buf);
2219 }
2220 current = &buffer[0];
2221 while (*current != 0) { /* non input consuming */
2222 buf[len++] = *current++;
2223 }
2224 ctxt->token = 0;
2225 } else if (c == '&') {
2226 if (NXT(1) == '#') {
2227 int val = xmlParseCharRef(ctxt);
2228 if (val == '&') {
2229 /*
2230 * The reparsing will be done in xmlStringGetNodeList()
2231 * called by the attribute() function in SAX.c
2232 */
2233 static xmlChar buffer[6] = "&#38;";
2234
2235 if (len > buf_size - 10) {
2236 growBuffer(buf);
2237 }
2238 current = &buffer[0];
2239 while (*current != 0) { /* non input consuming */
2240 buf[len++] = *current++;
2241 }
2242 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002243 if (len > buf_size - 10) {
2244 growBuffer(buf);
2245 }
Owen Taylor3473f882001-02-23 17:55:21 +00002246 len += xmlCopyChar(0, &buf[len], val);
2247 }
2248 } else {
2249 ent = xmlParseEntityRef(ctxt);
2250 if ((ent != NULL) &&
2251 (ctxt->replaceEntities != 0)) {
2252 xmlChar *rep;
2253
2254 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2255 rep = xmlStringDecodeEntities(ctxt, ent->content,
2256 XML_SUBSTITUTE_REF, 0, 0, 0);
2257 if (rep != NULL) {
2258 current = rep;
2259 while (*current != 0) { /* non input consuming */
2260 buf[len++] = *current++;
2261 if (len > buf_size - 10) {
2262 growBuffer(buf);
2263 }
2264 }
2265 xmlFree(rep);
2266 }
2267 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002268 if (len > buf_size - 10) {
2269 growBuffer(buf);
2270 }
Owen Taylor3473f882001-02-23 17:55:21 +00002271 if (ent->content != NULL)
2272 buf[len++] = ent->content[0];
2273 }
2274 } else if (ent != NULL) {
2275 int i = xmlStrlen(ent->name);
2276 const xmlChar *cur = ent->name;
2277
2278 /*
2279 * This may look absurd but is needed to detect
2280 * entities problems
2281 */
2282 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2283 (ent->content != NULL)) {
2284 xmlChar *rep;
2285 rep = xmlStringDecodeEntities(ctxt, ent->content,
2286 XML_SUBSTITUTE_REF, 0, 0, 0);
2287 if (rep != NULL)
2288 xmlFree(rep);
2289 }
2290
2291 /*
2292 * Just output the reference
2293 */
2294 buf[len++] = '&';
2295 if (len > buf_size - i - 10) {
2296 growBuffer(buf);
2297 }
2298 for (;i > 0;i--)
2299 buf[len++] = *cur++;
2300 buf[len++] = ';';
2301 }
2302 }
2303 } else {
2304 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2305 COPY_BUF(l,buf,len,0x20);
2306 if (len > buf_size - 10) {
2307 growBuffer(buf);
2308 }
2309 } else {
2310 COPY_BUF(l,buf,len,c);
2311 if (len > buf_size - 10) {
2312 growBuffer(buf);
2313 }
2314 }
2315 NEXTL(l);
2316 }
2317 GROW;
2318 c = CUR_CHAR(l);
2319 }
2320 buf[len++] = 0;
2321 if (RAW == '<') {
2322 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2324 ctxt->sax->error(ctxt->userData,
2325 "Unescaped '<' not allowed in attributes values\n");
2326 ctxt->wellFormed = 0;
2327 ctxt->disableSAX = 1;
2328 } else if (RAW != limit) {
2329 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2331 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2332 ctxt->wellFormed = 0;
2333 ctxt->disableSAX = 1;
2334 } else
2335 NEXT;
2336 return(buf);
2337}
2338
2339/**
2340 * xmlParseSystemLiteral:
2341 * @ctxt: an XML parser context
2342 *
2343 * parse an XML Literal
2344 *
2345 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2346 *
2347 * Returns the SystemLiteral parsed or NULL
2348 */
2349
2350xmlChar *
2351xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2352 xmlChar *buf = NULL;
2353 int len = 0;
2354 int size = XML_PARSER_BUFFER_SIZE;
2355 int cur, l;
2356 xmlChar stop;
2357 int state = ctxt->instate;
2358 int count = 0;
2359
2360 SHRINK;
2361 if (RAW == '"') {
2362 NEXT;
2363 stop = '"';
2364 } else if (RAW == '\'') {
2365 NEXT;
2366 stop = '\'';
2367 } else {
2368 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2370 ctxt->sax->error(ctxt->userData,
2371 "SystemLiteral \" or ' expected\n");
2372 ctxt->wellFormed = 0;
2373 ctxt->disableSAX = 1;
2374 return(NULL);
2375 }
2376
2377 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2378 if (buf == NULL) {
2379 xmlGenericError(xmlGenericErrorContext,
2380 "malloc of %d byte failed\n", size);
2381 return(NULL);
2382 }
2383 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2384 cur = CUR_CHAR(l);
2385 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2386 if (len + 5 >= size) {
2387 size *= 2;
2388 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2389 if (buf == NULL) {
2390 xmlGenericError(xmlGenericErrorContext,
2391 "realloc of %d byte failed\n", size);
2392 ctxt->instate = (xmlParserInputState) state;
2393 return(NULL);
2394 }
2395 }
2396 count++;
2397 if (count > 50) {
2398 GROW;
2399 count = 0;
2400 }
2401 COPY_BUF(l,buf,len,cur);
2402 NEXTL(l);
2403 cur = CUR_CHAR(l);
2404 if (cur == 0) {
2405 GROW;
2406 SHRINK;
2407 cur = CUR_CHAR(l);
2408 }
2409 }
2410 buf[len] = 0;
2411 ctxt->instate = (xmlParserInputState) state;
2412 if (!IS_CHAR(cur)) {
2413 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2415 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2416 ctxt->wellFormed = 0;
2417 ctxt->disableSAX = 1;
2418 } else {
2419 NEXT;
2420 }
2421 return(buf);
2422}
2423
2424/**
2425 * xmlParsePubidLiteral:
2426 * @ctxt: an XML parser context
2427 *
2428 * parse an XML public literal
2429 *
2430 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2431 *
2432 * Returns the PubidLiteral parsed or NULL.
2433 */
2434
2435xmlChar *
2436xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2437 xmlChar *buf = NULL;
2438 int len = 0;
2439 int size = XML_PARSER_BUFFER_SIZE;
2440 xmlChar cur;
2441 xmlChar stop;
2442 int count = 0;
2443
2444 SHRINK;
2445 if (RAW == '"') {
2446 NEXT;
2447 stop = '"';
2448 } else if (RAW == '\'') {
2449 NEXT;
2450 stop = '\'';
2451 } else {
2452 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2454 ctxt->sax->error(ctxt->userData,
2455 "SystemLiteral \" or ' expected\n");
2456 ctxt->wellFormed = 0;
2457 ctxt->disableSAX = 1;
2458 return(NULL);
2459 }
2460 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2461 if (buf == NULL) {
2462 xmlGenericError(xmlGenericErrorContext,
2463 "malloc of %d byte failed\n", size);
2464 return(NULL);
2465 }
2466 cur = CUR;
2467 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2468 if (len + 1 >= size) {
2469 size *= 2;
2470 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2471 if (buf == NULL) {
2472 xmlGenericError(xmlGenericErrorContext,
2473 "realloc of %d byte failed\n", size);
2474 return(NULL);
2475 }
2476 }
2477 buf[len++] = cur;
2478 count++;
2479 if (count > 50) {
2480 GROW;
2481 count = 0;
2482 }
2483 NEXT;
2484 cur = CUR;
2485 if (cur == 0) {
2486 GROW;
2487 SHRINK;
2488 cur = CUR;
2489 }
2490 }
2491 buf[len] = 0;
2492 if (cur != stop) {
2493 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2495 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2496 ctxt->wellFormed = 0;
2497 ctxt->disableSAX = 1;
2498 } else {
2499 NEXT;
2500 }
2501 return(buf);
2502}
2503
Daniel Veillard48b2f892001-02-25 16:11:03 +00002504void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002505/**
2506 * xmlParseCharData:
2507 * @ctxt: an XML parser context
2508 * @cdata: int indicating whether we are within a CDATA section
2509 *
2510 * parse a CharData section.
2511 * if we are within a CDATA section ']]>' marks an end of section.
2512 *
2513 * The right angle bracket (>) may be represented using the string "&gt;",
2514 * and must, for compatibility, be escaped using "&gt;" or a character
2515 * reference when it appears in the string "]]>" in content, when that
2516 * string is not marking the end of a CDATA section.
2517 *
2518 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2519 */
2520
2521void
2522xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002523 const xmlChar *in;
2524 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002525 int line = ctxt->input->line;
2526 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002527
2528 SHRINK;
2529 GROW;
2530 /*
2531 * Accelerated common case where input don't need to be
2532 * modified before passing it to the handler.
2533 */
2534 if ((ctxt->token == 0) && (!cdata)) {
2535 in = ctxt->input->cur;
2536 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002537get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002538 while (((*in >= 0x20) && (*in != '<') &&
2539 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2540 in++;
2541 if (*in == 0xA) {
2542 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002543 in++;
2544 while (*in == 0xA) {
2545 ctxt->input->line++;
2546 in++;
2547 }
2548 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002549 }
2550 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002551 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002552 if (IS_BLANK(*ctxt->input->cur)) {
2553 const xmlChar *tmp = ctxt->input->cur;
2554 ctxt->input->cur = in;
2555 if (areBlanks(ctxt, tmp, nbchar)) {
2556 if (ctxt->sax->ignorableWhitespace != NULL)
2557 ctxt->sax->ignorableWhitespace(ctxt->userData,
2558 tmp, nbchar);
2559 } else {
2560 if (ctxt->sax->characters != NULL)
2561 ctxt->sax->characters(ctxt->userData,
2562 tmp, nbchar);
2563 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002564 line = ctxt->input->line;
2565 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002566 } else {
2567 if (ctxt->sax->characters != NULL)
2568 ctxt->sax->characters(ctxt->userData,
2569 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002570 line = ctxt->input->line;
2571 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002572 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002573 }
2574 ctxt->input->cur = in;
2575 if (*in == 0xD) {
2576 in++;
2577 if (*in == 0xA) {
2578 ctxt->input->cur = in;
2579 in++;
2580 ctxt->input->line++;
2581 continue; /* while */
2582 }
2583 in--;
2584 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002585 if (*in == '<') {
2586 return;
2587 }
2588 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002589 return;
2590 }
2591 SHRINK;
2592 GROW;
2593 in = ctxt->input->cur;
2594 } while ((*in >= 0x20) && (*in <= 0x7F));
2595 nbchar = 0;
2596 }
Daniel Veillard50582112001-03-26 22:52:16 +00002597 ctxt->input->line = line;
2598 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 xmlParseCharDataComplex(ctxt, cdata);
2600}
2601
2602void
2603xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002604 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2605 int nbchar = 0;
2606 int cur, l;
2607 int count = 0;
2608
2609 SHRINK;
2610 GROW;
2611 cur = CUR_CHAR(l);
2612 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2613 ((cur != '&') || (ctxt->token == '&')) &&
2614 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2615 if ((cur == ']') && (NXT(1) == ']') &&
2616 (NXT(2) == '>')) {
2617 if (cdata) break;
2618 else {
2619 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2621 ctxt->sax->error(ctxt->userData,
2622 "Sequence ']]>' not allowed in content\n");
2623 /* Should this be relaxed ??? I see a "must here */
2624 ctxt->wellFormed = 0;
2625 ctxt->disableSAX = 1;
2626 }
2627 }
2628 COPY_BUF(l,buf,nbchar,cur);
2629 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2630 /*
2631 * Ok the segment is to be consumed as chars.
2632 */
2633 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2634 if (areBlanks(ctxt, buf, nbchar)) {
2635 if (ctxt->sax->ignorableWhitespace != NULL)
2636 ctxt->sax->ignorableWhitespace(ctxt->userData,
2637 buf, nbchar);
2638 } else {
2639 if (ctxt->sax->characters != NULL)
2640 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2641 }
2642 }
2643 nbchar = 0;
2644 }
2645 count++;
2646 if (count > 50) {
2647 GROW;
2648 count = 0;
2649 }
2650 NEXTL(l);
2651 cur = CUR_CHAR(l);
2652 }
2653 if (nbchar != 0) {
2654 /*
2655 * Ok the segment is to be consumed as chars.
2656 */
2657 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2658 if (areBlanks(ctxt, buf, nbchar)) {
2659 if (ctxt->sax->ignorableWhitespace != NULL)
2660 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2661 } else {
2662 if (ctxt->sax->characters != NULL)
2663 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2664 }
2665 }
2666 }
2667}
2668
2669/**
2670 * xmlParseExternalID:
2671 * @ctxt: an XML parser context
2672 * @publicID: a xmlChar** receiving PubidLiteral
2673 * @strict: indicate whether we should restrict parsing to only
2674 * production [75], see NOTE below
2675 *
2676 * Parse an External ID or a Public ID
2677 *
2678 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2679 * 'PUBLIC' S PubidLiteral S SystemLiteral
2680 *
2681 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2682 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2683 *
2684 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2685 *
2686 * Returns the function returns SystemLiteral and in the second
2687 * case publicID receives PubidLiteral, is strict is off
2688 * it is possible to return NULL and have publicID set.
2689 */
2690
2691xmlChar *
2692xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2693 xmlChar *URI = NULL;
2694
2695 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002696
2697 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002698 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2699 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2700 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2701 SKIP(6);
2702 if (!IS_BLANK(CUR)) {
2703 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2705 ctxt->sax->error(ctxt->userData,
2706 "Space required after 'SYSTEM'\n");
2707 ctxt->wellFormed = 0;
2708 ctxt->disableSAX = 1;
2709 }
2710 SKIP_BLANKS;
2711 URI = xmlParseSystemLiteral(ctxt);
2712 if (URI == NULL) {
2713 ctxt->errNo = XML_ERR_URI_REQUIRED;
2714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2715 ctxt->sax->error(ctxt->userData,
2716 "xmlParseExternalID: SYSTEM, no URI\n");
2717 ctxt->wellFormed = 0;
2718 ctxt->disableSAX = 1;
2719 }
2720 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2721 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2722 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2723 SKIP(6);
2724 if (!IS_BLANK(CUR)) {
2725 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2727 ctxt->sax->error(ctxt->userData,
2728 "Space required after 'PUBLIC'\n");
2729 ctxt->wellFormed = 0;
2730 ctxt->disableSAX = 1;
2731 }
2732 SKIP_BLANKS;
2733 *publicID = xmlParsePubidLiteral(ctxt);
2734 if (*publicID == NULL) {
2735 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2737 ctxt->sax->error(ctxt->userData,
2738 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2739 ctxt->wellFormed = 0;
2740 ctxt->disableSAX = 1;
2741 }
2742 if (strict) {
2743 /*
2744 * We don't handle [83] so "S SystemLiteral" is required.
2745 */
2746 if (!IS_BLANK(CUR)) {
2747 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2748 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2749 ctxt->sax->error(ctxt->userData,
2750 "Space required after the Public Identifier\n");
2751 ctxt->wellFormed = 0;
2752 ctxt->disableSAX = 1;
2753 }
2754 } else {
2755 /*
2756 * We handle [83] so we return immediately, if
2757 * "S SystemLiteral" is not detected. From a purely parsing
2758 * point of view that's a nice mess.
2759 */
2760 const xmlChar *ptr;
2761 GROW;
2762
2763 ptr = CUR_PTR;
2764 if (!IS_BLANK(*ptr)) return(NULL);
2765
2766 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2767 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2768 }
2769 SKIP_BLANKS;
2770 URI = xmlParseSystemLiteral(ctxt);
2771 if (URI == NULL) {
2772 ctxt->errNo = XML_ERR_URI_REQUIRED;
2773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2774 ctxt->sax->error(ctxt->userData,
2775 "xmlParseExternalID: PUBLIC, no URI\n");
2776 ctxt->wellFormed = 0;
2777 ctxt->disableSAX = 1;
2778 }
2779 }
2780 return(URI);
2781}
2782
2783/**
2784 * xmlParseComment:
2785 * @ctxt: an XML parser context
2786 *
2787 * Skip an XML (SGML) comment <!-- .... -->
2788 * The spec says that "For compatibility, the string "--" (double-hyphen)
2789 * must not occur within comments. "
2790 *
2791 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2792 */
2793void
2794xmlParseComment(xmlParserCtxtPtr ctxt) {
2795 xmlChar *buf = NULL;
2796 int len;
2797 int size = XML_PARSER_BUFFER_SIZE;
2798 int q, ql;
2799 int r, rl;
2800 int cur, l;
2801 xmlParserInputState state;
2802 xmlParserInputPtr input = ctxt->input;
2803 int count = 0;
2804
2805 /*
2806 * Check that there is a comment right here.
2807 */
2808 if ((RAW != '<') || (NXT(1) != '!') ||
2809 (NXT(2) != '-') || (NXT(3) != '-')) return;
2810
2811 state = ctxt->instate;
2812 ctxt->instate = XML_PARSER_COMMENT;
2813 SHRINK;
2814 SKIP(4);
2815 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2816 if (buf == NULL) {
2817 xmlGenericError(xmlGenericErrorContext,
2818 "malloc of %d byte failed\n", size);
2819 ctxt->instate = state;
2820 return;
2821 }
2822 q = CUR_CHAR(ql);
2823 NEXTL(ql);
2824 r = CUR_CHAR(rl);
2825 NEXTL(rl);
2826 cur = CUR_CHAR(l);
2827 len = 0;
2828 while (IS_CHAR(cur) && /* checked */
2829 ((cur != '>') ||
2830 (r != '-') || (q != '-'))) {
2831 if ((r == '-') && (q == '-') && (len > 1)) {
2832 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2834 ctxt->sax->error(ctxt->userData,
2835 "Comment must not contain '--' (double-hyphen)`\n");
2836 ctxt->wellFormed = 0;
2837 ctxt->disableSAX = 1;
2838 }
2839 if (len + 5 >= size) {
2840 size *= 2;
2841 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2842 if (buf == NULL) {
2843 xmlGenericError(xmlGenericErrorContext,
2844 "realloc of %d byte failed\n", size);
2845 ctxt->instate = state;
2846 return;
2847 }
2848 }
2849 COPY_BUF(ql,buf,len,q);
2850 q = r;
2851 ql = rl;
2852 r = cur;
2853 rl = l;
2854
2855 count++;
2856 if (count > 50) {
2857 GROW;
2858 count = 0;
2859 }
2860 NEXTL(l);
2861 cur = CUR_CHAR(l);
2862 if (cur == 0) {
2863 SHRINK;
2864 GROW;
2865 cur = CUR_CHAR(l);
2866 }
2867 }
2868 buf[len] = 0;
2869 if (!IS_CHAR(cur)) {
2870 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2872 ctxt->sax->error(ctxt->userData,
2873 "Comment not terminated \n<!--%.50s\n", buf);
2874 ctxt->wellFormed = 0;
2875 ctxt->disableSAX = 1;
2876 xmlFree(buf);
2877 } else {
2878 if (input != ctxt->input) {
2879 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882"Comment doesn't start and stop in the same entity\n");
2883 ctxt->wellFormed = 0;
2884 ctxt->disableSAX = 1;
2885 }
2886 NEXT;
2887 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2888 (!ctxt->disableSAX))
2889 ctxt->sax->comment(ctxt->userData, buf);
2890 xmlFree(buf);
2891 }
2892 ctxt->instate = state;
2893}
2894
2895/**
2896 * xmlParsePITarget:
2897 * @ctxt: an XML parser context
2898 *
2899 * parse the name of a PI
2900 *
2901 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2902 *
2903 * Returns the PITarget name or NULL
2904 */
2905
2906xmlChar *
2907xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2908 xmlChar *name;
2909
2910 name = xmlParseName(ctxt);
2911 if ((name != NULL) &&
2912 ((name[0] == 'x') || (name[0] == 'X')) &&
2913 ((name[1] == 'm') || (name[1] == 'M')) &&
2914 ((name[2] == 'l') || (name[2] == 'L'))) {
2915 int i;
2916 if ((name[0] == 'x') && (name[1] == 'm') &&
2917 (name[2] == 'l') && (name[3] == 0)) {
2918 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2920 ctxt->sax->error(ctxt->userData,
2921 "XML declaration allowed only at the start of the document\n");
2922 ctxt->wellFormed = 0;
2923 ctxt->disableSAX = 1;
2924 return(name);
2925 } else if (name[3] == 0) {
2926 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2928 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2929 ctxt->wellFormed = 0;
2930 ctxt->disableSAX = 1;
2931 return(name);
2932 }
2933 for (i = 0;;i++) {
2934 if (xmlW3CPIs[i] == NULL) break;
2935 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2936 return(name);
2937 }
2938 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2939 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2940 ctxt->sax->warning(ctxt->userData,
2941 "xmlParsePItarget: invalid name prefix 'xml'\n");
2942 }
2943 }
2944 return(name);
2945}
2946
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype informations
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * A value where "catalog" is not followed by '=' is ignored silently;
 * any other syntax problem raises a warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar *URL = NULL;
    xmlChar quote;

    /* the pseudo attribute name */
    for (; IS_BLANK(*p); p++)
        ;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto bad_syntax;
    p += 7;

    /* the '=' sign, possibly surrounded by blanks */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != '=')
        return;
    p++;
    for (; IS_BLANK(*p); p++)
        ;

    /* the quoted value */
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto bad_syntax;
    p++;
    start = p;
    for (; (*p != 0) && (*p != quote); p++)
        ;
    if (*p == 0)
        goto bad_syntax;
    URL = xmlStrndup(start, p - start);
    p++;

    /* only blanks may follow the closing quote */
    for (; IS_BLANK(*p); p++)
        ;
    if (*p != 0)
        goto bad_syntax;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

bad_syntax:
    ctxt->errNo = XML_WAR_CATALOG_PI;
    if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
        ctxt->sax->warning(ctxt->userData,
             "Catalog PI syntax error: %s\n", catalog);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
3009
Owen Taylor3473f882001-02-23 17:55:21 +00003010/**
3011 * xmlParsePI:
3012 * @ctxt: an XML parser context
3013 *
3014 * parse an XML Processing Instruction.
3015 *
3016 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
3017 *
3018 * The processing is transfered to SAX once parsed.
3019 */
3020
3021void
3022xmlParsePI(xmlParserCtxtPtr ctxt) {
3023 xmlChar *buf = NULL;
3024 int len = 0;
3025 int size = XML_PARSER_BUFFER_SIZE;
3026 int cur, l;
3027 xmlChar *target;
3028 xmlParserInputState state;
3029 int count = 0;
3030
3031 if ((RAW == '<') && (NXT(1) == '?')) {
3032 xmlParserInputPtr input = ctxt->input;
3033 state = ctxt->instate;
3034 ctxt->instate = XML_PARSER_PI;
3035 /*
3036 * this is a Processing Instruction.
3037 */
3038 SKIP(2);
3039 SHRINK;
3040
3041 /*
3042 * Parse the target name and check for special support like
3043 * namespace.
3044 */
3045 target = xmlParsePITarget(ctxt);
3046 if (target != NULL) {
3047 if ((RAW == '?') && (NXT(1) == '>')) {
3048 if (input != ctxt->input) {
3049 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3051 ctxt->sax->error(ctxt->userData,
3052 "PI declaration doesn't start and stop in the same entity\n");
3053 ctxt->wellFormed = 0;
3054 ctxt->disableSAX = 1;
3055 }
3056 SKIP(2);
3057
3058 /*
3059 * SAX: PI detected.
3060 */
3061 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3062 (ctxt->sax->processingInstruction != NULL))
3063 ctxt->sax->processingInstruction(ctxt->userData,
3064 target, NULL);
3065 ctxt->instate = state;
3066 xmlFree(target);
3067 return;
3068 }
3069 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3070 if (buf == NULL) {
3071 xmlGenericError(xmlGenericErrorContext,
3072 "malloc of %d byte failed\n", size);
3073 ctxt->instate = state;
3074 return;
3075 }
3076 cur = CUR;
3077 if (!IS_BLANK(cur)) {
3078 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3080 ctxt->sax->error(ctxt->userData,
3081 "xmlParsePI: PI %s space expected\n", target);
3082 ctxt->wellFormed = 0;
3083 ctxt->disableSAX = 1;
3084 }
3085 SKIP_BLANKS;
3086 cur = CUR_CHAR(l);
3087 while (IS_CHAR(cur) && /* checked */
3088 ((cur != '?') || (NXT(1) != '>'))) {
3089 if (len + 5 >= size) {
3090 size *= 2;
3091 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3092 if (buf == NULL) {
3093 xmlGenericError(xmlGenericErrorContext,
3094 "realloc of %d byte failed\n", size);
3095 ctxt->instate = state;
3096 return;
3097 }
3098 }
3099 count++;
3100 if (count > 50) {
3101 GROW;
3102 count = 0;
3103 }
3104 COPY_BUF(l,buf,len,cur);
3105 NEXTL(l);
3106 cur = CUR_CHAR(l);
3107 if (cur == 0) {
3108 SHRINK;
3109 GROW;
3110 cur = CUR_CHAR(l);
3111 }
3112 }
3113 buf[len] = 0;
3114 if (cur != '?') {
3115 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3117 ctxt->sax->error(ctxt->userData,
3118 "xmlParsePI: PI %s never end ...\n", target);
3119 ctxt->wellFormed = 0;
3120 ctxt->disableSAX = 1;
3121 } else {
3122 if (input != ctxt->input) {
3123 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3125 ctxt->sax->error(ctxt->userData,
3126 "PI declaration doesn't start and stop in the same entity\n");
3127 ctxt->wellFormed = 0;
3128 ctxt->disableSAX = 1;
3129 }
3130 SKIP(2);
3131
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00003132#ifdef LIBXML_CATALOG_ENABLED
3133 if (((state == XML_PARSER_MISC) ||
3134 (state == XML_PARSER_START)) &&
3135 (xmlStrEqual(target, XML_CATALOG_PI))) {
3136 xmlCatalogAllow allow = xmlCatalogGetDefaults();
3137 if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
3138 (allow == XML_CATA_ALLOW_ALL))
3139 xmlParseCatalogPI(ctxt, buf);
3140 }
3141#endif
3142
3143
Owen Taylor3473f882001-02-23 17:55:21 +00003144 /*
3145 * SAX: PI detected.
3146 */
3147 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3148 (ctxt->sax->processingInstruction != NULL))
3149 ctxt->sax->processingInstruction(ctxt->userData,
3150 target, buf);
3151 }
3152 xmlFree(buf);
3153 xmlFree(target);
3154 } else {
3155 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3156 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3157 ctxt->sax->error(ctxt->userData,
3158 "xmlParsePI : no target name\n");
3159 ctxt->wellFormed = 0;
3160 ctxt->disableSAX = 1;
3161 }
3162 ctxt->instate = state;
3163 }
3164}
3165
3166/**
3167 * xmlParseNotationDecl:
3168 * @ctxt: an XML parser context
3169 *
3170 * parse a notation declaration
3171 *
3172 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3173 *
3174 * Hence there is actually 3 choices:
3175 * 'PUBLIC' S PubidLiteral
3176 * 'PUBLIC' S PubidLiteral S SystemLiteral
3177 * and 'SYSTEM' S SystemLiteral
3178 *
3179 * See the NOTE on xmlParseExternalID().
3180 */
3181
3182void
3183xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3184 xmlChar *name;
3185 xmlChar *Pubid;
3186 xmlChar *Systemid;
3187
3188 if ((RAW == '<') && (NXT(1) == '!') &&
3189 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3190 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3191 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3192 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3193 xmlParserInputPtr input = ctxt->input;
3194 SHRINK;
3195 SKIP(10);
3196 if (!IS_BLANK(CUR)) {
3197 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3199 ctxt->sax->error(ctxt->userData,
3200 "Space required after '<!NOTATION'\n");
3201 ctxt->wellFormed = 0;
3202 ctxt->disableSAX = 1;
3203 return;
3204 }
3205 SKIP_BLANKS;
3206
Daniel Veillard76d66f42001-05-16 21:05:17 +00003207 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003208 if (name == NULL) {
3209 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3210 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3211 ctxt->sax->error(ctxt->userData,
3212 "NOTATION: Name expected here\n");
3213 ctxt->wellFormed = 0;
3214 ctxt->disableSAX = 1;
3215 return;
3216 }
3217 if (!IS_BLANK(CUR)) {
3218 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3220 ctxt->sax->error(ctxt->userData,
3221 "Space required after the NOTATION name'\n");
3222 ctxt->wellFormed = 0;
3223 ctxt->disableSAX = 1;
3224 return;
3225 }
3226 SKIP_BLANKS;
3227
3228 /*
3229 * Parse the IDs.
3230 */
3231 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3232 SKIP_BLANKS;
3233
3234 if (RAW == '>') {
3235 if (input != ctxt->input) {
3236 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3237 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3238 ctxt->sax->error(ctxt->userData,
3239"Notation declaration doesn't start and stop in the same entity\n");
3240 ctxt->wellFormed = 0;
3241 ctxt->disableSAX = 1;
3242 }
3243 NEXT;
3244 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3245 (ctxt->sax->notationDecl != NULL))
3246 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3247 } else {
3248 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3249 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3250 ctxt->sax->error(ctxt->userData,
3251 "'>' required to close NOTATION declaration\n");
3252 ctxt->wellFormed = 0;
3253 ctxt->disableSAX = 1;
3254 }
3255 xmlFree(name);
3256 if (Systemid != NULL) xmlFree(Systemid);
3257 if (Pubid != NULL) xmlFree(Pubid);
3258 }
3259}
3260
3261/**
3262 * xmlParseEntityDecl:
3263 * @ctxt: an XML parser context
3264 *
3265 * parse <!ENTITY declarations
3266 *
3267 * [70] EntityDecl ::= GEDecl | PEDecl
3268 *
3269 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3270 *
3271 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3272 *
3273 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3274 *
3275 * [74] PEDef ::= EntityValue | ExternalID
3276 *
3277 * [76] NDataDecl ::= S 'NDATA' S Name
3278 *
3279 * [ VC: Notation Declared ]
3280 * The Name must match the declared name of a notation.
3281 */
3282
3283void
3284xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3285 xmlChar *name = NULL;
3286 xmlChar *value = NULL;
3287 xmlChar *URI = NULL, *literal = NULL;
3288 xmlChar *ndata = NULL;
3289 int isParameter = 0;
3290 xmlChar *orig = NULL;
3291
3292 GROW;
3293 if ((RAW == '<') && (NXT(1) == '!') &&
3294 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3295 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3296 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3297 xmlParserInputPtr input = ctxt->input;
3298 ctxt->instate = XML_PARSER_ENTITY_DECL;
3299 SHRINK;
3300 SKIP(8);
3301 if (!IS_BLANK(CUR)) {
3302 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3304 ctxt->sax->error(ctxt->userData,
3305 "Space required after '<!ENTITY'\n");
3306 ctxt->wellFormed = 0;
3307 ctxt->disableSAX = 1;
3308 }
3309 SKIP_BLANKS;
3310
3311 if (RAW == '%') {
3312 NEXT;
3313 if (!IS_BLANK(CUR)) {
3314 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3316 ctxt->sax->error(ctxt->userData,
3317 "Space required after '%'\n");
3318 ctxt->wellFormed = 0;
3319 ctxt->disableSAX = 1;
3320 }
3321 SKIP_BLANKS;
3322 isParameter = 1;
3323 }
3324
Daniel Veillard76d66f42001-05-16 21:05:17 +00003325 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003326 if (name == NULL) {
3327 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3328 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3329 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3330 ctxt->wellFormed = 0;
3331 ctxt->disableSAX = 1;
3332 return;
3333 }
3334 if (!IS_BLANK(CUR)) {
3335 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3337 ctxt->sax->error(ctxt->userData,
3338 "Space required after the entity name\n");
3339 ctxt->wellFormed = 0;
3340 ctxt->disableSAX = 1;
3341 }
3342 SKIP_BLANKS;
3343
3344 /*
3345 * handle the various case of definitions...
3346 */
3347 if (isParameter) {
3348 if ((RAW == '"') || (RAW == '\'')) {
3349 value = xmlParseEntityValue(ctxt, &orig);
3350 if (value) {
3351 if ((ctxt->sax != NULL) &&
3352 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3353 ctxt->sax->entityDecl(ctxt->userData, name,
3354 XML_INTERNAL_PARAMETER_ENTITY,
3355 NULL, NULL, value);
3356 }
3357 } else {
3358 URI = xmlParseExternalID(ctxt, &literal, 1);
3359 if ((URI == NULL) && (literal == NULL)) {
3360 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3361 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3362 ctxt->sax->error(ctxt->userData,
3363 "Entity value required\n");
3364 ctxt->wellFormed = 0;
3365 ctxt->disableSAX = 1;
3366 }
3367 if (URI) {
3368 xmlURIPtr uri;
3369
3370 uri = xmlParseURI((const char *) URI);
3371 if (uri == NULL) {
3372 ctxt->errNo = XML_ERR_INVALID_URI;
3373 if ((ctxt->sax != NULL) &&
3374 (!ctxt->disableSAX) &&
3375 (ctxt->sax->error != NULL))
3376 ctxt->sax->error(ctxt->userData,
3377 "Invalid URI: %s\n", URI);
3378 ctxt->wellFormed = 0;
3379 } else {
3380 if (uri->fragment != NULL) {
3381 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3382 if ((ctxt->sax != NULL) &&
3383 (!ctxt->disableSAX) &&
3384 (ctxt->sax->error != NULL))
3385 ctxt->sax->error(ctxt->userData,
3386 "Fragment not allowed: %s\n", URI);
3387 ctxt->wellFormed = 0;
3388 } else {
3389 if ((ctxt->sax != NULL) &&
3390 (!ctxt->disableSAX) &&
3391 (ctxt->sax->entityDecl != NULL))
3392 ctxt->sax->entityDecl(ctxt->userData, name,
3393 XML_EXTERNAL_PARAMETER_ENTITY,
3394 literal, URI, NULL);
3395 }
3396 xmlFreeURI(uri);
3397 }
3398 }
3399 }
3400 } else {
3401 if ((RAW == '"') || (RAW == '\'')) {
3402 value = xmlParseEntityValue(ctxt, &orig);
3403 if ((ctxt->sax != NULL) &&
3404 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3405 ctxt->sax->entityDecl(ctxt->userData, name,
3406 XML_INTERNAL_GENERAL_ENTITY,
3407 NULL, NULL, value);
3408 } else {
3409 URI = xmlParseExternalID(ctxt, &literal, 1);
3410 if ((URI == NULL) && (literal == NULL)) {
3411 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3413 ctxt->sax->error(ctxt->userData,
3414 "Entity value required\n");
3415 ctxt->wellFormed = 0;
3416 ctxt->disableSAX = 1;
3417 }
3418 if (URI) {
3419 xmlURIPtr uri;
3420
3421 uri = xmlParseURI((const char *)URI);
3422 if (uri == NULL) {
3423 ctxt->errNo = XML_ERR_INVALID_URI;
3424 if ((ctxt->sax != NULL) &&
3425 (!ctxt->disableSAX) &&
3426 (ctxt->sax->error != NULL))
3427 ctxt->sax->error(ctxt->userData,
3428 "Invalid URI: %s\n", URI);
3429 ctxt->wellFormed = 0;
3430 } else {
3431 if (uri->fragment != NULL) {
3432 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3433 if ((ctxt->sax != NULL) &&
3434 (!ctxt->disableSAX) &&
3435 (ctxt->sax->error != NULL))
3436 ctxt->sax->error(ctxt->userData,
3437 "Fragment not allowed: %s\n", URI);
3438 ctxt->wellFormed = 0;
3439 }
3440 xmlFreeURI(uri);
3441 }
3442 }
3443 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3444 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3445 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3446 ctxt->sax->error(ctxt->userData,
3447 "Space required before 'NDATA'\n");
3448 ctxt->wellFormed = 0;
3449 ctxt->disableSAX = 1;
3450 }
3451 SKIP_BLANKS;
3452 if ((RAW == 'N') && (NXT(1) == 'D') &&
3453 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3454 (NXT(4) == 'A')) {
3455 SKIP(5);
3456 if (!IS_BLANK(CUR)) {
3457 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3458 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3459 ctxt->sax->error(ctxt->userData,
3460 "Space required after 'NDATA'\n");
3461 ctxt->wellFormed = 0;
3462 ctxt->disableSAX = 1;
3463 }
3464 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003465 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003466 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3467 (ctxt->sax->unparsedEntityDecl != NULL))
3468 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3469 literal, URI, ndata);
3470 } else {
3471 if ((ctxt->sax != NULL) &&
3472 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3473 ctxt->sax->entityDecl(ctxt->userData, name,
3474 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3475 literal, URI, NULL);
3476 }
3477 }
3478 }
3479 SKIP_BLANKS;
3480 if (RAW != '>') {
3481 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3483 ctxt->sax->error(ctxt->userData,
3484 "xmlParseEntityDecl: entity %s not terminated\n", name);
3485 ctxt->wellFormed = 0;
3486 ctxt->disableSAX = 1;
3487 } else {
3488 if (input != ctxt->input) {
3489 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3491 ctxt->sax->error(ctxt->userData,
3492"Entity declaration doesn't start and stop in the same entity\n");
3493 ctxt->wellFormed = 0;
3494 ctxt->disableSAX = 1;
3495 }
3496 NEXT;
3497 }
3498 if (orig != NULL) {
3499 /*
3500 * Ugly mechanism to save the raw entity value.
3501 */
3502 xmlEntityPtr cur = NULL;
3503
3504 if (isParameter) {
3505 if ((ctxt->sax != NULL) &&
3506 (ctxt->sax->getParameterEntity != NULL))
3507 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3508 } else {
3509 if ((ctxt->sax != NULL) &&
3510 (ctxt->sax->getEntity != NULL))
3511 cur = ctxt->sax->getEntity(ctxt->userData, name);
3512 }
3513 if (cur != NULL) {
3514 if (cur->orig != NULL)
3515 xmlFree(orig);
3516 else
3517 cur->orig = orig;
3518 } else
3519 xmlFree(orig);
3520 }
3521 if (name != NULL) xmlFree(name);
3522 if (value != NULL) xmlFree(value);
3523 if (URI != NULL) xmlFree(URI);
3524 if (literal != NULL) xmlFree(literal);
3525 if (ndata != NULL) xmlFree(ndata);
3526 }
3527}
3528
3529/**
3530 * xmlParseDefaultDecl:
3531 * @ctxt: an XML parser context
3532 * @value: Receive a possible fixed default value for the attribute
3533 *
3534 * Parse an attribute default declaration
3535 *
3536 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3537 *
3538 * [ VC: Required Attribute ]
3539 * if the default declaration is the keyword #REQUIRED, then the
3540 * attribute must be specified for all elements of the type in the
3541 * attribute-list declaration.
3542 *
3543 * [ VC: Attribute Default Legal ]
3544 * The declared default value must meet the lexical constraints of
3545 * the declared attribute type c.f. xmlValidateAttributeDecl()
3546 *
3547 * [ VC: Fixed Attribute Default ]
3548 * if an attribute has a default value declared with the #FIXED
3549 * keyword, instances of that attribute must match the default value.
3550 *
3551 * [ WFC: No < in Attribute Values ]
3552 * handled in xmlParseAttValue()
3553 *
3554 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3555 * or XML_ATTRIBUTE_FIXED.
3556 */
3557
3558int
3559xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3560 int val;
3561 xmlChar *ret;
3562
3563 *value = NULL;
3564 if ((RAW == '#') && (NXT(1) == 'R') &&
3565 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3566 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3567 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3568 (NXT(8) == 'D')) {
3569 SKIP(9);
3570 return(XML_ATTRIBUTE_REQUIRED);
3571 }
3572 if ((RAW == '#') && (NXT(1) == 'I') &&
3573 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3574 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3575 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3576 SKIP(8);
3577 return(XML_ATTRIBUTE_IMPLIED);
3578 }
3579 val = XML_ATTRIBUTE_NONE;
3580 if ((RAW == '#') && (NXT(1) == 'F') &&
3581 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3582 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3583 SKIP(6);
3584 val = XML_ATTRIBUTE_FIXED;
3585 if (!IS_BLANK(CUR)) {
3586 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3587 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3588 ctxt->sax->error(ctxt->userData,
3589 "Space required after '#FIXED'\n");
3590 ctxt->wellFormed = 0;
3591 ctxt->disableSAX = 1;
3592 }
3593 SKIP_BLANKS;
3594 }
3595 ret = xmlParseAttValue(ctxt);
3596 ctxt->instate = XML_PARSER_DTD;
3597 if (ret == NULL) {
3598 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3599 ctxt->sax->error(ctxt->userData,
3600 "Attribute default value declaration error\n");
3601 ctxt->wellFormed = 0;
3602 ctxt->disableSAX = 1;
3603 } else
3604 *value = ret;
3605 return(val);
3606}
3607
3608/**
3609 * xmlParseNotationType:
3610 * @ctxt: an XML parser context
3611 *
3612 * parse an Notation attribute type.
3613 *
3614 * Note: the leading 'NOTATION' S part has already being parsed...
3615 *
3616 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3617 *
3618 * [ VC: Notation Attributes ]
3619 * Values of this type must match one of the notation names included
3620 * in the declaration; all notation names in the declaration must be declared.
3621 *
3622 * Returns: the notation attribute tree built while parsing
3623 */
3624
3625xmlEnumerationPtr
3626xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3627 xmlChar *name;
3628 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3629
3630 if (RAW != '(') {
3631 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3632 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3633 ctxt->sax->error(ctxt->userData,
3634 "'(' required to start 'NOTATION'\n");
3635 ctxt->wellFormed = 0;
3636 ctxt->disableSAX = 1;
3637 return(NULL);
3638 }
3639 SHRINK;
3640 do {
3641 NEXT;
3642 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003643 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003644 if (name == NULL) {
3645 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3647 ctxt->sax->error(ctxt->userData,
3648 "Name expected in NOTATION declaration\n");
3649 ctxt->wellFormed = 0;
3650 ctxt->disableSAX = 1;
3651 return(ret);
3652 }
3653 cur = xmlCreateEnumeration(name);
3654 xmlFree(name);
3655 if (cur == NULL) return(ret);
3656 if (last == NULL) ret = last = cur;
3657 else {
3658 last->next = cur;
3659 last = cur;
3660 }
3661 SKIP_BLANKS;
3662 } while (RAW == '|');
3663 if (RAW != ')') {
3664 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3666 ctxt->sax->error(ctxt->userData,
3667 "')' required to finish NOTATION declaration\n");
3668 ctxt->wellFormed = 0;
3669 ctxt->disableSAX = 1;
3670 if ((last != NULL) && (last != ret))
3671 xmlFreeEnumeration(last);
3672 return(ret);
3673 }
3674 NEXT;
3675 return(ret);
3676}
3677
3678/**
3679 * xmlParseEnumerationType:
3680 * @ctxt: an XML parser context
3681 *
3682 * parse an Enumeration attribute type.
3683 *
3684 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3685 *
3686 * [ VC: Enumeration ]
3687 * Values of this type must match one of the Nmtoken tokens in
3688 * the declaration
3689 *
3690 * Returns: the enumeration attribute tree built while parsing
3691 */
3692
3693xmlEnumerationPtr
3694xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3695 xmlChar *name;
3696 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3697
3698 if (RAW != '(') {
3699 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3701 ctxt->sax->error(ctxt->userData,
3702 "'(' required to start ATTLIST enumeration\n");
3703 ctxt->wellFormed = 0;
3704 ctxt->disableSAX = 1;
3705 return(NULL);
3706 }
3707 SHRINK;
3708 do {
3709 NEXT;
3710 SKIP_BLANKS;
3711 name = xmlParseNmtoken(ctxt);
3712 if (name == NULL) {
3713 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3715 ctxt->sax->error(ctxt->userData,
3716 "NmToken expected in ATTLIST enumeration\n");
3717 ctxt->wellFormed = 0;
3718 ctxt->disableSAX = 1;
3719 return(ret);
3720 }
3721 cur = xmlCreateEnumeration(name);
3722 xmlFree(name);
3723 if (cur == NULL) return(ret);
3724 if (last == NULL) ret = last = cur;
3725 else {
3726 last->next = cur;
3727 last = cur;
3728 }
3729 SKIP_BLANKS;
3730 } while (RAW == '|');
3731 if (RAW != ')') {
3732 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3734 ctxt->sax->error(ctxt->userData,
3735 "')' required to finish ATTLIST enumeration\n");
3736 ctxt->wellFormed = 0;
3737 ctxt->disableSAX = 1;
3738 return(ret);
3739 }
3740 NEXT;
3741 return(ret);
3742}
3743
3744/**
3745 * xmlParseEnumeratedType:
3746 * @ctxt: an XML parser context
3747 * @tree: the enumeration tree built while parsing
3748 *
3749 * parse an Enumerated attribute type.
3750 *
3751 * [57] EnumeratedType ::= NotationType | Enumeration
3752 *
3753 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3754 *
3755 *
3756 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3757 */
3758
3759int
3760xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3761 if ((RAW == 'N') && (NXT(1) == 'O') &&
3762 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3763 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3764 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3765 SKIP(8);
3766 if (!IS_BLANK(CUR)) {
3767 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3769 ctxt->sax->error(ctxt->userData,
3770 "Space required after 'NOTATION'\n");
3771 ctxt->wellFormed = 0;
3772 ctxt->disableSAX = 1;
3773 return(0);
3774 }
3775 SKIP_BLANKS;
3776 *tree = xmlParseNotationType(ctxt);
3777 if (*tree == NULL) return(0);
3778 return(XML_ATTRIBUTE_NOTATION);
3779 }
3780 *tree = xmlParseEnumerationType(ctxt);
3781 if (*tree == NULL) return(0);
3782 return(XML_ATTRIBUTE_ENUMERATION);
3783}
3784
3785/**
3786 * xmlParseAttributeType:
3787 * @ctxt: an XML parser context
3788 * @tree: the enumeration tree built while parsing
3789 *
3790 * parse the Attribute list def for an element
3791 *
3792 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3793 *
3794 * [55] StringType ::= 'CDATA'
3795 *
3796 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3797 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3798 *
3799 * Validity constraints for attribute values syntax are checked in
3800 * xmlValidateAttributeValue()
3801 *
3802 * [ VC: ID ]
3803 * Values of type ID must match the Name production. A name must not
3804 * appear more than once in an XML document as a value of this type;
3805 * i.e., ID values must uniquely identify the elements which bear them.
3806 *
3807 * [ VC: One ID per Element Type ]
3808 * No element type may have more than one ID attribute specified.
3809 *
3810 * [ VC: ID Attribute Default ]
3811 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3812 *
3813 * [ VC: IDREF ]
3814 * Values of type IDREF must match the Name production, and values
3815 * of type IDREFS must match Names; each IDREF Name must match the value
3816 * of an ID attribute on some element in the XML document; i.e. IDREF
3817 * values must match the value of some ID attribute.
3818 *
3819 * [ VC: Entity Name ]
3820 * Values of type ENTITY must match the Name production, values
3821 * of type ENTITIES must match Names; each Entity Name must match the
3822 * name of an unparsed entity declared in the DTD.
3823 *
3824 * [ VC: Name Token ]
3825 * Values of type NMTOKEN must match the Nmtoken production; values
3826 * of type NMTOKENS must match Nmtokens.
3827 *
3828 * Returns the attribute type
3829 */
3830int
3831xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3832 SHRINK;
3833 if ((RAW == 'C') && (NXT(1) == 'D') &&
3834 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3835 (NXT(4) == 'A')) {
3836 SKIP(5);
3837 return(XML_ATTRIBUTE_CDATA);
3838 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3839 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3840 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3841 SKIP(6);
3842 return(XML_ATTRIBUTE_IDREFS);
3843 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3844 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3845 (NXT(4) == 'F')) {
3846 SKIP(5);
3847 return(XML_ATTRIBUTE_IDREF);
3848 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3849 SKIP(2);
3850 return(XML_ATTRIBUTE_ID);
3851 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3852 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3853 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3854 SKIP(6);
3855 return(XML_ATTRIBUTE_ENTITY);
3856 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3857 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3858 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3859 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3860 SKIP(8);
3861 return(XML_ATTRIBUTE_ENTITIES);
3862 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3863 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3864 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3865 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3866 SKIP(8);
3867 return(XML_ATTRIBUTE_NMTOKENS);
3868 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3869 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3870 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3871 (NXT(6) == 'N')) {
3872 SKIP(7);
3873 return(XML_ATTRIBUTE_NMTOKEN);
3874 }
3875 return(xmlParseEnumeratedType(ctxt, tree));
3876}
3877
3878/**
3879 * xmlParseAttributeListDecl:
3880 * @ctxt: an XML parser context
3881 *
3882 * : parse the Attribute list def for an element
3883 *
3884 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3885 *
3886 * [53] AttDef ::= S Name S AttType S DefaultDecl
3887 *
3888 */
3889void
3890xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3891 xmlChar *elemName;
3892 xmlChar *attrName;
3893 xmlEnumerationPtr tree;
3894
3895 if ((RAW == '<') && (NXT(1) == '!') &&
3896 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3897 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3898 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3899 (NXT(8) == 'T')) {
3900 xmlParserInputPtr input = ctxt->input;
3901
3902 SKIP(9);
3903 if (!IS_BLANK(CUR)) {
3904 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3906 ctxt->sax->error(ctxt->userData,
3907 "Space required after '<!ATTLIST'\n");
3908 ctxt->wellFormed = 0;
3909 ctxt->disableSAX = 1;
3910 }
3911 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003912 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003913 if (elemName == NULL) {
3914 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3915 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3916 ctxt->sax->error(ctxt->userData,
3917 "ATTLIST: no name for Element\n");
3918 ctxt->wellFormed = 0;
3919 ctxt->disableSAX = 1;
3920 return;
3921 }
3922 SKIP_BLANKS;
3923 GROW;
3924 while (RAW != '>') {
3925 const xmlChar *check = CUR_PTR;
3926 int type;
3927 int def;
3928 xmlChar *defaultValue = NULL;
3929
3930 GROW;
3931 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003932 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003933 if (attrName == NULL) {
3934 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3935 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3936 ctxt->sax->error(ctxt->userData,
3937 "ATTLIST: no name for Attribute\n");
3938 ctxt->wellFormed = 0;
3939 ctxt->disableSAX = 1;
3940 break;
3941 }
3942 GROW;
3943 if (!IS_BLANK(CUR)) {
3944 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3946 ctxt->sax->error(ctxt->userData,
3947 "Space required after the attribute name\n");
3948 ctxt->wellFormed = 0;
3949 ctxt->disableSAX = 1;
3950 if (attrName != NULL)
3951 xmlFree(attrName);
3952 if (defaultValue != NULL)
3953 xmlFree(defaultValue);
3954 break;
3955 }
3956 SKIP_BLANKS;
3957
3958 type = xmlParseAttributeType(ctxt, &tree);
3959 if (type <= 0) {
3960 if (attrName != NULL)
3961 xmlFree(attrName);
3962 if (defaultValue != NULL)
3963 xmlFree(defaultValue);
3964 break;
3965 }
3966
3967 GROW;
3968 if (!IS_BLANK(CUR)) {
3969 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3971 ctxt->sax->error(ctxt->userData,
3972 "Space required after the attribute type\n");
3973 ctxt->wellFormed = 0;
3974 ctxt->disableSAX = 1;
3975 if (attrName != NULL)
3976 xmlFree(attrName);
3977 if (defaultValue != NULL)
3978 xmlFree(defaultValue);
3979 if (tree != NULL)
3980 xmlFreeEnumeration(tree);
3981 break;
3982 }
3983 SKIP_BLANKS;
3984
3985 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3986 if (def <= 0) {
3987 if (attrName != NULL)
3988 xmlFree(attrName);
3989 if (defaultValue != NULL)
3990 xmlFree(defaultValue);
3991 if (tree != NULL)
3992 xmlFreeEnumeration(tree);
3993 break;
3994 }
3995
3996 GROW;
3997 if (RAW != '>') {
3998 if (!IS_BLANK(CUR)) {
3999 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4001 ctxt->sax->error(ctxt->userData,
4002 "Space required after the attribute default value\n");
4003 ctxt->wellFormed = 0;
4004 ctxt->disableSAX = 1;
4005 if (attrName != NULL)
4006 xmlFree(attrName);
4007 if (defaultValue != NULL)
4008 xmlFree(defaultValue);
4009 if (tree != NULL)
4010 xmlFreeEnumeration(tree);
4011 break;
4012 }
4013 SKIP_BLANKS;
4014 }
4015 if (check == CUR_PTR) {
4016 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
4017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4018 ctxt->sax->error(ctxt->userData,
4019 "xmlParseAttributeListDecl: detected internal error\n");
4020 if (attrName != NULL)
4021 xmlFree(attrName);
4022 if (defaultValue != NULL)
4023 xmlFree(defaultValue);
4024 if (tree != NULL)
4025 xmlFreeEnumeration(tree);
4026 break;
4027 }
4028 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4029 (ctxt->sax->attributeDecl != NULL))
4030 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
4031 type, def, defaultValue, tree);
4032 if (attrName != NULL)
4033 xmlFree(attrName);
4034 if (defaultValue != NULL)
4035 xmlFree(defaultValue);
4036 GROW;
4037 }
4038 if (RAW == '>') {
4039 if (input != ctxt->input) {
4040 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4042 ctxt->sax->error(ctxt->userData,
4043"Attribute list declaration doesn't start and stop in the same entity\n");
4044 ctxt->wellFormed = 0;
4045 ctxt->disableSAX = 1;
4046 }
4047 NEXT;
4048 }
4049
4050 xmlFree(elemName);
4051 }
4052}
4053
4054/**
4055 * xmlParseElementMixedContentDecl:
4056 * @ctxt: an XML parser context
4057 *
4058 * parse the declaration for a Mixed Element content
4059 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4060 *
4061 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
4062 * '(' S? '#PCDATA' S? ')'
4063 *
4064 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
4065 *
4066 * [ VC: No Duplicate Types ]
4067 * The same name must not appear more than once in a single
4068 * mixed-content declaration.
4069 *
4070 * returns: the list of the xmlElementContentPtr describing the element choices
4071 */
4072xmlElementContentPtr
4073xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
4074 xmlElementContentPtr ret = NULL, cur = NULL, n;
4075 xmlChar *elem = NULL;
4076
4077 GROW;
4078 if ((RAW == '#') && (NXT(1) == 'P') &&
4079 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4080 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4081 (NXT(6) == 'A')) {
4082 SKIP(7);
4083 SKIP_BLANKS;
4084 SHRINK;
4085 if (RAW == ')') {
4086 ctxt->entity = ctxt->input;
4087 NEXT;
4088 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4089 if (RAW == '*') {
4090 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4091 NEXT;
4092 }
4093 return(ret);
4094 }
4095 if ((RAW == '(') || (RAW == '|')) {
4096 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4097 if (ret == NULL) return(NULL);
4098 }
4099 while (RAW == '|') {
4100 NEXT;
4101 if (elem == NULL) {
4102 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4103 if (ret == NULL) return(NULL);
4104 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004105 if (cur != NULL)
4106 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004107 cur = ret;
4108 } else {
4109 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4110 if (n == NULL) return(NULL);
4111 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004112 if (n->c1 != NULL)
4113 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004114 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004115 if (n != NULL)
4116 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004117 cur = n;
4118 xmlFree(elem);
4119 }
4120 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004121 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004122 if (elem == NULL) {
4123 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4125 ctxt->sax->error(ctxt->userData,
4126 "xmlParseElementMixedContentDecl : Name expected\n");
4127 ctxt->wellFormed = 0;
4128 ctxt->disableSAX = 1;
4129 xmlFreeElementContent(cur);
4130 return(NULL);
4131 }
4132 SKIP_BLANKS;
4133 GROW;
4134 }
4135 if ((RAW == ')') && (NXT(1) == '*')) {
4136 if (elem != NULL) {
4137 cur->c2 = xmlNewElementContent(elem,
4138 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004139 if (cur->c2 != NULL)
4140 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004141 xmlFree(elem);
4142 }
4143 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4144 ctxt->entity = ctxt->input;
4145 SKIP(2);
4146 } else {
4147 if (elem != NULL) xmlFree(elem);
4148 xmlFreeElementContent(ret);
4149 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4150 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4151 ctxt->sax->error(ctxt->userData,
4152 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4153 ctxt->wellFormed = 0;
4154 ctxt->disableSAX = 1;
4155 return(NULL);
4156 }
4157
4158 } else {
4159 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4160 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4161 ctxt->sax->error(ctxt->userData,
4162 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4163 ctxt->wellFormed = 0;
4164 ctxt->disableSAX = 1;
4165 }
4166 return(ret);
4167}
4168
4169/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004170 * xmlParseElementChildrenContentD:
4171 * @ctxt: an XML parser context
4172 *
4173 * VMS version of xmlParseElementChildrenContentDecl()
4174 *
4175 * Returns the tree of xmlElementContentPtr describing the element
4176 * hierarchy.
4177 */
4178/**
Owen Taylor3473f882001-02-23 17:55:21 +00004179 * xmlParseElementChildrenContentDecl:
4180 * @ctxt: an XML parser context
4181 *
4182 * parse the declaration for a Mixed Element content
4183 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4184 *
4185 *
4186 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4187 *
4188 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4189 *
4190 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4191 *
4192 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4193 *
4194 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4195 * TODO Parameter-entity replacement text must be properly nested
4196 * with parenthetized groups. That is to say, if either of the
4197 * opening or closing parentheses in a choice, seq, or Mixed
4198 * construct is contained in the replacement text for a parameter
4199 * entity, both must be contained in the same replacement text. For
4200 * interoperability, if a parameter-entity reference appears in a
4201 * choice, seq, or Mixed construct, its replacement text should not
4202 * be empty, and neither the first nor last non-blank character of
4203 * the replacement text should be a connector (| or ,).
4204 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004205 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004206 * hierarchy.
4207 */
4208xmlElementContentPtr
4209#ifdef VMS
4210xmlParseElementChildrenContentD
4211#else
4212xmlParseElementChildrenContentDecl
4213#endif
4214(xmlParserCtxtPtr ctxt) {
4215 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4216 xmlChar *elem;
4217 xmlChar type = 0;
4218
4219 SKIP_BLANKS;
4220 GROW;
4221 if (RAW == '(') {
4222 /* Recurse on first child */
4223 NEXT;
4224 SKIP_BLANKS;
4225 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4226 SKIP_BLANKS;
4227 GROW;
4228 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004229 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004230 if (elem == NULL) {
4231 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4233 ctxt->sax->error(ctxt->userData,
4234 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4235 ctxt->wellFormed = 0;
4236 ctxt->disableSAX = 1;
4237 return(NULL);
4238 }
4239 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4240 GROW;
4241 if (RAW == '?') {
4242 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4243 NEXT;
4244 } else if (RAW == '*') {
4245 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4246 NEXT;
4247 } else if (RAW == '+') {
4248 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4249 NEXT;
4250 } else {
4251 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4252 }
4253 xmlFree(elem);
4254 GROW;
4255 }
4256 SKIP_BLANKS;
4257 SHRINK;
4258 while (RAW != ')') {
4259 /*
4260 * Each loop we parse one separator and one element.
4261 */
4262 if (RAW == ',') {
4263 if (type == 0) type = CUR;
4264
4265 /*
4266 * Detect "Name | Name , Name" error
4267 */
4268 else if (type != CUR) {
4269 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4271 ctxt->sax->error(ctxt->userData,
4272 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4273 type);
4274 ctxt->wellFormed = 0;
4275 ctxt->disableSAX = 1;
4276 if ((op != NULL) && (op != ret))
4277 xmlFreeElementContent(op);
4278 if ((last != NULL) && (last != ret) &&
4279 (last != ret->c1) && (last != ret->c2))
4280 xmlFreeElementContent(last);
4281 if (ret != NULL)
4282 xmlFreeElementContent(ret);
4283 return(NULL);
4284 }
4285 NEXT;
4286
4287 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4288 if (op == NULL) {
4289 xmlFreeElementContent(ret);
4290 return(NULL);
4291 }
4292 if (last == NULL) {
4293 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004294 if (ret != NULL)
4295 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004296 ret = cur = op;
4297 } else {
4298 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004299 if (op != NULL)
4300 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004301 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004302 if (last != NULL)
4303 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004304 cur =op;
4305 last = NULL;
4306 }
4307 } else if (RAW == '|') {
4308 if (type == 0) type = CUR;
4309
4310 /*
4311 * Detect "Name , Name | Name" error
4312 */
4313 else if (type != CUR) {
4314 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4316 ctxt->sax->error(ctxt->userData,
4317 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4318 type);
4319 ctxt->wellFormed = 0;
4320 ctxt->disableSAX = 1;
4321 if ((op != NULL) && (op != ret) && (op != last))
4322 xmlFreeElementContent(op);
4323 if ((last != NULL) && (last != ret) &&
4324 (last != ret->c1) && (last != ret->c2))
4325 xmlFreeElementContent(last);
4326 if (ret != NULL)
4327 xmlFreeElementContent(ret);
4328 return(NULL);
4329 }
4330 NEXT;
4331
4332 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4333 if (op == NULL) {
4334 if ((op != NULL) && (op != ret))
4335 xmlFreeElementContent(op);
4336 if ((last != NULL) && (last != ret) &&
4337 (last != ret->c1) && (last != ret->c2))
4338 xmlFreeElementContent(last);
4339 if (ret != NULL)
4340 xmlFreeElementContent(ret);
4341 return(NULL);
4342 }
4343 if (last == NULL) {
4344 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004345 if (ret != NULL)
4346 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004347 ret = cur = op;
4348 } else {
4349 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004350 if (op != NULL)
4351 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004352 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004353 if (last != NULL)
4354 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004355 cur =op;
4356 last = NULL;
4357 }
4358 } else {
4359 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4361 ctxt->sax->error(ctxt->userData,
4362 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4363 ctxt->wellFormed = 0;
4364 ctxt->disableSAX = 1;
4365 if ((op != NULL) && (op != ret))
4366 xmlFreeElementContent(op);
4367 if ((last != NULL) && (last != ret) &&
4368 (last != ret->c1) && (last != ret->c2))
4369 xmlFreeElementContent(last);
4370 if (ret != NULL)
4371 xmlFreeElementContent(ret);
4372 return(NULL);
4373 }
4374 GROW;
4375 SKIP_BLANKS;
4376 GROW;
4377 if (RAW == '(') {
4378 /* Recurse on second child */
4379 NEXT;
4380 SKIP_BLANKS;
4381 last = xmlParseElementChildrenContentDecl(ctxt);
4382 SKIP_BLANKS;
4383 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004384 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004385 if (elem == NULL) {
4386 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4387 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4388 ctxt->sax->error(ctxt->userData,
4389 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4390 ctxt->wellFormed = 0;
4391 ctxt->disableSAX = 1;
4392 if ((op != NULL) && (op != ret))
4393 xmlFreeElementContent(op);
4394 if ((last != NULL) && (last != ret) &&
4395 (last != ret->c1) && (last != ret->c2))
4396 xmlFreeElementContent(last);
4397 if (ret != NULL)
4398 xmlFreeElementContent(ret);
4399 return(NULL);
4400 }
4401 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4402 xmlFree(elem);
4403 if (RAW == '?') {
4404 last->ocur = XML_ELEMENT_CONTENT_OPT;
4405 NEXT;
4406 } else if (RAW == '*') {
4407 last->ocur = XML_ELEMENT_CONTENT_MULT;
4408 NEXT;
4409 } else if (RAW == '+') {
4410 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4411 NEXT;
4412 } else {
4413 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4414 }
4415 }
4416 SKIP_BLANKS;
4417 GROW;
4418 }
4419 if ((cur != NULL) && (last != NULL)) {
4420 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004421 if (last != NULL)
4422 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004423 }
4424 ctxt->entity = ctxt->input;
4425 NEXT;
4426 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004427 if (ret != NULL)
4428 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004429 NEXT;
4430 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004431 if (ret != NULL)
4432 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004433 NEXT;
4434 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004435 if (ret != NULL)
4436 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004437 NEXT;
4438 }
4439 return(ret);
4440}
4441
4442/**
4443 * xmlParseElementContentDecl:
4444 * @ctxt: an XML parser context
4445 * @name: the name of the element being defined.
4446 * @result: the Element Content pointer will be stored here if any
4447 *
4448 * parse the declaration for an Element content either Mixed or Children,
4449 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4450 *
4451 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4452 *
4453 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4454 */
4455
4456int
4457xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4458 xmlElementContentPtr *result) {
4459
4460 xmlElementContentPtr tree = NULL;
4461 xmlParserInputPtr input = ctxt->input;
4462 int res;
4463
4464 *result = NULL;
4465
4466 if (RAW != '(') {
4467 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4469 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004470 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004471 ctxt->wellFormed = 0;
4472 ctxt->disableSAX = 1;
4473 return(-1);
4474 }
4475 NEXT;
4476 GROW;
4477 SKIP_BLANKS;
4478 if ((RAW == '#') && (NXT(1) == 'P') &&
4479 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4480 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4481 (NXT(6) == 'A')) {
4482 tree = xmlParseElementMixedContentDecl(ctxt);
4483 res = XML_ELEMENT_TYPE_MIXED;
4484 } else {
4485 tree = xmlParseElementChildrenContentDecl(ctxt);
4486 res = XML_ELEMENT_TYPE_ELEMENT;
4487 }
4488 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4489 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4490 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4491 ctxt->sax->error(ctxt->userData,
4492"Element content declaration doesn't start and stop in the same entity\n");
4493 ctxt->wellFormed = 0;
4494 ctxt->disableSAX = 1;
4495 }
4496 SKIP_BLANKS;
4497 *result = tree;
4498 return(res);
4499}
4500
4501/**
4502 * xmlParseElementDecl:
4503 * @ctxt: an XML parser context
4504 *
4505 * parse an Element declaration.
4506 *
4507 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4508 *
4509 * [ VC: Unique Element Type Declaration ]
4510 * No element type may be declared more than once
4511 *
4512 * Returns the type of the element, or -1 in case of error
4513 */
4514int
4515xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4516 xmlChar *name;
4517 int ret = -1;
4518 xmlElementContentPtr content = NULL;
4519
4520 GROW;
4521 if ((RAW == '<') && (NXT(1) == '!') &&
4522 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4523 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4524 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4525 (NXT(8) == 'T')) {
4526 xmlParserInputPtr input = ctxt->input;
4527
4528 SKIP(9);
4529 if (!IS_BLANK(CUR)) {
4530 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4532 ctxt->sax->error(ctxt->userData,
4533 "Space required after 'ELEMENT'\n");
4534 ctxt->wellFormed = 0;
4535 ctxt->disableSAX = 1;
4536 }
4537 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004538 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004539 if (name == NULL) {
4540 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4541 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4542 ctxt->sax->error(ctxt->userData,
4543 "xmlParseElementDecl: no name for Element\n");
4544 ctxt->wellFormed = 0;
4545 ctxt->disableSAX = 1;
4546 return(-1);
4547 }
4548 while ((RAW == 0) && (ctxt->inputNr > 1))
4549 xmlPopInput(ctxt);
4550 if (!IS_BLANK(CUR)) {
4551 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4552 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4553 ctxt->sax->error(ctxt->userData,
4554 "Space required after the element name\n");
4555 ctxt->wellFormed = 0;
4556 ctxt->disableSAX = 1;
4557 }
4558 SKIP_BLANKS;
4559 if ((RAW == 'E') && (NXT(1) == 'M') &&
4560 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4561 (NXT(4) == 'Y')) {
4562 SKIP(5);
4563 /*
4564 * Element must always be empty.
4565 */
4566 ret = XML_ELEMENT_TYPE_EMPTY;
4567 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4568 (NXT(2) == 'Y')) {
4569 SKIP(3);
4570 /*
4571 * Element is a generic container.
4572 */
4573 ret = XML_ELEMENT_TYPE_ANY;
4574 } else if (RAW == '(') {
4575 ret = xmlParseElementContentDecl(ctxt, name, &content);
4576 } else {
4577 /*
4578 * [ WFC: PEs in Internal Subset ] error handling.
4579 */
4580 if ((RAW == '%') && (ctxt->external == 0) &&
4581 (ctxt->inputNr == 1)) {
4582 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4584 ctxt->sax->error(ctxt->userData,
4585 "PEReference: forbidden within markup decl in internal subset\n");
4586 } else {
4587 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4588 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4589 ctxt->sax->error(ctxt->userData,
4590 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4591 }
4592 ctxt->wellFormed = 0;
4593 ctxt->disableSAX = 1;
4594 if (name != NULL) xmlFree(name);
4595 return(-1);
4596 }
4597
4598 SKIP_BLANKS;
4599 /*
4600 * Pop-up of finished entities.
4601 */
4602 while ((RAW == 0) && (ctxt->inputNr > 1))
4603 xmlPopInput(ctxt);
4604 SKIP_BLANKS;
4605
4606 if (RAW != '>') {
4607 ctxt->errNo = XML_ERR_GT_REQUIRED;
4608 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4609 ctxt->sax->error(ctxt->userData,
4610 "xmlParseElementDecl: expected '>' at the end\n");
4611 ctxt->wellFormed = 0;
4612 ctxt->disableSAX = 1;
4613 } else {
4614 if (input != ctxt->input) {
4615 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4617 ctxt->sax->error(ctxt->userData,
4618"Element declaration doesn't start and stop in the same entity\n");
4619 ctxt->wellFormed = 0;
4620 ctxt->disableSAX = 1;
4621 }
4622
4623 NEXT;
4624 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4625 (ctxt->sax->elementDecl != NULL))
4626 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4627 content);
4628 }
4629 if (content != NULL) {
4630 xmlFreeElementContent(content);
4631 }
4632 if (name != NULL) {
4633 xmlFree(name);
4634 }
4635 }
4636 return(ret);
4637}
4638
4639/**
Owen Taylor3473f882001-02-23 17:55:21 +00004640 * xmlParseConditionalSections
4641 * @ctxt: an XML parser context
4642 *
4643 * [61] conditionalSect ::= includeSect | ignoreSect
4644 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4645 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4646 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4647 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4648 */
4649
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004650static void
Owen Taylor3473f882001-02-23 17:55:21 +00004651xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4652 SKIP(3);
4653 SKIP_BLANKS;
4654 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4655 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4656 (NXT(6) == 'E')) {
4657 SKIP(7);
4658 SKIP_BLANKS;
4659 if (RAW != '[') {
4660 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4661 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4662 ctxt->sax->error(ctxt->userData,
4663 "XML conditional section '[' expected\n");
4664 ctxt->wellFormed = 0;
4665 ctxt->disableSAX = 1;
4666 } else {
4667 NEXT;
4668 }
4669 if (xmlParserDebugEntities) {
4670 if ((ctxt->input != NULL) && (ctxt->input->filename))
4671 xmlGenericError(xmlGenericErrorContext,
4672 "%s(%d): ", ctxt->input->filename,
4673 ctxt->input->line);
4674 xmlGenericError(xmlGenericErrorContext,
4675 "Entering INCLUDE Conditional Section\n");
4676 }
4677
4678 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4679 (NXT(2) != '>'))) {
4680 const xmlChar *check = CUR_PTR;
4681 int cons = ctxt->input->consumed;
4682 int tok = ctxt->token;
4683
4684 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4685 xmlParseConditionalSections(ctxt);
4686 } else if (IS_BLANK(CUR)) {
4687 NEXT;
4688 } else if (RAW == '%') {
4689 xmlParsePEReference(ctxt);
4690 } else
4691 xmlParseMarkupDecl(ctxt);
4692
4693 /*
4694 * Pop-up of finished entities.
4695 */
4696 while ((RAW == 0) && (ctxt->inputNr > 1))
4697 xmlPopInput(ctxt);
4698
4699 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4700 (tok == ctxt->token)) {
4701 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4703 ctxt->sax->error(ctxt->userData,
4704 "Content error in the external subset\n");
4705 ctxt->wellFormed = 0;
4706 ctxt->disableSAX = 1;
4707 break;
4708 }
4709 }
4710 if (xmlParserDebugEntities) {
4711 if ((ctxt->input != NULL) && (ctxt->input->filename))
4712 xmlGenericError(xmlGenericErrorContext,
4713 "%s(%d): ", ctxt->input->filename,
4714 ctxt->input->line);
4715 xmlGenericError(xmlGenericErrorContext,
4716 "Leaving INCLUDE Conditional Section\n");
4717 }
4718
4719 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4720 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4721 int state;
4722 int instate;
4723 int depth = 0;
4724
4725 SKIP(6);
4726 SKIP_BLANKS;
4727 if (RAW != '[') {
4728 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4730 ctxt->sax->error(ctxt->userData,
4731 "XML conditional section '[' expected\n");
4732 ctxt->wellFormed = 0;
4733 ctxt->disableSAX = 1;
4734 } else {
4735 NEXT;
4736 }
4737 if (xmlParserDebugEntities) {
4738 if ((ctxt->input != NULL) && (ctxt->input->filename))
4739 xmlGenericError(xmlGenericErrorContext,
4740 "%s(%d): ", ctxt->input->filename,
4741 ctxt->input->line);
4742 xmlGenericError(xmlGenericErrorContext,
4743 "Entering IGNORE Conditional Section\n");
4744 }
4745
4746 /*
4747 * Parse up to the end of the conditionnal section
4748 * But disable SAX event generating DTD building in the meantime
4749 */
4750 state = ctxt->disableSAX;
4751 instate = ctxt->instate;
4752 ctxt->disableSAX = 1;
4753 ctxt->instate = XML_PARSER_IGNORE;
4754
4755 while (depth >= 0) {
4756 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4757 depth++;
4758 SKIP(3);
4759 continue;
4760 }
4761 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4762 if (--depth >= 0) SKIP(3);
4763 continue;
4764 }
4765 NEXT;
4766 continue;
4767 }
4768
4769 ctxt->disableSAX = state;
4770 ctxt->instate = instate;
4771
4772 if (xmlParserDebugEntities) {
4773 if ((ctxt->input != NULL) && (ctxt->input->filename))
4774 xmlGenericError(xmlGenericErrorContext,
4775 "%s(%d): ", ctxt->input->filename,
4776 ctxt->input->line);
4777 xmlGenericError(xmlGenericErrorContext,
4778 "Leaving IGNORE Conditional Section\n");
4779 }
4780
4781 } else {
4782 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4784 ctxt->sax->error(ctxt->userData,
4785 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4786 ctxt->wellFormed = 0;
4787 ctxt->disableSAX = 1;
4788 }
4789
4790 if (RAW == 0)
4791 SHRINK;
4792
4793 if (RAW == 0) {
4794 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4795 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4796 ctxt->sax->error(ctxt->userData,
4797 "XML conditional section not closed\n");
4798 ctxt->wellFormed = 0;
4799 ctxt->disableSAX = 1;
4800 } else {
4801 SKIP(3);
4802 }
4803}
4804
4805/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004806 * xmlParseMarkupDecl:
4807 * @ctxt: an XML parser context
4808 *
4809 * parse Markup declarations
4810 *
4811 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4812 * NotationDecl | PI | Comment
4813 *
4814 * [ VC: Proper Declaration/PE Nesting ]
4815 * Parameter-entity replacement text must be properly nested with
4816 * markup declarations. That is to say, if either the first character
4817 * or the last character of a markup declaration (markupdecl above) is
4818 * contained in the replacement text for a parameter-entity reference,
4819 * both must be contained in the same replacement text.
4820 *
4821 * [ WFC: PEs in Internal Subset ]
4822 * In the internal DTD subset, parameter-entity references can occur
4823 * only where markup declarations can occur, not within markup declarations.
4824 * (This does not apply to references that occur in external parameter
4825 * entities or to the external subset.)
4826 */
4827void
4828xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4829 GROW;
4830 xmlParseElementDecl(ctxt);
4831 xmlParseAttributeListDecl(ctxt);
4832 xmlParseEntityDecl(ctxt);
4833 xmlParseNotationDecl(ctxt);
4834 xmlParsePI(ctxt);
4835 xmlParseComment(ctxt);
4836 /*
4837 * This is only for internal subset. On external entities,
4838 * the replacement is done before parsing stage
4839 */
4840 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4841 xmlParsePEReference(ctxt);
4842
4843 /*
4844 * Conditional sections are allowed from entities included
4845 * by PE References in the internal subset.
4846 */
4847 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4848 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4849 xmlParseConditionalSections(ctxt);
4850 }
4851 }
4852
4853 ctxt->instate = XML_PARSER_DTD;
4854}
4855
4856/**
4857 * xmlParseTextDecl:
4858 * @ctxt: an XML parser context
4859 *
4860 * parse an XML declaration header for external entities
4861 *
4862 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4863 *
4864 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4865 */
4866
4867void
4868xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4869 xmlChar *version;
4870
4871 /*
4872 * We know that '<?xml' is here.
4873 */
4874 if ((RAW == '<') && (NXT(1) == '?') &&
4875 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4876 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4877 SKIP(5);
4878 } else {
4879 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4881 ctxt->sax->error(ctxt->userData,
4882 "Text declaration '<?xml' required\n");
4883 ctxt->wellFormed = 0;
4884 ctxt->disableSAX = 1;
4885
4886 return;
4887 }
4888
4889 if (!IS_BLANK(CUR)) {
4890 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4891 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4892 ctxt->sax->error(ctxt->userData,
4893 "Space needed after '<?xml'\n");
4894 ctxt->wellFormed = 0;
4895 ctxt->disableSAX = 1;
4896 }
4897 SKIP_BLANKS;
4898
4899 /*
4900 * We may have the VersionInfo here.
4901 */
4902 version = xmlParseVersionInfo(ctxt);
4903 if (version == NULL)
4904 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4905 ctxt->input->version = version;
4906
4907 /*
4908 * We must have the encoding declaration
4909 */
4910 if (!IS_BLANK(CUR)) {
4911 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4913 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4914 ctxt->wellFormed = 0;
4915 ctxt->disableSAX = 1;
4916 }
4917 xmlParseEncodingDecl(ctxt);
4918 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4919 /*
4920 * The XML REC instructs us to stop parsing right here
4921 */
4922 return;
4923 }
4924
4925 SKIP_BLANKS;
4926 if ((RAW == '?') && (NXT(1) == '>')) {
4927 SKIP(2);
4928 } else if (RAW == '>') {
4929 /* Deprecated old WD ... */
4930 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4932 ctxt->sax->error(ctxt->userData,
4933 "XML declaration must end-up with '?>'\n");
4934 ctxt->wellFormed = 0;
4935 ctxt->disableSAX = 1;
4936 NEXT;
4937 } else {
4938 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4940 ctxt->sax->error(ctxt->userData,
4941 "parsing XML declaration: '?>' expected\n");
4942 ctxt->wellFormed = 0;
4943 ctxt->disableSAX = 1;
4944 MOVETO_ENDTAG(CUR_PTR);
4945 NEXT;
4946 }
4947}
4948
4949/**
Owen Taylor3473f882001-02-23 17:55:21 +00004950 * xmlParseExternalSubset:
4951 * @ctxt: an XML parser context
4952 * @ExternalID: the external identifier
4953 * @SystemID: the system identifier (or URL)
4954 *
4955 * parse Markup declarations from an external subset
4956 *
4957 * [30] extSubset ::= textDecl? extSubsetDecl
4958 *
4959 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4960 */
4961void
4962xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4963 const xmlChar *SystemID) {
4964 GROW;
4965 if ((RAW == '<') && (NXT(1) == '?') &&
4966 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4967 (NXT(4) == 'l')) {
4968 xmlParseTextDecl(ctxt);
4969 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4970 /*
4971 * The XML REC instructs us to stop parsing right here
4972 */
4973 ctxt->instate = XML_PARSER_EOF;
4974 return;
4975 }
4976 }
4977 if (ctxt->myDoc == NULL) {
4978 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4979 }
4980 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4981 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4982
4983 ctxt->instate = XML_PARSER_DTD;
4984 ctxt->external = 1;
4985 while (((RAW == '<') && (NXT(1) == '?')) ||
4986 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00004987 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004988 const xmlChar *check = CUR_PTR;
4989 int cons = ctxt->input->consumed;
4990 int tok = ctxt->token;
4991
4992 GROW;
4993 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4994 xmlParseConditionalSections(ctxt);
4995 } else if (IS_BLANK(CUR)) {
4996 NEXT;
4997 } else if (RAW == '%') {
4998 xmlParsePEReference(ctxt);
4999 } else
5000 xmlParseMarkupDecl(ctxt);
5001
5002 /*
5003 * Pop-up of finished entities.
5004 */
5005 while ((RAW == 0) && (ctxt->inputNr > 1))
5006 xmlPopInput(ctxt);
5007
5008 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
5009 (tok == ctxt->token)) {
5010 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5011 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5012 ctxt->sax->error(ctxt->userData,
5013 "Content error in the external subset\n");
5014 ctxt->wellFormed = 0;
5015 ctxt->disableSAX = 1;
5016 break;
5017 }
5018 }
5019
5020 if (RAW != 0) {
5021 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
5022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5023 ctxt->sax->error(ctxt->userData,
5024 "Extra content at the end of the document\n");
5025 ctxt->wellFormed = 0;
5026 ctxt->disableSAX = 1;
5027 }
5028
5029}
5030
5031/**
5032 * xmlParseReference:
5033 * @ctxt: an XML parser context
5034 *
5035 * parse and handle entity references in content, depending on the SAX
5036 * interface, this may end-up in a call to character() if this is a
5037 * CharRef, a predefined entity, if there is no reference() callback.
5038 * or if the parser was asked to switch to that mode.
5039 *
5040 * [67] Reference ::= EntityRef | CharRef
5041 */
5042void
5043xmlParseReference(xmlParserCtxtPtr ctxt) {
5044 xmlEntityPtr ent;
5045 xmlChar *val;
5046 if (RAW != '&') return;
5047
5048 if (NXT(1) == '#') {
5049 int i = 0;
5050 xmlChar out[10];
5051 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005052 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005053
5054 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
5055 /*
5056 * So we are using non-UTF-8 buffers
5057 * Check that the char fit on 8bits, if not
5058 * generate a CharRef.
5059 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005060 if (value <= 0xFF) {
5061 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00005062 out[1] = 0;
5063 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5064 (!ctxt->disableSAX))
5065 ctxt->sax->characters(ctxt->userData, out, 1);
5066 } else {
5067 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005068 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005069 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005070 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00005071 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5072 (!ctxt->disableSAX))
5073 ctxt->sax->reference(ctxt->userData, out);
5074 }
5075 } else {
5076 /*
5077 * Just encode the value in UTF-8
5078 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005079 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005080 out[i] = 0;
5081 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5082 (!ctxt->disableSAX))
5083 ctxt->sax->characters(ctxt->userData, out, i);
5084 }
5085 } else {
5086 ent = xmlParseEntityRef(ctxt);
5087 if (ent == NULL) return;
5088 if ((ent->name != NULL) &&
5089 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5090 xmlNodePtr list = NULL;
5091 int ret;
5092
5093
5094 /*
5095 * The first reference to the entity trigger a parsing phase
5096 * where the ent->children is filled with the result from
5097 * the parsing.
5098 */
5099 if (ent->children == NULL) {
5100 xmlChar *value;
5101 value = ent->content;
5102
5103 /*
5104 * Check that this entity is well formed
5105 */
5106 if ((value != NULL) &&
5107 (value[1] == 0) && (value[0] == '<') &&
5108 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5109 /*
5110 * DONE: get definite answer on this !!!
5111 * Lots of entity decls are used to declare a single
5112 * char
5113 * <!ENTITY lt "<">
5114 * Which seems to be valid since
5115 * 2.4: The ampersand character (&) and the left angle
5116 * bracket (<) may appear in their literal form only
5117 * when used ... They are also legal within the literal
5118 * entity value of an internal entity declaration;i
5119 * see "4.3.2 Well-Formed Parsed Entities".
5120 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5121 * Looking at the OASIS test suite and James Clark
5122 * tests, this is broken. However the XML REC uses
5123 * it. Is the XML REC not well-formed ????
5124 * This is a hack to avoid this problem
5125 *
5126 * ANSWER: since lt gt amp .. are already defined,
5127 * this is a redefinition and hence the fact that the
5128 * contentis not well balanced is not a Wf error, this
5129 * is lousy but acceptable.
5130 */
5131 list = xmlNewDocText(ctxt->myDoc, value);
5132 if (list != NULL) {
5133 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5134 (ent->children == NULL)) {
5135 ent->children = list;
5136 ent->last = list;
5137 list->parent = (xmlNodePtr) ent;
5138 } else {
5139 xmlFreeNodeList(list);
5140 }
5141 } else if (list != NULL) {
5142 xmlFreeNodeList(list);
5143 }
5144 } else {
5145 /*
5146 * 4.3.2: An internal general parsed entity is well-formed
5147 * if its replacement text matches the production labeled
5148 * content.
5149 */
5150 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5151 ctxt->depth++;
5152 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5153 ctxt->sax, NULL, ctxt->depth,
5154 value, &list);
5155 ctxt->depth--;
5156 } else if (ent->etype ==
5157 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5158 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005159 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005160 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005161 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005162 ctxt->depth--;
5163 } else {
5164 ret = -1;
5165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5166 ctxt->sax->error(ctxt->userData,
5167 "Internal: invalid entity type\n");
5168 }
5169 if (ret == XML_ERR_ENTITY_LOOP) {
5170 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5171 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5172 ctxt->sax->error(ctxt->userData,
5173 "Detected entity reference loop\n");
5174 ctxt->wellFormed = 0;
5175 ctxt->disableSAX = 1;
5176 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005177 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5178 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005179 (ent->children == NULL)) {
5180 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005181 if (ctxt->replaceEntities) {
5182 /*
5183 * Prune it directly in the generated document
5184 * except for single text nodes.
5185 */
5186 if ((list->type == XML_TEXT_NODE) &&
5187 (list->next == NULL)) {
5188 list->parent = (xmlNodePtr) ent;
5189 list = NULL;
5190 } else {
5191 while (list != NULL) {
5192 list->parent = (xmlNodePtr) ctxt->node;
5193 if (list->next == NULL)
5194 ent->last = list;
5195 list = list->next;
5196 }
5197 list = ent->children;
5198 }
5199 } else {
5200 while (list != NULL) {
5201 list->parent = (xmlNodePtr) ent;
5202 if (list->next == NULL)
5203 ent->last = list;
5204 list = list->next;
5205 }
Owen Taylor3473f882001-02-23 17:55:21 +00005206 }
5207 } else {
5208 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005209 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005210 }
5211 } else if (ret > 0) {
5212 ctxt->errNo = ret;
5213 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5214 ctxt->sax->error(ctxt->userData,
5215 "Entity value required\n");
5216 ctxt->wellFormed = 0;
5217 ctxt->disableSAX = 1;
5218 } else if (list != NULL) {
5219 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005220 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005221 }
5222 }
5223 }
5224 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5225 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5226 /*
5227 * Create a node.
5228 */
5229 ctxt->sax->reference(ctxt->userData, ent->name);
5230 return;
5231 } else if (ctxt->replaceEntities) {
5232 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5233 /*
5234 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005235 * a simple tree copy for all references except the first
5236 * In the first occurence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005237 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005238 if (list == NULL) {
5239 xmlNodePtr new, cur;
5240 cur = ent->children;
5241 while (cur != NULL) {
5242 new = xmlCopyNode(cur, 1);
5243 xmlAddChild(ctxt->node, new);
5244 if (cur == ent->last)
5245 break;
5246 cur = cur->next;
5247 }
5248 } else {
5249 /*
5250 * the name change is to avoid coalescing of the
5251 * node with a prossible previous text one which
5252 * would make ent->children a dandling pointer
5253 */
5254 if (ent->children->type == XML_TEXT_NODE)
5255 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5256 if ((ent->last != ent->children) &&
5257 (ent->last->type == XML_TEXT_NODE))
5258 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5259 xmlAddChildList(ctxt->node, ent->children);
5260 }
5261
Owen Taylor3473f882001-02-23 17:55:21 +00005262 /*
5263 * This is to avoid a nasty side effect, see
5264 * characters() in SAX.c
5265 */
5266 ctxt->nodemem = 0;
5267 ctxt->nodelen = 0;
5268 return;
5269 } else {
5270 /*
5271 * Probably running in SAX mode
5272 */
5273 xmlParserInputPtr input;
5274
5275 input = xmlNewEntityInputStream(ctxt, ent);
5276 xmlPushInput(ctxt, input);
5277 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5278 (RAW == '<') && (NXT(1) == '?') &&
5279 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5280 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5281 xmlParseTextDecl(ctxt);
5282 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5283 /*
5284 * The XML REC instructs us to stop parsing right here
5285 */
5286 ctxt->instate = XML_PARSER_EOF;
5287 return;
5288 }
5289 if (input->standalone == 1) {
5290 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5291 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5292 ctxt->sax->error(ctxt->userData,
5293 "external parsed entities cannot be standalone\n");
5294 ctxt->wellFormed = 0;
5295 ctxt->disableSAX = 1;
5296 }
5297 }
5298 return;
5299 }
5300 }
5301 } else {
5302 val = ent->content;
5303 if (val == NULL) return;
5304 /*
5305 * inline the entity.
5306 */
5307 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5308 (!ctxt->disableSAX))
5309 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5310 }
5311 }
5312}
5313
5314/**
5315 * xmlParseEntityRef:
5316 * @ctxt: an XML parser context
5317 *
5318 * parse ENTITY references declarations
5319 *
5320 * [68] EntityRef ::= '&' Name ';'
5321 *
5322 * [ WFC: Entity Declared ]
5323 * In a document without any DTD, a document with only an internal DTD
5324 * subset which contains no parameter entity references, or a document
5325 * with "standalone='yes'", the Name given in the entity reference
5326 * must match that in an entity declaration, except that well-formed
5327 * documents need not declare any of the following entities: amp, lt,
5328 * gt, apos, quot. The declaration of a parameter entity must precede
5329 * any reference to it. Similarly, the declaration of a general entity
5330 * must precede any reference to it which appears in a default value in an
5331 * attribute-list declaration. Note that if entities are declared in the
5332 * external subset or in external parameter entities, a non-validating
5333 * processor is not obligated to read and process their declarations;
5334 * for such documents, the rule that an entity must be declared is a
5335 * well-formedness constraint only if standalone='yes'.
5336 *
5337 * [ WFC: Parsed Entity ]
5338 * An entity reference must not contain the name of an unparsed entity
5339 *
5340 * Returns the xmlEntityPtr if found, or NULL otherwise.
5341 */
5342xmlEntityPtr
5343xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5344 xmlChar *name;
5345 xmlEntityPtr ent = NULL;
5346
5347 GROW;
5348
5349 if (RAW == '&') {
5350 NEXT;
5351 name = xmlParseName(ctxt);
5352 if (name == NULL) {
5353 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5354 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5355 ctxt->sax->error(ctxt->userData,
5356 "xmlParseEntityRef: no name\n");
5357 ctxt->wellFormed = 0;
5358 ctxt->disableSAX = 1;
5359 } else {
5360 if (RAW == ';') {
5361 NEXT;
5362 /*
5363 * Ask first SAX for entity resolution, otherwise try the
5364 * predefined set.
5365 */
5366 if (ctxt->sax != NULL) {
5367 if (ctxt->sax->getEntity != NULL)
5368 ent = ctxt->sax->getEntity(ctxt->userData, name);
5369 if (ent == NULL)
5370 ent = xmlGetPredefinedEntity(name);
5371 }
5372 /*
5373 * [ WFC: Entity Declared ]
5374 * In a document without any DTD, a document with only an
5375 * internal DTD subset which contains no parameter entity
5376 * references, or a document with "standalone='yes'", the
5377 * Name given in the entity reference must match that in an
5378 * entity declaration, except that well-formed documents
5379 * need not declare any of the following entities: amp, lt,
5380 * gt, apos, quot.
5381 * The declaration of a parameter entity must precede any
5382 * reference to it.
5383 * Similarly, the declaration of a general entity must
5384 * precede any reference to it which appears in a default
5385 * value in an attribute-list declaration. Note that if
5386 * entities are declared in the external subset or in
5387 * external parameter entities, a non-validating processor
5388 * is not obligated to read and process their declarations;
5389 * for such documents, the rule that an entity must be
5390 * declared is a well-formedness constraint only if
5391 * standalone='yes'.
5392 */
5393 if (ent == NULL) {
5394 if ((ctxt->standalone == 1) ||
5395 ((ctxt->hasExternalSubset == 0) &&
5396 (ctxt->hasPErefs == 0))) {
5397 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5399 ctxt->sax->error(ctxt->userData,
5400 "Entity '%s' not defined\n", name);
5401 ctxt->wellFormed = 0;
5402 ctxt->disableSAX = 1;
5403 } else {
5404 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
Daniel Veillard5d90b6c2001-08-22 14:29:45 +00005405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005406 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005407 "Entity '%s' not defined\n", name);
5408 }
5409 }
5410
5411 /*
5412 * [ WFC: Parsed Entity ]
5413 * An entity reference must not contain the name of an
5414 * unparsed entity
5415 */
5416 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5417 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5418 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5419 ctxt->sax->error(ctxt->userData,
5420 "Entity reference to unparsed entity %s\n", name);
5421 ctxt->wellFormed = 0;
5422 ctxt->disableSAX = 1;
5423 }
5424
5425 /*
5426 * [ WFC: No External Entity References ]
5427 * Attribute values cannot contain direct or indirect
5428 * entity references to external entities.
5429 */
5430 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5431 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5432 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5433 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5434 ctxt->sax->error(ctxt->userData,
5435 "Attribute references external entity '%s'\n", name);
5436 ctxt->wellFormed = 0;
5437 ctxt->disableSAX = 1;
5438 }
5439 /*
5440 * [ WFC: No < in Attribute Values ]
5441 * The replacement text of any entity referred to directly or
5442 * indirectly in an attribute value (other than "&lt;") must
5443 * not contain a <.
5444 */
5445 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5446 (ent != NULL) &&
5447 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5448 (ent->content != NULL) &&
5449 (xmlStrchr(ent->content, '<'))) {
5450 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5451 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5452 ctxt->sax->error(ctxt->userData,
5453 "'<' in entity '%s' is not allowed in attributes values\n", name);
5454 ctxt->wellFormed = 0;
5455 ctxt->disableSAX = 1;
5456 }
5457
5458 /*
5459 * Internal check, no parameter entities here ...
5460 */
5461 else {
5462 switch (ent->etype) {
5463 case XML_INTERNAL_PARAMETER_ENTITY:
5464 case XML_EXTERNAL_PARAMETER_ENTITY:
5465 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5467 ctxt->sax->error(ctxt->userData,
5468 "Attempt to reference the parameter entity '%s'\n", name);
5469 ctxt->wellFormed = 0;
5470 ctxt->disableSAX = 1;
5471 break;
5472 default:
5473 break;
5474 }
5475 }
5476
5477 /*
5478 * [ WFC: No Recursion ]
5479 * A parsed entity must not contain a recursive reference
5480 * to itself, either directly or indirectly.
5481 * Done somewhere else
5482 */
5483
5484 } else {
5485 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5487 ctxt->sax->error(ctxt->userData,
5488 "xmlParseEntityRef: expecting ';'\n");
5489 ctxt->wellFormed = 0;
5490 ctxt->disableSAX = 1;
5491 }
5492 xmlFree(name);
5493 }
5494 }
5495 return(ent);
5496}
5497
5498/**
5499 * xmlParseStringEntityRef:
5500 * @ctxt: an XML parser context
5501 * @str: a pointer to an index in the string
5502 *
5503 * parse ENTITY references declarations, but this version parses it from
5504 * a string value.
5505 *
5506 * [68] EntityRef ::= '&' Name ';'
5507 *
5508 * [ WFC: Entity Declared ]
5509 * In a document without any DTD, a document with only an internal DTD
5510 * subset which contains no parameter entity references, or a document
5511 * with "standalone='yes'", the Name given in the entity reference
5512 * must match that in an entity declaration, except that well-formed
5513 * documents need not declare any of the following entities: amp, lt,
5514 * gt, apos, quot. The declaration of a parameter entity must precede
5515 * any reference to it. Similarly, the declaration of a general entity
5516 * must precede any reference to it which appears in a default value in an
5517 * attribute-list declaration. Note that if entities are declared in the
5518 * external subset or in external parameter entities, a non-validating
5519 * processor is not obligated to read and process their declarations;
5520 * for such documents, the rule that an entity must be declared is a
5521 * well-formedness constraint only if standalone='yes'.
5522 *
5523 * [ WFC: Parsed Entity ]
5524 * An entity reference must not contain the name of an unparsed entity
5525 *
5526 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5527 * is updated to the current location in the string.
5528 */
5529xmlEntityPtr
5530xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5531 xmlChar *name;
5532 const xmlChar *ptr;
5533 xmlChar cur;
5534 xmlEntityPtr ent = NULL;
5535
5536 if ((str == NULL) || (*str == NULL))
5537 return(NULL);
5538 ptr = *str;
5539 cur = *ptr;
5540 if (cur == '&') {
5541 ptr++;
5542 cur = *ptr;
5543 name = xmlParseStringName(ctxt, &ptr);
5544 if (name == NULL) {
5545 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5547 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005548 "xmlParseStringEntityRef: no name\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005549 ctxt->wellFormed = 0;
5550 ctxt->disableSAX = 1;
5551 } else {
5552 if (*ptr == ';') {
5553 ptr++;
5554 /*
5555 * Ask first SAX for entity resolution, otherwise try the
5556 * predefined set.
5557 */
5558 if (ctxt->sax != NULL) {
5559 if (ctxt->sax->getEntity != NULL)
5560 ent = ctxt->sax->getEntity(ctxt->userData, name);
5561 if (ent == NULL)
5562 ent = xmlGetPredefinedEntity(name);
5563 }
5564 /*
5565 * [ WFC: Entity Declared ]
5566 * In a document without any DTD, a document with only an
5567 * internal DTD subset which contains no parameter entity
5568 * references, or a document with "standalone='yes'", the
5569 * Name given in the entity reference must match that in an
5570 * entity declaration, except that well-formed documents
5571 * need not declare any of the following entities: amp, lt,
5572 * gt, apos, quot.
5573 * The declaration of a parameter entity must precede any
5574 * reference to it.
5575 * Similarly, the declaration of a general entity must
5576 * precede any reference to it which appears in a default
5577 * value in an attribute-list declaration. Note that if
5578 * entities are declared in the external subset or in
5579 * external parameter entities, a non-validating processor
5580 * is not obligated to read and process their declarations;
5581 * for such documents, the rule that an entity must be
5582 * declared is a well-formedness constraint only if
5583 * standalone='yes'.
5584 */
5585 if (ent == NULL) {
5586 if ((ctxt->standalone == 1) ||
5587 ((ctxt->hasExternalSubset == 0) &&
5588 (ctxt->hasPErefs == 0))) {
5589 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5591 ctxt->sax->error(ctxt->userData,
5592 "Entity '%s' not defined\n", name);
5593 ctxt->wellFormed = 0;
5594 ctxt->disableSAX = 1;
5595 } else {
5596 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5597 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5598 ctxt->sax->warning(ctxt->userData,
5599 "Entity '%s' not defined\n", name);
5600 }
5601 }
5602
5603 /*
5604 * [ WFC: Parsed Entity ]
5605 * An entity reference must not contain the name of an
5606 * unparsed entity
5607 */
5608 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5609 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5611 ctxt->sax->error(ctxt->userData,
5612 "Entity reference to unparsed entity %s\n", name);
5613 ctxt->wellFormed = 0;
5614 ctxt->disableSAX = 1;
5615 }
5616
5617 /*
5618 * [ WFC: No External Entity References ]
5619 * Attribute values cannot contain direct or indirect
5620 * entity references to external entities.
5621 */
5622 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5623 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5624 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5626 ctxt->sax->error(ctxt->userData,
5627 "Attribute references external entity '%s'\n", name);
5628 ctxt->wellFormed = 0;
5629 ctxt->disableSAX = 1;
5630 }
5631 /*
5632 * [ WFC: No < in Attribute Values ]
5633 * The replacement text of any entity referred to directly or
5634 * indirectly in an attribute value (other than "&lt;") must
5635 * not contain a <.
5636 */
5637 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5638 (ent != NULL) &&
5639 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5640 (ent->content != NULL) &&
5641 (xmlStrchr(ent->content, '<'))) {
5642 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5643 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5644 ctxt->sax->error(ctxt->userData,
5645 "'<' in entity '%s' is not allowed in attributes values\n", name);
5646 ctxt->wellFormed = 0;
5647 ctxt->disableSAX = 1;
5648 }
5649
5650 /*
5651 * Internal check, no parameter entities here ...
5652 */
5653 else {
5654 switch (ent->etype) {
5655 case XML_INTERNAL_PARAMETER_ENTITY:
5656 case XML_EXTERNAL_PARAMETER_ENTITY:
5657 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5659 ctxt->sax->error(ctxt->userData,
5660 "Attempt to reference the parameter entity '%s'\n", name);
5661 ctxt->wellFormed = 0;
5662 ctxt->disableSAX = 1;
5663 break;
5664 default:
5665 break;
5666 }
5667 }
5668
5669 /*
5670 * [ WFC: No Recursion ]
5671 * A parsed entity must not contain a recursive reference
5672 * to itself, either directly or indirectly.
5673 * Done somewhwere else
5674 */
5675
5676 } else {
5677 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5678 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5679 ctxt->sax->error(ctxt->userData,
Daniel Veillardf300b7e2001-08-13 10:43:15 +00005680 "xmlParseStringEntityRef: expecting ';'\n");
Owen Taylor3473f882001-02-23 17:55:21 +00005681 ctxt->wellFormed = 0;
5682 ctxt->disableSAX = 1;
5683 }
5684 xmlFree(name);
5685 }
5686 }
5687 *str = ptr;
5688 return(ent);
5689}
5690
5691/**
5692 * xmlParsePEReference:
5693 * @ctxt: an XML parser context
5694 *
5695 * parse PEReference declarations
5696 * The entity content is handled directly by pushing it's content as
5697 * a new input stream.
5698 *
5699 * [69] PEReference ::= '%' Name ';'
5700 *
5701 * [ WFC: No Recursion ]
5702 * A parsed entity must not contain a recursive
5703 * reference to itself, either directly or indirectly.
5704 *
5705 * [ WFC: Entity Declared ]
5706 * In a document without any DTD, a document with only an internal DTD
5707 * subset which contains no parameter entity references, or a document
5708 * with "standalone='yes'", ... ... The declaration of a parameter
5709 * entity must precede any reference to it...
5710 *
5711 * [ VC: Entity Declared ]
5712 * In a document with an external subset or external parameter entities
5713 * with "standalone='no'", ... ... The declaration of a parameter entity
5714 * must precede any reference to it...
5715 *
5716 * [ WFC: In DTD ]
5717 * Parameter-entity references may only appear in the DTD.
5718 * NOTE: misleading but this is handled.
5719 */
5720void
5721xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5722 xmlChar *name;
5723 xmlEntityPtr entity = NULL;
5724 xmlParserInputPtr input;
5725
5726 if (RAW == '%') {
5727 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005728 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005729 if (name == NULL) {
5730 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5731 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5732 ctxt->sax->error(ctxt->userData,
5733 "xmlParsePEReference: no name\n");
5734 ctxt->wellFormed = 0;
5735 ctxt->disableSAX = 1;
5736 } else {
5737 if (RAW == ';') {
5738 NEXT;
5739 if ((ctxt->sax != NULL) &&
5740 (ctxt->sax->getParameterEntity != NULL))
5741 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5742 name);
5743 if (entity == NULL) {
5744 /*
5745 * [ WFC: Entity Declared ]
5746 * In a document without any DTD, a document with only an
5747 * internal DTD subset which contains no parameter entity
5748 * references, or a document with "standalone='yes'", ...
5749 * ... The declaration of a parameter entity must precede
5750 * any reference to it...
5751 */
5752 if ((ctxt->standalone == 1) ||
5753 ((ctxt->hasExternalSubset == 0) &&
5754 (ctxt->hasPErefs == 0))) {
5755 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5756 if ((!ctxt->disableSAX) &&
5757 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5758 ctxt->sax->error(ctxt->userData,
5759 "PEReference: %%%s; not found\n", name);
5760 ctxt->wellFormed = 0;
5761 ctxt->disableSAX = 1;
5762 } else {
5763 /*
5764 * [ VC: Entity Declared ]
5765 * In a document with an external subset or external
5766 * parameter entities with "standalone='no'", ...
5767 * ... The declaration of a parameter entity must precede
5768 * any reference to it...
5769 */
5770 if ((!ctxt->disableSAX) &&
5771 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5772 ctxt->sax->warning(ctxt->userData,
5773 "PEReference: %%%s; not found\n", name);
5774 ctxt->valid = 0;
5775 }
5776 } else {
5777 /*
5778 * Internal checking in case the entity quest barfed
5779 */
5780 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5781 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5782 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5783 ctxt->sax->warning(ctxt->userData,
5784 "Internal: %%%s; is not a parameter entity\n", name);
5785 } else {
5786 /*
5787 * TODO !!!
5788 * handle the extra spaces added before and after
5789 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5790 */
5791 input = xmlNewEntityInputStream(ctxt, entity);
5792 xmlPushInput(ctxt, input);
5793 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5794 (RAW == '<') && (NXT(1) == '?') &&
5795 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5796 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5797 xmlParseTextDecl(ctxt);
5798 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5799 /*
5800 * The XML REC instructs us to stop parsing
5801 * right here
5802 */
5803 ctxt->instate = XML_PARSER_EOF;
5804 xmlFree(name);
5805 return;
5806 }
5807 }
5808 if (ctxt->token == 0)
5809 ctxt->token = ' ';
5810 }
5811 }
5812 ctxt->hasPErefs = 1;
5813 } else {
5814 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5815 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5816 ctxt->sax->error(ctxt->userData,
5817 "xmlParsePEReference: expecting ';'\n");
5818 ctxt->wellFormed = 0;
5819 ctxt->disableSAX = 1;
5820 }
5821 xmlFree(name);
5822 }
5823 }
5824}
5825
5826/**
5827 * xmlParseStringPEReference:
5828 * @ctxt: an XML parser context
5829 * @str: a pointer to an index in the string
5830 *
5831 * parse PEReference declarations
5832 *
5833 * [69] PEReference ::= '%' Name ';'
5834 *
5835 * [ WFC: No Recursion ]
5836 * A parsed entity must not contain a recursive
5837 * reference to itself, either directly or indirectly.
5838 *
5839 * [ WFC: Entity Declared ]
5840 * In a document without any DTD, a document with only an internal DTD
5841 * subset which contains no parameter entity references, or a document
5842 * with "standalone='yes'", ... ... The declaration of a parameter
5843 * entity must precede any reference to it...
5844 *
5845 * [ VC: Entity Declared ]
5846 * In a document with an external subset or external parameter entities
5847 * with "standalone='no'", ... ... The declaration of a parameter entity
5848 * must precede any reference to it...
5849 *
5850 * [ WFC: In DTD ]
5851 * Parameter-entity references may only appear in the DTD.
5852 * NOTE: misleading but this is handled.
5853 *
5854 * Returns the string of the entity content.
5855 * str is updated to the current value of the index
5856 */
5857xmlEntityPtr
5858xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5859 const xmlChar *ptr;
5860 xmlChar cur;
5861 xmlChar *name;
5862 xmlEntityPtr entity = NULL;
5863
5864 if ((str == NULL) || (*str == NULL)) return(NULL);
5865 ptr = *str;
5866 cur = *ptr;
5867 if (cur == '%') {
5868 ptr++;
5869 cur = *ptr;
5870 name = xmlParseStringName(ctxt, &ptr);
5871 if (name == NULL) {
5872 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5873 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5874 ctxt->sax->error(ctxt->userData,
5875 "xmlParseStringPEReference: no name\n");
5876 ctxt->wellFormed = 0;
5877 ctxt->disableSAX = 1;
5878 } else {
5879 cur = *ptr;
5880 if (cur == ';') {
5881 ptr++;
5882 cur = *ptr;
5883 if ((ctxt->sax != NULL) &&
5884 (ctxt->sax->getParameterEntity != NULL))
5885 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5886 name);
5887 if (entity == NULL) {
5888 /*
5889 * [ WFC: Entity Declared ]
5890 * In a document without any DTD, a document with only an
5891 * internal DTD subset which contains no parameter entity
5892 * references, or a document with "standalone='yes'", ...
5893 * ... The declaration of a parameter entity must precede
5894 * any reference to it...
5895 */
5896 if ((ctxt->standalone == 1) ||
5897 ((ctxt->hasExternalSubset == 0) &&
5898 (ctxt->hasPErefs == 0))) {
5899 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5900 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5901 ctxt->sax->error(ctxt->userData,
5902 "PEReference: %%%s; not found\n", name);
5903 ctxt->wellFormed = 0;
5904 ctxt->disableSAX = 1;
5905 } else {
5906 /*
5907 * [ VC: Entity Declared ]
5908 * In a document with an external subset or external
5909 * parameter entities with "standalone='no'", ...
5910 * ... The declaration of a parameter entity must
5911 * precede any reference to it...
5912 */
5913 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5914 ctxt->sax->warning(ctxt->userData,
5915 "PEReference: %%%s; not found\n", name);
5916 ctxt->valid = 0;
5917 }
5918 } else {
5919 /*
5920 * Internal checking in case the entity quest barfed
5921 */
5922 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5923 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5924 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5925 ctxt->sax->warning(ctxt->userData,
5926 "Internal: %%%s; is not a parameter entity\n", name);
5927 }
5928 }
5929 ctxt->hasPErefs = 1;
5930 } else {
5931 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5933 ctxt->sax->error(ctxt->userData,
5934 "xmlParseStringPEReference: expecting ';'\n");
5935 ctxt->wellFormed = 0;
5936 ctxt->disableSAX = 1;
5937 }
5938 xmlFree(name);
5939 }
5940 }
5941 *str = ptr;
5942 return(entity);
5943}
5944
5945/**
5946 * xmlParseDocTypeDecl:
5947 * @ctxt: an XML parser context
5948 *
5949 * parse a DOCTYPE declaration
5950 *
5951 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5952 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5953 *
5954 * [ VC: Root Element Type ]
5955 * The Name in the document type declaration must match the element
5956 * type of the root element.
5957 */
5958
5959void
5960xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5961 xmlChar *name = NULL;
5962 xmlChar *ExternalID = NULL;
5963 xmlChar *URI = NULL;
5964
5965 /*
5966 * We know that '<!DOCTYPE' has been detected.
5967 */
5968 SKIP(9);
5969
5970 SKIP_BLANKS;
5971
5972 /*
5973 * Parse the DOCTYPE name.
5974 */
5975 name = xmlParseName(ctxt);
5976 if (name == NULL) {
5977 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5978 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5979 ctxt->sax->error(ctxt->userData,
5980 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5981 ctxt->wellFormed = 0;
5982 ctxt->disableSAX = 1;
5983 }
5984 ctxt->intSubName = name;
5985
5986 SKIP_BLANKS;
5987
5988 /*
5989 * Check for SystemID and ExternalID
5990 */
5991 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5992
5993 if ((URI != NULL) || (ExternalID != NULL)) {
5994 ctxt->hasExternalSubset = 1;
5995 }
5996 ctxt->extSubURI = URI;
5997 ctxt->extSubSystem = ExternalID;
5998
5999 SKIP_BLANKS;
6000
6001 /*
6002 * Create and update the internal subset.
6003 */
6004 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
6005 (!ctxt->disableSAX))
6006 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
6007
6008 /*
6009 * Is there any internal subset declarations ?
6010 * they are handled separately in xmlParseInternalSubset()
6011 */
6012 if (RAW == '[')
6013 return;
6014
6015 /*
6016 * We should be at the end of the DOCTYPE declaration.
6017 */
6018 if (RAW != '>') {
6019 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6020 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6021 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6022 ctxt->wellFormed = 0;
6023 ctxt->disableSAX = 1;
6024 }
6025 NEXT;
6026}
6027
6028/**
6029 * xmlParseInternalsubset:
6030 * @ctxt: an XML parser context
6031 *
6032 * parse the internal subset declaration
6033 *
6034 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
6035 */
6036
Daniel Veillard56a4cb82001-03-24 17:00:36 +00006037static void
Owen Taylor3473f882001-02-23 17:55:21 +00006038xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
6039 /*
6040 * Is there any DTD definition ?
6041 */
6042 if (RAW == '[') {
6043 ctxt->instate = XML_PARSER_DTD;
6044 NEXT;
6045 /*
6046 * Parse the succession of Markup declarations and
6047 * PEReferences.
6048 * Subsequence (markupdecl | PEReference | S)*
6049 */
6050 while (RAW != ']') {
6051 const xmlChar *check = CUR_PTR;
6052 int cons = ctxt->input->consumed;
6053
6054 SKIP_BLANKS;
6055 xmlParseMarkupDecl(ctxt);
6056 xmlParsePEReference(ctxt);
6057
6058 /*
6059 * Pop-up of finished entities.
6060 */
6061 while ((RAW == 0) && (ctxt->inputNr > 1))
6062 xmlPopInput(ctxt);
6063
6064 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
6065 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6067 ctxt->sax->error(ctxt->userData,
6068 "xmlParseInternalSubset: error detected in Markup declaration\n");
6069 ctxt->wellFormed = 0;
6070 ctxt->disableSAX = 1;
6071 break;
6072 }
6073 }
6074 if (RAW == ']') {
6075 NEXT;
6076 SKIP_BLANKS;
6077 }
6078 }
6079
6080 /*
6081 * We should be at the end of the DOCTYPE declaration.
6082 */
6083 if (RAW != '>') {
6084 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6086 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6087 ctxt->wellFormed = 0;
6088 ctxt->disableSAX = 1;
6089 }
6090 NEXT;
6091}
6092
6093/**
6094 * xmlParseAttribute:
6095 * @ctxt: an XML parser context
6096 * @value: a xmlChar ** used to store the value of the attribute
6097 *
6098 * parse an attribute
6099 *
6100 * [41] Attribute ::= Name Eq AttValue
6101 *
6102 * [ WFC: No External Entity References ]
6103 * Attribute values cannot contain direct or indirect entity references
6104 * to external entities.
6105 *
6106 * [ WFC: No < in Attribute Values ]
6107 * The replacement text of any entity referred to directly or indirectly in
6108 * an attribute value (other than "&lt;") must not contain a <.
6109 *
6110 * [ VC: Attribute Value Type ]
6111 * The attribute must have been declared; the value must be of the type
6112 * declared for it.
6113 *
6114 * [25] Eq ::= S? '=' S?
6115 *
6116 * With namespace:
6117 *
6118 * [NS 11] Attribute ::= QName Eq AttValue
6119 *
6120 * Also the case QName == xmlns:??? is handled independently as a namespace
6121 * definition.
6122 *
6123 * Returns the attribute name, and the value in *value.
6124 */
6125
6126xmlChar *
6127xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6128 xmlChar *name, *val;
6129
6130 *value = NULL;
6131 name = xmlParseName(ctxt);
6132 if (name == NULL) {
6133 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6135 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6136 ctxt->wellFormed = 0;
6137 ctxt->disableSAX = 1;
6138 return(NULL);
6139 }
6140
6141 /*
6142 * read the value
6143 */
6144 SKIP_BLANKS;
6145 if (RAW == '=') {
6146 NEXT;
6147 SKIP_BLANKS;
6148 val = xmlParseAttValue(ctxt);
6149 ctxt->instate = XML_PARSER_CONTENT;
6150 } else {
6151 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6152 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6153 ctxt->sax->error(ctxt->userData,
6154 "Specification mandate value for attribute %s\n", name);
6155 ctxt->wellFormed = 0;
6156 ctxt->disableSAX = 1;
6157 xmlFree(name);
6158 return(NULL);
6159 }
6160
6161 /*
6162 * Check that xml:lang conforms to the specification
6163 * No more registered as an error, just generate a warning now
6164 * since this was deprecated in XML second edition
6165 */
6166 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6167 if (!xmlCheckLanguageID(val)) {
6168 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6169 ctxt->sax->warning(ctxt->userData,
6170 "Malformed value for xml:lang : %s\n", val);
6171 }
6172 }
6173
6174 /*
6175 * Check that xml:space conforms to the specification
6176 */
6177 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6178 if (xmlStrEqual(val, BAD_CAST "default"))
6179 *(ctxt->space) = 0;
6180 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6181 *(ctxt->space) = 1;
6182 else {
6183 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6184 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6185 ctxt->sax->error(ctxt->userData,
6186"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6187 val);
6188 ctxt->wellFormed = 0;
6189 ctxt->disableSAX = 1;
6190 }
6191 }
6192
6193 *value = val;
6194 return(name);
6195}
6196
6197/**
6198 * xmlParseStartTag:
6199 * @ctxt: an XML parser context
6200 *
6201 * parse a start of tag either for rule element or
6202 * EmptyElement. In both case we don't parse the tag closing chars.
6203 *
6204 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6205 *
6206 * [ WFC: Unique Att Spec ]
6207 * No attribute name may appear more than once in the same start-tag or
6208 * empty-element tag.
6209 *
6210 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6211 *
6212 * [ WFC: Unique Att Spec ]
6213 * No attribute name may appear more than once in the same start-tag or
6214 * empty-element tag.
6215 *
6216 * With namespace:
6217 *
6218 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6219 *
6220 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6221 *
6222 * Returns the element name parsed
6223 */
6224
6225xmlChar *
6226xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6227 xmlChar *name;
6228 xmlChar *attname;
6229 xmlChar *attvalue;
6230 const xmlChar **atts = NULL;
6231 int nbatts = 0;
6232 int maxatts = 0;
6233 int i;
6234
6235 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006236 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006237
6238 name = xmlParseName(ctxt);
6239 if (name == NULL) {
6240 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6241 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6242 ctxt->sax->error(ctxt->userData,
6243 "xmlParseStartTag: invalid element name\n");
6244 ctxt->wellFormed = 0;
6245 ctxt->disableSAX = 1;
6246 return(NULL);
6247 }
6248
6249 /*
6250 * Now parse the attributes, it ends up with the ending
6251 *
6252 * (S Attribute)* S?
6253 */
6254 SKIP_BLANKS;
6255 GROW;
6256
Daniel Veillard21a0f912001-02-25 19:54:14 +00006257 while ((RAW != '>') &&
6258 ((RAW != '/') || (NXT(1) != '>')) &&
6259 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006260 const xmlChar *q = CUR_PTR;
6261 int cons = ctxt->input->consumed;
6262
6263 attname = xmlParseAttribute(ctxt, &attvalue);
6264 if ((attname != NULL) && (attvalue != NULL)) {
6265 /*
6266 * [ WFC: Unique Att Spec ]
6267 * No attribute name may appear more than once in the same
6268 * start-tag or empty-element tag.
6269 */
6270 for (i = 0; i < nbatts;i += 2) {
6271 if (xmlStrEqual(atts[i], attname)) {
6272 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6273 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6274 ctxt->sax->error(ctxt->userData,
6275 "Attribute %s redefined\n",
6276 attname);
6277 ctxt->wellFormed = 0;
6278 ctxt->disableSAX = 1;
6279 xmlFree(attname);
6280 xmlFree(attvalue);
6281 goto failed;
6282 }
6283 }
6284
6285 /*
6286 * Add the pair to atts
6287 */
6288 if (atts == NULL) {
6289 maxatts = 10;
6290 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6291 if (atts == NULL) {
6292 xmlGenericError(xmlGenericErrorContext,
6293 "malloc of %ld byte failed\n",
6294 maxatts * (long)sizeof(xmlChar *));
6295 return(NULL);
6296 }
6297 } else if (nbatts + 4 > maxatts) {
6298 maxatts *= 2;
6299 atts = (const xmlChar **) xmlRealloc((void *) atts,
6300 maxatts * sizeof(xmlChar *));
6301 if (atts == NULL) {
6302 xmlGenericError(xmlGenericErrorContext,
6303 "realloc of %ld byte failed\n",
6304 maxatts * (long)sizeof(xmlChar *));
6305 return(NULL);
6306 }
6307 }
6308 atts[nbatts++] = attname;
6309 atts[nbatts++] = attvalue;
6310 atts[nbatts] = NULL;
6311 atts[nbatts + 1] = NULL;
6312 } else {
6313 if (attname != NULL)
6314 xmlFree(attname);
6315 if (attvalue != NULL)
6316 xmlFree(attvalue);
6317 }
6318
6319failed:
6320
6321 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6322 break;
6323 if (!IS_BLANK(RAW)) {
6324 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6326 ctxt->sax->error(ctxt->userData,
6327 "attributes construct error\n");
6328 ctxt->wellFormed = 0;
6329 ctxt->disableSAX = 1;
6330 }
6331 SKIP_BLANKS;
6332 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6333 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6334 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6335 ctxt->sax->error(ctxt->userData,
6336 "xmlParseStartTag: problem parsing attributes\n");
6337 ctxt->wellFormed = 0;
6338 ctxt->disableSAX = 1;
6339 break;
6340 }
6341 GROW;
6342 }
6343
6344 /*
6345 * SAX: Start of Element !
6346 */
6347 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6348 (!ctxt->disableSAX))
6349 ctxt->sax->startElement(ctxt->userData, name, atts);
6350
6351 if (atts != NULL) {
6352 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6353 xmlFree((void *) atts);
6354 }
6355 return(name);
6356}
6357
6358/**
6359 * xmlParseEndTag:
6360 * @ctxt: an XML parser context
6361 *
6362 * parse an end of tag
6363 *
6364 * [42] ETag ::= '</' Name S? '>'
6365 *
6366 * With namespace
6367 *
6368 * [NS 9] ETag ::= '</' QName S? '>'
6369 */
6370
6371void
6372xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6373 xmlChar *name;
6374 xmlChar *oldname;
6375
6376 GROW;
6377 if ((RAW != '<') || (NXT(1) != '/')) {
6378 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6379 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6380 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6381 ctxt->wellFormed = 0;
6382 ctxt->disableSAX = 1;
6383 return;
6384 }
6385 SKIP(2);
6386
6387 name = xmlParseName(ctxt);
6388
6389 /*
6390 * We should definitely be at the ending "S? '>'" part
6391 */
6392 GROW;
6393 SKIP_BLANKS;
6394 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6395 ctxt->errNo = XML_ERR_GT_REQUIRED;
6396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6397 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6398 ctxt->wellFormed = 0;
6399 ctxt->disableSAX = 1;
6400 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006401 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006402
6403 /*
6404 * [ WFC: Element Type Match ]
6405 * The Name in an element's end-tag must match the element type in the
6406 * start-tag.
6407 *
6408 */
6409 if ((name == NULL) || (ctxt->name == NULL) ||
6410 (!xmlStrEqual(name, ctxt->name))) {
6411 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6413 if ((name != NULL) && (ctxt->name != NULL)) {
6414 ctxt->sax->error(ctxt->userData,
6415 "Opening and ending tag mismatch: %s and %s\n",
6416 ctxt->name, name);
6417 } else if (ctxt->name != NULL) {
6418 ctxt->sax->error(ctxt->userData,
6419 "Ending tag eror for: %s\n", ctxt->name);
6420 } else {
6421 ctxt->sax->error(ctxt->userData,
6422 "Ending tag error: internal error ???\n");
6423 }
6424
6425 }
6426 ctxt->wellFormed = 0;
6427 ctxt->disableSAX = 1;
6428 }
6429
6430 /*
6431 * SAX: End of Tag
6432 */
6433 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6434 (!ctxt->disableSAX))
6435 ctxt->sax->endElement(ctxt->userData, name);
6436
6437 if (name != NULL)
6438 xmlFree(name);
6439 oldname = namePop(ctxt);
6440 spacePop(ctxt);
6441 if (oldname != NULL) {
6442#ifdef DEBUG_STACK
6443 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6444#endif
6445 xmlFree(oldname);
6446 }
6447 return;
6448}
6449
6450/**
6451 * xmlParseCDSect:
6452 * @ctxt: an XML parser context
6453 *
6454 * Parse escaped pure raw content.
6455 *
6456 * [18] CDSect ::= CDStart CData CDEnd
6457 *
6458 * [19] CDStart ::= '<![CDATA['
6459 *
6460 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6461 *
6462 * [21] CDEnd ::= ']]>'
6463 */
6464void
6465xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6466 xmlChar *buf = NULL;
6467 int len = 0;
6468 int size = XML_PARSER_BUFFER_SIZE;
6469 int r, rl;
6470 int s, sl;
6471 int cur, l;
6472 int count = 0;
6473
6474 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6475 (NXT(2) == '[') && (NXT(3) == 'C') &&
6476 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6477 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6478 (NXT(8) == '[')) {
6479 SKIP(9);
6480 } else
6481 return;
6482
6483 ctxt->instate = XML_PARSER_CDATA_SECTION;
6484 r = CUR_CHAR(rl);
6485 if (!IS_CHAR(r)) {
6486 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6487 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6488 ctxt->sax->error(ctxt->userData,
6489 "CData section not finished\n");
6490 ctxt->wellFormed = 0;
6491 ctxt->disableSAX = 1;
6492 ctxt->instate = XML_PARSER_CONTENT;
6493 return;
6494 }
6495 NEXTL(rl);
6496 s = CUR_CHAR(sl);
6497 if (!IS_CHAR(s)) {
6498 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6500 ctxt->sax->error(ctxt->userData,
6501 "CData section not finished\n");
6502 ctxt->wellFormed = 0;
6503 ctxt->disableSAX = 1;
6504 ctxt->instate = XML_PARSER_CONTENT;
6505 return;
6506 }
6507 NEXTL(sl);
6508 cur = CUR_CHAR(l);
6509 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6510 if (buf == NULL) {
6511 xmlGenericError(xmlGenericErrorContext,
6512 "malloc of %d byte failed\n", size);
6513 return;
6514 }
6515 while (IS_CHAR(cur) &&
6516 ((r != ']') || (s != ']') || (cur != '>'))) {
6517 if (len + 5 >= size) {
6518 size *= 2;
6519 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6520 if (buf == NULL) {
6521 xmlGenericError(xmlGenericErrorContext,
6522 "realloc of %d byte failed\n", size);
6523 return;
6524 }
6525 }
6526 COPY_BUF(rl,buf,len,r);
6527 r = s;
6528 rl = sl;
6529 s = cur;
6530 sl = l;
6531 count++;
6532 if (count > 50) {
6533 GROW;
6534 count = 0;
6535 }
6536 NEXTL(l);
6537 cur = CUR_CHAR(l);
6538 }
6539 buf[len] = 0;
6540 ctxt->instate = XML_PARSER_CONTENT;
6541 if (cur != '>') {
6542 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6543 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6544 ctxt->sax->error(ctxt->userData,
6545 "CData section not finished\n%.50s\n", buf);
6546 ctxt->wellFormed = 0;
6547 ctxt->disableSAX = 1;
6548 xmlFree(buf);
6549 return;
6550 }
6551 NEXTL(l);
6552
6553 /*
6554 * Ok the buffer is to be consumed as cdata.
6555 */
6556 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6557 if (ctxt->sax->cdataBlock != NULL)
6558 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006559 else if (ctxt->sax->characters != NULL)
6560 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006561 }
6562 xmlFree(buf);
6563}
6564
6565/**
6566 * xmlParseContent:
6567 * @ctxt: an XML parser context
6568 *
6569 * Parse a content:
6570 *
6571 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6572 */
6573
6574void
6575xmlParseContent(xmlParserCtxtPtr ctxt) {
6576 GROW;
6577 while (((RAW != 0) || (ctxt->token != 0)) &&
6578 ((RAW != '<') || (NXT(1) != '/'))) {
6579 const xmlChar *test = CUR_PTR;
6580 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006581 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006582 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006583
6584 /*
6585 * Handle possible processed charrefs.
6586 */
6587 if (ctxt->token != 0) {
6588 xmlParseCharData(ctxt, 0);
6589 }
6590 /*
6591 * First case : a Processing Instruction.
6592 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006593 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006594 xmlParsePI(ctxt);
6595 }
6596
6597 /*
6598 * Second case : a CDSection
6599 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006600 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006601 (NXT(2) == '[') && (NXT(3) == 'C') &&
6602 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6603 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6604 (NXT(8) == '[')) {
6605 xmlParseCDSect(ctxt);
6606 }
6607
6608 /*
6609 * Third case : a comment
6610 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006611 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006612 (NXT(2) == '-') && (NXT(3) == '-')) {
6613 xmlParseComment(ctxt);
6614 ctxt->instate = XML_PARSER_CONTENT;
6615 }
6616
6617 /*
6618 * Fourth case : a sub-element.
6619 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006620 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006621 xmlParseElement(ctxt);
6622 }
6623
6624 /*
6625 * Fifth case : a reference. If if has not been resolved,
6626 * parsing returns it's Name, create the node
6627 */
6628
Daniel Veillard21a0f912001-02-25 19:54:14 +00006629 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006630 xmlParseReference(ctxt);
6631 }
6632
6633 /*
6634 * Last case, text. Note that References are handled directly.
6635 */
6636 else {
6637 xmlParseCharData(ctxt, 0);
6638 }
6639
6640 GROW;
6641 /*
6642 * Pop-up of finished entities.
6643 */
6644 while ((RAW == 0) && (ctxt->inputNr > 1))
6645 xmlPopInput(ctxt);
6646 SHRINK;
6647
6648 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6649 (tok == ctxt->token)) {
6650 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6652 ctxt->sax->error(ctxt->userData,
6653 "detected an error in element content\n");
6654 ctxt->wellFormed = 0;
6655 ctxt->disableSAX = 1;
6656 ctxt->instate = XML_PARSER_EOF;
6657 break;
6658 }
6659 }
6660}
6661
6662/**
6663 * xmlParseElement:
6664 * @ctxt: an XML parser context
6665 *
6666 * parse an XML element, this is highly recursive
6667 *
6668 * [39] element ::= EmptyElemTag | STag content ETag
6669 *
6670 * [ WFC: Element Type Match ]
6671 * The Name in an element's end-tag must match the element type in the
6672 * start-tag.
6673 *
6674 * [ VC: Element Valid ]
6675 * An element is valid if there is a declaration matching elementdecl
6676 * where the Name matches the element type and one of the following holds:
6677 * - The declaration matches EMPTY and the element has no content.
6678 * - The declaration matches children and the sequence of child elements
6679 * belongs to the language generated by the regular expression in the
6680 * content model, with optional white space (characters matching the
6681 * nonterminal S) between each pair of child elements.
6682 * - The declaration matches Mixed and the content consists of character
6683 * data and child elements whose types match names in the content model.
6684 * - The declaration matches ANY, and the types of any child elements have
6685 * been declared.
6686 */
6687
6688void
6689xmlParseElement(xmlParserCtxtPtr ctxt) {
6690 const xmlChar *openTag = CUR_PTR;
6691 xmlChar *name;
6692 xmlChar *oldname;
6693 xmlParserNodeInfo node_info;
6694 xmlNodePtr ret;
6695
6696 /* Capture start position */
6697 if (ctxt->record_info) {
6698 node_info.begin_pos = ctxt->input->consumed +
6699 (CUR_PTR - ctxt->input->base);
6700 node_info.begin_line = ctxt->input->line;
6701 }
6702
6703 if (ctxt->spaceNr == 0)
6704 spacePush(ctxt, -1);
6705 else
6706 spacePush(ctxt, *ctxt->space);
6707
6708 name = xmlParseStartTag(ctxt);
6709 if (name == NULL) {
6710 spacePop(ctxt);
6711 return;
6712 }
6713 namePush(ctxt, name);
6714 ret = ctxt->node;
6715
6716 /*
6717 * [ VC: Root Element Type ]
6718 * The Name in the document type declaration must match the element
6719 * type of the root element.
6720 */
6721 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6722 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6723 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6724
6725 /*
6726 * Check for an Empty Element.
6727 */
6728 if ((RAW == '/') && (NXT(1) == '>')) {
6729 SKIP(2);
6730 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6731 (!ctxt->disableSAX))
6732 ctxt->sax->endElement(ctxt->userData, name);
6733 oldname = namePop(ctxt);
6734 spacePop(ctxt);
6735 if (oldname != NULL) {
6736#ifdef DEBUG_STACK
6737 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6738#endif
6739 xmlFree(oldname);
6740 }
6741 if ( ret != NULL && ctxt->record_info ) {
6742 node_info.end_pos = ctxt->input->consumed +
6743 (CUR_PTR - ctxt->input->base);
6744 node_info.end_line = ctxt->input->line;
6745 node_info.node = ret;
6746 xmlParserAddNodeInfo(ctxt, &node_info);
6747 }
6748 return;
6749 }
6750 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006751 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006752 } else {
6753 ctxt->errNo = XML_ERR_GT_REQUIRED;
6754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6755 ctxt->sax->error(ctxt->userData,
6756 "Couldn't find end of Start Tag\n%.30s\n",
6757 openTag);
6758 ctxt->wellFormed = 0;
6759 ctxt->disableSAX = 1;
6760
6761 /*
6762 * end of parsing of this node.
6763 */
6764 nodePop(ctxt);
6765 oldname = namePop(ctxt);
6766 spacePop(ctxt);
6767 if (oldname != NULL) {
6768#ifdef DEBUG_STACK
6769 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6770#endif
6771 xmlFree(oldname);
6772 }
6773
6774 /*
6775 * Capture end position and add node
6776 */
6777 if ( ret != NULL && ctxt->record_info ) {
6778 node_info.end_pos = ctxt->input->consumed +
6779 (CUR_PTR - ctxt->input->base);
6780 node_info.end_line = ctxt->input->line;
6781 node_info.node = ret;
6782 xmlParserAddNodeInfo(ctxt, &node_info);
6783 }
6784 return;
6785 }
6786
6787 /*
6788 * Parse the content of the element:
6789 */
6790 xmlParseContent(ctxt);
6791 if (!IS_CHAR(RAW)) {
6792 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6794 ctxt->sax->error(ctxt->userData,
6795 "Premature end of data in tag %.30s\n", openTag);
6796 ctxt->wellFormed = 0;
6797 ctxt->disableSAX = 1;
6798
6799 /*
6800 * end of parsing of this node.
6801 */
6802 nodePop(ctxt);
6803 oldname = namePop(ctxt);
6804 spacePop(ctxt);
6805 if (oldname != NULL) {
6806#ifdef DEBUG_STACK
6807 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6808#endif
6809 xmlFree(oldname);
6810 }
6811 return;
6812 }
6813
6814 /*
6815 * parse the end of tag: '</' should be here.
6816 */
6817 xmlParseEndTag(ctxt);
6818
6819 /*
6820 * Capture end position and add node
6821 */
6822 if ( ret != NULL && ctxt->record_info ) {
6823 node_info.end_pos = ctxt->input->consumed +
6824 (CUR_PTR - ctxt->input->base);
6825 node_info.end_line = ctxt->input->line;
6826 node_info.node = ret;
6827 xmlParserAddNodeInfo(ctxt, &node_info);
6828 }
6829}
6830
6831/**
6832 * xmlParseVersionNum:
6833 * @ctxt: an XML parser context
6834 *
6835 * parse the XML version value.
6836 *
6837 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6838 *
6839 * Returns the string giving the XML version number, or NULL
6840 */
6841xmlChar *
6842xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6843 xmlChar *buf = NULL;
6844 int len = 0;
6845 int size = 10;
6846 xmlChar cur;
6847
6848 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6849 if (buf == NULL) {
6850 xmlGenericError(xmlGenericErrorContext,
6851 "malloc of %d byte failed\n", size);
6852 return(NULL);
6853 }
6854 cur = CUR;
6855 while (((cur >= 'a') && (cur <= 'z')) ||
6856 ((cur >= 'A') && (cur <= 'Z')) ||
6857 ((cur >= '0') && (cur <= '9')) ||
6858 (cur == '_') || (cur == '.') ||
6859 (cur == ':') || (cur == '-')) {
6860 if (len + 1 >= size) {
6861 size *= 2;
6862 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6863 if (buf == NULL) {
6864 xmlGenericError(xmlGenericErrorContext,
6865 "realloc of %d byte failed\n", size);
6866 return(NULL);
6867 }
6868 }
6869 buf[len++] = cur;
6870 NEXT;
6871 cur=CUR;
6872 }
6873 buf[len] = 0;
6874 return(buf);
6875}
6876
6877/**
6878 * xmlParseVersionInfo:
6879 * @ctxt: an XML parser context
6880 *
6881 * parse the XML version.
6882 *
6883 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6884 *
6885 * [25] Eq ::= S? '=' S?
6886 *
6887 * Returns the version string, e.g. "1.0"
6888 */
6889
6890xmlChar *
6891xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6892 xmlChar *version = NULL;
6893 const xmlChar *q;
6894
6895 if ((RAW == 'v') && (NXT(1) == 'e') &&
6896 (NXT(2) == 'r') && (NXT(3) == 's') &&
6897 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6898 (NXT(6) == 'n')) {
6899 SKIP(7);
6900 SKIP_BLANKS;
6901 if (RAW != '=') {
6902 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6904 ctxt->sax->error(ctxt->userData,
6905 "xmlParseVersionInfo : expected '='\n");
6906 ctxt->wellFormed = 0;
6907 ctxt->disableSAX = 1;
6908 return(NULL);
6909 }
6910 NEXT;
6911 SKIP_BLANKS;
6912 if (RAW == '"') {
6913 NEXT;
6914 q = CUR_PTR;
6915 version = xmlParseVersionNum(ctxt);
6916 if (RAW != '"') {
6917 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6919 ctxt->sax->error(ctxt->userData,
6920 "String not closed\n%.50s\n", q);
6921 ctxt->wellFormed = 0;
6922 ctxt->disableSAX = 1;
6923 } else
6924 NEXT;
6925 } else if (RAW == '\''){
6926 NEXT;
6927 q = CUR_PTR;
6928 version = xmlParseVersionNum(ctxt);
6929 if (RAW != '\'') {
6930 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6932 ctxt->sax->error(ctxt->userData,
6933 "String not closed\n%.50s\n", q);
6934 ctxt->wellFormed = 0;
6935 ctxt->disableSAX = 1;
6936 } else
6937 NEXT;
6938 } else {
6939 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6941 ctxt->sax->error(ctxt->userData,
6942 "xmlParseVersionInfo : expected ' or \"\n");
6943 ctxt->wellFormed = 0;
6944 ctxt->disableSAX = 1;
6945 }
6946 }
6947 return(version);
6948}
6949
6950/**
6951 * xmlParseEncName:
6952 * @ctxt: an XML parser context
6953 *
6954 * parse the XML encoding name
6955 *
6956 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6957 *
6958 * Returns the encoding name value or NULL
6959 */
6960xmlChar *
6961xmlParseEncName(xmlParserCtxtPtr ctxt) {
6962 xmlChar *buf = NULL;
6963 int len = 0;
6964 int size = 10;
6965 xmlChar cur;
6966
6967 cur = CUR;
6968 if (((cur >= 'a') && (cur <= 'z')) ||
6969 ((cur >= 'A') && (cur <= 'Z'))) {
6970 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6971 if (buf == NULL) {
6972 xmlGenericError(xmlGenericErrorContext,
6973 "malloc of %d byte failed\n", size);
6974 return(NULL);
6975 }
6976
6977 buf[len++] = cur;
6978 NEXT;
6979 cur = CUR;
6980 while (((cur >= 'a') && (cur <= 'z')) ||
6981 ((cur >= 'A') && (cur <= 'Z')) ||
6982 ((cur >= '0') && (cur <= '9')) ||
6983 (cur == '.') || (cur == '_') ||
6984 (cur == '-')) {
6985 if (len + 1 >= size) {
6986 size *= 2;
6987 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6988 if (buf == NULL) {
6989 xmlGenericError(xmlGenericErrorContext,
6990 "realloc of %d byte failed\n", size);
6991 return(NULL);
6992 }
6993 }
6994 buf[len++] = cur;
6995 NEXT;
6996 cur = CUR;
6997 if (cur == 0) {
6998 SHRINK;
6999 GROW;
7000 cur = CUR;
7001 }
7002 }
7003 buf[len] = 0;
7004 } else {
7005 ctxt->errNo = XML_ERR_ENCODING_NAME;
7006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7007 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
7008 ctxt->wellFormed = 0;
7009 ctxt->disableSAX = 1;
7010 }
7011 return(buf);
7012}
7013
7014/**
7015 * xmlParseEncodingDecl:
7016 * @ctxt: an XML parser context
7017 *
7018 * parse the XML encoding declaration
7019 *
7020 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
7021 *
7022 * this setups the conversion filters.
7023 *
7024 * Returns the encoding value or NULL
7025 */
7026
7027xmlChar *
7028xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
7029 xmlChar *encoding = NULL;
7030 const xmlChar *q;
7031
7032 SKIP_BLANKS;
7033 if ((RAW == 'e') && (NXT(1) == 'n') &&
7034 (NXT(2) == 'c') && (NXT(3) == 'o') &&
7035 (NXT(4) == 'd') && (NXT(5) == 'i') &&
7036 (NXT(6) == 'n') && (NXT(7) == 'g')) {
7037 SKIP(8);
7038 SKIP_BLANKS;
7039 if (RAW != '=') {
7040 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7042 ctxt->sax->error(ctxt->userData,
7043 "xmlParseEncodingDecl : expected '='\n");
7044 ctxt->wellFormed = 0;
7045 ctxt->disableSAX = 1;
7046 return(NULL);
7047 }
7048 NEXT;
7049 SKIP_BLANKS;
7050 if (RAW == '"') {
7051 NEXT;
7052 q = CUR_PTR;
7053 encoding = xmlParseEncName(ctxt);
7054 if (RAW != '"') {
7055 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7056 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7057 ctxt->sax->error(ctxt->userData,
7058 "String not closed\n%.50s\n", q);
7059 ctxt->wellFormed = 0;
7060 ctxt->disableSAX = 1;
7061 } else
7062 NEXT;
7063 } else if (RAW == '\''){
7064 NEXT;
7065 q = CUR_PTR;
7066 encoding = xmlParseEncName(ctxt);
7067 if (RAW != '\'') {
7068 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7069 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7070 ctxt->sax->error(ctxt->userData,
7071 "String not closed\n%.50s\n", q);
7072 ctxt->wellFormed = 0;
7073 ctxt->disableSAX = 1;
7074 } else
7075 NEXT;
7076 } else if (RAW == '"'){
7077 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7079 ctxt->sax->error(ctxt->userData,
7080 "xmlParseEncodingDecl : expected ' or \"\n");
7081 ctxt->wellFormed = 0;
7082 ctxt->disableSAX = 1;
7083 }
7084 if (encoding != NULL) {
7085 xmlCharEncoding enc;
7086 xmlCharEncodingHandlerPtr handler;
7087
7088 if (ctxt->input->encoding != NULL)
7089 xmlFree((xmlChar *) ctxt->input->encoding);
7090 ctxt->input->encoding = encoding;
7091
7092 enc = xmlParseCharEncoding((const char *) encoding);
7093 /*
7094 * registered set of known encodings
7095 */
7096 if (enc != XML_CHAR_ENCODING_ERROR) {
7097 xmlSwitchEncoding(ctxt, enc);
7098 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7099 xmlFree(encoding);
7100 return(NULL);
7101 }
7102 } else {
7103 /*
7104 * fallback for unknown encodings
7105 */
7106 handler = xmlFindCharEncodingHandler((const char *) encoding);
7107 if (handler != NULL) {
7108 xmlSwitchToEncoding(ctxt, handler);
7109 } else {
7110 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7112 ctxt->sax->error(ctxt->userData,
7113 "Unsupported encoding %s\n", encoding);
7114 return(NULL);
7115 }
7116 }
7117 }
7118 }
7119 return(encoding);
7120}
7121
7122/**
7123 * xmlParseSDDecl:
7124 * @ctxt: an XML parser context
7125 *
7126 * parse the XML standalone declaration
7127 *
7128 * [32] SDDecl ::= S 'standalone' Eq
7129 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7130 *
7131 * [ VC: Standalone Document Declaration ]
7132 * TODO The standalone document declaration must have the value "no"
7133 * if any external markup declarations contain declarations of:
7134 * - attributes with default values, if elements to which these
7135 * attributes apply appear in the document without specifications
7136 * of values for these attributes, or
7137 * - entities (other than amp, lt, gt, apos, quot), if references
7138 * to those entities appear in the document, or
7139 * - attributes with values subject to normalization, where the
7140 * attribute appears in the document with a value which will change
7141 * as a result of normalization, or
7142 * - element types with element content, if white space occurs directly
7143 * within any instance of those types.
7144 *
7145 * Returns 1 if standalone, 0 otherwise
7146 */
7147
7148int
7149xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7150 int standalone = -1;
7151
7152 SKIP_BLANKS;
7153 if ((RAW == 's') && (NXT(1) == 't') &&
7154 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7155 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7156 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7157 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7158 SKIP(10);
7159 SKIP_BLANKS;
7160 if (RAW != '=') {
7161 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7163 ctxt->sax->error(ctxt->userData,
7164 "XML standalone declaration : expected '='\n");
7165 ctxt->wellFormed = 0;
7166 ctxt->disableSAX = 1;
7167 return(standalone);
7168 }
7169 NEXT;
7170 SKIP_BLANKS;
7171 if (RAW == '\''){
7172 NEXT;
7173 if ((RAW == 'n') && (NXT(1) == 'o')) {
7174 standalone = 0;
7175 SKIP(2);
7176 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7177 (NXT(2) == 's')) {
7178 standalone = 1;
7179 SKIP(3);
7180 } else {
7181 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7182 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7183 ctxt->sax->error(ctxt->userData,
7184 "standalone accepts only 'yes' or 'no'\n");
7185 ctxt->wellFormed = 0;
7186 ctxt->disableSAX = 1;
7187 }
7188 if (RAW != '\'') {
7189 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7190 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7191 ctxt->sax->error(ctxt->userData, "String not closed\n");
7192 ctxt->wellFormed = 0;
7193 ctxt->disableSAX = 1;
7194 } else
7195 NEXT;
7196 } else if (RAW == '"'){
7197 NEXT;
7198 if ((RAW == 'n') && (NXT(1) == 'o')) {
7199 standalone = 0;
7200 SKIP(2);
7201 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7202 (NXT(2) == 's')) {
7203 standalone = 1;
7204 SKIP(3);
7205 } else {
7206 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7208 ctxt->sax->error(ctxt->userData,
7209 "standalone accepts only 'yes' or 'no'\n");
7210 ctxt->wellFormed = 0;
7211 ctxt->disableSAX = 1;
7212 }
7213 if (RAW != '"') {
7214 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7215 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7216 ctxt->sax->error(ctxt->userData, "String not closed\n");
7217 ctxt->wellFormed = 0;
7218 ctxt->disableSAX = 1;
7219 } else
7220 NEXT;
7221 } else {
7222 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7223 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7224 ctxt->sax->error(ctxt->userData,
7225 "Standalone value not found\n");
7226 ctxt->wellFormed = 0;
7227 ctxt->disableSAX = 1;
7228 }
7229 }
7230 return(standalone);
7231}
7232
7233/**
7234 * xmlParseXMLDecl:
7235 * @ctxt: an XML parser context
7236 *
7237 * parse an XML declaration header
7238 *
7239 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7240 */
7241
7242void
7243xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7244 xmlChar *version;
7245
7246 /*
7247 * We know that '<?xml' is here.
7248 */
7249 SKIP(5);
7250
7251 if (!IS_BLANK(RAW)) {
7252 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7254 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7255 ctxt->wellFormed = 0;
7256 ctxt->disableSAX = 1;
7257 }
7258 SKIP_BLANKS;
7259
7260 /*
7261 * We should have the VersionInfo here.
7262 */
7263 version = xmlParseVersionInfo(ctxt);
7264 if (version == NULL)
7265 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7266 ctxt->version = xmlStrdup(version);
7267 xmlFree(version);
7268
7269 /*
7270 * We may have the encoding declaration
7271 */
7272 if (!IS_BLANK(RAW)) {
7273 if ((RAW == '?') && (NXT(1) == '>')) {
7274 SKIP(2);
7275 return;
7276 }
7277 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7279 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7280 ctxt->wellFormed = 0;
7281 ctxt->disableSAX = 1;
7282 }
7283 xmlParseEncodingDecl(ctxt);
7284 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7285 /*
7286 * The XML REC instructs us to stop parsing right here
7287 */
7288 return;
7289 }
7290
7291 /*
7292 * We may have the standalone status.
7293 */
7294 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7295 if ((RAW == '?') && (NXT(1) == '>')) {
7296 SKIP(2);
7297 return;
7298 }
7299 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7301 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7302 ctxt->wellFormed = 0;
7303 ctxt->disableSAX = 1;
7304 }
7305 SKIP_BLANKS;
7306 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7307
7308 SKIP_BLANKS;
7309 if ((RAW == '?') && (NXT(1) == '>')) {
7310 SKIP(2);
7311 } else if (RAW == '>') {
7312 /* Deprecated old WD ... */
7313 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7314 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7315 ctxt->sax->error(ctxt->userData,
7316 "XML declaration must end-up with '?>'\n");
7317 ctxt->wellFormed = 0;
7318 ctxt->disableSAX = 1;
7319 NEXT;
7320 } else {
7321 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7323 ctxt->sax->error(ctxt->userData,
7324 "parsing XML declaration: '?>' expected\n");
7325 ctxt->wellFormed = 0;
7326 ctxt->disableSAX = 1;
7327 MOVETO_ENDTAG(CUR_PTR);
7328 NEXT;
7329 }
7330}
7331
7332/**
7333 * xmlParseMisc:
7334 * @ctxt: an XML parser context
7335 *
7336 * parse an XML Misc* optionnal field.
7337 *
7338 * [27] Misc ::= Comment | PI | S
7339 */
7340
7341void
7342xmlParseMisc(xmlParserCtxtPtr ctxt) {
7343 while (((RAW == '<') && (NXT(1) == '?')) ||
7344 ((RAW == '<') && (NXT(1) == '!') &&
7345 (NXT(2) == '-') && (NXT(3) == '-')) ||
7346 IS_BLANK(CUR)) {
7347 if ((RAW == '<') && (NXT(1) == '?')) {
7348 xmlParsePI(ctxt);
7349 } else if (IS_BLANK(CUR)) {
7350 NEXT;
7351 } else
7352 xmlParseComment(ctxt);
7353 }
7354}
7355
7356/**
7357 * xmlParseDocument:
7358 * @ctxt: an XML parser context
7359 *
7360 * parse an XML document (and build a tree if using the standard SAX
7361 * interface).
7362 *
7363 * [1] document ::= prolog element Misc*
7364 *
7365 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7366 *
7367 * Returns 0, -1 in case of error. the parser context is augmented
7368 * as a result of the parsing.
7369 */
7370
7371int
7372xmlParseDocument(xmlParserCtxtPtr ctxt) {
7373 xmlChar start[4];
7374 xmlCharEncoding enc;
7375
7376 xmlInitParser();
7377
7378 GROW;
7379
7380 /*
7381 * SAX: beginning of the document processing.
7382 */
7383 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7384 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7385
Daniel Veillard50f34372001-08-03 12:06:36 +00007386 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007387 /*
7388 * Get the 4 first bytes and decode the charset
7389 * if enc != XML_CHAR_ENCODING_NONE
7390 * plug some encoding conversion routines.
7391 */
7392 start[0] = RAW;
7393 start[1] = NXT(1);
7394 start[2] = NXT(2);
7395 start[3] = NXT(3);
7396 enc = xmlDetectCharEncoding(start, 4);
7397 if (enc != XML_CHAR_ENCODING_NONE) {
7398 xmlSwitchEncoding(ctxt, enc);
7399 }
Owen Taylor3473f882001-02-23 17:55:21 +00007400 }
7401
7402
7403 if (CUR == 0) {
7404 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7405 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7406 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7407 ctxt->wellFormed = 0;
7408 ctxt->disableSAX = 1;
7409 }
7410
7411 /*
7412 * Check for the XMLDecl in the Prolog.
7413 */
7414 GROW;
7415 if ((RAW == '<') && (NXT(1) == '?') &&
7416 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7417 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7418
7419 /*
7420 * Note that we will switch encoding on the fly.
7421 */
7422 xmlParseXMLDecl(ctxt);
7423 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7424 /*
7425 * The XML REC instructs us to stop parsing right here
7426 */
7427 return(-1);
7428 }
7429 ctxt->standalone = ctxt->input->standalone;
7430 SKIP_BLANKS;
7431 } else {
7432 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7433 }
7434 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7435 ctxt->sax->startDocument(ctxt->userData);
7436
7437 /*
7438 * The Misc part of the Prolog
7439 */
7440 GROW;
7441 xmlParseMisc(ctxt);
7442
7443 /*
7444 * Then possibly doc type declaration(s) and more Misc
7445 * (doctypedecl Misc*)?
7446 */
7447 GROW;
7448 if ((RAW == '<') && (NXT(1) == '!') &&
7449 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7450 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7451 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7452 (NXT(8) == 'E')) {
7453
7454 ctxt->inSubset = 1;
7455 xmlParseDocTypeDecl(ctxt);
7456 if (RAW == '[') {
7457 ctxt->instate = XML_PARSER_DTD;
7458 xmlParseInternalSubset(ctxt);
7459 }
7460
7461 /*
7462 * Create and update the external subset.
7463 */
7464 ctxt->inSubset = 2;
7465 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7466 (!ctxt->disableSAX))
7467 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7468 ctxt->extSubSystem, ctxt->extSubURI);
7469 ctxt->inSubset = 0;
7470
7471
7472 ctxt->instate = XML_PARSER_PROLOG;
7473 xmlParseMisc(ctxt);
7474 }
7475
7476 /*
7477 * Time to start parsing the tree itself
7478 */
7479 GROW;
7480 if (RAW != '<') {
7481 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7482 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7483 ctxt->sax->error(ctxt->userData,
7484 "Start tag expected, '<' not found\n");
7485 ctxt->wellFormed = 0;
7486 ctxt->disableSAX = 1;
7487 ctxt->instate = XML_PARSER_EOF;
7488 } else {
7489 ctxt->instate = XML_PARSER_CONTENT;
7490 xmlParseElement(ctxt);
7491 ctxt->instate = XML_PARSER_EPILOG;
7492
7493
7494 /*
7495 * The Misc part at the end
7496 */
7497 xmlParseMisc(ctxt);
7498
7499 if (RAW != 0) {
7500 ctxt->errNo = XML_ERR_DOCUMENT_END;
7501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7502 ctxt->sax->error(ctxt->userData,
7503 "Extra content at the end of the document\n");
7504 ctxt->wellFormed = 0;
7505 ctxt->disableSAX = 1;
7506 }
7507 ctxt->instate = XML_PARSER_EOF;
7508 }
7509
7510 /*
7511 * SAX: end of the document processing.
7512 */
7513 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7514 (!ctxt->disableSAX))
7515 ctxt->sax->endDocument(ctxt->userData);
7516
7517 if (! ctxt->wellFormed) return(-1);
7518 return(0);
7519}
7520
7521/**
7522 * xmlParseExtParsedEnt:
7523 * @ctxt: an XML parser context
7524 *
7525 * parse a genreral parsed entity
7526 * An external general parsed entity is well-formed if it matches the
7527 * production labeled extParsedEnt.
7528 *
7529 * [78] extParsedEnt ::= TextDecl? content
7530 *
7531 * Returns 0, -1 in case of error. the parser context is augmented
7532 * as a result of the parsing.
7533 */
7534
7535int
7536xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7537 xmlChar start[4];
7538 xmlCharEncoding enc;
7539
7540 xmlDefaultSAXHandlerInit();
7541
7542 GROW;
7543
7544 /*
7545 * SAX: beginning of the document processing.
7546 */
7547 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7548 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7549
7550 /*
7551 * Get the 4 first bytes and decode the charset
7552 * if enc != XML_CHAR_ENCODING_NONE
7553 * plug some encoding conversion routines.
7554 */
7555 start[0] = RAW;
7556 start[1] = NXT(1);
7557 start[2] = NXT(2);
7558 start[3] = NXT(3);
7559 enc = xmlDetectCharEncoding(start, 4);
7560 if (enc != XML_CHAR_ENCODING_NONE) {
7561 xmlSwitchEncoding(ctxt, enc);
7562 }
7563
7564
7565 if (CUR == 0) {
7566 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7568 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7569 ctxt->wellFormed = 0;
7570 ctxt->disableSAX = 1;
7571 }
7572
7573 /*
7574 * Check for the XMLDecl in the Prolog.
7575 */
7576 GROW;
7577 if ((RAW == '<') && (NXT(1) == '?') &&
7578 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7579 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7580
7581 /*
7582 * Note that we will switch encoding on the fly.
7583 */
7584 xmlParseXMLDecl(ctxt);
7585 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7586 /*
7587 * The XML REC instructs us to stop parsing right here
7588 */
7589 return(-1);
7590 }
7591 SKIP_BLANKS;
7592 } else {
7593 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7594 }
7595 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7596 ctxt->sax->startDocument(ctxt->userData);
7597
7598 /*
7599 * Doing validity checking on chunk doesn't make sense
7600 */
7601 ctxt->instate = XML_PARSER_CONTENT;
7602 ctxt->validate = 0;
7603 ctxt->loadsubset = 0;
7604 ctxt->depth = 0;
7605
7606 xmlParseContent(ctxt);
7607
7608 if ((RAW == '<') && (NXT(1) == '/')) {
7609 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7610 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7611 ctxt->sax->error(ctxt->userData,
7612 "chunk is not well balanced\n");
7613 ctxt->wellFormed = 0;
7614 ctxt->disableSAX = 1;
7615 } else if (RAW != 0) {
7616 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7618 ctxt->sax->error(ctxt->userData,
7619 "extra content at the end of well balanced chunk\n");
7620 ctxt->wellFormed = 0;
7621 ctxt->disableSAX = 1;
7622 }
7623
7624 /*
7625 * SAX: end of the document processing.
7626 */
7627 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7628 (!ctxt->disableSAX))
7629 ctxt->sax->endDocument(ctxt->userData);
7630
7631 if (! ctxt->wellFormed) return(-1);
7632 return(0);
7633}
7634
7635/************************************************************************
7636 * *
7637 * Progressive parsing interfaces *
7638 * *
7639 ************************************************************************/
7640
7641/**
7642 * xmlParseLookupSequence:
7643 * @ctxt: an XML parser context
7644 * @first: the first char to lookup
7645 * @next: the next char to lookup or zero
7646 * @third: the next char to lookup or zero
7647 *
7648 * Try to find if a sequence (first, next, third) or just (first next) or
7649 * (first) is available in the input stream.
7650 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7651 * to avoid rescanning sequences of bytes, it DOES change the state of the
7652 * parser, do not use liberally.
7653 *
7654 * Returns the index to the current parsing point if the full sequence
7655 * is available, -1 otherwise.
7656 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007657static int
Owen Taylor3473f882001-02-23 17:55:21 +00007658xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7659 xmlChar next, xmlChar third) {
7660 int base, len;
7661 xmlParserInputPtr in;
7662 const xmlChar *buf;
7663
7664 in = ctxt->input;
7665 if (in == NULL) return(-1);
7666 base = in->cur - in->base;
7667 if (base < 0) return(-1);
7668 if (ctxt->checkIndex > base)
7669 base = ctxt->checkIndex;
7670 if (in->buf == NULL) {
7671 buf = in->base;
7672 len = in->length;
7673 } else {
7674 buf = in->buf->buffer->content;
7675 len = in->buf->buffer->use;
7676 }
7677 /* take into account the sequence length */
7678 if (third) len -= 2;
7679 else if (next) len --;
7680 for (;base < len;base++) {
7681 if (buf[base] == first) {
7682 if (third != 0) {
7683 if ((buf[base + 1] != next) ||
7684 (buf[base + 2] != third)) continue;
7685 } else if (next != 0) {
7686 if (buf[base + 1] != next) continue;
7687 }
7688 ctxt->checkIndex = 0;
7689#ifdef DEBUG_PUSH
7690 if (next == 0)
7691 xmlGenericError(xmlGenericErrorContext,
7692 "PP: lookup '%c' found at %d\n",
7693 first, base);
7694 else if (third == 0)
7695 xmlGenericError(xmlGenericErrorContext,
7696 "PP: lookup '%c%c' found at %d\n",
7697 first, next, base);
7698 else
7699 xmlGenericError(xmlGenericErrorContext,
7700 "PP: lookup '%c%c%c' found at %d\n",
7701 first, next, third, base);
7702#endif
7703 return(base - (in->cur - in->base));
7704 }
7705 }
7706 ctxt->checkIndex = base;
7707#ifdef DEBUG_PUSH
7708 if (next == 0)
7709 xmlGenericError(xmlGenericErrorContext,
7710 "PP: lookup '%c' failed\n", first);
7711 else if (third == 0)
7712 xmlGenericError(xmlGenericErrorContext,
7713 "PP: lookup '%c%c' failed\n", first, next);
7714 else
7715 xmlGenericError(xmlGenericErrorContext,
7716 "PP: lookup '%c%c%c' failed\n", first, next, third);
7717#endif
7718 return(-1);
7719}
7720
7721/**
7722 * xmlParseTryOrFinish:
7723 * @ctxt: an XML parser context
7724 * @terminate: last chunk indicator
7725 *
7726 * Try to progress on parsing
7727 *
7728 * Returns zero if no parsing was possible
7729 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007730static int
Owen Taylor3473f882001-02-23 17:55:21 +00007731xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7732 int ret = 0;
7733 int avail;
7734 xmlChar cur, next;
7735
7736#ifdef DEBUG_PUSH
7737 switch (ctxt->instate) {
7738 case XML_PARSER_EOF:
7739 xmlGenericError(xmlGenericErrorContext,
7740 "PP: try EOF\n"); break;
7741 case XML_PARSER_START:
7742 xmlGenericError(xmlGenericErrorContext,
7743 "PP: try START\n"); break;
7744 case XML_PARSER_MISC:
7745 xmlGenericError(xmlGenericErrorContext,
7746 "PP: try MISC\n");break;
7747 case XML_PARSER_COMMENT:
7748 xmlGenericError(xmlGenericErrorContext,
7749 "PP: try COMMENT\n");break;
7750 case XML_PARSER_PROLOG:
7751 xmlGenericError(xmlGenericErrorContext,
7752 "PP: try PROLOG\n");break;
7753 case XML_PARSER_START_TAG:
7754 xmlGenericError(xmlGenericErrorContext,
7755 "PP: try START_TAG\n");break;
7756 case XML_PARSER_CONTENT:
7757 xmlGenericError(xmlGenericErrorContext,
7758 "PP: try CONTENT\n");break;
7759 case XML_PARSER_CDATA_SECTION:
7760 xmlGenericError(xmlGenericErrorContext,
7761 "PP: try CDATA_SECTION\n");break;
7762 case XML_PARSER_END_TAG:
7763 xmlGenericError(xmlGenericErrorContext,
7764 "PP: try END_TAG\n");break;
7765 case XML_PARSER_ENTITY_DECL:
7766 xmlGenericError(xmlGenericErrorContext,
7767 "PP: try ENTITY_DECL\n");break;
7768 case XML_PARSER_ENTITY_VALUE:
7769 xmlGenericError(xmlGenericErrorContext,
7770 "PP: try ENTITY_VALUE\n");break;
7771 case XML_PARSER_ATTRIBUTE_VALUE:
7772 xmlGenericError(xmlGenericErrorContext,
7773 "PP: try ATTRIBUTE_VALUE\n");break;
7774 case XML_PARSER_DTD:
7775 xmlGenericError(xmlGenericErrorContext,
7776 "PP: try DTD\n");break;
7777 case XML_PARSER_EPILOG:
7778 xmlGenericError(xmlGenericErrorContext,
7779 "PP: try EPILOG\n");break;
7780 case XML_PARSER_PI:
7781 xmlGenericError(xmlGenericErrorContext,
7782 "PP: try PI\n");break;
7783 case XML_PARSER_IGNORE:
7784 xmlGenericError(xmlGenericErrorContext,
7785 "PP: try IGNORE\n");break;
7786 }
7787#endif
7788
7789 while (1) {
7790 /*
7791 * Pop-up of finished entities.
7792 */
7793 while ((RAW == 0) && (ctxt->inputNr > 1))
7794 xmlPopInput(ctxt);
7795
7796 if (ctxt->input ==NULL) break;
7797 if (ctxt->input->buf == NULL)
7798 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7799 else
7800 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7801 if (avail < 1)
7802 goto done;
7803 switch (ctxt->instate) {
7804 case XML_PARSER_EOF:
7805 /*
7806 * Document parsing is done !
7807 */
7808 goto done;
7809 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007810 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7811 xmlChar start[4];
7812 xmlCharEncoding enc;
7813
7814 /*
7815 * Very first chars read from the document flow.
7816 */
7817 if (avail < 4)
7818 goto done;
7819
7820 /*
7821 * Get the 4 first bytes and decode the charset
7822 * if enc != XML_CHAR_ENCODING_NONE
7823 * plug some encoding conversion routines.
7824 */
7825 start[0] = RAW;
7826 start[1] = NXT(1);
7827 start[2] = NXT(2);
7828 start[3] = NXT(3);
7829 enc = xmlDetectCharEncoding(start, 4);
7830 if (enc != XML_CHAR_ENCODING_NONE) {
7831 xmlSwitchEncoding(ctxt, enc);
7832 }
7833 break;
7834 }
Owen Taylor3473f882001-02-23 17:55:21 +00007835
7836 cur = ctxt->input->cur[0];
7837 next = ctxt->input->cur[1];
7838 if (cur == 0) {
7839 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7840 ctxt->sax->setDocumentLocator(ctxt->userData,
7841 &xmlDefaultSAXLocator);
7842 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7844 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7845 ctxt->wellFormed = 0;
7846 ctxt->disableSAX = 1;
7847 ctxt->instate = XML_PARSER_EOF;
7848#ifdef DEBUG_PUSH
7849 xmlGenericError(xmlGenericErrorContext,
7850 "PP: entering EOF\n");
7851#endif
7852 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7853 ctxt->sax->endDocument(ctxt->userData);
7854 goto done;
7855 }
7856 if ((cur == '<') && (next == '?')) {
7857 /* PI or XML decl */
7858 if (avail < 5) return(ret);
7859 if ((!terminate) &&
7860 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7861 return(ret);
7862 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7863 ctxt->sax->setDocumentLocator(ctxt->userData,
7864 &xmlDefaultSAXLocator);
7865 if ((ctxt->input->cur[2] == 'x') &&
7866 (ctxt->input->cur[3] == 'm') &&
7867 (ctxt->input->cur[4] == 'l') &&
7868 (IS_BLANK(ctxt->input->cur[5]))) {
7869 ret += 5;
7870#ifdef DEBUG_PUSH
7871 xmlGenericError(xmlGenericErrorContext,
7872 "PP: Parsing XML Decl\n");
7873#endif
7874 xmlParseXMLDecl(ctxt);
7875 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7876 /*
7877 * The XML REC instructs us to stop parsing right
7878 * here
7879 */
7880 ctxt->instate = XML_PARSER_EOF;
7881 return(0);
7882 }
7883 ctxt->standalone = ctxt->input->standalone;
7884 if ((ctxt->encoding == NULL) &&
7885 (ctxt->input->encoding != NULL))
7886 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7887 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7888 (!ctxt->disableSAX))
7889 ctxt->sax->startDocument(ctxt->userData);
7890 ctxt->instate = XML_PARSER_MISC;
7891#ifdef DEBUG_PUSH
7892 xmlGenericError(xmlGenericErrorContext,
7893 "PP: entering MISC\n");
7894#endif
7895 } else {
7896 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7897 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7898 (!ctxt->disableSAX))
7899 ctxt->sax->startDocument(ctxt->userData);
7900 ctxt->instate = XML_PARSER_MISC;
7901#ifdef DEBUG_PUSH
7902 xmlGenericError(xmlGenericErrorContext,
7903 "PP: entering MISC\n");
7904#endif
7905 }
7906 } else {
7907 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7908 ctxt->sax->setDocumentLocator(ctxt->userData,
7909 &xmlDefaultSAXLocator);
7910 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7911 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7912 (!ctxt->disableSAX))
7913 ctxt->sax->startDocument(ctxt->userData);
7914 ctxt->instate = XML_PARSER_MISC;
7915#ifdef DEBUG_PUSH
7916 xmlGenericError(xmlGenericErrorContext,
7917 "PP: entering MISC\n");
7918#endif
7919 }
7920 break;
7921 case XML_PARSER_MISC:
7922 SKIP_BLANKS;
7923 if (ctxt->input->buf == NULL)
7924 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7925 else
7926 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7927 if (avail < 2)
7928 goto done;
7929 cur = ctxt->input->cur[0];
7930 next = ctxt->input->cur[1];
7931 if ((cur == '<') && (next == '?')) {
7932 if ((!terminate) &&
7933 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7934 goto done;
7935#ifdef DEBUG_PUSH
7936 xmlGenericError(xmlGenericErrorContext,
7937 "PP: Parsing PI\n");
7938#endif
7939 xmlParsePI(ctxt);
7940 } else if ((cur == '<') && (next == '!') &&
7941 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7942 if ((!terminate) &&
7943 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7944 goto done;
7945#ifdef DEBUG_PUSH
7946 xmlGenericError(xmlGenericErrorContext,
7947 "PP: Parsing Comment\n");
7948#endif
7949 xmlParseComment(ctxt);
7950 ctxt->instate = XML_PARSER_MISC;
7951 } else if ((cur == '<') && (next == '!') &&
7952 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7953 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7954 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7955 (ctxt->input->cur[8] == 'E')) {
7956 if ((!terminate) &&
7957 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7958 goto done;
7959#ifdef DEBUG_PUSH
7960 xmlGenericError(xmlGenericErrorContext,
7961 "PP: Parsing internal subset\n");
7962#endif
7963 ctxt->inSubset = 1;
7964 xmlParseDocTypeDecl(ctxt);
7965 if (RAW == '[') {
7966 ctxt->instate = XML_PARSER_DTD;
7967#ifdef DEBUG_PUSH
7968 xmlGenericError(xmlGenericErrorContext,
7969 "PP: entering DTD\n");
7970#endif
7971 } else {
7972 /*
7973 * Create and update the external subset.
7974 */
7975 ctxt->inSubset = 2;
7976 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7977 (ctxt->sax->externalSubset != NULL))
7978 ctxt->sax->externalSubset(ctxt->userData,
7979 ctxt->intSubName, ctxt->extSubSystem,
7980 ctxt->extSubURI);
7981 ctxt->inSubset = 0;
7982 ctxt->instate = XML_PARSER_PROLOG;
7983#ifdef DEBUG_PUSH
7984 xmlGenericError(xmlGenericErrorContext,
7985 "PP: entering PROLOG\n");
7986#endif
7987 }
7988 } else if ((cur == '<') && (next == '!') &&
7989 (avail < 9)) {
7990 goto done;
7991 } else {
7992 ctxt->instate = XML_PARSER_START_TAG;
7993#ifdef DEBUG_PUSH
7994 xmlGenericError(xmlGenericErrorContext,
7995 "PP: entering START_TAG\n");
7996#endif
7997 }
7998 break;
7999 case XML_PARSER_IGNORE:
8000 xmlGenericError(xmlGenericErrorContext,
8001 "PP: internal error, state == IGNORE");
8002 ctxt->instate = XML_PARSER_DTD;
8003#ifdef DEBUG_PUSH
8004 xmlGenericError(xmlGenericErrorContext,
8005 "PP: entering DTD\n");
8006#endif
8007 break;
8008 case XML_PARSER_PROLOG:
8009 SKIP_BLANKS;
8010 if (ctxt->input->buf == NULL)
8011 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8012 else
8013 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8014 if (avail < 2)
8015 goto done;
8016 cur = ctxt->input->cur[0];
8017 next = ctxt->input->cur[1];
8018 if ((cur == '<') && (next == '?')) {
8019 if ((!terminate) &&
8020 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8021 goto done;
8022#ifdef DEBUG_PUSH
8023 xmlGenericError(xmlGenericErrorContext,
8024 "PP: Parsing PI\n");
8025#endif
8026 xmlParsePI(ctxt);
8027 } else if ((cur == '<') && (next == '!') &&
8028 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8029 if ((!terminate) &&
8030 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8031 goto done;
8032#ifdef DEBUG_PUSH
8033 xmlGenericError(xmlGenericErrorContext,
8034 "PP: Parsing Comment\n");
8035#endif
8036 xmlParseComment(ctxt);
8037 ctxt->instate = XML_PARSER_PROLOG;
8038 } else if ((cur == '<') && (next == '!') &&
8039 (avail < 4)) {
8040 goto done;
8041 } else {
8042 ctxt->instate = XML_PARSER_START_TAG;
8043#ifdef DEBUG_PUSH
8044 xmlGenericError(xmlGenericErrorContext,
8045 "PP: entering START_TAG\n");
8046#endif
8047 }
8048 break;
8049 case XML_PARSER_EPILOG:
8050 SKIP_BLANKS;
8051 if (ctxt->input->buf == NULL)
8052 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
8053 else
8054 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
8055 if (avail < 2)
8056 goto done;
8057 cur = ctxt->input->cur[0];
8058 next = ctxt->input->cur[1];
8059 if ((cur == '<') && (next == '?')) {
8060 if ((!terminate) &&
8061 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8062 goto done;
8063#ifdef DEBUG_PUSH
8064 xmlGenericError(xmlGenericErrorContext,
8065 "PP: Parsing PI\n");
8066#endif
8067 xmlParsePI(ctxt);
8068 ctxt->instate = XML_PARSER_EPILOG;
8069 } else if ((cur == '<') && (next == '!') &&
8070 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8071 if ((!terminate) &&
8072 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8073 goto done;
8074#ifdef DEBUG_PUSH
8075 xmlGenericError(xmlGenericErrorContext,
8076 "PP: Parsing Comment\n");
8077#endif
8078 xmlParseComment(ctxt);
8079 ctxt->instate = XML_PARSER_EPILOG;
8080 } else if ((cur == '<') && (next == '!') &&
8081 (avail < 4)) {
8082 goto done;
8083 } else {
8084 ctxt->errNo = XML_ERR_DOCUMENT_END;
8085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8086 ctxt->sax->error(ctxt->userData,
8087 "Extra content at the end of the document\n");
8088 ctxt->wellFormed = 0;
8089 ctxt->disableSAX = 1;
8090 ctxt->instate = XML_PARSER_EOF;
8091#ifdef DEBUG_PUSH
8092 xmlGenericError(xmlGenericErrorContext,
8093 "PP: entering EOF\n");
8094#endif
8095 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8096 (!ctxt->disableSAX))
8097 ctxt->sax->endDocument(ctxt->userData);
8098 goto done;
8099 }
8100 break;
8101 case XML_PARSER_START_TAG: {
8102 xmlChar *name, *oldname;
8103
8104 if ((avail < 2) && (ctxt->inputNr == 1))
8105 goto done;
8106 cur = ctxt->input->cur[0];
8107 if (cur != '<') {
8108 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8110 ctxt->sax->error(ctxt->userData,
8111 "Start tag expect, '<' not found\n");
8112 ctxt->wellFormed = 0;
8113 ctxt->disableSAX = 1;
8114 ctxt->instate = XML_PARSER_EOF;
8115#ifdef DEBUG_PUSH
8116 xmlGenericError(xmlGenericErrorContext,
8117 "PP: entering EOF\n");
8118#endif
8119 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8120 (!ctxt->disableSAX))
8121 ctxt->sax->endDocument(ctxt->userData);
8122 goto done;
8123 }
8124 if ((!terminate) &&
8125 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8126 goto done;
8127 if (ctxt->spaceNr == 0)
8128 spacePush(ctxt, -1);
8129 else
8130 spacePush(ctxt, *ctxt->space);
8131 name = xmlParseStartTag(ctxt);
8132 if (name == NULL) {
8133 spacePop(ctxt);
8134 ctxt->instate = XML_PARSER_EOF;
8135#ifdef DEBUG_PUSH
8136 xmlGenericError(xmlGenericErrorContext,
8137 "PP: entering EOF\n");
8138#endif
8139 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8140 (!ctxt->disableSAX))
8141 ctxt->sax->endDocument(ctxt->userData);
8142 goto done;
8143 }
8144 namePush(ctxt, xmlStrdup(name));
8145
8146 /*
8147 * [ VC: Root Element Type ]
8148 * The Name in the document type declaration must match
8149 * the element type of the root element.
8150 */
8151 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8152 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8153 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8154
8155 /*
8156 * Check for an Empty Element.
8157 */
8158 if ((RAW == '/') && (NXT(1) == '>')) {
8159 SKIP(2);
8160 if ((ctxt->sax != NULL) &&
8161 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8162 ctxt->sax->endElement(ctxt->userData, name);
8163 xmlFree(name);
8164 oldname = namePop(ctxt);
8165 spacePop(ctxt);
8166 if (oldname != NULL) {
8167#ifdef DEBUG_STACK
8168 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8169#endif
8170 xmlFree(oldname);
8171 }
8172 if (ctxt->name == NULL) {
8173 ctxt->instate = XML_PARSER_EPILOG;
8174#ifdef DEBUG_PUSH
8175 xmlGenericError(xmlGenericErrorContext,
8176 "PP: entering EPILOG\n");
8177#endif
8178 } else {
8179 ctxt->instate = XML_PARSER_CONTENT;
8180#ifdef DEBUG_PUSH
8181 xmlGenericError(xmlGenericErrorContext,
8182 "PP: entering CONTENT\n");
8183#endif
8184 }
8185 break;
8186 }
8187 if (RAW == '>') {
8188 NEXT;
8189 } else {
8190 ctxt->errNo = XML_ERR_GT_REQUIRED;
8191 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8192 ctxt->sax->error(ctxt->userData,
8193 "Couldn't find end of Start Tag %s\n",
8194 name);
8195 ctxt->wellFormed = 0;
8196 ctxt->disableSAX = 1;
8197
8198 /*
8199 * end of parsing of this node.
8200 */
8201 nodePop(ctxt);
8202 oldname = namePop(ctxt);
8203 spacePop(ctxt);
8204 if (oldname != NULL) {
8205#ifdef DEBUG_STACK
8206 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8207#endif
8208 xmlFree(oldname);
8209 }
8210 }
8211 xmlFree(name);
8212 ctxt->instate = XML_PARSER_CONTENT;
8213#ifdef DEBUG_PUSH
8214 xmlGenericError(xmlGenericErrorContext,
8215 "PP: entering CONTENT\n");
8216#endif
8217 break;
8218 }
8219 case XML_PARSER_CONTENT: {
8220 const xmlChar *test;
8221 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008222 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008223
8224 /*
8225 * Handle preparsed entities and charRef
8226 */
8227 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008228 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008229
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008230 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008231 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8232 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008233 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008234 ctxt->token = 0;
8235 }
8236 if ((avail < 2) && (ctxt->inputNr == 1))
8237 goto done;
8238 cur = ctxt->input->cur[0];
8239 next = ctxt->input->cur[1];
8240
8241 test = CUR_PTR;
8242 cons = ctxt->input->consumed;
8243 tok = ctxt->token;
8244 if ((cur == '<') && (next == '?')) {
8245 if ((!terminate) &&
8246 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8247 goto done;
8248#ifdef DEBUG_PUSH
8249 xmlGenericError(xmlGenericErrorContext,
8250 "PP: Parsing PI\n");
8251#endif
8252 xmlParsePI(ctxt);
8253 } else if ((cur == '<') && (next == '!') &&
8254 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8255 if ((!terminate) &&
8256 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8257 goto done;
8258#ifdef DEBUG_PUSH
8259 xmlGenericError(xmlGenericErrorContext,
8260 "PP: Parsing Comment\n");
8261#endif
8262 xmlParseComment(ctxt);
8263 ctxt->instate = XML_PARSER_CONTENT;
8264 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8265 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8266 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8267 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8268 (ctxt->input->cur[8] == '[')) {
8269 SKIP(9);
8270 ctxt->instate = XML_PARSER_CDATA_SECTION;
8271#ifdef DEBUG_PUSH
8272 xmlGenericError(xmlGenericErrorContext,
8273 "PP: entering CDATA_SECTION\n");
8274#endif
8275 break;
8276 } else if ((cur == '<') && (next == '!') &&
8277 (avail < 9)) {
8278 goto done;
8279 } else if ((cur == '<') && (next == '/')) {
8280 ctxt->instate = XML_PARSER_END_TAG;
8281#ifdef DEBUG_PUSH
8282 xmlGenericError(xmlGenericErrorContext,
8283 "PP: entering END_TAG\n");
8284#endif
8285 break;
8286 } else if (cur == '<') {
8287 ctxt->instate = XML_PARSER_START_TAG;
8288#ifdef DEBUG_PUSH
8289 xmlGenericError(xmlGenericErrorContext,
8290 "PP: entering START_TAG\n");
8291#endif
8292 break;
8293 } else if (cur == '&') {
8294 if ((!terminate) &&
8295 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8296 goto done;
8297#ifdef DEBUG_PUSH
8298 xmlGenericError(xmlGenericErrorContext,
8299 "PP: Parsing Reference\n");
8300#endif
8301 xmlParseReference(ctxt);
8302 } else {
8303 /* TODO Avoid the extra copy, handle directly !!! */
8304 /*
8305 * Goal of the following test is:
8306 * - minimize calls to the SAX 'character' callback
8307 * when they are mergeable
8308 * - handle an problem for isBlank when we only parse
8309 * a sequence of blank chars and the next one is
8310 * not available to check against '<' presence.
8311 * - tries to homogenize the differences in SAX
8312 * callbacks beween the push and pull versions
8313 * of the parser.
8314 */
8315 if ((ctxt->inputNr == 1) &&
8316 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8317 if ((!terminate) &&
8318 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8319 goto done;
8320 }
8321 ctxt->checkIndex = 0;
8322#ifdef DEBUG_PUSH
8323 xmlGenericError(xmlGenericErrorContext,
8324 "PP: Parsing char data\n");
8325#endif
8326 xmlParseCharData(ctxt, 0);
8327 }
8328 /*
8329 * Pop-up of finished entities.
8330 */
8331 while ((RAW == 0) && (ctxt->inputNr > 1))
8332 xmlPopInput(ctxt);
8333 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8334 (tok == ctxt->token)) {
8335 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8336 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8337 ctxt->sax->error(ctxt->userData,
8338 "detected an error in element content\n");
8339 ctxt->wellFormed = 0;
8340 ctxt->disableSAX = 1;
8341 ctxt->instate = XML_PARSER_EOF;
8342 break;
8343 }
8344 break;
8345 }
8346 case XML_PARSER_CDATA_SECTION: {
8347 /*
8348 * The Push mode need to have the SAX callback for
8349 * cdataBlock merge back contiguous callbacks.
8350 */
8351 int base;
8352
8353 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8354 if (base < 0) {
8355 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8356 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8357 if (ctxt->sax->cdataBlock != NULL)
8358 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8359 XML_PARSER_BIG_BUFFER_SIZE);
8360 }
8361 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8362 ctxt->checkIndex = 0;
8363 }
8364 goto done;
8365 } else {
8366 if ((ctxt->sax != NULL) && (base > 0) &&
8367 (!ctxt->disableSAX)) {
8368 if (ctxt->sax->cdataBlock != NULL)
8369 ctxt->sax->cdataBlock(ctxt->userData,
8370 ctxt->input->cur, base);
8371 }
8372 SKIP(base + 3);
8373 ctxt->checkIndex = 0;
8374 ctxt->instate = XML_PARSER_CONTENT;
8375#ifdef DEBUG_PUSH
8376 xmlGenericError(xmlGenericErrorContext,
8377 "PP: entering CONTENT\n");
8378#endif
8379 }
8380 break;
8381 }
8382 case XML_PARSER_END_TAG:
8383 if (avail < 2)
8384 goto done;
8385 if ((!terminate) &&
8386 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8387 goto done;
8388 xmlParseEndTag(ctxt);
8389 if (ctxt->name == NULL) {
8390 ctxt->instate = XML_PARSER_EPILOG;
8391#ifdef DEBUG_PUSH
8392 xmlGenericError(xmlGenericErrorContext,
8393 "PP: entering EPILOG\n");
8394#endif
8395 } else {
8396 ctxt->instate = XML_PARSER_CONTENT;
8397#ifdef DEBUG_PUSH
8398 xmlGenericError(xmlGenericErrorContext,
8399 "PP: entering CONTENT\n");
8400#endif
8401 }
8402 break;
8403 case XML_PARSER_DTD: {
8404 /*
8405 * Sorry but progressive parsing of the internal subset
8406 * is not expected to be supported. We first check that
8407 * the full content of the internal subset is available and
8408 * the parsing is launched only at that point.
8409 * Internal subset ends up with "']' S? '>'" in an unescaped
8410 * section and not in a ']]>' sequence which are conditional
8411 * sections (whoever argued to keep that crap in XML deserve
8412 * a place in hell !).
8413 */
8414 int base, i;
8415 xmlChar *buf;
8416 xmlChar quote = 0;
8417
8418 base = ctxt->input->cur - ctxt->input->base;
8419 if (base < 0) return(0);
8420 if (ctxt->checkIndex > base)
8421 base = ctxt->checkIndex;
8422 buf = ctxt->input->buf->buffer->content;
8423 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8424 base++) {
8425 if (quote != 0) {
8426 if (buf[base] == quote)
8427 quote = 0;
8428 continue;
8429 }
8430 if (buf[base] == '"') {
8431 quote = '"';
8432 continue;
8433 }
8434 if (buf[base] == '\'') {
8435 quote = '\'';
8436 continue;
8437 }
8438 if (buf[base] == ']') {
8439 if ((unsigned int) base +1 >=
8440 ctxt->input->buf->buffer->use)
8441 break;
8442 if (buf[base + 1] == ']') {
8443 /* conditional crap, skip both ']' ! */
8444 base++;
8445 continue;
8446 }
8447 for (i = 0;
8448 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8449 i++) {
8450 if (buf[base + i] == '>')
8451 goto found_end_int_subset;
8452 }
8453 break;
8454 }
8455 }
8456 /*
8457 * We didn't found the end of the Internal subset
8458 */
8459 if (quote == 0)
8460 ctxt->checkIndex = base;
8461#ifdef DEBUG_PUSH
8462 if (next == 0)
8463 xmlGenericError(xmlGenericErrorContext,
8464 "PP: lookup of int subset end filed\n");
8465#endif
8466 goto done;
8467
8468found_end_int_subset:
8469 xmlParseInternalSubset(ctxt);
8470 ctxt->inSubset = 2;
8471 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8472 (ctxt->sax->externalSubset != NULL))
8473 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8474 ctxt->extSubSystem, ctxt->extSubURI);
8475 ctxt->inSubset = 0;
8476 ctxt->instate = XML_PARSER_PROLOG;
8477 ctxt->checkIndex = 0;
8478#ifdef DEBUG_PUSH
8479 xmlGenericError(xmlGenericErrorContext,
8480 "PP: entering PROLOG\n");
8481#endif
8482 break;
8483 }
8484 case XML_PARSER_COMMENT:
8485 xmlGenericError(xmlGenericErrorContext,
8486 "PP: internal error, state == COMMENT\n");
8487 ctxt->instate = XML_PARSER_CONTENT;
8488#ifdef DEBUG_PUSH
8489 xmlGenericError(xmlGenericErrorContext,
8490 "PP: entering CONTENT\n");
8491#endif
8492 break;
8493 case XML_PARSER_PI:
8494 xmlGenericError(xmlGenericErrorContext,
8495 "PP: internal error, state == PI\n");
8496 ctxt->instate = XML_PARSER_CONTENT;
8497#ifdef DEBUG_PUSH
8498 xmlGenericError(xmlGenericErrorContext,
8499 "PP: entering CONTENT\n");
8500#endif
8501 break;
8502 case XML_PARSER_ENTITY_DECL:
8503 xmlGenericError(xmlGenericErrorContext,
8504 "PP: internal error, state == ENTITY_DECL\n");
8505 ctxt->instate = XML_PARSER_DTD;
8506#ifdef DEBUG_PUSH
8507 xmlGenericError(xmlGenericErrorContext,
8508 "PP: entering DTD\n");
8509#endif
8510 break;
8511 case XML_PARSER_ENTITY_VALUE:
8512 xmlGenericError(xmlGenericErrorContext,
8513 "PP: internal error, state == ENTITY_VALUE\n");
8514 ctxt->instate = XML_PARSER_CONTENT;
8515#ifdef DEBUG_PUSH
8516 xmlGenericError(xmlGenericErrorContext,
8517 "PP: entering DTD\n");
8518#endif
8519 break;
8520 case XML_PARSER_ATTRIBUTE_VALUE:
8521 xmlGenericError(xmlGenericErrorContext,
8522 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8523 ctxt->instate = XML_PARSER_START_TAG;
8524#ifdef DEBUG_PUSH
8525 xmlGenericError(xmlGenericErrorContext,
8526 "PP: entering START_TAG\n");
8527#endif
8528 break;
8529 case XML_PARSER_SYSTEM_LITERAL:
8530 xmlGenericError(xmlGenericErrorContext,
8531 "PP: internal error, state == SYSTEM_LITERAL\n");
8532 ctxt->instate = XML_PARSER_START_TAG;
8533#ifdef DEBUG_PUSH
8534 xmlGenericError(xmlGenericErrorContext,
8535 "PP: entering START_TAG\n");
8536#endif
8537 break;
8538 }
8539 }
8540done:
8541#ifdef DEBUG_PUSH
8542 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8543#endif
8544 return(ret);
8545}
8546
8547/**
Owen Taylor3473f882001-02-23 17:55:21 +00008548 * xmlParseChunk:
8549 * @ctxt: an XML parser context
8550 * @chunk: an char array
8551 * @size: the size in byte of the chunk
8552 * @terminate: last chunk indicator
8553 *
8554 * Parse a Chunk of memory
8555 *
8556 * Returns zero if no error, the xmlParserErrors otherwise.
8557 */
8558int
8559xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8560 int terminate) {
8561 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8562 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8563 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8564 int cur = ctxt->input->cur - ctxt->input->base;
8565
8566 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8567 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8568 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008569 ctxt->input->end =
8570 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008571#ifdef DEBUG_PUSH
8572 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8573#endif
8574
8575 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8576 xmlParseTryOrFinish(ctxt, terminate);
8577 } else if (ctxt->instate != XML_PARSER_EOF) {
8578 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8579 xmlParserInputBufferPtr in = ctxt->input->buf;
8580 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8581 (in->raw != NULL)) {
8582 int nbchars;
8583
8584 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8585 if (nbchars < 0) {
8586 xmlGenericError(xmlGenericErrorContext,
8587 "xmlParseChunk: encoder error\n");
8588 return(XML_ERR_INVALID_ENCODING);
8589 }
8590 }
8591 }
8592 }
8593 xmlParseTryOrFinish(ctxt, terminate);
8594 if (terminate) {
8595 /*
8596 * Check for termination
8597 */
8598 if ((ctxt->instate != XML_PARSER_EOF) &&
8599 (ctxt->instate != XML_PARSER_EPILOG)) {
8600 ctxt->errNo = XML_ERR_DOCUMENT_END;
8601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8602 ctxt->sax->error(ctxt->userData,
8603 "Extra content at the end of the document\n");
8604 ctxt->wellFormed = 0;
8605 ctxt->disableSAX = 1;
8606 }
8607 if (ctxt->instate != XML_PARSER_EOF) {
8608 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8609 (!ctxt->disableSAX))
8610 ctxt->sax->endDocument(ctxt->userData);
8611 }
8612 ctxt->instate = XML_PARSER_EOF;
8613 }
8614 return((xmlParserErrors) ctxt->errNo);
8615}
8616
8617/************************************************************************
8618 * *
8619 * I/O front end functions to the parser *
8620 * *
8621 ************************************************************************/
8622
8623/**
8624 * xmlStopParser:
8625 * @ctxt: an XML parser context
8626 *
8627 * Blocks further parser processing
8628 */
8629void
8630xmlStopParser(xmlParserCtxtPtr ctxt) {
8631 ctxt->instate = XML_PARSER_EOF;
8632 if (ctxt->input != NULL)
8633 ctxt->input->cur = BAD_CAST"";
8634}
8635
8636/**
8637 * xmlCreatePushParserCtxt:
8638 * @sax: a SAX handler
8639 * @user_data: The user data returned on SAX callbacks
8640 * @chunk: a pointer to an array of chars
8641 * @size: number of chars in the array
8642 * @filename: an optional file name or URI
8643 *
8644 * Create a parser context for using the XML parser in push mode
8645 * To allow content encoding detection, @size should be >= 4
8646 * The value of @filename is used for fetching external entities
8647 * and error/warning reports.
8648 *
8649 * Returns the new parser context or NULL
8650 */
8651xmlParserCtxtPtr
8652xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8653 const char *chunk, int size, const char *filename) {
8654 xmlParserCtxtPtr ctxt;
8655 xmlParserInputPtr inputStream;
8656 xmlParserInputBufferPtr buf;
8657 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8658
8659 /*
8660 * plug some encoding conversion routines
8661 */
8662 if ((chunk != NULL) && (size >= 4))
8663 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8664
8665 buf = xmlAllocParserInputBuffer(enc);
8666 if (buf == NULL) return(NULL);
8667
8668 ctxt = xmlNewParserCtxt();
8669 if (ctxt == NULL) {
8670 xmlFree(buf);
8671 return(NULL);
8672 }
8673 if (sax != NULL) {
8674 if (ctxt->sax != &xmlDefaultSAXHandler)
8675 xmlFree(ctxt->sax);
8676 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8677 if (ctxt->sax == NULL) {
8678 xmlFree(buf);
8679 xmlFree(ctxt);
8680 return(NULL);
8681 }
8682 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8683 if (user_data != NULL)
8684 ctxt->userData = user_data;
8685 }
8686 if (filename == NULL) {
8687 ctxt->directory = NULL;
8688 } else {
8689 ctxt->directory = xmlParserGetDirectory(filename);
8690 }
8691
8692 inputStream = xmlNewInputStream(ctxt);
8693 if (inputStream == NULL) {
8694 xmlFreeParserCtxt(ctxt);
8695 return(NULL);
8696 }
8697
8698 if (filename == NULL)
8699 inputStream->filename = NULL;
8700 else
8701 inputStream->filename = xmlMemStrdup(filename);
8702 inputStream->buf = buf;
8703 inputStream->base = inputStream->buf->buffer->content;
8704 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008705 inputStream->end =
8706 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008707
8708 inputPush(ctxt, inputStream);
8709
8710 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8711 (ctxt->input->buf != NULL)) {
8712 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8713#ifdef DEBUG_PUSH
8714 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8715#endif
8716 }
8717
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008718 if (enc != XML_CHAR_ENCODING_NONE) {
8719 xmlSwitchEncoding(ctxt, enc);
8720 }
8721
Owen Taylor3473f882001-02-23 17:55:21 +00008722 return(ctxt);
8723}
8724
8725/**
8726 * xmlCreateIOParserCtxt:
8727 * @sax: a SAX handler
8728 * @user_data: The user data returned on SAX callbacks
8729 * @ioread: an I/O read function
8730 * @ioclose: an I/O close function
8731 * @ioctx: an I/O handler
8732 * @enc: the charset encoding if known
8733 *
8734 * Create a parser context for using the XML parser with an existing
8735 * I/O stream
8736 *
8737 * Returns the new parser context or NULL
8738 */
8739xmlParserCtxtPtr
8740xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8741 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8742 void *ioctx, xmlCharEncoding enc) {
8743 xmlParserCtxtPtr ctxt;
8744 xmlParserInputPtr inputStream;
8745 xmlParserInputBufferPtr buf;
8746
8747 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8748 if (buf == NULL) return(NULL);
8749
8750 ctxt = xmlNewParserCtxt();
8751 if (ctxt == NULL) {
8752 xmlFree(buf);
8753 return(NULL);
8754 }
8755 if (sax != NULL) {
8756 if (ctxt->sax != &xmlDefaultSAXHandler)
8757 xmlFree(ctxt->sax);
8758 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8759 if (ctxt->sax == NULL) {
8760 xmlFree(buf);
8761 xmlFree(ctxt);
8762 return(NULL);
8763 }
8764 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8765 if (user_data != NULL)
8766 ctxt->userData = user_data;
8767 }
8768
8769 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8770 if (inputStream == NULL) {
8771 xmlFreeParserCtxt(ctxt);
8772 return(NULL);
8773 }
8774 inputPush(ctxt, inputStream);
8775
8776 return(ctxt);
8777}
8778
8779/************************************************************************
8780 * *
8781 * Front ends when parsing a Dtd *
8782 * *
8783 ************************************************************************/
8784
8785/**
8786 * xmlIOParseDTD:
8787 * @sax: the SAX handler block or NULL
8788 * @input: an Input Buffer
8789 * @enc: the charset encoding if known
8790 *
8791 * Load and parse a DTD
8792 *
8793 * Returns the resulting xmlDtdPtr or NULL in case of error.
8794 * @input will be freed at parsing end.
8795 */
8796
8797xmlDtdPtr
8798xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8799 xmlCharEncoding enc) {
8800 xmlDtdPtr ret = NULL;
8801 xmlParserCtxtPtr ctxt;
8802 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008803 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008804
8805 if (input == NULL)
8806 return(NULL);
8807
8808 ctxt = xmlNewParserCtxt();
8809 if (ctxt == NULL) {
8810 return(NULL);
8811 }
8812
8813 /*
8814 * Set-up the SAX context
8815 */
8816 if (sax != NULL) {
8817 if (ctxt->sax != NULL)
8818 xmlFree(ctxt->sax);
8819 ctxt->sax = sax;
8820 ctxt->userData = NULL;
8821 }
8822
8823 /*
8824 * generate a parser input from the I/O handler
8825 */
8826
8827 pinput = xmlNewIOInputStream(ctxt, input, enc);
8828 if (pinput == NULL) {
8829 if (sax != NULL) ctxt->sax = NULL;
8830 xmlFreeParserCtxt(ctxt);
8831 return(NULL);
8832 }
8833
8834 /*
8835 * plug some encoding conversion routines here.
8836 */
8837 xmlPushInput(ctxt, pinput);
8838
8839 pinput->filename = NULL;
8840 pinput->line = 1;
8841 pinput->col = 1;
8842 pinput->base = ctxt->input->cur;
8843 pinput->cur = ctxt->input->cur;
8844 pinput->free = NULL;
8845
8846 /*
8847 * let's parse that entity knowing it's an external subset.
8848 */
8849 ctxt->inSubset = 2;
8850 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8851 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8852 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008853
8854 if (enc == XML_CHAR_ENCODING_NONE) {
8855 /*
8856 * Get the 4 first bytes and decode the charset
8857 * if enc != XML_CHAR_ENCODING_NONE
8858 * plug some encoding conversion routines.
8859 */
8860 start[0] = RAW;
8861 start[1] = NXT(1);
8862 start[2] = NXT(2);
8863 start[3] = NXT(3);
8864 enc = xmlDetectCharEncoding(start, 4);
8865 if (enc != XML_CHAR_ENCODING_NONE) {
8866 xmlSwitchEncoding(ctxt, enc);
8867 }
8868 }
8869
Owen Taylor3473f882001-02-23 17:55:21 +00008870 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8871
8872 if (ctxt->myDoc != NULL) {
8873 if (ctxt->wellFormed) {
8874 ret = ctxt->myDoc->extSubset;
8875 ctxt->myDoc->extSubset = NULL;
8876 } else {
8877 ret = NULL;
8878 }
8879 xmlFreeDoc(ctxt->myDoc);
8880 ctxt->myDoc = NULL;
8881 }
8882 if (sax != NULL) ctxt->sax = NULL;
8883 xmlFreeParserCtxt(ctxt);
8884
8885 return(ret);
8886}
8887
8888/**
8889 * xmlSAXParseDTD:
8890 * @sax: the SAX handler block
8891 * @ExternalID: a NAME* containing the External ID of the DTD
8892 * @SystemID: a NAME* containing the URL to the DTD
8893 *
8894 * Load and parse an external subset.
8895 *
8896 * Returns the resulting xmlDtdPtr or NULL in case of error.
8897 */
8898
8899xmlDtdPtr
8900xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8901 const xmlChar *SystemID) {
8902 xmlDtdPtr ret = NULL;
8903 xmlParserCtxtPtr ctxt;
8904 xmlParserInputPtr input = NULL;
8905 xmlCharEncoding enc;
8906
8907 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8908
8909 ctxt = xmlNewParserCtxt();
8910 if (ctxt == NULL) {
8911 return(NULL);
8912 }
8913
8914 /*
8915 * Set-up the SAX context
8916 */
8917 if (sax != NULL) {
8918 if (ctxt->sax != NULL)
8919 xmlFree(ctxt->sax);
8920 ctxt->sax = sax;
8921 ctxt->userData = NULL;
8922 }
8923
8924 /*
8925 * Ask the Entity resolver to load the damn thing
8926 */
8927
8928 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8929 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8930 if (input == NULL) {
8931 if (sax != NULL) ctxt->sax = NULL;
8932 xmlFreeParserCtxt(ctxt);
8933 return(NULL);
8934 }
8935
8936 /*
8937 * plug some encoding conversion routines here.
8938 */
8939 xmlPushInput(ctxt, input);
8940 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8941 xmlSwitchEncoding(ctxt, enc);
8942
8943 if (input->filename == NULL)
8944 input->filename = (char *) xmlStrdup(SystemID);
8945 input->line = 1;
8946 input->col = 1;
8947 input->base = ctxt->input->cur;
8948 input->cur = ctxt->input->cur;
8949 input->free = NULL;
8950
8951 /*
8952 * let's parse that entity knowing it's an external subset.
8953 */
8954 ctxt->inSubset = 2;
8955 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8956 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8957 ExternalID, SystemID);
8958 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8959
8960 if (ctxt->myDoc != NULL) {
8961 if (ctxt->wellFormed) {
8962 ret = ctxt->myDoc->extSubset;
8963 ctxt->myDoc->extSubset = NULL;
8964 } else {
8965 ret = NULL;
8966 }
8967 xmlFreeDoc(ctxt->myDoc);
8968 ctxt->myDoc = NULL;
8969 }
8970 if (sax != NULL) ctxt->sax = NULL;
8971 xmlFreeParserCtxt(ctxt);
8972
8973 return(ret);
8974}
8975
8976/**
8977 * xmlParseDTD:
8978 * @ExternalID: a NAME* containing the External ID of the DTD
8979 * @SystemID: a NAME* containing the URL to the DTD
8980 *
8981 * Load and parse an external subset.
8982 *
8983 * Returns the resulting xmlDtdPtr or NULL in case of error.
8984 */
8985
8986xmlDtdPtr
8987xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8988 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8989}
8990
8991/************************************************************************
8992 * *
8993 * Front ends when parsing an Entity *
8994 * *
8995 ************************************************************************/
8996
8997/**
Owen Taylor3473f882001-02-23 17:55:21 +00008998 * xmlParseCtxtExternalEntity:
8999 * @ctx: the existing parsing context
9000 * @URL: the URL for the entity to load
9001 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009002 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009003 *
9004 * Parse an external general entity within an existing parsing context
9005 * An external general parsed entity is well-formed if it matches the
9006 * production labeled extParsedEnt.
9007 *
9008 * [78] extParsedEnt ::= TextDecl? content
9009 *
9010 * Returns 0 if the entity is well formed, -1 in case of args problem and
9011 * the parser error code otherwise
9012 */
9013
9014int
9015xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009016 const xmlChar *ID, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009017 xmlParserCtxtPtr ctxt;
9018 xmlDocPtr newDoc;
9019 xmlSAXHandlerPtr oldsax = NULL;
9020 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009021 xmlChar start[4];
9022 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009023
9024 if (ctx->depth > 40) {
9025 return(XML_ERR_ENTITY_LOOP);
9026 }
9027
Daniel Veillardcda96922001-08-21 10:56:31 +00009028 if (lst != NULL)
9029 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009030 if ((URL == NULL) && (ID == NULL))
9031 return(-1);
9032 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
9033 return(-1);
9034
9035
9036 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9037 if (ctxt == NULL) return(-1);
9038 ctxt->userData = ctxt;
9039 oldsax = ctxt->sax;
9040 ctxt->sax = ctx->sax;
9041 newDoc = xmlNewDoc(BAD_CAST "1.0");
9042 if (newDoc == NULL) {
9043 xmlFreeParserCtxt(ctxt);
9044 return(-1);
9045 }
9046 if (ctx->myDoc != NULL) {
9047 newDoc->intSubset = ctx->myDoc->intSubset;
9048 newDoc->extSubset = ctx->myDoc->extSubset;
9049 }
9050 if (ctx->myDoc->URL != NULL) {
9051 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
9052 }
9053 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9054 if (newDoc->children == NULL) {
9055 ctxt->sax = oldsax;
9056 xmlFreeParserCtxt(ctxt);
9057 newDoc->intSubset = NULL;
9058 newDoc->extSubset = NULL;
9059 xmlFreeDoc(newDoc);
9060 return(-1);
9061 }
9062 nodePush(ctxt, newDoc->children);
9063 if (ctx->myDoc == NULL) {
9064 ctxt->myDoc = newDoc;
9065 } else {
9066 ctxt->myDoc = ctx->myDoc;
9067 newDoc->children->doc = ctx->myDoc;
9068 }
9069
Daniel Veillard87a764e2001-06-20 17:41:10 +00009070 /*
9071 * Get the 4 first bytes and decode the charset
9072 * if enc != XML_CHAR_ENCODING_NONE
9073 * plug some encoding conversion routines.
9074 */
9075 GROW
9076 start[0] = RAW;
9077 start[1] = NXT(1);
9078 start[2] = NXT(2);
9079 start[3] = NXT(3);
9080 enc = xmlDetectCharEncoding(start, 4);
9081 if (enc != XML_CHAR_ENCODING_NONE) {
9082 xmlSwitchEncoding(ctxt, enc);
9083 }
9084
Owen Taylor3473f882001-02-23 17:55:21 +00009085 /*
9086 * Parse a possible text declaration first
9087 */
Owen Taylor3473f882001-02-23 17:55:21 +00009088 if ((RAW == '<') && (NXT(1) == '?') &&
9089 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9090 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9091 xmlParseTextDecl(ctxt);
9092 }
9093
9094 /*
9095 * Doing validity checking on chunk doesn't make sense
9096 */
9097 ctxt->instate = XML_PARSER_CONTENT;
9098 ctxt->validate = ctx->validate;
9099 ctxt->loadsubset = ctx->loadsubset;
9100 ctxt->depth = ctx->depth + 1;
9101 ctxt->replaceEntities = ctx->replaceEntities;
9102 if (ctxt->validate) {
9103 ctxt->vctxt.error = ctx->vctxt.error;
9104 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009105 } else {
9106 ctxt->vctxt.error = NULL;
9107 ctxt->vctxt.warning = NULL;
9108 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009109 ctxt->vctxt.nodeTab = NULL;
9110 ctxt->vctxt.nodeNr = 0;
9111 ctxt->vctxt.nodeMax = 0;
9112 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009113
9114 xmlParseContent(ctxt);
9115
9116 if ((RAW == '<') && (NXT(1) == '/')) {
9117 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9118 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9119 ctxt->sax->error(ctxt->userData,
9120 "chunk is not well balanced\n");
9121 ctxt->wellFormed = 0;
9122 ctxt->disableSAX = 1;
9123 } else if (RAW != 0) {
9124 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9125 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9126 ctxt->sax->error(ctxt->userData,
9127 "extra content at the end of well balanced chunk\n");
9128 ctxt->wellFormed = 0;
9129 ctxt->disableSAX = 1;
9130 }
9131 if (ctxt->node != newDoc->children) {
9132 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9133 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9134 ctxt->sax->error(ctxt->userData,
9135 "chunk is not well balanced\n");
9136 ctxt->wellFormed = 0;
9137 ctxt->disableSAX = 1;
9138 }
9139
9140 if (!ctxt->wellFormed) {
9141 if (ctxt->errNo == 0)
9142 ret = 1;
9143 else
9144 ret = ctxt->errNo;
9145 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009146 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009147 xmlNodePtr cur;
9148
9149 /*
9150 * Return the newly created nodeset after unlinking it from
9151 * they pseudo parent.
9152 */
9153 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009154 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009155 while (cur != NULL) {
9156 cur->parent = NULL;
9157 cur = cur->next;
9158 }
9159 newDoc->children->children = NULL;
9160 }
9161 ret = 0;
9162 }
9163 ctxt->sax = oldsax;
9164 xmlFreeParserCtxt(ctxt);
9165 newDoc->intSubset = NULL;
9166 newDoc->extSubset = NULL;
9167 xmlFreeDoc(newDoc);
9168
9169 return(ret);
9170}
9171
9172/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009173 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009174 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009175 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009176 * @sax: the SAX handler bloc (possibly NULL)
9177 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9178 * @depth: Used for loop detection, use 0
9179 * @URL: the URL for the entity to load
9180 * @ID: the System ID for the entity to load
9181 * @list: the return value for the set of parsed nodes
9182 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009183 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009184 *
9185 * Returns 0 if the entity is well formed, -1 in case of args problem and
9186 * the parser error code otherwise
9187 */
9188
Daniel Veillard257d9102001-05-08 10:41:44 +00009189static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009190xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9191 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009192 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009193 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009194 xmlParserCtxtPtr ctxt;
9195 xmlDocPtr newDoc;
9196 xmlSAXHandlerPtr oldsax = NULL;
9197 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009198 xmlChar start[4];
9199 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009200
9201 if (depth > 40) {
9202 return(XML_ERR_ENTITY_LOOP);
9203 }
9204
9205
9206
9207 if (list != NULL)
9208 *list = NULL;
9209 if ((URL == NULL) && (ID == NULL))
9210 return(-1);
9211 if (doc == NULL) /* @@ relax but check for dereferences */
9212 return(-1);
9213
9214
9215 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9216 if (ctxt == NULL) return(-1);
9217 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009218 if (oldctxt != NULL) {
9219 ctxt->_private = oldctxt->_private;
9220 ctxt->loadsubset = oldctxt->loadsubset;
9221 ctxt->validate = oldctxt->validate;
9222 ctxt->external = oldctxt->external;
9223 } else {
9224 /*
9225 * Doing validity checking on chunk without context
9226 * doesn't make sense
9227 */
9228 ctxt->_private = NULL;
9229 ctxt->validate = 0;
9230 ctxt->external = 2;
9231 ctxt->loadsubset = 0;
9232 }
Owen Taylor3473f882001-02-23 17:55:21 +00009233 if (sax != NULL) {
9234 oldsax = ctxt->sax;
9235 ctxt->sax = sax;
9236 if (user_data != NULL)
9237 ctxt->userData = user_data;
9238 }
9239 newDoc = xmlNewDoc(BAD_CAST "1.0");
9240 if (newDoc == NULL) {
9241 xmlFreeParserCtxt(ctxt);
9242 return(-1);
9243 }
9244 if (doc != NULL) {
9245 newDoc->intSubset = doc->intSubset;
9246 newDoc->extSubset = doc->extSubset;
9247 }
9248 if (doc->URL != NULL) {
9249 newDoc->URL = xmlStrdup(doc->URL);
9250 }
9251 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9252 if (newDoc->children == NULL) {
9253 if (sax != NULL)
9254 ctxt->sax = oldsax;
9255 xmlFreeParserCtxt(ctxt);
9256 newDoc->intSubset = NULL;
9257 newDoc->extSubset = NULL;
9258 xmlFreeDoc(newDoc);
9259 return(-1);
9260 }
9261 nodePush(ctxt, newDoc->children);
9262 if (doc == NULL) {
9263 ctxt->myDoc = newDoc;
9264 } else {
9265 ctxt->myDoc = doc;
9266 newDoc->children->doc = doc;
9267 }
9268
Daniel Veillard87a764e2001-06-20 17:41:10 +00009269 /*
9270 * Get the 4 first bytes and decode the charset
9271 * if enc != XML_CHAR_ENCODING_NONE
9272 * plug some encoding conversion routines.
9273 */
9274 GROW;
9275 start[0] = RAW;
9276 start[1] = NXT(1);
9277 start[2] = NXT(2);
9278 start[3] = NXT(3);
9279 enc = xmlDetectCharEncoding(start, 4);
9280 if (enc != XML_CHAR_ENCODING_NONE) {
9281 xmlSwitchEncoding(ctxt, enc);
9282 }
9283
Owen Taylor3473f882001-02-23 17:55:21 +00009284 /*
9285 * Parse a possible text declaration first
9286 */
Owen Taylor3473f882001-02-23 17:55:21 +00009287 if ((RAW == '<') && (NXT(1) == '?') &&
9288 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9289 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9290 xmlParseTextDecl(ctxt);
9291 }
9292
Owen Taylor3473f882001-02-23 17:55:21 +00009293 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009294 ctxt->depth = depth;
9295
9296 xmlParseContent(ctxt);
9297
9298 if ((RAW == '<') && (NXT(1) == '/')) {
9299 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9300 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9301 ctxt->sax->error(ctxt->userData,
9302 "chunk is not well balanced\n");
9303 ctxt->wellFormed = 0;
9304 ctxt->disableSAX = 1;
9305 } else if (RAW != 0) {
9306 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9307 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9308 ctxt->sax->error(ctxt->userData,
9309 "extra content at the end of well balanced chunk\n");
9310 ctxt->wellFormed = 0;
9311 ctxt->disableSAX = 1;
9312 }
9313 if (ctxt->node != newDoc->children) {
9314 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9315 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9316 ctxt->sax->error(ctxt->userData,
9317 "chunk is not well balanced\n");
9318 ctxt->wellFormed = 0;
9319 ctxt->disableSAX = 1;
9320 }
9321
9322 if (!ctxt->wellFormed) {
9323 if (ctxt->errNo == 0)
9324 ret = 1;
9325 else
9326 ret = ctxt->errNo;
9327 } else {
9328 if (list != NULL) {
9329 xmlNodePtr cur;
9330
9331 /*
9332 * Return the newly created nodeset after unlinking it from
9333 * they pseudo parent.
9334 */
9335 cur = newDoc->children->children;
9336 *list = cur;
9337 while (cur != NULL) {
9338 cur->parent = NULL;
9339 cur = cur->next;
9340 }
9341 newDoc->children->children = NULL;
9342 }
9343 ret = 0;
9344 }
9345 if (sax != NULL)
9346 ctxt->sax = oldsax;
9347 xmlFreeParserCtxt(ctxt);
9348 newDoc->intSubset = NULL;
9349 newDoc->extSubset = NULL;
9350 xmlFreeDoc(newDoc);
9351
9352 return(ret);
9353}
9354
9355/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009356 * xmlParseExternalEntity:
9357 * @doc: the document the chunk pertains to
9358 * @sax: the SAX handler bloc (possibly NULL)
9359 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9360 * @depth: Used for loop detection, use 0
9361 * @URL: the URL for the entity to load
9362 * @ID: the System ID for the entity to load
Daniel Veillardcda96922001-08-21 10:56:31 +00009363 * @lst: the return value for the set of parsed nodes
Daniel Veillard257d9102001-05-08 10:41:44 +00009364 *
9365 * Parse an external general entity
9366 * An external general parsed entity is well-formed if it matches the
9367 * production labeled extParsedEnt.
9368 *
9369 * [78] extParsedEnt ::= TextDecl? content
9370 *
9371 * Returns 0 if the entity is well formed, -1 in case of args problem and
9372 * the parser error code otherwise
9373 */
9374
9375int
9376xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardcda96922001-08-21 10:56:31 +00009377 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009378 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
Daniel Veillardcda96922001-08-21 10:56:31 +00009379 ID, lst));
Daniel Veillard257d9102001-05-08 10:41:44 +00009380}
9381
9382/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009383 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009384 * @doc: the document the chunk pertains to
9385 * @sax: the SAX handler bloc (possibly NULL)
9386 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9387 * @depth: Used for loop detection, use 0
9388 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
Daniel Veillardcda96922001-08-21 10:56:31 +00009389 * @lst: the return value for the set of parsed nodes
Owen Taylor3473f882001-02-23 17:55:21 +00009390 *
9391 * Parse a well-balanced chunk of an XML document
9392 * called by the parser
9393 * The allowed sequence for the Well Balanced Chunk is the one defined by
9394 * the content production in the XML grammar:
9395 *
9396 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9397 *
9398 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9399 * the parser error code otherwise
9400 */
9401
9402int
9403xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
Daniel Veillardcda96922001-08-21 10:56:31 +00009404 void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
Owen Taylor3473f882001-02-23 17:55:21 +00009405 xmlParserCtxtPtr ctxt;
9406 xmlDocPtr newDoc;
9407 xmlSAXHandlerPtr oldsax = NULL;
9408 int size;
9409 int ret = 0;
9410
9411 if (depth > 40) {
9412 return(XML_ERR_ENTITY_LOOP);
9413 }
9414
9415
Daniel Veillardcda96922001-08-21 10:56:31 +00009416 if (lst != NULL)
9417 *lst = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009418 if (string == NULL)
9419 return(-1);
9420
9421 size = xmlStrlen(string);
9422
9423 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9424 if (ctxt == NULL) return(-1);
9425 ctxt->userData = ctxt;
9426 if (sax != NULL) {
9427 oldsax = ctxt->sax;
9428 ctxt->sax = sax;
9429 if (user_data != NULL)
9430 ctxt->userData = user_data;
9431 }
9432 newDoc = xmlNewDoc(BAD_CAST "1.0");
9433 if (newDoc == NULL) {
9434 xmlFreeParserCtxt(ctxt);
9435 return(-1);
9436 }
9437 if (doc != NULL) {
9438 newDoc->intSubset = doc->intSubset;
9439 newDoc->extSubset = doc->extSubset;
9440 }
9441 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9442 if (newDoc->children == NULL) {
9443 if (sax != NULL)
9444 ctxt->sax = oldsax;
9445 xmlFreeParserCtxt(ctxt);
9446 newDoc->intSubset = NULL;
9447 newDoc->extSubset = NULL;
9448 xmlFreeDoc(newDoc);
9449 return(-1);
9450 }
9451 nodePush(ctxt, newDoc->children);
9452 if (doc == NULL) {
9453 ctxt->myDoc = newDoc;
9454 } else {
9455 ctxt->myDoc = doc;
9456 newDoc->children->doc = doc;
9457 }
9458 ctxt->instate = XML_PARSER_CONTENT;
9459 ctxt->depth = depth;
9460
9461 /*
9462 * Doing validity checking on chunk doesn't make sense
9463 */
9464 ctxt->validate = 0;
9465 ctxt->loadsubset = 0;
9466
9467 xmlParseContent(ctxt);
9468
9469 if ((RAW == '<') && (NXT(1) == '/')) {
9470 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9472 ctxt->sax->error(ctxt->userData,
9473 "chunk is not well balanced\n");
9474 ctxt->wellFormed = 0;
9475 ctxt->disableSAX = 1;
9476 } else if (RAW != 0) {
9477 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9479 ctxt->sax->error(ctxt->userData,
9480 "extra content at the end of well balanced chunk\n");
9481 ctxt->wellFormed = 0;
9482 ctxt->disableSAX = 1;
9483 }
9484 if (ctxt->node != newDoc->children) {
9485 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9487 ctxt->sax->error(ctxt->userData,
9488 "chunk is not well balanced\n");
9489 ctxt->wellFormed = 0;
9490 ctxt->disableSAX = 1;
9491 }
9492
9493 if (!ctxt->wellFormed) {
9494 if (ctxt->errNo == 0)
9495 ret = 1;
9496 else
9497 ret = ctxt->errNo;
9498 } else {
Daniel Veillardcda96922001-08-21 10:56:31 +00009499 if (lst != NULL) {
Owen Taylor3473f882001-02-23 17:55:21 +00009500 xmlNodePtr cur;
9501
9502 /*
9503 * Return the newly created nodeset after unlinking it from
9504 * they pseudo parent.
9505 */
9506 cur = newDoc->children->children;
Daniel Veillardcda96922001-08-21 10:56:31 +00009507 *lst = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00009508 while (cur != NULL) {
9509 cur->parent = NULL;
9510 cur = cur->next;
9511 }
9512 newDoc->children->children = NULL;
9513 }
9514 ret = 0;
9515 }
9516 if (sax != NULL)
9517 ctxt->sax = oldsax;
9518 xmlFreeParserCtxt(ctxt);
9519 newDoc->intSubset = NULL;
9520 newDoc->extSubset = NULL;
9521 xmlFreeDoc(newDoc);
9522
9523 return(ret);
9524}
9525
9526/**
9527 * xmlSAXParseEntity:
9528 * @sax: the SAX handler block
9529 * @filename: the filename
9530 *
9531 * parse an XML external entity out of context and build a tree.
9532 * It use the given SAX function block to handle the parsing callback.
9533 * If sax is NULL, fallback to the default DOM tree building routines.
9534 *
9535 * [78] extParsedEnt ::= TextDecl? content
9536 *
9537 * This correspond to a "Well Balanced" chunk
9538 *
9539 * Returns the resulting document tree
9540 */
9541
9542xmlDocPtr
9543xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9544 xmlDocPtr ret;
9545 xmlParserCtxtPtr ctxt;
9546 char *directory = NULL;
9547
9548 ctxt = xmlCreateFileParserCtxt(filename);
9549 if (ctxt == NULL) {
9550 return(NULL);
9551 }
9552 if (sax != NULL) {
9553 if (ctxt->sax != NULL)
9554 xmlFree(ctxt->sax);
9555 ctxt->sax = sax;
9556 ctxt->userData = NULL;
9557 }
9558
9559 if ((ctxt->directory == NULL) && (directory == NULL))
9560 directory = xmlParserGetDirectory(filename);
9561
9562 xmlParseExtParsedEnt(ctxt);
9563
9564 if (ctxt->wellFormed)
9565 ret = ctxt->myDoc;
9566 else {
9567 ret = NULL;
9568 xmlFreeDoc(ctxt->myDoc);
9569 ctxt->myDoc = NULL;
9570 }
9571 if (sax != NULL)
9572 ctxt->sax = NULL;
9573 xmlFreeParserCtxt(ctxt);
9574
9575 return(ret);
9576}
9577
9578/**
9579 * xmlParseEntity:
9580 * @filename: the filename
9581 *
9582 * parse an XML external entity out of context and build a tree.
9583 *
9584 * [78] extParsedEnt ::= TextDecl? content
9585 *
9586 * This correspond to a "Well Balanced" chunk
9587 *
9588 * Returns the resulting document tree
9589 */
9590
9591xmlDocPtr
9592xmlParseEntity(const char *filename) {
9593 return(xmlSAXParseEntity(NULL, filename));
9594}
9595
9596/**
9597 * xmlCreateEntityParserCtxt:
9598 * @URL: the entity URL
9599 * @ID: the entity PUBLIC ID
9600 * @base: a posible base for the target URI
9601 *
9602 * Create a parser context for an external entity
9603 * Automatic support for ZLIB/Compress compressed document is provided
9604 * by default if found at compile-time.
9605 *
9606 * Returns the new parser context or NULL
9607 */
9608xmlParserCtxtPtr
9609xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9610 const xmlChar *base) {
9611 xmlParserCtxtPtr ctxt;
9612 xmlParserInputPtr inputStream;
9613 char *directory = NULL;
9614 xmlChar *uri;
9615
9616 ctxt = xmlNewParserCtxt();
9617 if (ctxt == NULL) {
9618 return(NULL);
9619 }
9620
9621 uri = xmlBuildURI(URL, base);
9622
9623 if (uri == NULL) {
9624 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9625 if (inputStream == NULL) {
9626 xmlFreeParserCtxt(ctxt);
9627 return(NULL);
9628 }
9629
9630 inputPush(ctxt, inputStream);
9631
9632 if ((ctxt->directory == NULL) && (directory == NULL))
9633 directory = xmlParserGetDirectory((char *)URL);
9634 if ((ctxt->directory == NULL) && (directory != NULL))
9635 ctxt->directory = directory;
9636 } else {
9637 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9638 if (inputStream == NULL) {
9639 xmlFree(uri);
9640 xmlFreeParserCtxt(ctxt);
9641 return(NULL);
9642 }
9643
9644 inputPush(ctxt, inputStream);
9645
9646 if ((ctxt->directory == NULL) && (directory == NULL))
9647 directory = xmlParserGetDirectory((char *)uri);
9648 if ((ctxt->directory == NULL) && (directory != NULL))
9649 ctxt->directory = directory;
9650 xmlFree(uri);
9651 }
9652
9653 return(ctxt);
9654}
9655
9656/************************************************************************
9657 * *
9658 * Front ends when parsing from a file *
9659 * *
9660 ************************************************************************/
9661
9662/**
9663 * xmlCreateFileParserCtxt:
9664 * @filename: the filename
9665 *
9666 * Create a parser context for a file content.
9667 * Automatic support for ZLIB/Compress compressed document is provided
9668 * by default if found at compile-time.
9669 *
9670 * Returns the new parser context or NULL
9671 */
9672xmlParserCtxtPtr
9673xmlCreateFileParserCtxt(const char *filename)
9674{
9675 xmlParserCtxtPtr ctxt;
9676 xmlParserInputPtr inputStream;
Owen Taylor3473f882001-02-23 17:55:21 +00009677 char *directory = NULL;
9678
Owen Taylor3473f882001-02-23 17:55:21 +00009679 ctxt = xmlNewParserCtxt();
9680 if (ctxt == NULL) {
9681 if (xmlDefaultSAXHandler.error != NULL) {
9682 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9683 }
9684 return(NULL);
9685 }
9686
Daniel Veillard9f7b84b2001-08-23 15:31:19 +00009687 inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00009688 if (inputStream == NULL) {
9689 xmlFreeParserCtxt(ctxt);
9690 return(NULL);
9691 }
9692
Owen Taylor3473f882001-02-23 17:55:21 +00009693 inputPush(ctxt, inputStream);
9694 if ((ctxt->directory == NULL) && (directory == NULL))
9695 directory = xmlParserGetDirectory(filename);
9696 if ((ctxt->directory == NULL) && (directory != NULL))
9697 ctxt->directory = directory;
9698
9699 return(ctxt);
9700}
9701
9702/**
9703 * xmlSAXParseFile:
9704 * @sax: the SAX handler block
9705 * @filename: the filename
9706 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9707 * documents
9708 *
9709 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9710 * compressed document is provided by default if found at compile-time.
9711 * It use the given SAX function block to handle the parsing callback.
9712 * If sax is NULL, fallback to the default DOM tree building routines.
9713 *
9714 * Returns the resulting document tree
9715 */
9716
9717xmlDocPtr
9718xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9719 int recovery) {
9720 xmlDocPtr ret;
9721 xmlParserCtxtPtr ctxt;
9722 char *directory = NULL;
9723
9724 ctxt = xmlCreateFileParserCtxt(filename);
9725 if (ctxt == NULL) {
9726 return(NULL);
9727 }
9728 if (sax != NULL) {
9729 if (ctxt->sax != NULL)
9730 xmlFree(ctxt->sax);
9731 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009732 }
9733
9734 if ((ctxt->directory == NULL) && (directory == NULL))
9735 directory = xmlParserGetDirectory(filename);
9736 if ((ctxt->directory == NULL) && (directory != NULL))
9737 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9738
9739 xmlParseDocument(ctxt);
9740
9741 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9742 else {
9743 ret = NULL;
9744 xmlFreeDoc(ctxt->myDoc);
9745 ctxt->myDoc = NULL;
9746 }
9747 if (sax != NULL)
9748 ctxt->sax = NULL;
9749 xmlFreeParserCtxt(ctxt);
9750
9751 return(ret);
9752}
9753
9754/**
9755 * xmlRecoverDoc:
9756 * @cur: a pointer to an array of xmlChar
9757 *
9758 * parse an XML in-memory document and build a tree.
9759 * In the case the document is not Well Formed, a tree is built anyway
9760 *
9761 * Returns the resulting document tree
9762 */
9763
9764xmlDocPtr
9765xmlRecoverDoc(xmlChar *cur) {
9766 return(xmlSAXParseDoc(NULL, cur, 1));
9767}
9768
9769/**
9770 * xmlParseFile:
9771 * @filename: the filename
9772 *
9773 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9774 * compressed document is provided by default if found at compile-time.
9775 *
Daniel Veillard5d96fff2001-08-31 14:55:30 +00009776 * Returns the resulting document tree if the file was wellformed,
9777 * NULL otherwise.
Owen Taylor3473f882001-02-23 17:55:21 +00009778 */
9779
9780xmlDocPtr
9781xmlParseFile(const char *filename) {
9782 return(xmlSAXParseFile(NULL, filename, 0));
9783}
9784
9785/**
9786 * xmlRecoverFile:
9787 * @filename: the filename
9788 *
9789 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9790 * compressed document is provided by default if found at compile-time.
9791 * In the case the document is not Well Formed, a tree is built anyway
9792 *
9793 * Returns the resulting document tree
9794 */
9795
9796xmlDocPtr
9797xmlRecoverFile(const char *filename) {
9798 return(xmlSAXParseFile(NULL, filename, 1));
9799}
9800
9801
9802/**
9803 * xmlSetupParserForBuffer:
9804 * @ctxt: an XML parser context
9805 * @buffer: a xmlChar * buffer
9806 * @filename: a file name
9807 *
9808 * Setup the parser context to parse a new buffer; Clears any prior
9809 * contents from the parser context. The buffer parameter must not be
9810 * NULL, but the filename parameter can be
9811 */
9812void
9813xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9814 const char* filename)
9815{
9816 xmlParserInputPtr input;
9817
9818 input = xmlNewInputStream(ctxt);
9819 if (input == NULL) {
9820 perror("malloc");
9821 xmlFree(ctxt);
9822 return;
9823 }
9824
9825 xmlClearParserCtxt(ctxt);
9826 if (filename != NULL)
9827 input->filename = xmlMemStrdup(filename);
9828 input->base = buffer;
9829 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009830 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009831 inputPush(ctxt, input);
9832}
9833
9834/**
9835 * xmlSAXUserParseFile:
9836 * @sax: a SAX handler
9837 * @user_data: The user data returned on SAX callbacks
9838 * @filename: a file name
9839 *
9840 * parse an XML file and call the given SAX handler routines.
9841 * Automatic support for ZLIB/Compress compressed document is provided
9842 *
9843 * Returns 0 in case of success or a error number otherwise
9844 */
9845int
9846xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9847 const char *filename) {
9848 int ret = 0;
9849 xmlParserCtxtPtr ctxt;
9850
9851 ctxt = xmlCreateFileParserCtxt(filename);
9852 if (ctxt == NULL) return -1;
9853 if (ctxt->sax != &xmlDefaultSAXHandler)
9854 xmlFree(ctxt->sax);
9855 ctxt->sax = sax;
9856 if (user_data != NULL)
9857 ctxt->userData = user_data;
9858
9859 xmlParseDocument(ctxt);
9860
9861 if (ctxt->wellFormed)
9862 ret = 0;
9863 else {
9864 if (ctxt->errNo != 0)
9865 ret = ctxt->errNo;
9866 else
9867 ret = -1;
9868 }
9869 if (sax != NULL)
9870 ctxt->sax = NULL;
9871 xmlFreeParserCtxt(ctxt);
9872
9873 return ret;
9874}
9875
9876/************************************************************************
9877 * *
9878 * Front ends when parsing from memory *
9879 * *
9880 ************************************************************************/
9881
9882/**
9883 * xmlCreateMemoryParserCtxt:
9884 * @buffer: a pointer to a char array
9885 * @size: the size of the array
9886 *
9887 * Create a parser context for an XML in-memory document.
9888 *
9889 * Returns the new parser context or NULL
9890 */
9891xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009892xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009893 xmlParserCtxtPtr ctxt;
9894 xmlParserInputPtr input;
9895 xmlParserInputBufferPtr buf;
9896
9897 if (buffer == NULL)
9898 return(NULL);
9899 if (size <= 0)
9900 return(NULL);
9901
9902 ctxt = xmlNewParserCtxt();
9903 if (ctxt == NULL)
9904 return(NULL);
9905
9906 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9907 if (buf == NULL) return(NULL);
9908
9909 input = xmlNewInputStream(ctxt);
9910 if (input == NULL) {
9911 xmlFreeParserCtxt(ctxt);
9912 return(NULL);
9913 }
9914
9915 input->filename = NULL;
9916 input->buf = buf;
9917 input->base = input->buf->buffer->content;
9918 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009919 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009920
9921 inputPush(ctxt, input);
9922 return(ctxt);
9923}
9924
9925/**
9926 * xmlSAXParseMemory:
9927 * @sax: the SAX handler block
9928 * @buffer: an pointer to a char array
9929 * @size: the size of the array
9930 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9931 * documents
9932 *
9933 * parse an XML in-memory block and use the given SAX function block
9934 * to handle the parsing callback. If sax is NULL, fallback to the default
9935 * DOM tree building routines.
9936 *
9937 * Returns the resulting document tree
9938 */
9939xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +00009940xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
9941 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +00009942 xmlDocPtr ret;
9943 xmlParserCtxtPtr ctxt;
9944
9945 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9946 if (ctxt == NULL) return(NULL);
9947 if (sax != NULL) {
9948 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009949 }
9950
9951 xmlParseDocument(ctxt);
9952
9953 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9954 else {
9955 ret = NULL;
9956 xmlFreeDoc(ctxt->myDoc);
9957 ctxt->myDoc = NULL;
9958 }
9959 if (sax != NULL)
9960 ctxt->sax = NULL;
9961 xmlFreeParserCtxt(ctxt);
9962
9963 return(ret);
9964}
9965
9966/**
9967 * xmlParseMemory:
9968 * @buffer: an pointer to a char array
9969 * @size: the size of the array
9970 *
9971 * parse an XML in-memory block and build a tree.
9972 *
9973 * Returns the resulting document tree
9974 */
9975
Daniel Veillard50822cb2001-07-26 20:05:51 +00009976xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009977 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9978}
9979
9980/**
9981 * xmlRecoverMemory:
9982 * @buffer: an pointer to a char array
9983 * @size: the size of the array
9984 *
9985 * parse an XML in-memory block and build a tree.
9986 * In the case the document is not Well Formed, a tree is built anyway
9987 *
9988 * Returns the resulting document tree
9989 */
9990
Daniel Veillard50822cb2001-07-26 20:05:51 +00009991xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009992 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9993}
9994
9995/**
9996 * xmlSAXUserParseMemory:
9997 * @sax: a SAX handler
9998 * @user_data: The user data returned on SAX callbacks
9999 * @buffer: an in-memory XML document input
10000 * @size: the length of the XML document in bytes
10001 *
10002 * A better SAX parsing routine.
10003 * parse an XML in-memory buffer and call the given SAX handler routines.
10004 *
10005 * Returns 0 in case of success or a error number otherwise
10006 */
10007int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +000010008 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +000010009 int ret = 0;
10010 xmlParserCtxtPtr ctxt;
10011 xmlSAXHandlerPtr oldsax = NULL;
10012
10013 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
10014 if (ctxt == NULL) return -1;
10015 if (sax != NULL) {
10016 oldsax = ctxt->sax;
10017 ctxt->sax = sax;
10018 }
Daniel Veillard30211a02001-04-26 09:33:18 +000010019 if (user_data != NULL)
10020 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +000010021
10022 xmlParseDocument(ctxt);
10023
10024 if (ctxt->wellFormed)
10025 ret = 0;
10026 else {
10027 if (ctxt->errNo != 0)
10028 ret = ctxt->errNo;
10029 else
10030 ret = -1;
10031 }
10032 if (sax != NULL) {
10033 ctxt->sax = oldsax;
10034 }
10035 xmlFreeParserCtxt(ctxt);
10036
10037 return ret;
10038}
10039
10040/**
10041 * xmlCreateDocParserCtxt:
10042 * @cur: a pointer to an array of xmlChar
10043 *
10044 * Creates a parser context for an XML in-memory document.
10045 *
10046 * Returns the new parser context or NULL
10047 */
10048xmlParserCtxtPtr
10049xmlCreateDocParserCtxt(xmlChar *cur) {
10050 int len;
10051
10052 if (cur == NULL)
10053 return(NULL);
10054 len = xmlStrlen(cur);
10055 return(xmlCreateMemoryParserCtxt((char *)cur, len));
10056}
10057
10058/**
10059 * xmlSAXParseDoc:
10060 * @sax: the SAX handler block
10061 * @cur: a pointer to an array of xmlChar
10062 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10063 * documents
10064 *
10065 * parse an XML in-memory document and build a tree.
10066 * It use the given SAX function block to handle the parsing callback.
10067 * If sax is NULL, fallback to the default DOM tree building routines.
10068 *
10069 * Returns the resulting document tree
10070 */
10071
10072xmlDocPtr
10073xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10074 xmlDocPtr ret;
10075 xmlParserCtxtPtr ctxt;
10076
10077 if (cur == NULL) return(NULL);
10078
10079
10080 ctxt = xmlCreateDocParserCtxt(cur);
10081 if (ctxt == NULL) return(NULL);
10082 if (sax != NULL) {
10083 ctxt->sax = sax;
10084 ctxt->userData = NULL;
10085 }
10086
10087 xmlParseDocument(ctxt);
10088 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10089 else {
10090 ret = NULL;
10091 xmlFreeDoc(ctxt->myDoc);
10092 ctxt->myDoc = NULL;
10093 }
10094 if (sax != NULL)
10095 ctxt->sax = NULL;
10096 xmlFreeParserCtxt(ctxt);
10097
10098 return(ret);
10099}
10100
10101/**
10102 * xmlParseDoc:
10103 * @cur: a pointer to an array of xmlChar
10104 *
10105 * parse an XML in-memory document and build a tree.
10106 *
10107 * Returns the resulting document tree
10108 */
10109
10110xmlDocPtr
10111xmlParseDoc(xmlChar *cur) {
10112 return(xmlSAXParseDoc(NULL, cur, 0));
10113}
10114
10115
10116/************************************************************************
10117 * *
10118 * Miscellaneous *
10119 * *
10120 ************************************************************************/
10121
10122#ifdef LIBXML_XPATH_ENABLED
10123#include <libxml/xpath.h>
10124#endif
10125
/* Set to 1 by xmlInitParser() once the initialization has been done. */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 * Calling it again once the parser is initialized does nothing.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
10155/**
10156 * xmlCleanupParser:
10157 *
10158 * Cleanup function for the XML parser. It tries to reclaim all
10159 * parsing related global memory allocated for the parser processing.
10160 * It doesn't deallocate any document related memory. Calling this
10161 * function should not prevent reusing the parser.
10162 */
10163
10164void
10165xmlCleanupParser(void) {
10166 xmlParserInitialized = 0;
10167 xmlCleanupCharEncodingHandlers();
10168 xmlCleanupPredefinedEntities();
Daniel Veillarde2940dd2001-08-22 00:06:49 +000010169#ifdef LIBXML_CATALOG_ENABLED
10170 xmlCatalogCleanup();
10171#endif
Owen Taylor3473f882001-02-23 17:55:21 +000010172}
10173