blob: f308617466ebd2a2951e85857a861a581fabce29 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 *
32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33 * and xmlDoValidityCheckingDefaultValue for VMS
34 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
/*
 * Platform path separator character: a backslash when WIN32 is defined,
 * a forward slash otherwise.
 */
#ifdef WIN32
#define XML_DIR_SEP '\\'
#else
#define XML_DIR_SEP '/'
#endif
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Buffer sizing constants. In the part of the file visible here,
 * XML_PARSER_BIG_BUFFER_SIZE is the initial size of the output buffer of
 * xmlStringDecodeEntities() and XML_PARSER_BUFFER_SIZE is the amount of
 * free space below which that buffer gets grown.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Various global defaults for parsing
 *
 * xmlParserDebugEntities: when non-zero, pushing/popping inputs and the
 * handling of entity references emit trace messages through
 * xmlGenericError().
 */
int xmlParserDebugEntities = 0;

/*
 * List of XML prefixed PI allowed by W3C specs
 * The array is terminated by a NULL entry.
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};

/*
 * Forward declarations for routines referenced before their definition.
 */
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
                                       const xmlChar **str);

static int
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
	              xmlSAXHandlerPtr sax,
		      void *user_data, int depth, const xmlChar *URL,
		      const xmlChar *ID, xmlNodePtr *list);

/************************************************************************
 *									*
 * 		Parser stacks related functions and macros		*
 *									*
 ************************************************************************/

/* Forward declaration: used by xmlStringDecodeEntities() below. */
xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
                                     const xmlChar ** str);
113
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) expands to the definition of two
 * functions working on the context fields <name>Tab (the table),
 * <name>Nr (number of used entries), <name>Max (allocated entries) and
 * <name> (copy of the top entry):
 *
 *   scope int <name>Push(xmlParserCtxtPtr ctxt, type value)
 *       Stores value on top of the stack, growing the table first when it
 *       is full. Returns the index the value was stored at. If the table
 *       cannot be grown, an error is reported through xmlGenericError(),
 *       0 is returned and the stack is left exactly as it was (the old
 *       table stays valid and owned by the context).
 *
 *   scope type <name>Pop(xmlParserCtxtPtr ctxt)
 *       Removes and returns the top entry, 0 if the stack is empty.
 *       ctxt-><name> is updated to the new top entry, or NULL when the
 *       stack becomes empty, and the vacated slot is cleared.
 *
 * The reallocation goes through a temporary so that a failed xmlRealloc()
 * neither leaks the previous table nor leaves <name>Tab set to NULL with
 * a doubled <name>Max.
 */

#define PUSH_AND_POP(scope, type, name)					\
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {		\
    if (ctxt->name##Nr >= ctxt->name##Max) {				\
	int newMax = (ctxt->name##Max > 0) ? ctxt->name##Max * 2 : 5;	\
	type *newTab = (type *) xmlRealloc(ctxt->name##Tab,		\
	             newMax * sizeof(ctxt->name##Tab[0]));		\
	if (newTab == NULL) {						\
	    xmlGenericError(xmlGenericErrorContext,			\
		    "realloc failed !\n");				\
	    return(0);							\
	}								\
	ctxt->name##Tab = newTab;					\
	ctxt->name##Max = newMax;					\
    }									\
    ctxt->name##Tab[ctxt->name##Nr] = value;				\
    ctxt->name = value;							\
    return(ctxt->name##Nr++);						\
}									\
scope type name##Pop(xmlParserCtxtPtr ctxt) {				\
    type ret;								\
    if (ctxt->name##Nr <= 0) return(0);					\
    ctxt->name##Nr--;							\
    if (ctxt->name##Nr > 0)						\
	ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];		\
    else								\
        ctxt->name = NULL;						\
    ret = ctxt->name##Tab[ctxt->name##Nr];				\
    ctxt->name##Tab[ctxt->name##Nr] = 0;				\
    return(ret);							\
}

146
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
148 * inputPop:
149 * @ctxt: an XML parser context
150 *
151 * Pops the top parser input from the input stack
152 *
153 * Returns the input just removed
154 */
155/**
156 * inputPush:
157 * @ctxt: an XML parser context
158 * @input: the parser input
159 *
160 * Pushes a new parser input on top of the input stack
161 */
162/**
163 * namePop:
164 * @ctxt: an XML parser context
165 *
166 * Pops the top element name from the name stack
167 *
168 * Returns the name just removed
169 */
170/**
171 * namePush:
172 * @ctxt: an XML parser context
173 * @name: the element name
174 *
175 * Pushes a new element name on top of the name stack
176 */
177/**
178 * nodePop:
179 * @ctxt: an XML parser context
180 *
181 * Pops the top element node from the node stack
182 *
183 * Returns the node just removed
184 */
185/**
186 * nodePush:
187 * @ctxt: an XML parser context
188 * @node: the element node
189 *
190 * Pushes a new element node on top of the node stack
191 */
/*
 * Those macros actually generate the functions:
 * inputPush()/inputPop(), nodePush()/nodePop() and namePush()/namePop().
 */
PUSH_AND_POP(extern, xmlParserInputPtr, input)
PUSH_AND_POP(extern, xmlNodePtr, node)
PUSH_AND_POP(extern, xmlChar*, name)
198
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000199static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000200 if (ctxt->spaceNr >= ctxt->spaceMax) {
201 ctxt->spaceMax *= 2;
202 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
203 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
204 if (ctxt->spaceTab == NULL) {
205 xmlGenericError(xmlGenericErrorContext,
206 "realloc failed !\n");
207 return(0);
208 }
209 }
210 ctxt->spaceTab[ctxt->spaceNr] = val;
211 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
212 return(ctxt->spaceNr++);
213}
214
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000215static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000216 int ret;
217 if (ctxt->spaceNr <= 0) return(0);
218 ctxt->spaceNr--;
219 if (ctxt->spaceNr > 0)
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
221 else
222 ctxt->space = NULL;
223 ret = ctxt->spaceTab[ctxt->spaceNr];
224 ctxt->spaceTab[ctxt->spaceNr] = -1;
225 return(ret);
226}
227
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt (the parser
 * context) to be in scope at the point of use.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending ctxt->token when one is set, otherwise the
 *           current xmlChar value of the input.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done; it evaluates to -1 while
 *           a token is pending.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXT1   Skip exactly one xmlChar and grow the input when the end of
 *           the buffered data is reached.
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * NOTE(review): SHRINK, GROW, NEXT1 and COPY_BUF are not wrapped in
 * do { } while (0), so they are unsafe as the body of an unbraced if/else.
 * They cannot simply be wrapped either: at least one call site in this file
 * invokes GROW without a trailing semicolon.
 */

/* -1 while a token is pending, else the xmlChar at the input cursor. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* The pending token if any, else the xmlChar at the input cursor. */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* The xmlChar val positions after the input cursor; no bound check. */
#define NXT(val) ctxt->input->cur[(val)]
/* The input cursor itself. */
#define CUR_PTR ctxt->input->cur

/*
 * Advance the cursor and ctxt->nbChars by val, hand a '%' found at the new
 * position to xmlParserHandlePEReference(), and pop the input when its end
 * is reached and it cannot be grown.
 */
#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

/*
 * Once more than INPUT_CHUNK xmlChars lie between the input base and the
 * cursor, shrink the input; pop it if it is then exhausted and cannot grow.
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

/*
 * When fewer than INPUT_CHUNK xmlChars remain after the cursor, grow the
 * input; pop it if it is still exhausted and cannot grow further.
 */
#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * Advance by exactly one xmlChar without touching line/col or the token,
 * growing the input when the new position holds 0.
 */
#define NEXT1 {								\
	ctxt->input->cur++;						\
	ctxt->nbChars++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
    }

/*
 * Update line/col from the xmlChar at the cursor, clear the pending token,
 * advance by l xmlChars and hand a '%' found at the new position to
 * xmlParserHandlePEReference().
 */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += l;				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

/* l must be an lvalue: its address is passed to receive the length. */
#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

/*
 * Store character v at b[i] and advance i: a single xmlChar when l is 1,
 * otherwise whatever xmlCopyCharMultiByte() writes. No capacity check is
 * done here; the caller must guarantee room in b.
 */
#define COPY_BUF(l,b,i,v)						\
    if (l == 1) b[i++] = (xmlChar) v;					\
    else i += xmlCopyCharMultiByte(&b[i],v)

315/**
316 * xmlSkipBlankChars:
317 * @ctxt: the XML parser context
318 *
319 * skip all blanks character found at that point in the input streams.
320 * It pops up finished entities in the process if allowable at that point.
321 *
322 * Returns the number of space chars skipped
323 */
324
325int
326xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000327 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000328
Daniel Veillard02141ea2001-04-30 11:46:40 +0000329 if (ctxt->token != 0) {
330 if (!IS_BLANK(ctxt->token))
331 return(0);
332 ctxt->token = 0;
333 res++;
334 }
Owen Taylor3473f882001-02-23 17:55:21 +0000335 /*
336 * It's Okay to use CUR/NEXT here since all the blanks are on
337 * the ASCII range.
338 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000339 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
340 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000341 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000343 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000344 cur = ctxt->input->cur;
345 while (IS_BLANK(*cur)) {
346 if (*cur == '\n') {
347 ctxt->input->line++; ctxt->input->col = 1;
348 }
349 cur++;
350 res++;
351 if (*cur == 0) {
352 ctxt->input->cur = cur;
353 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
354 cur = ctxt->input->cur;
355 }
356 }
357 ctxt->input->cur = cur;
358 } else {
359 int cur;
360 do {
361 cur = CUR;
362 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
363 NEXT;
364 cur = CUR;
365 res++;
366 }
367 while ((cur == 0) && (ctxt->inputNr > 1) &&
368 (ctxt->instate != XML_PARSER_COMMENT)) {
369 xmlPopInput(ctxt);
370 cur = CUR;
371 }
372 /*
373 * Need to handle support of entities branching here
374 */
375 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
376 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
377 }
Owen Taylor3473f882001-02-23 17:55:21 +0000378 return(res);
379}
380
381/************************************************************************
382 * *
383 * Commodity functions to handle entities *
384 * *
385 ************************************************************************/
386
387/**
388 * xmlPopInput:
389 * @ctxt: an XML parser context
390 *
391 * xmlPopInput: the current input pointed by ctxt->input came to an end
392 * pop it and return the next char.
393 *
394 * Returns the current xmlChar in the parser context
395 */
396xmlChar
397xmlPopInput(xmlParserCtxtPtr ctxt) {
398 if (ctxt->inputNr == 1) return(0); /* End of main Input */
399 if (xmlParserDebugEntities)
400 xmlGenericError(xmlGenericErrorContext,
401 "Popping input %d\n", ctxt->inputNr);
402 xmlFreeInputStream(inputPop(ctxt));
403 if ((*ctxt->input->cur == 0) &&
404 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
405 return(xmlPopInput(ctxt));
406 return(CUR);
407}
408
409/**
410 * xmlPushInput:
411 * @ctxt: an XML parser context
412 * @input: an XML parser input fragment (entity, XML fragment ...).
413 *
414 * xmlPushInput: switch to a new input stream which is stacked on top
415 * of the previous one(s).
416 */
417void
418xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
419 if (input == NULL) return;
420
421 if (xmlParserDebugEntities) {
422 if ((ctxt->input != NULL) && (ctxt->input->filename))
423 xmlGenericError(xmlGenericErrorContext,
424 "%s(%d): ", ctxt->input->filename,
425 ctxt->input->line);
426 xmlGenericError(xmlGenericErrorContext,
427 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
428 }
429 inputPush(ctxt, input);
430 GROW;
431}
432
433/**
434 * xmlParseCharRef:
435 * @ctxt: an XML parser context
436 *
437 * parse Reference declarations
438 *
439 * [66] CharRef ::= '&#' [0-9]+ ';' |
440 * '&#x' [0-9a-fA-F]+ ';'
441 *
442 * [ WFC: Legal Character ]
443 * Characters referred to using character references must match the
444 * production for Char.
445 *
446 * Returns the value parsed (as an int), 0 in case of error
447 */
448int
449xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000450 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000451 int count = 0;
452
453 if (ctxt->token != 0) {
454 val = ctxt->token;
455 ctxt->token = 0;
456 return(val);
457 }
458 /*
459 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
460 */
461 if ((RAW == '&') && (NXT(1) == '#') &&
462 (NXT(2) == 'x')) {
463 SKIP(3);
464 GROW;
465 while (RAW != ';') { /* loop blocked by count */
466 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
467 val = val * 16 + (CUR - '0');
468 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
469 val = val * 16 + (CUR - 'a') + 10;
470 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
471 val = val * 16 + (CUR - 'A') + 10;
472 else {
473 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
475 ctxt->sax->error(ctxt->userData,
476 "xmlParseCharRef: invalid hexadecimal value\n");
477 ctxt->wellFormed = 0;
478 ctxt->disableSAX = 1;
479 val = 0;
480 break;
481 }
482 NEXT;
483 count++;
484 }
485 if (RAW == ';') {
486 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
487 ctxt->nbChars ++;
488 ctxt->input->cur++;
489 }
490 } else if ((RAW == '&') && (NXT(1) == '#')) {
491 SKIP(2);
492 GROW;
493 while (RAW != ';') { /* loop blocked by count */
494 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
495 val = val * 10 + (CUR - '0');
496 else {
497 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
499 ctxt->sax->error(ctxt->userData,
500 "xmlParseCharRef: invalid decimal value\n");
501 ctxt->wellFormed = 0;
502 ctxt->disableSAX = 1;
503 val = 0;
504 break;
505 }
506 NEXT;
507 count++;
508 }
509 if (RAW == ';') {
510 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
511 ctxt->nbChars ++;
512 ctxt->input->cur++;
513 }
514 } else {
515 ctxt->errNo = XML_ERR_INVALID_CHARREF;
516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
517 ctxt->sax->error(ctxt->userData,
518 "xmlParseCharRef: invalid value\n");
519 ctxt->wellFormed = 0;
520 ctxt->disableSAX = 1;
521 }
522
523 /*
524 * [ WFC: Legal Character ]
525 * Characters referred to using character references must match the
526 * production for Char.
527 */
528 if (IS_CHAR(val)) {
529 return(val);
530 } else {
531 ctxt->errNo = XML_ERR_INVALID_CHAR;
532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
533 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
534 val);
535 ctxt->wellFormed = 0;
536 ctxt->disableSAX = 1;
537 }
538 return(0);
539}
540
541/**
542 * xmlParseStringCharRef:
543 * @ctxt: an XML parser context
544 * @str: a pointer to an index in the string
545 *
546 * parse Reference declarations, variant parsing from a string rather
547 * than an an input flow.
548 *
549 * [66] CharRef ::= '&#' [0-9]+ ';' |
550 * '&#x' [0-9a-fA-F]+ ';'
551 *
552 * [ WFC: Legal Character ]
553 * Characters referred to using character references must match the
554 * production for Char.
555 *
556 * Returns the value parsed (as an int), 0 in case of error, str will be
557 * updated to the current value of the index
558 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000559static int
Owen Taylor3473f882001-02-23 17:55:21 +0000560xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
561 const xmlChar *ptr;
562 xmlChar cur;
563 int val = 0;
564
565 if ((str == NULL) || (*str == NULL)) return(0);
566 ptr = *str;
567 cur = *ptr;
568 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
569 ptr += 3;
570 cur = *ptr;
571 while (cur != ';') { /* Non input consuming loop */
572 if ((cur >= '0') && (cur <= '9'))
573 val = val * 16 + (cur - '0');
574 else if ((cur >= 'a') && (cur <= 'f'))
575 val = val * 16 + (cur - 'a') + 10;
576 else if ((cur >= 'A') && (cur <= 'F'))
577 val = val * 16 + (cur - 'A') + 10;
578 else {
579 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
581 ctxt->sax->error(ctxt->userData,
582 "xmlParseStringCharRef: invalid hexadecimal value\n");
583 ctxt->wellFormed = 0;
584 ctxt->disableSAX = 1;
585 val = 0;
586 break;
587 }
588 ptr++;
589 cur = *ptr;
590 }
591 if (cur == ';')
592 ptr++;
593 } else if ((cur == '&') && (ptr[1] == '#')){
594 ptr += 2;
595 cur = *ptr;
596 while (cur != ';') { /* Non input consuming loops */
597 if ((cur >= '0') && (cur <= '9'))
598 val = val * 10 + (cur - '0');
599 else {
600 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
602 ctxt->sax->error(ctxt->userData,
603 "xmlParseStringCharRef: invalid decimal value\n");
604 ctxt->wellFormed = 0;
605 ctxt->disableSAX = 1;
606 val = 0;
607 break;
608 }
609 ptr++;
610 cur = *ptr;
611 }
612 if (cur == ';')
613 ptr++;
614 } else {
615 ctxt->errNo = XML_ERR_INVALID_CHARREF;
616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
617 ctxt->sax->error(ctxt->userData,
618 "xmlParseCharRef: invalid value\n");
619 ctxt->wellFormed = 0;
620 ctxt->disableSAX = 1;
621 return(0);
622 }
623 *str = ptr;
624
625 /*
626 * [ WFC: Legal Character ]
627 * Characters referred to using character references must match the
628 * production for Char.
629 */
630 if (IS_CHAR(val)) {
631 return(val);
632 } else {
633 ctxt->errNo = XML_ERR_INVALID_CHAR;
634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
635 ctxt->sax->error(ctxt->userData,
636 "CharRef: invalid xmlChar value %d\n", val);
637 ctxt->wellFormed = 0;
638 ctxt->disableSAX = 1;
639 }
640 return(0);
641}
642
643/**
644 * xmlParserHandlePEReference:
645 * @ctxt: the parser context
646 *
647 * [69] PEReference ::= '%' Name ';'
648 *
649 * [ WFC: No Recursion ]
650 * A parsed entity must not contain a recursive
651 * reference to itself, either directly or indirectly.
652 *
653 * [ WFC: Entity Declared ]
654 * In a document without any DTD, a document with only an internal DTD
655 * subset which contains no parameter entity references, or a document
656 * with "standalone='yes'", ... ... The declaration of a parameter
657 * entity must precede any reference to it...
658 *
659 * [ VC: Entity Declared ]
660 * In a document with an external subset or external parameter entities
661 * with "standalone='no'", ... ... The declaration of a parameter entity
662 * must precede any reference to it...
663 *
664 * [ WFC: In DTD ]
665 * Parameter-entity references may only appear in the DTD.
666 * NOTE: misleading but this is handled.
667 *
668 * A PEReference may have been detected in the current input stream
669 * the handling is done accordingly to
670 * http://www.w3.org/TR/REC-xml#entproc
671 * i.e.
672 * - Included in literal in entity values
673 * - Included as Paraemeter Entity reference within DTDs
674 */
675void
676xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
677 xmlChar *name;
678 xmlEntityPtr entity = NULL;
679 xmlParserInputPtr input;
680
681 if (ctxt->token != 0) {
682 return;
683 }
684 if (RAW != '%') return;
685 switch(ctxt->instate) {
686 case XML_PARSER_CDATA_SECTION:
687 return;
688 case XML_PARSER_COMMENT:
689 return;
690 case XML_PARSER_START_TAG:
691 return;
692 case XML_PARSER_END_TAG:
693 return;
694 case XML_PARSER_EOF:
695 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
697 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
698 ctxt->wellFormed = 0;
699 ctxt->disableSAX = 1;
700 return;
701 case XML_PARSER_PROLOG:
702 case XML_PARSER_START:
703 case XML_PARSER_MISC:
704 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
706 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
707 ctxt->wellFormed = 0;
708 ctxt->disableSAX = 1;
709 return;
710 case XML_PARSER_ENTITY_DECL:
711 case XML_PARSER_CONTENT:
712 case XML_PARSER_ATTRIBUTE_VALUE:
713 case XML_PARSER_PI:
714 case XML_PARSER_SYSTEM_LITERAL:
715 /* we just ignore it there */
716 return;
717 case XML_PARSER_EPILOG:
718 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
720 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
721 ctxt->wellFormed = 0;
722 ctxt->disableSAX = 1;
723 return;
724 case XML_PARSER_ENTITY_VALUE:
725 /*
726 * NOTE: in the case of entity values, we don't do the
727 * substitution here since we need the literal
728 * entity value to be able to save the internal
729 * subset of the document.
730 * This will be handled by xmlStringDecodeEntities
731 */
732 return;
733 case XML_PARSER_DTD:
734 /*
735 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
736 * In the internal DTD subset, parameter-entity references
737 * can occur only where markup declarations can occur, not
738 * within markup declarations.
739 * In that case this is handled in xmlParseMarkupDecl
740 */
741 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
742 return;
743 break;
744 case XML_PARSER_IGNORE:
745 return;
746 }
747
748 NEXT;
749 name = xmlParseName(ctxt);
750 if (xmlParserDebugEntities)
751 xmlGenericError(xmlGenericErrorContext,
752 "PE Reference: %s\n", name);
753 if (name == NULL) {
754 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
756 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
757 ctxt->wellFormed = 0;
758 ctxt->disableSAX = 1;
759 } else {
760 if (RAW == ';') {
761 NEXT;
762 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
763 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
764 if (entity == NULL) {
765
766 /*
767 * [ WFC: Entity Declared ]
768 * In a document without any DTD, a document with only an
769 * internal DTD subset which contains no parameter entity
770 * references, or a document with "standalone='yes'", ...
771 * ... The declaration of a parameter entity must precede
772 * any reference to it...
773 */
774 if ((ctxt->standalone == 1) ||
775 ((ctxt->hasExternalSubset == 0) &&
776 (ctxt->hasPErefs == 0))) {
777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
778 ctxt->sax->error(ctxt->userData,
779 "PEReference: %%%s; not found\n", name);
780 ctxt->wellFormed = 0;
781 ctxt->disableSAX = 1;
782 } else {
783 /*
784 * [ VC: Entity Declared ]
785 * In a document with an external subset or external
786 * parameter entities with "standalone='no'", ...
787 * ... The declaration of a parameter entity must precede
788 * any reference to it...
789 */
790 if ((!ctxt->disableSAX) &&
791 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
792 ctxt->vctxt.error(ctxt->vctxt.userData,
793 "PEReference: %%%s; not found\n", name);
794 } else if ((!ctxt->disableSAX) &&
795 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
796 ctxt->sax->warning(ctxt->userData,
797 "PEReference: %%%s; not found\n", name);
798 ctxt->valid = 0;
799 }
800 } else {
801 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
802 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000803 xmlChar start[4];
804 xmlCharEncoding enc;
805
Owen Taylor3473f882001-02-23 17:55:21 +0000806 /*
807 * handle the extra spaces added before and after
808 * c.f. http://www.w3.org/TR/REC-xml#as-PE
809 * this is done independantly.
810 */
811 input = xmlNewEntityInputStream(ctxt, entity);
812 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000813
814 /*
815 * Get the 4 first bytes and decode the charset
816 * if enc != XML_CHAR_ENCODING_NONE
817 * plug some encoding conversion routines.
818 */
819 GROW
820 start[0] = RAW;
821 start[1] = NXT(1);
822 start[2] = NXT(2);
823 start[3] = NXT(3);
824 enc = xmlDetectCharEncoding(start, 4);
825 if (enc != XML_CHAR_ENCODING_NONE) {
826 xmlSwitchEncoding(ctxt, enc);
827 }
828
Owen Taylor3473f882001-02-23 17:55:21 +0000829 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
830 (RAW == '<') && (NXT(1) == '?') &&
831 (NXT(2) == 'x') && (NXT(3) == 'm') &&
832 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
833 xmlParseTextDecl(ctxt);
834 }
835 if (ctxt->token == 0)
836 ctxt->token = ' ';
837 } else {
838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
839 ctxt->sax->error(ctxt->userData,
840 "xmlHandlePEReference: %s is not a parameter entity\n",
841 name);
842 ctxt->wellFormed = 0;
843 ctxt->disableSAX = 1;
844 }
845 }
846 } else {
847 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData,
850 "xmlHandlePEReference: expecting ';'\n");
851 ctxt->wellFormed = 0;
852 ctxt->disableSAX = 1;
853 }
854 xmlFree(name);
855 }
856}
857
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) expects two variables in scope: the xmlChar pointer
 * `buffer` and its capacity `buffer_size` (the name is derived by token
 * pasting). The capacity is doubled and the buffer reallocated.
 *
 * On allocation failure the macro reports through perror() and makes the
 * *enclosing function* return NULL, so it can only be used in functions
 * returning a pointer.
 *
 * NOTE(review): the result of xmlRealloc() is assigned straight to
 * `buffer`, so on failure the previous allocation is no longer reachable
 * from that variable and is not freed before the return; other
 * allocations held by the caller at that point are not released either.
 */
#define growBuffer(buffer) {						\
    buffer##_size *= 2;							\
    buffer = (xmlChar *)						\
    		xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer == NULL) {						\
	perror("realloc failed");					\
	return(NULL);							\
    }									\
}

870
871/**
872 * xmlStringDecodeEntities:
873 * @ctxt: the parser context
874 * @str: the input string
875 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
876 * @end: an end marker xmlChar, 0 if none
877 * @end2: an end marker xmlChar, 0 if none
878 * @end3: an end marker xmlChar, 0 if none
879 *
880 * Takes a entity string content and process to do the adequate subtitutions.
881 *
882 * [67] Reference ::= EntityRef | CharRef
883 *
884 * [69] PEReference ::= '%' Name ';'
885 *
886 * Returns A newly allocated string with the substitution done. The caller
887 * must deallocate it !
888 */
889xmlChar *
890xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
891 xmlChar end, xmlChar end2, xmlChar end3) {
892 xmlChar *buffer = NULL;
893 int buffer_size = 0;
894
895 xmlChar *current = NULL;
896 xmlEntityPtr ent;
897 int c,l;
898 int nbchars = 0;
899
900 if (str == NULL)
901 return(NULL);
902
903 if (ctxt->depth > 40) {
904 ctxt->errNo = XML_ERR_ENTITY_LOOP;
905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
906 ctxt->sax->error(ctxt->userData,
907 "Detected entity reference loop\n");
908 ctxt->wellFormed = 0;
909 ctxt->disableSAX = 1;
910 return(NULL);
911 }
912
913 /*
914 * allocate a translation buffer.
915 */
916 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
917 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
918 if (buffer == NULL) {
919 perror("xmlDecodeEntities: malloc failed");
920 return(NULL);
921 }
922
923 /*
924 * Ok loop until we reach one of the ending char or a size limit.
925 * we are operating on already parsed values.
926 */
927 c = CUR_SCHAR(str, l);
928 while ((c != 0) && (c != end) && /* non input consuming loop */
929 (c != end2) && (c != end3)) {
930
931 if (c == 0) break;
932 if ((c == '&') && (str[1] == '#')) {
933 int val = xmlParseStringCharRef(ctxt, &str);
934 if (val != 0) {
935 COPY_BUF(0,buffer,nbchars,val);
936 }
937 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
938 if (xmlParserDebugEntities)
939 xmlGenericError(xmlGenericErrorContext,
940 "String decoding Entity Reference: %.30s\n",
941 str);
942 ent = xmlParseStringEntityRef(ctxt, &str);
943 if ((ent != NULL) &&
944 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
945 if (ent->content != NULL) {
946 COPY_BUF(0,buffer,nbchars,ent->content[0]);
947 } else {
948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
949 ctxt->sax->error(ctxt->userData,
950 "internal error entity has no content\n");
951 }
952 } else if ((ent != NULL) && (ent->content != NULL)) {
953 xmlChar *rep;
954
955 ctxt->depth++;
956 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
957 0, 0, 0);
958 ctxt->depth--;
959 if (rep != NULL) {
960 current = rep;
961 while (*current != 0) { /* non input consuming loop */
962 buffer[nbchars++] = *current++;
963 if (nbchars >
964 buffer_size - XML_PARSER_BUFFER_SIZE) {
965 growBuffer(buffer);
966 }
967 }
968 xmlFree(rep);
969 }
970 } else if (ent != NULL) {
971 int i = xmlStrlen(ent->name);
972 const xmlChar *cur = ent->name;
973
974 buffer[nbchars++] = '&';
975 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
976 growBuffer(buffer);
977 }
978 for (;i > 0;i--)
979 buffer[nbchars++] = *cur++;
980 buffer[nbchars++] = ';';
981 }
982 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
983 if (xmlParserDebugEntities)
984 xmlGenericError(xmlGenericErrorContext,
985 "String decoding PE Reference: %.30s\n", str);
986 ent = xmlParseStringPEReference(ctxt, &str);
987 if (ent != NULL) {
988 xmlChar *rep;
989
990 ctxt->depth++;
991 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
992 0, 0, 0);
993 ctxt->depth--;
994 if (rep != NULL) {
995 current = rep;
996 while (*current != 0) { /* non input consuming loop */
997 buffer[nbchars++] = *current++;
998 if (nbchars >
999 buffer_size - XML_PARSER_BUFFER_SIZE) {
1000 growBuffer(buffer);
1001 }
1002 }
1003 xmlFree(rep);
1004 }
1005 }
1006 } else {
1007 COPY_BUF(l,buffer,nbchars,c);
1008 str += l;
1009 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1010 growBuffer(buffer);
1011 }
1012 }
1013 c = CUR_SCHAR(str, l);
1014 }
1015 buffer[nbchars++] = 0;
1016 return(buffer);
1017}
1018
1019
1020/************************************************************************
1021 * *
1022 * Commodity functions to handle xmlChars *
1023 * *
1024 ************************************************************************/
1025
1026/**
1027 * xmlStrndup:
1028 * @cur: the input xmlChar *
1029 * @len: the len of @cur
1030 *
1031 * a strndup for array of xmlChar's
1032 *
1033 * Returns a new xmlChar * or NULL
1034 */
1035xmlChar *
1036xmlStrndup(const xmlChar *cur, int len) {
1037 xmlChar *ret;
1038
1039 if ((cur == NULL) || (len < 0)) return(NULL);
1040 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1041 if (ret == NULL) {
1042 xmlGenericError(xmlGenericErrorContext,
1043 "malloc of %ld byte failed\n",
1044 (len + 1) * (long)sizeof(xmlChar));
1045 return(NULL);
1046 }
1047 memcpy(ret, cur, len * sizeof(xmlChar));
1048 ret[len] = 0;
1049 return(ret);
1050}
1051
1052/**
1053 * xmlStrdup:
1054 * @cur: the input xmlChar *
1055 *
1056 * a strdup for array of xmlChar's. Since they are supposed to be
1057 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1058 * a termination mark of '0'.
1059 *
1060 * Returns a new xmlChar * or NULL
1061 */
1062xmlChar *
1063xmlStrdup(const xmlChar *cur) {
1064 const xmlChar *p = cur;
1065
1066 if (cur == NULL) return(NULL);
1067 while (*p != 0) p++; /* non input consuming */
1068 return(xmlStrndup(cur, p - cur));
1069}
1070
1071/**
1072 * xmlCharStrndup:
1073 * @cur: the input char *
1074 * @len: the len of @cur
1075 *
1076 * a strndup for char's to xmlChar's
1077 *
1078 * Returns a new xmlChar * or NULL
1079 */
1080
1081xmlChar *
1082xmlCharStrndup(const char *cur, int len) {
1083 int i;
1084 xmlChar *ret;
1085
1086 if ((cur == NULL) || (len < 0)) return(NULL);
1087 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1088 if (ret == NULL) {
1089 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1090 (len + 1) * (long)sizeof(xmlChar));
1091 return(NULL);
1092 }
1093 for (i = 0;i < len;i++)
1094 ret[i] = (xmlChar) cur[i];
1095 ret[len] = 0;
1096 return(ret);
1097}
1098
1099/**
1100 * xmlCharStrdup:
1101 * @cur: the input char *
1102 * @len: the len of @cur
1103 *
1104 * a strdup for char's to xmlChar's
1105 *
1106 * Returns a new xmlChar * or NULL
1107 */
1108
1109xmlChar *
1110xmlCharStrdup(const char *cur) {
1111 const char *p = cur;
1112
1113 if (cur == NULL) return(NULL);
1114 while (*p != '\0') p++; /* non input consuming */
1115 return(xmlCharStrndup(cur, p - cur));
1116}
1117
1118/**
1119 * xmlStrcmp:
1120 * @str1: the first xmlChar *
1121 * @str2: the second xmlChar *
1122 *
1123 * a strcmp for xmlChar's
1124 *
1125 * Returns the integer result of the comparison
1126 */
1127
1128int
1129xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1130 register int tmp;
1131
1132 if (str1 == str2) return(0);
1133 if (str1 == NULL) return(-1);
1134 if (str2 == NULL) return(1);
1135 do {
1136 tmp = *str1++ - *str2;
1137 if (tmp != 0) return(tmp);
1138 } while (*str2++ != 0);
1139 return 0;
1140}
1141
1142/**
1143 * xmlStrEqual:
1144 * @str1: the first xmlChar *
1145 * @str2: the second xmlChar *
1146 *
1147 * Check if both string are equal of have same content
1148 * Should be a bit more readable and faster than xmlStrEqual()
1149 *
1150 * Returns 1 if they are equal, 0 if they are different
1151 */
1152
1153int
1154xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1155 if (str1 == str2) return(1);
1156 if (str1 == NULL) return(0);
1157 if (str2 == NULL) return(0);
1158 do {
1159 if (*str1++ != *str2) return(0);
1160 } while (*str2++);
1161 return(1);
1162}
1163
1164/**
1165 * xmlStrncmp:
1166 * @str1: the first xmlChar *
1167 * @str2: the second xmlChar *
1168 * @len: the max comparison length
1169 *
1170 * a strncmp for xmlChar's
1171 *
1172 * Returns the integer result of the comparison
1173 */
1174
1175int
1176xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1177 register int tmp;
1178
1179 if (len <= 0) return(0);
1180 if (str1 == str2) return(0);
1181 if (str1 == NULL) return(-1);
1182 if (str2 == NULL) return(1);
1183 do {
1184 tmp = *str1++ - *str2;
1185 if (tmp != 0 || --len == 0) return(tmp);
1186 } while (*str2++ != 0);
1187 return 0;
1188}
1189
1190static xmlChar casemap[256] = {
1191 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1192 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1193 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1194 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1195 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1196 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1197 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1198 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1199 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1200 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1201 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1202 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1203 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1204 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1205 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1206 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1207 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1208 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1209 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1210 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1211 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1212 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1213 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1214 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1215 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1216 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1217 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1218 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1219 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1220 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1221 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1222 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1223};
1224
1225/**
1226 * xmlStrcasecmp:
1227 * @str1: the first xmlChar *
1228 * @str2: the second xmlChar *
1229 *
1230 * a strcasecmp for xmlChar's
1231 *
1232 * Returns the integer result of the comparison
1233 */
1234
1235int
1236xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1237 register int tmp;
1238
1239 if (str1 == str2) return(0);
1240 if (str1 == NULL) return(-1);
1241 if (str2 == NULL) return(1);
1242 do {
1243 tmp = casemap[*str1++] - casemap[*str2];
1244 if (tmp != 0) return(tmp);
1245 } while (*str2++ != 0);
1246 return 0;
1247}
1248
1249/**
1250 * xmlStrncasecmp:
1251 * @str1: the first xmlChar *
1252 * @str2: the second xmlChar *
1253 * @len: the max comparison length
1254 *
1255 * a strncasecmp for xmlChar's
1256 *
1257 * Returns the integer result of the comparison
1258 */
1259
1260int
1261xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1262 register int tmp;
1263
1264 if (len <= 0) return(0);
1265 if (str1 == str2) return(0);
1266 if (str1 == NULL) return(-1);
1267 if (str2 == NULL) return(1);
1268 do {
1269 tmp = casemap[*str1++] - casemap[*str2];
1270 if (tmp != 0 || --len == 0) return(tmp);
1271 } while (*str2++ != 0);
1272 return 0;
1273}
1274
1275/**
1276 * xmlStrchr:
1277 * @str: the xmlChar * array
1278 * @val: the xmlChar to search
1279 *
1280 * a strchr for xmlChar's
1281 *
1282 * Returns the xmlChar * for the first occurence or NULL.
1283 */
1284
1285const xmlChar *
1286xmlStrchr(const xmlChar *str, xmlChar val) {
1287 if (str == NULL) return(NULL);
1288 while (*str != 0) { /* non input consuming */
1289 if (*str == val) return((xmlChar *) str);
1290 str++;
1291 }
1292 return(NULL);
1293}
1294
1295/**
1296 * xmlStrstr:
1297 * @str: the xmlChar * array (haystack)
1298 * @val: the xmlChar to search (needle)
1299 *
1300 * a strstr for xmlChar's
1301 *
1302 * Returns the xmlChar * for the first occurence or NULL.
1303 */
1304
1305const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001306xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001307 int n;
1308
1309 if (str == NULL) return(NULL);
1310 if (val == NULL) return(NULL);
1311 n = xmlStrlen(val);
1312
1313 if (n == 0) return(str);
1314 while (*str != 0) { /* non input consuming */
1315 if (*str == *val) {
1316 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1317 }
1318 str++;
1319 }
1320 return(NULL);
1321}
1322
1323/**
1324 * xmlStrcasestr:
1325 * @str: the xmlChar * array (haystack)
1326 * @val: the xmlChar to search (needle)
1327 *
1328 * a case-ignoring strstr for xmlChar's
1329 *
1330 * Returns the xmlChar * for the first occurence or NULL.
1331 */
1332
1333const xmlChar *
1334xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1335 int n;
1336
1337 if (str == NULL) return(NULL);
1338 if (val == NULL) return(NULL);
1339 n = xmlStrlen(val);
1340
1341 if (n == 0) return(str);
1342 while (*str != 0) { /* non input consuming */
1343 if (casemap[*str] == casemap[*val])
1344 if (!xmlStrncasecmp(str, val, n)) return(str);
1345 str++;
1346 }
1347 return(NULL);
1348}
1349
1350/**
1351 * xmlStrsub:
1352 * @str: the xmlChar * array (haystack)
1353 * @start: the index of the first char (zero based)
1354 * @len: the length of the substring
1355 *
1356 * Extract a substring of a given string
1357 *
1358 * Returns the xmlChar * for the first occurence or NULL.
1359 */
1360
1361xmlChar *
1362xmlStrsub(const xmlChar *str, int start, int len) {
1363 int i;
1364
1365 if (str == NULL) return(NULL);
1366 if (start < 0) return(NULL);
1367 if (len < 0) return(NULL);
1368
1369 for (i = 0;i < start;i++) {
1370 if (*str == 0) return(NULL);
1371 str++;
1372 }
1373 if (*str == 0) return(NULL);
1374 return(xmlStrndup(str, len));
1375}
1376
1377/**
1378 * xmlStrlen:
1379 * @str: the xmlChar * array
1380 *
1381 * length of a xmlChar's string
1382 *
1383 * Returns the number of xmlChar contained in the ARRAY.
1384 */
1385
1386int
1387xmlStrlen(const xmlChar *str) {
1388 int len = 0;
1389
1390 if (str == NULL) return(0);
1391 while (*str != 0) { /* non input consuming */
1392 str++;
1393 len++;
1394 }
1395 return(len);
1396}
1397
1398/**
1399 * xmlStrncat:
1400 * @cur: the original xmlChar * array
1401 * @add: the xmlChar * array added
1402 * @len: the length of @add
1403 *
1404 * a strncat for array of xmlChar's, it will extend cur with the len
1405 * first bytes of @add.
1406 *
1407 * Returns a new xmlChar *, the original @cur is reallocated if needed
1408 * and should not be freed
1409 */
1410
1411xmlChar *
1412xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1413 int size;
1414 xmlChar *ret;
1415
1416 if ((add == NULL) || (len == 0))
1417 return(cur);
1418 if (cur == NULL)
1419 return(xmlStrndup(add, len));
1420
1421 size = xmlStrlen(cur);
1422 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1423 if (ret == NULL) {
1424 xmlGenericError(xmlGenericErrorContext,
1425 "xmlStrncat: realloc of %ld byte failed\n",
1426 (size + len + 1) * (long)sizeof(xmlChar));
1427 return(cur);
1428 }
1429 memcpy(&ret[size], add, len * sizeof(xmlChar));
1430 ret[size + len] = 0;
1431 return(ret);
1432}
1433
1434/**
1435 * xmlStrcat:
1436 * @cur: the original xmlChar * array
1437 * @add: the xmlChar * array added
1438 *
1439 * a strcat for array of xmlChar's. Since they are supposed to be
1440 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1441 * a termination mark of '0'.
1442 *
1443 * Returns a new xmlChar * containing the concatenated string.
1444 */
1445xmlChar *
1446xmlStrcat(xmlChar *cur, const xmlChar *add) {
1447 const xmlChar *p = add;
1448
1449 if (add == NULL) return(cur);
1450 if (cur == NULL)
1451 return(xmlStrdup(add));
1452
1453 while (*p != 0) p++; /* non input consuming */
1454 return(xmlStrncat(cur, add, p - add));
1455}
1456
1457/************************************************************************
1458 * *
1459 * Commodity functions, cleanup needed ? *
1460 * *
1461 ************************************************************************/
1462
1463/**
1464 * areBlanks:
1465 * @ctxt: an XML parser context
1466 * @str: a xmlChar *
1467 * @len: the size of @str
1468 *
1469 * Is this a sequence of blank chars that one can ignore ?
1470 *
1471 * Returns 1 if ignorable 0 otherwise.
1472 */
1473
1474static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1475 int i, ret;
1476 xmlNodePtr lastChild;
1477
Daniel Veillard2f362242001-03-02 17:36:21 +00001478 if (ctxt->keepBlanks)
1479 return(0);
1480
Owen Taylor3473f882001-02-23 17:55:21 +00001481 /*
1482 * Check for xml:space value.
1483 */
1484 if (*(ctxt->space) == 1)
1485 return(0);
1486
1487 /*
1488 * Check that the string is made of blanks
1489 */
1490 for (i = 0;i < len;i++)
1491 if (!(IS_BLANK(str[i]))) return(0);
1492
1493 /*
1494 * Look if the element is mixed content in the Dtd if available
1495 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001496 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001497 if (ctxt->myDoc != NULL) {
1498 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1499 if (ret == 0) return(1);
1500 if (ret == 1) return(0);
1501 }
1502
1503 /*
1504 * Otherwise, heuristic :-\
1505 */
Owen Taylor3473f882001-02-23 17:55:21 +00001506 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001507 if ((ctxt->node->children == NULL) &&
1508 (RAW == '<') && (NXT(1) == '/')) return(0);
1509
1510 lastChild = xmlGetLastChild(ctxt->node);
1511 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001512 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1513 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001514 } else if (xmlNodeIsText(lastChild))
1515 return(0);
1516 else if ((ctxt->node->children != NULL) &&
1517 (xmlNodeIsText(ctxt->node->children)))
1518 return(0);
1519 return(1);
1520}
1521
/*
 * Forward declarations for recursive behaviour.
 */
void xmlParsePEReference(xmlParserCtxtPtr ctxt);
void xmlParseReference(xmlParserCtxtPtr ctxt);
1527
1528/************************************************************************
1529 * *
1530 * Extra stuff for namespace support *
1531 * Relates to http://www.w3.org/TR/WD-xml-names *
1532 * *
1533 ************************************************************************/
1534
1535/**
1536 * xmlSplitQName:
1537 * @ctxt: an XML parser context
1538 * @name: an XML parser context
1539 * @prefix: a xmlChar **
1540 *
1541 * parse an UTF8 encoded XML qualified name string
1542 *
1543 * [NS 5] QName ::= (Prefix ':')? LocalPart
1544 *
1545 * [NS 6] Prefix ::= NCName
1546 *
1547 * [NS 7] LocalPart ::= NCName
1548 *
1549 * Returns the local part, and prefix is updated
1550 * to get the Prefix if any.
1551 */
1552
1553xmlChar *
1554xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1555 xmlChar buf[XML_MAX_NAMELEN + 5];
1556 xmlChar *buffer = NULL;
1557 int len = 0;
1558 int max = XML_MAX_NAMELEN;
1559 xmlChar *ret = NULL;
1560 const xmlChar *cur = name;
1561 int c;
1562
1563 *prefix = NULL;
1564
1565#ifndef XML_XML_NAMESPACE
1566 /* xml: prefix is not really a namespace */
1567 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1568 (cur[2] == 'l') && (cur[3] == ':'))
1569 return(xmlStrdup(name));
1570#endif
1571
1572 /* nasty but valid */
1573 if (cur[0] == ':')
1574 return(xmlStrdup(name));
1575
1576 c = *cur++;
1577 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1578 buf[len++] = c;
1579 c = *cur++;
1580 }
1581 if (len >= max) {
1582 /*
1583 * Okay someone managed to make a huge name, so he's ready to pay
1584 * for the processing speed.
1585 */
1586 max = len * 2;
1587
1588 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1589 if (buffer == NULL) {
1590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1591 ctxt->sax->error(ctxt->userData,
1592 "xmlSplitQName: out of memory\n");
1593 return(NULL);
1594 }
1595 memcpy(buffer, buf, len);
1596 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1597 if (len + 10 > max) {
1598 max *= 2;
1599 buffer = (xmlChar *) xmlRealloc(buffer,
1600 max * sizeof(xmlChar));
1601 if (buffer == NULL) {
1602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1603 ctxt->sax->error(ctxt->userData,
1604 "xmlSplitQName: out of memory\n");
1605 return(NULL);
1606 }
1607 }
1608 buffer[len++] = c;
1609 c = *cur++;
1610 }
1611 buffer[len] = 0;
1612 }
1613
1614 if (buffer == NULL)
1615 ret = xmlStrndup(buf, len);
1616 else {
1617 ret = buffer;
1618 buffer = NULL;
1619 max = XML_MAX_NAMELEN;
1620 }
1621
1622
1623 if (c == ':') {
1624 c = *cur++;
1625 if (c == 0) return(ret);
1626 *prefix = ret;
1627 len = 0;
1628
1629 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1630 buf[len++] = c;
1631 c = *cur++;
1632 }
1633 if (len >= max) {
1634 /*
1635 * Okay someone managed to make a huge name, so he's ready to pay
1636 * for the processing speed.
1637 */
1638 max = len * 2;
1639
1640 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1641 if (buffer == NULL) {
1642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1643 ctxt->sax->error(ctxt->userData,
1644 "xmlSplitQName: out of memory\n");
1645 return(NULL);
1646 }
1647 memcpy(buffer, buf, len);
1648 while (c != 0) { /* tested bigname2.xml */
1649 if (len + 10 > max) {
1650 max *= 2;
1651 buffer = (xmlChar *) xmlRealloc(buffer,
1652 max * sizeof(xmlChar));
1653 if (buffer == NULL) {
1654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1655 ctxt->sax->error(ctxt->userData,
1656 "xmlSplitQName: out of memory\n");
1657 return(NULL);
1658 }
1659 }
1660 buffer[len++] = c;
1661 c = *cur++;
1662 }
1663 buffer[len] = 0;
1664 }
1665
1666 if (buffer == NULL)
1667 ret = xmlStrndup(buf, len);
1668 else {
1669 ret = buffer;
1670 }
1671 }
1672
1673 return(ret);
1674}
1675
1676/************************************************************************
1677 * *
1678 * The parser itself *
1679 * Relates to http://www.w3.org/TR/REC-xml *
1680 * *
1681 ************************************************************************/
1682
Daniel Veillard76d66f42001-05-16 21:05:17 +00001683static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001684/**
1685 * xmlParseName:
1686 * @ctxt: an XML parser context
1687 *
1688 * parse an XML name.
1689 *
1690 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1691 * CombiningChar | Extender
1692 *
1693 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1694 *
1695 * [6] Names ::= Name (S Name)*
1696 *
1697 * Returns the Name parsed or NULL
1698 */
1699
1700xmlChar *
1701xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001702 const xmlChar *in;
1703 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001704 int count = 0;
1705
1706 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001707
1708 /*
1709 * Accelerator for simple ASCII names
1710 */
1711 in = ctxt->input->cur;
1712 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1713 ((*in >= 0x41) && (*in <= 0x5A)) ||
1714 (*in == '_') || (*in == ':')) {
1715 in++;
1716 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1717 ((*in >= 0x41) && (*in <= 0x5A)) ||
1718 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001719 (*in == '_') || (*in == '-') ||
1720 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001721 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001722 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001723 count = in - ctxt->input->cur;
1724 ret = xmlStrndup(ctxt->input->cur, count);
1725 ctxt->input->cur = in;
1726 return(ret);
1727 }
1728 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001729 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001730}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001731
Daniel Veillard76d66f42001-05-16 21:05:17 +00001732static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001733xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1734 xmlChar buf[XML_MAX_NAMELEN + 5];
1735 int len = 0, l;
1736 int c;
1737 int count = 0;
1738
1739 /*
1740 * Handler for more complex cases
1741 */
1742 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001743 c = CUR_CHAR(l);
1744 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1745 (!IS_LETTER(c) && (c != '_') &&
1746 (c != ':'))) {
1747 return(NULL);
1748 }
1749
1750 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1751 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1752 (c == '.') || (c == '-') ||
1753 (c == '_') || (c == ':') ||
1754 (IS_COMBINING(c)) ||
1755 (IS_EXTENDER(c)))) {
1756 if (count++ > 100) {
1757 count = 0;
1758 GROW;
1759 }
1760 COPY_BUF(l,buf,len,c);
1761 NEXTL(l);
1762 c = CUR_CHAR(l);
1763 if (len >= XML_MAX_NAMELEN) {
1764 /*
1765 * Okay someone managed to make a huge name, so he's ready to pay
1766 * for the processing speed.
1767 */
1768 xmlChar *buffer;
1769 int max = len * 2;
1770
1771 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1772 if (buffer == NULL) {
1773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1774 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001775 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001776 return(NULL);
1777 }
1778 memcpy(buffer, buf, len);
1779 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1780 (c == '.') || (c == '-') ||
1781 (c == '_') || (c == ':') ||
1782 (IS_COMBINING(c)) ||
1783 (IS_EXTENDER(c))) {
1784 if (count++ > 100) {
1785 count = 0;
1786 GROW;
1787 }
1788 if (len + 10 > max) {
1789 max *= 2;
1790 buffer = (xmlChar *) xmlRealloc(buffer,
1791 max * sizeof(xmlChar));
1792 if (buffer == NULL) {
1793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1794 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001795 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001796 return(NULL);
1797 }
1798 }
1799 COPY_BUF(l,buffer,len,c);
1800 NEXTL(l);
1801 c = CUR_CHAR(l);
1802 }
1803 buffer[len] = 0;
1804 return(buffer);
1805 }
1806 }
1807 return(xmlStrndup(buf, len));
1808}
1809
1810/**
1811 * xmlParseStringName:
1812 * @ctxt: an XML parser context
1813 * @str: a pointer to the string pointer (IN/OUT)
1814 *
1815 * parse an XML name.
1816 *
1817 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1818 * CombiningChar | Extender
1819 *
1820 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1821 *
1822 * [6] Names ::= Name (S Name)*
1823 *
1824 * Returns the Name parsed or NULL. The str pointer
1825 * is updated to the current location in the string.
1826 */
1827
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001828static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001829xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1830 xmlChar buf[XML_MAX_NAMELEN + 5];
1831 const xmlChar *cur = *str;
1832 int len = 0, l;
1833 int c;
1834
1835 c = CUR_SCHAR(cur, l);
1836 if (!IS_LETTER(c) && (c != '_') &&
1837 (c != ':')) {
1838 return(NULL);
1839 }
1840
1841 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1842 (c == '.') || (c == '-') ||
1843 (c == '_') || (c == ':') ||
1844 (IS_COMBINING(c)) ||
1845 (IS_EXTENDER(c))) {
1846 COPY_BUF(l,buf,len,c);
1847 cur += l;
1848 c = CUR_SCHAR(cur, l);
1849 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1850 /*
1851 * Okay someone managed to make a huge name, so he's ready to pay
1852 * for the processing speed.
1853 */
1854 xmlChar *buffer;
1855 int max = len * 2;
1856
1857 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1858 if (buffer == NULL) {
1859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1860 ctxt->sax->error(ctxt->userData,
1861 "xmlParseStringName: out of memory\n");
1862 return(NULL);
1863 }
1864 memcpy(buffer, buf, len);
1865 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1866 (c == '.') || (c == '-') ||
1867 (c == '_') || (c == ':') ||
1868 (IS_COMBINING(c)) ||
1869 (IS_EXTENDER(c))) {
1870 if (len + 10 > max) {
1871 max *= 2;
1872 buffer = (xmlChar *) xmlRealloc(buffer,
1873 max * sizeof(xmlChar));
1874 if (buffer == NULL) {
1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1876 ctxt->sax->error(ctxt->userData,
1877 "xmlParseStringName: out of memory\n");
1878 return(NULL);
1879 }
1880 }
1881 COPY_BUF(l,buffer,len,c);
1882 cur += l;
1883 c = CUR_SCHAR(cur, l);
1884 }
1885 buffer[len] = 0;
1886 *str = cur;
1887 return(buffer);
1888 }
1889 }
1890 *str = cur;
1891 return(xmlStrndup(buf, len));
1892}
1893
1894/**
1895 * xmlParseNmtoken:
1896 * @ctxt: an XML parser context
1897 *
1898 * parse an XML Nmtoken.
1899 *
1900 * [7] Nmtoken ::= (NameChar)+
1901 *
1902 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1903 *
1904 * Returns the Nmtoken parsed or NULL
1905 */
1906
1907xmlChar *
1908xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1909 xmlChar buf[XML_MAX_NAMELEN + 5];
1910 int len = 0, l;
1911 int c;
1912 int count = 0;
1913
1914 GROW;
1915 c = CUR_CHAR(l);
1916
1917 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1918 (c == '.') || (c == '-') ||
1919 (c == '_') || (c == ':') ||
1920 (IS_COMBINING(c)) ||
1921 (IS_EXTENDER(c))) {
1922 if (count++ > 100) {
1923 count = 0;
1924 GROW;
1925 }
1926 COPY_BUF(l,buf,len,c);
1927 NEXTL(l);
1928 c = CUR_CHAR(l);
1929 if (len >= XML_MAX_NAMELEN) {
1930 /*
1931 * Okay someone managed to make a huge token, so he's ready to pay
1932 * for the processing speed.
1933 */
1934 xmlChar *buffer;
1935 int max = len * 2;
1936
1937 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1938 if (buffer == NULL) {
1939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1940 ctxt->sax->error(ctxt->userData,
1941 "xmlParseNmtoken: out of memory\n");
1942 return(NULL);
1943 }
1944 memcpy(buffer, buf, len);
1945 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1946 (c == '.') || (c == '-') ||
1947 (c == '_') || (c == ':') ||
1948 (IS_COMBINING(c)) ||
1949 (IS_EXTENDER(c))) {
1950 if (count++ > 100) {
1951 count = 0;
1952 GROW;
1953 }
1954 if (len + 10 > max) {
1955 max *= 2;
1956 buffer = (xmlChar *) xmlRealloc(buffer,
1957 max * sizeof(xmlChar));
1958 if (buffer == NULL) {
1959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1960 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001961 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001962 return(NULL);
1963 }
1964 }
1965 COPY_BUF(l,buffer,len,c);
1966 NEXTL(l);
1967 c = CUR_CHAR(l);
1968 }
1969 buffer[len] = 0;
1970 return(buffer);
1971 }
1972 }
1973 if (len == 0)
1974 return(NULL);
1975 return(xmlStrndup(buf, len));
1976}
1977
1978/**
1979 * xmlParseEntityValue:
1980 * @ctxt: an XML parser context
1981 * @orig: if non-NULL store a copy of the original entity value
1982 *
1983 * parse a value for ENTITY declarations
1984 *
1985 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1986 * "'" ([^%&'] | PEReference | Reference)* "'"
1987 *
1988 * Returns the EntityValue parsed with reference substitued or NULL
1989 */
1990
1991xmlChar *
1992xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1993 xmlChar *buf = NULL;
1994 int len = 0;
1995 int size = XML_PARSER_BUFFER_SIZE;
1996 int c, l;
1997 xmlChar stop;
1998 xmlChar *ret = NULL;
1999 const xmlChar *cur = NULL;
2000 xmlParserInputPtr input;
2001
2002 if (RAW == '"') stop = '"';
2003 else if (RAW == '\'') stop = '\'';
2004 else {
2005 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2007 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2008 ctxt->wellFormed = 0;
2009 ctxt->disableSAX = 1;
2010 return(NULL);
2011 }
2012 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2013 if (buf == NULL) {
2014 xmlGenericError(xmlGenericErrorContext,
2015 "malloc of %d byte failed\n", size);
2016 return(NULL);
2017 }
2018
2019 /*
2020 * The content of the entity definition is copied in a buffer.
2021 */
2022
2023 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2024 input = ctxt->input;
2025 GROW;
2026 NEXT;
2027 c = CUR_CHAR(l);
2028 /*
2029 * NOTE: 4.4.5 Included in Literal
2030 * When a parameter entity reference appears in a literal entity
2031 * value, ... a single or double quote character in the replacement
2032 * text is always treated as a normal data character and will not
2033 * terminate the literal.
2034 * In practice it means we stop the loop only when back at parsing
2035 * the initial entity and the quote is found
2036 */
2037 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2038 (ctxt->input != input))) {
2039 if (len + 5 >= size) {
2040 size *= 2;
2041 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2042 if (buf == NULL) {
2043 xmlGenericError(xmlGenericErrorContext,
2044 "realloc of %d byte failed\n", size);
2045 return(NULL);
2046 }
2047 }
2048 COPY_BUF(l,buf,len,c);
2049 NEXTL(l);
2050 /*
2051 * Pop-up of finished entities.
2052 */
2053 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2054 xmlPopInput(ctxt);
2055
2056 GROW;
2057 c = CUR_CHAR(l);
2058 if (c == 0) {
2059 GROW;
2060 c = CUR_CHAR(l);
2061 }
2062 }
2063 buf[len] = 0;
2064
2065 /*
2066 * Raise problem w.r.t. '&' and '%' being used in non-entities
2067 * reference constructs. Note Charref will be handled in
2068 * xmlStringDecodeEntities()
2069 */
2070 cur = buf;
2071 while (*cur != 0) { /* non input consuming */
2072 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2073 xmlChar *name;
2074 xmlChar tmp = *cur;
2075
2076 cur++;
2077 name = xmlParseStringName(ctxt, &cur);
2078 if ((name == NULL) || (*cur != ';')) {
2079 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2081 ctxt->sax->error(ctxt->userData,
2082 "EntityValue: '%c' forbidden except for entities references\n",
2083 tmp);
2084 ctxt->wellFormed = 0;
2085 ctxt->disableSAX = 1;
2086 }
2087 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2088 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2090 ctxt->sax->error(ctxt->userData,
2091 "EntityValue: PEReferences forbidden in internal subset\n",
2092 tmp);
2093 ctxt->wellFormed = 0;
2094 ctxt->disableSAX = 1;
2095 }
2096 if (name != NULL)
2097 xmlFree(name);
2098 }
2099 cur++;
2100 }
2101
2102 /*
2103 * Then PEReference entities are substituted.
2104 */
2105 if (c != stop) {
2106 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2108 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2109 ctxt->wellFormed = 0;
2110 ctxt->disableSAX = 1;
2111 xmlFree(buf);
2112 } else {
2113 NEXT;
2114 /*
2115 * NOTE: 4.4.7 Bypassed
2116 * When a general entity reference appears in the EntityValue in
2117 * an entity declaration, it is bypassed and left as is.
2118 * so XML_SUBSTITUTE_REF is not set here.
2119 */
2120 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2121 0, 0, 0);
2122 if (orig != NULL)
2123 *orig = buf;
2124 else
2125 xmlFree(buf);
2126 }
2127
2128 return(ret);
2129}
2130
2131/**
2132 * xmlParseAttValue:
2133 * @ctxt: an XML parser context
2134 *
2135 * parse a value for an attribute
2136 * Note: the parser won't do substitution of entities here, this
2137 * will be handled later in xmlStringGetNodeList
2138 *
2139 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2140 * "'" ([^<&'] | Reference)* "'"
2141 *
2142 * 3.3.3 Attribute-Value Normalization:
2143 * Before the value of an attribute is passed to the application or
2144 * checked for validity, the XML processor must normalize it as follows:
2145 * - a character reference is processed by appending the referenced
2146 * character to the attribute value
2147 * - an entity reference is processed by recursively processing the
2148 * replacement text of the entity
2149 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2150 * appending #x20 to the normalized value, except that only a single
2151 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2152 * parsed entity or the literal entity value of an internal parsed entity
2153 * - other characters are processed by appending them to the normalized value
2154 * If the declared value is not CDATA, then the XML processor must further
2155 * process the normalized attribute value by discarding any leading and
2156 * trailing space (#x20) characters, and by replacing sequences of space
2157 * (#x20) characters by a single space (#x20) character.
2158 * All attributes for which no declaration has been read should be treated
2159 * by a non-validating parser as if declared CDATA.
2160 *
2161 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2162 */
2163
2164xmlChar *
2165xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2166 xmlChar limit = 0;
2167 xmlChar *buf = NULL;
2168 int len = 0;
2169 int buf_size = 0;
2170 int c, l;
2171 xmlChar *current = NULL;
2172 xmlEntityPtr ent;
2173
2174
2175 SHRINK;
2176 if (NXT(0) == '"') {
2177 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2178 limit = '"';
2179 NEXT;
2180 } else if (NXT(0) == '\'') {
2181 limit = '\'';
2182 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2183 NEXT;
2184 } else {
2185 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2187 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2188 ctxt->wellFormed = 0;
2189 ctxt->disableSAX = 1;
2190 return(NULL);
2191 }
2192
2193 /*
2194 * allocate a translation buffer.
2195 */
2196 buf_size = XML_PARSER_BUFFER_SIZE;
2197 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2198 if (buf == NULL) {
2199 perror("xmlParseAttValue: malloc failed");
2200 return(NULL);
2201 }
2202
2203 /*
2204 * Ok loop until we reach one of the ending char or a size limit.
2205 */
2206 c = CUR_CHAR(l);
2207 while (((NXT(0) != limit) && /* checked */
2208 (c != '<')) || (ctxt->token != 0)) {
2209 if (c == 0) break;
2210 if (ctxt->token == '&') {
2211 /*
2212 * The reparsing will be done in xmlStringGetNodeList()
2213 * called by the attribute() function in SAX.c
2214 */
2215 static xmlChar buffer[6] = "&#38;";
2216
2217 if (len > buf_size - 10) {
2218 growBuffer(buf);
2219 }
2220 current = &buffer[0];
2221 while (*current != 0) { /* non input consuming */
2222 buf[len++] = *current++;
2223 }
2224 ctxt->token = 0;
2225 } else if (c == '&') {
2226 if (NXT(1) == '#') {
2227 int val = xmlParseCharRef(ctxt);
2228 if (val == '&') {
2229 /*
2230 * The reparsing will be done in xmlStringGetNodeList()
2231 * called by the attribute() function in SAX.c
2232 */
2233 static xmlChar buffer[6] = "&#38;";
2234
2235 if (len > buf_size - 10) {
2236 growBuffer(buf);
2237 }
2238 current = &buffer[0];
2239 while (*current != 0) { /* non input consuming */
2240 buf[len++] = *current++;
2241 }
2242 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002243 if (len > buf_size - 10) {
2244 growBuffer(buf);
2245 }
Owen Taylor3473f882001-02-23 17:55:21 +00002246 len += xmlCopyChar(0, &buf[len], val);
2247 }
2248 } else {
2249 ent = xmlParseEntityRef(ctxt);
2250 if ((ent != NULL) &&
2251 (ctxt->replaceEntities != 0)) {
2252 xmlChar *rep;
2253
2254 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2255 rep = xmlStringDecodeEntities(ctxt, ent->content,
2256 XML_SUBSTITUTE_REF, 0, 0, 0);
2257 if (rep != NULL) {
2258 current = rep;
2259 while (*current != 0) { /* non input consuming */
2260 buf[len++] = *current++;
2261 if (len > buf_size - 10) {
2262 growBuffer(buf);
2263 }
2264 }
2265 xmlFree(rep);
2266 }
2267 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002268 if (len > buf_size - 10) {
2269 growBuffer(buf);
2270 }
Owen Taylor3473f882001-02-23 17:55:21 +00002271 if (ent->content != NULL)
2272 buf[len++] = ent->content[0];
2273 }
2274 } else if (ent != NULL) {
2275 int i = xmlStrlen(ent->name);
2276 const xmlChar *cur = ent->name;
2277
2278 /*
2279 * This may look absurd but is needed to detect
2280 * entities problems
2281 */
2282 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2283 (ent->content != NULL)) {
2284 xmlChar *rep;
2285 rep = xmlStringDecodeEntities(ctxt, ent->content,
2286 XML_SUBSTITUTE_REF, 0, 0, 0);
2287 if (rep != NULL)
2288 xmlFree(rep);
2289 }
2290
2291 /*
2292 * Just output the reference
2293 */
2294 buf[len++] = '&';
2295 if (len > buf_size - i - 10) {
2296 growBuffer(buf);
2297 }
2298 for (;i > 0;i--)
2299 buf[len++] = *cur++;
2300 buf[len++] = ';';
2301 }
2302 }
2303 } else {
2304 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2305 COPY_BUF(l,buf,len,0x20);
2306 if (len > buf_size - 10) {
2307 growBuffer(buf);
2308 }
2309 } else {
2310 COPY_BUF(l,buf,len,c);
2311 if (len > buf_size - 10) {
2312 growBuffer(buf);
2313 }
2314 }
2315 NEXTL(l);
2316 }
2317 GROW;
2318 c = CUR_CHAR(l);
2319 }
2320 buf[len++] = 0;
2321 if (RAW == '<') {
2322 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2324 ctxt->sax->error(ctxt->userData,
2325 "Unescaped '<' not allowed in attributes values\n");
2326 ctxt->wellFormed = 0;
2327 ctxt->disableSAX = 1;
2328 } else if (RAW != limit) {
2329 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2331 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2332 ctxt->wellFormed = 0;
2333 ctxt->disableSAX = 1;
2334 } else
2335 NEXT;
2336 return(buf);
2337}
2338
2339/**
2340 * xmlParseSystemLiteral:
2341 * @ctxt: an XML parser context
2342 *
2343 * parse an XML Literal
2344 *
2345 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2346 *
2347 * Returns the SystemLiteral parsed or NULL
2348 */
2349
2350xmlChar *
2351xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2352 xmlChar *buf = NULL;
2353 int len = 0;
2354 int size = XML_PARSER_BUFFER_SIZE;
2355 int cur, l;
2356 xmlChar stop;
2357 int state = ctxt->instate;
2358 int count = 0;
2359
2360 SHRINK;
2361 if (RAW == '"') {
2362 NEXT;
2363 stop = '"';
2364 } else if (RAW == '\'') {
2365 NEXT;
2366 stop = '\'';
2367 } else {
2368 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2370 ctxt->sax->error(ctxt->userData,
2371 "SystemLiteral \" or ' expected\n");
2372 ctxt->wellFormed = 0;
2373 ctxt->disableSAX = 1;
2374 return(NULL);
2375 }
2376
2377 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2378 if (buf == NULL) {
2379 xmlGenericError(xmlGenericErrorContext,
2380 "malloc of %d byte failed\n", size);
2381 return(NULL);
2382 }
2383 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2384 cur = CUR_CHAR(l);
2385 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2386 if (len + 5 >= size) {
2387 size *= 2;
2388 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2389 if (buf == NULL) {
2390 xmlGenericError(xmlGenericErrorContext,
2391 "realloc of %d byte failed\n", size);
2392 ctxt->instate = (xmlParserInputState) state;
2393 return(NULL);
2394 }
2395 }
2396 count++;
2397 if (count > 50) {
2398 GROW;
2399 count = 0;
2400 }
2401 COPY_BUF(l,buf,len,cur);
2402 NEXTL(l);
2403 cur = CUR_CHAR(l);
2404 if (cur == 0) {
2405 GROW;
2406 SHRINK;
2407 cur = CUR_CHAR(l);
2408 }
2409 }
2410 buf[len] = 0;
2411 ctxt->instate = (xmlParserInputState) state;
2412 if (!IS_CHAR(cur)) {
2413 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2415 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2416 ctxt->wellFormed = 0;
2417 ctxt->disableSAX = 1;
2418 } else {
2419 NEXT;
2420 }
2421 return(buf);
2422}
2423
2424/**
2425 * xmlParsePubidLiteral:
2426 * @ctxt: an XML parser context
2427 *
2428 * parse an XML public literal
2429 *
2430 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2431 *
2432 * Returns the PubidLiteral parsed or NULL.
2433 */
2434
2435xmlChar *
2436xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2437 xmlChar *buf = NULL;
2438 int len = 0;
2439 int size = XML_PARSER_BUFFER_SIZE;
2440 xmlChar cur;
2441 xmlChar stop;
2442 int count = 0;
2443
2444 SHRINK;
2445 if (RAW == '"') {
2446 NEXT;
2447 stop = '"';
2448 } else if (RAW == '\'') {
2449 NEXT;
2450 stop = '\'';
2451 } else {
2452 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2454 ctxt->sax->error(ctxt->userData,
2455 "SystemLiteral \" or ' expected\n");
2456 ctxt->wellFormed = 0;
2457 ctxt->disableSAX = 1;
2458 return(NULL);
2459 }
2460 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2461 if (buf == NULL) {
2462 xmlGenericError(xmlGenericErrorContext,
2463 "malloc of %d byte failed\n", size);
2464 return(NULL);
2465 }
2466 cur = CUR;
2467 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2468 if (len + 1 >= size) {
2469 size *= 2;
2470 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2471 if (buf == NULL) {
2472 xmlGenericError(xmlGenericErrorContext,
2473 "realloc of %d byte failed\n", size);
2474 return(NULL);
2475 }
2476 }
2477 buf[len++] = cur;
2478 count++;
2479 if (count > 50) {
2480 GROW;
2481 count = 0;
2482 }
2483 NEXT;
2484 cur = CUR;
2485 if (cur == 0) {
2486 GROW;
2487 SHRINK;
2488 cur = CUR;
2489 }
2490 }
2491 buf[len] = 0;
2492 if (cur != stop) {
2493 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2495 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2496 ctxt->wellFormed = 0;
2497 ctxt->disableSAX = 1;
2498 } else {
2499 NEXT;
2500 }
2501 return(buf);
2502}
2503
Daniel Veillard48b2f892001-02-25 16:11:03 +00002504void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002505/**
2506 * xmlParseCharData:
2507 * @ctxt: an XML parser context
2508 * @cdata: int indicating whether we are within a CDATA section
2509 *
2510 * parse a CharData section.
2511 * if we are within a CDATA section ']]>' marks an end of section.
2512 *
2513 * The right angle bracket (>) may be represented using the string "&gt;",
2514 * and must, for compatibility, be escaped using "&gt;" or a character
2515 * reference when it appears in the string "]]>" in content, when that
2516 * string is not marking the end of a CDATA section.
2517 *
2518 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2519 */
2520
2521void
2522xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002523 const xmlChar *in;
2524 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002525 int line = ctxt->input->line;
2526 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002527
2528 SHRINK;
2529 GROW;
2530 /*
2531 * Accelerated common case where input don't need to be
2532 * modified before passing it to the handler.
2533 */
2534 if ((ctxt->token == 0) && (!cdata)) {
2535 in = ctxt->input->cur;
2536 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002537get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002538 while (((*in >= 0x20) && (*in != '<') &&
2539 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2540 in++;
2541 if (*in == 0xA) {
2542 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002543 in++;
2544 while (*in == 0xA) {
2545 ctxt->input->line++;
2546 in++;
2547 }
2548 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002549 }
2550 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002551 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002552 if (IS_BLANK(*ctxt->input->cur)) {
2553 const xmlChar *tmp = ctxt->input->cur;
2554 ctxt->input->cur = in;
2555 if (areBlanks(ctxt, tmp, nbchar)) {
2556 if (ctxt->sax->ignorableWhitespace != NULL)
2557 ctxt->sax->ignorableWhitespace(ctxt->userData,
2558 tmp, nbchar);
2559 } else {
2560 if (ctxt->sax->characters != NULL)
2561 ctxt->sax->characters(ctxt->userData,
2562 tmp, nbchar);
2563 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002564 line = ctxt->input->line;
2565 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002566 } else {
2567 if (ctxt->sax->characters != NULL)
2568 ctxt->sax->characters(ctxt->userData,
2569 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002570 line = ctxt->input->line;
2571 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002572 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002573 }
2574 ctxt->input->cur = in;
2575 if (*in == 0xD) {
2576 in++;
2577 if (*in == 0xA) {
2578 ctxt->input->cur = in;
2579 in++;
2580 ctxt->input->line++;
2581 continue; /* while */
2582 }
2583 in--;
2584 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002585 if (*in == '<') {
2586 return;
2587 }
2588 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002589 return;
2590 }
2591 SHRINK;
2592 GROW;
2593 in = ctxt->input->cur;
2594 } while ((*in >= 0x20) && (*in <= 0x7F));
2595 nbchar = 0;
2596 }
Daniel Veillard50582112001-03-26 22:52:16 +00002597 ctxt->input->line = line;
2598 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 xmlParseCharDataComplex(ctxt, cdata);
2600}
2601
2602void
2603xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002604 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2605 int nbchar = 0;
2606 int cur, l;
2607 int count = 0;
2608
2609 SHRINK;
2610 GROW;
2611 cur = CUR_CHAR(l);
2612 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2613 ((cur != '&') || (ctxt->token == '&')) &&
2614 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2615 if ((cur == ']') && (NXT(1) == ']') &&
2616 (NXT(2) == '>')) {
2617 if (cdata) break;
2618 else {
2619 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2621 ctxt->sax->error(ctxt->userData,
2622 "Sequence ']]>' not allowed in content\n");
2623 /* Should this be relaxed ??? I see a "must here */
2624 ctxt->wellFormed = 0;
2625 ctxt->disableSAX = 1;
2626 }
2627 }
2628 COPY_BUF(l,buf,nbchar,cur);
2629 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2630 /*
2631 * Ok the segment is to be consumed as chars.
2632 */
2633 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2634 if (areBlanks(ctxt, buf, nbchar)) {
2635 if (ctxt->sax->ignorableWhitespace != NULL)
2636 ctxt->sax->ignorableWhitespace(ctxt->userData,
2637 buf, nbchar);
2638 } else {
2639 if (ctxt->sax->characters != NULL)
2640 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2641 }
2642 }
2643 nbchar = 0;
2644 }
2645 count++;
2646 if (count > 50) {
2647 GROW;
2648 count = 0;
2649 }
2650 NEXTL(l);
2651 cur = CUR_CHAR(l);
2652 }
2653 if (nbchar != 0) {
2654 /*
2655 * Ok the segment is to be consumed as chars.
2656 */
2657 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2658 if (areBlanks(ctxt, buf, nbchar)) {
2659 if (ctxt->sax->ignorableWhitespace != NULL)
2660 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2661 } else {
2662 if (ctxt->sax->characters != NULL)
2663 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2664 }
2665 }
2666 }
2667}
2668
2669/**
2670 * xmlParseExternalID:
2671 * @ctxt: an XML parser context
2672 * @publicID: a xmlChar** receiving PubidLiteral
2673 * @strict: indicate whether we should restrict parsing to only
2674 * production [75], see NOTE below
2675 *
2676 * Parse an External ID or a Public ID
2677 *
2678 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2679 * 'PUBLIC' S PubidLiteral S SystemLiteral
2680 *
2681 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2682 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2683 *
2684 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2685 *
2686 * Returns the function returns SystemLiteral and in the second
2687 * case publicID receives PubidLiteral, is strict is off
2688 * it is possible to return NULL and have publicID set.
2689 */
2690
2691xmlChar *
2692xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2693 xmlChar *URI = NULL;
2694
2695 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002696
2697 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002698 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2699 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2700 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2701 SKIP(6);
2702 if (!IS_BLANK(CUR)) {
2703 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2705 ctxt->sax->error(ctxt->userData,
2706 "Space required after 'SYSTEM'\n");
2707 ctxt->wellFormed = 0;
2708 ctxt->disableSAX = 1;
2709 }
2710 SKIP_BLANKS;
2711 URI = xmlParseSystemLiteral(ctxt);
2712 if (URI == NULL) {
2713 ctxt->errNo = XML_ERR_URI_REQUIRED;
2714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2715 ctxt->sax->error(ctxt->userData,
2716 "xmlParseExternalID: SYSTEM, no URI\n");
2717 ctxt->wellFormed = 0;
2718 ctxt->disableSAX = 1;
2719 }
2720 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2721 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2722 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2723 SKIP(6);
2724 if (!IS_BLANK(CUR)) {
2725 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2727 ctxt->sax->error(ctxt->userData,
2728 "Space required after 'PUBLIC'\n");
2729 ctxt->wellFormed = 0;
2730 ctxt->disableSAX = 1;
2731 }
2732 SKIP_BLANKS;
2733 *publicID = xmlParsePubidLiteral(ctxt);
2734 if (*publicID == NULL) {
2735 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2737 ctxt->sax->error(ctxt->userData,
2738 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2739 ctxt->wellFormed = 0;
2740 ctxt->disableSAX = 1;
2741 }
2742 if (strict) {
2743 /*
2744 * We don't handle [83] so "S SystemLiteral" is required.
2745 */
2746 if (!IS_BLANK(CUR)) {
2747 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2748 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2749 ctxt->sax->error(ctxt->userData,
2750 "Space required after the Public Identifier\n");
2751 ctxt->wellFormed = 0;
2752 ctxt->disableSAX = 1;
2753 }
2754 } else {
2755 /*
2756 * We handle [83] so we return immediately, if
2757 * "S SystemLiteral" is not detected. From a purely parsing
2758 * point of view that's a nice mess.
2759 */
2760 const xmlChar *ptr;
2761 GROW;
2762
2763 ptr = CUR_PTR;
2764 if (!IS_BLANK(*ptr)) return(NULL);
2765
2766 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2767 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2768 }
2769 SKIP_BLANKS;
2770 URI = xmlParseSystemLiteral(ctxt);
2771 if (URI == NULL) {
2772 ctxt->errNo = XML_ERR_URI_REQUIRED;
2773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2774 ctxt->sax->error(ctxt->userData,
2775 "xmlParseExternalID: PUBLIC, no URI\n");
2776 ctxt->wellFormed = 0;
2777 ctxt->disableSAX = 1;
2778 }
2779 }
2780 return(URI);
2781}
2782
2783/**
2784 * xmlParseComment:
2785 * @ctxt: an XML parser context
2786 *
2787 * Skip an XML (SGML) comment <!-- .... -->
2788 * The spec says that "For compatibility, the string "--" (double-hyphen)
2789 * must not occur within comments. "
2790 *
2791 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2792 */
2793void
2794xmlParseComment(xmlParserCtxtPtr ctxt) {
2795 xmlChar *buf = NULL;
2796 int len;
2797 int size = XML_PARSER_BUFFER_SIZE;
2798 int q, ql;
2799 int r, rl;
2800 int cur, l;
2801 xmlParserInputState state;
2802 xmlParserInputPtr input = ctxt->input;
2803 int count = 0;
2804
2805 /*
2806 * Check that there is a comment right here.
2807 */
2808 if ((RAW != '<') || (NXT(1) != '!') ||
2809 (NXT(2) != '-') || (NXT(3) != '-')) return;
2810
2811 state = ctxt->instate;
2812 ctxt->instate = XML_PARSER_COMMENT;
2813 SHRINK;
2814 SKIP(4);
2815 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2816 if (buf == NULL) {
2817 xmlGenericError(xmlGenericErrorContext,
2818 "malloc of %d byte failed\n", size);
2819 ctxt->instate = state;
2820 return;
2821 }
2822 q = CUR_CHAR(ql);
2823 NEXTL(ql);
2824 r = CUR_CHAR(rl);
2825 NEXTL(rl);
2826 cur = CUR_CHAR(l);
2827 len = 0;
2828 while (IS_CHAR(cur) && /* checked */
2829 ((cur != '>') ||
2830 (r != '-') || (q != '-'))) {
2831 if ((r == '-') && (q == '-') && (len > 1)) {
2832 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2834 ctxt->sax->error(ctxt->userData,
2835 "Comment must not contain '--' (double-hyphen)`\n");
2836 ctxt->wellFormed = 0;
2837 ctxt->disableSAX = 1;
2838 }
2839 if (len + 5 >= size) {
2840 size *= 2;
2841 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2842 if (buf == NULL) {
2843 xmlGenericError(xmlGenericErrorContext,
2844 "realloc of %d byte failed\n", size);
2845 ctxt->instate = state;
2846 return;
2847 }
2848 }
2849 COPY_BUF(ql,buf,len,q);
2850 q = r;
2851 ql = rl;
2852 r = cur;
2853 rl = l;
2854
2855 count++;
2856 if (count > 50) {
2857 GROW;
2858 count = 0;
2859 }
2860 NEXTL(l);
2861 cur = CUR_CHAR(l);
2862 if (cur == 0) {
2863 SHRINK;
2864 GROW;
2865 cur = CUR_CHAR(l);
2866 }
2867 }
2868 buf[len] = 0;
2869 if (!IS_CHAR(cur)) {
2870 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2872 ctxt->sax->error(ctxt->userData,
2873 "Comment not terminated \n<!--%.50s\n", buf);
2874 ctxt->wellFormed = 0;
2875 ctxt->disableSAX = 1;
2876 xmlFree(buf);
2877 } else {
2878 if (input != ctxt->input) {
2879 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882"Comment doesn't start and stop in the same entity\n");
2883 ctxt->wellFormed = 0;
2884 ctxt->disableSAX = 1;
2885 }
2886 NEXT;
2887 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2888 (!ctxt->disableSAX))
2889 ctxt->sax->comment(ctxt->userData, buf);
2890 xmlFree(buf);
2891 }
2892 ctxt->instate = state;
2893}
2894
2895/**
2896 * xmlParsePITarget:
2897 * @ctxt: an XML parser context
2898 *
2899 * parse the name of a PI
2900 *
2901 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2902 *
2903 * Returns the PITarget name or NULL
2904 */
2905
2906xmlChar *
2907xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2908 xmlChar *name;
2909
2910 name = xmlParseName(ctxt);
2911 if ((name != NULL) &&
2912 ((name[0] == 'x') || (name[0] == 'X')) &&
2913 ((name[1] == 'm') || (name[1] == 'M')) &&
2914 ((name[2] == 'l') || (name[2] == 'L'))) {
2915 int i;
2916 if ((name[0] == 'x') && (name[1] == 'm') &&
2917 (name[2] == 'l') && (name[3] == 0)) {
2918 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2920 ctxt->sax->error(ctxt->userData,
2921 "XML declaration allowed only at the start of the document\n");
2922 ctxt->wellFormed = 0;
2923 ctxt->disableSAX = 1;
2924 return(name);
2925 } else if (name[3] == 0) {
2926 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2928 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2929 ctxt->wellFormed = 0;
2930 ctxt->disableSAX = 1;
2931 return(name);
2932 }
2933 for (i = 0;;i++) {
2934 if (xmlW3CPIs[i] == NULL) break;
2935 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2936 return(name);
2937 }
2938 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2939 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2940 ctxt->sax->warning(ctxt->userData,
2941 "xmlParsePItarget: invalid name prefix 'xml'\n");
2942 }
2943 }
2944 return(name);
2945}
2946
2947/**
2948 * xmlParsePI:
2949 * @ctxt: an XML parser context
2950 *
2951 * parse an XML Processing Instruction.
2952 *
2953 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2954 *
2955 * The processing is transfered to SAX once parsed.
2956 */
2957
2958void
2959xmlParsePI(xmlParserCtxtPtr ctxt) {
2960 xmlChar *buf = NULL;
2961 int len = 0;
2962 int size = XML_PARSER_BUFFER_SIZE;
2963 int cur, l;
2964 xmlChar *target;
2965 xmlParserInputState state;
2966 int count = 0;
2967
2968 if ((RAW == '<') && (NXT(1) == '?')) {
2969 xmlParserInputPtr input = ctxt->input;
2970 state = ctxt->instate;
2971 ctxt->instate = XML_PARSER_PI;
2972 /*
2973 * this is a Processing Instruction.
2974 */
2975 SKIP(2);
2976 SHRINK;
2977
2978 /*
2979 * Parse the target name and check for special support like
2980 * namespace.
2981 */
2982 target = xmlParsePITarget(ctxt);
2983 if (target != NULL) {
2984 if ((RAW == '?') && (NXT(1) == '>')) {
2985 if (input != ctxt->input) {
2986 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2988 ctxt->sax->error(ctxt->userData,
2989 "PI declaration doesn't start and stop in the same entity\n");
2990 ctxt->wellFormed = 0;
2991 ctxt->disableSAX = 1;
2992 }
2993 SKIP(2);
2994
2995 /*
2996 * SAX: PI detected.
2997 */
2998 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2999 (ctxt->sax->processingInstruction != NULL))
3000 ctxt->sax->processingInstruction(ctxt->userData,
3001 target, NULL);
3002 ctxt->instate = state;
3003 xmlFree(target);
3004 return;
3005 }
3006 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3007 if (buf == NULL) {
3008 xmlGenericError(xmlGenericErrorContext,
3009 "malloc of %d byte failed\n", size);
3010 ctxt->instate = state;
3011 return;
3012 }
3013 cur = CUR;
3014 if (!IS_BLANK(cur)) {
3015 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3017 ctxt->sax->error(ctxt->userData,
3018 "xmlParsePI: PI %s space expected\n", target);
3019 ctxt->wellFormed = 0;
3020 ctxt->disableSAX = 1;
3021 }
3022 SKIP_BLANKS;
3023 cur = CUR_CHAR(l);
3024 while (IS_CHAR(cur) && /* checked */
3025 ((cur != '?') || (NXT(1) != '>'))) {
3026 if (len + 5 >= size) {
3027 size *= 2;
3028 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3029 if (buf == NULL) {
3030 xmlGenericError(xmlGenericErrorContext,
3031 "realloc of %d byte failed\n", size);
3032 ctxt->instate = state;
3033 return;
3034 }
3035 }
3036 count++;
3037 if (count > 50) {
3038 GROW;
3039 count = 0;
3040 }
3041 COPY_BUF(l,buf,len,cur);
3042 NEXTL(l);
3043 cur = CUR_CHAR(l);
3044 if (cur == 0) {
3045 SHRINK;
3046 GROW;
3047 cur = CUR_CHAR(l);
3048 }
3049 }
3050 buf[len] = 0;
3051 if (cur != '?') {
3052 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3054 ctxt->sax->error(ctxt->userData,
3055 "xmlParsePI: PI %s never end ...\n", target);
3056 ctxt->wellFormed = 0;
3057 ctxt->disableSAX = 1;
3058 } else {
3059 if (input != ctxt->input) {
3060 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3062 ctxt->sax->error(ctxt->userData,
3063 "PI declaration doesn't start and stop in the same entity\n");
3064 ctxt->wellFormed = 0;
3065 ctxt->disableSAX = 1;
3066 }
3067 SKIP(2);
3068
3069 /*
3070 * SAX: PI detected.
3071 */
3072 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3073 (ctxt->sax->processingInstruction != NULL))
3074 ctxt->sax->processingInstruction(ctxt->userData,
3075 target, buf);
3076 }
3077 xmlFree(buf);
3078 xmlFree(target);
3079 } else {
3080 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3082 ctxt->sax->error(ctxt->userData,
3083 "xmlParsePI : no target name\n");
3084 ctxt->wellFormed = 0;
3085 ctxt->disableSAX = 1;
3086 }
3087 ctxt->instate = state;
3088 }
3089}
3090
3091/**
3092 * xmlParseNotationDecl:
3093 * @ctxt: an XML parser context
3094 *
3095 * parse a notation declaration
3096 *
3097 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3098 *
3099 * Hence there is actually 3 choices:
3100 * 'PUBLIC' S PubidLiteral
3101 * 'PUBLIC' S PubidLiteral S SystemLiteral
3102 * and 'SYSTEM' S SystemLiteral
3103 *
3104 * See the NOTE on xmlParseExternalID().
3105 */
3106
3107void
3108xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3109 xmlChar *name;
3110 xmlChar *Pubid;
3111 xmlChar *Systemid;
3112
3113 if ((RAW == '<') && (NXT(1) == '!') &&
3114 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3115 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3116 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3117 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3118 xmlParserInputPtr input = ctxt->input;
3119 SHRINK;
3120 SKIP(10);
3121 if (!IS_BLANK(CUR)) {
3122 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3123 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3124 ctxt->sax->error(ctxt->userData,
3125 "Space required after '<!NOTATION'\n");
3126 ctxt->wellFormed = 0;
3127 ctxt->disableSAX = 1;
3128 return;
3129 }
3130 SKIP_BLANKS;
3131
Daniel Veillard76d66f42001-05-16 21:05:17 +00003132 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003133 if (name == NULL) {
3134 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3136 ctxt->sax->error(ctxt->userData,
3137 "NOTATION: Name expected here\n");
3138 ctxt->wellFormed = 0;
3139 ctxt->disableSAX = 1;
3140 return;
3141 }
3142 if (!IS_BLANK(CUR)) {
3143 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3145 ctxt->sax->error(ctxt->userData,
3146 "Space required after the NOTATION name'\n");
3147 ctxt->wellFormed = 0;
3148 ctxt->disableSAX = 1;
3149 return;
3150 }
3151 SKIP_BLANKS;
3152
3153 /*
3154 * Parse the IDs.
3155 */
3156 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3157 SKIP_BLANKS;
3158
3159 if (RAW == '>') {
3160 if (input != ctxt->input) {
3161 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3163 ctxt->sax->error(ctxt->userData,
3164"Notation declaration doesn't start and stop in the same entity\n");
3165 ctxt->wellFormed = 0;
3166 ctxt->disableSAX = 1;
3167 }
3168 NEXT;
3169 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3170 (ctxt->sax->notationDecl != NULL))
3171 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3172 } else {
3173 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3175 ctxt->sax->error(ctxt->userData,
3176 "'>' required to close NOTATION declaration\n");
3177 ctxt->wellFormed = 0;
3178 ctxt->disableSAX = 1;
3179 }
3180 xmlFree(name);
3181 if (Systemid != NULL) xmlFree(Systemid);
3182 if (Pubid != NULL) xmlFree(Pubid);
3183 }
3184}
3185
3186/**
3187 * xmlParseEntityDecl:
3188 * @ctxt: an XML parser context
3189 *
3190 * parse <!ENTITY declarations
3191 *
3192 * [70] EntityDecl ::= GEDecl | PEDecl
3193 *
3194 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3195 *
3196 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3197 *
3198 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3199 *
3200 * [74] PEDef ::= EntityValue | ExternalID
3201 *
3202 * [76] NDataDecl ::= S 'NDATA' S Name
3203 *
3204 * [ VC: Notation Declared ]
3205 * The Name must match the declared name of a notation.
3206 */
3207
3208void
3209xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3210 xmlChar *name = NULL;
3211 xmlChar *value = NULL;
3212 xmlChar *URI = NULL, *literal = NULL;
3213 xmlChar *ndata = NULL;
3214 int isParameter = 0;
3215 xmlChar *orig = NULL;
3216
3217 GROW;
3218 if ((RAW == '<') && (NXT(1) == '!') &&
3219 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3220 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3221 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3222 xmlParserInputPtr input = ctxt->input;
3223 ctxt->instate = XML_PARSER_ENTITY_DECL;
3224 SHRINK;
3225 SKIP(8);
3226 if (!IS_BLANK(CUR)) {
3227 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3229 ctxt->sax->error(ctxt->userData,
3230 "Space required after '<!ENTITY'\n");
3231 ctxt->wellFormed = 0;
3232 ctxt->disableSAX = 1;
3233 }
3234 SKIP_BLANKS;
3235
3236 if (RAW == '%') {
3237 NEXT;
3238 if (!IS_BLANK(CUR)) {
3239 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3241 ctxt->sax->error(ctxt->userData,
3242 "Space required after '%'\n");
3243 ctxt->wellFormed = 0;
3244 ctxt->disableSAX = 1;
3245 }
3246 SKIP_BLANKS;
3247 isParameter = 1;
3248 }
3249
Daniel Veillard76d66f42001-05-16 21:05:17 +00003250 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003251 if (name == NULL) {
3252 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3254 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3255 ctxt->wellFormed = 0;
3256 ctxt->disableSAX = 1;
3257 return;
3258 }
3259 if (!IS_BLANK(CUR)) {
3260 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3262 ctxt->sax->error(ctxt->userData,
3263 "Space required after the entity name\n");
3264 ctxt->wellFormed = 0;
3265 ctxt->disableSAX = 1;
3266 }
3267 SKIP_BLANKS;
3268
3269 /*
3270 * handle the various case of definitions...
3271 */
3272 if (isParameter) {
3273 if ((RAW == '"') || (RAW == '\'')) {
3274 value = xmlParseEntityValue(ctxt, &orig);
3275 if (value) {
3276 if ((ctxt->sax != NULL) &&
3277 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3278 ctxt->sax->entityDecl(ctxt->userData, name,
3279 XML_INTERNAL_PARAMETER_ENTITY,
3280 NULL, NULL, value);
3281 }
3282 } else {
3283 URI = xmlParseExternalID(ctxt, &literal, 1);
3284 if ((URI == NULL) && (literal == NULL)) {
3285 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3287 ctxt->sax->error(ctxt->userData,
3288 "Entity value required\n");
3289 ctxt->wellFormed = 0;
3290 ctxt->disableSAX = 1;
3291 }
3292 if (URI) {
3293 xmlURIPtr uri;
3294
3295 uri = xmlParseURI((const char *) URI);
3296 if (uri == NULL) {
3297 ctxt->errNo = XML_ERR_INVALID_URI;
3298 if ((ctxt->sax != NULL) &&
3299 (!ctxt->disableSAX) &&
3300 (ctxt->sax->error != NULL))
3301 ctxt->sax->error(ctxt->userData,
3302 "Invalid URI: %s\n", URI);
3303 ctxt->wellFormed = 0;
3304 } else {
3305 if (uri->fragment != NULL) {
3306 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3307 if ((ctxt->sax != NULL) &&
3308 (!ctxt->disableSAX) &&
3309 (ctxt->sax->error != NULL))
3310 ctxt->sax->error(ctxt->userData,
3311 "Fragment not allowed: %s\n", URI);
3312 ctxt->wellFormed = 0;
3313 } else {
3314 if ((ctxt->sax != NULL) &&
3315 (!ctxt->disableSAX) &&
3316 (ctxt->sax->entityDecl != NULL))
3317 ctxt->sax->entityDecl(ctxt->userData, name,
3318 XML_EXTERNAL_PARAMETER_ENTITY,
3319 literal, URI, NULL);
3320 }
3321 xmlFreeURI(uri);
3322 }
3323 }
3324 }
3325 } else {
3326 if ((RAW == '"') || (RAW == '\'')) {
3327 value = xmlParseEntityValue(ctxt, &orig);
3328 if ((ctxt->sax != NULL) &&
3329 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3330 ctxt->sax->entityDecl(ctxt->userData, name,
3331 XML_INTERNAL_GENERAL_ENTITY,
3332 NULL, NULL, value);
3333 } else {
3334 URI = xmlParseExternalID(ctxt, &literal, 1);
3335 if ((URI == NULL) && (literal == NULL)) {
3336 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3338 ctxt->sax->error(ctxt->userData,
3339 "Entity value required\n");
3340 ctxt->wellFormed = 0;
3341 ctxt->disableSAX = 1;
3342 }
3343 if (URI) {
3344 xmlURIPtr uri;
3345
3346 uri = xmlParseURI((const char *)URI);
3347 if (uri == NULL) {
3348 ctxt->errNo = XML_ERR_INVALID_URI;
3349 if ((ctxt->sax != NULL) &&
3350 (!ctxt->disableSAX) &&
3351 (ctxt->sax->error != NULL))
3352 ctxt->sax->error(ctxt->userData,
3353 "Invalid URI: %s\n", URI);
3354 ctxt->wellFormed = 0;
3355 } else {
3356 if (uri->fragment != NULL) {
3357 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3358 if ((ctxt->sax != NULL) &&
3359 (!ctxt->disableSAX) &&
3360 (ctxt->sax->error != NULL))
3361 ctxt->sax->error(ctxt->userData,
3362 "Fragment not allowed: %s\n", URI);
3363 ctxt->wellFormed = 0;
3364 }
3365 xmlFreeURI(uri);
3366 }
3367 }
3368 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3369 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3370 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3371 ctxt->sax->error(ctxt->userData,
3372 "Space required before 'NDATA'\n");
3373 ctxt->wellFormed = 0;
3374 ctxt->disableSAX = 1;
3375 }
3376 SKIP_BLANKS;
3377 if ((RAW == 'N') && (NXT(1) == 'D') &&
3378 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3379 (NXT(4) == 'A')) {
3380 SKIP(5);
3381 if (!IS_BLANK(CUR)) {
3382 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3384 ctxt->sax->error(ctxt->userData,
3385 "Space required after 'NDATA'\n");
3386 ctxt->wellFormed = 0;
3387 ctxt->disableSAX = 1;
3388 }
3389 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003390 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003391 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3392 (ctxt->sax->unparsedEntityDecl != NULL))
3393 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3394 literal, URI, ndata);
3395 } else {
3396 if ((ctxt->sax != NULL) &&
3397 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3398 ctxt->sax->entityDecl(ctxt->userData, name,
3399 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3400 literal, URI, NULL);
3401 }
3402 }
3403 }
3404 SKIP_BLANKS;
3405 if (RAW != '>') {
3406 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3408 ctxt->sax->error(ctxt->userData,
3409 "xmlParseEntityDecl: entity %s not terminated\n", name);
3410 ctxt->wellFormed = 0;
3411 ctxt->disableSAX = 1;
3412 } else {
3413 if (input != ctxt->input) {
3414 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3416 ctxt->sax->error(ctxt->userData,
3417"Entity declaration doesn't start and stop in the same entity\n");
3418 ctxt->wellFormed = 0;
3419 ctxt->disableSAX = 1;
3420 }
3421 NEXT;
3422 }
3423 if (orig != NULL) {
3424 /*
3425 * Ugly mechanism to save the raw entity value.
3426 */
3427 xmlEntityPtr cur = NULL;
3428
3429 if (isParameter) {
3430 if ((ctxt->sax != NULL) &&
3431 (ctxt->sax->getParameterEntity != NULL))
3432 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3433 } else {
3434 if ((ctxt->sax != NULL) &&
3435 (ctxt->sax->getEntity != NULL))
3436 cur = ctxt->sax->getEntity(ctxt->userData, name);
3437 }
3438 if (cur != NULL) {
3439 if (cur->orig != NULL)
3440 xmlFree(orig);
3441 else
3442 cur->orig = orig;
3443 } else
3444 xmlFree(orig);
3445 }
3446 if (name != NULL) xmlFree(name);
3447 if (value != NULL) xmlFree(value);
3448 if (URI != NULL) xmlFree(URI);
3449 if (literal != NULL) xmlFree(literal);
3450 if (ndata != NULL) xmlFree(ndata);
3451 }
3452}
3453
3454/**
3455 * xmlParseDefaultDecl:
3456 * @ctxt: an XML parser context
3457 * @value: Receive a possible fixed default value for the attribute
3458 *
3459 * Parse an attribute default declaration
3460 *
3461 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3462 *
3463 * [ VC: Required Attribute ]
3464 * if the default declaration is the keyword #REQUIRED, then the
3465 * attribute must be specified for all elements of the type in the
3466 * attribute-list declaration.
3467 *
3468 * [ VC: Attribute Default Legal ]
3469 * The declared default value must meet the lexical constraints of
3470 * the declared attribute type c.f. xmlValidateAttributeDecl()
3471 *
3472 * [ VC: Fixed Attribute Default ]
3473 * if an attribute has a default value declared with the #FIXED
3474 * keyword, instances of that attribute must match the default value.
3475 *
3476 * [ WFC: No < in Attribute Values ]
3477 * handled in xmlParseAttValue()
3478 *
3479 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3480 * or XML_ATTRIBUTE_FIXED.
3481 */
3482
3483int
3484xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3485 int val;
3486 xmlChar *ret;
3487
3488 *value = NULL;
3489 if ((RAW == '#') && (NXT(1) == 'R') &&
3490 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3491 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3492 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3493 (NXT(8) == 'D')) {
3494 SKIP(9);
3495 return(XML_ATTRIBUTE_REQUIRED);
3496 }
3497 if ((RAW == '#') && (NXT(1) == 'I') &&
3498 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3499 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3500 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3501 SKIP(8);
3502 return(XML_ATTRIBUTE_IMPLIED);
3503 }
3504 val = XML_ATTRIBUTE_NONE;
3505 if ((RAW == '#') && (NXT(1) == 'F') &&
3506 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3507 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3508 SKIP(6);
3509 val = XML_ATTRIBUTE_FIXED;
3510 if (!IS_BLANK(CUR)) {
3511 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3513 ctxt->sax->error(ctxt->userData,
3514 "Space required after '#FIXED'\n");
3515 ctxt->wellFormed = 0;
3516 ctxt->disableSAX = 1;
3517 }
3518 SKIP_BLANKS;
3519 }
3520 ret = xmlParseAttValue(ctxt);
3521 ctxt->instate = XML_PARSER_DTD;
3522 if (ret == NULL) {
3523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3524 ctxt->sax->error(ctxt->userData,
3525 "Attribute default value declaration error\n");
3526 ctxt->wellFormed = 0;
3527 ctxt->disableSAX = 1;
3528 } else
3529 *value = ret;
3530 return(val);
3531}
3532
3533/**
3534 * xmlParseNotationType:
3535 * @ctxt: an XML parser context
3536 *
3537 * parse an Notation attribute type.
3538 *
3539 * Note: the leading 'NOTATION' S part has already being parsed...
3540 *
3541 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3542 *
3543 * [ VC: Notation Attributes ]
3544 * Values of this type must match one of the notation names included
3545 * in the declaration; all notation names in the declaration must be declared.
3546 *
3547 * Returns: the notation attribute tree built while parsing
3548 */
3549
3550xmlEnumerationPtr
3551xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3552 xmlChar *name;
3553 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3554
3555 if (RAW != '(') {
3556 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3558 ctxt->sax->error(ctxt->userData,
3559 "'(' required to start 'NOTATION'\n");
3560 ctxt->wellFormed = 0;
3561 ctxt->disableSAX = 1;
3562 return(NULL);
3563 }
3564 SHRINK;
3565 do {
3566 NEXT;
3567 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003568 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003569 if (name == NULL) {
3570 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3572 ctxt->sax->error(ctxt->userData,
3573 "Name expected in NOTATION declaration\n");
3574 ctxt->wellFormed = 0;
3575 ctxt->disableSAX = 1;
3576 return(ret);
3577 }
3578 cur = xmlCreateEnumeration(name);
3579 xmlFree(name);
3580 if (cur == NULL) return(ret);
3581 if (last == NULL) ret = last = cur;
3582 else {
3583 last->next = cur;
3584 last = cur;
3585 }
3586 SKIP_BLANKS;
3587 } while (RAW == '|');
3588 if (RAW != ')') {
3589 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3591 ctxt->sax->error(ctxt->userData,
3592 "')' required to finish NOTATION declaration\n");
3593 ctxt->wellFormed = 0;
3594 ctxt->disableSAX = 1;
3595 if ((last != NULL) && (last != ret))
3596 xmlFreeEnumeration(last);
3597 return(ret);
3598 }
3599 NEXT;
3600 return(ret);
3601}
3602
3603/**
3604 * xmlParseEnumerationType:
3605 * @ctxt: an XML parser context
3606 *
3607 * parse an Enumeration attribute type.
3608 *
3609 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3610 *
3611 * [ VC: Enumeration ]
3612 * Values of this type must match one of the Nmtoken tokens in
3613 * the declaration
3614 *
3615 * Returns: the enumeration attribute tree built while parsing
3616 */
3617
3618xmlEnumerationPtr
3619xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3620 xmlChar *name;
3621 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3622
3623 if (RAW != '(') {
3624 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3626 ctxt->sax->error(ctxt->userData,
3627 "'(' required to start ATTLIST enumeration\n");
3628 ctxt->wellFormed = 0;
3629 ctxt->disableSAX = 1;
3630 return(NULL);
3631 }
3632 SHRINK;
3633 do {
3634 NEXT;
3635 SKIP_BLANKS;
3636 name = xmlParseNmtoken(ctxt);
3637 if (name == NULL) {
3638 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3640 ctxt->sax->error(ctxt->userData,
3641 "NmToken expected in ATTLIST enumeration\n");
3642 ctxt->wellFormed = 0;
3643 ctxt->disableSAX = 1;
3644 return(ret);
3645 }
3646 cur = xmlCreateEnumeration(name);
3647 xmlFree(name);
3648 if (cur == NULL) return(ret);
3649 if (last == NULL) ret = last = cur;
3650 else {
3651 last->next = cur;
3652 last = cur;
3653 }
3654 SKIP_BLANKS;
3655 } while (RAW == '|');
3656 if (RAW != ')') {
3657 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3659 ctxt->sax->error(ctxt->userData,
3660 "')' required to finish ATTLIST enumeration\n");
3661 ctxt->wellFormed = 0;
3662 ctxt->disableSAX = 1;
3663 return(ret);
3664 }
3665 NEXT;
3666 return(ret);
3667}
3668
3669/**
3670 * xmlParseEnumeratedType:
3671 * @ctxt: an XML parser context
3672 * @tree: the enumeration tree built while parsing
3673 *
3674 * parse an Enumerated attribute type.
3675 *
3676 * [57] EnumeratedType ::= NotationType | Enumeration
3677 *
3678 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3679 *
3680 *
3681 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3682 */
3683
3684int
3685xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3686 if ((RAW == 'N') && (NXT(1) == 'O') &&
3687 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3688 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3689 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3690 SKIP(8);
3691 if (!IS_BLANK(CUR)) {
3692 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3694 ctxt->sax->error(ctxt->userData,
3695 "Space required after 'NOTATION'\n");
3696 ctxt->wellFormed = 0;
3697 ctxt->disableSAX = 1;
3698 return(0);
3699 }
3700 SKIP_BLANKS;
3701 *tree = xmlParseNotationType(ctxt);
3702 if (*tree == NULL) return(0);
3703 return(XML_ATTRIBUTE_NOTATION);
3704 }
3705 *tree = xmlParseEnumerationType(ctxt);
3706 if (*tree == NULL) return(0);
3707 return(XML_ATTRIBUTE_ENUMERATION);
3708}
3709
3710/**
3711 * xmlParseAttributeType:
3712 * @ctxt: an XML parser context
3713 * @tree: the enumeration tree built while parsing
3714 *
3715 * parse the Attribute list def for an element
3716 *
3717 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3718 *
3719 * [55] StringType ::= 'CDATA'
3720 *
3721 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3722 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3723 *
3724 * Validity constraints for attribute values syntax are checked in
3725 * xmlValidateAttributeValue()
3726 *
3727 * [ VC: ID ]
3728 * Values of type ID must match the Name production. A name must not
3729 * appear more than once in an XML document as a value of this type;
3730 * i.e., ID values must uniquely identify the elements which bear them.
3731 *
3732 * [ VC: One ID per Element Type ]
3733 * No element type may have more than one ID attribute specified.
3734 *
3735 * [ VC: ID Attribute Default ]
3736 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3737 *
3738 * [ VC: IDREF ]
3739 * Values of type IDREF must match the Name production, and values
3740 * of type IDREFS must match Names; each IDREF Name must match the value
3741 * of an ID attribute on some element in the XML document; i.e. IDREF
3742 * values must match the value of some ID attribute.
3743 *
3744 * [ VC: Entity Name ]
3745 * Values of type ENTITY must match the Name production, values
3746 * of type ENTITIES must match Names; each Entity Name must match the
3747 * name of an unparsed entity declared in the DTD.
3748 *
3749 * [ VC: Name Token ]
3750 * Values of type NMTOKEN must match the Nmtoken production; values
3751 * of type NMTOKENS must match Nmtokens.
3752 *
3753 * Returns the attribute type
3754 */
3755int
3756xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3757 SHRINK;
3758 if ((RAW == 'C') && (NXT(1) == 'D') &&
3759 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3760 (NXT(4) == 'A')) {
3761 SKIP(5);
3762 return(XML_ATTRIBUTE_CDATA);
3763 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3764 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3765 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3766 SKIP(6);
3767 return(XML_ATTRIBUTE_IDREFS);
3768 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3769 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3770 (NXT(4) == 'F')) {
3771 SKIP(5);
3772 return(XML_ATTRIBUTE_IDREF);
3773 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3774 SKIP(2);
3775 return(XML_ATTRIBUTE_ID);
3776 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3777 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3778 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3779 SKIP(6);
3780 return(XML_ATTRIBUTE_ENTITY);
3781 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3782 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3783 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3784 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3785 SKIP(8);
3786 return(XML_ATTRIBUTE_ENTITIES);
3787 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3788 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3789 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3790 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3791 SKIP(8);
3792 return(XML_ATTRIBUTE_NMTOKENS);
3793 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3794 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3795 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3796 (NXT(6) == 'N')) {
3797 SKIP(7);
3798 return(XML_ATTRIBUTE_NMTOKEN);
3799 }
3800 return(xmlParseEnumeratedType(ctxt, tree));
3801}
3802
3803/**
3804 * xmlParseAttributeListDecl:
3805 * @ctxt: an XML parser context
3806 *
3807 * : parse the Attribute list def for an element
3808 *
3809 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3810 *
3811 * [53] AttDef ::= S Name S AttType S DefaultDecl
3812 *
3813 */
3814void
3815xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3816 xmlChar *elemName;
3817 xmlChar *attrName;
3818 xmlEnumerationPtr tree;
3819
3820 if ((RAW == '<') && (NXT(1) == '!') &&
3821 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3822 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3823 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3824 (NXT(8) == 'T')) {
3825 xmlParserInputPtr input = ctxt->input;
3826
3827 SKIP(9);
3828 if (!IS_BLANK(CUR)) {
3829 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3831 ctxt->sax->error(ctxt->userData,
3832 "Space required after '<!ATTLIST'\n");
3833 ctxt->wellFormed = 0;
3834 ctxt->disableSAX = 1;
3835 }
3836 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003837 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003838 if (elemName == NULL) {
3839 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3841 ctxt->sax->error(ctxt->userData,
3842 "ATTLIST: no name for Element\n");
3843 ctxt->wellFormed = 0;
3844 ctxt->disableSAX = 1;
3845 return;
3846 }
3847 SKIP_BLANKS;
3848 GROW;
3849 while (RAW != '>') {
3850 const xmlChar *check = CUR_PTR;
3851 int type;
3852 int def;
3853 xmlChar *defaultValue = NULL;
3854
3855 GROW;
3856 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003857 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003858 if (attrName == NULL) {
3859 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3861 ctxt->sax->error(ctxt->userData,
3862 "ATTLIST: no name for Attribute\n");
3863 ctxt->wellFormed = 0;
3864 ctxt->disableSAX = 1;
3865 break;
3866 }
3867 GROW;
3868 if (!IS_BLANK(CUR)) {
3869 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3871 ctxt->sax->error(ctxt->userData,
3872 "Space required after the attribute name\n");
3873 ctxt->wellFormed = 0;
3874 ctxt->disableSAX = 1;
3875 if (attrName != NULL)
3876 xmlFree(attrName);
3877 if (defaultValue != NULL)
3878 xmlFree(defaultValue);
3879 break;
3880 }
3881 SKIP_BLANKS;
3882
3883 type = xmlParseAttributeType(ctxt, &tree);
3884 if (type <= 0) {
3885 if (attrName != NULL)
3886 xmlFree(attrName);
3887 if (defaultValue != NULL)
3888 xmlFree(defaultValue);
3889 break;
3890 }
3891
3892 GROW;
3893 if (!IS_BLANK(CUR)) {
3894 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3896 ctxt->sax->error(ctxt->userData,
3897 "Space required after the attribute type\n");
3898 ctxt->wellFormed = 0;
3899 ctxt->disableSAX = 1;
3900 if (attrName != NULL)
3901 xmlFree(attrName);
3902 if (defaultValue != NULL)
3903 xmlFree(defaultValue);
3904 if (tree != NULL)
3905 xmlFreeEnumeration(tree);
3906 break;
3907 }
3908 SKIP_BLANKS;
3909
3910 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3911 if (def <= 0) {
3912 if (attrName != NULL)
3913 xmlFree(attrName);
3914 if (defaultValue != NULL)
3915 xmlFree(defaultValue);
3916 if (tree != NULL)
3917 xmlFreeEnumeration(tree);
3918 break;
3919 }
3920
3921 GROW;
3922 if (RAW != '>') {
3923 if (!IS_BLANK(CUR)) {
3924 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3926 ctxt->sax->error(ctxt->userData,
3927 "Space required after the attribute default value\n");
3928 ctxt->wellFormed = 0;
3929 ctxt->disableSAX = 1;
3930 if (attrName != NULL)
3931 xmlFree(attrName);
3932 if (defaultValue != NULL)
3933 xmlFree(defaultValue);
3934 if (tree != NULL)
3935 xmlFreeEnumeration(tree);
3936 break;
3937 }
3938 SKIP_BLANKS;
3939 }
3940 if (check == CUR_PTR) {
3941 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3943 ctxt->sax->error(ctxt->userData,
3944 "xmlParseAttributeListDecl: detected internal error\n");
3945 if (attrName != NULL)
3946 xmlFree(attrName);
3947 if (defaultValue != NULL)
3948 xmlFree(defaultValue);
3949 if (tree != NULL)
3950 xmlFreeEnumeration(tree);
3951 break;
3952 }
3953 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3954 (ctxt->sax->attributeDecl != NULL))
3955 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3956 type, def, defaultValue, tree);
3957 if (attrName != NULL)
3958 xmlFree(attrName);
3959 if (defaultValue != NULL)
3960 xmlFree(defaultValue);
3961 GROW;
3962 }
3963 if (RAW == '>') {
3964 if (input != ctxt->input) {
3965 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3967 ctxt->sax->error(ctxt->userData,
3968"Attribute list declaration doesn't start and stop in the same entity\n");
3969 ctxt->wellFormed = 0;
3970 ctxt->disableSAX = 1;
3971 }
3972 NEXT;
3973 }
3974
3975 xmlFree(elemName);
3976 }
3977}
3978
3979/**
3980 * xmlParseElementMixedContentDecl:
3981 * @ctxt: an XML parser context
3982 *
3983 * parse the declaration for a Mixed Element content
3984 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3985 *
3986 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3987 * '(' S? '#PCDATA' S? ')'
3988 *
3989 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3990 *
3991 * [ VC: No Duplicate Types ]
3992 * The same name must not appear more than once in a single
3993 * mixed-content declaration.
3994 *
3995 * returns: the list of the xmlElementContentPtr describing the element choices
3996 */
3997xmlElementContentPtr
3998xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3999 xmlElementContentPtr ret = NULL, cur = NULL, n;
4000 xmlChar *elem = NULL;
4001
4002 GROW;
4003 if ((RAW == '#') && (NXT(1) == 'P') &&
4004 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4005 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4006 (NXT(6) == 'A')) {
4007 SKIP(7);
4008 SKIP_BLANKS;
4009 SHRINK;
4010 if (RAW == ')') {
4011 ctxt->entity = ctxt->input;
4012 NEXT;
4013 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4014 if (RAW == '*') {
4015 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4016 NEXT;
4017 }
4018 return(ret);
4019 }
4020 if ((RAW == '(') || (RAW == '|')) {
4021 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4022 if (ret == NULL) return(NULL);
4023 }
4024 while (RAW == '|') {
4025 NEXT;
4026 if (elem == NULL) {
4027 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4028 if (ret == NULL) return(NULL);
4029 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004030 if (cur != NULL)
4031 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004032 cur = ret;
4033 } else {
4034 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4035 if (n == NULL) return(NULL);
4036 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004037 if (n->c1 != NULL)
4038 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004039 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004040 if (n != NULL)
4041 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004042 cur = n;
4043 xmlFree(elem);
4044 }
4045 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004046 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004047 if (elem == NULL) {
4048 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4050 ctxt->sax->error(ctxt->userData,
4051 "xmlParseElementMixedContentDecl : Name expected\n");
4052 ctxt->wellFormed = 0;
4053 ctxt->disableSAX = 1;
4054 xmlFreeElementContent(cur);
4055 return(NULL);
4056 }
4057 SKIP_BLANKS;
4058 GROW;
4059 }
4060 if ((RAW == ')') && (NXT(1) == '*')) {
4061 if (elem != NULL) {
4062 cur->c2 = xmlNewElementContent(elem,
4063 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004064 if (cur->c2 != NULL)
4065 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004066 xmlFree(elem);
4067 }
4068 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4069 ctxt->entity = ctxt->input;
4070 SKIP(2);
4071 } else {
4072 if (elem != NULL) xmlFree(elem);
4073 xmlFreeElementContent(ret);
4074 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4076 ctxt->sax->error(ctxt->userData,
4077 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4078 ctxt->wellFormed = 0;
4079 ctxt->disableSAX = 1;
4080 return(NULL);
4081 }
4082
4083 } else {
4084 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4086 ctxt->sax->error(ctxt->userData,
4087 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4088 ctxt->wellFormed = 0;
4089 ctxt->disableSAX = 1;
4090 }
4091 return(ret);
4092}
4093
4094/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004095 * xmlParseElementChildrenContentD:
4096 * @ctxt: an XML parser context
4097 *
4098 * VMS version of xmlParseElementChildrenContentDecl()
4099 *
4100 * Returns the tree of xmlElementContentPtr describing the element
4101 * hierarchy.
4102 */
4103/**
Owen Taylor3473f882001-02-23 17:55:21 +00004104 * xmlParseElementChildrenContentDecl:
4105 * @ctxt: an XML parser context
4106 *
4107 * parse the declaration for an Element children (non-mixed) content
4108 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4109 *
4110 *
4111 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4112 *
4113 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4114 *
4115 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4116 *
4117 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4118 *
4119 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4120 * TODO Parameter-entity replacement text must be properly nested
4121 * with parenthetized groups. That is to say, if either of the
4122 * opening or closing parentheses in a choice, seq, or Mixed
4123 * construct is contained in the replacement text for a parameter
4124 * entity, both must be contained in the same replacement text. For
4125 * interoperability, if a parameter-entity reference appears in a
4126 * choice, seq, or Mixed construct, its replacement text should not
4127 * be empty, and neither the first nor last non-blank character of
4128 * the replacement text should be a connector (| or ,).
4129 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004130 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004131 * hierarchy.
4132 */
4133xmlElementContentPtr
4134#ifdef VMS
4135xmlParseElementChildrenContentD
4136#else
4137xmlParseElementChildrenContentDecl
4138#endif
4139(xmlParserCtxtPtr ctxt) {
4140 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4141 xmlChar *elem;
4142 xmlChar type = 0;
4143
4144 SKIP_BLANKS;
4145 GROW;
4146 if (RAW == '(') {
4147 /* Recurse on first child */
4148 NEXT;
4149 SKIP_BLANKS;
4150 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4151 SKIP_BLANKS;
4152 GROW;
4153 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004154 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004155 if (elem == NULL) {
4156 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4158 ctxt->sax->error(ctxt->userData,
4159 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4160 ctxt->wellFormed = 0;
4161 ctxt->disableSAX = 1;
4162 return(NULL);
4163 }
4164 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4165 GROW;
4166 if (RAW == '?') {
4167 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4168 NEXT;
4169 } else if (RAW == '*') {
4170 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4171 NEXT;
4172 } else if (RAW == '+') {
4173 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4174 NEXT;
4175 } else {
4176 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4177 }
4178 xmlFree(elem);
4179 GROW;
4180 }
4181 SKIP_BLANKS;
4182 SHRINK;
4183 while (RAW != ')') {
4184 /*
4185 * Each loop we parse one separator and one element.
4186 */
4187 if (RAW == ',') {
4188 if (type == 0) type = CUR;
4189
4190 /*
4191 * Detect "Name | Name , Name" error
4192 */
4193 else if (type != CUR) {
4194 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4196 ctxt->sax->error(ctxt->userData,
4197 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4198 type);
4199 ctxt->wellFormed = 0;
4200 ctxt->disableSAX = 1;
4201 if ((op != NULL) && (op != ret))
4202 xmlFreeElementContent(op);
4203 if ((last != NULL) && (last != ret) &&
4204 (last != ret->c1) && (last != ret->c2))
4205 xmlFreeElementContent(last);
4206 if (ret != NULL)
4207 xmlFreeElementContent(ret);
4208 return(NULL);
4209 }
4210 NEXT;
4211
4212 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4213 if (op == NULL) {
4214 xmlFreeElementContent(ret);
4215 return(NULL);
4216 }
4217 if (last == NULL) {
4218 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004219 if (ret != NULL)
4220 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004221 ret = cur = op;
4222 } else {
4223 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004224 if (op != NULL)
4225 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004226 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004227 if (last != NULL)
4228 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004229 cur =op;
4230 last = NULL;
4231 }
4232 } else if (RAW == '|') {
4233 if (type == 0) type = CUR;
4234
4235 /*
4236 * Detect "Name , Name | Name" error
4237 */
4238 else if (type != CUR) {
4239 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4241 ctxt->sax->error(ctxt->userData,
4242 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4243 type);
4244 ctxt->wellFormed = 0;
4245 ctxt->disableSAX = 1;
4246 if ((op != NULL) && (op != ret) && (op != last))
4247 xmlFreeElementContent(op);
4248 if ((last != NULL) && (last != ret) &&
4249 (last != ret->c1) && (last != ret->c2))
4250 xmlFreeElementContent(last);
4251 if (ret != NULL)
4252 xmlFreeElementContent(ret);
4253 return(NULL);
4254 }
4255 NEXT;
4256
4257 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4258 if (op == NULL) {
4259 if ((op != NULL) && (op != ret))
4260 xmlFreeElementContent(op);
4261 if ((last != NULL) && (last != ret) &&
4262 (last != ret->c1) && (last != ret->c2))
4263 xmlFreeElementContent(last);
4264 if (ret != NULL)
4265 xmlFreeElementContent(ret);
4266 return(NULL);
4267 }
4268 if (last == NULL) {
4269 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004270 if (ret != NULL)
4271 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004272 ret = cur = op;
4273 } else {
4274 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004275 if (op != NULL)
4276 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004277 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004278 if (last != NULL)
4279 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004280 cur =op;
4281 last = NULL;
4282 }
4283 } else {
4284 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4286 ctxt->sax->error(ctxt->userData,
4287 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4288 ctxt->wellFormed = 0;
4289 ctxt->disableSAX = 1;
4290 if ((op != NULL) && (op != ret))
4291 xmlFreeElementContent(op);
4292 if ((last != NULL) && (last != ret) &&
4293 (last != ret->c1) && (last != ret->c2))
4294 xmlFreeElementContent(last);
4295 if (ret != NULL)
4296 xmlFreeElementContent(ret);
4297 return(NULL);
4298 }
4299 GROW;
4300 SKIP_BLANKS;
4301 GROW;
4302 if (RAW == '(') {
4303 /* Recurse on second child */
4304 NEXT;
4305 SKIP_BLANKS;
4306 last = xmlParseElementChildrenContentDecl(ctxt);
4307 SKIP_BLANKS;
4308 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004309 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004310 if (elem == NULL) {
4311 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4313 ctxt->sax->error(ctxt->userData,
4314 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4315 ctxt->wellFormed = 0;
4316 ctxt->disableSAX = 1;
4317 if ((op != NULL) && (op != ret))
4318 xmlFreeElementContent(op);
4319 if ((last != NULL) && (last != ret) &&
4320 (last != ret->c1) && (last != ret->c2))
4321 xmlFreeElementContent(last);
4322 if (ret != NULL)
4323 xmlFreeElementContent(ret);
4324 return(NULL);
4325 }
4326 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4327 xmlFree(elem);
4328 if (RAW == '?') {
4329 last->ocur = XML_ELEMENT_CONTENT_OPT;
4330 NEXT;
4331 } else if (RAW == '*') {
4332 last->ocur = XML_ELEMENT_CONTENT_MULT;
4333 NEXT;
4334 } else if (RAW == '+') {
4335 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4336 NEXT;
4337 } else {
4338 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4339 }
4340 }
4341 SKIP_BLANKS;
4342 GROW;
4343 }
4344 if ((cur != NULL) && (last != NULL)) {
4345 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004346 if (last != NULL)
4347 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004348 }
4349 ctxt->entity = ctxt->input;
4350 NEXT;
4351 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004352 if (ret != NULL)
4353 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004354 NEXT;
4355 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004356 if (ret != NULL)
4357 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004358 NEXT;
4359 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004360 if (ret != NULL)
4361 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004362 NEXT;
4363 }
4364 return(ret);
4365}
4366
4367/**
4368 * xmlParseElementContentDecl:
4369 * @ctxt: an XML parser context
4370 * @name: the name of the element being defined.
4371 * @result: the Element Content pointer will be stored here if any
4372 *
4373 * parse the declaration for an Element content either Mixed or Children,
4374 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4375 *
4376 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4377 *
4378 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4379 */
4380
4381int
4382xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4383 xmlElementContentPtr *result) {
4384
4385 xmlElementContentPtr tree = NULL;
4386 xmlParserInputPtr input = ctxt->input;
4387 int res;
4388
4389 *result = NULL;
4390
4391 if (RAW != '(') {
4392 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4394 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004395 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004396 ctxt->wellFormed = 0;
4397 ctxt->disableSAX = 1;
4398 return(-1);
4399 }
4400 NEXT;
4401 GROW;
4402 SKIP_BLANKS;
4403 if ((RAW == '#') && (NXT(1) == 'P') &&
4404 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4405 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4406 (NXT(6) == 'A')) {
4407 tree = xmlParseElementMixedContentDecl(ctxt);
4408 res = XML_ELEMENT_TYPE_MIXED;
4409 } else {
4410 tree = xmlParseElementChildrenContentDecl(ctxt);
4411 res = XML_ELEMENT_TYPE_ELEMENT;
4412 }
4413 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4414 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4416 ctxt->sax->error(ctxt->userData,
4417"Element content declaration doesn't start and stop in the same entity\n");
4418 ctxt->wellFormed = 0;
4419 ctxt->disableSAX = 1;
4420 }
4421 SKIP_BLANKS;
4422 *result = tree;
4423 return(res);
4424}
4425
4426/**
4427 * xmlParseElementDecl:
4428 * @ctxt: an XML parser context
4429 *
4430 * parse an Element declaration.
4431 *
4432 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4433 *
4434 * [ VC: Unique Element Type Declaration ]
4435 * No element type may be declared more than once
4436 *
4437 * Returns the type of the element, or -1 in case of error
4438 */
4439int
4440xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4441 xmlChar *name;
4442 int ret = -1;
4443 xmlElementContentPtr content = NULL;
4444
4445 GROW;
4446 if ((RAW == '<') && (NXT(1) == '!') &&
4447 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4448 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4449 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4450 (NXT(8) == 'T')) {
4451 xmlParserInputPtr input = ctxt->input;
4452
4453 SKIP(9);
4454 if (!IS_BLANK(CUR)) {
4455 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4457 ctxt->sax->error(ctxt->userData,
4458 "Space required after 'ELEMENT'\n");
4459 ctxt->wellFormed = 0;
4460 ctxt->disableSAX = 1;
4461 }
4462 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004463 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004464 if (name == NULL) {
4465 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4467 ctxt->sax->error(ctxt->userData,
4468 "xmlParseElementDecl: no name for Element\n");
4469 ctxt->wellFormed = 0;
4470 ctxt->disableSAX = 1;
4471 return(-1);
4472 }
4473 while ((RAW == 0) && (ctxt->inputNr > 1))
4474 xmlPopInput(ctxt);
4475 if (!IS_BLANK(CUR)) {
4476 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4478 ctxt->sax->error(ctxt->userData,
4479 "Space required after the element name\n");
4480 ctxt->wellFormed = 0;
4481 ctxt->disableSAX = 1;
4482 }
4483 SKIP_BLANKS;
4484 if ((RAW == 'E') && (NXT(1) == 'M') &&
4485 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4486 (NXT(4) == 'Y')) {
4487 SKIP(5);
4488 /*
4489 * Element must always be empty.
4490 */
4491 ret = XML_ELEMENT_TYPE_EMPTY;
4492 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4493 (NXT(2) == 'Y')) {
4494 SKIP(3);
4495 /*
4496 * Element is a generic container.
4497 */
4498 ret = XML_ELEMENT_TYPE_ANY;
4499 } else if (RAW == '(') {
4500 ret = xmlParseElementContentDecl(ctxt, name, &content);
4501 } else {
4502 /*
4503 * [ WFC: PEs in Internal Subset ] error handling.
4504 */
4505 if ((RAW == '%') && (ctxt->external == 0) &&
4506 (ctxt->inputNr == 1)) {
4507 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4509 ctxt->sax->error(ctxt->userData,
4510 "PEReference: forbidden within markup decl in internal subset\n");
4511 } else {
4512 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4514 ctxt->sax->error(ctxt->userData,
4515 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4516 }
4517 ctxt->wellFormed = 0;
4518 ctxt->disableSAX = 1;
4519 if (name != NULL) xmlFree(name);
4520 return(-1);
4521 }
4522
4523 SKIP_BLANKS;
4524 /*
4525 * Pop-up of finished entities.
4526 */
4527 while ((RAW == 0) && (ctxt->inputNr > 1))
4528 xmlPopInput(ctxt);
4529 SKIP_BLANKS;
4530
4531 if (RAW != '>') {
4532 ctxt->errNo = XML_ERR_GT_REQUIRED;
4533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4534 ctxt->sax->error(ctxt->userData,
4535 "xmlParseElementDecl: expected '>' at the end\n");
4536 ctxt->wellFormed = 0;
4537 ctxt->disableSAX = 1;
4538 } else {
4539 if (input != ctxt->input) {
4540 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4541 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4542 ctxt->sax->error(ctxt->userData,
4543"Element declaration doesn't start and stop in the same entity\n");
4544 ctxt->wellFormed = 0;
4545 ctxt->disableSAX = 1;
4546 }
4547
4548 NEXT;
4549 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4550 (ctxt->sax->elementDecl != NULL))
4551 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4552 content);
4553 }
4554 if (content != NULL) {
4555 xmlFreeElementContent(content);
4556 }
4557 if (name != NULL) {
4558 xmlFree(name);
4559 }
4560 }
4561 return(ret);
4562}
4563
4564/**
Owen Taylor3473f882001-02-23 17:55:21 +00004565 * xmlParseConditionalSections
4566 * @ctxt: an XML parser context
4567 *
4568 * [61] conditionalSect ::= includeSect | ignoreSect
4569 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4570 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4571 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4572 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4573 */
4574
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004575static void
Owen Taylor3473f882001-02-23 17:55:21 +00004576xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4577 SKIP(3);
4578 SKIP_BLANKS;
4579 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4580 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4581 (NXT(6) == 'E')) {
4582 SKIP(7);
4583 SKIP_BLANKS;
4584 if (RAW != '[') {
4585 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4586 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4587 ctxt->sax->error(ctxt->userData,
4588 "XML conditional section '[' expected\n");
4589 ctxt->wellFormed = 0;
4590 ctxt->disableSAX = 1;
4591 } else {
4592 NEXT;
4593 }
4594 if (xmlParserDebugEntities) {
4595 if ((ctxt->input != NULL) && (ctxt->input->filename))
4596 xmlGenericError(xmlGenericErrorContext,
4597 "%s(%d): ", ctxt->input->filename,
4598 ctxt->input->line);
4599 xmlGenericError(xmlGenericErrorContext,
4600 "Entering INCLUDE Conditional Section\n");
4601 }
4602
4603 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4604 (NXT(2) != '>'))) {
4605 const xmlChar *check = CUR_PTR;
4606 int cons = ctxt->input->consumed;
4607 int tok = ctxt->token;
4608
4609 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4610 xmlParseConditionalSections(ctxt);
4611 } else if (IS_BLANK(CUR)) {
4612 NEXT;
4613 } else if (RAW == '%') {
4614 xmlParsePEReference(ctxt);
4615 } else
4616 xmlParseMarkupDecl(ctxt);
4617
4618 /*
4619 * Pop-up of finished entities.
4620 */
4621 while ((RAW == 0) && (ctxt->inputNr > 1))
4622 xmlPopInput(ctxt);
4623
4624 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4625 (tok == ctxt->token)) {
4626 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4628 ctxt->sax->error(ctxt->userData,
4629 "Content error in the external subset\n");
4630 ctxt->wellFormed = 0;
4631 ctxt->disableSAX = 1;
4632 break;
4633 }
4634 }
4635 if (xmlParserDebugEntities) {
4636 if ((ctxt->input != NULL) && (ctxt->input->filename))
4637 xmlGenericError(xmlGenericErrorContext,
4638 "%s(%d): ", ctxt->input->filename,
4639 ctxt->input->line);
4640 xmlGenericError(xmlGenericErrorContext,
4641 "Leaving INCLUDE Conditional Section\n");
4642 }
4643
4644 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4645 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4646 int state;
4647 int instate;
4648 int depth = 0;
4649
4650 SKIP(6);
4651 SKIP_BLANKS;
4652 if (RAW != '[') {
4653 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4655 ctxt->sax->error(ctxt->userData,
4656 "XML conditional section '[' expected\n");
4657 ctxt->wellFormed = 0;
4658 ctxt->disableSAX = 1;
4659 } else {
4660 NEXT;
4661 }
4662 if (xmlParserDebugEntities) {
4663 if ((ctxt->input != NULL) && (ctxt->input->filename))
4664 xmlGenericError(xmlGenericErrorContext,
4665 "%s(%d): ", ctxt->input->filename,
4666 ctxt->input->line);
4667 xmlGenericError(xmlGenericErrorContext,
4668 "Entering IGNORE Conditional Section\n");
4669 }
4670
4671 /*
4672 * Parse up to the end of the conditionnal section
4673 * But disable SAX event generating DTD building in the meantime
4674 */
4675 state = ctxt->disableSAX;
4676 instate = ctxt->instate;
4677 ctxt->disableSAX = 1;
4678 ctxt->instate = XML_PARSER_IGNORE;
4679
4680 while (depth >= 0) {
4681 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4682 depth++;
4683 SKIP(3);
4684 continue;
4685 }
4686 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4687 if (--depth >= 0) SKIP(3);
4688 continue;
4689 }
4690 NEXT;
4691 continue;
4692 }
4693
4694 ctxt->disableSAX = state;
4695 ctxt->instate = instate;
4696
4697 if (xmlParserDebugEntities) {
4698 if ((ctxt->input != NULL) && (ctxt->input->filename))
4699 xmlGenericError(xmlGenericErrorContext,
4700 "%s(%d): ", ctxt->input->filename,
4701 ctxt->input->line);
4702 xmlGenericError(xmlGenericErrorContext,
4703 "Leaving IGNORE Conditional Section\n");
4704 }
4705
4706 } else {
4707 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4709 ctxt->sax->error(ctxt->userData,
4710 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4711 ctxt->wellFormed = 0;
4712 ctxt->disableSAX = 1;
4713 }
4714
4715 if (RAW == 0)
4716 SHRINK;
4717
4718 if (RAW == 0) {
4719 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4720 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4721 ctxt->sax->error(ctxt->userData,
4722 "XML conditional section not closed\n");
4723 ctxt->wellFormed = 0;
4724 ctxt->disableSAX = 1;
4725 } else {
4726 SKIP(3);
4727 }
4728}
4729
4730/**
Daniel Veillard5e3eecb2001-07-31 15:10:53 +00004731 * xmlParseMarkupDecl:
4732 * @ctxt: an XML parser context
4733 *
4734 * parse Markup declarations
4735 *
4736 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4737 * NotationDecl | PI | Comment
4738 *
4739 * [ VC: Proper Declaration/PE Nesting ]
4740 * Parameter-entity replacement text must be properly nested with
4741 * markup declarations. That is to say, if either the first character
4742 * or the last character of a markup declaration (markupdecl above) is
4743 * contained in the replacement text for a parameter-entity reference,
4744 * both must be contained in the same replacement text.
4745 *
4746 * [ WFC: PEs in Internal Subset ]
4747 * In the internal DTD subset, parameter-entity references can occur
4748 * only where markup declarations can occur, not within markup declarations.
4749 * (This does not apply to references that occur in external parameter
4750 * entities or to the external subset.)
4751 */
4752void
4753xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4754 GROW;
4755 xmlParseElementDecl(ctxt);
4756 xmlParseAttributeListDecl(ctxt);
4757 xmlParseEntityDecl(ctxt);
4758 xmlParseNotationDecl(ctxt);
4759 xmlParsePI(ctxt);
4760 xmlParseComment(ctxt);
4761 /*
4762 * This is only for internal subset. On external entities,
4763 * the replacement is done before parsing stage
4764 */
4765 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4766 xmlParsePEReference(ctxt);
4767
4768 /*
4769 * Conditional sections are allowed from entities included
4770 * by PE References in the internal subset.
4771 */
4772 if ((ctxt->external == 0) && (ctxt->inputNr > 1)) {
4773 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4774 xmlParseConditionalSections(ctxt);
4775 }
4776 }
4777
4778 ctxt->instate = XML_PARSER_DTD;
4779}
4780
4781/**
4782 * xmlParseTextDecl:
4783 * @ctxt: an XML parser context
4784 *
4785 * parse an XML declaration header for external entities
4786 *
4787 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4788 *
4789 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4790 */
4791
4792void
4793xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4794 xmlChar *version;
4795
4796 /*
4797 * We know that '<?xml' is here.
4798 */
4799 if ((RAW == '<') && (NXT(1) == '?') &&
4800 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4801 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4802 SKIP(5);
4803 } else {
4804 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4806 ctxt->sax->error(ctxt->userData,
4807 "Text declaration '<?xml' required\n");
4808 ctxt->wellFormed = 0;
4809 ctxt->disableSAX = 1;
4810
4811 return;
4812 }
4813
4814 if (!IS_BLANK(CUR)) {
4815 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4817 ctxt->sax->error(ctxt->userData,
4818 "Space needed after '<?xml'\n");
4819 ctxt->wellFormed = 0;
4820 ctxt->disableSAX = 1;
4821 }
4822 SKIP_BLANKS;
4823
4824 /*
4825 * We may have the VersionInfo here.
4826 */
4827 version = xmlParseVersionInfo(ctxt);
4828 if (version == NULL)
4829 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4830 ctxt->input->version = version;
4831
4832 /*
4833 * We must have the encoding declaration
4834 */
4835 if (!IS_BLANK(CUR)) {
4836 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4837 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4838 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4839 ctxt->wellFormed = 0;
4840 ctxt->disableSAX = 1;
4841 }
4842 xmlParseEncodingDecl(ctxt);
4843 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4844 /*
4845 * The XML REC instructs us to stop parsing right here
4846 */
4847 return;
4848 }
4849
4850 SKIP_BLANKS;
4851 if ((RAW == '?') && (NXT(1) == '>')) {
4852 SKIP(2);
4853 } else if (RAW == '>') {
4854 /* Deprecated old WD ... */
4855 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4857 ctxt->sax->error(ctxt->userData,
4858 "XML declaration must end-up with '?>'\n");
4859 ctxt->wellFormed = 0;
4860 ctxt->disableSAX = 1;
4861 NEXT;
4862 } else {
4863 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4864 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4865 ctxt->sax->error(ctxt->userData,
4866 "parsing XML declaration: '?>' expected\n");
4867 ctxt->wellFormed = 0;
4868 ctxt->disableSAX = 1;
4869 MOVETO_ENDTAG(CUR_PTR);
4870 NEXT;
4871 }
4872}
4873
4874/**
Owen Taylor3473f882001-02-23 17:55:21 +00004875 * xmlParseExternalSubset:
4876 * @ctxt: an XML parser context
4877 * @ExternalID: the external identifier
4878 * @SystemID: the system identifier (or URL)
4879 *
4880 * parse Markup declarations from an external subset
4881 *
4882 * [30] extSubset ::= textDecl? extSubsetDecl
4883 *
4884 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4885 */
4886void
4887xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4888 const xmlChar *SystemID) {
4889 GROW;
4890 if ((RAW == '<') && (NXT(1) == '?') &&
4891 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4892 (NXT(4) == 'l')) {
4893 xmlParseTextDecl(ctxt);
4894 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4895 /*
4896 * The XML REC instructs us to stop parsing right here
4897 */
4898 ctxt->instate = XML_PARSER_EOF;
4899 return;
4900 }
4901 }
4902 if (ctxt->myDoc == NULL) {
4903 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4904 }
4905 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4906 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4907
4908 ctxt->instate = XML_PARSER_DTD;
4909 ctxt->external = 1;
4910 while (((RAW == '<') && (NXT(1) == '?')) ||
4911 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00004912 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004913 const xmlChar *check = CUR_PTR;
4914 int cons = ctxt->input->consumed;
4915 int tok = ctxt->token;
4916
4917 GROW;
4918 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4919 xmlParseConditionalSections(ctxt);
4920 } else if (IS_BLANK(CUR)) {
4921 NEXT;
4922 } else if (RAW == '%') {
4923 xmlParsePEReference(ctxt);
4924 } else
4925 xmlParseMarkupDecl(ctxt);
4926
4927 /*
4928 * Pop-up of finished entities.
4929 */
4930 while ((RAW == 0) && (ctxt->inputNr > 1))
4931 xmlPopInput(ctxt);
4932
4933 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4934 (tok == ctxt->token)) {
4935 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4937 ctxt->sax->error(ctxt->userData,
4938 "Content error in the external subset\n");
4939 ctxt->wellFormed = 0;
4940 ctxt->disableSAX = 1;
4941 break;
4942 }
4943 }
4944
4945 if (RAW != 0) {
4946 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4948 ctxt->sax->error(ctxt->userData,
4949 "Extra content at the end of the document\n");
4950 ctxt->wellFormed = 0;
4951 ctxt->disableSAX = 1;
4952 }
4953
4954}
4955
4956/**
4957 * xmlParseReference:
4958 * @ctxt: an XML parser context
4959 *
4960 * parse and handle entity references in content, depending on the SAX
4961 * interface, this may end-up in a call to character() if this is a
4962 * CharRef, a predefined entity, if there is no reference() callback.
4963 * or if the parser was asked to switch to that mode.
4964 *
4965 * [67] Reference ::= EntityRef | CharRef
4966 */
4967void
4968xmlParseReference(xmlParserCtxtPtr ctxt) {
4969 xmlEntityPtr ent;
4970 xmlChar *val;
4971 if (RAW != '&') return;
4972
4973 if (NXT(1) == '#') {
4974 int i = 0;
4975 xmlChar out[10];
4976 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004977 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004978
4979 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4980 /*
4981 * So we are using non-UTF-8 buffers
4982 * Check that the char fit on 8bits, if not
4983 * generate a CharRef.
4984 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004985 if (value <= 0xFF) {
4986 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004987 out[1] = 0;
4988 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4989 (!ctxt->disableSAX))
4990 ctxt->sax->characters(ctxt->userData, out, 1);
4991 } else {
4992 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004993 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004994 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004995 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004996 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4997 (!ctxt->disableSAX))
4998 ctxt->sax->reference(ctxt->userData, out);
4999 }
5000 } else {
5001 /*
5002 * Just encode the value in UTF-8
5003 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005004 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00005005 out[i] = 0;
5006 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5007 (!ctxt->disableSAX))
5008 ctxt->sax->characters(ctxt->userData, out, i);
5009 }
5010 } else {
5011 ent = xmlParseEntityRef(ctxt);
5012 if (ent == NULL) return;
5013 if ((ent->name != NULL) &&
5014 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5015 xmlNodePtr list = NULL;
5016 int ret;
5017
5018
5019 /*
5020 * The first reference to the entity trigger a parsing phase
5021 * where the ent->children is filled with the result from
5022 * the parsing.
5023 */
5024 if (ent->children == NULL) {
5025 xmlChar *value;
5026 value = ent->content;
5027
5028 /*
5029 * Check that this entity is well formed
5030 */
5031 if ((value != NULL) &&
5032 (value[1] == 0) && (value[0] == '<') &&
5033 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5034 /*
5035 * DONE: get definite answer on this !!!
5036 * Lots of entity decls are used to declare a single
5037 * char
5038 * <!ENTITY lt "<">
5039 * Which seems to be valid since
5040 * 2.4: The ampersand character (&) and the left angle
5041 * bracket (<) may appear in their literal form only
5042 * when used ... They are also legal within the literal
5043 * entity value of an internal entity declaration;i
5044 * see "4.3.2 Well-Formed Parsed Entities".
5045 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5046 * Looking at the OASIS test suite and James Clark
5047 * tests, this is broken. However the XML REC uses
5048 * it. Is the XML REC not well-formed ????
5049 * This is a hack to avoid this problem
5050 *
5051 * ANSWER: since lt gt amp .. are already defined,
5052 * this is a redefinition and hence the fact that the
5053 * contentis not well balanced is not a Wf error, this
5054 * is lousy but acceptable.
5055 */
5056 list = xmlNewDocText(ctxt->myDoc, value);
5057 if (list != NULL) {
5058 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5059 (ent->children == NULL)) {
5060 ent->children = list;
5061 ent->last = list;
5062 list->parent = (xmlNodePtr) ent;
5063 } else {
5064 xmlFreeNodeList(list);
5065 }
5066 } else if (list != NULL) {
5067 xmlFreeNodeList(list);
5068 }
5069 } else {
5070 /*
5071 * 4.3.2: An internal general parsed entity is well-formed
5072 * if its replacement text matches the production labeled
5073 * content.
5074 */
5075 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5076 ctxt->depth++;
5077 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5078 ctxt->sax, NULL, ctxt->depth,
5079 value, &list);
5080 ctxt->depth--;
5081 } else if (ent->etype ==
5082 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5083 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005084 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005085 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005086 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005087 ctxt->depth--;
5088 } else {
5089 ret = -1;
5090 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5091 ctxt->sax->error(ctxt->userData,
5092 "Internal: invalid entity type\n");
5093 }
5094 if (ret == XML_ERR_ENTITY_LOOP) {
5095 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5097 ctxt->sax->error(ctxt->userData,
5098 "Detected entity reference loop\n");
5099 ctxt->wellFormed = 0;
5100 ctxt->disableSAX = 1;
5101 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005102 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5103 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005104 (ent->children == NULL)) {
5105 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005106 if (ctxt->replaceEntities) {
5107 /*
5108 * Prune it directly in the generated document
5109 * except for single text nodes.
5110 */
5111 if ((list->type == XML_TEXT_NODE) &&
5112 (list->next == NULL)) {
5113 list->parent = (xmlNodePtr) ent;
5114 list = NULL;
5115 } else {
5116 while (list != NULL) {
5117 list->parent = (xmlNodePtr) ctxt->node;
5118 if (list->next == NULL)
5119 ent->last = list;
5120 list = list->next;
5121 }
5122 list = ent->children;
5123 }
5124 } else {
5125 while (list != NULL) {
5126 list->parent = (xmlNodePtr) ent;
5127 if (list->next == NULL)
5128 ent->last = list;
5129 list = list->next;
5130 }
Owen Taylor3473f882001-02-23 17:55:21 +00005131 }
5132 } else {
5133 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005134 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005135 }
5136 } else if (ret > 0) {
5137 ctxt->errNo = ret;
5138 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5139 ctxt->sax->error(ctxt->userData,
5140 "Entity value required\n");
5141 ctxt->wellFormed = 0;
5142 ctxt->disableSAX = 1;
5143 } else if (list != NULL) {
5144 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005145 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005146 }
5147 }
5148 }
5149 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5150 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5151 /*
5152 * Create a node.
5153 */
5154 ctxt->sax->reference(ctxt->userData, ent->name);
5155 return;
5156 } else if (ctxt->replaceEntities) {
5157 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5158 /*
5159 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005160 * a simple tree copy for all references except the first
5161 * In the first occurence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005162 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005163 if (list == NULL) {
5164 xmlNodePtr new, cur;
5165 cur = ent->children;
5166 while (cur != NULL) {
5167 new = xmlCopyNode(cur, 1);
5168 xmlAddChild(ctxt->node, new);
5169 if (cur == ent->last)
5170 break;
5171 cur = cur->next;
5172 }
5173 } else {
5174 /*
5175 * the name change is to avoid coalescing of the
5176 * node with a prossible previous text one which
5177 * would make ent->children a dandling pointer
5178 */
5179 if (ent->children->type == XML_TEXT_NODE)
5180 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5181 if ((ent->last != ent->children) &&
5182 (ent->last->type == XML_TEXT_NODE))
5183 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5184 xmlAddChildList(ctxt->node, ent->children);
5185 }
5186
Owen Taylor3473f882001-02-23 17:55:21 +00005187 /*
5188 * This is to avoid a nasty side effect, see
5189 * characters() in SAX.c
5190 */
5191 ctxt->nodemem = 0;
5192 ctxt->nodelen = 0;
5193 return;
5194 } else {
5195 /*
5196 * Probably running in SAX mode
5197 */
5198 xmlParserInputPtr input;
5199
5200 input = xmlNewEntityInputStream(ctxt, ent);
5201 xmlPushInput(ctxt, input);
5202 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5203 (RAW == '<') && (NXT(1) == '?') &&
5204 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5205 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5206 xmlParseTextDecl(ctxt);
5207 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5208 /*
5209 * The XML REC instructs us to stop parsing right here
5210 */
5211 ctxt->instate = XML_PARSER_EOF;
5212 return;
5213 }
5214 if (input->standalone == 1) {
5215 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5216 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5217 ctxt->sax->error(ctxt->userData,
5218 "external parsed entities cannot be standalone\n");
5219 ctxt->wellFormed = 0;
5220 ctxt->disableSAX = 1;
5221 }
5222 }
5223 return;
5224 }
5225 }
5226 } else {
5227 val = ent->content;
5228 if (val == NULL) return;
5229 /*
5230 * inline the entity.
5231 */
5232 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5233 (!ctxt->disableSAX))
5234 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5235 }
5236 }
5237}
5238
5239/**
5240 * xmlParseEntityRef:
5241 * @ctxt: an XML parser context
5242 *
5243 * parse ENTITY references declarations
5244 *
5245 * [68] EntityRef ::= '&' Name ';'
5246 *
5247 * [ WFC: Entity Declared ]
5248 * In a document without any DTD, a document with only an internal DTD
5249 * subset which contains no parameter entity references, or a document
5250 * with "standalone='yes'", the Name given in the entity reference
5251 * must match that in an entity declaration, except that well-formed
5252 * documents need not declare any of the following entities: amp, lt,
5253 * gt, apos, quot. The declaration of a parameter entity must precede
5254 * any reference to it. Similarly, the declaration of a general entity
5255 * must precede any reference to it which appears in a default value in an
5256 * attribute-list declaration. Note that if entities are declared in the
5257 * external subset or in external parameter entities, a non-validating
5258 * processor is not obligated to read and process their declarations;
5259 * for such documents, the rule that an entity must be declared is a
5260 * well-formedness constraint only if standalone='yes'.
5261 *
5262 * [ WFC: Parsed Entity ]
5263 * An entity reference must not contain the name of an unparsed entity
5264 *
5265 * Returns the xmlEntityPtr if found, or NULL otherwise.
5266 */
5267xmlEntityPtr
5268xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5269 xmlChar *name;
5270 xmlEntityPtr ent = NULL;
5271
5272 GROW;
5273
5274 if (RAW == '&') {
5275 NEXT;
5276 name = xmlParseName(ctxt);
5277 if (name == NULL) {
5278 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5280 ctxt->sax->error(ctxt->userData,
5281 "xmlParseEntityRef: no name\n");
5282 ctxt->wellFormed = 0;
5283 ctxt->disableSAX = 1;
5284 } else {
5285 if (RAW == ';') {
5286 NEXT;
5287 /*
5288 * Ask first SAX for entity resolution, otherwise try the
5289 * predefined set.
5290 */
5291 if (ctxt->sax != NULL) {
5292 if (ctxt->sax->getEntity != NULL)
5293 ent = ctxt->sax->getEntity(ctxt->userData, name);
5294 if (ent == NULL)
5295 ent = xmlGetPredefinedEntity(name);
5296 }
5297 /*
5298 * [ WFC: Entity Declared ]
5299 * In a document without any DTD, a document with only an
5300 * internal DTD subset which contains no parameter entity
5301 * references, or a document with "standalone='yes'", the
5302 * Name given in the entity reference must match that in an
5303 * entity declaration, except that well-formed documents
5304 * need not declare any of the following entities: amp, lt,
5305 * gt, apos, quot.
5306 * The declaration of a parameter entity must precede any
5307 * reference to it.
5308 * Similarly, the declaration of a general entity must
5309 * precede any reference to it which appears in a default
5310 * value in an attribute-list declaration. Note that if
5311 * entities are declared in the external subset or in
5312 * external parameter entities, a non-validating processor
5313 * is not obligated to read and process their declarations;
5314 * for such documents, the rule that an entity must be
5315 * declared is a well-formedness constraint only if
5316 * standalone='yes'.
5317 */
5318 if (ent == NULL) {
5319 if ((ctxt->standalone == 1) ||
5320 ((ctxt->hasExternalSubset == 0) &&
5321 (ctxt->hasPErefs == 0))) {
5322 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5324 ctxt->sax->error(ctxt->userData,
5325 "Entity '%s' not defined\n", name);
5326 ctxt->wellFormed = 0;
5327 ctxt->disableSAX = 1;
5328 } else {
5329 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5330 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005331 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005332 "Entity '%s' not defined\n", name);
5333 }
5334 }
5335
5336 /*
5337 * [ WFC: Parsed Entity ]
5338 * An entity reference must not contain the name of an
5339 * unparsed entity
5340 */
5341 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5342 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5343 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5344 ctxt->sax->error(ctxt->userData,
5345 "Entity reference to unparsed entity %s\n", name);
5346 ctxt->wellFormed = 0;
5347 ctxt->disableSAX = 1;
5348 }
5349
5350 /*
5351 * [ WFC: No External Entity References ]
5352 * Attribute values cannot contain direct or indirect
5353 * entity references to external entities.
5354 */
5355 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5356 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5357 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5358 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5359 ctxt->sax->error(ctxt->userData,
5360 "Attribute references external entity '%s'\n", name);
5361 ctxt->wellFormed = 0;
5362 ctxt->disableSAX = 1;
5363 }
5364 /*
5365 * [ WFC: No < in Attribute Values ]
5366 * The replacement text of any entity referred to directly or
5367 * indirectly in an attribute value (other than "&lt;") must
5368 * not contain a <.
5369 */
5370 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5371 (ent != NULL) &&
5372 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5373 (ent->content != NULL) &&
5374 (xmlStrchr(ent->content, '<'))) {
5375 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5376 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5377 ctxt->sax->error(ctxt->userData,
5378 "'<' in entity '%s' is not allowed in attributes values\n", name);
5379 ctxt->wellFormed = 0;
5380 ctxt->disableSAX = 1;
5381 }
5382
5383 /*
5384 * Internal check, no parameter entities here ...
5385 */
5386 else {
5387 switch (ent->etype) {
5388 case XML_INTERNAL_PARAMETER_ENTITY:
5389 case XML_EXTERNAL_PARAMETER_ENTITY:
5390 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5391 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5392 ctxt->sax->error(ctxt->userData,
5393 "Attempt to reference the parameter entity '%s'\n", name);
5394 ctxt->wellFormed = 0;
5395 ctxt->disableSAX = 1;
5396 break;
5397 default:
5398 break;
5399 }
5400 }
5401
5402 /*
5403 * [ WFC: No Recursion ]
5404 * A parsed entity must not contain a recursive reference
5405 * to itself, either directly or indirectly.
5406 * Done somewhere else
5407 */
5408
5409 } else {
5410 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5412 ctxt->sax->error(ctxt->userData,
5413 "xmlParseEntityRef: expecting ';'\n");
5414 ctxt->wellFormed = 0;
5415 ctxt->disableSAX = 1;
5416 }
5417 xmlFree(name);
5418 }
5419 }
5420 return(ent);
5421}
5422
5423/**
5424 * xmlParseStringEntityRef:
5425 * @ctxt: an XML parser context
5426 * @str: a pointer to an index in the string
5427 *
5428 * parse ENTITY references declarations, but this version parses it from
5429 * a string value.
5430 *
5431 * [68] EntityRef ::= '&' Name ';'
5432 *
5433 * [ WFC: Entity Declared ]
5434 * In a document without any DTD, a document with only an internal DTD
5435 * subset which contains no parameter entity references, or a document
5436 * with "standalone='yes'", the Name given in the entity reference
5437 * must match that in an entity declaration, except that well-formed
5438 * documents need not declare any of the following entities: amp, lt,
5439 * gt, apos, quot. The declaration of a parameter entity must precede
5440 * any reference to it. Similarly, the declaration of a general entity
5441 * must precede any reference to it which appears in a default value in an
5442 * attribute-list declaration. Note that if entities are declared in the
5443 * external subset or in external parameter entities, a non-validating
5444 * processor is not obligated to read and process their declarations;
5445 * for such documents, the rule that an entity must be declared is a
5446 * well-formedness constraint only if standalone='yes'.
5447 *
5448 * [ WFC: Parsed Entity ]
5449 * An entity reference must not contain the name of an unparsed entity
5450 *
5451 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5452 * is updated to the current location in the string.
5453 */
5454xmlEntityPtr
5455xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5456 xmlChar *name;
5457 const xmlChar *ptr;
5458 xmlChar cur;
5459 xmlEntityPtr ent = NULL;
5460
5461 if ((str == NULL) || (*str == NULL))
5462 return(NULL);
5463 ptr = *str;
5464 cur = *ptr;
5465 if (cur == '&') {
5466 ptr++;
5467 cur = *ptr;
5468 name = xmlParseStringName(ctxt, &ptr);
5469 if (name == NULL) {
5470 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5472 ctxt->sax->error(ctxt->userData,
5473 "xmlParseEntityRef: no name\n");
5474 ctxt->wellFormed = 0;
5475 ctxt->disableSAX = 1;
5476 } else {
5477 if (*ptr == ';') {
5478 ptr++;
5479 /*
5480 * Ask first SAX for entity resolution, otherwise try the
5481 * predefined set.
5482 */
5483 if (ctxt->sax != NULL) {
5484 if (ctxt->sax->getEntity != NULL)
5485 ent = ctxt->sax->getEntity(ctxt->userData, name);
5486 if (ent == NULL)
5487 ent = xmlGetPredefinedEntity(name);
5488 }
5489 /*
5490 * [ WFC: Entity Declared ]
5491 * In a document without any DTD, a document with only an
5492 * internal DTD subset which contains no parameter entity
5493 * references, or a document with "standalone='yes'", the
5494 * Name given in the entity reference must match that in an
5495 * entity declaration, except that well-formed documents
5496 * need not declare any of the following entities: amp, lt,
5497 * gt, apos, quot.
5498 * The declaration of a parameter entity must precede any
5499 * reference to it.
5500 * Similarly, the declaration of a general entity must
5501 * precede any reference to it which appears in a default
5502 * value in an attribute-list declaration. Note that if
5503 * entities are declared in the external subset or in
5504 * external parameter entities, a non-validating processor
5505 * is not obligated to read and process their declarations;
5506 * for such documents, the rule that an entity must be
5507 * declared is a well-formedness constraint only if
5508 * standalone='yes'.
5509 */
5510 if (ent == NULL) {
5511 if ((ctxt->standalone == 1) ||
5512 ((ctxt->hasExternalSubset == 0) &&
5513 (ctxt->hasPErefs == 0))) {
5514 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5516 ctxt->sax->error(ctxt->userData,
5517 "Entity '%s' not defined\n", name);
5518 ctxt->wellFormed = 0;
5519 ctxt->disableSAX = 1;
5520 } else {
5521 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5522 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5523 ctxt->sax->warning(ctxt->userData,
5524 "Entity '%s' not defined\n", name);
5525 }
5526 }
5527
5528 /*
5529 * [ WFC: Parsed Entity ]
5530 * An entity reference must not contain the name of an
5531 * unparsed entity
5532 */
5533 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5534 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5536 ctxt->sax->error(ctxt->userData,
5537 "Entity reference to unparsed entity %s\n", name);
5538 ctxt->wellFormed = 0;
5539 ctxt->disableSAX = 1;
5540 }
5541
5542 /*
5543 * [ WFC: No External Entity References ]
5544 * Attribute values cannot contain direct or indirect
5545 * entity references to external entities.
5546 */
5547 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5548 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5549 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5550 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5551 ctxt->sax->error(ctxt->userData,
5552 "Attribute references external entity '%s'\n", name);
5553 ctxt->wellFormed = 0;
5554 ctxt->disableSAX = 1;
5555 }
5556 /*
5557 * [ WFC: No < in Attribute Values ]
5558 * The replacement text of any entity referred to directly or
5559 * indirectly in an attribute value (other than "&lt;") must
5560 * not contain a <.
5561 */
5562 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5563 (ent != NULL) &&
5564 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5565 (ent->content != NULL) &&
5566 (xmlStrchr(ent->content, '<'))) {
5567 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5568 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5569 ctxt->sax->error(ctxt->userData,
5570 "'<' in entity '%s' is not allowed in attributes values\n", name);
5571 ctxt->wellFormed = 0;
5572 ctxt->disableSAX = 1;
5573 }
5574
5575 /*
5576 * Internal check, no parameter entities here ...
5577 */
5578 else {
5579 switch (ent->etype) {
5580 case XML_INTERNAL_PARAMETER_ENTITY:
5581 case XML_EXTERNAL_PARAMETER_ENTITY:
5582 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5583 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5584 ctxt->sax->error(ctxt->userData,
5585 "Attempt to reference the parameter entity '%s'\n", name);
5586 ctxt->wellFormed = 0;
5587 ctxt->disableSAX = 1;
5588 break;
5589 default:
5590 break;
5591 }
5592 }
5593
5594 /*
5595 * [ WFC: No Recursion ]
5596 * A parsed entity must not contain a recursive reference
5597 * to itself, either directly or indirectly.
5598 * Done somewhwere else
5599 */
5600
5601 } else {
5602 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5603 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5604 ctxt->sax->error(ctxt->userData,
5605 "xmlParseEntityRef: expecting ';'\n");
5606 ctxt->wellFormed = 0;
5607 ctxt->disableSAX = 1;
5608 }
5609 xmlFree(name);
5610 }
5611 }
5612 *str = ptr;
5613 return(ent);
5614}
5615
5616/**
5617 * xmlParsePEReference:
5618 * @ctxt: an XML parser context
5619 *
5620 * parse PEReference declarations
5621 * The entity content is handled directly by pushing it's content as
5622 * a new input stream.
5623 *
5624 * [69] PEReference ::= '%' Name ';'
5625 *
5626 * [ WFC: No Recursion ]
5627 * A parsed entity must not contain a recursive
5628 * reference to itself, either directly or indirectly.
5629 *
5630 * [ WFC: Entity Declared ]
5631 * In a document without any DTD, a document with only an internal DTD
5632 * subset which contains no parameter entity references, or a document
5633 * with "standalone='yes'", ... ... The declaration of a parameter
5634 * entity must precede any reference to it...
5635 *
5636 * [ VC: Entity Declared ]
5637 * In a document with an external subset or external parameter entities
5638 * with "standalone='no'", ... ... The declaration of a parameter entity
5639 * must precede any reference to it...
5640 *
5641 * [ WFC: In DTD ]
5642 * Parameter-entity references may only appear in the DTD.
5643 * NOTE: misleading but this is handled.
5644 */
5645void
5646xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5647 xmlChar *name;
5648 xmlEntityPtr entity = NULL;
5649 xmlParserInputPtr input;
5650
5651 if (RAW == '%') {
5652 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005653 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005654 if (name == NULL) {
5655 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5657 ctxt->sax->error(ctxt->userData,
5658 "xmlParsePEReference: no name\n");
5659 ctxt->wellFormed = 0;
5660 ctxt->disableSAX = 1;
5661 } else {
5662 if (RAW == ';') {
5663 NEXT;
5664 if ((ctxt->sax != NULL) &&
5665 (ctxt->sax->getParameterEntity != NULL))
5666 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5667 name);
5668 if (entity == NULL) {
5669 /*
5670 * [ WFC: Entity Declared ]
5671 * In a document without any DTD, a document with only an
5672 * internal DTD subset which contains no parameter entity
5673 * references, or a document with "standalone='yes'", ...
5674 * ... The declaration of a parameter entity must precede
5675 * any reference to it...
5676 */
5677 if ((ctxt->standalone == 1) ||
5678 ((ctxt->hasExternalSubset == 0) &&
5679 (ctxt->hasPErefs == 0))) {
5680 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5681 if ((!ctxt->disableSAX) &&
5682 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5683 ctxt->sax->error(ctxt->userData,
5684 "PEReference: %%%s; not found\n", name);
5685 ctxt->wellFormed = 0;
5686 ctxt->disableSAX = 1;
5687 } else {
5688 /*
5689 * [ VC: Entity Declared ]
5690 * In a document with an external subset or external
5691 * parameter entities with "standalone='no'", ...
5692 * ... The declaration of a parameter entity must precede
5693 * any reference to it...
5694 */
5695 if ((!ctxt->disableSAX) &&
5696 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5697 ctxt->sax->warning(ctxt->userData,
5698 "PEReference: %%%s; not found\n", name);
5699 ctxt->valid = 0;
5700 }
5701 } else {
5702 /*
5703 * Internal checking in case the entity quest barfed
5704 */
5705 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5706 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5707 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5708 ctxt->sax->warning(ctxt->userData,
5709 "Internal: %%%s; is not a parameter entity\n", name);
5710 } else {
5711 /*
5712 * TODO !!!
5713 * handle the extra spaces added before and after
5714 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5715 */
5716 input = xmlNewEntityInputStream(ctxt, entity);
5717 xmlPushInput(ctxt, input);
5718 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5719 (RAW == '<') && (NXT(1) == '?') &&
5720 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5721 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5722 xmlParseTextDecl(ctxt);
5723 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5724 /*
5725 * The XML REC instructs us to stop parsing
5726 * right here
5727 */
5728 ctxt->instate = XML_PARSER_EOF;
5729 xmlFree(name);
5730 return;
5731 }
5732 }
5733 if (ctxt->token == 0)
5734 ctxt->token = ' ';
5735 }
5736 }
5737 ctxt->hasPErefs = 1;
5738 } else {
5739 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5740 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5741 ctxt->sax->error(ctxt->userData,
5742 "xmlParsePEReference: expecting ';'\n");
5743 ctxt->wellFormed = 0;
5744 ctxt->disableSAX = 1;
5745 }
5746 xmlFree(name);
5747 }
5748 }
5749}
5750
5751/**
5752 * xmlParseStringPEReference:
5753 * @ctxt: an XML parser context
5754 * @str: a pointer to an index in the string
5755 *
5756 * parse PEReference declarations
5757 *
5758 * [69] PEReference ::= '%' Name ';'
5759 *
5760 * [ WFC: No Recursion ]
5761 * A parsed entity must not contain a recursive
5762 * reference to itself, either directly or indirectly.
5763 *
5764 * [ WFC: Entity Declared ]
5765 * In a document without any DTD, a document with only an internal DTD
5766 * subset which contains no parameter entity references, or a document
5767 * with "standalone='yes'", ... ... The declaration of a parameter
5768 * entity must precede any reference to it...
5769 *
5770 * [ VC: Entity Declared ]
5771 * In a document with an external subset or external parameter entities
5772 * with "standalone='no'", ... ... The declaration of a parameter entity
5773 * must precede any reference to it...
5774 *
5775 * [ WFC: In DTD ]
5776 * Parameter-entity references may only appear in the DTD.
5777 * NOTE: misleading but this is handled.
5778 *
5779 * Returns the string of the entity content.
5780 * str is updated to the current value of the index
5781 */
5782xmlEntityPtr
5783xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5784 const xmlChar *ptr;
5785 xmlChar cur;
5786 xmlChar *name;
5787 xmlEntityPtr entity = NULL;
5788
5789 if ((str == NULL) || (*str == NULL)) return(NULL);
5790 ptr = *str;
5791 cur = *ptr;
5792 if (cur == '%') {
5793 ptr++;
5794 cur = *ptr;
5795 name = xmlParseStringName(ctxt, &ptr);
5796 if (name == NULL) {
5797 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5798 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5799 ctxt->sax->error(ctxt->userData,
5800 "xmlParseStringPEReference: no name\n");
5801 ctxt->wellFormed = 0;
5802 ctxt->disableSAX = 1;
5803 } else {
5804 cur = *ptr;
5805 if (cur == ';') {
5806 ptr++;
5807 cur = *ptr;
5808 if ((ctxt->sax != NULL) &&
5809 (ctxt->sax->getParameterEntity != NULL))
5810 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5811 name);
5812 if (entity == NULL) {
5813 /*
5814 * [ WFC: Entity Declared ]
5815 * In a document without any DTD, a document with only an
5816 * internal DTD subset which contains no parameter entity
5817 * references, or a document with "standalone='yes'", ...
5818 * ... The declaration of a parameter entity must precede
5819 * any reference to it...
5820 */
5821 if ((ctxt->standalone == 1) ||
5822 ((ctxt->hasExternalSubset == 0) &&
5823 (ctxt->hasPErefs == 0))) {
5824 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5826 ctxt->sax->error(ctxt->userData,
5827 "PEReference: %%%s; not found\n", name);
5828 ctxt->wellFormed = 0;
5829 ctxt->disableSAX = 1;
5830 } else {
5831 /*
5832 * [ VC: Entity Declared ]
5833 * In a document with an external subset or external
5834 * parameter entities with "standalone='no'", ...
5835 * ... The declaration of a parameter entity must
5836 * precede any reference to it...
5837 */
5838 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5839 ctxt->sax->warning(ctxt->userData,
5840 "PEReference: %%%s; not found\n", name);
5841 ctxt->valid = 0;
5842 }
5843 } else {
5844 /*
5845 * Internal checking in case the entity quest barfed
5846 */
5847 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5848 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5849 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5850 ctxt->sax->warning(ctxt->userData,
5851 "Internal: %%%s; is not a parameter entity\n", name);
5852 }
5853 }
5854 ctxt->hasPErefs = 1;
5855 } else {
5856 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5857 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5858 ctxt->sax->error(ctxt->userData,
5859 "xmlParseStringPEReference: expecting ';'\n");
5860 ctxt->wellFormed = 0;
5861 ctxt->disableSAX = 1;
5862 }
5863 xmlFree(name);
5864 }
5865 }
5866 *str = ptr;
5867 return(entity);
5868}
5869
5870/**
5871 * xmlParseDocTypeDecl:
5872 * @ctxt: an XML parser context
5873 *
5874 * parse a DOCTYPE declaration
5875 *
5876 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5877 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5878 *
5879 * [ VC: Root Element Type ]
5880 * The Name in the document type declaration must match the element
5881 * type of the root element.
5882 */
5883
5884void
5885xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5886 xmlChar *name = NULL;
5887 xmlChar *ExternalID = NULL;
5888 xmlChar *URI = NULL;
5889
5890 /*
5891 * We know that '<!DOCTYPE' has been detected.
5892 */
5893 SKIP(9);
5894
5895 SKIP_BLANKS;
5896
5897 /*
5898 * Parse the DOCTYPE name.
5899 */
5900 name = xmlParseName(ctxt);
5901 if (name == NULL) {
5902 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5904 ctxt->sax->error(ctxt->userData,
5905 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5906 ctxt->wellFormed = 0;
5907 ctxt->disableSAX = 1;
5908 }
5909 ctxt->intSubName = name;
5910
5911 SKIP_BLANKS;
5912
5913 /*
5914 * Check for SystemID and ExternalID
5915 */
5916 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5917
5918 if ((URI != NULL) || (ExternalID != NULL)) {
5919 ctxt->hasExternalSubset = 1;
5920 }
5921 ctxt->extSubURI = URI;
5922 ctxt->extSubSystem = ExternalID;
5923
5924 SKIP_BLANKS;
5925
5926 /*
5927 * Create and update the internal subset.
5928 */
5929 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5930 (!ctxt->disableSAX))
5931 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5932
5933 /*
5934 * Is there any internal subset declarations ?
5935 * they are handled separately in xmlParseInternalSubset()
5936 */
5937 if (RAW == '[')
5938 return;
5939
5940 /*
5941 * We should be at the end of the DOCTYPE declaration.
5942 */
5943 if (RAW != '>') {
5944 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5945 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5946 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5947 ctxt->wellFormed = 0;
5948 ctxt->disableSAX = 1;
5949 }
5950 NEXT;
5951}
5952
5953/**
5954 * xmlParseInternalsubset:
5955 * @ctxt: an XML parser context
5956 *
5957 * parse the internal subset declaration
5958 *
5959 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5960 */
5961
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005962static void
Owen Taylor3473f882001-02-23 17:55:21 +00005963xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5964 /*
5965 * Is there any DTD definition ?
5966 */
5967 if (RAW == '[') {
5968 ctxt->instate = XML_PARSER_DTD;
5969 NEXT;
5970 /*
5971 * Parse the succession of Markup declarations and
5972 * PEReferences.
5973 * Subsequence (markupdecl | PEReference | S)*
5974 */
5975 while (RAW != ']') {
5976 const xmlChar *check = CUR_PTR;
5977 int cons = ctxt->input->consumed;
5978
5979 SKIP_BLANKS;
5980 xmlParseMarkupDecl(ctxt);
5981 xmlParsePEReference(ctxt);
5982
5983 /*
5984 * Pop-up of finished entities.
5985 */
5986 while ((RAW == 0) && (ctxt->inputNr > 1))
5987 xmlPopInput(ctxt);
5988
5989 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5990 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5991 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5992 ctxt->sax->error(ctxt->userData,
5993 "xmlParseInternalSubset: error detected in Markup declaration\n");
5994 ctxt->wellFormed = 0;
5995 ctxt->disableSAX = 1;
5996 break;
5997 }
5998 }
5999 if (RAW == ']') {
6000 NEXT;
6001 SKIP_BLANKS;
6002 }
6003 }
6004
6005 /*
6006 * We should be at the end of the DOCTYPE declaration.
6007 */
6008 if (RAW != '>') {
6009 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
6010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6011 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6012 ctxt->wellFormed = 0;
6013 ctxt->disableSAX = 1;
6014 }
6015 NEXT;
6016}
6017
6018/**
6019 * xmlParseAttribute:
6020 * @ctxt: an XML parser context
6021 * @value: a xmlChar ** used to store the value of the attribute
6022 *
6023 * parse an attribute
6024 *
6025 * [41] Attribute ::= Name Eq AttValue
6026 *
6027 * [ WFC: No External Entity References ]
6028 * Attribute values cannot contain direct or indirect entity references
6029 * to external entities.
6030 *
6031 * [ WFC: No < in Attribute Values ]
6032 * The replacement text of any entity referred to directly or indirectly in
6033 * an attribute value (other than "&lt;") must not contain a <.
6034 *
6035 * [ VC: Attribute Value Type ]
6036 * The attribute must have been declared; the value must be of the type
6037 * declared for it.
6038 *
6039 * [25] Eq ::= S? '=' S?
6040 *
6041 * With namespace:
6042 *
6043 * [NS 11] Attribute ::= QName Eq AttValue
6044 *
6045 * Also the case QName == xmlns:??? is handled independently as a namespace
6046 * definition.
6047 *
6048 * Returns the attribute name, and the value in *value.
6049 */
6050
6051xmlChar *
6052xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6053 xmlChar *name, *val;
6054
6055 *value = NULL;
6056 name = xmlParseName(ctxt);
6057 if (name == NULL) {
6058 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6060 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6061 ctxt->wellFormed = 0;
6062 ctxt->disableSAX = 1;
6063 return(NULL);
6064 }
6065
6066 /*
6067 * read the value
6068 */
6069 SKIP_BLANKS;
6070 if (RAW == '=') {
6071 NEXT;
6072 SKIP_BLANKS;
6073 val = xmlParseAttValue(ctxt);
6074 ctxt->instate = XML_PARSER_CONTENT;
6075 } else {
6076 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6077 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6078 ctxt->sax->error(ctxt->userData,
6079 "Specification mandate value for attribute %s\n", name);
6080 ctxt->wellFormed = 0;
6081 ctxt->disableSAX = 1;
6082 xmlFree(name);
6083 return(NULL);
6084 }
6085
6086 /*
6087 * Check that xml:lang conforms to the specification
6088 * No more registered as an error, just generate a warning now
6089 * since this was deprecated in XML second edition
6090 */
6091 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6092 if (!xmlCheckLanguageID(val)) {
6093 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6094 ctxt->sax->warning(ctxt->userData,
6095 "Malformed value for xml:lang : %s\n", val);
6096 }
6097 }
6098
6099 /*
6100 * Check that xml:space conforms to the specification
6101 */
6102 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6103 if (xmlStrEqual(val, BAD_CAST "default"))
6104 *(ctxt->space) = 0;
6105 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6106 *(ctxt->space) = 1;
6107 else {
6108 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6110 ctxt->sax->error(ctxt->userData,
6111"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6112 val);
6113 ctxt->wellFormed = 0;
6114 ctxt->disableSAX = 1;
6115 }
6116 }
6117
6118 *value = val;
6119 return(name);
6120}
6121
6122/**
6123 * xmlParseStartTag:
6124 * @ctxt: an XML parser context
6125 *
6126 * parse a start of tag either for rule element or
6127 * EmptyElement. In both case we don't parse the tag closing chars.
6128 *
6129 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6130 *
6131 * [ WFC: Unique Att Spec ]
6132 * No attribute name may appear more than once in the same start-tag or
6133 * empty-element tag.
6134 *
6135 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6136 *
6137 * [ WFC: Unique Att Spec ]
6138 * No attribute name may appear more than once in the same start-tag or
6139 * empty-element tag.
6140 *
6141 * With namespace:
6142 *
6143 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6144 *
6145 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6146 *
6147 * Returns the element name parsed
6148 */
6149
6150xmlChar *
6151xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6152 xmlChar *name;
6153 xmlChar *attname;
6154 xmlChar *attvalue;
6155 const xmlChar **atts = NULL;
6156 int nbatts = 0;
6157 int maxatts = 0;
6158 int i;
6159
6160 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006161 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006162
6163 name = xmlParseName(ctxt);
6164 if (name == NULL) {
6165 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6167 ctxt->sax->error(ctxt->userData,
6168 "xmlParseStartTag: invalid element name\n");
6169 ctxt->wellFormed = 0;
6170 ctxt->disableSAX = 1;
6171 return(NULL);
6172 }
6173
6174 /*
6175 * Now parse the attributes, it ends up with the ending
6176 *
6177 * (S Attribute)* S?
6178 */
6179 SKIP_BLANKS;
6180 GROW;
6181
Daniel Veillard21a0f912001-02-25 19:54:14 +00006182 while ((RAW != '>') &&
6183 ((RAW != '/') || (NXT(1) != '>')) &&
6184 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006185 const xmlChar *q = CUR_PTR;
6186 int cons = ctxt->input->consumed;
6187
6188 attname = xmlParseAttribute(ctxt, &attvalue);
6189 if ((attname != NULL) && (attvalue != NULL)) {
6190 /*
6191 * [ WFC: Unique Att Spec ]
6192 * No attribute name may appear more than once in the same
6193 * start-tag or empty-element tag.
6194 */
6195 for (i = 0; i < nbatts;i += 2) {
6196 if (xmlStrEqual(atts[i], attname)) {
6197 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6199 ctxt->sax->error(ctxt->userData,
6200 "Attribute %s redefined\n",
6201 attname);
6202 ctxt->wellFormed = 0;
6203 ctxt->disableSAX = 1;
6204 xmlFree(attname);
6205 xmlFree(attvalue);
6206 goto failed;
6207 }
6208 }
6209
6210 /*
6211 * Add the pair to atts
6212 */
6213 if (atts == NULL) {
6214 maxatts = 10;
6215 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6216 if (atts == NULL) {
6217 xmlGenericError(xmlGenericErrorContext,
6218 "malloc of %ld byte failed\n",
6219 maxatts * (long)sizeof(xmlChar *));
6220 return(NULL);
6221 }
6222 } else if (nbatts + 4 > maxatts) {
6223 maxatts *= 2;
6224 atts = (const xmlChar **) xmlRealloc((void *) atts,
6225 maxatts * sizeof(xmlChar *));
6226 if (atts == NULL) {
6227 xmlGenericError(xmlGenericErrorContext,
6228 "realloc of %ld byte failed\n",
6229 maxatts * (long)sizeof(xmlChar *));
6230 return(NULL);
6231 }
6232 }
6233 atts[nbatts++] = attname;
6234 atts[nbatts++] = attvalue;
6235 atts[nbatts] = NULL;
6236 atts[nbatts + 1] = NULL;
6237 } else {
6238 if (attname != NULL)
6239 xmlFree(attname);
6240 if (attvalue != NULL)
6241 xmlFree(attvalue);
6242 }
6243
6244failed:
6245
6246 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6247 break;
6248 if (!IS_BLANK(RAW)) {
6249 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6251 ctxt->sax->error(ctxt->userData,
6252 "attributes construct error\n");
6253 ctxt->wellFormed = 0;
6254 ctxt->disableSAX = 1;
6255 }
6256 SKIP_BLANKS;
6257 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6258 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6259 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6260 ctxt->sax->error(ctxt->userData,
6261 "xmlParseStartTag: problem parsing attributes\n");
6262 ctxt->wellFormed = 0;
6263 ctxt->disableSAX = 1;
6264 break;
6265 }
6266 GROW;
6267 }
6268
6269 /*
6270 * SAX: Start of Element !
6271 */
6272 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6273 (!ctxt->disableSAX))
6274 ctxt->sax->startElement(ctxt->userData, name, atts);
6275
6276 if (atts != NULL) {
6277 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6278 xmlFree((void *) atts);
6279 }
6280 return(name);
6281}
6282
6283/**
6284 * xmlParseEndTag:
6285 * @ctxt: an XML parser context
6286 *
6287 * parse an end of tag
6288 *
6289 * [42] ETag ::= '</' Name S? '>'
6290 *
6291 * With namespace
6292 *
6293 * [NS 9] ETag ::= '</' QName S? '>'
6294 */
6295
6296void
6297xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6298 xmlChar *name;
6299 xmlChar *oldname;
6300
6301 GROW;
6302 if ((RAW != '<') || (NXT(1) != '/')) {
6303 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6305 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6306 ctxt->wellFormed = 0;
6307 ctxt->disableSAX = 1;
6308 return;
6309 }
6310 SKIP(2);
6311
6312 name = xmlParseName(ctxt);
6313
6314 /*
6315 * We should definitely be at the ending "S? '>'" part
6316 */
6317 GROW;
6318 SKIP_BLANKS;
6319 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6320 ctxt->errNo = XML_ERR_GT_REQUIRED;
6321 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6322 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6323 ctxt->wellFormed = 0;
6324 ctxt->disableSAX = 1;
6325 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006326 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006327
6328 /*
6329 * [ WFC: Element Type Match ]
6330 * The Name in an element's end-tag must match the element type in the
6331 * start-tag.
6332 *
6333 */
6334 if ((name == NULL) || (ctxt->name == NULL) ||
6335 (!xmlStrEqual(name, ctxt->name))) {
6336 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6338 if ((name != NULL) && (ctxt->name != NULL)) {
6339 ctxt->sax->error(ctxt->userData,
6340 "Opening and ending tag mismatch: %s and %s\n",
6341 ctxt->name, name);
6342 } else if (ctxt->name != NULL) {
6343 ctxt->sax->error(ctxt->userData,
6344 "Ending tag eror for: %s\n", ctxt->name);
6345 } else {
6346 ctxt->sax->error(ctxt->userData,
6347 "Ending tag error: internal error ???\n");
6348 }
6349
6350 }
6351 ctxt->wellFormed = 0;
6352 ctxt->disableSAX = 1;
6353 }
6354
6355 /*
6356 * SAX: End of Tag
6357 */
6358 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6359 (!ctxt->disableSAX))
6360 ctxt->sax->endElement(ctxt->userData, name);
6361
6362 if (name != NULL)
6363 xmlFree(name);
6364 oldname = namePop(ctxt);
6365 spacePop(ctxt);
6366 if (oldname != NULL) {
6367#ifdef DEBUG_STACK
6368 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6369#endif
6370 xmlFree(oldname);
6371 }
6372 return;
6373}
6374
6375/**
6376 * xmlParseCDSect:
6377 * @ctxt: an XML parser context
6378 *
6379 * Parse escaped pure raw content.
6380 *
6381 * [18] CDSect ::= CDStart CData CDEnd
6382 *
6383 * [19] CDStart ::= '<![CDATA['
6384 *
6385 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6386 *
6387 * [21] CDEnd ::= ']]>'
6388 */
6389void
6390xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6391 xmlChar *buf = NULL;
6392 int len = 0;
6393 int size = XML_PARSER_BUFFER_SIZE;
6394 int r, rl;
6395 int s, sl;
6396 int cur, l;
6397 int count = 0;
6398
6399 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6400 (NXT(2) == '[') && (NXT(3) == 'C') &&
6401 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6402 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6403 (NXT(8) == '[')) {
6404 SKIP(9);
6405 } else
6406 return;
6407
6408 ctxt->instate = XML_PARSER_CDATA_SECTION;
6409 r = CUR_CHAR(rl);
6410 if (!IS_CHAR(r)) {
6411 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6412 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6413 ctxt->sax->error(ctxt->userData,
6414 "CData section not finished\n");
6415 ctxt->wellFormed = 0;
6416 ctxt->disableSAX = 1;
6417 ctxt->instate = XML_PARSER_CONTENT;
6418 return;
6419 }
6420 NEXTL(rl);
6421 s = CUR_CHAR(sl);
6422 if (!IS_CHAR(s)) {
6423 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6425 ctxt->sax->error(ctxt->userData,
6426 "CData section not finished\n");
6427 ctxt->wellFormed = 0;
6428 ctxt->disableSAX = 1;
6429 ctxt->instate = XML_PARSER_CONTENT;
6430 return;
6431 }
6432 NEXTL(sl);
6433 cur = CUR_CHAR(l);
6434 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6435 if (buf == NULL) {
6436 xmlGenericError(xmlGenericErrorContext,
6437 "malloc of %d byte failed\n", size);
6438 return;
6439 }
6440 while (IS_CHAR(cur) &&
6441 ((r != ']') || (s != ']') || (cur != '>'))) {
6442 if (len + 5 >= size) {
6443 size *= 2;
6444 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6445 if (buf == NULL) {
6446 xmlGenericError(xmlGenericErrorContext,
6447 "realloc of %d byte failed\n", size);
6448 return;
6449 }
6450 }
6451 COPY_BUF(rl,buf,len,r);
6452 r = s;
6453 rl = sl;
6454 s = cur;
6455 sl = l;
6456 count++;
6457 if (count > 50) {
6458 GROW;
6459 count = 0;
6460 }
6461 NEXTL(l);
6462 cur = CUR_CHAR(l);
6463 }
6464 buf[len] = 0;
6465 ctxt->instate = XML_PARSER_CONTENT;
6466 if (cur != '>') {
6467 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6469 ctxt->sax->error(ctxt->userData,
6470 "CData section not finished\n%.50s\n", buf);
6471 ctxt->wellFormed = 0;
6472 ctxt->disableSAX = 1;
6473 xmlFree(buf);
6474 return;
6475 }
6476 NEXTL(l);
6477
6478 /*
6479 * Ok the buffer is to be consumed as cdata.
6480 */
6481 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6482 if (ctxt->sax->cdataBlock != NULL)
6483 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006484 else if (ctxt->sax->characters != NULL)
6485 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006486 }
6487 xmlFree(buf);
6488}
6489
6490/**
6491 * xmlParseContent:
6492 * @ctxt: an XML parser context
6493 *
6494 * Parse a content:
6495 *
6496 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6497 */
6498
6499void
6500xmlParseContent(xmlParserCtxtPtr ctxt) {
6501 GROW;
6502 while (((RAW != 0) || (ctxt->token != 0)) &&
6503 ((RAW != '<') || (NXT(1) != '/'))) {
6504 const xmlChar *test = CUR_PTR;
6505 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006506 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006507 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006508
6509 /*
6510 * Handle possible processed charrefs.
6511 */
6512 if (ctxt->token != 0) {
6513 xmlParseCharData(ctxt, 0);
6514 }
6515 /*
6516 * First case : a Processing Instruction.
6517 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006518 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006519 xmlParsePI(ctxt);
6520 }
6521
6522 /*
6523 * Second case : a CDSection
6524 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006525 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006526 (NXT(2) == '[') && (NXT(3) == 'C') &&
6527 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6528 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6529 (NXT(8) == '[')) {
6530 xmlParseCDSect(ctxt);
6531 }
6532
6533 /*
6534 * Third case : a comment
6535 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006536 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006537 (NXT(2) == '-') && (NXT(3) == '-')) {
6538 xmlParseComment(ctxt);
6539 ctxt->instate = XML_PARSER_CONTENT;
6540 }
6541
6542 /*
6543 * Fourth case : a sub-element.
6544 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006545 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006546 xmlParseElement(ctxt);
6547 }
6548
6549 /*
6550 * Fifth case : a reference. If if has not been resolved,
6551 * parsing returns it's Name, create the node
6552 */
6553
Daniel Veillard21a0f912001-02-25 19:54:14 +00006554 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006555 xmlParseReference(ctxt);
6556 }
6557
6558 /*
6559 * Last case, text. Note that References are handled directly.
6560 */
6561 else {
6562 xmlParseCharData(ctxt, 0);
6563 }
6564
6565 GROW;
6566 /*
6567 * Pop-up of finished entities.
6568 */
6569 while ((RAW == 0) && (ctxt->inputNr > 1))
6570 xmlPopInput(ctxt);
6571 SHRINK;
6572
6573 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6574 (tok == ctxt->token)) {
6575 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6576 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6577 ctxt->sax->error(ctxt->userData,
6578 "detected an error in element content\n");
6579 ctxt->wellFormed = 0;
6580 ctxt->disableSAX = 1;
6581 ctxt->instate = XML_PARSER_EOF;
6582 break;
6583 }
6584 }
6585}
6586
6587/**
6588 * xmlParseElement:
6589 * @ctxt: an XML parser context
6590 *
6591 * parse an XML element, this is highly recursive
6592 *
6593 * [39] element ::= EmptyElemTag | STag content ETag
6594 *
6595 * [ WFC: Element Type Match ]
6596 * The Name in an element's end-tag must match the element type in the
6597 * start-tag.
6598 *
6599 * [ VC: Element Valid ]
6600 * An element is valid if there is a declaration matching elementdecl
6601 * where the Name matches the element type and one of the following holds:
6602 * - The declaration matches EMPTY and the element has no content.
6603 * - The declaration matches children and the sequence of child elements
6604 * belongs to the language generated by the regular expression in the
6605 * content model, with optional white space (characters matching the
6606 * nonterminal S) between each pair of child elements.
6607 * - The declaration matches Mixed and the content consists of character
6608 * data and child elements whose types match names in the content model.
6609 * - The declaration matches ANY, and the types of any child elements have
6610 * been declared.
6611 */
6612
6613void
6614xmlParseElement(xmlParserCtxtPtr ctxt) {
6615 const xmlChar *openTag = CUR_PTR;
6616 xmlChar *name;
6617 xmlChar *oldname;
6618 xmlParserNodeInfo node_info;
6619 xmlNodePtr ret;
6620
6621 /* Capture start position */
6622 if (ctxt->record_info) {
6623 node_info.begin_pos = ctxt->input->consumed +
6624 (CUR_PTR - ctxt->input->base);
6625 node_info.begin_line = ctxt->input->line;
6626 }
6627
6628 if (ctxt->spaceNr == 0)
6629 spacePush(ctxt, -1);
6630 else
6631 spacePush(ctxt, *ctxt->space);
6632
6633 name = xmlParseStartTag(ctxt);
6634 if (name == NULL) {
6635 spacePop(ctxt);
6636 return;
6637 }
6638 namePush(ctxt, name);
6639 ret = ctxt->node;
6640
6641 /*
6642 * [ VC: Root Element Type ]
6643 * The Name in the document type declaration must match the element
6644 * type of the root element.
6645 */
6646 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6647 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6648 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6649
6650 /*
6651 * Check for an Empty Element.
6652 */
6653 if ((RAW == '/') && (NXT(1) == '>')) {
6654 SKIP(2);
6655 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6656 (!ctxt->disableSAX))
6657 ctxt->sax->endElement(ctxt->userData, name);
6658 oldname = namePop(ctxt);
6659 spacePop(ctxt);
6660 if (oldname != NULL) {
6661#ifdef DEBUG_STACK
6662 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6663#endif
6664 xmlFree(oldname);
6665 }
6666 if ( ret != NULL && ctxt->record_info ) {
6667 node_info.end_pos = ctxt->input->consumed +
6668 (CUR_PTR - ctxt->input->base);
6669 node_info.end_line = ctxt->input->line;
6670 node_info.node = ret;
6671 xmlParserAddNodeInfo(ctxt, &node_info);
6672 }
6673 return;
6674 }
6675 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006676 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006677 } else {
6678 ctxt->errNo = XML_ERR_GT_REQUIRED;
6679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6680 ctxt->sax->error(ctxt->userData,
6681 "Couldn't find end of Start Tag\n%.30s\n",
6682 openTag);
6683 ctxt->wellFormed = 0;
6684 ctxt->disableSAX = 1;
6685
6686 /*
6687 * end of parsing of this node.
6688 */
6689 nodePop(ctxt);
6690 oldname = namePop(ctxt);
6691 spacePop(ctxt);
6692 if (oldname != NULL) {
6693#ifdef DEBUG_STACK
6694 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6695#endif
6696 xmlFree(oldname);
6697 }
6698
6699 /*
6700 * Capture end position and add node
6701 */
6702 if ( ret != NULL && ctxt->record_info ) {
6703 node_info.end_pos = ctxt->input->consumed +
6704 (CUR_PTR - ctxt->input->base);
6705 node_info.end_line = ctxt->input->line;
6706 node_info.node = ret;
6707 xmlParserAddNodeInfo(ctxt, &node_info);
6708 }
6709 return;
6710 }
6711
6712 /*
6713 * Parse the content of the element:
6714 */
6715 xmlParseContent(ctxt);
6716 if (!IS_CHAR(RAW)) {
6717 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6718 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6719 ctxt->sax->error(ctxt->userData,
6720 "Premature end of data in tag %.30s\n", openTag);
6721 ctxt->wellFormed = 0;
6722 ctxt->disableSAX = 1;
6723
6724 /*
6725 * end of parsing of this node.
6726 */
6727 nodePop(ctxt);
6728 oldname = namePop(ctxt);
6729 spacePop(ctxt);
6730 if (oldname != NULL) {
6731#ifdef DEBUG_STACK
6732 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6733#endif
6734 xmlFree(oldname);
6735 }
6736 return;
6737 }
6738
6739 /*
6740 * parse the end of tag: '</' should be here.
6741 */
6742 xmlParseEndTag(ctxt);
6743
6744 /*
6745 * Capture end position and add node
6746 */
6747 if ( ret != NULL && ctxt->record_info ) {
6748 node_info.end_pos = ctxt->input->consumed +
6749 (CUR_PTR - ctxt->input->base);
6750 node_info.end_line = ctxt->input->line;
6751 node_info.node = ret;
6752 xmlParserAddNodeInfo(ctxt, &node_info);
6753 }
6754}
6755
6756/**
6757 * xmlParseVersionNum:
6758 * @ctxt: an XML parser context
6759 *
6760 * parse the XML version value.
6761 *
6762 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6763 *
6764 * Returns the string giving the XML version number, or NULL
6765 */
6766xmlChar *
6767xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6768 xmlChar *buf = NULL;
6769 int len = 0;
6770 int size = 10;
6771 xmlChar cur;
6772
6773 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6774 if (buf == NULL) {
6775 xmlGenericError(xmlGenericErrorContext,
6776 "malloc of %d byte failed\n", size);
6777 return(NULL);
6778 }
6779 cur = CUR;
6780 while (((cur >= 'a') && (cur <= 'z')) ||
6781 ((cur >= 'A') && (cur <= 'Z')) ||
6782 ((cur >= '0') && (cur <= '9')) ||
6783 (cur == '_') || (cur == '.') ||
6784 (cur == ':') || (cur == '-')) {
6785 if (len + 1 >= size) {
6786 size *= 2;
6787 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6788 if (buf == NULL) {
6789 xmlGenericError(xmlGenericErrorContext,
6790 "realloc of %d byte failed\n", size);
6791 return(NULL);
6792 }
6793 }
6794 buf[len++] = cur;
6795 NEXT;
6796 cur=CUR;
6797 }
6798 buf[len] = 0;
6799 return(buf);
6800}
6801
6802/**
6803 * xmlParseVersionInfo:
6804 * @ctxt: an XML parser context
6805 *
6806 * parse the XML version.
6807 *
6808 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6809 *
6810 * [25] Eq ::= S? '=' S?
6811 *
6812 * Returns the version string, e.g. "1.0"
6813 */
6814
6815xmlChar *
6816xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6817 xmlChar *version = NULL;
6818 const xmlChar *q;
6819
6820 if ((RAW == 'v') && (NXT(1) == 'e') &&
6821 (NXT(2) == 'r') && (NXT(3) == 's') &&
6822 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6823 (NXT(6) == 'n')) {
6824 SKIP(7);
6825 SKIP_BLANKS;
6826 if (RAW != '=') {
6827 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6829 ctxt->sax->error(ctxt->userData,
6830 "xmlParseVersionInfo : expected '='\n");
6831 ctxt->wellFormed = 0;
6832 ctxt->disableSAX = 1;
6833 return(NULL);
6834 }
6835 NEXT;
6836 SKIP_BLANKS;
6837 if (RAW == '"') {
6838 NEXT;
6839 q = CUR_PTR;
6840 version = xmlParseVersionNum(ctxt);
6841 if (RAW != '"') {
6842 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6843 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6844 ctxt->sax->error(ctxt->userData,
6845 "String not closed\n%.50s\n", q);
6846 ctxt->wellFormed = 0;
6847 ctxt->disableSAX = 1;
6848 } else
6849 NEXT;
6850 } else if (RAW == '\''){
6851 NEXT;
6852 q = CUR_PTR;
6853 version = xmlParseVersionNum(ctxt);
6854 if (RAW != '\'') {
6855 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6857 ctxt->sax->error(ctxt->userData,
6858 "String not closed\n%.50s\n", q);
6859 ctxt->wellFormed = 0;
6860 ctxt->disableSAX = 1;
6861 } else
6862 NEXT;
6863 } else {
6864 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6865 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6866 ctxt->sax->error(ctxt->userData,
6867 "xmlParseVersionInfo : expected ' or \"\n");
6868 ctxt->wellFormed = 0;
6869 ctxt->disableSAX = 1;
6870 }
6871 }
6872 return(version);
6873}
6874
6875/**
6876 * xmlParseEncName:
6877 * @ctxt: an XML parser context
6878 *
6879 * parse the XML encoding name
6880 *
6881 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6882 *
6883 * Returns the encoding name value or NULL
6884 */
6885xmlChar *
6886xmlParseEncName(xmlParserCtxtPtr ctxt) {
6887 xmlChar *buf = NULL;
6888 int len = 0;
6889 int size = 10;
6890 xmlChar cur;
6891
6892 cur = CUR;
6893 if (((cur >= 'a') && (cur <= 'z')) ||
6894 ((cur >= 'A') && (cur <= 'Z'))) {
6895 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6896 if (buf == NULL) {
6897 xmlGenericError(xmlGenericErrorContext,
6898 "malloc of %d byte failed\n", size);
6899 return(NULL);
6900 }
6901
6902 buf[len++] = cur;
6903 NEXT;
6904 cur = CUR;
6905 while (((cur >= 'a') && (cur <= 'z')) ||
6906 ((cur >= 'A') && (cur <= 'Z')) ||
6907 ((cur >= '0') && (cur <= '9')) ||
6908 (cur == '.') || (cur == '_') ||
6909 (cur == '-')) {
6910 if (len + 1 >= size) {
6911 size *= 2;
6912 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6913 if (buf == NULL) {
6914 xmlGenericError(xmlGenericErrorContext,
6915 "realloc of %d byte failed\n", size);
6916 return(NULL);
6917 }
6918 }
6919 buf[len++] = cur;
6920 NEXT;
6921 cur = CUR;
6922 if (cur == 0) {
6923 SHRINK;
6924 GROW;
6925 cur = CUR;
6926 }
6927 }
6928 buf[len] = 0;
6929 } else {
6930 ctxt->errNo = XML_ERR_ENCODING_NAME;
6931 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6932 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6933 ctxt->wellFormed = 0;
6934 ctxt->disableSAX = 1;
6935 }
6936 return(buf);
6937}
6938
6939/**
6940 * xmlParseEncodingDecl:
6941 * @ctxt: an XML parser context
6942 *
6943 * parse the XML encoding declaration
6944 *
6945 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6946 *
6947 * this setups the conversion filters.
6948 *
6949 * Returns the encoding value or NULL
6950 */
6951
6952xmlChar *
6953xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6954 xmlChar *encoding = NULL;
6955 const xmlChar *q;
6956
6957 SKIP_BLANKS;
6958 if ((RAW == 'e') && (NXT(1) == 'n') &&
6959 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6960 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6961 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6962 SKIP(8);
6963 SKIP_BLANKS;
6964 if (RAW != '=') {
6965 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6967 ctxt->sax->error(ctxt->userData,
6968 "xmlParseEncodingDecl : expected '='\n");
6969 ctxt->wellFormed = 0;
6970 ctxt->disableSAX = 1;
6971 return(NULL);
6972 }
6973 NEXT;
6974 SKIP_BLANKS;
6975 if (RAW == '"') {
6976 NEXT;
6977 q = CUR_PTR;
6978 encoding = xmlParseEncName(ctxt);
6979 if (RAW != '"') {
6980 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6981 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6982 ctxt->sax->error(ctxt->userData,
6983 "String not closed\n%.50s\n", q);
6984 ctxt->wellFormed = 0;
6985 ctxt->disableSAX = 1;
6986 } else
6987 NEXT;
6988 } else if (RAW == '\''){
6989 NEXT;
6990 q = CUR_PTR;
6991 encoding = xmlParseEncName(ctxt);
6992 if (RAW != '\'') {
6993 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6994 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6995 ctxt->sax->error(ctxt->userData,
6996 "String not closed\n%.50s\n", q);
6997 ctxt->wellFormed = 0;
6998 ctxt->disableSAX = 1;
6999 } else
7000 NEXT;
7001 } else if (RAW == '"'){
7002 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7004 ctxt->sax->error(ctxt->userData,
7005 "xmlParseEncodingDecl : expected ' or \"\n");
7006 ctxt->wellFormed = 0;
7007 ctxt->disableSAX = 1;
7008 }
7009 if (encoding != NULL) {
7010 xmlCharEncoding enc;
7011 xmlCharEncodingHandlerPtr handler;
7012
7013 if (ctxt->input->encoding != NULL)
7014 xmlFree((xmlChar *) ctxt->input->encoding);
7015 ctxt->input->encoding = encoding;
7016
7017 enc = xmlParseCharEncoding((const char *) encoding);
7018 /*
7019 * registered set of known encodings
7020 */
7021 if (enc != XML_CHAR_ENCODING_ERROR) {
7022 xmlSwitchEncoding(ctxt, enc);
7023 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7024 xmlFree(encoding);
7025 return(NULL);
7026 }
7027 } else {
7028 /*
7029 * fallback for unknown encodings
7030 */
7031 handler = xmlFindCharEncodingHandler((const char *) encoding);
7032 if (handler != NULL) {
7033 xmlSwitchToEncoding(ctxt, handler);
7034 } else {
7035 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7036 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7037 ctxt->sax->error(ctxt->userData,
7038 "Unsupported encoding %s\n", encoding);
7039 return(NULL);
7040 }
7041 }
7042 }
7043 }
7044 return(encoding);
7045}
7046
7047/**
7048 * xmlParseSDDecl:
7049 * @ctxt: an XML parser context
7050 *
7051 * parse the XML standalone declaration
7052 *
7053 * [32] SDDecl ::= S 'standalone' Eq
7054 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7055 *
7056 * [ VC: Standalone Document Declaration ]
7057 * TODO The standalone document declaration must have the value "no"
7058 * if any external markup declarations contain declarations of:
7059 * - attributes with default values, if elements to which these
7060 * attributes apply appear in the document without specifications
7061 * of values for these attributes, or
7062 * - entities (other than amp, lt, gt, apos, quot), if references
7063 * to those entities appear in the document, or
7064 * - attributes with values subject to normalization, where the
7065 * attribute appears in the document with a value which will change
7066 * as a result of normalization, or
7067 * - element types with element content, if white space occurs directly
7068 * within any instance of those types.
7069 *
7070 * Returns 1 if standalone, 0 otherwise
7071 */
7072
7073int
7074xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7075 int standalone = -1;
7076
7077 SKIP_BLANKS;
7078 if ((RAW == 's') && (NXT(1) == 't') &&
7079 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7080 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7081 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7082 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7083 SKIP(10);
7084 SKIP_BLANKS;
7085 if (RAW != '=') {
7086 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7088 ctxt->sax->error(ctxt->userData,
7089 "XML standalone declaration : expected '='\n");
7090 ctxt->wellFormed = 0;
7091 ctxt->disableSAX = 1;
7092 return(standalone);
7093 }
7094 NEXT;
7095 SKIP_BLANKS;
7096 if (RAW == '\''){
7097 NEXT;
7098 if ((RAW == 'n') && (NXT(1) == 'o')) {
7099 standalone = 0;
7100 SKIP(2);
7101 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7102 (NXT(2) == 's')) {
7103 standalone = 1;
7104 SKIP(3);
7105 } else {
7106 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7108 ctxt->sax->error(ctxt->userData,
7109 "standalone accepts only 'yes' or 'no'\n");
7110 ctxt->wellFormed = 0;
7111 ctxt->disableSAX = 1;
7112 }
7113 if (RAW != '\'') {
7114 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7116 ctxt->sax->error(ctxt->userData, "String not closed\n");
7117 ctxt->wellFormed = 0;
7118 ctxt->disableSAX = 1;
7119 } else
7120 NEXT;
7121 } else if (RAW == '"'){
7122 NEXT;
7123 if ((RAW == 'n') && (NXT(1) == 'o')) {
7124 standalone = 0;
7125 SKIP(2);
7126 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7127 (NXT(2) == 's')) {
7128 standalone = 1;
7129 SKIP(3);
7130 } else {
7131 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7133 ctxt->sax->error(ctxt->userData,
7134 "standalone accepts only 'yes' or 'no'\n");
7135 ctxt->wellFormed = 0;
7136 ctxt->disableSAX = 1;
7137 }
7138 if (RAW != '"') {
7139 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7140 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7141 ctxt->sax->error(ctxt->userData, "String not closed\n");
7142 ctxt->wellFormed = 0;
7143 ctxt->disableSAX = 1;
7144 } else
7145 NEXT;
7146 } else {
7147 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7148 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7149 ctxt->sax->error(ctxt->userData,
7150 "Standalone value not found\n");
7151 ctxt->wellFormed = 0;
7152 ctxt->disableSAX = 1;
7153 }
7154 }
7155 return(standalone);
7156}
7157
7158/**
7159 * xmlParseXMLDecl:
7160 * @ctxt: an XML parser context
7161 *
7162 * parse an XML declaration header
7163 *
7164 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7165 */
7166
7167void
7168xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7169 xmlChar *version;
7170
7171 /*
7172 * We know that '<?xml' is here.
7173 */
7174 SKIP(5);
7175
7176 if (!IS_BLANK(RAW)) {
7177 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7178 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7179 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7180 ctxt->wellFormed = 0;
7181 ctxt->disableSAX = 1;
7182 }
7183 SKIP_BLANKS;
7184
7185 /*
7186 * We should have the VersionInfo here.
7187 */
7188 version = xmlParseVersionInfo(ctxt);
7189 if (version == NULL)
7190 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7191 ctxt->version = xmlStrdup(version);
7192 xmlFree(version);
7193
7194 /*
7195 * We may have the encoding declaration
7196 */
7197 if (!IS_BLANK(RAW)) {
7198 if ((RAW == '?') && (NXT(1) == '>')) {
7199 SKIP(2);
7200 return;
7201 }
7202 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7204 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7205 ctxt->wellFormed = 0;
7206 ctxt->disableSAX = 1;
7207 }
7208 xmlParseEncodingDecl(ctxt);
7209 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7210 /*
7211 * The XML REC instructs us to stop parsing right here
7212 */
7213 return;
7214 }
7215
7216 /*
7217 * We may have the standalone status.
7218 */
7219 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7220 if ((RAW == '?') && (NXT(1) == '>')) {
7221 SKIP(2);
7222 return;
7223 }
7224 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7226 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7227 ctxt->wellFormed = 0;
7228 ctxt->disableSAX = 1;
7229 }
7230 SKIP_BLANKS;
7231 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7232
7233 SKIP_BLANKS;
7234 if ((RAW == '?') && (NXT(1) == '>')) {
7235 SKIP(2);
7236 } else if (RAW == '>') {
7237 /* Deprecated old WD ... */
7238 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7240 ctxt->sax->error(ctxt->userData,
7241 "XML declaration must end-up with '?>'\n");
7242 ctxt->wellFormed = 0;
7243 ctxt->disableSAX = 1;
7244 NEXT;
7245 } else {
7246 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7247 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7248 ctxt->sax->error(ctxt->userData,
7249 "parsing XML declaration: '?>' expected\n");
7250 ctxt->wellFormed = 0;
7251 ctxt->disableSAX = 1;
7252 MOVETO_ENDTAG(CUR_PTR);
7253 NEXT;
7254 }
7255}
7256
7257/**
7258 * xmlParseMisc:
7259 * @ctxt: an XML parser context
7260 *
7261 * parse an XML Misc* optionnal field.
7262 *
7263 * [27] Misc ::= Comment | PI | S
7264 */
7265
7266void
7267xmlParseMisc(xmlParserCtxtPtr ctxt) {
7268 while (((RAW == '<') && (NXT(1) == '?')) ||
7269 ((RAW == '<') && (NXT(1) == '!') &&
7270 (NXT(2) == '-') && (NXT(3) == '-')) ||
7271 IS_BLANK(CUR)) {
7272 if ((RAW == '<') && (NXT(1) == '?')) {
7273 xmlParsePI(ctxt);
7274 } else if (IS_BLANK(CUR)) {
7275 NEXT;
7276 } else
7277 xmlParseComment(ctxt);
7278 }
7279}
7280
7281/**
7282 * xmlParseDocument:
7283 * @ctxt: an XML parser context
7284 *
7285 * parse an XML document (and build a tree if using the standard SAX
7286 * interface).
7287 *
7288 * [1] document ::= prolog element Misc*
7289 *
7290 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7291 *
7292 * Returns 0, -1 in case of error. the parser context is augmented
7293 * as a result of the parsing.
7294 */
7295
7296int
7297xmlParseDocument(xmlParserCtxtPtr ctxt) {
7298 xmlChar start[4];
7299 xmlCharEncoding enc;
7300
7301 xmlInitParser();
7302
7303 GROW;
7304
7305 /*
7306 * SAX: beginning of the document processing.
7307 */
7308 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7309 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7310
Daniel Veillard50f34372001-08-03 12:06:36 +00007311 if (ctxt->encoding == (const xmlChar *)XML_CHAR_ENCODING_NONE) {
Daniel Veillard4aafa792001-07-28 17:21:12 +00007312 /*
7313 * Get the 4 first bytes and decode the charset
7314 * if enc != XML_CHAR_ENCODING_NONE
7315 * plug some encoding conversion routines.
7316 */
7317 start[0] = RAW;
7318 start[1] = NXT(1);
7319 start[2] = NXT(2);
7320 start[3] = NXT(3);
7321 enc = xmlDetectCharEncoding(start, 4);
7322 if (enc != XML_CHAR_ENCODING_NONE) {
7323 xmlSwitchEncoding(ctxt, enc);
7324 }
Owen Taylor3473f882001-02-23 17:55:21 +00007325 }
7326
7327
7328 if (CUR == 0) {
7329 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7331 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7332 ctxt->wellFormed = 0;
7333 ctxt->disableSAX = 1;
7334 }
7335
7336 /*
7337 * Check for the XMLDecl in the Prolog.
7338 */
7339 GROW;
7340 if ((RAW == '<') && (NXT(1) == '?') &&
7341 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7342 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7343
7344 /*
7345 * Note that we will switch encoding on the fly.
7346 */
7347 xmlParseXMLDecl(ctxt);
7348 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7349 /*
7350 * The XML REC instructs us to stop parsing right here
7351 */
7352 return(-1);
7353 }
7354 ctxt->standalone = ctxt->input->standalone;
7355 SKIP_BLANKS;
7356 } else {
7357 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7358 }
7359 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7360 ctxt->sax->startDocument(ctxt->userData);
7361
7362 /*
7363 * The Misc part of the Prolog
7364 */
7365 GROW;
7366 xmlParseMisc(ctxt);
7367
7368 /*
7369 * Then possibly doc type declaration(s) and more Misc
7370 * (doctypedecl Misc*)?
7371 */
7372 GROW;
7373 if ((RAW == '<') && (NXT(1) == '!') &&
7374 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7375 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7376 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7377 (NXT(8) == 'E')) {
7378
7379 ctxt->inSubset = 1;
7380 xmlParseDocTypeDecl(ctxt);
7381 if (RAW == '[') {
7382 ctxt->instate = XML_PARSER_DTD;
7383 xmlParseInternalSubset(ctxt);
7384 }
7385
7386 /*
7387 * Create and update the external subset.
7388 */
7389 ctxt->inSubset = 2;
7390 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7391 (!ctxt->disableSAX))
7392 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7393 ctxt->extSubSystem, ctxt->extSubURI);
7394 ctxt->inSubset = 0;
7395
7396
7397 ctxt->instate = XML_PARSER_PROLOG;
7398 xmlParseMisc(ctxt);
7399 }
7400
7401 /*
7402 * Time to start parsing the tree itself
7403 */
7404 GROW;
7405 if (RAW != '<') {
7406 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7408 ctxt->sax->error(ctxt->userData,
7409 "Start tag expected, '<' not found\n");
7410 ctxt->wellFormed = 0;
7411 ctxt->disableSAX = 1;
7412 ctxt->instate = XML_PARSER_EOF;
7413 } else {
7414 ctxt->instate = XML_PARSER_CONTENT;
7415 xmlParseElement(ctxt);
7416 ctxt->instate = XML_PARSER_EPILOG;
7417
7418
7419 /*
7420 * The Misc part at the end
7421 */
7422 xmlParseMisc(ctxt);
7423
7424 if (RAW != 0) {
7425 ctxt->errNo = XML_ERR_DOCUMENT_END;
7426 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7427 ctxt->sax->error(ctxt->userData,
7428 "Extra content at the end of the document\n");
7429 ctxt->wellFormed = 0;
7430 ctxt->disableSAX = 1;
7431 }
7432 ctxt->instate = XML_PARSER_EOF;
7433 }
7434
7435 /*
7436 * SAX: end of the document processing.
7437 */
7438 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7439 (!ctxt->disableSAX))
7440 ctxt->sax->endDocument(ctxt->userData);
7441
7442 if (! ctxt->wellFormed) return(-1);
7443 return(0);
7444}
7445
7446/**
7447 * xmlParseExtParsedEnt:
7448 * @ctxt: an XML parser context
7449 *
7450 * parse a genreral parsed entity
7451 * An external general parsed entity is well-formed if it matches the
7452 * production labeled extParsedEnt.
7453 *
7454 * [78] extParsedEnt ::= TextDecl? content
7455 *
7456 * Returns 0, -1 in case of error. the parser context is augmented
7457 * as a result of the parsing.
7458 */
7459
7460int
7461xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7462 xmlChar start[4];
7463 xmlCharEncoding enc;
7464
7465 xmlDefaultSAXHandlerInit();
7466
7467 GROW;
7468
7469 /*
7470 * SAX: beginning of the document processing.
7471 */
7472 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7473 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7474
7475 /*
7476 * Get the 4 first bytes and decode the charset
7477 * if enc != XML_CHAR_ENCODING_NONE
7478 * plug some encoding conversion routines.
7479 */
7480 start[0] = RAW;
7481 start[1] = NXT(1);
7482 start[2] = NXT(2);
7483 start[3] = NXT(3);
7484 enc = xmlDetectCharEncoding(start, 4);
7485 if (enc != XML_CHAR_ENCODING_NONE) {
7486 xmlSwitchEncoding(ctxt, enc);
7487 }
7488
7489
7490 if (CUR == 0) {
7491 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7492 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7493 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7494 ctxt->wellFormed = 0;
7495 ctxt->disableSAX = 1;
7496 }
7497
7498 /*
7499 * Check for the XMLDecl in the Prolog.
7500 */
7501 GROW;
7502 if ((RAW == '<') && (NXT(1) == '?') &&
7503 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7504 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7505
7506 /*
7507 * Note that we will switch encoding on the fly.
7508 */
7509 xmlParseXMLDecl(ctxt);
7510 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7511 /*
7512 * The XML REC instructs us to stop parsing right here
7513 */
7514 return(-1);
7515 }
7516 SKIP_BLANKS;
7517 } else {
7518 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7519 }
7520 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7521 ctxt->sax->startDocument(ctxt->userData);
7522
7523 /*
7524 * Doing validity checking on chunk doesn't make sense
7525 */
7526 ctxt->instate = XML_PARSER_CONTENT;
7527 ctxt->validate = 0;
7528 ctxt->loadsubset = 0;
7529 ctxt->depth = 0;
7530
7531 xmlParseContent(ctxt);
7532
7533 if ((RAW == '<') && (NXT(1) == '/')) {
7534 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7536 ctxt->sax->error(ctxt->userData,
7537 "chunk is not well balanced\n");
7538 ctxt->wellFormed = 0;
7539 ctxt->disableSAX = 1;
7540 } else if (RAW != 0) {
7541 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7542 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7543 ctxt->sax->error(ctxt->userData,
7544 "extra content at the end of well balanced chunk\n");
7545 ctxt->wellFormed = 0;
7546 ctxt->disableSAX = 1;
7547 }
7548
7549 /*
7550 * SAX: end of the document processing.
7551 */
7552 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7553 (!ctxt->disableSAX))
7554 ctxt->sax->endDocument(ctxt->userData);
7555
7556 if (! ctxt->wellFormed) return(-1);
7557 return(0);
7558}
7559
7560/************************************************************************
7561 * *
7562 * Progressive parsing interfaces *
7563 * *
7564 ************************************************************************/
7565
7566/**
7567 * xmlParseLookupSequence:
7568 * @ctxt: an XML parser context
7569 * @first: the first char to lookup
7570 * @next: the next char to lookup or zero
7571 * @third: the next char to lookup or zero
7572 *
7573 * Try to find if a sequence (first, next, third) or just (first next) or
7574 * (first) is available in the input stream.
7575 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7576 * to avoid rescanning sequences of bytes, it DOES change the state of the
7577 * parser, do not use liberally.
7578 *
7579 * Returns the index to the current parsing point if the full sequence
7580 * is available, -1 otherwise.
7581 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007582static int
Owen Taylor3473f882001-02-23 17:55:21 +00007583xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7584 xmlChar next, xmlChar third) {
7585 int base, len;
7586 xmlParserInputPtr in;
7587 const xmlChar *buf;
7588
7589 in = ctxt->input;
7590 if (in == NULL) return(-1);
7591 base = in->cur - in->base;
7592 if (base < 0) return(-1);
7593 if (ctxt->checkIndex > base)
7594 base = ctxt->checkIndex;
7595 if (in->buf == NULL) {
7596 buf = in->base;
7597 len = in->length;
7598 } else {
7599 buf = in->buf->buffer->content;
7600 len = in->buf->buffer->use;
7601 }
7602 /* take into account the sequence length */
7603 if (third) len -= 2;
7604 else if (next) len --;
7605 for (;base < len;base++) {
7606 if (buf[base] == first) {
7607 if (third != 0) {
7608 if ((buf[base + 1] != next) ||
7609 (buf[base + 2] != third)) continue;
7610 } else if (next != 0) {
7611 if (buf[base + 1] != next) continue;
7612 }
7613 ctxt->checkIndex = 0;
7614#ifdef DEBUG_PUSH
7615 if (next == 0)
7616 xmlGenericError(xmlGenericErrorContext,
7617 "PP: lookup '%c' found at %d\n",
7618 first, base);
7619 else if (third == 0)
7620 xmlGenericError(xmlGenericErrorContext,
7621 "PP: lookup '%c%c' found at %d\n",
7622 first, next, base);
7623 else
7624 xmlGenericError(xmlGenericErrorContext,
7625 "PP: lookup '%c%c%c' found at %d\n",
7626 first, next, third, base);
7627#endif
7628 return(base - (in->cur - in->base));
7629 }
7630 }
7631 ctxt->checkIndex = base;
7632#ifdef DEBUG_PUSH
7633 if (next == 0)
7634 xmlGenericError(xmlGenericErrorContext,
7635 "PP: lookup '%c' failed\n", first);
7636 else if (third == 0)
7637 xmlGenericError(xmlGenericErrorContext,
7638 "PP: lookup '%c%c' failed\n", first, next);
7639 else
7640 xmlGenericError(xmlGenericErrorContext,
7641 "PP: lookup '%c%c%c' failed\n", first, next, third);
7642#endif
7643 return(-1);
7644}
7645
7646/**
7647 * xmlParseTryOrFinish:
7648 * @ctxt: an XML parser context
7649 * @terminate: last chunk indicator
7650 *
7651 * Try to progress on parsing
7652 *
7653 * Returns zero if no parsing was possible
7654 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007655static int
Owen Taylor3473f882001-02-23 17:55:21 +00007656xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7657 int ret = 0;
7658 int avail;
7659 xmlChar cur, next;
7660
7661#ifdef DEBUG_PUSH
7662 switch (ctxt->instate) {
7663 case XML_PARSER_EOF:
7664 xmlGenericError(xmlGenericErrorContext,
7665 "PP: try EOF\n"); break;
7666 case XML_PARSER_START:
7667 xmlGenericError(xmlGenericErrorContext,
7668 "PP: try START\n"); break;
7669 case XML_PARSER_MISC:
7670 xmlGenericError(xmlGenericErrorContext,
7671 "PP: try MISC\n");break;
7672 case XML_PARSER_COMMENT:
7673 xmlGenericError(xmlGenericErrorContext,
7674 "PP: try COMMENT\n");break;
7675 case XML_PARSER_PROLOG:
7676 xmlGenericError(xmlGenericErrorContext,
7677 "PP: try PROLOG\n");break;
7678 case XML_PARSER_START_TAG:
7679 xmlGenericError(xmlGenericErrorContext,
7680 "PP: try START_TAG\n");break;
7681 case XML_PARSER_CONTENT:
7682 xmlGenericError(xmlGenericErrorContext,
7683 "PP: try CONTENT\n");break;
7684 case XML_PARSER_CDATA_SECTION:
7685 xmlGenericError(xmlGenericErrorContext,
7686 "PP: try CDATA_SECTION\n");break;
7687 case XML_PARSER_END_TAG:
7688 xmlGenericError(xmlGenericErrorContext,
7689 "PP: try END_TAG\n");break;
7690 case XML_PARSER_ENTITY_DECL:
7691 xmlGenericError(xmlGenericErrorContext,
7692 "PP: try ENTITY_DECL\n");break;
7693 case XML_PARSER_ENTITY_VALUE:
7694 xmlGenericError(xmlGenericErrorContext,
7695 "PP: try ENTITY_VALUE\n");break;
7696 case XML_PARSER_ATTRIBUTE_VALUE:
7697 xmlGenericError(xmlGenericErrorContext,
7698 "PP: try ATTRIBUTE_VALUE\n");break;
7699 case XML_PARSER_DTD:
7700 xmlGenericError(xmlGenericErrorContext,
7701 "PP: try DTD\n");break;
7702 case XML_PARSER_EPILOG:
7703 xmlGenericError(xmlGenericErrorContext,
7704 "PP: try EPILOG\n");break;
7705 case XML_PARSER_PI:
7706 xmlGenericError(xmlGenericErrorContext,
7707 "PP: try PI\n");break;
7708 case XML_PARSER_IGNORE:
7709 xmlGenericError(xmlGenericErrorContext,
7710 "PP: try IGNORE\n");break;
7711 }
7712#endif
7713
7714 while (1) {
7715 /*
7716 * Pop-up of finished entities.
7717 */
7718 while ((RAW == 0) && (ctxt->inputNr > 1))
7719 xmlPopInput(ctxt);
7720
7721 if (ctxt->input ==NULL) break;
7722 if (ctxt->input->buf == NULL)
7723 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7724 else
7725 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7726 if (avail < 1)
7727 goto done;
7728 switch (ctxt->instate) {
7729 case XML_PARSER_EOF:
7730 /*
7731 * Document parsing is done !
7732 */
7733 goto done;
7734 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007735 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7736 xmlChar start[4];
7737 xmlCharEncoding enc;
7738
7739 /*
7740 * Very first chars read from the document flow.
7741 */
7742 if (avail < 4)
7743 goto done;
7744
7745 /*
7746 * Get the 4 first bytes and decode the charset
7747 * if enc != XML_CHAR_ENCODING_NONE
7748 * plug some encoding conversion routines.
7749 */
7750 start[0] = RAW;
7751 start[1] = NXT(1);
7752 start[2] = NXT(2);
7753 start[3] = NXT(3);
7754 enc = xmlDetectCharEncoding(start, 4);
7755 if (enc != XML_CHAR_ENCODING_NONE) {
7756 xmlSwitchEncoding(ctxt, enc);
7757 }
7758 break;
7759 }
Owen Taylor3473f882001-02-23 17:55:21 +00007760
7761 cur = ctxt->input->cur[0];
7762 next = ctxt->input->cur[1];
7763 if (cur == 0) {
7764 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7765 ctxt->sax->setDocumentLocator(ctxt->userData,
7766 &xmlDefaultSAXLocator);
7767 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7768 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7769 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7770 ctxt->wellFormed = 0;
7771 ctxt->disableSAX = 1;
7772 ctxt->instate = XML_PARSER_EOF;
7773#ifdef DEBUG_PUSH
7774 xmlGenericError(xmlGenericErrorContext,
7775 "PP: entering EOF\n");
7776#endif
7777 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7778 ctxt->sax->endDocument(ctxt->userData);
7779 goto done;
7780 }
7781 if ((cur == '<') && (next == '?')) {
7782 /* PI or XML decl */
7783 if (avail < 5) return(ret);
7784 if ((!terminate) &&
7785 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7786 return(ret);
7787 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7788 ctxt->sax->setDocumentLocator(ctxt->userData,
7789 &xmlDefaultSAXLocator);
7790 if ((ctxt->input->cur[2] == 'x') &&
7791 (ctxt->input->cur[3] == 'm') &&
7792 (ctxt->input->cur[4] == 'l') &&
7793 (IS_BLANK(ctxt->input->cur[5]))) {
7794 ret += 5;
7795#ifdef DEBUG_PUSH
7796 xmlGenericError(xmlGenericErrorContext,
7797 "PP: Parsing XML Decl\n");
7798#endif
7799 xmlParseXMLDecl(ctxt);
7800 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7801 /*
7802 * The XML REC instructs us to stop parsing right
7803 * here
7804 */
7805 ctxt->instate = XML_PARSER_EOF;
7806 return(0);
7807 }
7808 ctxt->standalone = ctxt->input->standalone;
7809 if ((ctxt->encoding == NULL) &&
7810 (ctxt->input->encoding != NULL))
7811 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7812 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7813 (!ctxt->disableSAX))
7814 ctxt->sax->startDocument(ctxt->userData);
7815 ctxt->instate = XML_PARSER_MISC;
7816#ifdef DEBUG_PUSH
7817 xmlGenericError(xmlGenericErrorContext,
7818 "PP: entering MISC\n");
7819#endif
7820 } else {
7821 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7822 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7823 (!ctxt->disableSAX))
7824 ctxt->sax->startDocument(ctxt->userData);
7825 ctxt->instate = XML_PARSER_MISC;
7826#ifdef DEBUG_PUSH
7827 xmlGenericError(xmlGenericErrorContext,
7828 "PP: entering MISC\n");
7829#endif
7830 }
7831 } else {
7832 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7833 ctxt->sax->setDocumentLocator(ctxt->userData,
7834 &xmlDefaultSAXLocator);
7835 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7836 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7837 (!ctxt->disableSAX))
7838 ctxt->sax->startDocument(ctxt->userData);
7839 ctxt->instate = XML_PARSER_MISC;
7840#ifdef DEBUG_PUSH
7841 xmlGenericError(xmlGenericErrorContext,
7842 "PP: entering MISC\n");
7843#endif
7844 }
7845 break;
7846 case XML_PARSER_MISC:
7847 SKIP_BLANKS;
7848 if (ctxt->input->buf == NULL)
7849 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7850 else
7851 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7852 if (avail < 2)
7853 goto done;
7854 cur = ctxt->input->cur[0];
7855 next = ctxt->input->cur[1];
7856 if ((cur == '<') && (next == '?')) {
7857 if ((!terminate) &&
7858 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7859 goto done;
7860#ifdef DEBUG_PUSH
7861 xmlGenericError(xmlGenericErrorContext,
7862 "PP: Parsing PI\n");
7863#endif
7864 xmlParsePI(ctxt);
7865 } else if ((cur == '<') && (next == '!') &&
7866 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7867 if ((!terminate) &&
7868 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7869 goto done;
7870#ifdef DEBUG_PUSH
7871 xmlGenericError(xmlGenericErrorContext,
7872 "PP: Parsing Comment\n");
7873#endif
7874 xmlParseComment(ctxt);
7875 ctxt->instate = XML_PARSER_MISC;
7876 } else if ((cur == '<') && (next == '!') &&
7877 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7878 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7879 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7880 (ctxt->input->cur[8] == 'E')) {
7881 if ((!terminate) &&
7882 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7883 goto done;
7884#ifdef DEBUG_PUSH
7885 xmlGenericError(xmlGenericErrorContext,
7886 "PP: Parsing internal subset\n");
7887#endif
7888 ctxt->inSubset = 1;
7889 xmlParseDocTypeDecl(ctxt);
7890 if (RAW == '[') {
7891 ctxt->instate = XML_PARSER_DTD;
7892#ifdef DEBUG_PUSH
7893 xmlGenericError(xmlGenericErrorContext,
7894 "PP: entering DTD\n");
7895#endif
7896 } else {
7897 /*
7898 * Create and update the external subset.
7899 */
7900 ctxt->inSubset = 2;
7901 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7902 (ctxt->sax->externalSubset != NULL))
7903 ctxt->sax->externalSubset(ctxt->userData,
7904 ctxt->intSubName, ctxt->extSubSystem,
7905 ctxt->extSubURI);
7906 ctxt->inSubset = 0;
7907 ctxt->instate = XML_PARSER_PROLOG;
7908#ifdef DEBUG_PUSH
7909 xmlGenericError(xmlGenericErrorContext,
7910 "PP: entering PROLOG\n");
7911#endif
7912 }
7913 } else if ((cur == '<') && (next == '!') &&
7914 (avail < 9)) {
7915 goto done;
7916 } else {
7917 ctxt->instate = XML_PARSER_START_TAG;
7918#ifdef DEBUG_PUSH
7919 xmlGenericError(xmlGenericErrorContext,
7920 "PP: entering START_TAG\n");
7921#endif
7922 }
7923 break;
7924 case XML_PARSER_IGNORE:
7925 xmlGenericError(xmlGenericErrorContext,
7926 "PP: internal error, state == IGNORE");
7927 ctxt->instate = XML_PARSER_DTD;
7928#ifdef DEBUG_PUSH
7929 xmlGenericError(xmlGenericErrorContext,
7930 "PP: entering DTD\n");
7931#endif
7932 break;
7933 case XML_PARSER_PROLOG:
7934 SKIP_BLANKS;
7935 if (ctxt->input->buf == NULL)
7936 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7937 else
7938 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7939 if (avail < 2)
7940 goto done;
7941 cur = ctxt->input->cur[0];
7942 next = ctxt->input->cur[1];
7943 if ((cur == '<') && (next == '?')) {
7944 if ((!terminate) &&
7945 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7946 goto done;
7947#ifdef DEBUG_PUSH
7948 xmlGenericError(xmlGenericErrorContext,
7949 "PP: Parsing PI\n");
7950#endif
7951 xmlParsePI(ctxt);
7952 } else if ((cur == '<') && (next == '!') &&
7953 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7954 if ((!terminate) &&
7955 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7956 goto done;
7957#ifdef DEBUG_PUSH
7958 xmlGenericError(xmlGenericErrorContext,
7959 "PP: Parsing Comment\n");
7960#endif
7961 xmlParseComment(ctxt);
7962 ctxt->instate = XML_PARSER_PROLOG;
7963 } else if ((cur == '<') && (next == '!') &&
7964 (avail < 4)) {
7965 goto done;
7966 } else {
7967 ctxt->instate = XML_PARSER_START_TAG;
7968#ifdef DEBUG_PUSH
7969 xmlGenericError(xmlGenericErrorContext,
7970 "PP: entering START_TAG\n");
7971#endif
7972 }
7973 break;
7974 case XML_PARSER_EPILOG:
7975 SKIP_BLANKS;
7976 if (ctxt->input->buf == NULL)
7977 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7978 else
7979 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7980 if (avail < 2)
7981 goto done;
7982 cur = ctxt->input->cur[0];
7983 next = ctxt->input->cur[1];
7984 if ((cur == '<') && (next == '?')) {
7985 if ((!terminate) &&
7986 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7987 goto done;
7988#ifdef DEBUG_PUSH
7989 xmlGenericError(xmlGenericErrorContext,
7990 "PP: Parsing PI\n");
7991#endif
7992 xmlParsePI(ctxt);
7993 ctxt->instate = XML_PARSER_EPILOG;
7994 } else if ((cur == '<') && (next == '!') &&
7995 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7996 if ((!terminate) &&
7997 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7998 goto done;
7999#ifdef DEBUG_PUSH
8000 xmlGenericError(xmlGenericErrorContext,
8001 "PP: Parsing Comment\n");
8002#endif
8003 xmlParseComment(ctxt);
8004 ctxt->instate = XML_PARSER_EPILOG;
8005 } else if ((cur == '<') && (next == '!') &&
8006 (avail < 4)) {
8007 goto done;
8008 } else {
8009 ctxt->errNo = XML_ERR_DOCUMENT_END;
8010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8011 ctxt->sax->error(ctxt->userData,
8012 "Extra content at the end of the document\n");
8013 ctxt->wellFormed = 0;
8014 ctxt->disableSAX = 1;
8015 ctxt->instate = XML_PARSER_EOF;
8016#ifdef DEBUG_PUSH
8017 xmlGenericError(xmlGenericErrorContext,
8018 "PP: entering EOF\n");
8019#endif
8020 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8021 (!ctxt->disableSAX))
8022 ctxt->sax->endDocument(ctxt->userData);
8023 goto done;
8024 }
8025 break;
8026 case XML_PARSER_START_TAG: {
8027 xmlChar *name, *oldname;
8028
8029 if ((avail < 2) && (ctxt->inputNr == 1))
8030 goto done;
8031 cur = ctxt->input->cur[0];
8032 if (cur != '<') {
8033 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8034 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8035 ctxt->sax->error(ctxt->userData,
8036 "Start tag expect, '<' not found\n");
8037 ctxt->wellFormed = 0;
8038 ctxt->disableSAX = 1;
8039 ctxt->instate = XML_PARSER_EOF;
8040#ifdef DEBUG_PUSH
8041 xmlGenericError(xmlGenericErrorContext,
8042 "PP: entering EOF\n");
8043#endif
8044 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8045 (!ctxt->disableSAX))
8046 ctxt->sax->endDocument(ctxt->userData);
8047 goto done;
8048 }
8049 if ((!terminate) &&
8050 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8051 goto done;
8052 if (ctxt->spaceNr == 0)
8053 spacePush(ctxt, -1);
8054 else
8055 spacePush(ctxt, *ctxt->space);
8056 name = xmlParseStartTag(ctxt);
8057 if (name == NULL) {
8058 spacePop(ctxt);
8059 ctxt->instate = XML_PARSER_EOF;
8060#ifdef DEBUG_PUSH
8061 xmlGenericError(xmlGenericErrorContext,
8062 "PP: entering EOF\n");
8063#endif
8064 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8065 (!ctxt->disableSAX))
8066 ctxt->sax->endDocument(ctxt->userData);
8067 goto done;
8068 }
8069 namePush(ctxt, xmlStrdup(name));
8070
8071 /*
8072 * [ VC: Root Element Type ]
8073 * The Name in the document type declaration must match
8074 * the element type of the root element.
8075 */
8076 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8077 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8078 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8079
8080 /*
8081 * Check for an Empty Element.
8082 */
8083 if ((RAW == '/') && (NXT(1) == '>')) {
8084 SKIP(2);
8085 if ((ctxt->sax != NULL) &&
8086 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8087 ctxt->sax->endElement(ctxt->userData, name);
8088 xmlFree(name);
8089 oldname = namePop(ctxt);
8090 spacePop(ctxt);
8091 if (oldname != NULL) {
8092#ifdef DEBUG_STACK
8093 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8094#endif
8095 xmlFree(oldname);
8096 }
8097 if (ctxt->name == NULL) {
8098 ctxt->instate = XML_PARSER_EPILOG;
8099#ifdef DEBUG_PUSH
8100 xmlGenericError(xmlGenericErrorContext,
8101 "PP: entering EPILOG\n");
8102#endif
8103 } else {
8104 ctxt->instate = XML_PARSER_CONTENT;
8105#ifdef DEBUG_PUSH
8106 xmlGenericError(xmlGenericErrorContext,
8107 "PP: entering CONTENT\n");
8108#endif
8109 }
8110 break;
8111 }
8112 if (RAW == '>') {
8113 NEXT;
8114 } else {
8115 ctxt->errNo = XML_ERR_GT_REQUIRED;
8116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8117 ctxt->sax->error(ctxt->userData,
8118 "Couldn't find end of Start Tag %s\n",
8119 name);
8120 ctxt->wellFormed = 0;
8121 ctxt->disableSAX = 1;
8122
8123 /*
8124 * end of parsing of this node.
8125 */
8126 nodePop(ctxt);
8127 oldname = namePop(ctxt);
8128 spacePop(ctxt);
8129 if (oldname != NULL) {
8130#ifdef DEBUG_STACK
8131 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8132#endif
8133 xmlFree(oldname);
8134 }
8135 }
8136 xmlFree(name);
8137 ctxt->instate = XML_PARSER_CONTENT;
8138#ifdef DEBUG_PUSH
8139 xmlGenericError(xmlGenericErrorContext,
8140 "PP: entering CONTENT\n");
8141#endif
8142 break;
8143 }
8144 case XML_PARSER_CONTENT: {
8145 const xmlChar *test;
8146 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008147 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008148
8149 /*
8150 * Handle preparsed entities and charRef
8151 */
8152 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008153 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008154
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008155 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008156 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8157 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008158 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008159 ctxt->token = 0;
8160 }
8161 if ((avail < 2) && (ctxt->inputNr == 1))
8162 goto done;
8163 cur = ctxt->input->cur[0];
8164 next = ctxt->input->cur[1];
8165
8166 test = CUR_PTR;
8167 cons = ctxt->input->consumed;
8168 tok = ctxt->token;
8169 if ((cur == '<') && (next == '?')) {
8170 if ((!terminate) &&
8171 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8172 goto done;
8173#ifdef DEBUG_PUSH
8174 xmlGenericError(xmlGenericErrorContext,
8175 "PP: Parsing PI\n");
8176#endif
8177 xmlParsePI(ctxt);
8178 } else if ((cur == '<') && (next == '!') &&
8179 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8180 if ((!terminate) &&
8181 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8182 goto done;
8183#ifdef DEBUG_PUSH
8184 xmlGenericError(xmlGenericErrorContext,
8185 "PP: Parsing Comment\n");
8186#endif
8187 xmlParseComment(ctxt);
8188 ctxt->instate = XML_PARSER_CONTENT;
8189 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8190 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8191 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8192 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8193 (ctxt->input->cur[8] == '[')) {
8194 SKIP(9);
8195 ctxt->instate = XML_PARSER_CDATA_SECTION;
8196#ifdef DEBUG_PUSH
8197 xmlGenericError(xmlGenericErrorContext,
8198 "PP: entering CDATA_SECTION\n");
8199#endif
8200 break;
8201 } else if ((cur == '<') && (next == '!') &&
8202 (avail < 9)) {
8203 goto done;
8204 } else if ((cur == '<') && (next == '/')) {
8205 ctxt->instate = XML_PARSER_END_TAG;
8206#ifdef DEBUG_PUSH
8207 xmlGenericError(xmlGenericErrorContext,
8208 "PP: entering END_TAG\n");
8209#endif
8210 break;
8211 } else if (cur == '<') {
8212 ctxt->instate = XML_PARSER_START_TAG;
8213#ifdef DEBUG_PUSH
8214 xmlGenericError(xmlGenericErrorContext,
8215 "PP: entering START_TAG\n");
8216#endif
8217 break;
8218 } else if (cur == '&') {
8219 if ((!terminate) &&
8220 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8221 goto done;
8222#ifdef DEBUG_PUSH
8223 xmlGenericError(xmlGenericErrorContext,
8224 "PP: Parsing Reference\n");
8225#endif
8226 xmlParseReference(ctxt);
8227 } else {
8228 /* TODO Avoid the extra copy, handle directly !!! */
8229 /*
8230 * Goal of the following test is:
8231 * - minimize calls to the SAX 'character' callback
8232 * when they are mergeable
8233 * - handle an problem for isBlank when we only parse
8234 * a sequence of blank chars and the next one is
8235 * not available to check against '<' presence.
8236 * - tries to homogenize the differences in SAX
8237 * callbacks beween the push and pull versions
8238 * of the parser.
8239 */
8240 if ((ctxt->inputNr == 1) &&
8241 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8242 if ((!terminate) &&
8243 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8244 goto done;
8245 }
8246 ctxt->checkIndex = 0;
8247#ifdef DEBUG_PUSH
8248 xmlGenericError(xmlGenericErrorContext,
8249 "PP: Parsing char data\n");
8250#endif
8251 xmlParseCharData(ctxt, 0);
8252 }
8253 /*
8254 * Pop-up of finished entities.
8255 */
8256 while ((RAW == 0) && (ctxt->inputNr > 1))
8257 xmlPopInput(ctxt);
8258 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8259 (tok == ctxt->token)) {
8260 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8262 ctxt->sax->error(ctxt->userData,
8263 "detected an error in element content\n");
8264 ctxt->wellFormed = 0;
8265 ctxt->disableSAX = 1;
8266 ctxt->instate = XML_PARSER_EOF;
8267 break;
8268 }
8269 break;
8270 }
8271 case XML_PARSER_CDATA_SECTION: {
8272 /*
8273 * The Push mode need to have the SAX callback for
8274 * cdataBlock merge back contiguous callbacks.
8275 */
8276 int base;
8277
8278 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8279 if (base < 0) {
8280 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8281 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8282 if (ctxt->sax->cdataBlock != NULL)
8283 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8284 XML_PARSER_BIG_BUFFER_SIZE);
8285 }
8286 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8287 ctxt->checkIndex = 0;
8288 }
8289 goto done;
8290 } else {
8291 if ((ctxt->sax != NULL) && (base > 0) &&
8292 (!ctxt->disableSAX)) {
8293 if (ctxt->sax->cdataBlock != NULL)
8294 ctxt->sax->cdataBlock(ctxt->userData,
8295 ctxt->input->cur, base);
8296 }
8297 SKIP(base + 3);
8298 ctxt->checkIndex = 0;
8299 ctxt->instate = XML_PARSER_CONTENT;
8300#ifdef DEBUG_PUSH
8301 xmlGenericError(xmlGenericErrorContext,
8302 "PP: entering CONTENT\n");
8303#endif
8304 }
8305 break;
8306 }
8307 case XML_PARSER_END_TAG:
8308 if (avail < 2)
8309 goto done;
8310 if ((!terminate) &&
8311 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8312 goto done;
8313 xmlParseEndTag(ctxt);
8314 if (ctxt->name == NULL) {
8315 ctxt->instate = XML_PARSER_EPILOG;
8316#ifdef DEBUG_PUSH
8317 xmlGenericError(xmlGenericErrorContext,
8318 "PP: entering EPILOG\n");
8319#endif
8320 } else {
8321 ctxt->instate = XML_PARSER_CONTENT;
8322#ifdef DEBUG_PUSH
8323 xmlGenericError(xmlGenericErrorContext,
8324 "PP: entering CONTENT\n");
8325#endif
8326 }
8327 break;
8328 case XML_PARSER_DTD: {
8329 /*
8330 * Sorry but progressive parsing of the internal subset
8331 * is not expected to be supported. We first check that
8332 * the full content of the internal subset is available and
8333 * the parsing is launched only at that point.
8334 * Internal subset ends up with "']' S? '>'" in an unescaped
8335 * section and not in a ']]>' sequence which are conditional
8336 * sections (whoever argued to keep that crap in XML deserve
8337 * a place in hell !).
8338 */
8339 int base, i;
8340 xmlChar *buf;
8341 xmlChar quote = 0;
8342
8343 base = ctxt->input->cur - ctxt->input->base;
8344 if (base < 0) return(0);
8345 if (ctxt->checkIndex > base)
8346 base = ctxt->checkIndex;
8347 buf = ctxt->input->buf->buffer->content;
8348 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8349 base++) {
8350 if (quote != 0) {
8351 if (buf[base] == quote)
8352 quote = 0;
8353 continue;
8354 }
8355 if (buf[base] == '"') {
8356 quote = '"';
8357 continue;
8358 }
8359 if (buf[base] == '\'') {
8360 quote = '\'';
8361 continue;
8362 }
8363 if (buf[base] == ']') {
8364 if ((unsigned int) base +1 >=
8365 ctxt->input->buf->buffer->use)
8366 break;
8367 if (buf[base + 1] == ']') {
8368 /* conditional crap, skip both ']' ! */
8369 base++;
8370 continue;
8371 }
8372 for (i = 0;
8373 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8374 i++) {
8375 if (buf[base + i] == '>')
8376 goto found_end_int_subset;
8377 }
8378 break;
8379 }
8380 }
8381 /*
8382 * We didn't found the end of the Internal subset
8383 */
8384 if (quote == 0)
8385 ctxt->checkIndex = base;
8386#ifdef DEBUG_PUSH
8387 if (next == 0)
8388 xmlGenericError(xmlGenericErrorContext,
8389 "PP: lookup of int subset end filed\n");
8390#endif
8391 goto done;
8392
8393found_end_int_subset:
8394 xmlParseInternalSubset(ctxt);
8395 ctxt->inSubset = 2;
8396 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8397 (ctxt->sax->externalSubset != NULL))
8398 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8399 ctxt->extSubSystem, ctxt->extSubURI);
8400 ctxt->inSubset = 0;
8401 ctxt->instate = XML_PARSER_PROLOG;
8402 ctxt->checkIndex = 0;
8403#ifdef DEBUG_PUSH
8404 xmlGenericError(xmlGenericErrorContext,
8405 "PP: entering PROLOG\n");
8406#endif
8407 break;
8408 }
8409 case XML_PARSER_COMMENT:
8410 xmlGenericError(xmlGenericErrorContext,
8411 "PP: internal error, state == COMMENT\n");
8412 ctxt->instate = XML_PARSER_CONTENT;
8413#ifdef DEBUG_PUSH
8414 xmlGenericError(xmlGenericErrorContext,
8415 "PP: entering CONTENT\n");
8416#endif
8417 break;
8418 case XML_PARSER_PI:
8419 xmlGenericError(xmlGenericErrorContext,
8420 "PP: internal error, state == PI\n");
8421 ctxt->instate = XML_PARSER_CONTENT;
8422#ifdef DEBUG_PUSH
8423 xmlGenericError(xmlGenericErrorContext,
8424 "PP: entering CONTENT\n");
8425#endif
8426 break;
8427 case XML_PARSER_ENTITY_DECL:
8428 xmlGenericError(xmlGenericErrorContext,
8429 "PP: internal error, state == ENTITY_DECL\n");
8430 ctxt->instate = XML_PARSER_DTD;
8431#ifdef DEBUG_PUSH
8432 xmlGenericError(xmlGenericErrorContext,
8433 "PP: entering DTD\n");
8434#endif
8435 break;
8436 case XML_PARSER_ENTITY_VALUE:
8437 xmlGenericError(xmlGenericErrorContext,
8438 "PP: internal error, state == ENTITY_VALUE\n");
8439 ctxt->instate = XML_PARSER_CONTENT;
8440#ifdef DEBUG_PUSH
8441 xmlGenericError(xmlGenericErrorContext,
8442 "PP: entering DTD\n");
8443#endif
8444 break;
8445 case XML_PARSER_ATTRIBUTE_VALUE:
8446 xmlGenericError(xmlGenericErrorContext,
8447 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8448 ctxt->instate = XML_PARSER_START_TAG;
8449#ifdef DEBUG_PUSH
8450 xmlGenericError(xmlGenericErrorContext,
8451 "PP: entering START_TAG\n");
8452#endif
8453 break;
8454 case XML_PARSER_SYSTEM_LITERAL:
8455 xmlGenericError(xmlGenericErrorContext,
8456 "PP: internal error, state == SYSTEM_LITERAL\n");
8457 ctxt->instate = XML_PARSER_START_TAG;
8458#ifdef DEBUG_PUSH
8459 xmlGenericError(xmlGenericErrorContext,
8460 "PP: entering START_TAG\n");
8461#endif
8462 break;
8463 }
8464 }
8465done:
8466#ifdef DEBUG_PUSH
8467 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8468#endif
8469 return(ret);
8470}
8471
8472/**
Owen Taylor3473f882001-02-23 17:55:21 +00008473 * xmlParseChunk:
8474 * @ctxt: an XML parser context
8475 * @chunk: an char array
8476 * @size: the size in byte of the chunk
8477 * @terminate: last chunk indicator
8478 *
8479 * Parse a Chunk of memory
8480 *
8481 * Returns zero if no error, the xmlParserErrors otherwise.
8482 */
8483int
8484xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8485 int terminate) {
8486 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8487 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8488 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8489 int cur = ctxt->input->cur - ctxt->input->base;
8490
8491 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8492 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8493 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008494 ctxt->input->end =
8495 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008496#ifdef DEBUG_PUSH
8497 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8498#endif
8499
8500 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8501 xmlParseTryOrFinish(ctxt, terminate);
8502 } else if (ctxt->instate != XML_PARSER_EOF) {
8503 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8504 xmlParserInputBufferPtr in = ctxt->input->buf;
8505 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8506 (in->raw != NULL)) {
8507 int nbchars;
8508
8509 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8510 if (nbchars < 0) {
8511 xmlGenericError(xmlGenericErrorContext,
8512 "xmlParseChunk: encoder error\n");
8513 return(XML_ERR_INVALID_ENCODING);
8514 }
8515 }
8516 }
8517 }
8518 xmlParseTryOrFinish(ctxt, terminate);
8519 if (terminate) {
8520 /*
8521 * Check for termination
8522 */
8523 if ((ctxt->instate != XML_PARSER_EOF) &&
8524 (ctxt->instate != XML_PARSER_EPILOG)) {
8525 ctxt->errNo = XML_ERR_DOCUMENT_END;
8526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8527 ctxt->sax->error(ctxt->userData,
8528 "Extra content at the end of the document\n");
8529 ctxt->wellFormed = 0;
8530 ctxt->disableSAX = 1;
8531 }
8532 if (ctxt->instate != XML_PARSER_EOF) {
8533 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8534 (!ctxt->disableSAX))
8535 ctxt->sax->endDocument(ctxt->userData);
8536 }
8537 ctxt->instate = XML_PARSER_EOF;
8538 }
8539 return((xmlParserErrors) ctxt->errNo);
8540}
8541
8542/************************************************************************
8543 * *
8544 * I/O front end functions to the parser *
8545 * *
8546 ************************************************************************/
8547
8548/**
8549 * xmlStopParser:
8550 * @ctxt: an XML parser context
8551 *
8552 * Blocks further parser processing
8553 */
8554void
8555xmlStopParser(xmlParserCtxtPtr ctxt) {
8556 ctxt->instate = XML_PARSER_EOF;
8557 if (ctxt->input != NULL)
8558 ctxt->input->cur = BAD_CAST"";
8559}
8560
8561/**
8562 * xmlCreatePushParserCtxt:
8563 * @sax: a SAX handler
8564 * @user_data: The user data returned on SAX callbacks
8565 * @chunk: a pointer to an array of chars
8566 * @size: number of chars in the array
8567 * @filename: an optional file name or URI
8568 *
8569 * Create a parser context for using the XML parser in push mode
8570 * To allow content encoding detection, @size should be >= 4
8571 * The value of @filename is used for fetching external entities
8572 * and error/warning reports.
8573 *
8574 * Returns the new parser context or NULL
8575 */
8576xmlParserCtxtPtr
8577xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8578 const char *chunk, int size, const char *filename) {
8579 xmlParserCtxtPtr ctxt;
8580 xmlParserInputPtr inputStream;
8581 xmlParserInputBufferPtr buf;
8582 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8583
8584 /*
8585 * plug some encoding conversion routines
8586 */
8587 if ((chunk != NULL) && (size >= 4))
8588 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8589
8590 buf = xmlAllocParserInputBuffer(enc);
8591 if (buf == NULL) return(NULL);
8592
8593 ctxt = xmlNewParserCtxt();
8594 if (ctxt == NULL) {
8595 xmlFree(buf);
8596 return(NULL);
8597 }
8598 if (sax != NULL) {
8599 if (ctxt->sax != &xmlDefaultSAXHandler)
8600 xmlFree(ctxt->sax);
8601 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8602 if (ctxt->sax == NULL) {
8603 xmlFree(buf);
8604 xmlFree(ctxt);
8605 return(NULL);
8606 }
8607 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8608 if (user_data != NULL)
8609 ctxt->userData = user_data;
8610 }
8611 if (filename == NULL) {
8612 ctxt->directory = NULL;
8613 } else {
8614 ctxt->directory = xmlParserGetDirectory(filename);
8615 }
8616
8617 inputStream = xmlNewInputStream(ctxt);
8618 if (inputStream == NULL) {
8619 xmlFreeParserCtxt(ctxt);
8620 return(NULL);
8621 }
8622
8623 if (filename == NULL)
8624 inputStream->filename = NULL;
8625 else
8626 inputStream->filename = xmlMemStrdup(filename);
8627 inputStream->buf = buf;
8628 inputStream->base = inputStream->buf->buffer->content;
8629 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008630 inputStream->end =
8631 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008632
8633 inputPush(ctxt, inputStream);
8634
8635 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8636 (ctxt->input->buf != NULL)) {
8637 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8638#ifdef DEBUG_PUSH
8639 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8640#endif
8641 }
8642
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008643 if (enc != XML_CHAR_ENCODING_NONE) {
8644 xmlSwitchEncoding(ctxt, enc);
8645 }
8646
Owen Taylor3473f882001-02-23 17:55:21 +00008647 return(ctxt);
8648}
8649
8650/**
8651 * xmlCreateIOParserCtxt:
8652 * @sax: a SAX handler
8653 * @user_data: The user data returned on SAX callbacks
8654 * @ioread: an I/O read function
8655 * @ioclose: an I/O close function
8656 * @ioctx: an I/O handler
8657 * @enc: the charset encoding if known
8658 *
8659 * Create a parser context for using the XML parser with an existing
8660 * I/O stream
8661 *
8662 * Returns the new parser context or NULL
8663 */
8664xmlParserCtxtPtr
8665xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8666 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8667 void *ioctx, xmlCharEncoding enc) {
8668 xmlParserCtxtPtr ctxt;
8669 xmlParserInputPtr inputStream;
8670 xmlParserInputBufferPtr buf;
8671
8672 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8673 if (buf == NULL) return(NULL);
8674
8675 ctxt = xmlNewParserCtxt();
8676 if (ctxt == NULL) {
8677 xmlFree(buf);
8678 return(NULL);
8679 }
8680 if (sax != NULL) {
8681 if (ctxt->sax != &xmlDefaultSAXHandler)
8682 xmlFree(ctxt->sax);
8683 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8684 if (ctxt->sax == NULL) {
8685 xmlFree(buf);
8686 xmlFree(ctxt);
8687 return(NULL);
8688 }
8689 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8690 if (user_data != NULL)
8691 ctxt->userData = user_data;
8692 }
8693
8694 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8695 if (inputStream == NULL) {
8696 xmlFreeParserCtxt(ctxt);
8697 return(NULL);
8698 }
8699 inputPush(ctxt, inputStream);
8700
8701 return(ctxt);
8702}
8703
8704/************************************************************************
8705 * *
8706 * Front ends when parsing a Dtd *
8707 * *
8708 ************************************************************************/
8709
8710/**
8711 * xmlIOParseDTD:
8712 * @sax: the SAX handler block or NULL
8713 * @input: an Input Buffer
8714 * @enc: the charset encoding if known
8715 *
8716 * Load and parse a DTD
8717 *
8718 * Returns the resulting xmlDtdPtr or NULL in case of error.
8719 * @input will be freed at parsing end.
8720 */
8721
8722xmlDtdPtr
8723xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8724 xmlCharEncoding enc) {
8725 xmlDtdPtr ret = NULL;
8726 xmlParserCtxtPtr ctxt;
8727 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008728 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008729
8730 if (input == NULL)
8731 return(NULL);
8732
8733 ctxt = xmlNewParserCtxt();
8734 if (ctxt == NULL) {
8735 return(NULL);
8736 }
8737
8738 /*
8739 * Set-up the SAX context
8740 */
8741 if (sax != NULL) {
8742 if (ctxt->sax != NULL)
8743 xmlFree(ctxt->sax);
8744 ctxt->sax = sax;
8745 ctxt->userData = NULL;
8746 }
8747
8748 /*
8749 * generate a parser input from the I/O handler
8750 */
8751
8752 pinput = xmlNewIOInputStream(ctxt, input, enc);
8753 if (pinput == NULL) {
8754 if (sax != NULL) ctxt->sax = NULL;
8755 xmlFreeParserCtxt(ctxt);
8756 return(NULL);
8757 }
8758
8759 /*
8760 * plug some encoding conversion routines here.
8761 */
8762 xmlPushInput(ctxt, pinput);
8763
8764 pinput->filename = NULL;
8765 pinput->line = 1;
8766 pinput->col = 1;
8767 pinput->base = ctxt->input->cur;
8768 pinput->cur = ctxt->input->cur;
8769 pinput->free = NULL;
8770
8771 /*
8772 * let's parse that entity knowing it's an external subset.
8773 */
8774 ctxt->inSubset = 2;
8775 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8776 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8777 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008778
8779 if (enc == XML_CHAR_ENCODING_NONE) {
8780 /*
8781 * Get the 4 first bytes and decode the charset
8782 * if enc != XML_CHAR_ENCODING_NONE
8783 * plug some encoding conversion routines.
8784 */
8785 start[0] = RAW;
8786 start[1] = NXT(1);
8787 start[2] = NXT(2);
8788 start[3] = NXT(3);
8789 enc = xmlDetectCharEncoding(start, 4);
8790 if (enc != XML_CHAR_ENCODING_NONE) {
8791 xmlSwitchEncoding(ctxt, enc);
8792 }
8793 }
8794
Owen Taylor3473f882001-02-23 17:55:21 +00008795 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8796
8797 if (ctxt->myDoc != NULL) {
8798 if (ctxt->wellFormed) {
8799 ret = ctxt->myDoc->extSubset;
8800 ctxt->myDoc->extSubset = NULL;
8801 } else {
8802 ret = NULL;
8803 }
8804 xmlFreeDoc(ctxt->myDoc);
8805 ctxt->myDoc = NULL;
8806 }
8807 if (sax != NULL) ctxt->sax = NULL;
8808 xmlFreeParserCtxt(ctxt);
8809
8810 return(ret);
8811}
8812
8813/**
8814 * xmlSAXParseDTD:
8815 * @sax: the SAX handler block
8816 * @ExternalID: a NAME* containing the External ID of the DTD
8817 * @SystemID: a NAME* containing the URL to the DTD
8818 *
8819 * Load and parse an external subset.
8820 *
8821 * Returns the resulting xmlDtdPtr or NULL in case of error.
8822 */
8823
8824xmlDtdPtr
8825xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8826 const xmlChar *SystemID) {
8827 xmlDtdPtr ret = NULL;
8828 xmlParserCtxtPtr ctxt;
8829 xmlParserInputPtr input = NULL;
8830 xmlCharEncoding enc;
8831
8832 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8833
8834 ctxt = xmlNewParserCtxt();
8835 if (ctxt == NULL) {
8836 return(NULL);
8837 }
8838
8839 /*
8840 * Set-up the SAX context
8841 */
8842 if (sax != NULL) {
8843 if (ctxt->sax != NULL)
8844 xmlFree(ctxt->sax);
8845 ctxt->sax = sax;
8846 ctxt->userData = NULL;
8847 }
8848
8849 /*
8850 * Ask the Entity resolver to load the damn thing
8851 */
8852
8853 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8854 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8855 if (input == NULL) {
8856 if (sax != NULL) ctxt->sax = NULL;
8857 xmlFreeParserCtxt(ctxt);
8858 return(NULL);
8859 }
8860
8861 /*
8862 * plug some encoding conversion routines here.
8863 */
8864 xmlPushInput(ctxt, input);
8865 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8866 xmlSwitchEncoding(ctxt, enc);
8867
8868 if (input->filename == NULL)
8869 input->filename = (char *) xmlStrdup(SystemID);
8870 input->line = 1;
8871 input->col = 1;
8872 input->base = ctxt->input->cur;
8873 input->cur = ctxt->input->cur;
8874 input->free = NULL;
8875
8876 /*
8877 * let's parse that entity knowing it's an external subset.
8878 */
8879 ctxt->inSubset = 2;
8880 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8881 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8882 ExternalID, SystemID);
8883 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8884
8885 if (ctxt->myDoc != NULL) {
8886 if (ctxt->wellFormed) {
8887 ret = ctxt->myDoc->extSubset;
8888 ctxt->myDoc->extSubset = NULL;
8889 } else {
8890 ret = NULL;
8891 }
8892 xmlFreeDoc(ctxt->myDoc);
8893 ctxt->myDoc = NULL;
8894 }
8895 if (sax != NULL) ctxt->sax = NULL;
8896 xmlFreeParserCtxt(ctxt);
8897
8898 return(ret);
8899}
8900
8901/**
8902 * xmlParseDTD:
8903 * @ExternalID: a NAME* containing the External ID of the DTD
8904 * @SystemID: a NAME* containing the URL to the DTD
8905 *
8906 * Load and parse an external subset.
8907 *
8908 * Returns the resulting xmlDtdPtr or NULL in case of error.
8909 */
8910
8911xmlDtdPtr
8912xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8913 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8914}
8915
8916/************************************************************************
8917 * *
8918 * Front ends when parsing an Entity *
8919 * *
8920 ************************************************************************/
8921
8922/**
Owen Taylor3473f882001-02-23 17:55:21 +00008923 * xmlParseCtxtExternalEntity:
8924 * @ctx: the existing parsing context
8925 * @URL: the URL for the entity to load
8926 * @ID: the System ID for the entity to load
8927 * @list: the return value for the set of parsed nodes
8928 *
8929 * Parse an external general entity within an existing parsing context
8930 * An external general parsed entity is well-formed if it matches the
8931 * production labeled extParsedEnt.
8932 *
8933 * [78] extParsedEnt ::= TextDecl? content
8934 *
8935 * Returns 0 if the entity is well formed, -1 in case of args problem and
8936 * the parser error code otherwise
8937 */
8938
8939int
8940xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8941 const xmlChar *ID, xmlNodePtr *list) {
8942 xmlParserCtxtPtr ctxt;
8943 xmlDocPtr newDoc;
8944 xmlSAXHandlerPtr oldsax = NULL;
8945 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008946 xmlChar start[4];
8947 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00008948
8949 if (ctx->depth > 40) {
8950 return(XML_ERR_ENTITY_LOOP);
8951 }
8952
8953 if (list != NULL)
8954 *list = NULL;
8955 if ((URL == NULL) && (ID == NULL))
8956 return(-1);
8957 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8958 return(-1);
8959
8960
8961 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8962 if (ctxt == NULL) return(-1);
8963 ctxt->userData = ctxt;
8964 oldsax = ctxt->sax;
8965 ctxt->sax = ctx->sax;
8966 newDoc = xmlNewDoc(BAD_CAST "1.0");
8967 if (newDoc == NULL) {
8968 xmlFreeParserCtxt(ctxt);
8969 return(-1);
8970 }
8971 if (ctx->myDoc != NULL) {
8972 newDoc->intSubset = ctx->myDoc->intSubset;
8973 newDoc->extSubset = ctx->myDoc->extSubset;
8974 }
8975 if (ctx->myDoc->URL != NULL) {
8976 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8977 }
8978 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8979 if (newDoc->children == NULL) {
8980 ctxt->sax = oldsax;
8981 xmlFreeParserCtxt(ctxt);
8982 newDoc->intSubset = NULL;
8983 newDoc->extSubset = NULL;
8984 xmlFreeDoc(newDoc);
8985 return(-1);
8986 }
8987 nodePush(ctxt, newDoc->children);
8988 if (ctx->myDoc == NULL) {
8989 ctxt->myDoc = newDoc;
8990 } else {
8991 ctxt->myDoc = ctx->myDoc;
8992 newDoc->children->doc = ctx->myDoc;
8993 }
8994
Daniel Veillard87a764e2001-06-20 17:41:10 +00008995 /*
8996 * Get the 4 first bytes and decode the charset
8997 * if enc != XML_CHAR_ENCODING_NONE
8998 * plug some encoding conversion routines.
8999 */
9000 GROW
9001 start[0] = RAW;
9002 start[1] = NXT(1);
9003 start[2] = NXT(2);
9004 start[3] = NXT(3);
9005 enc = xmlDetectCharEncoding(start, 4);
9006 if (enc != XML_CHAR_ENCODING_NONE) {
9007 xmlSwitchEncoding(ctxt, enc);
9008 }
9009
Owen Taylor3473f882001-02-23 17:55:21 +00009010 /*
9011 * Parse a possible text declaration first
9012 */
Owen Taylor3473f882001-02-23 17:55:21 +00009013 if ((RAW == '<') && (NXT(1) == '?') &&
9014 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9015 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9016 xmlParseTextDecl(ctxt);
9017 }
9018
9019 /*
9020 * Doing validity checking on chunk doesn't make sense
9021 */
9022 ctxt->instate = XML_PARSER_CONTENT;
9023 ctxt->validate = ctx->validate;
9024 ctxt->loadsubset = ctx->loadsubset;
9025 ctxt->depth = ctx->depth + 1;
9026 ctxt->replaceEntities = ctx->replaceEntities;
9027 if (ctxt->validate) {
9028 ctxt->vctxt.error = ctx->vctxt.error;
9029 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009030 } else {
9031 ctxt->vctxt.error = NULL;
9032 ctxt->vctxt.warning = NULL;
9033 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009034 ctxt->vctxt.nodeTab = NULL;
9035 ctxt->vctxt.nodeNr = 0;
9036 ctxt->vctxt.nodeMax = 0;
9037 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009038
9039 xmlParseContent(ctxt);
9040
9041 if ((RAW == '<') && (NXT(1) == '/')) {
9042 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9044 ctxt->sax->error(ctxt->userData,
9045 "chunk is not well balanced\n");
9046 ctxt->wellFormed = 0;
9047 ctxt->disableSAX = 1;
9048 } else if (RAW != 0) {
9049 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9050 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9051 ctxt->sax->error(ctxt->userData,
9052 "extra content at the end of well balanced chunk\n");
9053 ctxt->wellFormed = 0;
9054 ctxt->disableSAX = 1;
9055 }
9056 if (ctxt->node != newDoc->children) {
9057 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9058 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9059 ctxt->sax->error(ctxt->userData,
9060 "chunk is not well balanced\n");
9061 ctxt->wellFormed = 0;
9062 ctxt->disableSAX = 1;
9063 }
9064
9065 if (!ctxt->wellFormed) {
9066 if (ctxt->errNo == 0)
9067 ret = 1;
9068 else
9069 ret = ctxt->errNo;
9070 } else {
9071 if (list != NULL) {
9072 xmlNodePtr cur;
9073
9074 /*
9075 * Return the newly created nodeset after unlinking it from
9076 * they pseudo parent.
9077 */
9078 cur = newDoc->children->children;
9079 *list = cur;
9080 while (cur != NULL) {
9081 cur->parent = NULL;
9082 cur = cur->next;
9083 }
9084 newDoc->children->children = NULL;
9085 }
9086 ret = 0;
9087 }
9088 ctxt->sax = oldsax;
9089 xmlFreeParserCtxt(ctxt);
9090 newDoc->intSubset = NULL;
9091 newDoc->extSubset = NULL;
9092 xmlFreeDoc(newDoc);
9093
9094 return(ret);
9095}
9096
9097/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009098 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009099 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009100 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009101 * @sax: the SAX handler bloc (possibly NULL)
9102 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9103 * @depth: Used for loop detection, use 0
9104 * @URL: the URL for the entity to load
9105 * @ID: the System ID for the entity to load
9106 * @list: the return value for the set of parsed nodes
9107 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009108 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009109 *
9110 * Returns 0 if the entity is well formed, -1 in case of args problem and
9111 * the parser error code otherwise
9112 */
9113
Daniel Veillard257d9102001-05-08 10:41:44 +00009114static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009115xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9116 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009117 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009118 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009119 xmlParserCtxtPtr ctxt;
9120 xmlDocPtr newDoc;
9121 xmlSAXHandlerPtr oldsax = NULL;
9122 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009123 xmlChar start[4];
9124 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009125
9126 if (depth > 40) {
9127 return(XML_ERR_ENTITY_LOOP);
9128 }
9129
9130
9131
9132 if (list != NULL)
9133 *list = NULL;
9134 if ((URL == NULL) && (ID == NULL))
9135 return(-1);
9136 if (doc == NULL) /* @@ relax but check for dereferences */
9137 return(-1);
9138
9139
9140 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9141 if (ctxt == NULL) return(-1);
9142 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009143 if (oldctxt != NULL) {
9144 ctxt->_private = oldctxt->_private;
9145 ctxt->loadsubset = oldctxt->loadsubset;
9146 ctxt->validate = oldctxt->validate;
9147 ctxt->external = oldctxt->external;
9148 } else {
9149 /*
9150 * Doing validity checking on chunk without context
9151 * doesn't make sense
9152 */
9153 ctxt->_private = NULL;
9154 ctxt->validate = 0;
9155 ctxt->external = 2;
9156 ctxt->loadsubset = 0;
9157 }
Owen Taylor3473f882001-02-23 17:55:21 +00009158 if (sax != NULL) {
9159 oldsax = ctxt->sax;
9160 ctxt->sax = sax;
9161 if (user_data != NULL)
9162 ctxt->userData = user_data;
9163 }
9164 newDoc = xmlNewDoc(BAD_CAST "1.0");
9165 if (newDoc == NULL) {
9166 xmlFreeParserCtxt(ctxt);
9167 return(-1);
9168 }
9169 if (doc != NULL) {
9170 newDoc->intSubset = doc->intSubset;
9171 newDoc->extSubset = doc->extSubset;
9172 }
9173 if (doc->URL != NULL) {
9174 newDoc->URL = xmlStrdup(doc->URL);
9175 }
9176 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9177 if (newDoc->children == NULL) {
9178 if (sax != NULL)
9179 ctxt->sax = oldsax;
9180 xmlFreeParserCtxt(ctxt);
9181 newDoc->intSubset = NULL;
9182 newDoc->extSubset = NULL;
9183 xmlFreeDoc(newDoc);
9184 return(-1);
9185 }
9186 nodePush(ctxt, newDoc->children);
9187 if (doc == NULL) {
9188 ctxt->myDoc = newDoc;
9189 } else {
9190 ctxt->myDoc = doc;
9191 newDoc->children->doc = doc;
9192 }
9193
Daniel Veillard87a764e2001-06-20 17:41:10 +00009194 /*
9195 * Get the 4 first bytes and decode the charset
9196 * if enc != XML_CHAR_ENCODING_NONE
9197 * plug some encoding conversion routines.
9198 */
9199 GROW;
9200 start[0] = RAW;
9201 start[1] = NXT(1);
9202 start[2] = NXT(2);
9203 start[3] = NXT(3);
9204 enc = xmlDetectCharEncoding(start, 4);
9205 if (enc != XML_CHAR_ENCODING_NONE) {
9206 xmlSwitchEncoding(ctxt, enc);
9207 }
9208
Owen Taylor3473f882001-02-23 17:55:21 +00009209 /*
9210 * Parse a possible text declaration first
9211 */
Owen Taylor3473f882001-02-23 17:55:21 +00009212 if ((RAW == '<') && (NXT(1) == '?') &&
9213 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9214 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9215 xmlParseTextDecl(ctxt);
9216 }
9217
Owen Taylor3473f882001-02-23 17:55:21 +00009218 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009219 ctxt->depth = depth;
9220
9221 xmlParseContent(ctxt);
9222
9223 if ((RAW == '<') && (NXT(1) == '/')) {
9224 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9225 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9226 ctxt->sax->error(ctxt->userData,
9227 "chunk is not well balanced\n");
9228 ctxt->wellFormed = 0;
9229 ctxt->disableSAX = 1;
9230 } else if (RAW != 0) {
9231 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9233 ctxt->sax->error(ctxt->userData,
9234 "extra content at the end of well balanced chunk\n");
9235 ctxt->wellFormed = 0;
9236 ctxt->disableSAX = 1;
9237 }
9238 if (ctxt->node != newDoc->children) {
9239 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9241 ctxt->sax->error(ctxt->userData,
9242 "chunk is not well balanced\n");
9243 ctxt->wellFormed = 0;
9244 ctxt->disableSAX = 1;
9245 }
9246
9247 if (!ctxt->wellFormed) {
9248 if (ctxt->errNo == 0)
9249 ret = 1;
9250 else
9251 ret = ctxt->errNo;
9252 } else {
9253 if (list != NULL) {
9254 xmlNodePtr cur;
9255
9256 /*
9257 * Return the newly created nodeset after unlinking it from
9258 * they pseudo parent.
9259 */
9260 cur = newDoc->children->children;
9261 *list = cur;
9262 while (cur != NULL) {
9263 cur->parent = NULL;
9264 cur = cur->next;
9265 }
9266 newDoc->children->children = NULL;
9267 }
9268 ret = 0;
9269 }
9270 if (sax != NULL)
9271 ctxt->sax = oldsax;
9272 xmlFreeParserCtxt(ctxt);
9273 newDoc->intSubset = NULL;
9274 newDoc->extSubset = NULL;
9275 xmlFreeDoc(newDoc);
9276
9277 return(ret);
9278}
9279
9280/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009281 * xmlParseExternalEntity:
9282 * @doc: the document the chunk pertains to
9283 * @sax: the SAX handler bloc (possibly NULL)
9284 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9285 * @depth: Used for loop detection, use 0
9286 * @URL: the URL for the entity to load
9287 * @ID: the System ID for the entity to load
9288 * @list: the return value for the set of parsed nodes
9289 *
9290 * Parse an external general entity
9291 * An external general parsed entity is well-formed if it matches the
9292 * production labeled extParsedEnt.
9293 *
9294 * [78] extParsedEnt ::= TextDecl? content
9295 *
9296 * Returns 0 if the entity is well formed, -1 in case of args problem and
9297 * the parser error code otherwise
9298 */
9299
9300int
9301xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9302 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009303 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
9304 ID, list));
Daniel Veillard257d9102001-05-08 10:41:44 +00009305}
9306
9307/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009308 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009309 * @doc: the document the chunk pertains to
9310 * @sax: the SAX handler bloc (possibly NULL)
9311 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9312 * @depth: Used for loop detection, use 0
9313 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9314 * @list: the return value for the set of parsed nodes
9315 *
9316 * Parse a well-balanced chunk of an XML document
9317 * called by the parser
9318 * The allowed sequence for the Well Balanced Chunk is the one defined by
9319 * the content production in the XML grammar:
9320 *
9321 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9322 *
9323 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9324 * the parser error code otherwise
9325 */
9326
9327int
9328xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9329 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9330 xmlParserCtxtPtr ctxt;
9331 xmlDocPtr newDoc;
9332 xmlSAXHandlerPtr oldsax = NULL;
9333 int size;
9334 int ret = 0;
9335
9336 if (depth > 40) {
9337 return(XML_ERR_ENTITY_LOOP);
9338 }
9339
9340
9341 if (list != NULL)
9342 *list = NULL;
9343 if (string == NULL)
9344 return(-1);
9345
9346 size = xmlStrlen(string);
9347
9348 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9349 if (ctxt == NULL) return(-1);
9350 ctxt->userData = ctxt;
9351 if (sax != NULL) {
9352 oldsax = ctxt->sax;
9353 ctxt->sax = sax;
9354 if (user_data != NULL)
9355 ctxt->userData = user_data;
9356 }
9357 newDoc = xmlNewDoc(BAD_CAST "1.0");
9358 if (newDoc == NULL) {
9359 xmlFreeParserCtxt(ctxt);
9360 return(-1);
9361 }
9362 if (doc != NULL) {
9363 newDoc->intSubset = doc->intSubset;
9364 newDoc->extSubset = doc->extSubset;
9365 }
9366 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9367 if (newDoc->children == NULL) {
9368 if (sax != NULL)
9369 ctxt->sax = oldsax;
9370 xmlFreeParserCtxt(ctxt);
9371 newDoc->intSubset = NULL;
9372 newDoc->extSubset = NULL;
9373 xmlFreeDoc(newDoc);
9374 return(-1);
9375 }
9376 nodePush(ctxt, newDoc->children);
9377 if (doc == NULL) {
9378 ctxt->myDoc = newDoc;
9379 } else {
9380 ctxt->myDoc = doc;
9381 newDoc->children->doc = doc;
9382 }
9383 ctxt->instate = XML_PARSER_CONTENT;
9384 ctxt->depth = depth;
9385
9386 /*
9387 * Doing validity checking on chunk doesn't make sense
9388 */
9389 ctxt->validate = 0;
9390 ctxt->loadsubset = 0;
9391
9392 xmlParseContent(ctxt);
9393
9394 if ((RAW == '<') && (NXT(1) == '/')) {
9395 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9397 ctxt->sax->error(ctxt->userData,
9398 "chunk is not well balanced\n");
9399 ctxt->wellFormed = 0;
9400 ctxt->disableSAX = 1;
9401 } else if (RAW != 0) {
9402 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9404 ctxt->sax->error(ctxt->userData,
9405 "extra content at the end of well balanced chunk\n");
9406 ctxt->wellFormed = 0;
9407 ctxt->disableSAX = 1;
9408 }
9409 if (ctxt->node != newDoc->children) {
9410 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9412 ctxt->sax->error(ctxt->userData,
9413 "chunk is not well balanced\n");
9414 ctxt->wellFormed = 0;
9415 ctxt->disableSAX = 1;
9416 }
9417
9418 if (!ctxt->wellFormed) {
9419 if (ctxt->errNo == 0)
9420 ret = 1;
9421 else
9422 ret = ctxt->errNo;
9423 } else {
9424 if (list != NULL) {
9425 xmlNodePtr cur;
9426
9427 /*
9428 * Return the newly created nodeset after unlinking it from
9429 * they pseudo parent.
9430 */
9431 cur = newDoc->children->children;
9432 *list = cur;
9433 while (cur != NULL) {
9434 cur->parent = NULL;
9435 cur = cur->next;
9436 }
9437 newDoc->children->children = NULL;
9438 }
9439 ret = 0;
9440 }
9441 if (sax != NULL)
9442 ctxt->sax = oldsax;
9443 xmlFreeParserCtxt(ctxt);
9444 newDoc->intSubset = NULL;
9445 newDoc->extSubset = NULL;
9446 xmlFreeDoc(newDoc);
9447
9448 return(ret);
9449}
9450
9451/**
9452 * xmlSAXParseEntity:
9453 * @sax: the SAX handler block
9454 * @filename: the filename
9455 *
9456 * parse an XML external entity out of context and build a tree.
9457 * It use the given SAX function block to handle the parsing callback.
9458 * If sax is NULL, fallback to the default DOM tree building routines.
9459 *
9460 * [78] extParsedEnt ::= TextDecl? content
9461 *
9462 * This correspond to a "Well Balanced" chunk
9463 *
9464 * Returns the resulting document tree
9465 */
9466
9467xmlDocPtr
9468xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9469 xmlDocPtr ret;
9470 xmlParserCtxtPtr ctxt;
9471 char *directory = NULL;
9472
9473 ctxt = xmlCreateFileParserCtxt(filename);
9474 if (ctxt == NULL) {
9475 return(NULL);
9476 }
9477 if (sax != NULL) {
9478 if (ctxt->sax != NULL)
9479 xmlFree(ctxt->sax);
9480 ctxt->sax = sax;
9481 ctxt->userData = NULL;
9482 }
9483
9484 if ((ctxt->directory == NULL) && (directory == NULL))
9485 directory = xmlParserGetDirectory(filename);
9486
9487 xmlParseExtParsedEnt(ctxt);
9488
9489 if (ctxt->wellFormed)
9490 ret = ctxt->myDoc;
9491 else {
9492 ret = NULL;
9493 xmlFreeDoc(ctxt->myDoc);
9494 ctxt->myDoc = NULL;
9495 }
9496 if (sax != NULL)
9497 ctxt->sax = NULL;
9498 xmlFreeParserCtxt(ctxt);
9499
9500 return(ret);
9501}
9502
9503/**
9504 * xmlParseEntity:
9505 * @filename: the filename
9506 *
9507 * parse an XML external entity out of context and build a tree.
9508 *
9509 * [78] extParsedEnt ::= TextDecl? content
9510 *
9511 * This correspond to a "Well Balanced" chunk
9512 *
9513 * Returns the resulting document tree
9514 */
9515
9516xmlDocPtr
9517xmlParseEntity(const char *filename) {
9518 return(xmlSAXParseEntity(NULL, filename));
9519}
9520
9521/**
9522 * xmlCreateEntityParserCtxt:
9523 * @URL: the entity URL
9524 * @ID: the entity PUBLIC ID
9525 * @base: a posible base for the target URI
9526 *
9527 * Create a parser context for an external entity
9528 * Automatic support for ZLIB/Compress compressed document is provided
9529 * by default if found at compile-time.
9530 *
9531 * Returns the new parser context or NULL
9532 */
9533xmlParserCtxtPtr
9534xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9535 const xmlChar *base) {
9536 xmlParserCtxtPtr ctxt;
9537 xmlParserInputPtr inputStream;
9538 char *directory = NULL;
9539 xmlChar *uri;
9540
9541 ctxt = xmlNewParserCtxt();
9542 if (ctxt == NULL) {
9543 return(NULL);
9544 }
9545
9546 uri = xmlBuildURI(URL, base);
9547
9548 if (uri == NULL) {
9549 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9550 if (inputStream == NULL) {
9551 xmlFreeParserCtxt(ctxt);
9552 return(NULL);
9553 }
9554
9555 inputPush(ctxt, inputStream);
9556
9557 if ((ctxt->directory == NULL) && (directory == NULL))
9558 directory = xmlParserGetDirectory((char *)URL);
9559 if ((ctxt->directory == NULL) && (directory != NULL))
9560 ctxt->directory = directory;
9561 } else {
9562 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9563 if (inputStream == NULL) {
9564 xmlFree(uri);
9565 xmlFreeParserCtxt(ctxt);
9566 return(NULL);
9567 }
9568
9569 inputPush(ctxt, inputStream);
9570
9571 if ((ctxt->directory == NULL) && (directory == NULL))
9572 directory = xmlParserGetDirectory((char *)uri);
9573 if ((ctxt->directory == NULL) && (directory != NULL))
9574 ctxt->directory = directory;
9575 xmlFree(uri);
9576 }
9577
9578 return(ctxt);
9579}
9580
9581/************************************************************************
9582 * *
9583 * Front ends when parsing from a file *
9584 * *
9585 ************************************************************************/
9586
9587/**
9588 * xmlCreateFileParserCtxt:
9589 * @filename: the filename
9590 *
9591 * Create a parser context for a file content.
9592 * Automatic support for ZLIB/Compress compressed document is provided
9593 * by default if found at compile-time.
9594 *
9595 * Returns the new parser context or NULL
9596 */
9597xmlParserCtxtPtr
9598xmlCreateFileParserCtxt(const char *filename)
9599{
9600 xmlParserCtxtPtr ctxt;
9601 xmlParserInputPtr inputStream;
9602 xmlParserInputBufferPtr buf;
9603 char *directory = NULL;
9604
9605 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9606 if (buf == NULL) {
9607 return(NULL);
9608 }
9609
9610 ctxt = xmlNewParserCtxt();
9611 if (ctxt == NULL) {
9612 if (xmlDefaultSAXHandler.error != NULL) {
9613 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9614 }
9615 return(NULL);
9616 }
9617
9618 inputStream = xmlNewInputStream(ctxt);
9619 if (inputStream == NULL) {
9620 xmlFreeParserCtxt(ctxt);
9621 return(NULL);
9622 }
9623
9624 inputStream->filename = xmlMemStrdup(filename);
9625 inputStream->buf = buf;
9626 inputStream->base = inputStream->buf->buffer->content;
9627 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009628 inputStream->end =
9629 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009630
9631 inputPush(ctxt, inputStream);
9632 if ((ctxt->directory == NULL) && (directory == NULL))
9633 directory = xmlParserGetDirectory(filename);
9634 if ((ctxt->directory == NULL) && (directory != NULL))
9635 ctxt->directory = directory;
9636
9637 return(ctxt);
9638}
9639
9640/**
9641 * xmlSAXParseFile:
9642 * @sax: the SAX handler block
9643 * @filename: the filename
9644 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9645 * documents
9646 *
9647 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9648 * compressed document is provided by default if found at compile-time.
9649 * It use the given SAX function block to handle the parsing callback.
9650 * If sax is NULL, fallback to the default DOM tree building routines.
9651 *
9652 * Returns the resulting document tree
9653 */
9654
9655xmlDocPtr
9656xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9657 int recovery) {
9658 xmlDocPtr ret;
9659 xmlParserCtxtPtr ctxt;
9660 char *directory = NULL;
9661
9662 ctxt = xmlCreateFileParserCtxt(filename);
9663 if (ctxt == NULL) {
9664 return(NULL);
9665 }
9666 if (sax != NULL) {
9667 if (ctxt->sax != NULL)
9668 xmlFree(ctxt->sax);
9669 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009670 }
9671
9672 if ((ctxt->directory == NULL) && (directory == NULL))
9673 directory = xmlParserGetDirectory(filename);
9674 if ((ctxt->directory == NULL) && (directory != NULL))
9675 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9676
9677 xmlParseDocument(ctxt);
9678
9679 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9680 else {
9681 ret = NULL;
9682 xmlFreeDoc(ctxt->myDoc);
9683 ctxt->myDoc = NULL;
9684 }
9685 if (sax != NULL)
9686 ctxt->sax = NULL;
9687 xmlFreeParserCtxt(ctxt);
9688
9689 return(ret);
9690}
9691
9692/**
9693 * xmlRecoverDoc:
9694 * @cur: a pointer to an array of xmlChar
9695 *
9696 * parse an XML in-memory document and build a tree.
9697 * In the case the document is not Well Formed, a tree is built anyway
9698 *
9699 * Returns the resulting document tree
9700 */
9701
9702xmlDocPtr
9703xmlRecoverDoc(xmlChar *cur) {
9704 return(xmlSAXParseDoc(NULL, cur, 1));
9705}
9706
9707/**
9708 * xmlParseFile:
9709 * @filename: the filename
9710 *
9711 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9712 * compressed document is provided by default if found at compile-time.
9713 *
9714 * Returns the resulting document tree
9715 */
9716
9717xmlDocPtr
9718xmlParseFile(const char *filename) {
9719 return(xmlSAXParseFile(NULL, filename, 0));
9720}
9721
9722/**
9723 * xmlRecoverFile:
9724 * @filename: the filename
9725 *
9726 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9727 * compressed document is provided by default if found at compile-time.
9728 * In the case the document is not Well Formed, a tree is built anyway
9729 *
9730 * Returns the resulting document tree
9731 */
9732
9733xmlDocPtr
9734xmlRecoverFile(const char *filename) {
9735 return(xmlSAXParseFile(NULL, filename, 1));
9736}
9737
9738
9739/**
9740 * xmlSetupParserForBuffer:
9741 * @ctxt: an XML parser context
9742 * @buffer: a xmlChar * buffer
9743 * @filename: a file name
9744 *
9745 * Setup the parser context to parse a new buffer; Clears any prior
9746 * contents from the parser context. The buffer parameter must not be
9747 * NULL, but the filename parameter can be
9748 */
9749void
9750xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9751 const char* filename)
9752{
9753 xmlParserInputPtr input;
9754
9755 input = xmlNewInputStream(ctxt);
9756 if (input == NULL) {
9757 perror("malloc");
9758 xmlFree(ctxt);
9759 return;
9760 }
9761
9762 xmlClearParserCtxt(ctxt);
9763 if (filename != NULL)
9764 input->filename = xmlMemStrdup(filename);
9765 input->base = buffer;
9766 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009767 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009768 inputPush(ctxt, input);
9769}
9770
9771/**
9772 * xmlSAXUserParseFile:
9773 * @sax: a SAX handler
9774 * @user_data: The user data returned on SAX callbacks
9775 * @filename: a file name
9776 *
9777 * parse an XML file and call the given SAX handler routines.
9778 * Automatic support for ZLIB/Compress compressed document is provided
9779 *
9780 * Returns 0 in case of success or a error number otherwise
9781 */
9782int
9783xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9784 const char *filename) {
9785 int ret = 0;
9786 xmlParserCtxtPtr ctxt;
9787
9788 ctxt = xmlCreateFileParserCtxt(filename);
9789 if (ctxt == NULL) return -1;
9790 if (ctxt->sax != &xmlDefaultSAXHandler)
9791 xmlFree(ctxt->sax);
9792 ctxt->sax = sax;
9793 if (user_data != NULL)
9794 ctxt->userData = user_data;
9795
9796 xmlParseDocument(ctxt);
9797
9798 if (ctxt->wellFormed)
9799 ret = 0;
9800 else {
9801 if (ctxt->errNo != 0)
9802 ret = ctxt->errNo;
9803 else
9804 ret = -1;
9805 }
9806 if (sax != NULL)
9807 ctxt->sax = NULL;
9808 xmlFreeParserCtxt(ctxt);
9809
9810 return ret;
9811}
9812
9813/************************************************************************
9814 * *
9815 * Front ends when parsing from memory *
9816 * *
9817 ************************************************************************/
9818
9819/**
9820 * xmlCreateMemoryParserCtxt:
9821 * @buffer: a pointer to a char array
9822 * @size: the size of the array
9823 *
9824 * Create a parser context for an XML in-memory document.
9825 *
9826 * Returns the new parser context or NULL
9827 */
9828xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009829xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009830 xmlParserCtxtPtr ctxt;
9831 xmlParserInputPtr input;
9832 xmlParserInputBufferPtr buf;
9833
9834 if (buffer == NULL)
9835 return(NULL);
9836 if (size <= 0)
9837 return(NULL);
9838
9839 ctxt = xmlNewParserCtxt();
9840 if (ctxt == NULL)
9841 return(NULL);
9842
9843 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9844 if (buf == NULL) return(NULL);
9845
9846 input = xmlNewInputStream(ctxt);
9847 if (input == NULL) {
9848 xmlFreeParserCtxt(ctxt);
9849 return(NULL);
9850 }
9851
9852 input->filename = NULL;
9853 input->buf = buf;
9854 input->base = input->buf->buffer->content;
9855 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009856 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009857
9858 inputPush(ctxt, input);
9859 return(ctxt);
9860}
9861
9862/**
9863 * xmlSAXParseMemory:
9864 * @sax: the SAX handler block
9865 * @buffer: an pointer to a char array
9866 * @size: the size of the array
9867 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9868 * documents
9869 *
9870 * parse an XML in-memory block and use the given SAX function block
9871 * to handle the parsing callback. If sax is NULL, fallback to the default
9872 * DOM tree building routines.
9873 *
9874 * Returns the resulting document tree
9875 */
9876xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +00009877xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
9878 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +00009879 xmlDocPtr ret;
9880 xmlParserCtxtPtr ctxt;
9881
9882 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9883 if (ctxt == NULL) return(NULL);
9884 if (sax != NULL) {
9885 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009886 }
9887
9888 xmlParseDocument(ctxt);
9889
9890 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9891 else {
9892 ret = NULL;
9893 xmlFreeDoc(ctxt->myDoc);
9894 ctxt->myDoc = NULL;
9895 }
9896 if (sax != NULL)
9897 ctxt->sax = NULL;
9898 xmlFreeParserCtxt(ctxt);
9899
9900 return(ret);
9901}
9902
9903/**
9904 * xmlParseMemory:
9905 * @buffer: an pointer to a char array
9906 * @size: the size of the array
9907 *
9908 * parse an XML in-memory block and build a tree.
9909 *
9910 * Returns the resulting document tree
9911 */
9912
Daniel Veillard50822cb2001-07-26 20:05:51 +00009913xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009914 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9915}
9916
9917/**
9918 * xmlRecoverMemory:
9919 * @buffer: an pointer to a char array
9920 * @size: the size of the array
9921 *
9922 * parse an XML in-memory block and build a tree.
9923 * In the case the document is not Well Formed, a tree is built anyway
9924 *
9925 * Returns the resulting document tree
9926 */
9927
Daniel Veillard50822cb2001-07-26 20:05:51 +00009928xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009929 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9930}
9931
9932/**
9933 * xmlSAXUserParseMemory:
9934 * @sax: a SAX handler
9935 * @user_data: The user data returned on SAX callbacks
9936 * @buffer: an in-memory XML document input
9937 * @size: the length of the XML document in bytes
9938 *
9939 * A better SAX parsing routine.
9940 * parse an XML in-memory buffer and call the given SAX handler routines.
9941 *
9942 * Returns 0 in case of success or a error number otherwise
9943 */
9944int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009945 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009946 int ret = 0;
9947 xmlParserCtxtPtr ctxt;
9948 xmlSAXHandlerPtr oldsax = NULL;
9949
9950 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9951 if (ctxt == NULL) return -1;
9952 if (sax != NULL) {
9953 oldsax = ctxt->sax;
9954 ctxt->sax = sax;
9955 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009956 if (user_data != NULL)
9957 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009958
9959 xmlParseDocument(ctxt);
9960
9961 if (ctxt->wellFormed)
9962 ret = 0;
9963 else {
9964 if (ctxt->errNo != 0)
9965 ret = ctxt->errNo;
9966 else
9967 ret = -1;
9968 }
9969 if (sax != NULL) {
9970 ctxt->sax = oldsax;
9971 }
9972 xmlFreeParserCtxt(ctxt);
9973
9974 return ret;
9975}
9976
9977/**
9978 * xmlCreateDocParserCtxt:
9979 * @cur: a pointer to an array of xmlChar
9980 *
9981 * Creates a parser context for an XML in-memory document.
9982 *
9983 * Returns the new parser context or NULL
9984 */
9985xmlParserCtxtPtr
9986xmlCreateDocParserCtxt(xmlChar *cur) {
9987 int len;
9988
9989 if (cur == NULL)
9990 return(NULL);
9991 len = xmlStrlen(cur);
9992 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9993}
9994
9995/**
9996 * xmlSAXParseDoc:
9997 * @sax: the SAX handler block
9998 * @cur: a pointer to an array of xmlChar
9999 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
10000 * documents
10001 *
10002 * parse an XML in-memory document and build a tree.
10003 * It use the given SAX function block to handle the parsing callback.
10004 * If sax is NULL, fallback to the default DOM tree building routines.
10005 *
10006 * Returns the resulting document tree
10007 */
10008
10009xmlDocPtr
10010xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10011 xmlDocPtr ret;
10012 xmlParserCtxtPtr ctxt;
10013
10014 if (cur == NULL) return(NULL);
10015
10016
10017 ctxt = xmlCreateDocParserCtxt(cur);
10018 if (ctxt == NULL) return(NULL);
10019 if (sax != NULL) {
10020 ctxt->sax = sax;
10021 ctxt->userData = NULL;
10022 }
10023
10024 xmlParseDocument(ctxt);
10025 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10026 else {
10027 ret = NULL;
10028 xmlFreeDoc(ctxt->myDoc);
10029 ctxt->myDoc = NULL;
10030 }
10031 if (sax != NULL)
10032 ctxt->sax = NULL;
10033 xmlFreeParserCtxt(ctxt);
10034
10035 return(ret);
10036}
10037
10038/**
10039 * xmlParseDoc:
10040 * @cur: a pointer to an array of xmlChar
10041 *
10042 * parse an XML in-memory document and build a tree.
10043 *
10044 * Returns the resulting document tree
10045 */
10046
10047xmlDocPtr
10048xmlParseDoc(xmlChar *cur) {
10049 return(xmlSAXParseDoc(NULL, cur, 0));
10050}
10051
10052
10053/************************************************************************
10054 * *
10055 * Miscellaneous *
10056 * *
10057 ************************************************************************/
10058
10059#ifdef LIBXML_XPATH_ENABLED
10060#include <libxml/xpath.h>
10061#endif
10062
/* Set to 1 once xmlInitParser() has run; reset to 0 by xmlCleanupParser(). */
static int xmlParserInitialized = 0;
10064
10065/**
10066 * xmlInitParser:
10067 *
10068 * Initialization function for the XML parser.
10069 * This is not reentrant. Call once before processing in case of
10070 * use in multithreaded programs.
10071 */
10072
10073void
10074xmlInitParser(void) {
10075 if (xmlParserInitialized) return;
10076
10077 xmlInitCharEncodingHandlers();
10078 xmlInitializePredefinedEntities();
10079 xmlDefaultSAXHandlerInit();
10080 xmlRegisterDefaultInputCallbacks();
10081 xmlRegisterDefaultOutputCallbacks();
10082#ifdef LIBXML_HTML_ENABLED
10083 htmlInitAutoClose();
10084 htmlDefaultSAXHandlerInit();
10085#endif
10086#ifdef LIBXML_XPATH_ENABLED
10087 xmlXPathInit();
10088#endif
10089 xmlParserInitialized = 1;
10090}
10091
10092/**
10093 * xmlCleanupParser:
10094 *
10095 * Cleanup function for the XML parser. It tries to reclaim all
10096 * parsing related global memory allocated for the parser processing.
10097 * It doesn't deallocate any document related memory. Calling this
10098 * function should not prevent reusing the parser.
10099 */
10100
10101void
10102xmlCleanupParser(void) {
10103 xmlParserInitialized = 0;
10104 xmlCleanupCharEncodingHandlers();
10105 xmlCleanupPredefinedEntities();
10106}
10107