blob: eb94eb99096fdbee0d1439d3d875c1325d450880 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
15 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
25 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
Daniel Veillardc5d64342001-06-24 12:13:24 +000030 * daniel@veillard.com
Owen Taylor3473f882001-02-23 17:55:21 +000031 *
32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33 * and xmlDoValidityCheckingDefaultValue for VMS
34 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
Owen Taylor3473f882001-02-23 17:55:21 +000038#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '\\'
40#else
Owen Taylor3473f882001-02-23 17:55:21 +000041#define XML_DIR_SEP '/'
42#endif
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
Daniel Veillard21a0f912001-02-25 19:54:14 +000077#define XML_PARSER_BIG_BUFFER_SIZE 300
Owen Taylor3473f882001-02-23 17:55:21 +000078#define XML_PARSER_BUFFER_SIZE 100
79
80/*
81 * Various global defaults for parsing
82 */
Owen Taylor3473f882001-02-23 17:55:21 +000083int xmlParserDebugEntities = 0;
Owen Taylor3473f882001-02-23 17:55:21 +000084
85/*
86 * List of XML prefixed PI allowed by W3C specs
87 */
88
89const char *xmlW3CPIs[] = {
90 "xml-stylesheet",
91 NULL
92};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
95void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
96xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
97 const xmlChar **str);
98
Daniel Veillard257d9102001-05-08 10:41:44 +000099static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000100xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
101 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000102 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000103 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000104
105/************************************************************************
106 * *
107 * Parser stacks related functions and macros *
108 * *
109 ************************************************************************/
110
111xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
112 const xmlChar ** str);
113
/*
 * Generic function for accessing stacks in the Parser Context
 *
 * PUSH_AND_POP(scope, type, name) generates two functions operating on the
 * ctxt->name##Tab / ctxt->name##Nr / ctxt->name##Max fields:
 *
 *   name##Push(ctxt, value): stores @value on top of the stack, doubling
 *       the table first when it is full, makes it the current ctxt->name
 *       and returns the index it was stored at.  Returns 0 when the table
 *       cannot be grown; in that case the stack is left untouched (the
 *       reallocation goes through a temporary so the existing table is
 *       neither lost nor leaked, and name##Max keeps its old value).
 *
 *   name##Pop(ctxt): removes the top entry, makes the entry below it (or
 *       NULL) the current ctxt->name and returns the removed entry, or 0
 *       when the stack is empty.
 */
#define PUSH_AND_POP(scope, type, name)                                 \
scope int name##Push(xmlParserCtxtPtr ctxt, type value) {               \
    if (ctxt->name##Nr >= ctxt->name##Max) {                            \
        type *tmp;                                                      \
                                                                        \
        tmp = (type *) xmlRealloc(ctxt->name##Tab,                      \
                 ctxt->name##Max * 2 * sizeof(ctxt->name##Tab[0]));     \
        if (tmp == NULL) {                                              \
            xmlGenericError(xmlGenericErrorContext,                     \
                    "realloc failed !\n");                              \
            return(0);                                                  \
        }                                                               \
        ctxt->name##Tab = tmp;                                          \
        ctxt->name##Max *= 2;                                           \
    }                                                                   \
    ctxt->name##Tab[ctxt->name##Nr] = value;                            \
    ctxt->name = value;                                                 \
    return(ctxt->name##Nr++);                                           \
}                                                                       \
scope type name##Pop(xmlParserCtxtPtr ctxt) {                           \
    type ret;                                                           \
    if (ctxt->name##Nr <= 0) return(0);                                 \
    ctxt->name##Nr--;                                                   \
    if (ctxt->name##Nr > 0)                                             \
        ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1];               \
    else                                                                \
        ctxt->name = NULL;                                              \
    ret = ctxt->name##Tab[ctxt->name##Nr];                              \
    ctxt->name##Tab[ctxt->name##Nr] = 0;                                \
    return(ret);                                                        \
}
146
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
148 * inputPop:
149 * @ctxt: an XML parser context
150 *
151 * Pops the top parser input from the input stack
152 *
153 * Returns the input just removed
154 */
155/**
156 * inputPush:
157 * @ctxt: an XML parser context
158 * @input: the parser input
159 *
160 * Pushes a new parser input on top of the input stack
161 */
162/**
163 * namePop:
164 * @ctxt: an XML parser context
165 *
166 * Pops the top element name from the name stack
167 *
168 * Returns the name just removed
169 */
170/**
171 * namePush:
172 * @ctxt: an XML parser context
173 * @name: the element name
174 *
175 * Pushes a new element name on top of the name stack
176 */
177/**
178 * nodePop:
179 * @ctxt: an XML parser context
180 *
181 * Pops the top element node from the node stack
182 *
183 * Returns the node just removed
184 */
185/**
186 * nodePush:
187 * @ctxt: an XML parser context
188 * @node: the element node
189 *
190 * Pushes a new element node on top of the node stack
191 */
Owen Taylor3473f882001-02-23 17:55:21 +0000192/*
193 * Those macros actually generate the functions
194 */
195PUSH_AND_POP(extern, xmlParserInputPtr, input)
196PUSH_AND_POP(extern, xmlNodePtr, node)
197PUSH_AND_POP(extern, xmlChar*, name)
198
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000199static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000200 if (ctxt->spaceNr >= ctxt->spaceMax) {
201 ctxt->spaceMax *= 2;
202 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
203 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
204 if (ctxt->spaceTab == NULL) {
205 xmlGenericError(xmlGenericErrorContext,
206 "realloc failed !\n");
207 return(0);
208 }
209 }
210 ctxt->spaceTab[ctxt->spaceNr] = val;
211 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
212 return(ctxt->spaceNr++);
213}
214
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000215static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000216 int ret;
217 if (ctxt->spaceNr <= 0) return(0);
218 ctxt->spaceNr--;
219 if (ctxt->spaceNr > 0)
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
221 else
222 ctxt->space = NULL;
223 ret = ctxt->spaceTab[ctxt->spaceNr];
224 ctxt->spaceTab[ctxt->spaceNr] = -1;
225 return(ret);
226}
227
228/*
229 * Macros for accessing the content. Those should be used only by the parser,
230 * and not exported.
231 *
232 * Dirty macros, i.e. one often need to make assumption on the context to
233 * use them
234 *
235 * CUR_PTR return the current pointer to the xmlChar to be parsed.
236 * To be used with extreme caution since operations consuming
237 * characters may move the input buffer to a different location !
238 * CUR returns the current xmlChar value, i.e. a 8 bit value if compiled
239 * This should be used internally by the parser
240 * only to compare to ASCII values otherwise it would break when
241 * running with UTF-8 encoding.
242 * RAW same as CUR but in the input buffer, bypass any token
243 * extraction that may have been done
244 * NXT(n) returns the n'th next xmlChar. Same as CUR it should be used only
245 * to compare on ASCII based substring.
246 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
247 * strings within the parser.
248 *
249 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
250 *
251 * NEXT Skip to the next character, this does the proper decoding
252 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
253 * NEXTL(l) Skip l xmlChars in the input buffer
254 * CUR_CHAR(l) returns the current unicode character (int), set l
255 * to the number of xmlChars used for the encoding [0-5].
256 * CUR_SCHAR same but operate on a string instead of the context
257 * COPY_BUF copy the current unicode char to the target buffer, increment
258 * the index
259 * GROW, SHRINK handling of input buffers
260 */
261
/* Current byte of the input, or -1 while a pushed-back token is pending. */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* Pending token if there is one, otherwise the current byte of the input. */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

/*
 * Advance by val bytes, then handle a PE reference or an exhausted input
 * found at the new position.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/*
 * SHRINK and GROW are deliberately left as bare "if" statements: code in
 * this file invokes GROW without a trailing semicolon, so they cannot be
 * wrapped in do { } while (0).  Do not use them as the body of an
 * if/else without braces.
 */
#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Advance by exactly one byte, refilling the input when it runs dry. */
#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

/*
 * Advance by l bytes, updating line/col from the byte being left and
 * dropping any pending token.
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * Append character v to buffer b at index i: a plain byte store when the
 * encoded length l is 1, xmlCopyCharMultiByte() otherwise.  The caller
 * supplies the terminating semicolon.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000314
315/**
316 * xmlSkipBlankChars:
317 * @ctxt: the XML parser context
318 *
319 * skip all blanks character found at that point in the input streams.
320 * It pops up finished entities in the process if allowable at that point.
321 *
322 * Returns the number of space chars skipped
323 */
324
325int
326xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000327 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000328
Daniel Veillard02141ea2001-04-30 11:46:40 +0000329 if (ctxt->token != 0) {
330 if (!IS_BLANK(ctxt->token))
331 return(0);
332 ctxt->token = 0;
333 res++;
334 }
Owen Taylor3473f882001-02-23 17:55:21 +0000335 /*
336 * It's Okay to use CUR/NEXT here since all the blanks are on
337 * the ASCII range.
338 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000339 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
340 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000341 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000343 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000344 cur = ctxt->input->cur;
345 while (IS_BLANK(*cur)) {
346 if (*cur == '\n') {
347 ctxt->input->line++; ctxt->input->col = 1;
348 }
349 cur++;
350 res++;
351 if (*cur == 0) {
352 ctxt->input->cur = cur;
353 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
354 cur = ctxt->input->cur;
355 }
356 }
357 ctxt->input->cur = cur;
358 } else {
359 int cur;
360 do {
361 cur = CUR;
362 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
363 NEXT;
364 cur = CUR;
365 res++;
366 }
367 while ((cur == 0) && (ctxt->inputNr > 1) &&
368 (ctxt->instate != XML_PARSER_COMMENT)) {
369 xmlPopInput(ctxt);
370 cur = CUR;
371 }
372 /*
373 * Need to handle support of entities branching here
374 */
375 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
376 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
377 }
Owen Taylor3473f882001-02-23 17:55:21 +0000378 return(res);
379}
380
381/************************************************************************
382 * *
383 * Commodity functions to handle entities *
384 * *
385 ************************************************************************/
386
387/**
388 * xmlPopInput:
389 * @ctxt: an XML parser context
390 *
391 * xmlPopInput: the current input pointed by ctxt->input came to an end
392 * pop it and return the next char.
393 *
394 * Returns the current xmlChar in the parser context
395 */
396xmlChar
397xmlPopInput(xmlParserCtxtPtr ctxt) {
398 if (ctxt->inputNr == 1) return(0); /* End of main Input */
399 if (xmlParserDebugEntities)
400 xmlGenericError(xmlGenericErrorContext,
401 "Popping input %d\n", ctxt->inputNr);
402 xmlFreeInputStream(inputPop(ctxt));
403 if ((*ctxt->input->cur == 0) &&
404 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
405 return(xmlPopInput(ctxt));
406 return(CUR);
407}
408
409/**
410 * xmlPushInput:
411 * @ctxt: an XML parser context
412 * @input: an XML parser input fragment (entity, XML fragment ...).
413 *
414 * xmlPushInput: switch to a new input stream which is stacked on top
415 * of the previous one(s).
416 */
417void
418xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
419 if (input == NULL) return;
420
421 if (xmlParserDebugEntities) {
422 if ((ctxt->input != NULL) && (ctxt->input->filename))
423 xmlGenericError(xmlGenericErrorContext,
424 "%s(%d): ", ctxt->input->filename,
425 ctxt->input->line);
426 xmlGenericError(xmlGenericErrorContext,
427 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
428 }
429 inputPush(ctxt, input);
430 GROW;
431}
432
433/**
434 * xmlParseCharRef:
435 * @ctxt: an XML parser context
436 *
437 * parse Reference declarations
438 *
439 * [66] CharRef ::= '&#' [0-9]+ ';' |
440 * '&#x' [0-9a-fA-F]+ ';'
441 *
442 * [ WFC: Legal Character ]
443 * Characters referred to using character references must match the
444 * production for Char.
445 *
446 * Returns the value parsed (as an int), 0 in case of error
447 */
448int
449xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000450 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000451 int count = 0;
452
453 if (ctxt->token != 0) {
454 val = ctxt->token;
455 ctxt->token = 0;
456 return(val);
457 }
458 /*
459 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
460 */
461 if ((RAW == '&') && (NXT(1) == '#') &&
462 (NXT(2) == 'x')) {
463 SKIP(3);
464 GROW;
465 while (RAW != ';') { /* loop blocked by count */
466 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
467 val = val * 16 + (CUR - '0');
468 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
469 val = val * 16 + (CUR - 'a') + 10;
470 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
471 val = val * 16 + (CUR - 'A') + 10;
472 else {
473 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
475 ctxt->sax->error(ctxt->userData,
476 "xmlParseCharRef: invalid hexadecimal value\n");
477 ctxt->wellFormed = 0;
478 ctxt->disableSAX = 1;
479 val = 0;
480 break;
481 }
482 NEXT;
483 count++;
484 }
485 if (RAW == ';') {
486 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
487 ctxt->nbChars ++;
488 ctxt->input->cur++;
489 }
490 } else if ((RAW == '&') && (NXT(1) == '#')) {
491 SKIP(2);
492 GROW;
493 while (RAW != ';') { /* loop blocked by count */
494 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
495 val = val * 10 + (CUR - '0');
496 else {
497 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
499 ctxt->sax->error(ctxt->userData,
500 "xmlParseCharRef: invalid decimal value\n");
501 ctxt->wellFormed = 0;
502 ctxt->disableSAX = 1;
503 val = 0;
504 break;
505 }
506 NEXT;
507 count++;
508 }
509 if (RAW == ';') {
510 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
511 ctxt->nbChars ++;
512 ctxt->input->cur++;
513 }
514 } else {
515 ctxt->errNo = XML_ERR_INVALID_CHARREF;
516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
517 ctxt->sax->error(ctxt->userData,
518 "xmlParseCharRef: invalid value\n");
519 ctxt->wellFormed = 0;
520 ctxt->disableSAX = 1;
521 }
522
523 /*
524 * [ WFC: Legal Character ]
525 * Characters referred to using character references must match the
526 * production for Char.
527 */
528 if (IS_CHAR(val)) {
529 return(val);
530 } else {
531 ctxt->errNo = XML_ERR_INVALID_CHAR;
532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
533 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
534 val);
535 ctxt->wellFormed = 0;
536 ctxt->disableSAX = 1;
537 }
538 return(0);
539}
540
541/**
542 * xmlParseStringCharRef:
543 * @ctxt: an XML parser context
544 * @str: a pointer to an index in the string
545 *
546 * parse Reference declarations, variant parsing from a string rather
547 * than an an input flow.
548 *
549 * [66] CharRef ::= '&#' [0-9]+ ';' |
550 * '&#x' [0-9a-fA-F]+ ';'
551 *
552 * [ WFC: Legal Character ]
553 * Characters referred to using character references must match the
554 * production for Char.
555 *
556 * Returns the value parsed (as an int), 0 in case of error, str will be
557 * updated to the current value of the index
558 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000559static int
Owen Taylor3473f882001-02-23 17:55:21 +0000560xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
561 const xmlChar *ptr;
562 xmlChar cur;
563 int val = 0;
564
565 if ((str == NULL) || (*str == NULL)) return(0);
566 ptr = *str;
567 cur = *ptr;
568 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
569 ptr += 3;
570 cur = *ptr;
571 while (cur != ';') { /* Non input consuming loop */
572 if ((cur >= '0') && (cur <= '9'))
573 val = val * 16 + (cur - '0');
574 else if ((cur >= 'a') && (cur <= 'f'))
575 val = val * 16 + (cur - 'a') + 10;
576 else if ((cur >= 'A') && (cur <= 'F'))
577 val = val * 16 + (cur - 'A') + 10;
578 else {
579 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
581 ctxt->sax->error(ctxt->userData,
582 "xmlParseStringCharRef: invalid hexadecimal value\n");
583 ctxt->wellFormed = 0;
584 ctxt->disableSAX = 1;
585 val = 0;
586 break;
587 }
588 ptr++;
589 cur = *ptr;
590 }
591 if (cur == ';')
592 ptr++;
593 } else if ((cur == '&') && (ptr[1] == '#')){
594 ptr += 2;
595 cur = *ptr;
596 while (cur != ';') { /* Non input consuming loops */
597 if ((cur >= '0') && (cur <= '9'))
598 val = val * 10 + (cur - '0');
599 else {
600 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
602 ctxt->sax->error(ctxt->userData,
603 "xmlParseStringCharRef: invalid decimal value\n");
604 ctxt->wellFormed = 0;
605 ctxt->disableSAX = 1;
606 val = 0;
607 break;
608 }
609 ptr++;
610 cur = *ptr;
611 }
612 if (cur == ';')
613 ptr++;
614 } else {
615 ctxt->errNo = XML_ERR_INVALID_CHARREF;
616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
617 ctxt->sax->error(ctxt->userData,
618 "xmlParseCharRef: invalid value\n");
619 ctxt->wellFormed = 0;
620 ctxt->disableSAX = 1;
621 return(0);
622 }
623 *str = ptr;
624
625 /*
626 * [ WFC: Legal Character ]
627 * Characters referred to using character references must match the
628 * production for Char.
629 */
630 if (IS_CHAR(val)) {
631 return(val);
632 } else {
633 ctxt->errNo = XML_ERR_INVALID_CHAR;
634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
635 ctxt->sax->error(ctxt->userData,
636 "CharRef: invalid xmlChar value %d\n", val);
637 ctxt->wellFormed = 0;
638 ctxt->disableSAX = 1;
639 }
640 return(0);
641}
642
643/**
644 * xmlParserHandlePEReference:
645 * @ctxt: the parser context
646 *
647 * [69] PEReference ::= '%' Name ';'
648 *
649 * [ WFC: No Recursion ]
650 * A parsed entity must not contain a recursive
651 * reference to itself, either directly or indirectly.
652 *
653 * [ WFC: Entity Declared ]
654 * In a document without any DTD, a document with only an internal DTD
655 * subset which contains no parameter entity references, or a document
656 * with "standalone='yes'", ... ... The declaration of a parameter
657 * entity must precede any reference to it...
658 *
659 * [ VC: Entity Declared ]
660 * In a document with an external subset or external parameter entities
661 * with "standalone='no'", ... ... The declaration of a parameter entity
662 * must precede any reference to it...
663 *
664 * [ WFC: In DTD ]
665 * Parameter-entity references may only appear in the DTD.
666 * NOTE: misleading but this is handled.
667 *
668 * A PEReference may have been detected in the current input stream
669 * the handling is done accordingly to
670 * http://www.w3.org/TR/REC-xml#entproc
671 * i.e.
672 * - Included in literal in entity values
673 * - Included as Paraemeter Entity reference within DTDs
674 */
675void
676xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
677 xmlChar *name;
678 xmlEntityPtr entity = NULL;
679 xmlParserInputPtr input;
680
681 if (ctxt->token != 0) {
682 return;
683 }
684 if (RAW != '%') return;
685 switch(ctxt->instate) {
686 case XML_PARSER_CDATA_SECTION:
687 return;
688 case XML_PARSER_COMMENT:
689 return;
690 case XML_PARSER_START_TAG:
691 return;
692 case XML_PARSER_END_TAG:
693 return;
694 case XML_PARSER_EOF:
695 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
697 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
698 ctxt->wellFormed = 0;
699 ctxt->disableSAX = 1;
700 return;
701 case XML_PARSER_PROLOG:
702 case XML_PARSER_START:
703 case XML_PARSER_MISC:
704 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
706 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
707 ctxt->wellFormed = 0;
708 ctxt->disableSAX = 1;
709 return;
710 case XML_PARSER_ENTITY_DECL:
711 case XML_PARSER_CONTENT:
712 case XML_PARSER_ATTRIBUTE_VALUE:
713 case XML_PARSER_PI:
714 case XML_PARSER_SYSTEM_LITERAL:
715 /* we just ignore it there */
716 return;
717 case XML_PARSER_EPILOG:
718 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
720 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
721 ctxt->wellFormed = 0;
722 ctxt->disableSAX = 1;
723 return;
724 case XML_PARSER_ENTITY_VALUE:
725 /*
726 * NOTE: in the case of entity values, we don't do the
727 * substitution here since we need the literal
728 * entity value to be able to save the internal
729 * subset of the document.
730 * This will be handled by xmlStringDecodeEntities
731 */
732 return;
733 case XML_PARSER_DTD:
734 /*
735 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
736 * In the internal DTD subset, parameter-entity references
737 * can occur only where markup declarations can occur, not
738 * within markup declarations.
739 * In that case this is handled in xmlParseMarkupDecl
740 */
741 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
742 return;
743 break;
744 case XML_PARSER_IGNORE:
745 return;
746 }
747
748 NEXT;
749 name = xmlParseName(ctxt);
750 if (xmlParserDebugEntities)
751 xmlGenericError(xmlGenericErrorContext,
752 "PE Reference: %s\n", name);
753 if (name == NULL) {
754 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
756 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
757 ctxt->wellFormed = 0;
758 ctxt->disableSAX = 1;
759 } else {
760 if (RAW == ';') {
761 NEXT;
762 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
763 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
764 if (entity == NULL) {
765
766 /*
767 * [ WFC: Entity Declared ]
768 * In a document without any DTD, a document with only an
769 * internal DTD subset which contains no parameter entity
770 * references, or a document with "standalone='yes'", ...
771 * ... The declaration of a parameter entity must precede
772 * any reference to it...
773 */
774 if ((ctxt->standalone == 1) ||
775 ((ctxt->hasExternalSubset == 0) &&
776 (ctxt->hasPErefs == 0))) {
777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
778 ctxt->sax->error(ctxt->userData,
779 "PEReference: %%%s; not found\n", name);
780 ctxt->wellFormed = 0;
781 ctxt->disableSAX = 1;
782 } else {
783 /*
784 * [ VC: Entity Declared ]
785 * In a document with an external subset or external
786 * parameter entities with "standalone='no'", ...
787 * ... The declaration of a parameter entity must precede
788 * any reference to it...
789 */
790 if ((!ctxt->disableSAX) &&
791 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
792 ctxt->vctxt.error(ctxt->vctxt.userData,
793 "PEReference: %%%s; not found\n", name);
794 } else if ((!ctxt->disableSAX) &&
795 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
796 ctxt->sax->warning(ctxt->userData,
797 "PEReference: %%%s; not found\n", name);
798 ctxt->valid = 0;
799 }
800 } else {
801 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
802 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000803 xmlChar start[4];
804 xmlCharEncoding enc;
805
Owen Taylor3473f882001-02-23 17:55:21 +0000806 /*
807 * handle the extra spaces added before and after
808 * c.f. http://www.w3.org/TR/REC-xml#as-PE
809 * this is done independantly.
810 */
811 input = xmlNewEntityInputStream(ctxt, entity);
812 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000813
814 /*
815 * Get the 4 first bytes and decode the charset
816 * if enc != XML_CHAR_ENCODING_NONE
817 * plug some encoding conversion routines.
818 */
819 GROW
820 start[0] = RAW;
821 start[1] = NXT(1);
822 start[2] = NXT(2);
823 start[3] = NXT(3);
824 enc = xmlDetectCharEncoding(start, 4);
825 if (enc != XML_CHAR_ENCODING_NONE) {
826 xmlSwitchEncoding(ctxt, enc);
827 }
828
Owen Taylor3473f882001-02-23 17:55:21 +0000829 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
830 (RAW == '<') && (NXT(1) == '?') &&
831 (NXT(2) == 'x') && (NXT(3) == 'm') &&
832 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
833 xmlParseTextDecl(ctxt);
834 }
835 if (ctxt->token == 0)
836 ctxt->token = ' ';
837 } else {
838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
839 ctxt->sax->error(ctxt->userData,
840 "xmlHandlePEReference: %s is not a parameter entity\n",
841 name);
842 ctxt->wellFormed = 0;
843 ctxt->disableSAX = 1;
844 }
845 }
846 } else {
847 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData,
850 "xmlHandlePEReference: expecting ';'\n");
851 ctxt->wellFormed = 0;
852 ctxt->disableSAX = 1;
853 }
854 xmlFree(name);
855 }
856}
857
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates buffer accordingly.  It must be
 * used inside a function returning a pointer: when the reallocation
 * fails the macro reports it with perror(), releases the old buffer and
 * makes the enclosing function return NULL.  The reallocation goes
 * through a temporary so the old block is not leaked on failure.
 * NOTE(review): releasing the old block assumes the enclosing function
 * is its only owner - confirm for users of the macro elsewhere in the
 * file.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *growBuffer_tmp;                                            \
                                                                        \
    buffer##_size *= 2;                                                 \
    growBuffer_tmp = (xmlChar *)                                        \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (growBuffer_tmp == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = growBuffer_tmp;                                            \
}
870
871/**
872 * xmlStringDecodeEntities:
873 * @ctxt: the parser context
874 * @str: the input string
875 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
876 * @end: an end marker xmlChar, 0 if none
877 * @end2: an end marker xmlChar, 0 if none
878 * @end3: an end marker xmlChar, 0 if none
879 *
880 * Takes a entity string content and process to do the adequate subtitutions.
881 *
882 * [67] Reference ::= EntityRef | CharRef
883 *
884 * [69] PEReference ::= '%' Name ';'
885 *
886 * Returns A newly allocated string with the substitution done. The caller
887 * must deallocate it !
888 */
889xmlChar *
890xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
891 xmlChar end, xmlChar end2, xmlChar end3) {
892 xmlChar *buffer = NULL;
893 int buffer_size = 0;
894
895 xmlChar *current = NULL;
896 xmlEntityPtr ent;
897 int c,l;
898 int nbchars = 0;
899
900 if (str == NULL)
901 return(NULL);
902
903 if (ctxt->depth > 40) {
904 ctxt->errNo = XML_ERR_ENTITY_LOOP;
905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
906 ctxt->sax->error(ctxt->userData,
907 "Detected entity reference loop\n");
908 ctxt->wellFormed = 0;
909 ctxt->disableSAX = 1;
910 return(NULL);
911 }
912
913 /*
914 * allocate a translation buffer.
915 */
916 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
917 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
918 if (buffer == NULL) {
919 perror("xmlDecodeEntities: malloc failed");
920 return(NULL);
921 }
922
923 /*
924 * Ok loop until we reach one of the ending char or a size limit.
925 * we are operating on already parsed values.
926 */
927 c = CUR_SCHAR(str, l);
928 while ((c != 0) && (c != end) && /* non input consuming loop */
929 (c != end2) && (c != end3)) {
930
931 if (c == 0) break;
932 if ((c == '&') && (str[1] == '#')) {
933 int val = xmlParseStringCharRef(ctxt, &str);
934 if (val != 0) {
935 COPY_BUF(0,buffer,nbchars,val);
936 }
937 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
938 if (xmlParserDebugEntities)
939 xmlGenericError(xmlGenericErrorContext,
940 "String decoding Entity Reference: %.30s\n",
941 str);
942 ent = xmlParseStringEntityRef(ctxt, &str);
943 if ((ent != NULL) &&
944 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
945 if (ent->content != NULL) {
946 COPY_BUF(0,buffer,nbchars,ent->content[0]);
947 } else {
948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
949 ctxt->sax->error(ctxt->userData,
950 "internal error entity has no content\n");
951 }
952 } else if ((ent != NULL) && (ent->content != NULL)) {
953 xmlChar *rep;
954
955 ctxt->depth++;
956 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
957 0, 0, 0);
958 ctxt->depth--;
959 if (rep != NULL) {
960 current = rep;
961 while (*current != 0) { /* non input consuming loop */
962 buffer[nbchars++] = *current++;
963 if (nbchars >
964 buffer_size - XML_PARSER_BUFFER_SIZE) {
965 growBuffer(buffer);
966 }
967 }
968 xmlFree(rep);
969 }
970 } else if (ent != NULL) {
971 int i = xmlStrlen(ent->name);
972 const xmlChar *cur = ent->name;
973
974 buffer[nbchars++] = '&';
975 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
976 growBuffer(buffer);
977 }
978 for (;i > 0;i--)
979 buffer[nbchars++] = *cur++;
980 buffer[nbchars++] = ';';
981 }
982 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
983 if (xmlParserDebugEntities)
984 xmlGenericError(xmlGenericErrorContext,
985 "String decoding PE Reference: %.30s\n", str);
986 ent = xmlParseStringPEReference(ctxt, &str);
987 if (ent != NULL) {
988 xmlChar *rep;
989
990 ctxt->depth++;
991 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
992 0, 0, 0);
993 ctxt->depth--;
994 if (rep != NULL) {
995 current = rep;
996 while (*current != 0) { /* non input consuming loop */
997 buffer[nbchars++] = *current++;
998 if (nbchars >
999 buffer_size - XML_PARSER_BUFFER_SIZE) {
1000 growBuffer(buffer);
1001 }
1002 }
1003 xmlFree(rep);
1004 }
1005 }
1006 } else {
1007 COPY_BUF(l,buffer,nbchars,c);
1008 str += l;
1009 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1010 growBuffer(buffer);
1011 }
1012 }
1013 c = CUR_SCHAR(str, l);
1014 }
1015 buffer[nbchars++] = 0;
1016 return(buffer);
1017}
1018
1019
1020/************************************************************************
1021 * *
1022 * Commodity functions to handle xmlChars *
1023 * *
1024 ************************************************************************/
1025
1026/**
1027 * xmlStrndup:
1028 * @cur: the input xmlChar *
1029 * @len: the len of @cur
1030 *
1031 * a strndup for array of xmlChar's
1032 *
1033 * Returns a new xmlChar * or NULL
1034 */
1035xmlChar *
1036xmlStrndup(const xmlChar *cur, int len) {
1037 xmlChar *ret;
1038
1039 if ((cur == NULL) || (len < 0)) return(NULL);
1040 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1041 if (ret == NULL) {
1042 xmlGenericError(xmlGenericErrorContext,
1043 "malloc of %ld byte failed\n",
1044 (len + 1) * (long)sizeof(xmlChar));
1045 return(NULL);
1046 }
1047 memcpy(ret, cur, len * sizeof(xmlChar));
1048 ret[len] = 0;
1049 return(ret);
1050}
1051
1052/**
1053 * xmlStrdup:
1054 * @cur: the input xmlChar *
1055 *
1056 * a strdup for array of xmlChar's. Since they are supposed to be
1057 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1058 * a termination mark of '0'.
1059 *
1060 * Returns a new xmlChar * or NULL
1061 */
1062xmlChar *
1063xmlStrdup(const xmlChar *cur) {
1064 const xmlChar *p = cur;
1065
1066 if (cur == NULL) return(NULL);
1067 while (*p != 0) p++; /* non input consuming */
1068 return(xmlStrndup(cur, p - cur));
1069}
1070
1071/**
1072 * xmlCharStrndup:
1073 * @cur: the input char *
1074 * @len: the len of @cur
1075 *
1076 * a strndup for char's to xmlChar's
1077 *
1078 * Returns a new xmlChar * or NULL
1079 */
1080
1081xmlChar *
1082xmlCharStrndup(const char *cur, int len) {
1083 int i;
1084 xmlChar *ret;
1085
1086 if ((cur == NULL) || (len < 0)) return(NULL);
1087 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1088 if (ret == NULL) {
1089 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1090 (len + 1) * (long)sizeof(xmlChar));
1091 return(NULL);
1092 }
1093 for (i = 0;i < len;i++)
1094 ret[i] = (xmlChar) cur[i];
1095 ret[len] = 0;
1096 return(ret);
1097}
1098
1099/**
1100 * xmlCharStrdup:
1101 * @cur: the input char *
1102 * @len: the len of @cur
1103 *
1104 * a strdup for char's to xmlChar's
1105 *
1106 * Returns a new xmlChar * or NULL
1107 */
1108
1109xmlChar *
1110xmlCharStrdup(const char *cur) {
1111 const char *p = cur;
1112
1113 if (cur == NULL) return(NULL);
1114 while (*p != '\0') p++; /* non input consuming */
1115 return(xmlCharStrndup(cur, p - cur));
1116}
1117
1118/**
1119 * xmlStrcmp:
1120 * @str1: the first xmlChar *
1121 * @str2: the second xmlChar *
1122 *
1123 * a strcmp for xmlChar's
1124 *
1125 * Returns the integer result of the comparison
1126 */
1127
1128int
1129xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1130 register int tmp;
1131
1132 if (str1 == str2) return(0);
1133 if (str1 == NULL) return(-1);
1134 if (str2 == NULL) return(1);
1135 do {
1136 tmp = *str1++ - *str2;
1137 if (tmp != 0) return(tmp);
1138 } while (*str2++ != 0);
1139 return 0;
1140}
1141
1142/**
1143 * xmlStrEqual:
1144 * @str1: the first xmlChar *
1145 * @str2: the second xmlChar *
1146 *
1147 * Check if both string are equal of have same content
1148 * Should be a bit more readable and faster than xmlStrEqual()
1149 *
1150 * Returns 1 if they are equal, 0 if they are different
1151 */
1152
1153int
1154xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1155 if (str1 == str2) return(1);
1156 if (str1 == NULL) return(0);
1157 if (str2 == NULL) return(0);
1158 do {
1159 if (*str1++ != *str2) return(0);
1160 } while (*str2++);
1161 return(1);
1162}
1163
1164/**
1165 * xmlStrncmp:
1166 * @str1: the first xmlChar *
1167 * @str2: the second xmlChar *
1168 * @len: the max comparison length
1169 *
1170 * a strncmp for xmlChar's
1171 *
1172 * Returns the integer result of the comparison
1173 */
1174
1175int
1176xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1177 register int tmp;
1178
1179 if (len <= 0) return(0);
1180 if (str1 == str2) return(0);
1181 if (str1 == NULL) return(-1);
1182 if (str2 == NULL) return(1);
1183 do {
1184 tmp = *str1++ - *str2;
1185 if (tmp != 0 || --len == 0) return(tmp);
1186 } while (*str2++ != 0);
1187 return 0;
1188}
1189
1190static xmlChar casemap[256] = {
1191 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1192 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1193 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1194 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1195 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1196 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1197 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1198 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1199 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1200 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1201 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1202 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1203 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1204 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1205 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1206 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1207 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1208 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1209 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1210 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1211 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1212 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1213 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1214 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1215 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1216 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1217 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1218 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1219 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1220 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1221 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1222 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1223};
1224
1225/**
1226 * xmlStrcasecmp:
1227 * @str1: the first xmlChar *
1228 * @str2: the second xmlChar *
1229 *
1230 * a strcasecmp for xmlChar's
1231 *
1232 * Returns the integer result of the comparison
1233 */
1234
1235int
1236xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1237 register int tmp;
1238
1239 if (str1 == str2) return(0);
1240 if (str1 == NULL) return(-1);
1241 if (str2 == NULL) return(1);
1242 do {
1243 tmp = casemap[*str1++] - casemap[*str2];
1244 if (tmp != 0) return(tmp);
1245 } while (*str2++ != 0);
1246 return 0;
1247}
1248
1249/**
1250 * xmlStrncasecmp:
1251 * @str1: the first xmlChar *
1252 * @str2: the second xmlChar *
1253 * @len: the max comparison length
1254 *
1255 * a strncasecmp for xmlChar's
1256 *
1257 * Returns the integer result of the comparison
1258 */
1259
1260int
1261xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1262 register int tmp;
1263
1264 if (len <= 0) return(0);
1265 if (str1 == str2) return(0);
1266 if (str1 == NULL) return(-1);
1267 if (str2 == NULL) return(1);
1268 do {
1269 tmp = casemap[*str1++] - casemap[*str2];
1270 if (tmp != 0 || --len == 0) return(tmp);
1271 } while (*str2++ != 0);
1272 return 0;
1273}
1274
1275/**
1276 * xmlStrchr:
1277 * @str: the xmlChar * array
1278 * @val: the xmlChar to search
1279 *
1280 * a strchr for xmlChar's
1281 *
1282 * Returns the xmlChar * for the first occurence or NULL.
1283 */
1284
1285const xmlChar *
1286xmlStrchr(const xmlChar *str, xmlChar val) {
1287 if (str == NULL) return(NULL);
1288 while (*str != 0) { /* non input consuming */
1289 if (*str == val) return((xmlChar *) str);
1290 str++;
1291 }
1292 return(NULL);
1293}
1294
1295/**
1296 * xmlStrstr:
1297 * @str: the xmlChar * array (haystack)
1298 * @val: the xmlChar to search (needle)
1299 *
1300 * a strstr for xmlChar's
1301 *
1302 * Returns the xmlChar * for the first occurence or NULL.
1303 */
1304
1305const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001306xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001307 int n;
1308
1309 if (str == NULL) return(NULL);
1310 if (val == NULL) return(NULL);
1311 n = xmlStrlen(val);
1312
1313 if (n == 0) return(str);
1314 while (*str != 0) { /* non input consuming */
1315 if (*str == *val) {
1316 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1317 }
1318 str++;
1319 }
1320 return(NULL);
1321}
1322
1323/**
1324 * xmlStrcasestr:
1325 * @str: the xmlChar * array (haystack)
1326 * @val: the xmlChar to search (needle)
1327 *
1328 * a case-ignoring strstr for xmlChar's
1329 *
1330 * Returns the xmlChar * for the first occurence or NULL.
1331 */
1332
1333const xmlChar *
1334xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1335 int n;
1336
1337 if (str == NULL) return(NULL);
1338 if (val == NULL) return(NULL);
1339 n = xmlStrlen(val);
1340
1341 if (n == 0) return(str);
1342 while (*str != 0) { /* non input consuming */
1343 if (casemap[*str] == casemap[*val])
1344 if (!xmlStrncasecmp(str, val, n)) return(str);
1345 str++;
1346 }
1347 return(NULL);
1348}
1349
1350/**
1351 * xmlStrsub:
1352 * @str: the xmlChar * array (haystack)
1353 * @start: the index of the first char (zero based)
1354 * @len: the length of the substring
1355 *
1356 * Extract a substring of a given string
1357 *
1358 * Returns the xmlChar * for the first occurence or NULL.
1359 */
1360
1361xmlChar *
1362xmlStrsub(const xmlChar *str, int start, int len) {
1363 int i;
1364
1365 if (str == NULL) return(NULL);
1366 if (start < 0) return(NULL);
1367 if (len < 0) return(NULL);
1368
1369 for (i = 0;i < start;i++) {
1370 if (*str == 0) return(NULL);
1371 str++;
1372 }
1373 if (*str == 0) return(NULL);
1374 return(xmlStrndup(str, len));
1375}
1376
1377/**
1378 * xmlStrlen:
1379 * @str: the xmlChar * array
1380 *
1381 * length of a xmlChar's string
1382 *
1383 * Returns the number of xmlChar contained in the ARRAY.
1384 */
1385
1386int
1387xmlStrlen(const xmlChar *str) {
1388 int len = 0;
1389
1390 if (str == NULL) return(0);
1391 while (*str != 0) { /* non input consuming */
1392 str++;
1393 len++;
1394 }
1395 return(len);
1396}
1397
1398/**
1399 * xmlStrncat:
1400 * @cur: the original xmlChar * array
1401 * @add: the xmlChar * array added
1402 * @len: the length of @add
1403 *
1404 * a strncat for array of xmlChar's, it will extend cur with the len
1405 * first bytes of @add.
1406 *
1407 * Returns a new xmlChar *, the original @cur is reallocated if needed
1408 * and should not be freed
1409 */
1410
1411xmlChar *
1412xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1413 int size;
1414 xmlChar *ret;
1415
1416 if ((add == NULL) || (len == 0))
1417 return(cur);
1418 if (cur == NULL)
1419 return(xmlStrndup(add, len));
1420
1421 size = xmlStrlen(cur);
1422 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1423 if (ret == NULL) {
1424 xmlGenericError(xmlGenericErrorContext,
1425 "xmlStrncat: realloc of %ld byte failed\n",
1426 (size + len + 1) * (long)sizeof(xmlChar));
1427 return(cur);
1428 }
1429 memcpy(&ret[size], add, len * sizeof(xmlChar));
1430 ret[size + len] = 0;
1431 return(ret);
1432}
1433
1434/**
1435 * xmlStrcat:
1436 * @cur: the original xmlChar * array
1437 * @add: the xmlChar * array added
1438 *
1439 * a strcat for array of xmlChar's. Since they are supposed to be
1440 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1441 * a termination mark of '0'.
1442 *
1443 * Returns a new xmlChar * containing the concatenated string.
1444 */
1445xmlChar *
1446xmlStrcat(xmlChar *cur, const xmlChar *add) {
1447 const xmlChar *p = add;
1448
1449 if (add == NULL) return(cur);
1450 if (cur == NULL)
1451 return(xmlStrdup(add));
1452
1453 while (*p != 0) p++; /* non input consuming */
1454 return(xmlStrncat(cur, add, p - add));
1455}
1456
1457/************************************************************************
1458 * *
1459 * Commodity functions, cleanup needed ? *
1460 * *
1461 ************************************************************************/
1462
1463/**
1464 * areBlanks:
1465 * @ctxt: an XML parser context
1466 * @str: a xmlChar *
1467 * @len: the size of @str
1468 *
1469 * Is this a sequence of blank chars that one can ignore ?
1470 *
1471 * Returns 1 if ignorable 0 otherwise.
1472 */
1473
1474static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1475 int i, ret;
1476 xmlNodePtr lastChild;
1477
Daniel Veillard2f362242001-03-02 17:36:21 +00001478 if (ctxt->keepBlanks)
1479 return(0);
1480
Owen Taylor3473f882001-02-23 17:55:21 +00001481 /*
1482 * Check for xml:space value.
1483 */
1484 if (*(ctxt->space) == 1)
1485 return(0);
1486
1487 /*
1488 * Check that the string is made of blanks
1489 */
1490 for (i = 0;i < len;i++)
1491 if (!(IS_BLANK(str[i]))) return(0);
1492
1493 /*
1494 * Look if the element is mixed content in the Dtd if available
1495 */
1496 if (ctxt->myDoc != NULL) {
1497 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1498 if (ret == 0) return(1);
1499 if (ret == 1) return(0);
1500 }
1501
1502 /*
1503 * Otherwise, heuristic :-\
1504 */
Owen Taylor3473f882001-02-23 17:55:21 +00001505 if (RAW != '<') return(0);
1506 if (ctxt->node == NULL) return(0);
1507 if ((ctxt->node->children == NULL) &&
1508 (RAW == '<') && (NXT(1) == '/')) return(0);
1509
1510 lastChild = xmlGetLastChild(ctxt->node);
1511 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001512 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1513 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001514 } else if (xmlNodeIsText(lastChild))
1515 return(0);
1516 else if ((ctxt->node->children != NULL) &&
1517 (xmlNodeIsText(ctxt->node->children)))
1518 return(0);
1519 return(1);
1520}
1521
/*
 * Forward declarations for recursive behaviour.
 */
1525void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1526void xmlParseReference(xmlParserCtxtPtr ctxt);
1527
1528/************************************************************************
1529 * *
1530 * Extra stuff for namespace support *
1531 * Relates to http://www.w3.org/TR/WD-xml-names *
1532 * *
1533 ************************************************************************/
1534
1535/**
1536 * xmlSplitQName:
1537 * @ctxt: an XML parser context
1538 * @name: an XML parser context
1539 * @prefix: a xmlChar **
1540 *
1541 * parse an UTF8 encoded XML qualified name string
1542 *
1543 * [NS 5] QName ::= (Prefix ':')? LocalPart
1544 *
1545 * [NS 6] Prefix ::= NCName
1546 *
1547 * [NS 7] LocalPart ::= NCName
1548 *
1549 * Returns the local part, and prefix is updated
1550 * to get the Prefix if any.
1551 */
1552
1553xmlChar *
1554xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1555 xmlChar buf[XML_MAX_NAMELEN + 5];
1556 xmlChar *buffer = NULL;
1557 int len = 0;
1558 int max = XML_MAX_NAMELEN;
1559 xmlChar *ret = NULL;
1560 const xmlChar *cur = name;
1561 int c;
1562
1563 *prefix = NULL;
1564
1565#ifndef XML_XML_NAMESPACE
1566 /* xml: prefix is not really a namespace */
1567 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1568 (cur[2] == 'l') && (cur[3] == ':'))
1569 return(xmlStrdup(name));
1570#endif
1571
1572 /* nasty but valid */
1573 if (cur[0] == ':')
1574 return(xmlStrdup(name));
1575
1576 c = *cur++;
1577 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1578 buf[len++] = c;
1579 c = *cur++;
1580 }
1581 if (len >= max) {
1582 /*
1583 * Okay someone managed to make a huge name, so he's ready to pay
1584 * for the processing speed.
1585 */
1586 max = len * 2;
1587
1588 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1589 if (buffer == NULL) {
1590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1591 ctxt->sax->error(ctxt->userData,
1592 "xmlSplitQName: out of memory\n");
1593 return(NULL);
1594 }
1595 memcpy(buffer, buf, len);
1596 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1597 if (len + 10 > max) {
1598 max *= 2;
1599 buffer = (xmlChar *) xmlRealloc(buffer,
1600 max * sizeof(xmlChar));
1601 if (buffer == NULL) {
1602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1603 ctxt->sax->error(ctxt->userData,
1604 "xmlSplitQName: out of memory\n");
1605 return(NULL);
1606 }
1607 }
1608 buffer[len++] = c;
1609 c = *cur++;
1610 }
1611 buffer[len] = 0;
1612 }
1613
1614 if (buffer == NULL)
1615 ret = xmlStrndup(buf, len);
1616 else {
1617 ret = buffer;
1618 buffer = NULL;
1619 max = XML_MAX_NAMELEN;
1620 }
1621
1622
1623 if (c == ':') {
1624 c = *cur++;
1625 if (c == 0) return(ret);
1626 *prefix = ret;
1627 len = 0;
1628
1629 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1630 buf[len++] = c;
1631 c = *cur++;
1632 }
1633 if (len >= max) {
1634 /*
1635 * Okay someone managed to make a huge name, so he's ready to pay
1636 * for the processing speed.
1637 */
1638 max = len * 2;
1639
1640 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1641 if (buffer == NULL) {
1642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1643 ctxt->sax->error(ctxt->userData,
1644 "xmlSplitQName: out of memory\n");
1645 return(NULL);
1646 }
1647 memcpy(buffer, buf, len);
1648 while (c != 0) { /* tested bigname2.xml */
1649 if (len + 10 > max) {
1650 max *= 2;
1651 buffer = (xmlChar *) xmlRealloc(buffer,
1652 max * sizeof(xmlChar));
1653 if (buffer == NULL) {
1654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1655 ctxt->sax->error(ctxt->userData,
1656 "xmlSplitQName: out of memory\n");
1657 return(NULL);
1658 }
1659 }
1660 buffer[len++] = c;
1661 c = *cur++;
1662 }
1663 buffer[len] = 0;
1664 }
1665
1666 if (buffer == NULL)
1667 ret = xmlStrndup(buf, len);
1668 else {
1669 ret = buffer;
1670 }
1671 }
1672
1673 return(ret);
1674}
1675
1676/************************************************************************
1677 * *
1678 * The parser itself *
1679 * Relates to http://www.w3.org/TR/REC-xml *
1680 * *
1681 ************************************************************************/
1682
Daniel Veillard76d66f42001-05-16 21:05:17 +00001683static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001684/**
1685 * xmlParseName:
1686 * @ctxt: an XML parser context
1687 *
1688 * parse an XML name.
1689 *
1690 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1691 * CombiningChar | Extender
1692 *
1693 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1694 *
1695 * [6] Names ::= Name (S Name)*
1696 *
1697 * Returns the Name parsed or NULL
1698 */
1699
1700xmlChar *
1701xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001702 const xmlChar *in;
1703 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001704 int count = 0;
1705
1706 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001707
1708 /*
1709 * Accelerator for simple ASCII names
1710 */
1711 in = ctxt->input->cur;
1712 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1713 ((*in >= 0x41) && (*in <= 0x5A)) ||
1714 (*in == '_') || (*in == ':')) {
1715 in++;
1716 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1717 ((*in >= 0x41) && (*in <= 0x5A)) ||
1718 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001719 (*in == '_') || (*in == '-') ||
1720 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001721 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001722 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001723 count = in - ctxt->input->cur;
1724 ret = xmlStrndup(ctxt->input->cur, count);
1725 ctxt->input->cur = in;
1726 return(ret);
1727 }
1728 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001729 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001730}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001731
Daniel Veillard76d66f42001-05-16 21:05:17 +00001732static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001733xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1734 xmlChar buf[XML_MAX_NAMELEN + 5];
1735 int len = 0, l;
1736 int c;
1737 int count = 0;
1738
1739 /*
1740 * Handler for more complex cases
1741 */
1742 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001743 c = CUR_CHAR(l);
1744 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1745 (!IS_LETTER(c) && (c != '_') &&
1746 (c != ':'))) {
1747 return(NULL);
1748 }
1749
1750 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1751 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1752 (c == '.') || (c == '-') ||
1753 (c == '_') || (c == ':') ||
1754 (IS_COMBINING(c)) ||
1755 (IS_EXTENDER(c)))) {
1756 if (count++ > 100) {
1757 count = 0;
1758 GROW;
1759 }
1760 COPY_BUF(l,buf,len,c);
1761 NEXTL(l);
1762 c = CUR_CHAR(l);
1763 if (len >= XML_MAX_NAMELEN) {
1764 /*
1765 * Okay someone managed to make a huge name, so he's ready to pay
1766 * for the processing speed.
1767 */
1768 xmlChar *buffer;
1769 int max = len * 2;
1770
1771 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1772 if (buffer == NULL) {
1773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1774 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001775 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001776 return(NULL);
1777 }
1778 memcpy(buffer, buf, len);
1779 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1780 (c == '.') || (c == '-') ||
1781 (c == '_') || (c == ':') ||
1782 (IS_COMBINING(c)) ||
1783 (IS_EXTENDER(c))) {
1784 if (count++ > 100) {
1785 count = 0;
1786 GROW;
1787 }
1788 if (len + 10 > max) {
1789 max *= 2;
1790 buffer = (xmlChar *) xmlRealloc(buffer,
1791 max * sizeof(xmlChar));
1792 if (buffer == NULL) {
1793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1794 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001795 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001796 return(NULL);
1797 }
1798 }
1799 COPY_BUF(l,buffer,len,c);
1800 NEXTL(l);
1801 c = CUR_CHAR(l);
1802 }
1803 buffer[len] = 0;
1804 return(buffer);
1805 }
1806 }
1807 return(xmlStrndup(buf, len));
1808}
1809
1810/**
1811 * xmlParseStringName:
1812 * @ctxt: an XML parser context
1813 * @str: a pointer to the string pointer (IN/OUT)
1814 *
1815 * parse an XML name.
1816 *
1817 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1818 * CombiningChar | Extender
1819 *
1820 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1821 *
1822 * [6] Names ::= Name (S Name)*
1823 *
1824 * Returns the Name parsed or NULL. The str pointer
1825 * is updated to the current location in the string.
1826 */
1827
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001828static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001829xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1830 xmlChar buf[XML_MAX_NAMELEN + 5];
1831 const xmlChar *cur = *str;
1832 int len = 0, l;
1833 int c;
1834
1835 c = CUR_SCHAR(cur, l);
1836 if (!IS_LETTER(c) && (c != '_') &&
1837 (c != ':')) {
1838 return(NULL);
1839 }
1840
1841 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1842 (c == '.') || (c == '-') ||
1843 (c == '_') || (c == ':') ||
1844 (IS_COMBINING(c)) ||
1845 (IS_EXTENDER(c))) {
1846 COPY_BUF(l,buf,len,c);
1847 cur += l;
1848 c = CUR_SCHAR(cur, l);
1849 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1850 /*
1851 * Okay someone managed to make a huge name, so he's ready to pay
1852 * for the processing speed.
1853 */
1854 xmlChar *buffer;
1855 int max = len * 2;
1856
1857 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1858 if (buffer == NULL) {
1859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1860 ctxt->sax->error(ctxt->userData,
1861 "xmlParseStringName: out of memory\n");
1862 return(NULL);
1863 }
1864 memcpy(buffer, buf, len);
1865 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1866 (c == '.') || (c == '-') ||
1867 (c == '_') || (c == ':') ||
1868 (IS_COMBINING(c)) ||
1869 (IS_EXTENDER(c))) {
1870 if (len + 10 > max) {
1871 max *= 2;
1872 buffer = (xmlChar *) xmlRealloc(buffer,
1873 max * sizeof(xmlChar));
1874 if (buffer == NULL) {
1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1876 ctxt->sax->error(ctxt->userData,
1877 "xmlParseStringName: out of memory\n");
1878 return(NULL);
1879 }
1880 }
1881 COPY_BUF(l,buffer,len,c);
1882 cur += l;
1883 c = CUR_SCHAR(cur, l);
1884 }
1885 buffer[len] = 0;
1886 *str = cur;
1887 return(buffer);
1888 }
1889 }
1890 *str = cur;
1891 return(xmlStrndup(buf, len));
1892}
1893
1894/**
1895 * xmlParseNmtoken:
1896 * @ctxt: an XML parser context
1897 *
1898 * parse an XML Nmtoken.
1899 *
1900 * [7] Nmtoken ::= (NameChar)+
1901 *
1902 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1903 *
1904 * Returns the Nmtoken parsed or NULL
1905 */
1906
1907xmlChar *
1908xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1909 xmlChar buf[XML_MAX_NAMELEN + 5];
1910 int len = 0, l;
1911 int c;
1912 int count = 0;
1913
1914 GROW;
1915 c = CUR_CHAR(l);
1916
1917 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1918 (c == '.') || (c == '-') ||
1919 (c == '_') || (c == ':') ||
1920 (IS_COMBINING(c)) ||
1921 (IS_EXTENDER(c))) {
1922 if (count++ > 100) {
1923 count = 0;
1924 GROW;
1925 }
1926 COPY_BUF(l,buf,len,c);
1927 NEXTL(l);
1928 c = CUR_CHAR(l);
1929 if (len >= XML_MAX_NAMELEN) {
1930 /*
1931 * Okay someone managed to make a huge token, so he's ready to pay
1932 * for the processing speed.
1933 */
1934 xmlChar *buffer;
1935 int max = len * 2;
1936
1937 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1938 if (buffer == NULL) {
1939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1940 ctxt->sax->error(ctxt->userData,
1941 "xmlParseNmtoken: out of memory\n");
1942 return(NULL);
1943 }
1944 memcpy(buffer, buf, len);
1945 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1946 (c == '.') || (c == '-') ||
1947 (c == '_') || (c == ':') ||
1948 (IS_COMBINING(c)) ||
1949 (IS_EXTENDER(c))) {
1950 if (count++ > 100) {
1951 count = 0;
1952 GROW;
1953 }
1954 if (len + 10 > max) {
1955 max *= 2;
1956 buffer = (xmlChar *) xmlRealloc(buffer,
1957 max * sizeof(xmlChar));
1958 if (buffer == NULL) {
1959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1960 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001961 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001962 return(NULL);
1963 }
1964 }
1965 COPY_BUF(l,buffer,len,c);
1966 NEXTL(l);
1967 c = CUR_CHAR(l);
1968 }
1969 buffer[len] = 0;
1970 return(buffer);
1971 }
1972 }
1973 if (len == 0)
1974 return(NULL);
1975 return(xmlStrndup(buf, len));
1976}
1977
1978/**
1979 * xmlParseEntityValue:
1980 * @ctxt: an XML parser context
1981 * @orig: if non-NULL store a copy of the original entity value
1982 *
1983 * parse a value for ENTITY declarations
1984 *
1985 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1986 * "'" ([^%&'] | PEReference | Reference)* "'"
1987 *
1988 * Returns the EntityValue parsed with reference substitued or NULL
1989 */
1990
1991xmlChar *
1992xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1993 xmlChar *buf = NULL;
1994 int len = 0;
1995 int size = XML_PARSER_BUFFER_SIZE;
1996 int c, l;
1997 xmlChar stop;
1998 xmlChar *ret = NULL;
1999 const xmlChar *cur = NULL;
2000 xmlParserInputPtr input;
2001
2002 if (RAW == '"') stop = '"';
2003 else if (RAW == '\'') stop = '\'';
2004 else {
2005 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2007 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2008 ctxt->wellFormed = 0;
2009 ctxt->disableSAX = 1;
2010 return(NULL);
2011 }
2012 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2013 if (buf == NULL) {
2014 xmlGenericError(xmlGenericErrorContext,
2015 "malloc of %d byte failed\n", size);
2016 return(NULL);
2017 }
2018
2019 /*
2020 * The content of the entity definition is copied in a buffer.
2021 */
2022
2023 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2024 input = ctxt->input;
2025 GROW;
2026 NEXT;
2027 c = CUR_CHAR(l);
2028 /*
2029 * NOTE: 4.4.5 Included in Literal
2030 * When a parameter entity reference appears in a literal entity
2031 * value, ... a single or double quote character in the replacement
2032 * text is always treated as a normal data character and will not
2033 * terminate the literal.
2034 * In practice it means we stop the loop only when back at parsing
2035 * the initial entity and the quote is found
2036 */
2037 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2038 (ctxt->input != input))) {
2039 if (len + 5 >= size) {
2040 size *= 2;
2041 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2042 if (buf == NULL) {
2043 xmlGenericError(xmlGenericErrorContext,
2044 "realloc of %d byte failed\n", size);
2045 return(NULL);
2046 }
2047 }
2048 COPY_BUF(l,buf,len,c);
2049 NEXTL(l);
2050 /*
2051 * Pop-up of finished entities.
2052 */
2053 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2054 xmlPopInput(ctxt);
2055
2056 GROW;
2057 c = CUR_CHAR(l);
2058 if (c == 0) {
2059 GROW;
2060 c = CUR_CHAR(l);
2061 }
2062 }
2063 buf[len] = 0;
2064
2065 /*
2066 * Raise problem w.r.t. '&' and '%' being used in non-entities
2067 * reference constructs. Note Charref will be handled in
2068 * xmlStringDecodeEntities()
2069 */
2070 cur = buf;
2071 while (*cur != 0) { /* non input consuming */
2072 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2073 xmlChar *name;
2074 xmlChar tmp = *cur;
2075
2076 cur++;
2077 name = xmlParseStringName(ctxt, &cur);
2078 if ((name == NULL) || (*cur != ';')) {
2079 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2081 ctxt->sax->error(ctxt->userData,
2082 "EntityValue: '%c' forbidden except for entities references\n",
2083 tmp);
2084 ctxt->wellFormed = 0;
2085 ctxt->disableSAX = 1;
2086 }
2087 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2088 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2090 ctxt->sax->error(ctxt->userData,
2091 "EntityValue: PEReferences forbidden in internal subset\n",
2092 tmp);
2093 ctxt->wellFormed = 0;
2094 ctxt->disableSAX = 1;
2095 }
2096 if (name != NULL)
2097 xmlFree(name);
2098 }
2099 cur++;
2100 }
2101
2102 /*
2103 * Then PEReference entities are substituted.
2104 */
2105 if (c != stop) {
2106 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2108 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2109 ctxt->wellFormed = 0;
2110 ctxt->disableSAX = 1;
2111 xmlFree(buf);
2112 } else {
2113 NEXT;
2114 /*
2115 * NOTE: 4.4.7 Bypassed
2116 * When a general entity reference appears in the EntityValue in
2117 * an entity declaration, it is bypassed and left as is.
2118 * so XML_SUBSTITUTE_REF is not set here.
2119 */
2120 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2121 0, 0, 0);
2122 if (orig != NULL)
2123 *orig = buf;
2124 else
2125 xmlFree(buf);
2126 }
2127
2128 return(ret);
2129}
2130
2131/**
2132 * xmlParseAttValue:
2133 * @ctxt: an XML parser context
2134 *
2135 * parse a value for an attribute
2136 * Note: the parser won't do substitution of entities here, this
2137 * will be handled later in xmlStringGetNodeList
2138 *
2139 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2140 * "'" ([^<&'] | Reference)* "'"
2141 *
2142 * 3.3.3 Attribute-Value Normalization:
2143 * Before the value of an attribute is passed to the application or
2144 * checked for validity, the XML processor must normalize it as follows:
2145 * - a character reference is processed by appending the referenced
2146 * character to the attribute value
2147 * - an entity reference is processed by recursively processing the
2148 * replacement text of the entity
2149 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2150 * appending #x20 to the normalized value, except that only a single
2151 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2152 * parsed entity or the literal entity value of an internal parsed entity
2153 * - other characters are processed by appending them to the normalized value
2154 * If the declared value is not CDATA, then the XML processor must further
2155 * process the normalized attribute value by discarding any leading and
2156 * trailing space (#x20) characters, and by replacing sequences of space
2157 * (#x20) characters by a single space (#x20) character.
2158 * All attributes for which no declaration has been read should be treated
2159 * by a non-validating parser as if declared CDATA.
2160 *
2161 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2162 */
2163
2164xmlChar *
2165xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2166 xmlChar limit = 0;
2167 xmlChar *buf = NULL;
2168 int len = 0;
2169 int buf_size = 0;
2170 int c, l;
2171 xmlChar *current = NULL;
2172 xmlEntityPtr ent;
2173
2174
2175 SHRINK;
2176 if (NXT(0) == '"') {
2177 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2178 limit = '"';
2179 NEXT;
2180 } else if (NXT(0) == '\'') {
2181 limit = '\'';
2182 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2183 NEXT;
2184 } else {
2185 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2187 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2188 ctxt->wellFormed = 0;
2189 ctxt->disableSAX = 1;
2190 return(NULL);
2191 }
2192
2193 /*
2194 * allocate a translation buffer.
2195 */
2196 buf_size = XML_PARSER_BUFFER_SIZE;
2197 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2198 if (buf == NULL) {
2199 perror("xmlParseAttValue: malloc failed");
2200 return(NULL);
2201 }
2202
2203 /*
2204 * Ok loop until we reach one of the ending char or a size limit.
2205 */
2206 c = CUR_CHAR(l);
2207 while (((NXT(0) != limit) && /* checked */
2208 (c != '<')) || (ctxt->token != 0)) {
2209 if (c == 0) break;
2210 if (ctxt->token == '&') {
2211 /*
2212 * The reparsing will be done in xmlStringGetNodeList()
2213 * called by the attribute() function in SAX.c
2214 */
2215 static xmlChar buffer[6] = "&#38;";
2216
2217 if (len > buf_size - 10) {
2218 growBuffer(buf);
2219 }
2220 current = &buffer[0];
2221 while (*current != 0) { /* non input consuming */
2222 buf[len++] = *current++;
2223 }
2224 ctxt->token = 0;
2225 } else if (c == '&') {
2226 if (NXT(1) == '#') {
2227 int val = xmlParseCharRef(ctxt);
2228 if (val == '&') {
2229 /*
2230 * The reparsing will be done in xmlStringGetNodeList()
2231 * called by the attribute() function in SAX.c
2232 */
2233 static xmlChar buffer[6] = "&#38;";
2234
2235 if (len > buf_size - 10) {
2236 growBuffer(buf);
2237 }
2238 current = &buffer[0];
2239 while (*current != 0) { /* non input consuming */
2240 buf[len++] = *current++;
2241 }
2242 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002243 if (len > buf_size - 10) {
2244 growBuffer(buf);
2245 }
Owen Taylor3473f882001-02-23 17:55:21 +00002246 len += xmlCopyChar(0, &buf[len], val);
2247 }
2248 } else {
2249 ent = xmlParseEntityRef(ctxt);
2250 if ((ent != NULL) &&
2251 (ctxt->replaceEntities != 0)) {
2252 xmlChar *rep;
2253
2254 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2255 rep = xmlStringDecodeEntities(ctxt, ent->content,
2256 XML_SUBSTITUTE_REF, 0, 0, 0);
2257 if (rep != NULL) {
2258 current = rep;
2259 while (*current != 0) { /* non input consuming */
2260 buf[len++] = *current++;
2261 if (len > buf_size - 10) {
2262 growBuffer(buf);
2263 }
2264 }
2265 xmlFree(rep);
2266 }
2267 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002268 if (len > buf_size - 10) {
2269 growBuffer(buf);
2270 }
Owen Taylor3473f882001-02-23 17:55:21 +00002271 if (ent->content != NULL)
2272 buf[len++] = ent->content[0];
2273 }
2274 } else if (ent != NULL) {
2275 int i = xmlStrlen(ent->name);
2276 const xmlChar *cur = ent->name;
2277
2278 /*
2279 * This may look absurd but is needed to detect
2280 * entities problems
2281 */
2282 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2283 (ent->content != NULL)) {
2284 xmlChar *rep;
2285 rep = xmlStringDecodeEntities(ctxt, ent->content,
2286 XML_SUBSTITUTE_REF, 0, 0, 0);
2287 if (rep != NULL)
2288 xmlFree(rep);
2289 }
2290
2291 /*
2292 * Just output the reference
2293 */
2294 buf[len++] = '&';
2295 if (len > buf_size - i - 10) {
2296 growBuffer(buf);
2297 }
2298 for (;i > 0;i--)
2299 buf[len++] = *cur++;
2300 buf[len++] = ';';
2301 }
2302 }
2303 } else {
2304 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2305 COPY_BUF(l,buf,len,0x20);
2306 if (len > buf_size - 10) {
2307 growBuffer(buf);
2308 }
2309 } else {
2310 COPY_BUF(l,buf,len,c);
2311 if (len > buf_size - 10) {
2312 growBuffer(buf);
2313 }
2314 }
2315 NEXTL(l);
2316 }
2317 GROW;
2318 c = CUR_CHAR(l);
2319 }
2320 buf[len++] = 0;
2321 if (RAW == '<') {
2322 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2324 ctxt->sax->error(ctxt->userData,
2325 "Unescaped '<' not allowed in attributes values\n");
2326 ctxt->wellFormed = 0;
2327 ctxt->disableSAX = 1;
2328 } else if (RAW != limit) {
2329 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2331 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2332 ctxt->wellFormed = 0;
2333 ctxt->disableSAX = 1;
2334 } else
2335 NEXT;
2336 return(buf);
2337}
2338
2339/**
2340 * xmlParseSystemLiteral:
2341 * @ctxt: an XML parser context
2342 *
2343 * parse an XML Literal
2344 *
2345 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2346 *
2347 * Returns the SystemLiteral parsed or NULL
2348 */
2349
2350xmlChar *
2351xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2352 xmlChar *buf = NULL;
2353 int len = 0;
2354 int size = XML_PARSER_BUFFER_SIZE;
2355 int cur, l;
2356 xmlChar stop;
2357 int state = ctxt->instate;
2358 int count = 0;
2359
2360 SHRINK;
2361 if (RAW == '"') {
2362 NEXT;
2363 stop = '"';
2364 } else if (RAW == '\'') {
2365 NEXT;
2366 stop = '\'';
2367 } else {
2368 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2370 ctxt->sax->error(ctxt->userData,
2371 "SystemLiteral \" or ' expected\n");
2372 ctxt->wellFormed = 0;
2373 ctxt->disableSAX = 1;
2374 return(NULL);
2375 }
2376
2377 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2378 if (buf == NULL) {
2379 xmlGenericError(xmlGenericErrorContext,
2380 "malloc of %d byte failed\n", size);
2381 return(NULL);
2382 }
2383 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2384 cur = CUR_CHAR(l);
2385 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2386 if (len + 5 >= size) {
2387 size *= 2;
2388 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2389 if (buf == NULL) {
2390 xmlGenericError(xmlGenericErrorContext,
2391 "realloc of %d byte failed\n", size);
2392 ctxt->instate = (xmlParserInputState) state;
2393 return(NULL);
2394 }
2395 }
2396 count++;
2397 if (count > 50) {
2398 GROW;
2399 count = 0;
2400 }
2401 COPY_BUF(l,buf,len,cur);
2402 NEXTL(l);
2403 cur = CUR_CHAR(l);
2404 if (cur == 0) {
2405 GROW;
2406 SHRINK;
2407 cur = CUR_CHAR(l);
2408 }
2409 }
2410 buf[len] = 0;
2411 ctxt->instate = (xmlParserInputState) state;
2412 if (!IS_CHAR(cur)) {
2413 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2415 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2416 ctxt->wellFormed = 0;
2417 ctxt->disableSAX = 1;
2418 } else {
2419 NEXT;
2420 }
2421 return(buf);
2422}
2423
2424/**
2425 * xmlParsePubidLiteral:
2426 * @ctxt: an XML parser context
2427 *
2428 * parse an XML public literal
2429 *
2430 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2431 *
2432 * Returns the PubidLiteral parsed or NULL.
2433 */
2434
2435xmlChar *
2436xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2437 xmlChar *buf = NULL;
2438 int len = 0;
2439 int size = XML_PARSER_BUFFER_SIZE;
2440 xmlChar cur;
2441 xmlChar stop;
2442 int count = 0;
2443
2444 SHRINK;
2445 if (RAW == '"') {
2446 NEXT;
2447 stop = '"';
2448 } else if (RAW == '\'') {
2449 NEXT;
2450 stop = '\'';
2451 } else {
2452 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2454 ctxt->sax->error(ctxt->userData,
2455 "SystemLiteral \" or ' expected\n");
2456 ctxt->wellFormed = 0;
2457 ctxt->disableSAX = 1;
2458 return(NULL);
2459 }
2460 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2461 if (buf == NULL) {
2462 xmlGenericError(xmlGenericErrorContext,
2463 "malloc of %d byte failed\n", size);
2464 return(NULL);
2465 }
2466 cur = CUR;
2467 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2468 if (len + 1 >= size) {
2469 size *= 2;
2470 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2471 if (buf == NULL) {
2472 xmlGenericError(xmlGenericErrorContext,
2473 "realloc of %d byte failed\n", size);
2474 return(NULL);
2475 }
2476 }
2477 buf[len++] = cur;
2478 count++;
2479 if (count > 50) {
2480 GROW;
2481 count = 0;
2482 }
2483 NEXT;
2484 cur = CUR;
2485 if (cur == 0) {
2486 GROW;
2487 SHRINK;
2488 cur = CUR;
2489 }
2490 }
2491 buf[len] = 0;
2492 if (cur != stop) {
2493 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2495 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2496 ctxt->wellFormed = 0;
2497 ctxt->disableSAX = 1;
2498 } else {
2499 NEXT;
2500 }
2501 return(buf);
2502}
2503
Daniel Veillard48b2f892001-02-25 16:11:03 +00002504void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002505/**
2506 * xmlParseCharData:
2507 * @ctxt: an XML parser context
2508 * @cdata: int indicating whether we are within a CDATA section
2509 *
2510 * parse a CharData section.
2511 * if we are within a CDATA section ']]>' marks an end of section.
2512 *
2513 * The right angle bracket (>) may be represented using the string "&gt;",
2514 * and must, for compatibility, be escaped using "&gt;" or a character
2515 * reference when it appears in the string "]]>" in content, when that
2516 * string is not marking the end of a CDATA section.
2517 *
2518 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2519 */
2520
2521void
2522xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002523 const xmlChar *in;
2524 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002525 int line = ctxt->input->line;
2526 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002527
2528 SHRINK;
2529 GROW;
2530 /*
2531 * Accelerated common case where input don't need to be
2532 * modified before passing it to the handler.
2533 */
2534 if ((ctxt->token == 0) && (!cdata)) {
2535 in = ctxt->input->cur;
2536 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002537get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002538 while (((*in >= 0x20) && (*in != '<') &&
2539 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2540 in++;
2541 if (*in == 0xA) {
2542 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002543 in++;
2544 while (*in == 0xA) {
2545 ctxt->input->line++;
2546 in++;
2547 }
2548 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002549 }
2550 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002551 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002552 if (IS_BLANK(*ctxt->input->cur)) {
2553 const xmlChar *tmp = ctxt->input->cur;
2554 ctxt->input->cur = in;
2555 if (areBlanks(ctxt, tmp, nbchar)) {
2556 if (ctxt->sax->ignorableWhitespace != NULL)
2557 ctxt->sax->ignorableWhitespace(ctxt->userData,
2558 tmp, nbchar);
2559 } else {
2560 if (ctxt->sax->characters != NULL)
2561 ctxt->sax->characters(ctxt->userData,
2562 tmp, nbchar);
2563 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002564 line = ctxt->input->line;
2565 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002566 } else {
2567 if (ctxt->sax->characters != NULL)
2568 ctxt->sax->characters(ctxt->userData,
2569 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002570 line = ctxt->input->line;
2571 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002572 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002573 }
2574 ctxt->input->cur = in;
2575 if (*in == 0xD) {
2576 in++;
2577 if (*in == 0xA) {
2578 ctxt->input->cur = in;
2579 in++;
2580 ctxt->input->line++;
2581 continue; /* while */
2582 }
2583 in--;
2584 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002585 if (*in == '<') {
2586 return;
2587 }
2588 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002589 return;
2590 }
2591 SHRINK;
2592 GROW;
2593 in = ctxt->input->cur;
2594 } while ((*in >= 0x20) && (*in <= 0x7F));
2595 nbchar = 0;
2596 }
Daniel Veillard50582112001-03-26 22:52:16 +00002597 ctxt->input->line = line;
2598 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 xmlParseCharDataComplex(ctxt, cdata);
2600}
2601
2602void
2603xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002604 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2605 int nbchar = 0;
2606 int cur, l;
2607 int count = 0;
2608
2609 SHRINK;
2610 GROW;
2611 cur = CUR_CHAR(l);
2612 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2613 ((cur != '&') || (ctxt->token == '&')) &&
2614 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2615 if ((cur == ']') && (NXT(1) == ']') &&
2616 (NXT(2) == '>')) {
2617 if (cdata) break;
2618 else {
2619 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2621 ctxt->sax->error(ctxt->userData,
2622 "Sequence ']]>' not allowed in content\n");
2623 /* Should this be relaxed ??? I see a "must here */
2624 ctxt->wellFormed = 0;
2625 ctxt->disableSAX = 1;
2626 }
2627 }
2628 COPY_BUF(l,buf,nbchar,cur);
2629 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2630 /*
2631 * Ok the segment is to be consumed as chars.
2632 */
2633 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2634 if (areBlanks(ctxt, buf, nbchar)) {
2635 if (ctxt->sax->ignorableWhitespace != NULL)
2636 ctxt->sax->ignorableWhitespace(ctxt->userData,
2637 buf, nbchar);
2638 } else {
2639 if (ctxt->sax->characters != NULL)
2640 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2641 }
2642 }
2643 nbchar = 0;
2644 }
2645 count++;
2646 if (count > 50) {
2647 GROW;
2648 count = 0;
2649 }
2650 NEXTL(l);
2651 cur = CUR_CHAR(l);
2652 }
2653 if (nbchar != 0) {
2654 /*
2655 * Ok the segment is to be consumed as chars.
2656 */
2657 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2658 if (areBlanks(ctxt, buf, nbchar)) {
2659 if (ctxt->sax->ignorableWhitespace != NULL)
2660 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2661 } else {
2662 if (ctxt->sax->characters != NULL)
2663 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2664 }
2665 }
2666 }
2667}
2668
2669/**
2670 * xmlParseExternalID:
2671 * @ctxt: an XML parser context
2672 * @publicID: a xmlChar** receiving PubidLiteral
2673 * @strict: indicate whether we should restrict parsing to only
2674 * production [75], see NOTE below
2675 *
2676 * Parse an External ID or a Public ID
2677 *
2678 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2679 * 'PUBLIC' S PubidLiteral S SystemLiteral
2680 *
2681 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2682 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2683 *
2684 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2685 *
2686 * Returns the function returns SystemLiteral and in the second
2687 * case publicID receives PubidLiteral, is strict is off
2688 * it is possible to return NULL and have publicID set.
2689 */
2690
2691xmlChar *
2692xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2693 xmlChar *URI = NULL;
2694
2695 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002696
2697 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002698 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2699 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2700 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2701 SKIP(6);
2702 if (!IS_BLANK(CUR)) {
2703 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2705 ctxt->sax->error(ctxt->userData,
2706 "Space required after 'SYSTEM'\n");
2707 ctxt->wellFormed = 0;
2708 ctxt->disableSAX = 1;
2709 }
2710 SKIP_BLANKS;
2711 URI = xmlParseSystemLiteral(ctxt);
2712 if (URI == NULL) {
2713 ctxt->errNo = XML_ERR_URI_REQUIRED;
2714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2715 ctxt->sax->error(ctxt->userData,
2716 "xmlParseExternalID: SYSTEM, no URI\n");
2717 ctxt->wellFormed = 0;
2718 ctxt->disableSAX = 1;
2719 }
2720 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2721 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2722 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2723 SKIP(6);
2724 if (!IS_BLANK(CUR)) {
2725 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2727 ctxt->sax->error(ctxt->userData,
2728 "Space required after 'PUBLIC'\n");
2729 ctxt->wellFormed = 0;
2730 ctxt->disableSAX = 1;
2731 }
2732 SKIP_BLANKS;
2733 *publicID = xmlParsePubidLiteral(ctxt);
2734 if (*publicID == NULL) {
2735 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2737 ctxt->sax->error(ctxt->userData,
2738 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2739 ctxt->wellFormed = 0;
2740 ctxt->disableSAX = 1;
2741 }
2742 if (strict) {
2743 /*
2744 * We don't handle [83] so "S SystemLiteral" is required.
2745 */
2746 if (!IS_BLANK(CUR)) {
2747 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2748 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2749 ctxt->sax->error(ctxt->userData,
2750 "Space required after the Public Identifier\n");
2751 ctxt->wellFormed = 0;
2752 ctxt->disableSAX = 1;
2753 }
2754 } else {
2755 /*
2756 * We handle [83] so we return immediately, if
2757 * "S SystemLiteral" is not detected. From a purely parsing
2758 * point of view that's a nice mess.
2759 */
2760 const xmlChar *ptr;
2761 GROW;
2762
2763 ptr = CUR_PTR;
2764 if (!IS_BLANK(*ptr)) return(NULL);
2765
2766 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2767 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2768 }
2769 SKIP_BLANKS;
2770 URI = xmlParseSystemLiteral(ctxt);
2771 if (URI == NULL) {
2772 ctxt->errNo = XML_ERR_URI_REQUIRED;
2773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2774 ctxt->sax->error(ctxt->userData,
2775 "xmlParseExternalID: PUBLIC, no URI\n");
2776 ctxt->wellFormed = 0;
2777 ctxt->disableSAX = 1;
2778 }
2779 }
2780 return(URI);
2781}
2782
2783/**
2784 * xmlParseComment:
2785 * @ctxt: an XML parser context
2786 *
2787 * Skip an XML (SGML) comment <!-- .... -->
2788 * The spec says that "For compatibility, the string "--" (double-hyphen)
2789 * must not occur within comments. "
2790 *
2791 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2792 */
2793void
2794xmlParseComment(xmlParserCtxtPtr ctxt) {
2795 xmlChar *buf = NULL;
2796 int len;
2797 int size = XML_PARSER_BUFFER_SIZE;
2798 int q, ql;
2799 int r, rl;
2800 int cur, l;
2801 xmlParserInputState state;
2802 xmlParserInputPtr input = ctxt->input;
2803 int count = 0;
2804
2805 /*
2806 * Check that there is a comment right here.
2807 */
2808 if ((RAW != '<') || (NXT(1) != '!') ||
2809 (NXT(2) != '-') || (NXT(3) != '-')) return;
2810
2811 state = ctxt->instate;
2812 ctxt->instate = XML_PARSER_COMMENT;
2813 SHRINK;
2814 SKIP(4);
2815 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2816 if (buf == NULL) {
2817 xmlGenericError(xmlGenericErrorContext,
2818 "malloc of %d byte failed\n", size);
2819 ctxt->instate = state;
2820 return;
2821 }
2822 q = CUR_CHAR(ql);
2823 NEXTL(ql);
2824 r = CUR_CHAR(rl);
2825 NEXTL(rl);
2826 cur = CUR_CHAR(l);
2827 len = 0;
2828 while (IS_CHAR(cur) && /* checked */
2829 ((cur != '>') ||
2830 (r != '-') || (q != '-'))) {
2831 if ((r == '-') && (q == '-') && (len > 1)) {
2832 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2834 ctxt->sax->error(ctxt->userData,
2835 "Comment must not contain '--' (double-hyphen)`\n");
2836 ctxt->wellFormed = 0;
2837 ctxt->disableSAX = 1;
2838 }
2839 if (len + 5 >= size) {
2840 size *= 2;
2841 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2842 if (buf == NULL) {
2843 xmlGenericError(xmlGenericErrorContext,
2844 "realloc of %d byte failed\n", size);
2845 ctxt->instate = state;
2846 return;
2847 }
2848 }
2849 COPY_BUF(ql,buf,len,q);
2850 q = r;
2851 ql = rl;
2852 r = cur;
2853 rl = l;
2854
2855 count++;
2856 if (count > 50) {
2857 GROW;
2858 count = 0;
2859 }
2860 NEXTL(l);
2861 cur = CUR_CHAR(l);
2862 if (cur == 0) {
2863 SHRINK;
2864 GROW;
2865 cur = CUR_CHAR(l);
2866 }
2867 }
2868 buf[len] = 0;
2869 if (!IS_CHAR(cur)) {
2870 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2872 ctxt->sax->error(ctxt->userData,
2873 "Comment not terminated \n<!--%.50s\n", buf);
2874 ctxt->wellFormed = 0;
2875 ctxt->disableSAX = 1;
2876 xmlFree(buf);
2877 } else {
2878 if (input != ctxt->input) {
2879 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882"Comment doesn't start and stop in the same entity\n");
2883 ctxt->wellFormed = 0;
2884 ctxt->disableSAX = 1;
2885 }
2886 NEXT;
2887 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2888 (!ctxt->disableSAX))
2889 ctxt->sax->comment(ctxt->userData, buf);
2890 xmlFree(buf);
2891 }
2892 ctxt->instate = state;
2893}
2894
2895/**
2896 * xmlParsePITarget:
2897 * @ctxt: an XML parser context
2898 *
2899 * parse the name of a PI
2900 *
2901 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2902 *
2903 * Returns the PITarget name or NULL
2904 */
2905
2906xmlChar *
2907xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2908 xmlChar *name;
2909
2910 name = xmlParseName(ctxt);
2911 if ((name != NULL) &&
2912 ((name[0] == 'x') || (name[0] == 'X')) &&
2913 ((name[1] == 'm') || (name[1] == 'M')) &&
2914 ((name[2] == 'l') || (name[2] == 'L'))) {
2915 int i;
2916 if ((name[0] == 'x') && (name[1] == 'm') &&
2917 (name[2] == 'l') && (name[3] == 0)) {
2918 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2920 ctxt->sax->error(ctxt->userData,
2921 "XML declaration allowed only at the start of the document\n");
2922 ctxt->wellFormed = 0;
2923 ctxt->disableSAX = 1;
2924 return(name);
2925 } else if (name[3] == 0) {
2926 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2928 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2929 ctxt->wellFormed = 0;
2930 ctxt->disableSAX = 1;
2931 return(name);
2932 }
2933 for (i = 0;;i++) {
2934 if (xmlW3CPIs[i] == NULL) break;
2935 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2936 return(name);
2937 }
2938 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2939 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2940 ctxt->sax->warning(ctxt->userData,
2941 "xmlParsePItarget: invalid name prefix 'xml'\n");
2942 }
2943 }
2944 return(name);
2945}
2946
2947/**
2948 * xmlParsePI:
2949 * @ctxt: an XML parser context
2950 *
2951 * parse an XML Processing Instruction.
2952 *
2953 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2954 *
2955 * The processing is transfered to SAX once parsed.
2956 */
2957
2958void
2959xmlParsePI(xmlParserCtxtPtr ctxt) {
2960 xmlChar *buf = NULL;
2961 int len = 0;
2962 int size = XML_PARSER_BUFFER_SIZE;
2963 int cur, l;
2964 xmlChar *target;
2965 xmlParserInputState state;
2966 int count = 0;
2967
2968 if ((RAW == '<') && (NXT(1) == '?')) {
2969 xmlParserInputPtr input = ctxt->input;
2970 state = ctxt->instate;
2971 ctxt->instate = XML_PARSER_PI;
2972 /*
2973 * this is a Processing Instruction.
2974 */
2975 SKIP(2);
2976 SHRINK;
2977
2978 /*
2979 * Parse the target name and check for special support like
2980 * namespace.
2981 */
2982 target = xmlParsePITarget(ctxt);
2983 if (target != NULL) {
2984 if ((RAW == '?') && (NXT(1) == '>')) {
2985 if (input != ctxt->input) {
2986 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2988 ctxt->sax->error(ctxt->userData,
2989 "PI declaration doesn't start and stop in the same entity\n");
2990 ctxt->wellFormed = 0;
2991 ctxt->disableSAX = 1;
2992 }
2993 SKIP(2);
2994
2995 /*
2996 * SAX: PI detected.
2997 */
2998 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2999 (ctxt->sax->processingInstruction != NULL))
3000 ctxt->sax->processingInstruction(ctxt->userData,
3001 target, NULL);
3002 ctxt->instate = state;
3003 xmlFree(target);
3004 return;
3005 }
3006 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3007 if (buf == NULL) {
3008 xmlGenericError(xmlGenericErrorContext,
3009 "malloc of %d byte failed\n", size);
3010 ctxt->instate = state;
3011 return;
3012 }
3013 cur = CUR;
3014 if (!IS_BLANK(cur)) {
3015 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3017 ctxt->sax->error(ctxt->userData,
3018 "xmlParsePI: PI %s space expected\n", target);
3019 ctxt->wellFormed = 0;
3020 ctxt->disableSAX = 1;
3021 }
3022 SKIP_BLANKS;
3023 cur = CUR_CHAR(l);
3024 while (IS_CHAR(cur) && /* checked */
3025 ((cur != '?') || (NXT(1) != '>'))) {
3026 if (len + 5 >= size) {
3027 size *= 2;
3028 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3029 if (buf == NULL) {
3030 xmlGenericError(xmlGenericErrorContext,
3031 "realloc of %d byte failed\n", size);
3032 ctxt->instate = state;
3033 return;
3034 }
3035 }
3036 count++;
3037 if (count > 50) {
3038 GROW;
3039 count = 0;
3040 }
3041 COPY_BUF(l,buf,len,cur);
3042 NEXTL(l);
3043 cur = CUR_CHAR(l);
3044 if (cur == 0) {
3045 SHRINK;
3046 GROW;
3047 cur = CUR_CHAR(l);
3048 }
3049 }
3050 buf[len] = 0;
3051 if (cur != '?') {
3052 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3054 ctxt->sax->error(ctxt->userData,
3055 "xmlParsePI: PI %s never end ...\n", target);
3056 ctxt->wellFormed = 0;
3057 ctxt->disableSAX = 1;
3058 } else {
3059 if (input != ctxt->input) {
3060 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3062 ctxt->sax->error(ctxt->userData,
3063 "PI declaration doesn't start and stop in the same entity\n");
3064 ctxt->wellFormed = 0;
3065 ctxt->disableSAX = 1;
3066 }
3067 SKIP(2);
3068
3069 /*
3070 * SAX: PI detected.
3071 */
3072 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3073 (ctxt->sax->processingInstruction != NULL))
3074 ctxt->sax->processingInstruction(ctxt->userData,
3075 target, buf);
3076 }
3077 xmlFree(buf);
3078 xmlFree(target);
3079 } else {
3080 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3082 ctxt->sax->error(ctxt->userData,
3083 "xmlParsePI : no target name\n");
3084 ctxt->wellFormed = 0;
3085 ctxt->disableSAX = 1;
3086 }
3087 ctxt->instate = state;
3088 }
3089}
3090
3091/**
3092 * xmlParseNotationDecl:
3093 * @ctxt: an XML parser context
3094 *
3095 * parse a notation declaration
3096 *
3097 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3098 *
3099 * Hence there is actually 3 choices:
3100 * 'PUBLIC' S PubidLiteral
3101 * 'PUBLIC' S PubidLiteral S SystemLiteral
3102 * and 'SYSTEM' S SystemLiteral
3103 *
3104 * See the NOTE on xmlParseExternalID().
3105 */
3106
3107void
3108xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3109 xmlChar *name;
3110 xmlChar *Pubid;
3111 xmlChar *Systemid;
3112
3113 if ((RAW == '<') && (NXT(1) == '!') &&
3114 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3115 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3116 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3117 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3118 xmlParserInputPtr input = ctxt->input;
3119 SHRINK;
3120 SKIP(10);
3121 if (!IS_BLANK(CUR)) {
3122 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3123 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3124 ctxt->sax->error(ctxt->userData,
3125 "Space required after '<!NOTATION'\n");
3126 ctxt->wellFormed = 0;
3127 ctxt->disableSAX = 1;
3128 return;
3129 }
3130 SKIP_BLANKS;
3131
Daniel Veillard76d66f42001-05-16 21:05:17 +00003132 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003133 if (name == NULL) {
3134 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3136 ctxt->sax->error(ctxt->userData,
3137 "NOTATION: Name expected here\n");
3138 ctxt->wellFormed = 0;
3139 ctxt->disableSAX = 1;
3140 return;
3141 }
3142 if (!IS_BLANK(CUR)) {
3143 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3145 ctxt->sax->error(ctxt->userData,
3146 "Space required after the NOTATION name'\n");
3147 ctxt->wellFormed = 0;
3148 ctxt->disableSAX = 1;
3149 return;
3150 }
3151 SKIP_BLANKS;
3152
3153 /*
3154 * Parse the IDs.
3155 */
3156 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3157 SKIP_BLANKS;
3158
3159 if (RAW == '>') {
3160 if (input != ctxt->input) {
3161 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3163 ctxt->sax->error(ctxt->userData,
3164"Notation declaration doesn't start and stop in the same entity\n");
3165 ctxt->wellFormed = 0;
3166 ctxt->disableSAX = 1;
3167 }
3168 NEXT;
3169 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3170 (ctxt->sax->notationDecl != NULL))
3171 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3172 } else {
3173 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3175 ctxt->sax->error(ctxt->userData,
3176 "'>' required to close NOTATION declaration\n");
3177 ctxt->wellFormed = 0;
3178 ctxt->disableSAX = 1;
3179 }
3180 xmlFree(name);
3181 if (Systemid != NULL) xmlFree(Systemid);
3182 if (Pubid != NULL) xmlFree(Pubid);
3183 }
3184}
3185
3186/**
3187 * xmlParseEntityDecl:
3188 * @ctxt: an XML parser context
3189 *
3190 * parse <!ENTITY declarations
3191 *
3192 * [70] EntityDecl ::= GEDecl | PEDecl
3193 *
3194 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3195 *
3196 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3197 *
3198 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3199 *
3200 * [74] PEDef ::= EntityValue | ExternalID
3201 *
3202 * [76] NDataDecl ::= S 'NDATA' S Name
3203 *
3204 * [ VC: Notation Declared ]
3205 * The Name must match the declared name of a notation.
3206 */
3207
3208void
3209xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3210 xmlChar *name = NULL;
3211 xmlChar *value = NULL;
3212 xmlChar *URI = NULL, *literal = NULL;
3213 xmlChar *ndata = NULL;
3214 int isParameter = 0;
3215 xmlChar *orig = NULL;
3216
3217 GROW;
3218 if ((RAW == '<') && (NXT(1) == '!') &&
3219 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3220 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3221 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3222 xmlParserInputPtr input = ctxt->input;
3223 ctxt->instate = XML_PARSER_ENTITY_DECL;
3224 SHRINK;
3225 SKIP(8);
3226 if (!IS_BLANK(CUR)) {
3227 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3229 ctxt->sax->error(ctxt->userData,
3230 "Space required after '<!ENTITY'\n");
3231 ctxt->wellFormed = 0;
3232 ctxt->disableSAX = 1;
3233 }
3234 SKIP_BLANKS;
3235
3236 if (RAW == '%') {
3237 NEXT;
3238 if (!IS_BLANK(CUR)) {
3239 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3241 ctxt->sax->error(ctxt->userData,
3242 "Space required after '%'\n");
3243 ctxt->wellFormed = 0;
3244 ctxt->disableSAX = 1;
3245 }
3246 SKIP_BLANKS;
3247 isParameter = 1;
3248 }
3249
Daniel Veillard76d66f42001-05-16 21:05:17 +00003250 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003251 if (name == NULL) {
3252 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3254 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3255 ctxt->wellFormed = 0;
3256 ctxt->disableSAX = 1;
3257 return;
3258 }
3259 if (!IS_BLANK(CUR)) {
3260 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3262 ctxt->sax->error(ctxt->userData,
3263 "Space required after the entity name\n");
3264 ctxt->wellFormed = 0;
3265 ctxt->disableSAX = 1;
3266 }
3267 SKIP_BLANKS;
3268
3269 /*
3270 * handle the various case of definitions...
3271 */
3272 if (isParameter) {
3273 if ((RAW == '"') || (RAW == '\'')) {
3274 value = xmlParseEntityValue(ctxt, &orig);
3275 if (value) {
3276 if ((ctxt->sax != NULL) &&
3277 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3278 ctxt->sax->entityDecl(ctxt->userData, name,
3279 XML_INTERNAL_PARAMETER_ENTITY,
3280 NULL, NULL, value);
3281 }
3282 } else {
3283 URI = xmlParseExternalID(ctxt, &literal, 1);
3284 if ((URI == NULL) && (literal == NULL)) {
3285 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3287 ctxt->sax->error(ctxt->userData,
3288 "Entity value required\n");
3289 ctxt->wellFormed = 0;
3290 ctxt->disableSAX = 1;
3291 }
3292 if (URI) {
3293 xmlURIPtr uri;
3294
3295 uri = xmlParseURI((const char *) URI);
3296 if (uri == NULL) {
3297 ctxt->errNo = XML_ERR_INVALID_URI;
3298 if ((ctxt->sax != NULL) &&
3299 (!ctxt->disableSAX) &&
3300 (ctxt->sax->error != NULL))
3301 ctxt->sax->error(ctxt->userData,
3302 "Invalid URI: %s\n", URI);
3303 ctxt->wellFormed = 0;
3304 } else {
3305 if (uri->fragment != NULL) {
3306 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3307 if ((ctxt->sax != NULL) &&
3308 (!ctxt->disableSAX) &&
3309 (ctxt->sax->error != NULL))
3310 ctxt->sax->error(ctxt->userData,
3311 "Fragment not allowed: %s\n", URI);
3312 ctxt->wellFormed = 0;
3313 } else {
3314 if ((ctxt->sax != NULL) &&
3315 (!ctxt->disableSAX) &&
3316 (ctxt->sax->entityDecl != NULL))
3317 ctxt->sax->entityDecl(ctxt->userData, name,
3318 XML_EXTERNAL_PARAMETER_ENTITY,
3319 literal, URI, NULL);
3320 }
3321 xmlFreeURI(uri);
3322 }
3323 }
3324 }
3325 } else {
3326 if ((RAW == '"') || (RAW == '\'')) {
3327 value = xmlParseEntityValue(ctxt, &orig);
3328 if ((ctxt->sax != NULL) &&
3329 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3330 ctxt->sax->entityDecl(ctxt->userData, name,
3331 XML_INTERNAL_GENERAL_ENTITY,
3332 NULL, NULL, value);
3333 } else {
3334 URI = xmlParseExternalID(ctxt, &literal, 1);
3335 if ((URI == NULL) && (literal == NULL)) {
3336 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3338 ctxt->sax->error(ctxt->userData,
3339 "Entity value required\n");
3340 ctxt->wellFormed = 0;
3341 ctxt->disableSAX = 1;
3342 }
3343 if (URI) {
3344 xmlURIPtr uri;
3345
3346 uri = xmlParseURI((const char *)URI);
3347 if (uri == NULL) {
3348 ctxt->errNo = XML_ERR_INVALID_URI;
3349 if ((ctxt->sax != NULL) &&
3350 (!ctxt->disableSAX) &&
3351 (ctxt->sax->error != NULL))
3352 ctxt->sax->error(ctxt->userData,
3353 "Invalid URI: %s\n", URI);
3354 ctxt->wellFormed = 0;
3355 } else {
3356 if (uri->fragment != NULL) {
3357 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3358 if ((ctxt->sax != NULL) &&
3359 (!ctxt->disableSAX) &&
3360 (ctxt->sax->error != NULL))
3361 ctxt->sax->error(ctxt->userData,
3362 "Fragment not allowed: %s\n", URI);
3363 ctxt->wellFormed = 0;
3364 }
3365 xmlFreeURI(uri);
3366 }
3367 }
3368 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3369 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3370 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3371 ctxt->sax->error(ctxt->userData,
3372 "Space required before 'NDATA'\n");
3373 ctxt->wellFormed = 0;
3374 ctxt->disableSAX = 1;
3375 }
3376 SKIP_BLANKS;
3377 if ((RAW == 'N') && (NXT(1) == 'D') &&
3378 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3379 (NXT(4) == 'A')) {
3380 SKIP(5);
3381 if (!IS_BLANK(CUR)) {
3382 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3384 ctxt->sax->error(ctxt->userData,
3385 "Space required after 'NDATA'\n");
3386 ctxt->wellFormed = 0;
3387 ctxt->disableSAX = 1;
3388 }
3389 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003390 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003391 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3392 (ctxt->sax->unparsedEntityDecl != NULL))
3393 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3394 literal, URI, ndata);
3395 } else {
3396 if ((ctxt->sax != NULL) &&
3397 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3398 ctxt->sax->entityDecl(ctxt->userData, name,
3399 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3400 literal, URI, NULL);
3401 }
3402 }
3403 }
3404 SKIP_BLANKS;
3405 if (RAW != '>') {
3406 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3408 ctxt->sax->error(ctxt->userData,
3409 "xmlParseEntityDecl: entity %s not terminated\n", name);
3410 ctxt->wellFormed = 0;
3411 ctxt->disableSAX = 1;
3412 } else {
3413 if (input != ctxt->input) {
3414 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3416 ctxt->sax->error(ctxt->userData,
3417"Entity declaration doesn't start and stop in the same entity\n");
3418 ctxt->wellFormed = 0;
3419 ctxt->disableSAX = 1;
3420 }
3421 NEXT;
3422 }
3423 if (orig != NULL) {
3424 /*
3425 * Ugly mechanism to save the raw entity value.
3426 */
3427 xmlEntityPtr cur = NULL;
3428
3429 if (isParameter) {
3430 if ((ctxt->sax != NULL) &&
3431 (ctxt->sax->getParameterEntity != NULL))
3432 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3433 } else {
3434 if ((ctxt->sax != NULL) &&
3435 (ctxt->sax->getEntity != NULL))
3436 cur = ctxt->sax->getEntity(ctxt->userData, name);
3437 }
3438 if (cur != NULL) {
3439 if (cur->orig != NULL)
3440 xmlFree(orig);
3441 else
3442 cur->orig = orig;
3443 } else
3444 xmlFree(orig);
3445 }
3446 if (name != NULL) xmlFree(name);
3447 if (value != NULL) xmlFree(value);
3448 if (URI != NULL) xmlFree(URI);
3449 if (literal != NULL) xmlFree(literal);
3450 if (ndata != NULL) xmlFree(ndata);
3451 }
3452}
3453
3454/**
3455 * xmlParseDefaultDecl:
3456 * @ctxt: an XML parser context
3457 * @value: Receive a possible fixed default value for the attribute
3458 *
3459 * Parse an attribute default declaration
3460 *
3461 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3462 *
3463 * [ VC: Required Attribute ]
3464 * if the default declaration is the keyword #REQUIRED, then the
3465 * attribute must be specified for all elements of the type in the
3466 * attribute-list declaration.
3467 *
3468 * [ VC: Attribute Default Legal ]
3469 * The declared default value must meet the lexical constraints of
3470 * the declared attribute type c.f. xmlValidateAttributeDecl()
3471 *
3472 * [ VC: Fixed Attribute Default ]
3473 * if an attribute has a default value declared with the #FIXED
3474 * keyword, instances of that attribute must match the default value.
3475 *
3476 * [ WFC: No < in Attribute Values ]
3477 * handled in xmlParseAttValue()
3478 *
3479 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3480 * or XML_ATTRIBUTE_FIXED.
3481 */
3482
3483int
3484xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3485 int val;
3486 xmlChar *ret;
3487
3488 *value = NULL;
3489 if ((RAW == '#') && (NXT(1) == 'R') &&
3490 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3491 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3492 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3493 (NXT(8) == 'D')) {
3494 SKIP(9);
3495 return(XML_ATTRIBUTE_REQUIRED);
3496 }
3497 if ((RAW == '#') && (NXT(1) == 'I') &&
3498 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3499 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3500 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3501 SKIP(8);
3502 return(XML_ATTRIBUTE_IMPLIED);
3503 }
3504 val = XML_ATTRIBUTE_NONE;
3505 if ((RAW == '#') && (NXT(1) == 'F') &&
3506 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3507 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3508 SKIP(6);
3509 val = XML_ATTRIBUTE_FIXED;
3510 if (!IS_BLANK(CUR)) {
3511 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3513 ctxt->sax->error(ctxt->userData,
3514 "Space required after '#FIXED'\n");
3515 ctxt->wellFormed = 0;
3516 ctxt->disableSAX = 1;
3517 }
3518 SKIP_BLANKS;
3519 }
3520 ret = xmlParseAttValue(ctxt);
3521 ctxt->instate = XML_PARSER_DTD;
3522 if (ret == NULL) {
3523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3524 ctxt->sax->error(ctxt->userData,
3525 "Attribute default value declaration error\n");
3526 ctxt->wellFormed = 0;
3527 ctxt->disableSAX = 1;
3528 } else
3529 *value = ret;
3530 return(val);
3531}
3532
3533/**
3534 * xmlParseNotationType:
3535 * @ctxt: an XML parser context
3536 *
3537 * parse an Notation attribute type.
3538 *
3539 * Note: the leading 'NOTATION' S part has already being parsed...
3540 *
3541 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3542 *
3543 * [ VC: Notation Attributes ]
3544 * Values of this type must match one of the notation names included
3545 * in the declaration; all notation names in the declaration must be declared.
3546 *
3547 * Returns: the notation attribute tree built while parsing
3548 */
3549
3550xmlEnumerationPtr
3551xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3552 xmlChar *name;
3553 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3554
3555 if (RAW != '(') {
3556 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3558 ctxt->sax->error(ctxt->userData,
3559 "'(' required to start 'NOTATION'\n");
3560 ctxt->wellFormed = 0;
3561 ctxt->disableSAX = 1;
3562 return(NULL);
3563 }
3564 SHRINK;
3565 do {
3566 NEXT;
3567 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003568 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003569 if (name == NULL) {
3570 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3572 ctxt->sax->error(ctxt->userData,
3573 "Name expected in NOTATION declaration\n");
3574 ctxt->wellFormed = 0;
3575 ctxt->disableSAX = 1;
3576 return(ret);
3577 }
3578 cur = xmlCreateEnumeration(name);
3579 xmlFree(name);
3580 if (cur == NULL) return(ret);
3581 if (last == NULL) ret = last = cur;
3582 else {
3583 last->next = cur;
3584 last = cur;
3585 }
3586 SKIP_BLANKS;
3587 } while (RAW == '|');
3588 if (RAW != ')') {
3589 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3591 ctxt->sax->error(ctxt->userData,
3592 "')' required to finish NOTATION declaration\n");
3593 ctxt->wellFormed = 0;
3594 ctxt->disableSAX = 1;
3595 if ((last != NULL) && (last != ret))
3596 xmlFreeEnumeration(last);
3597 return(ret);
3598 }
3599 NEXT;
3600 return(ret);
3601}
3602
3603/**
3604 * xmlParseEnumerationType:
3605 * @ctxt: an XML parser context
3606 *
3607 * parse an Enumeration attribute type.
3608 *
3609 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3610 *
3611 * [ VC: Enumeration ]
3612 * Values of this type must match one of the Nmtoken tokens in
3613 * the declaration
3614 *
3615 * Returns: the enumeration attribute tree built while parsing
3616 */
3617
3618xmlEnumerationPtr
3619xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3620 xmlChar *name;
3621 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3622
3623 if (RAW != '(') {
3624 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3626 ctxt->sax->error(ctxt->userData,
3627 "'(' required to start ATTLIST enumeration\n");
3628 ctxt->wellFormed = 0;
3629 ctxt->disableSAX = 1;
3630 return(NULL);
3631 }
3632 SHRINK;
3633 do {
3634 NEXT;
3635 SKIP_BLANKS;
3636 name = xmlParseNmtoken(ctxt);
3637 if (name == NULL) {
3638 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3640 ctxt->sax->error(ctxt->userData,
3641 "NmToken expected in ATTLIST enumeration\n");
3642 ctxt->wellFormed = 0;
3643 ctxt->disableSAX = 1;
3644 return(ret);
3645 }
3646 cur = xmlCreateEnumeration(name);
3647 xmlFree(name);
3648 if (cur == NULL) return(ret);
3649 if (last == NULL) ret = last = cur;
3650 else {
3651 last->next = cur;
3652 last = cur;
3653 }
3654 SKIP_BLANKS;
3655 } while (RAW == '|');
3656 if (RAW != ')') {
3657 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3659 ctxt->sax->error(ctxt->userData,
3660 "')' required to finish ATTLIST enumeration\n");
3661 ctxt->wellFormed = 0;
3662 ctxt->disableSAX = 1;
3663 return(ret);
3664 }
3665 NEXT;
3666 return(ret);
3667}
3668
3669/**
3670 * xmlParseEnumeratedType:
3671 * @ctxt: an XML parser context
3672 * @tree: the enumeration tree built while parsing
3673 *
3674 * parse an Enumerated attribute type.
3675 *
3676 * [57] EnumeratedType ::= NotationType | Enumeration
3677 *
3678 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3679 *
3680 *
3681 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3682 */
3683
3684int
3685xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3686 if ((RAW == 'N') && (NXT(1) == 'O') &&
3687 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3688 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3689 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3690 SKIP(8);
3691 if (!IS_BLANK(CUR)) {
3692 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3694 ctxt->sax->error(ctxt->userData,
3695 "Space required after 'NOTATION'\n");
3696 ctxt->wellFormed = 0;
3697 ctxt->disableSAX = 1;
3698 return(0);
3699 }
3700 SKIP_BLANKS;
3701 *tree = xmlParseNotationType(ctxt);
3702 if (*tree == NULL) return(0);
3703 return(XML_ATTRIBUTE_NOTATION);
3704 }
3705 *tree = xmlParseEnumerationType(ctxt);
3706 if (*tree == NULL) return(0);
3707 return(XML_ATTRIBUTE_ENUMERATION);
3708}
3709
3710/**
3711 * xmlParseAttributeType:
3712 * @ctxt: an XML parser context
3713 * @tree: the enumeration tree built while parsing
3714 *
3715 * parse the Attribute list def for an element
3716 *
3717 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3718 *
3719 * [55] StringType ::= 'CDATA'
3720 *
3721 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3722 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3723 *
3724 * Validity constraints for attribute values syntax are checked in
3725 * xmlValidateAttributeValue()
3726 *
3727 * [ VC: ID ]
3728 * Values of type ID must match the Name production. A name must not
3729 * appear more than once in an XML document as a value of this type;
3730 * i.e., ID values must uniquely identify the elements which bear them.
3731 *
3732 * [ VC: One ID per Element Type ]
3733 * No element type may have more than one ID attribute specified.
3734 *
3735 * [ VC: ID Attribute Default ]
3736 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3737 *
3738 * [ VC: IDREF ]
3739 * Values of type IDREF must match the Name production, and values
3740 * of type IDREFS must match Names; each IDREF Name must match the value
3741 * of an ID attribute on some element in the XML document; i.e. IDREF
3742 * values must match the value of some ID attribute.
3743 *
3744 * [ VC: Entity Name ]
3745 * Values of type ENTITY must match the Name production, values
3746 * of type ENTITIES must match Names; each Entity Name must match the
3747 * name of an unparsed entity declared in the DTD.
3748 *
3749 * [ VC: Name Token ]
3750 * Values of type NMTOKEN must match the Nmtoken production; values
3751 * of type NMTOKENS must match Nmtokens.
3752 *
3753 * Returns the attribute type
3754 */
3755int
3756xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3757 SHRINK;
3758 if ((RAW == 'C') && (NXT(1) == 'D') &&
3759 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3760 (NXT(4) == 'A')) {
3761 SKIP(5);
3762 return(XML_ATTRIBUTE_CDATA);
3763 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3764 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3765 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3766 SKIP(6);
3767 return(XML_ATTRIBUTE_IDREFS);
3768 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3769 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3770 (NXT(4) == 'F')) {
3771 SKIP(5);
3772 return(XML_ATTRIBUTE_IDREF);
3773 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3774 SKIP(2);
3775 return(XML_ATTRIBUTE_ID);
3776 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3777 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3778 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3779 SKIP(6);
3780 return(XML_ATTRIBUTE_ENTITY);
3781 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3782 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3783 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3784 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3785 SKIP(8);
3786 return(XML_ATTRIBUTE_ENTITIES);
3787 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3788 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3789 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3790 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3791 SKIP(8);
3792 return(XML_ATTRIBUTE_NMTOKENS);
3793 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3794 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3795 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3796 (NXT(6) == 'N')) {
3797 SKIP(7);
3798 return(XML_ATTRIBUTE_NMTOKEN);
3799 }
3800 return(xmlParseEnumeratedType(ctxt, tree));
3801}
3802
3803/**
3804 * xmlParseAttributeListDecl:
3805 * @ctxt: an XML parser context
3806 *
3807 * : parse the Attribute list def for an element
3808 *
3809 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3810 *
3811 * [53] AttDef ::= S Name S AttType S DefaultDecl
3812 *
3813 */
3814void
3815xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3816 xmlChar *elemName;
3817 xmlChar *attrName;
3818 xmlEnumerationPtr tree;
3819
3820 if ((RAW == '<') && (NXT(1) == '!') &&
3821 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3822 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3823 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3824 (NXT(8) == 'T')) {
3825 xmlParserInputPtr input = ctxt->input;
3826
3827 SKIP(9);
3828 if (!IS_BLANK(CUR)) {
3829 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3831 ctxt->sax->error(ctxt->userData,
3832 "Space required after '<!ATTLIST'\n");
3833 ctxt->wellFormed = 0;
3834 ctxt->disableSAX = 1;
3835 }
3836 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003837 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003838 if (elemName == NULL) {
3839 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3841 ctxt->sax->error(ctxt->userData,
3842 "ATTLIST: no name for Element\n");
3843 ctxt->wellFormed = 0;
3844 ctxt->disableSAX = 1;
3845 return;
3846 }
3847 SKIP_BLANKS;
3848 GROW;
3849 while (RAW != '>') {
3850 const xmlChar *check = CUR_PTR;
3851 int type;
3852 int def;
3853 xmlChar *defaultValue = NULL;
3854
3855 GROW;
3856 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003857 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003858 if (attrName == NULL) {
3859 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3861 ctxt->sax->error(ctxt->userData,
3862 "ATTLIST: no name for Attribute\n");
3863 ctxt->wellFormed = 0;
3864 ctxt->disableSAX = 1;
3865 break;
3866 }
3867 GROW;
3868 if (!IS_BLANK(CUR)) {
3869 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3871 ctxt->sax->error(ctxt->userData,
3872 "Space required after the attribute name\n");
3873 ctxt->wellFormed = 0;
3874 ctxt->disableSAX = 1;
3875 if (attrName != NULL)
3876 xmlFree(attrName);
3877 if (defaultValue != NULL)
3878 xmlFree(defaultValue);
3879 break;
3880 }
3881 SKIP_BLANKS;
3882
3883 type = xmlParseAttributeType(ctxt, &tree);
3884 if (type <= 0) {
3885 if (attrName != NULL)
3886 xmlFree(attrName);
3887 if (defaultValue != NULL)
3888 xmlFree(defaultValue);
3889 break;
3890 }
3891
3892 GROW;
3893 if (!IS_BLANK(CUR)) {
3894 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3896 ctxt->sax->error(ctxt->userData,
3897 "Space required after the attribute type\n");
3898 ctxt->wellFormed = 0;
3899 ctxt->disableSAX = 1;
3900 if (attrName != NULL)
3901 xmlFree(attrName);
3902 if (defaultValue != NULL)
3903 xmlFree(defaultValue);
3904 if (tree != NULL)
3905 xmlFreeEnumeration(tree);
3906 break;
3907 }
3908 SKIP_BLANKS;
3909
3910 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3911 if (def <= 0) {
3912 if (attrName != NULL)
3913 xmlFree(attrName);
3914 if (defaultValue != NULL)
3915 xmlFree(defaultValue);
3916 if (tree != NULL)
3917 xmlFreeEnumeration(tree);
3918 break;
3919 }
3920
3921 GROW;
3922 if (RAW != '>') {
3923 if (!IS_BLANK(CUR)) {
3924 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3926 ctxt->sax->error(ctxt->userData,
3927 "Space required after the attribute default value\n");
3928 ctxt->wellFormed = 0;
3929 ctxt->disableSAX = 1;
3930 if (attrName != NULL)
3931 xmlFree(attrName);
3932 if (defaultValue != NULL)
3933 xmlFree(defaultValue);
3934 if (tree != NULL)
3935 xmlFreeEnumeration(tree);
3936 break;
3937 }
3938 SKIP_BLANKS;
3939 }
3940 if (check == CUR_PTR) {
3941 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3943 ctxt->sax->error(ctxt->userData,
3944 "xmlParseAttributeListDecl: detected internal error\n");
3945 if (attrName != NULL)
3946 xmlFree(attrName);
3947 if (defaultValue != NULL)
3948 xmlFree(defaultValue);
3949 if (tree != NULL)
3950 xmlFreeEnumeration(tree);
3951 break;
3952 }
3953 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3954 (ctxt->sax->attributeDecl != NULL))
3955 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3956 type, def, defaultValue, tree);
3957 if (attrName != NULL)
3958 xmlFree(attrName);
3959 if (defaultValue != NULL)
3960 xmlFree(defaultValue);
3961 GROW;
3962 }
3963 if (RAW == '>') {
3964 if (input != ctxt->input) {
3965 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3967 ctxt->sax->error(ctxt->userData,
3968"Attribute list declaration doesn't start and stop in the same entity\n");
3969 ctxt->wellFormed = 0;
3970 ctxt->disableSAX = 1;
3971 }
3972 NEXT;
3973 }
3974
3975 xmlFree(elemName);
3976 }
3977}
3978
3979/**
3980 * xmlParseElementMixedContentDecl:
3981 * @ctxt: an XML parser context
3982 *
3983 * parse the declaration for a Mixed Element content
3984 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3985 *
3986 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3987 * '(' S? '#PCDATA' S? ')'
3988 *
3989 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3990 *
3991 * [ VC: No Duplicate Types ]
3992 * The same name must not appear more than once in a single
3993 * mixed-content declaration.
3994 *
3995 * returns: the list of the xmlElementContentPtr describing the element choices
3996 */
3997xmlElementContentPtr
3998xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3999 xmlElementContentPtr ret = NULL, cur = NULL, n;
4000 xmlChar *elem = NULL;
4001
4002 GROW;
4003 if ((RAW == '#') && (NXT(1) == 'P') &&
4004 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4005 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4006 (NXT(6) == 'A')) {
4007 SKIP(7);
4008 SKIP_BLANKS;
4009 SHRINK;
4010 if (RAW == ')') {
4011 ctxt->entity = ctxt->input;
4012 NEXT;
4013 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4014 if (RAW == '*') {
4015 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4016 NEXT;
4017 }
4018 return(ret);
4019 }
4020 if ((RAW == '(') || (RAW == '|')) {
4021 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4022 if (ret == NULL) return(NULL);
4023 }
4024 while (RAW == '|') {
4025 NEXT;
4026 if (elem == NULL) {
4027 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4028 if (ret == NULL) return(NULL);
4029 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004030 if (cur != NULL)
4031 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004032 cur = ret;
4033 } else {
4034 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4035 if (n == NULL) return(NULL);
4036 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004037 if (n->c1 != NULL)
4038 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004039 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004040 if (n != NULL)
4041 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004042 cur = n;
4043 xmlFree(elem);
4044 }
4045 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004046 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004047 if (elem == NULL) {
4048 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4050 ctxt->sax->error(ctxt->userData,
4051 "xmlParseElementMixedContentDecl : Name expected\n");
4052 ctxt->wellFormed = 0;
4053 ctxt->disableSAX = 1;
4054 xmlFreeElementContent(cur);
4055 return(NULL);
4056 }
4057 SKIP_BLANKS;
4058 GROW;
4059 }
4060 if ((RAW == ')') && (NXT(1) == '*')) {
4061 if (elem != NULL) {
4062 cur->c2 = xmlNewElementContent(elem,
4063 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004064 if (cur->c2 != NULL)
4065 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004066 xmlFree(elem);
4067 }
4068 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4069 ctxt->entity = ctxt->input;
4070 SKIP(2);
4071 } else {
4072 if (elem != NULL) xmlFree(elem);
4073 xmlFreeElementContent(ret);
4074 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4076 ctxt->sax->error(ctxt->userData,
4077 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4078 ctxt->wellFormed = 0;
4079 ctxt->disableSAX = 1;
4080 return(NULL);
4081 }
4082
4083 } else {
4084 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4086 ctxt->sax->error(ctxt->userData,
4087 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4088 ctxt->wellFormed = 0;
4089 ctxt->disableSAX = 1;
4090 }
4091 return(ret);
4092}
4093
4094/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004095 * xmlParseElementChildrenContentD:
4096 * @ctxt: an XML parser context
4097 *
4098 * VMS version of xmlParseElementChildrenContentDecl()
4099 *
4100 * Returns the tree of xmlElementContentPtr describing the element
4101 * hierarchy.
4102 */
4103/**
Owen Taylor3473f882001-02-23 17:55:21 +00004104 * xmlParseElementChildrenContentDecl:
4105 * @ctxt: an XML parser context
4106 *
 * parse the declaration for an element children content model
4108 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4109 *
4110 *
4111 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4112 *
4113 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4114 *
4115 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4116 *
4117 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4118 *
4119 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4120 * TODO Parameter-entity replacement text must be properly nested
 * with parenthesized groups. That is to say, if either of the
4122 * opening or closing parentheses in a choice, seq, or Mixed
4123 * construct is contained in the replacement text for a parameter
4124 * entity, both must be contained in the same replacement text. For
4125 * interoperability, if a parameter-entity reference appears in a
4126 * choice, seq, or Mixed construct, its replacement text should not
4127 * be empty, and neither the first nor last non-blank character of
4128 * the replacement text should be a connector (| or ,).
4129 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004130 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004131 * hierarchy.
4132 */
4133xmlElementContentPtr
4134#ifdef VMS
4135xmlParseElementChildrenContentD
4136#else
4137xmlParseElementChildrenContentDecl
4138#endif
4139(xmlParserCtxtPtr ctxt) {
4140 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4141 xmlChar *elem;
4142 xmlChar type = 0;
4143
4144 SKIP_BLANKS;
4145 GROW;
4146 if (RAW == '(') {
4147 /* Recurse on first child */
4148 NEXT;
4149 SKIP_BLANKS;
4150 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4151 SKIP_BLANKS;
4152 GROW;
4153 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004154 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004155 if (elem == NULL) {
4156 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4158 ctxt->sax->error(ctxt->userData,
4159 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4160 ctxt->wellFormed = 0;
4161 ctxt->disableSAX = 1;
4162 return(NULL);
4163 }
4164 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4165 GROW;
4166 if (RAW == '?') {
4167 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4168 NEXT;
4169 } else if (RAW == '*') {
4170 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4171 NEXT;
4172 } else if (RAW == '+') {
4173 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4174 NEXT;
4175 } else {
4176 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4177 }
4178 xmlFree(elem);
4179 GROW;
4180 }
4181 SKIP_BLANKS;
4182 SHRINK;
4183 while (RAW != ')') {
4184 /*
4185 * Each loop we parse one separator and one element.
4186 */
4187 if (RAW == ',') {
4188 if (type == 0) type = CUR;
4189
4190 /*
4191 * Detect "Name | Name , Name" error
4192 */
4193 else if (type != CUR) {
4194 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4196 ctxt->sax->error(ctxt->userData,
4197 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4198 type);
4199 ctxt->wellFormed = 0;
4200 ctxt->disableSAX = 1;
4201 if ((op != NULL) && (op != ret))
4202 xmlFreeElementContent(op);
4203 if ((last != NULL) && (last != ret) &&
4204 (last != ret->c1) && (last != ret->c2))
4205 xmlFreeElementContent(last);
4206 if (ret != NULL)
4207 xmlFreeElementContent(ret);
4208 return(NULL);
4209 }
4210 NEXT;
4211
4212 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4213 if (op == NULL) {
4214 xmlFreeElementContent(ret);
4215 return(NULL);
4216 }
4217 if (last == NULL) {
4218 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004219 if (ret != NULL)
4220 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004221 ret = cur = op;
4222 } else {
4223 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004224 if (op != NULL)
4225 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004226 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004227 if (last != NULL)
4228 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004229 cur =op;
4230 last = NULL;
4231 }
4232 } else if (RAW == '|') {
4233 if (type == 0) type = CUR;
4234
4235 /*
4236 * Detect "Name , Name | Name" error
4237 */
4238 else if (type != CUR) {
4239 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4241 ctxt->sax->error(ctxt->userData,
4242 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4243 type);
4244 ctxt->wellFormed = 0;
4245 ctxt->disableSAX = 1;
4246 if ((op != NULL) && (op != ret) && (op != last))
4247 xmlFreeElementContent(op);
4248 if ((last != NULL) && (last != ret) &&
4249 (last != ret->c1) && (last != ret->c2))
4250 xmlFreeElementContent(last);
4251 if (ret != NULL)
4252 xmlFreeElementContent(ret);
4253 return(NULL);
4254 }
4255 NEXT;
4256
4257 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4258 if (op == NULL) {
4259 if ((op != NULL) && (op != ret))
4260 xmlFreeElementContent(op);
4261 if ((last != NULL) && (last != ret) &&
4262 (last != ret->c1) && (last != ret->c2))
4263 xmlFreeElementContent(last);
4264 if (ret != NULL)
4265 xmlFreeElementContent(ret);
4266 return(NULL);
4267 }
4268 if (last == NULL) {
4269 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004270 if (ret != NULL)
4271 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004272 ret = cur = op;
4273 } else {
4274 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004275 if (op != NULL)
4276 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004277 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004278 if (last != NULL)
4279 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004280 cur =op;
4281 last = NULL;
4282 }
4283 } else {
4284 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4286 ctxt->sax->error(ctxt->userData,
4287 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4288 ctxt->wellFormed = 0;
4289 ctxt->disableSAX = 1;
4290 if ((op != NULL) && (op != ret))
4291 xmlFreeElementContent(op);
4292 if ((last != NULL) && (last != ret) &&
4293 (last != ret->c1) && (last != ret->c2))
4294 xmlFreeElementContent(last);
4295 if (ret != NULL)
4296 xmlFreeElementContent(ret);
4297 return(NULL);
4298 }
4299 GROW;
4300 SKIP_BLANKS;
4301 GROW;
4302 if (RAW == '(') {
4303 /* Recurse on second child */
4304 NEXT;
4305 SKIP_BLANKS;
4306 last = xmlParseElementChildrenContentDecl(ctxt);
4307 SKIP_BLANKS;
4308 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004309 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004310 if (elem == NULL) {
4311 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4313 ctxt->sax->error(ctxt->userData,
4314 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4315 ctxt->wellFormed = 0;
4316 ctxt->disableSAX = 1;
4317 if ((op != NULL) && (op != ret))
4318 xmlFreeElementContent(op);
4319 if ((last != NULL) && (last != ret) &&
4320 (last != ret->c1) && (last != ret->c2))
4321 xmlFreeElementContent(last);
4322 if (ret != NULL)
4323 xmlFreeElementContent(ret);
4324 return(NULL);
4325 }
4326 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4327 xmlFree(elem);
4328 if (RAW == '?') {
4329 last->ocur = XML_ELEMENT_CONTENT_OPT;
4330 NEXT;
4331 } else if (RAW == '*') {
4332 last->ocur = XML_ELEMENT_CONTENT_MULT;
4333 NEXT;
4334 } else if (RAW == '+') {
4335 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4336 NEXT;
4337 } else {
4338 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4339 }
4340 }
4341 SKIP_BLANKS;
4342 GROW;
4343 }
4344 if ((cur != NULL) && (last != NULL)) {
4345 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004346 if (last != NULL)
4347 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004348 }
4349 ctxt->entity = ctxt->input;
4350 NEXT;
4351 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004352 if (ret != NULL)
4353 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004354 NEXT;
4355 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004356 if (ret != NULL)
4357 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004358 NEXT;
4359 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004360 if (ret != NULL)
4361 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004362 NEXT;
4363 }
4364 return(ret);
4365}
4366
4367/**
4368 * xmlParseElementContentDecl:
4369 * @ctxt: an XML parser context
4370 * @name: the name of the element being defined.
4371 * @result: the Element Content pointer will be stored here if any
4372 *
4373 * parse the declaration for an Element content either Mixed or Children,
4374 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4375 *
4376 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4377 *
4378 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4379 */
4380
4381int
4382xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4383 xmlElementContentPtr *result) {
4384
4385 xmlElementContentPtr tree = NULL;
4386 xmlParserInputPtr input = ctxt->input;
4387 int res;
4388
4389 *result = NULL;
4390
4391 if (RAW != '(') {
4392 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4394 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004395 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004396 ctxt->wellFormed = 0;
4397 ctxt->disableSAX = 1;
4398 return(-1);
4399 }
4400 NEXT;
4401 GROW;
4402 SKIP_BLANKS;
4403 if ((RAW == '#') && (NXT(1) == 'P') &&
4404 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4405 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4406 (NXT(6) == 'A')) {
4407 tree = xmlParseElementMixedContentDecl(ctxt);
4408 res = XML_ELEMENT_TYPE_MIXED;
4409 } else {
4410 tree = xmlParseElementChildrenContentDecl(ctxt);
4411 res = XML_ELEMENT_TYPE_ELEMENT;
4412 }
4413 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4414 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4416 ctxt->sax->error(ctxt->userData,
4417"Element content declaration doesn't start and stop in the same entity\n");
4418 ctxt->wellFormed = 0;
4419 ctxt->disableSAX = 1;
4420 }
4421 SKIP_BLANKS;
4422 *result = tree;
4423 return(res);
4424}
4425
4426/**
4427 * xmlParseElementDecl:
4428 * @ctxt: an XML parser context
4429 *
4430 * parse an Element declaration.
4431 *
4432 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4433 *
4434 * [ VC: Unique Element Type Declaration ]
4435 * No element type may be declared more than once
4436 *
4437 * Returns the type of the element, or -1 in case of error
4438 */
4439int
4440xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4441 xmlChar *name;
4442 int ret = -1;
4443 xmlElementContentPtr content = NULL;
4444
4445 GROW;
4446 if ((RAW == '<') && (NXT(1) == '!') &&
4447 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4448 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4449 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4450 (NXT(8) == 'T')) {
4451 xmlParserInputPtr input = ctxt->input;
4452
4453 SKIP(9);
4454 if (!IS_BLANK(CUR)) {
4455 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4457 ctxt->sax->error(ctxt->userData,
4458 "Space required after 'ELEMENT'\n");
4459 ctxt->wellFormed = 0;
4460 ctxt->disableSAX = 1;
4461 }
4462 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004463 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004464 if (name == NULL) {
4465 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4467 ctxt->sax->error(ctxt->userData,
4468 "xmlParseElementDecl: no name for Element\n");
4469 ctxt->wellFormed = 0;
4470 ctxt->disableSAX = 1;
4471 return(-1);
4472 }
4473 while ((RAW == 0) && (ctxt->inputNr > 1))
4474 xmlPopInput(ctxt);
4475 if (!IS_BLANK(CUR)) {
4476 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4478 ctxt->sax->error(ctxt->userData,
4479 "Space required after the element name\n");
4480 ctxt->wellFormed = 0;
4481 ctxt->disableSAX = 1;
4482 }
4483 SKIP_BLANKS;
4484 if ((RAW == 'E') && (NXT(1) == 'M') &&
4485 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4486 (NXT(4) == 'Y')) {
4487 SKIP(5);
4488 /*
4489 * Element must always be empty.
4490 */
4491 ret = XML_ELEMENT_TYPE_EMPTY;
4492 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4493 (NXT(2) == 'Y')) {
4494 SKIP(3);
4495 /*
4496 * Element is a generic container.
4497 */
4498 ret = XML_ELEMENT_TYPE_ANY;
4499 } else if (RAW == '(') {
4500 ret = xmlParseElementContentDecl(ctxt, name, &content);
4501 } else {
4502 /*
4503 * [ WFC: PEs in Internal Subset ] error handling.
4504 */
4505 if ((RAW == '%') && (ctxt->external == 0) &&
4506 (ctxt->inputNr == 1)) {
4507 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4509 ctxt->sax->error(ctxt->userData,
4510 "PEReference: forbidden within markup decl in internal subset\n");
4511 } else {
4512 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4514 ctxt->sax->error(ctxt->userData,
4515 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4516 }
4517 ctxt->wellFormed = 0;
4518 ctxt->disableSAX = 1;
4519 if (name != NULL) xmlFree(name);
4520 return(-1);
4521 }
4522
4523 SKIP_BLANKS;
4524 /*
4525 * Pop-up of finished entities.
4526 */
4527 while ((RAW == 0) && (ctxt->inputNr > 1))
4528 xmlPopInput(ctxt);
4529 SKIP_BLANKS;
4530
4531 if (RAW != '>') {
4532 ctxt->errNo = XML_ERR_GT_REQUIRED;
4533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4534 ctxt->sax->error(ctxt->userData,
4535 "xmlParseElementDecl: expected '>' at the end\n");
4536 ctxt->wellFormed = 0;
4537 ctxt->disableSAX = 1;
4538 } else {
4539 if (input != ctxt->input) {
4540 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4541 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4542 ctxt->sax->error(ctxt->userData,
4543"Element declaration doesn't start and stop in the same entity\n");
4544 ctxt->wellFormed = 0;
4545 ctxt->disableSAX = 1;
4546 }
4547
4548 NEXT;
4549 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4550 (ctxt->sax->elementDecl != NULL))
4551 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4552 content);
4553 }
4554 if (content != NULL) {
4555 xmlFreeElementContent(content);
4556 }
4557 if (name != NULL) {
4558 xmlFree(name);
4559 }
4560 }
4561 return(ret);
4562}
4563
4564/**
4565 * xmlParseMarkupDecl:
4566 * @ctxt: an XML parser context
4567 *
4568 * parse Markup declarations
4569 *
4570 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4571 * NotationDecl | PI | Comment
4572 *
4573 * [ VC: Proper Declaration/PE Nesting ]
4574 * Parameter-entity replacement text must be properly nested with
4575 * markup declarations. That is to say, if either the first character
4576 * or the last character of a markup declaration (markupdecl above) is
4577 * contained in the replacement text for a parameter-entity reference,
4578 * both must be contained in the same replacement text.
4579 *
4580 * [ WFC: PEs in Internal Subset ]
4581 * In the internal DTD subset, parameter-entity references can occur
4582 * only where markup declarations can occur, not within markup declarations.
4583 * (This does not apply to references that occur in external parameter
4584 * entities or to the external subset.)
4585 */
4586void
4587xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4588 GROW;
4589 xmlParseElementDecl(ctxt);
4590 xmlParseAttributeListDecl(ctxt);
4591 xmlParseEntityDecl(ctxt);
4592 xmlParseNotationDecl(ctxt);
4593 xmlParsePI(ctxt);
4594 xmlParseComment(ctxt);
4595 /*
4596 * This is only for internal subset. On external entities,
4597 * the replacement is done before parsing stage
4598 */
4599 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4600 xmlParsePEReference(ctxt);
4601 ctxt->instate = XML_PARSER_DTD;
4602}
4603
4604/**
4605 * xmlParseTextDecl:
4606 * @ctxt: an XML parser context
4607 *
4608 * parse an XML declaration header for external entities
4609 *
4610 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4611 *
4612 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4613 */
4614
4615void
4616xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4617 xmlChar *version;
4618
4619 /*
4620 * We know that '<?xml' is here.
4621 */
4622 if ((RAW == '<') && (NXT(1) == '?') &&
4623 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4624 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4625 SKIP(5);
4626 } else {
4627 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4629 ctxt->sax->error(ctxt->userData,
4630 "Text declaration '<?xml' required\n");
4631 ctxt->wellFormed = 0;
4632 ctxt->disableSAX = 1;
4633
4634 return;
4635 }
4636
4637 if (!IS_BLANK(CUR)) {
4638 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4640 ctxt->sax->error(ctxt->userData,
4641 "Space needed after '<?xml'\n");
4642 ctxt->wellFormed = 0;
4643 ctxt->disableSAX = 1;
4644 }
4645 SKIP_BLANKS;
4646
4647 /*
4648 * We may have the VersionInfo here.
4649 */
4650 version = xmlParseVersionInfo(ctxt);
4651 if (version == NULL)
4652 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4653 ctxt->input->version = version;
4654
4655 /*
4656 * We must have the encoding declaration
4657 */
4658 if (!IS_BLANK(CUR)) {
4659 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4661 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4662 ctxt->wellFormed = 0;
4663 ctxt->disableSAX = 1;
4664 }
4665 xmlParseEncodingDecl(ctxt);
4666 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4667 /*
4668 * The XML REC instructs us to stop parsing right here
4669 */
4670 return;
4671 }
4672
4673 SKIP_BLANKS;
4674 if ((RAW == '?') && (NXT(1) == '>')) {
4675 SKIP(2);
4676 } else if (RAW == '>') {
4677 /* Deprecated old WD ... */
4678 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4680 ctxt->sax->error(ctxt->userData,
4681 "XML declaration must end-up with '?>'\n");
4682 ctxt->wellFormed = 0;
4683 ctxt->disableSAX = 1;
4684 NEXT;
4685 } else {
4686 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4688 ctxt->sax->error(ctxt->userData,
4689 "parsing XML declaration: '?>' expected\n");
4690 ctxt->wellFormed = 0;
4691 ctxt->disableSAX = 1;
4692 MOVETO_ENDTAG(CUR_PTR);
4693 NEXT;
4694 }
4695}
4696
4697/*
4698 * xmlParseConditionalSections
4699 * @ctxt: an XML parser context
4700 *
4701 * [61] conditionalSect ::= includeSect | ignoreSect
4702 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4703 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4704 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4705 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4706 */
4707
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004708static void
Owen Taylor3473f882001-02-23 17:55:21 +00004709xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4710 SKIP(3);
4711 SKIP_BLANKS;
4712 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4713 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4714 (NXT(6) == 'E')) {
4715 SKIP(7);
4716 SKIP_BLANKS;
4717 if (RAW != '[') {
4718 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4720 ctxt->sax->error(ctxt->userData,
4721 "XML conditional section '[' expected\n");
4722 ctxt->wellFormed = 0;
4723 ctxt->disableSAX = 1;
4724 } else {
4725 NEXT;
4726 }
4727 if (xmlParserDebugEntities) {
4728 if ((ctxt->input != NULL) && (ctxt->input->filename))
4729 xmlGenericError(xmlGenericErrorContext,
4730 "%s(%d): ", ctxt->input->filename,
4731 ctxt->input->line);
4732 xmlGenericError(xmlGenericErrorContext,
4733 "Entering INCLUDE Conditional Section\n");
4734 }
4735
4736 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4737 (NXT(2) != '>'))) {
4738 const xmlChar *check = CUR_PTR;
4739 int cons = ctxt->input->consumed;
4740 int tok = ctxt->token;
4741
4742 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4743 xmlParseConditionalSections(ctxt);
4744 } else if (IS_BLANK(CUR)) {
4745 NEXT;
4746 } else if (RAW == '%') {
4747 xmlParsePEReference(ctxt);
4748 } else
4749 xmlParseMarkupDecl(ctxt);
4750
4751 /*
4752 * Pop-up of finished entities.
4753 */
4754 while ((RAW == 0) && (ctxt->inputNr > 1))
4755 xmlPopInput(ctxt);
4756
4757 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4758 (tok == ctxt->token)) {
4759 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4761 ctxt->sax->error(ctxt->userData,
4762 "Content error in the external subset\n");
4763 ctxt->wellFormed = 0;
4764 ctxt->disableSAX = 1;
4765 break;
4766 }
4767 }
4768 if (xmlParserDebugEntities) {
4769 if ((ctxt->input != NULL) && (ctxt->input->filename))
4770 xmlGenericError(xmlGenericErrorContext,
4771 "%s(%d): ", ctxt->input->filename,
4772 ctxt->input->line);
4773 xmlGenericError(xmlGenericErrorContext,
4774 "Leaving INCLUDE Conditional Section\n");
4775 }
4776
4777 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4778 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4779 int state;
4780 int instate;
4781 int depth = 0;
4782
4783 SKIP(6);
4784 SKIP_BLANKS;
4785 if (RAW != '[') {
4786 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4788 ctxt->sax->error(ctxt->userData,
4789 "XML conditional section '[' expected\n");
4790 ctxt->wellFormed = 0;
4791 ctxt->disableSAX = 1;
4792 } else {
4793 NEXT;
4794 }
4795 if (xmlParserDebugEntities) {
4796 if ((ctxt->input != NULL) && (ctxt->input->filename))
4797 xmlGenericError(xmlGenericErrorContext,
4798 "%s(%d): ", ctxt->input->filename,
4799 ctxt->input->line);
4800 xmlGenericError(xmlGenericErrorContext,
4801 "Entering IGNORE Conditional Section\n");
4802 }
4803
4804 /*
4805 * Parse up to the end of the conditionnal section
4806 * But disable SAX event generating DTD building in the meantime
4807 */
4808 state = ctxt->disableSAX;
4809 instate = ctxt->instate;
4810 ctxt->disableSAX = 1;
4811 ctxt->instate = XML_PARSER_IGNORE;
4812
4813 while (depth >= 0) {
4814 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4815 depth++;
4816 SKIP(3);
4817 continue;
4818 }
4819 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4820 if (--depth >= 0) SKIP(3);
4821 continue;
4822 }
4823 NEXT;
4824 continue;
4825 }
4826
4827 ctxt->disableSAX = state;
4828 ctxt->instate = instate;
4829
4830 if (xmlParserDebugEntities) {
4831 if ((ctxt->input != NULL) && (ctxt->input->filename))
4832 xmlGenericError(xmlGenericErrorContext,
4833 "%s(%d): ", ctxt->input->filename,
4834 ctxt->input->line);
4835 xmlGenericError(xmlGenericErrorContext,
4836 "Leaving IGNORE Conditional Section\n");
4837 }
4838
4839 } else {
4840 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4842 ctxt->sax->error(ctxt->userData,
4843 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4844 ctxt->wellFormed = 0;
4845 ctxt->disableSAX = 1;
4846 }
4847
4848 if (RAW == 0)
4849 SHRINK;
4850
4851 if (RAW == 0) {
4852 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4854 ctxt->sax->error(ctxt->userData,
4855 "XML conditional section not closed\n");
4856 ctxt->wellFormed = 0;
4857 ctxt->disableSAX = 1;
4858 } else {
4859 SKIP(3);
4860 }
4861}
4862
4863/**
4864 * xmlParseExternalSubset:
4865 * @ctxt: an XML parser context
4866 * @ExternalID: the external identifier
4867 * @SystemID: the system identifier (or URL)
4868 *
4869 * parse Markup declarations from an external subset
4870 *
4871 * [30] extSubset ::= textDecl? extSubsetDecl
4872 *
4873 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4874 */
4875void
4876xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4877 const xmlChar *SystemID) {
4878 GROW;
4879 if ((RAW == '<') && (NXT(1) == '?') &&
4880 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4881 (NXT(4) == 'l')) {
4882 xmlParseTextDecl(ctxt);
4883 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4884 /*
4885 * The XML REC instructs us to stop parsing right here
4886 */
4887 ctxt->instate = XML_PARSER_EOF;
4888 return;
4889 }
4890 }
4891 if (ctxt->myDoc == NULL) {
4892 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4893 }
4894 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4895 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4896
4897 ctxt->instate = XML_PARSER_DTD;
4898 ctxt->external = 1;
4899 while (((RAW == '<') && (NXT(1) == '?')) ||
4900 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00004901 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004902 const xmlChar *check = CUR_PTR;
4903 int cons = ctxt->input->consumed;
4904 int tok = ctxt->token;
4905
4906 GROW;
4907 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4908 xmlParseConditionalSections(ctxt);
4909 } else if (IS_BLANK(CUR)) {
4910 NEXT;
4911 } else if (RAW == '%') {
4912 xmlParsePEReference(ctxt);
4913 } else
4914 xmlParseMarkupDecl(ctxt);
4915
4916 /*
4917 * Pop-up of finished entities.
4918 */
4919 while ((RAW == 0) && (ctxt->inputNr > 1))
4920 xmlPopInput(ctxt);
4921
4922 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4923 (tok == ctxt->token)) {
4924 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4926 ctxt->sax->error(ctxt->userData,
4927 "Content error in the external subset\n");
4928 ctxt->wellFormed = 0;
4929 ctxt->disableSAX = 1;
4930 break;
4931 }
4932 }
4933
4934 if (RAW != 0) {
4935 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4937 ctxt->sax->error(ctxt->userData,
4938 "Extra content at the end of the document\n");
4939 ctxt->wellFormed = 0;
4940 ctxt->disableSAX = 1;
4941 }
4942
4943}
4944
4945/**
4946 * xmlParseReference:
4947 * @ctxt: an XML parser context
4948 *
4949 * parse and handle entity references in content, depending on the SAX
4950 * interface, this may end-up in a call to character() if this is a
4951 * CharRef, a predefined entity, if there is no reference() callback.
4952 * or if the parser was asked to switch to that mode.
4953 *
4954 * [67] Reference ::= EntityRef | CharRef
4955 */
4956void
4957xmlParseReference(xmlParserCtxtPtr ctxt) {
4958 xmlEntityPtr ent;
4959 xmlChar *val;
4960 if (RAW != '&') return;
4961
4962 if (NXT(1) == '#') {
4963 int i = 0;
4964 xmlChar out[10];
4965 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004966 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004967
4968 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4969 /*
4970 * So we are using non-UTF-8 buffers
4971 * Check that the char fit on 8bits, if not
4972 * generate a CharRef.
4973 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004974 if (value <= 0xFF) {
4975 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004976 out[1] = 0;
4977 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4978 (!ctxt->disableSAX))
4979 ctxt->sax->characters(ctxt->userData, out, 1);
4980 } else {
4981 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004982 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004983 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004984 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004985 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4986 (!ctxt->disableSAX))
4987 ctxt->sax->reference(ctxt->userData, out);
4988 }
4989 } else {
4990 /*
4991 * Just encode the value in UTF-8
4992 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004993 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004994 out[i] = 0;
4995 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4996 (!ctxt->disableSAX))
4997 ctxt->sax->characters(ctxt->userData, out, i);
4998 }
4999 } else {
5000 ent = xmlParseEntityRef(ctxt);
5001 if (ent == NULL) return;
5002 if ((ent->name != NULL) &&
5003 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5004 xmlNodePtr list = NULL;
5005 int ret;
5006
5007
5008 /*
5009 * The first reference to the entity trigger a parsing phase
5010 * where the ent->children is filled with the result from
5011 * the parsing.
5012 */
5013 if (ent->children == NULL) {
5014 xmlChar *value;
5015 value = ent->content;
5016
5017 /*
5018 * Check that this entity is well formed
5019 */
5020 if ((value != NULL) &&
5021 (value[1] == 0) && (value[0] == '<') &&
5022 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5023 /*
5024 * DONE: get definite answer on this !!!
5025 * Lots of entity decls are used to declare a single
5026 * char
5027 * <!ENTITY lt "<">
5028 * Which seems to be valid since
5029 * 2.4: The ampersand character (&) and the left angle
5030 * bracket (<) may appear in their literal form only
5031 * when used ... They are also legal within the literal
5032 * entity value of an internal entity declaration;i
5033 * see "4.3.2 Well-Formed Parsed Entities".
5034 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5035 * Looking at the OASIS test suite and James Clark
5036 * tests, this is broken. However the XML REC uses
5037 * it. Is the XML REC not well-formed ????
5038 * This is a hack to avoid this problem
5039 *
5040 * ANSWER: since lt gt amp .. are already defined,
5041 * this is a redefinition and hence the fact that the
5042 * contentis not well balanced is not a Wf error, this
5043 * is lousy but acceptable.
5044 */
5045 list = xmlNewDocText(ctxt->myDoc, value);
5046 if (list != NULL) {
5047 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5048 (ent->children == NULL)) {
5049 ent->children = list;
5050 ent->last = list;
5051 list->parent = (xmlNodePtr) ent;
5052 } else {
5053 xmlFreeNodeList(list);
5054 }
5055 } else if (list != NULL) {
5056 xmlFreeNodeList(list);
5057 }
5058 } else {
5059 /*
5060 * 4.3.2: An internal general parsed entity is well-formed
5061 * if its replacement text matches the production labeled
5062 * content.
5063 */
5064 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5065 ctxt->depth++;
5066 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5067 ctxt->sax, NULL, ctxt->depth,
5068 value, &list);
5069 ctxt->depth--;
5070 } else if (ent->etype ==
5071 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5072 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005073 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005074 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005075 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005076 ctxt->depth--;
5077 } else {
5078 ret = -1;
5079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5080 ctxt->sax->error(ctxt->userData,
5081 "Internal: invalid entity type\n");
5082 }
5083 if (ret == XML_ERR_ENTITY_LOOP) {
5084 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5086 ctxt->sax->error(ctxt->userData,
5087 "Detected entity reference loop\n");
5088 ctxt->wellFormed = 0;
5089 ctxt->disableSAX = 1;
5090 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005091 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5092 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005093 (ent->children == NULL)) {
5094 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005095 if (ctxt->replaceEntities) {
5096 /*
5097 * Prune it directly in the generated document
5098 * except for single text nodes.
5099 */
5100 if ((list->type == XML_TEXT_NODE) &&
5101 (list->next == NULL)) {
5102 list->parent = (xmlNodePtr) ent;
5103 list = NULL;
5104 } else {
5105 while (list != NULL) {
5106 list->parent = (xmlNodePtr) ctxt->node;
5107 if (list->next == NULL)
5108 ent->last = list;
5109 list = list->next;
5110 }
5111 list = ent->children;
5112 }
5113 } else {
5114 while (list != NULL) {
5115 list->parent = (xmlNodePtr) ent;
5116 if (list->next == NULL)
5117 ent->last = list;
5118 list = list->next;
5119 }
Owen Taylor3473f882001-02-23 17:55:21 +00005120 }
5121 } else {
5122 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005123 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005124 }
5125 } else if (ret > 0) {
5126 ctxt->errNo = ret;
5127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5128 ctxt->sax->error(ctxt->userData,
5129 "Entity value required\n");
5130 ctxt->wellFormed = 0;
5131 ctxt->disableSAX = 1;
5132 } else if (list != NULL) {
5133 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005134 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005135 }
5136 }
5137 }
5138 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5139 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5140 /*
5141 * Create a node.
5142 */
5143 ctxt->sax->reference(ctxt->userData, ent->name);
5144 return;
5145 } else if (ctxt->replaceEntities) {
5146 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5147 /*
5148 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005149 * a simple tree copy for all references except the first
5150 * In the first occurence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005151 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005152 if (list == NULL) {
5153 xmlNodePtr new, cur;
5154 cur = ent->children;
5155 while (cur != NULL) {
5156 new = xmlCopyNode(cur, 1);
5157 xmlAddChild(ctxt->node, new);
5158 if (cur == ent->last)
5159 break;
5160 cur = cur->next;
5161 }
5162 } else {
5163 /*
5164 * the name change is to avoid coalescing of the
5165 * node with a prossible previous text one which
5166 * would make ent->children a dandling pointer
5167 */
5168 if (ent->children->type == XML_TEXT_NODE)
5169 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5170 if ((ent->last != ent->children) &&
5171 (ent->last->type == XML_TEXT_NODE))
5172 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5173 xmlAddChildList(ctxt->node, ent->children);
5174 }
5175
Owen Taylor3473f882001-02-23 17:55:21 +00005176 /*
5177 * This is to avoid a nasty side effect, see
5178 * characters() in SAX.c
5179 */
5180 ctxt->nodemem = 0;
5181 ctxt->nodelen = 0;
5182 return;
5183 } else {
5184 /*
5185 * Probably running in SAX mode
5186 */
5187 xmlParserInputPtr input;
5188
5189 input = xmlNewEntityInputStream(ctxt, ent);
5190 xmlPushInput(ctxt, input);
5191 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5192 (RAW == '<') && (NXT(1) == '?') &&
5193 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5194 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5195 xmlParseTextDecl(ctxt);
5196 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5197 /*
5198 * The XML REC instructs us to stop parsing right here
5199 */
5200 ctxt->instate = XML_PARSER_EOF;
5201 return;
5202 }
5203 if (input->standalone == 1) {
5204 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5206 ctxt->sax->error(ctxt->userData,
5207 "external parsed entities cannot be standalone\n");
5208 ctxt->wellFormed = 0;
5209 ctxt->disableSAX = 1;
5210 }
5211 }
5212 return;
5213 }
5214 }
5215 } else {
5216 val = ent->content;
5217 if (val == NULL) return;
5218 /*
5219 * inline the entity.
5220 */
5221 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5222 (!ctxt->disableSAX))
5223 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5224 }
5225 }
5226}
5227
5228/**
5229 * xmlParseEntityRef:
5230 * @ctxt: an XML parser context
5231 *
5232 * parse ENTITY references declarations
5233 *
5234 * [68] EntityRef ::= '&' Name ';'
5235 *
5236 * [ WFC: Entity Declared ]
5237 * In a document without any DTD, a document with only an internal DTD
5238 * subset which contains no parameter entity references, or a document
5239 * with "standalone='yes'", the Name given in the entity reference
5240 * must match that in an entity declaration, except that well-formed
5241 * documents need not declare any of the following entities: amp, lt,
5242 * gt, apos, quot. The declaration of a parameter entity must precede
5243 * any reference to it. Similarly, the declaration of a general entity
5244 * must precede any reference to it which appears in a default value in an
5245 * attribute-list declaration. Note that if entities are declared in the
5246 * external subset or in external parameter entities, a non-validating
5247 * processor is not obligated to read and process their declarations;
5248 * for such documents, the rule that an entity must be declared is a
5249 * well-formedness constraint only if standalone='yes'.
5250 *
5251 * [ WFC: Parsed Entity ]
5252 * An entity reference must not contain the name of an unparsed entity
5253 *
5254 * Returns the xmlEntityPtr if found, or NULL otherwise.
5255 */
5256xmlEntityPtr
5257xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5258 xmlChar *name;
5259 xmlEntityPtr ent = NULL;
5260
5261 GROW;
5262
5263 if (RAW == '&') {
5264 NEXT;
5265 name = xmlParseName(ctxt);
5266 if (name == NULL) {
5267 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5269 ctxt->sax->error(ctxt->userData,
5270 "xmlParseEntityRef: no name\n");
5271 ctxt->wellFormed = 0;
5272 ctxt->disableSAX = 1;
5273 } else {
5274 if (RAW == ';') {
5275 NEXT;
5276 /*
5277 * Ask first SAX for entity resolution, otherwise try the
5278 * predefined set.
5279 */
5280 if (ctxt->sax != NULL) {
5281 if (ctxt->sax->getEntity != NULL)
5282 ent = ctxt->sax->getEntity(ctxt->userData, name);
5283 if (ent == NULL)
5284 ent = xmlGetPredefinedEntity(name);
5285 }
5286 /*
5287 * [ WFC: Entity Declared ]
5288 * In a document without any DTD, a document with only an
5289 * internal DTD subset which contains no parameter entity
5290 * references, or a document with "standalone='yes'", the
5291 * Name given in the entity reference must match that in an
5292 * entity declaration, except that well-formed documents
5293 * need not declare any of the following entities: amp, lt,
5294 * gt, apos, quot.
5295 * The declaration of a parameter entity must precede any
5296 * reference to it.
5297 * Similarly, the declaration of a general entity must
5298 * precede any reference to it which appears in a default
5299 * value in an attribute-list declaration. Note that if
5300 * entities are declared in the external subset or in
5301 * external parameter entities, a non-validating processor
5302 * is not obligated to read and process their declarations;
5303 * for such documents, the rule that an entity must be
5304 * declared is a well-formedness constraint only if
5305 * standalone='yes'.
5306 */
5307 if (ent == NULL) {
5308 if ((ctxt->standalone == 1) ||
5309 ((ctxt->hasExternalSubset == 0) &&
5310 (ctxt->hasPErefs == 0))) {
5311 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5313 ctxt->sax->error(ctxt->userData,
5314 "Entity '%s' not defined\n", name);
5315 ctxt->wellFormed = 0;
5316 ctxt->disableSAX = 1;
5317 } else {
5318 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5319 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005320 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005321 "Entity '%s' not defined\n", name);
5322 }
5323 }
5324
5325 /*
5326 * [ WFC: Parsed Entity ]
5327 * An entity reference must not contain the name of an
5328 * unparsed entity
5329 */
5330 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5331 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5333 ctxt->sax->error(ctxt->userData,
5334 "Entity reference to unparsed entity %s\n", name);
5335 ctxt->wellFormed = 0;
5336 ctxt->disableSAX = 1;
5337 }
5338
5339 /*
5340 * [ WFC: No External Entity References ]
5341 * Attribute values cannot contain direct or indirect
5342 * entity references to external entities.
5343 */
5344 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5345 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5346 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5348 ctxt->sax->error(ctxt->userData,
5349 "Attribute references external entity '%s'\n", name);
5350 ctxt->wellFormed = 0;
5351 ctxt->disableSAX = 1;
5352 }
5353 /*
5354 * [ WFC: No < in Attribute Values ]
5355 * The replacement text of any entity referred to directly or
5356 * indirectly in an attribute value (other than "&lt;") must
5357 * not contain a <.
5358 */
5359 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5360 (ent != NULL) &&
5361 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5362 (ent->content != NULL) &&
5363 (xmlStrchr(ent->content, '<'))) {
5364 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5366 ctxt->sax->error(ctxt->userData,
5367 "'<' in entity '%s' is not allowed in attributes values\n", name);
5368 ctxt->wellFormed = 0;
5369 ctxt->disableSAX = 1;
5370 }
5371
5372 /*
5373 * Internal check, no parameter entities here ...
5374 */
5375 else {
5376 switch (ent->etype) {
5377 case XML_INTERNAL_PARAMETER_ENTITY:
5378 case XML_EXTERNAL_PARAMETER_ENTITY:
5379 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5381 ctxt->sax->error(ctxt->userData,
5382 "Attempt to reference the parameter entity '%s'\n", name);
5383 ctxt->wellFormed = 0;
5384 ctxt->disableSAX = 1;
5385 break;
5386 default:
5387 break;
5388 }
5389 }
5390
5391 /*
5392 * [ WFC: No Recursion ]
5393 * A parsed entity must not contain a recursive reference
5394 * to itself, either directly or indirectly.
5395 * Done somewhere else
5396 */
5397
5398 } else {
5399 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5401 ctxt->sax->error(ctxt->userData,
5402 "xmlParseEntityRef: expecting ';'\n");
5403 ctxt->wellFormed = 0;
5404 ctxt->disableSAX = 1;
5405 }
5406 xmlFree(name);
5407 }
5408 }
5409 return(ent);
5410}
5411
5412/**
5413 * xmlParseStringEntityRef:
5414 * @ctxt: an XML parser context
5415 * @str: a pointer to an index in the string
5416 *
5417 * parse ENTITY references declarations, but this version parses it from
5418 * a string value.
5419 *
5420 * [68] EntityRef ::= '&' Name ';'
5421 *
5422 * [ WFC: Entity Declared ]
5423 * In a document without any DTD, a document with only an internal DTD
5424 * subset which contains no parameter entity references, or a document
5425 * with "standalone='yes'", the Name given in the entity reference
5426 * must match that in an entity declaration, except that well-formed
5427 * documents need not declare any of the following entities: amp, lt,
5428 * gt, apos, quot. The declaration of a parameter entity must precede
5429 * any reference to it. Similarly, the declaration of a general entity
5430 * must precede any reference to it which appears in a default value in an
5431 * attribute-list declaration. Note that if entities are declared in the
5432 * external subset or in external parameter entities, a non-validating
5433 * processor is not obligated to read and process their declarations;
5434 * for such documents, the rule that an entity must be declared is a
5435 * well-formedness constraint only if standalone='yes'.
5436 *
5437 * [ WFC: Parsed Entity ]
5438 * An entity reference must not contain the name of an unparsed entity
5439 *
5440 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5441 * is updated to the current location in the string.
5442 */
5443xmlEntityPtr
5444xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5445 xmlChar *name;
5446 const xmlChar *ptr;
5447 xmlChar cur;
5448 xmlEntityPtr ent = NULL;
5449
5450 if ((str == NULL) || (*str == NULL))
5451 return(NULL);
5452 ptr = *str;
5453 cur = *ptr;
5454 if (cur == '&') {
5455 ptr++;
5456 cur = *ptr;
5457 name = xmlParseStringName(ctxt, &ptr);
5458 if (name == NULL) {
5459 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5461 ctxt->sax->error(ctxt->userData,
5462 "xmlParseEntityRef: no name\n");
5463 ctxt->wellFormed = 0;
5464 ctxt->disableSAX = 1;
5465 } else {
5466 if (*ptr == ';') {
5467 ptr++;
5468 /*
5469 * Ask first SAX for entity resolution, otherwise try the
5470 * predefined set.
5471 */
5472 if (ctxt->sax != NULL) {
5473 if (ctxt->sax->getEntity != NULL)
5474 ent = ctxt->sax->getEntity(ctxt->userData, name);
5475 if (ent == NULL)
5476 ent = xmlGetPredefinedEntity(name);
5477 }
5478 /*
5479 * [ WFC: Entity Declared ]
5480 * In a document without any DTD, a document with only an
5481 * internal DTD subset which contains no parameter entity
5482 * references, or a document with "standalone='yes'", the
5483 * Name given in the entity reference must match that in an
5484 * entity declaration, except that well-formed documents
5485 * need not declare any of the following entities: amp, lt,
5486 * gt, apos, quot.
5487 * The declaration of a parameter entity must precede any
5488 * reference to it.
5489 * Similarly, the declaration of a general entity must
5490 * precede any reference to it which appears in a default
5491 * value in an attribute-list declaration. Note that if
5492 * entities are declared in the external subset or in
5493 * external parameter entities, a non-validating processor
5494 * is not obligated to read and process their declarations;
5495 * for such documents, the rule that an entity must be
5496 * declared is a well-formedness constraint only if
5497 * standalone='yes'.
5498 */
5499 if (ent == NULL) {
5500 if ((ctxt->standalone == 1) ||
5501 ((ctxt->hasExternalSubset == 0) &&
5502 (ctxt->hasPErefs == 0))) {
5503 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5505 ctxt->sax->error(ctxt->userData,
5506 "Entity '%s' not defined\n", name);
5507 ctxt->wellFormed = 0;
5508 ctxt->disableSAX = 1;
5509 } else {
5510 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5511 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5512 ctxt->sax->warning(ctxt->userData,
5513 "Entity '%s' not defined\n", name);
5514 }
5515 }
5516
5517 /*
5518 * [ WFC: Parsed Entity ]
5519 * An entity reference must not contain the name of an
5520 * unparsed entity
5521 */
5522 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5523 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5525 ctxt->sax->error(ctxt->userData,
5526 "Entity reference to unparsed entity %s\n", name);
5527 ctxt->wellFormed = 0;
5528 ctxt->disableSAX = 1;
5529 }
5530
5531 /*
5532 * [ WFC: No External Entity References ]
5533 * Attribute values cannot contain direct or indirect
5534 * entity references to external entities.
5535 */
5536 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5537 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5538 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5539 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5540 ctxt->sax->error(ctxt->userData,
5541 "Attribute references external entity '%s'\n", name);
5542 ctxt->wellFormed = 0;
5543 ctxt->disableSAX = 1;
5544 }
5545 /*
5546 * [ WFC: No < in Attribute Values ]
5547 * The replacement text of any entity referred to directly or
5548 * indirectly in an attribute value (other than "&lt;") must
5549 * not contain a <.
5550 */
5551 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5552 (ent != NULL) &&
5553 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5554 (ent->content != NULL) &&
5555 (xmlStrchr(ent->content, '<'))) {
5556 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5558 ctxt->sax->error(ctxt->userData,
5559 "'<' in entity '%s' is not allowed in attributes values\n", name);
5560 ctxt->wellFormed = 0;
5561 ctxt->disableSAX = 1;
5562 }
5563
5564 /*
5565 * Internal check, no parameter entities here ...
5566 */
5567 else {
5568 switch (ent->etype) {
5569 case XML_INTERNAL_PARAMETER_ENTITY:
5570 case XML_EXTERNAL_PARAMETER_ENTITY:
5571 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5573 ctxt->sax->error(ctxt->userData,
5574 "Attempt to reference the parameter entity '%s'\n", name);
5575 ctxt->wellFormed = 0;
5576 ctxt->disableSAX = 1;
5577 break;
5578 default:
5579 break;
5580 }
5581 }
5582
5583 /*
5584 * [ WFC: No Recursion ]
5585 * A parsed entity must not contain a recursive reference
5586 * to itself, either directly or indirectly.
5587 * Done somewhwere else
5588 */
5589
5590 } else {
5591 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5593 ctxt->sax->error(ctxt->userData,
5594 "xmlParseEntityRef: expecting ';'\n");
5595 ctxt->wellFormed = 0;
5596 ctxt->disableSAX = 1;
5597 }
5598 xmlFree(name);
5599 }
5600 }
5601 *str = ptr;
5602 return(ent);
5603}
5604
5605/**
5606 * xmlParsePEReference:
5607 * @ctxt: an XML parser context
5608 *
5609 * parse PEReference declarations
5610 * The entity content is handled directly by pushing it's content as
5611 * a new input stream.
5612 *
5613 * [69] PEReference ::= '%' Name ';'
5614 *
5615 * [ WFC: No Recursion ]
5616 * A parsed entity must not contain a recursive
5617 * reference to itself, either directly or indirectly.
5618 *
5619 * [ WFC: Entity Declared ]
5620 * In a document without any DTD, a document with only an internal DTD
5621 * subset which contains no parameter entity references, or a document
5622 * with "standalone='yes'", ... ... The declaration of a parameter
5623 * entity must precede any reference to it...
5624 *
5625 * [ VC: Entity Declared ]
5626 * In a document with an external subset or external parameter entities
5627 * with "standalone='no'", ... ... The declaration of a parameter entity
5628 * must precede any reference to it...
5629 *
5630 * [ WFC: In DTD ]
5631 * Parameter-entity references may only appear in the DTD.
5632 * NOTE: misleading but this is handled.
5633 */
5634void
5635xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5636 xmlChar *name;
5637 xmlEntityPtr entity = NULL;
5638 xmlParserInputPtr input;
5639
5640 if (RAW == '%') {
5641 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005642 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005643 if (name == NULL) {
5644 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5646 ctxt->sax->error(ctxt->userData,
5647 "xmlParsePEReference: no name\n");
5648 ctxt->wellFormed = 0;
5649 ctxt->disableSAX = 1;
5650 } else {
5651 if (RAW == ';') {
5652 NEXT;
5653 if ((ctxt->sax != NULL) &&
5654 (ctxt->sax->getParameterEntity != NULL))
5655 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5656 name);
5657 if (entity == NULL) {
5658 /*
5659 * [ WFC: Entity Declared ]
5660 * In a document without any DTD, a document with only an
5661 * internal DTD subset which contains no parameter entity
5662 * references, or a document with "standalone='yes'", ...
5663 * ... The declaration of a parameter entity must precede
5664 * any reference to it...
5665 */
5666 if ((ctxt->standalone == 1) ||
5667 ((ctxt->hasExternalSubset == 0) &&
5668 (ctxt->hasPErefs == 0))) {
5669 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5670 if ((!ctxt->disableSAX) &&
5671 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5672 ctxt->sax->error(ctxt->userData,
5673 "PEReference: %%%s; not found\n", name);
5674 ctxt->wellFormed = 0;
5675 ctxt->disableSAX = 1;
5676 } else {
5677 /*
5678 * [ VC: Entity Declared ]
5679 * In a document with an external subset or external
5680 * parameter entities with "standalone='no'", ...
5681 * ... The declaration of a parameter entity must precede
5682 * any reference to it...
5683 */
5684 if ((!ctxt->disableSAX) &&
5685 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5686 ctxt->sax->warning(ctxt->userData,
5687 "PEReference: %%%s; not found\n", name);
5688 ctxt->valid = 0;
5689 }
5690 } else {
5691 /*
5692 * Internal checking in case the entity quest barfed
5693 */
5694 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5695 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5696 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5697 ctxt->sax->warning(ctxt->userData,
5698 "Internal: %%%s; is not a parameter entity\n", name);
5699 } else {
5700 /*
5701 * TODO !!!
5702 * handle the extra spaces added before and after
5703 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5704 */
5705 input = xmlNewEntityInputStream(ctxt, entity);
5706 xmlPushInput(ctxt, input);
5707 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5708 (RAW == '<') && (NXT(1) == '?') &&
5709 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5710 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5711 xmlParseTextDecl(ctxt);
5712 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5713 /*
5714 * The XML REC instructs us to stop parsing
5715 * right here
5716 */
5717 ctxt->instate = XML_PARSER_EOF;
5718 xmlFree(name);
5719 return;
5720 }
5721 }
5722 if (ctxt->token == 0)
5723 ctxt->token = ' ';
5724 }
5725 }
5726 ctxt->hasPErefs = 1;
5727 } else {
5728 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5730 ctxt->sax->error(ctxt->userData,
5731 "xmlParsePEReference: expecting ';'\n");
5732 ctxt->wellFormed = 0;
5733 ctxt->disableSAX = 1;
5734 }
5735 xmlFree(name);
5736 }
5737 }
5738}
5739
5740/**
5741 * xmlParseStringPEReference:
5742 * @ctxt: an XML parser context
5743 * @str: a pointer to an index in the string
5744 *
5745 * parse PEReference declarations
5746 *
5747 * [69] PEReference ::= '%' Name ';'
5748 *
5749 * [ WFC: No Recursion ]
5750 * A parsed entity must not contain a recursive
5751 * reference to itself, either directly or indirectly.
5752 *
5753 * [ WFC: Entity Declared ]
5754 * In a document without any DTD, a document with only an internal DTD
5755 * subset which contains no parameter entity references, or a document
5756 * with "standalone='yes'", ... ... The declaration of a parameter
5757 * entity must precede any reference to it...
5758 *
5759 * [ VC: Entity Declared ]
5760 * In a document with an external subset or external parameter entities
5761 * with "standalone='no'", ... ... The declaration of a parameter entity
5762 * must precede any reference to it...
5763 *
5764 * [ WFC: In DTD ]
5765 * Parameter-entity references may only appear in the DTD.
5766 * NOTE: misleading but this is handled.
5767 *
5768 * Returns the string of the entity content.
5769 * str is updated to the current value of the index
5770 */
5771xmlEntityPtr
5772xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5773 const xmlChar *ptr;
5774 xmlChar cur;
5775 xmlChar *name;
5776 xmlEntityPtr entity = NULL;
5777
5778 if ((str == NULL) || (*str == NULL)) return(NULL);
5779 ptr = *str;
5780 cur = *ptr;
5781 if (cur == '%') {
5782 ptr++;
5783 cur = *ptr;
5784 name = xmlParseStringName(ctxt, &ptr);
5785 if (name == NULL) {
5786 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5788 ctxt->sax->error(ctxt->userData,
5789 "xmlParseStringPEReference: no name\n");
5790 ctxt->wellFormed = 0;
5791 ctxt->disableSAX = 1;
5792 } else {
5793 cur = *ptr;
5794 if (cur == ';') {
5795 ptr++;
5796 cur = *ptr;
5797 if ((ctxt->sax != NULL) &&
5798 (ctxt->sax->getParameterEntity != NULL))
5799 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5800 name);
5801 if (entity == NULL) {
5802 /*
5803 * [ WFC: Entity Declared ]
5804 * In a document without any DTD, a document with only an
5805 * internal DTD subset which contains no parameter entity
5806 * references, or a document with "standalone='yes'", ...
5807 * ... The declaration of a parameter entity must precede
5808 * any reference to it...
5809 */
5810 if ((ctxt->standalone == 1) ||
5811 ((ctxt->hasExternalSubset == 0) &&
5812 (ctxt->hasPErefs == 0))) {
5813 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5815 ctxt->sax->error(ctxt->userData,
5816 "PEReference: %%%s; not found\n", name);
5817 ctxt->wellFormed = 0;
5818 ctxt->disableSAX = 1;
5819 } else {
5820 /*
5821 * [ VC: Entity Declared ]
5822 * In a document with an external subset or external
5823 * parameter entities with "standalone='no'", ...
5824 * ... The declaration of a parameter entity must
5825 * precede any reference to it...
5826 */
5827 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5828 ctxt->sax->warning(ctxt->userData,
5829 "PEReference: %%%s; not found\n", name);
5830 ctxt->valid = 0;
5831 }
5832 } else {
5833 /*
5834 * Internal checking in case the entity quest barfed
5835 */
5836 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5837 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5838 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5839 ctxt->sax->warning(ctxt->userData,
5840 "Internal: %%%s; is not a parameter entity\n", name);
5841 }
5842 }
5843 ctxt->hasPErefs = 1;
5844 } else {
5845 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5847 ctxt->sax->error(ctxt->userData,
5848 "xmlParseStringPEReference: expecting ';'\n");
5849 ctxt->wellFormed = 0;
5850 ctxt->disableSAX = 1;
5851 }
5852 xmlFree(name);
5853 }
5854 }
5855 *str = ptr;
5856 return(entity);
5857}
5858
5859/**
5860 * xmlParseDocTypeDecl:
5861 * @ctxt: an XML parser context
5862 *
5863 * parse a DOCTYPE declaration
5864 *
5865 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5866 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5867 *
5868 * [ VC: Root Element Type ]
5869 * The Name in the document type declaration must match the element
5870 * type of the root element.
5871 */
5872
5873void
5874xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5875 xmlChar *name = NULL;
5876 xmlChar *ExternalID = NULL;
5877 xmlChar *URI = NULL;
5878
5879 /*
5880 * We know that '<!DOCTYPE' has been detected.
5881 */
5882 SKIP(9);
5883
5884 SKIP_BLANKS;
5885
5886 /*
5887 * Parse the DOCTYPE name.
5888 */
5889 name = xmlParseName(ctxt);
5890 if (name == NULL) {
5891 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5893 ctxt->sax->error(ctxt->userData,
5894 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5895 ctxt->wellFormed = 0;
5896 ctxt->disableSAX = 1;
5897 }
5898 ctxt->intSubName = name;
5899
5900 SKIP_BLANKS;
5901
5902 /*
5903 * Check for SystemID and ExternalID
5904 */
5905 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5906
5907 if ((URI != NULL) || (ExternalID != NULL)) {
5908 ctxt->hasExternalSubset = 1;
5909 }
5910 ctxt->extSubURI = URI;
5911 ctxt->extSubSystem = ExternalID;
5912
5913 SKIP_BLANKS;
5914
5915 /*
5916 * Create and update the internal subset.
5917 */
5918 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5919 (!ctxt->disableSAX))
5920 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5921
5922 /*
5923 * Is there any internal subset declarations ?
5924 * they are handled separately in xmlParseInternalSubset()
5925 */
5926 if (RAW == '[')
5927 return;
5928
5929 /*
5930 * We should be at the end of the DOCTYPE declaration.
5931 */
5932 if (RAW != '>') {
5933 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5935 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5936 ctxt->wellFormed = 0;
5937 ctxt->disableSAX = 1;
5938 }
5939 NEXT;
5940}
5941
5942/**
5943 * xmlParseInternalsubset:
5944 * @ctxt: an XML parser context
5945 *
5946 * parse the internal subset declaration
5947 *
5948 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5949 */
5950
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005951static void
Owen Taylor3473f882001-02-23 17:55:21 +00005952xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5953 /*
5954 * Is there any DTD definition ?
5955 */
5956 if (RAW == '[') {
5957 ctxt->instate = XML_PARSER_DTD;
5958 NEXT;
5959 /*
5960 * Parse the succession of Markup declarations and
5961 * PEReferences.
5962 * Subsequence (markupdecl | PEReference | S)*
5963 */
5964 while (RAW != ']') {
5965 const xmlChar *check = CUR_PTR;
5966 int cons = ctxt->input->consumed;
5967
5968 SKIP_BLANKS;
5969 xmlParseMarkupDecl(ctxt);
5970 xmlParsePEReference(ctxt);
5971
5972 /*
5973 * Pop-up of finished entities.
5974 */
5975 while ((RAW == 0) && (ctxt->inputNr > 1))
5976 xmlPopInput(ctxt);
5977
5978 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5979 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5981 ctxt->sax->error(ctxt->userData,
5982 "xmlParseInternalSubset: error detected in Markup declaration\n");
5983 ctxt->wellFormed = 0;
5984 ctxt->disableSAX = 1;
5985 break;
5986 }
5987 }
5988 if (RAW == ']') {
5989 NEXT;
5990 SKIP_BLANKS;
5991 }
5992 }
5993
5994 /*
5995 * We should be at the end of the DOCTYPE declaration.
5996 */
5997 if (RAW != '>') {
5998 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6000 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6001 ctxt->wellFormed = 0;
6002 ctxt->disableSAX = 1;
6003 }
6004 NEXT;
6005}
6006
6007/**
6008 * xmlParseAttribute:
6009 * @ctxt: an XML parser context
6010 * @value: a xmlChar ** used to store the value of the attribute
6011 *
6012 * parse an attribute
6013 *
6014 * [41] Attribute ::= Name Eq AttValue
6015 *
6016 * [ WFC: No External Entity References ]
6017 * Attribute values cannot contain direct or indirect entity references
6018 * to external entities.
6019 *
6020 * [ WFC: No < in Attribute Values ]
6021 * The replacement text of any entity referred to directly or indirectly in
6022 * an attribute value (other than "&lt;") must not contain a <.
6023 *
6024 * [ VC: Attribute Value Type ]
6025 * The attribute must have been declared; the value must be of the type
6026 * declared for it.
6027 *
6028 * [25] Eq ::= S? '=' S?
6029 *
6030 * With namespace:
6031 *
6032 * [NS 11] Attribute ::= QName Eq AttValue
6033 *
6034 * Also the case QName == xmlns:??? is handled independently as a namespace
6035 * definition.
6036 *
6037 * Returns the attribute name, and the value in *value.
6038 */
6039
6040xmlChar *
6041xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6042 xmlChar *name, *val;
6043
6044 *value = NULL;
6045 name = xmlParseName(ctxt);
6046 if (name == NULL) {
6047 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6049 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6050 ctxt->wellFormed = 0;
6051 ctxt->disableSAX = 1;
6052 return(NULL);
6053 }
6054
6055 /*
6056 * read the value
6057 */
6058 SKIP_BLANKS;
6059 if (RAW == '=') {
6060 NEXT;
6061 SKIP_BLANKS;
6062 val = xmlParseAttValue(ctxt);
6063 ctxt->instate = XML_PARSER_CONTENT;
6064 } else {
6065 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6067 ctxt->sax->error(ctxt->userData,
6068 "Specification mandate value for attribute %s\n", name);
6069 ctxt->wellFormed = 0;
6070 ctxt->disableSAX = 1;
6071 xmlFree(name);
6072 return(NULL);
6073 }
6074
6075 /*
6076 * Check that xml:lang conforms to the specification
6077 * No more registered as an error, just generate a warning now
6078 * since this was deprecated in XML second edition
6079 */
6080 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6081 if (!xmlCheckLanguageID(val)) {
6082 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6083 ctxt->sax->warning(ctxt->userData,
6084 "Malformed value for xml:lang : %s\n", val);
6085 }
6086 }
6087
6088 /*
6089 * Check that xml:space conforms to the specification
6090 */
6091 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6092 if (xmlStrEqual(val, BAD_CAST "default"))
6093 *(ctxt->space) = 0;
6094 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6095 *(ctxt->space) = 1;
6096 else {
6097 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6099 ctxt->sax->error(ctxt->userData,
6100"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6101 val);
6102 ctxt->wellFormed = 0;
6103 ctxt->disableSAX = 1;
6104 }
6105 }
6106
6107 *value = val;
6108 return(name);
6109}
6110
6111/**
6112 * xmlParseStartTag:
6113 * @ctxt: an XML parser context
6114 *
6115 * parse a start of tag either for rule element or
6116 * EmptyElement. In both case we don't parse the tag closing chars.
6117 *
6118 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6119 *
6120 * [ WFC: Unique Att Spec ]
6121 * No attribute name may appear more than once in the same start-tag or
6122 * empty-element tag.
6123 *
6124 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6125 *
6126 * [ WFC: Unique Att Spec ]
6127 * No attribute name may appear more than once in the same start-tag or
6128 * empty-element tag.
6129 *
6130 * With namespace:
6131 *
6132 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6133 *
6134 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6135 *
6136 * Returns the element name parsed
6137 */
6138
6139xmlChar *
6140xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6141 xmlChar *name;
6142 xmlChar *attname;
6143 xmlChar *attvalue;
6144 const xmlChar **atts = NULL;
6145 int nbatts = 0;
6146 int maxatts = 0;
6147 int i;
6148
6149 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006150 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006151
6152 name = xmlParseName(ctxt);
6153 if (name == NULL) {
6154 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6155 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6156 ctxt->sax->error(ctxt->userData,
6157 "xmlParseStartTag: invalid element name\n");
6158 ctxt->wellFormed = 0;
6159 ctxt->disableSAX = 1;
6160 return(NULL);
6161 }
6162
6163 /*
6164 * Now parse the attributes, it ends up with the ending
6165 *
6166 * (S Attribute)* S?
6167 */
6168 SKIP_BLANKS;
6169 GROW;
6170
Daniel Veillard21a0f912001-02-25 19:54:14 +00006171 while ((RAW != '>') &&
6172 ((RAW != '/') || (NXT(1) != '>')) &&
6173 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006174 const xmlChar *q = CUR_PTR;
6175 int cons = ctxt->input->consumed;
6176
6177 attname = xmlParseAttribute(ctxt, &attvalue);
6178 if ((attname != NULL) && (attvalue != NULL)) {
6179 /*
6180 * [ WFC: Unique Att Spec ]
6181 * No attribute name may appear more than once in the same
6182 * start-tag or empty-element tag.
6183 */
6184 for (i = 0; i < nbatts;i += 2) {
6185 if (xmlStrEqual(atts[i], attname)) {
6186 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6188 ctxt->sax->error(ctxt->userData,
6189 "Attribute %s redefined\n",
6190 attname);
6191 ctxt->wellFormed = 0;
6192 ctxt->disableSAX = 1;
6193 xmlFree(attname);
6194 xmlFree(attvalue);
6195 goto failed;
6196 }
6197 }
6198
6199 /*
6200 * Add the pair to atts
6201 */
6202 if (atts == NULL) {
6203 maxatts = 10;
6204 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6205 if (atts == NULL) {
6206 xmlGenericError(xmlGenericErrorContext,
6207 "malloc of %ld byte failed\n",
6208 maxatts * (long)sizeof(xmlChar *));
6209 return(NULL);
6210 }
6211 } else if (nbatts + 4 > maxatts) {
6212 maxatts *= 2;
6213 atts = (const xmlChar **) xmlRealloc((void *) atts,
6214 maxatts * sizeof(xmlChar *));
6215 if (atts == NULL) {
6216 xmlGenericError(xmlGenericErrorContext,
6217 "realloc of %ld byte failed\n",
6218 maxatts * (long)sizeof(xmlChar *));
6219 return(NULL);
6220 }
6221 }
6222 atts[nbatts++] = attname;
6223 atts[nbatts++] = attvalue;
6224 atts[nbatts] = NULL;
6225 atts[nbatts + 1] = NULL;
6226 } else {
6227 if (attname != NULL)
6228 xmlFree(attname);
6229 if (attvalue != NULL)
6230 xmlFree(attvalue);
6231 }
6232
6233failed:
6234
6235 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6236 break;
6237 if (!IS_BLANK(RAW)) {
6238 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6240 ctxt->sax->error(ctxt->userData,
6241 "attributes construct error\n");
6242 ctxt->wellFormed = 0;
6243 ctxt->disableSAX = 1;
6244 }
6245 SKIP_BLANKS;
6246 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6247 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6249 ctxt->sax->error(ctxt->userData,
6250 "xmlParseStartTag: problem parsing attributes\n");
6251 ctxt->wellFormed = 0;
6252 ctxt->disableSAX = 1;
6253 break;
6254 }
6255 GROW;
6256 }
6257
6258 /*
6259 * SAX: Start of Element !
6260 */
6261 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6262 (!ctxt->disableSAX))
6263 ctxt->sax->startElement(ctxt->userData, name, atts);
6264
6265 if (atts != NULL) {
6266 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6267 xmlFree((void *) atts);
6268 }
6269 return(name);
6270}
6271
6272/**
6273 * xmlParseEndTag:
6274 * @ctxt: an XML parser context
6275 *
6276 * parse an end of tag
6277 *
6278 * [42] ETag ::= '</' Name S? '>'
6279 *
6280 * With namespace
6281 *
6282 * [NS 9] ETag ::= '</' QName S? '>'
6283 */
6284
6285void
6286xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6287 xmlChar *name;
6288 xmlChar *oldname;
6289
6290 GROW;
6291 if ((RAW != '<') || (NXT(1) != '/')) {
6292 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6294 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6295 ctxt->wellFormed = 0;
6296 ctxt->disableSAX = 1;
6297 return;
6298 }
6299 SKIP(2);
6300
6301 name = xmlParseName(ctxt);
6302
6303 /*
6304 * We should definitely be at the ending "S? '>'" part
6305 */
6306 GROW;
6307 SKIP_BLANKS;
6308 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6309 ctxt->errNo = XML_ERR_GT_REQUIRED;
6310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6311 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6312 ctxt->wellFormed = 0;
6313 ctxt->disableSAX = 1;
6314 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006315 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006316
6317 /*
6318 * [ WFC: Element Type Match ]
6319 * The Name in an element's end-tag must match the element type in the
6320 * start-tag.
6321 *
6322 */
6323 if ((name == NULL) || (ctxt->name == NULL) ||
6324 (!xmlStrEqual(name, ctxt->name))) {
6325 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6327 if ((name != NULL) && (ctxt->name != NULL)) {
6328 ctxt->sax->error(ctxt->userData,
6329 "Opening and ending tag mismatch: %s and %s\n",
6330 ctxt->name, name);
6331 } else if (ctxt->name != NULL) {
6332 ctxt->sax->error(ctxt->userData,
6333 "Ending tag eror for: %s\n", ctxt->name);
6334 } else {
6335 ctxt->sax->error(ctxt->userData,
6336 "Ending tag error: internal error ???\n");
6337 }
6338
6339 }
6340 ctxt->wellFormed = 0;
6341 ctxt->disableSAX = 1;
6342 }
6343
6344 /*
6345 * SAX: End of Tag
6346 */
6347 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6348 (!ctxt->disableSAX))
6349 ctxt->sax->endElement(ctxt->userData, name);
6350
6351 if (name != NULL)
6352 xmlFree(name);
6353 oldname = namePop(ctxt);
6354 spacePop(ctxt);
6355 if (oldname != NULL) {
6356#ifdef DEBUG_STACK
6357 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6358#endif
6359 xmlFree(oldname);
6360 }
6361 return;
6362}
6363
6364/**
6365 * xmlParseCDSect:
6366 * @ctxt: an XML parser context
6367 *
6368 * Parse escaped pure raw content.
6369 *
6370 * [18] CDSect ::= CDStart CData CDEnd
6371 *
6372 * [19] CDStart ::= '<![CDATA['
6373 *
6374 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6375 *
6376 * [21] CDEnd ::= ']]>'
6377 */
6378void
6379xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6380 xmlChar *buf = NULL;
6381 int len = 0;
6382 int size = XML_PARSER_BUFFER_SIZE;
6383 int r, rl;
6384 int s, sl;
6385 int cur, l;
6386 int count = 0;
6387
6388 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6389 (NXT(2) == '[') && (NXT(3) == 'C') &&
6390 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6391 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6392 (NXT(8) == '[')) {
6393 SKIP(9);
6394 } else
6395 return;
6396
6397 ctxt->instate = XML_PARSER_CDATA_SECTION;
6398 r = CUR_CHAR(rl);
6399 if (!IS_CHAR(r)) {
6400 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6402 ctxt->sax->error(ctxt->userData,
6403 "CData section not finished\n");
6404 ctxt->wellFormed = 0;
6405 ctxt->disableSAX = 1;
6406 ctxt->instate = XML_PARSER_CONTENT;
6407 return;
6408 }
6409 NEXTL(rl);
6410 s = CUR_CHAR(sl);
6411 if (!IS_CHAR(s)) {
6412 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6414 ctxt->sax->error(ctxt->userData,
6415 "CData section not finished\n");
6416 ctxt->wellFormed = 0;
6417 ctxt->disableSAX = 1;
6418 ctxt->instate = XML_PARSER_CONTENT;
6419 return;
6420 }
6421 NEXTL(sl);
6422 cur = CUR_CHAR(l);
6423 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6424 if (buf == NULL) {
6425 xmlGenericError(xmlGenericErrorContext,
6426 "malloc of %d byte failed\n", size);
6427 return;
6428 }
6429 while (IS_CHAR(cur) &&
6430 ((r != ']') || (s != ']') || (cur != '>'))) {
6431 if (len + 5 >= size) {
6432 size *= 2;
6433 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6434 if (buf == NULL) {
6435 xmlGenericError(xmlGenericErrorContext,
6436 "realloc of %d byte failed\n", size);
6437 return;
6438 }
6439 }
6440 COPY_BUF(rl,buf,len,r);
6441 r = s;
6442 rl = sl;
6443 s = cur;
6444 sl = l;
6445 count++;
6446 if (count > 50) {
6447 GROW;
6448 count = 0;
6449 }
6450 NEXTL(l);
6451 cur = CUR_CHAR(l);
6452 }
6453 buf[len] = 0;
6454 ctxt->instate = XML_PARSER_CONTENT;
6455 if (cur != '>') {
6456 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6457 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6458 ctxt->sax->error(ctxt->userData,
6459 "CData section not finished\n%.50s\n", buf);
6460 ctxt->wellFormed = 0;
6461 ctxt->disableSAX = 1;
6462 xmlFree(buf);
6463 return;
6464 }
6465 NEXTL(l);
6466
6467 /*
6468 * Ok the buffer is to be consumed as cdata.
6469 */
6470 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6471 if (ctxt->sax->cdataBlock != NULL)
6472 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006473 else if (ctxt->sax->characters != NULL)
6474 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006475 }
6476 xmlFree(buf);
6477}
6478
6479/**
6480 * xmlParseContent:
6481 * @ctxt: an XML parser context
6482 *
6483 * Parse a content:
6484 *
6485 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6486 */
6487
6488void
6489xmlParseContent(xmlParserCtxtPtr ctxt) {
6490 GROW;
6491 while (((RAW != 0) || (ctxt->token != 0)) &&
6492 ((RAW != '<') || (NXT(1) != '/'))) {
6493 const xmlChar *test = CUR_PTR;
6494 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006495 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006496 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006497
6498 /*
6499 * Handle possible processed charrefs.
6500 */
6501 if (ctxt->token != 0) {
6502 xmlParseCharData(ctxt, 0);
6503 }
6504 /*
6505 * First case : a Processing Instruction.
6506 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006507 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006508 xmlParsePI(ctxt);
6509 }
6510
6511 /*
6512 * Second case : a CDSection
6513 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006514 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006515 (NXT(2) == '[') && (NXT(3) == 'C') &&
6516 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6517 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6518 (NXT(8) == '[')) {
6519 xmlParseCDSect(ctxt);
6520 }
6521
6522 /*
6523 * Third case : a comment
6524 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006525 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006526 (NXT(2) == '-') && (NXT(3) == '-')) {
6527 xmlParseComment(ctxt);
6528 ctxt->instate = XML_PARSER_CONTENT;
6529 }
6530
6531 /*
6532 * Fourth case : a sub-element.
6533 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006534 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006535 xmlParseElement(ctxt);
6536 }
6537
6538 /*
6539 * Fifth case : a reference. If if has not been resolved,
6540 * parsing returns it's Name, create the node
6541 */
6542
Daniel Veillard21a0f912001-02-25 19:54:14 +00006543 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006544 xmlParseReference(ctxt);
6545 }
6546
6547 /*
6548 * Last case, text. Note that References are handled directly.
6549 */
6550 else {
6551 xmlParseCharData(ctxt, 0);
6552 }
6553
6554 GROW;
6555 /*
6556 * Pop-up of finished entities.
6557 */
6558 while ((RAW == 0) && (ctxt->inputNr > 1))
6559 xmlPopInput(ctxt);
6560 SHRINK;
6561
6562 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6563 (tok == ctxt->token)) {
6564 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6566 ctxt->sax->error(ctxt->userData,
6567 "detected an error in element content\n");
6568 ctxt->wellFormed = 0;
6569 ctxt->disableSAX = 1;
6570 ctxt->instate = XML_PARSER_EOF;
6571 break;
6572 }
6573 }
6574}
6575
6576/**
6577 * xmlParseElement:
6578 * @ctxt: an XML parser context
6579 *
6580 * parse an XML element, this is highly recursive
6581 *
6582 * [39] element ::= EmptyElemTag | STag content ETag
6583 *
6584 * [ WFC: Element Type Match ]
6585 * The Name in an element's end-tag must match the element type in the
6586 * start-tag.
6587 *
6588 * [ VC: Element Valid ]
6589 * An element is valid if there is a declaration matching elementdecl
6590 * where the Name matches the element type and one of the following holds:
6591 * - The declaration matches EMPTY and the element has no content.
6592 * - The declaration matches children and the sequence of child elements
6593 * belongs to the language generated by the regular expression in the
6594 * content model, with optional white space (characters matching the
6595 * nonterminal S) between each pair of child elements.
6596 * - The declaration matches Mixed and the content consists of character
6597 * data and child elements whose types match names in the content model.
6598 * - The declaration matches ANY, and the types of any child elements have
6599 * been declared.
6600 */
6601
6602void
6603xmlParseElement(xmlParserCtxtPtr ctxt) {
6604 const xmlChar *openTag = CUR_PTR;
6605 xmlChar *name;
6606 xmlChar *oldname;
6607 xmlParserNodeInfo node_info;
6608 xmlNodePtr ret;
6609
6610 /* Capture start position */
6611 if (ctxt->record_info) {
6612 node_info.begin_pos = ctxt->input->consumed +
6613 (CUR_PTR - ctxt->input->base);
6614 node_info.begin_line = ctxt->input->line;
6615 }
6616
6617 if (ctxt->spaceNr == 0)
6618 spacePush(ctxt, -1);
6619 else
6620 spacePush(ctxt, *ctxt->space);
6621
6622 name = xmlParseStartTag(ctxt);
6623 if (name == NULL) {
6624 spacePop(ctxt);
6625 return;
6626 }
6627 namePush(ctxt, name);
6628 ret = ctxt->node;
6629
6630 /*
6631 * [ VC: Root Element Type ]
6632 * The Name in the document type declaration must match the element
6633 * type of the root element.
6634 */
6635 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6636 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6637 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6638
6639 /*
6640 * Check for an Empty Element.
6641 */
6642 if ((RAW == '/') && (NXT(1) == '>')) {
6643 SKIP(2);
6644 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6645 (!ctxt->disableSAX))
6646 ctxt->sax->endElement(ctxt->userData, name);
6647 oldname = namePop(ctxt);
6648 spacePop(ctxt);
6649 if (oldname != NULL) {
6650#ifdef DEBUG_STACK
6651 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6652#endif
6653 xmlFree(oldname);
6654 }
6655 if ( ret != NULL && ctxt->record_info ) {
6656 node_info.end_pos = ctxt->input->consumed +
6657 (CUR_PTR - ctxt->input->base);
6658 node_info.end_line = ctxt->input->line;
6659 node_info.node = ret;
6660 xmlParserAddNodeInfo(ctxt, &node_info);
6661 }
6662 return;
6663 }
6664 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006665 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006666 } else {
6667 ctxt->errNo = XML_ERR_GT_REQUIRED;
6668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6669 ctxt->sax->error(ctxt->userData,
6670 "Couldn't find end of Start Tag\n%.30s\n",
6671 openTag);
6672 ctxt->wellFormed = 0;
6673 ctxt->disableSAX = 1;
6674
6675 /*
6676 * end of parsing of this node.
6677 */
6678 nodePop(ctxt);
6679 oldname = namePop(ctxt);
6680 spacePop(ctxt);
6681 if (oldname != NULL) {
6682#ifdef DEBUG_STACK
6683 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6684#endif
6685 xmlFree(oldname);
6686 }
6687
6688 /*
6689 * Capture end position and add node
6690 */
6691 if ( ret != NULL && ctxt->record_info ) {
6692 node_info.end_pos = ctxt->input->consumed +
6693 (CUR_PTR - ctxt->input->base);
6694 node_info.end_line = ctxt->input->line;
6695 node_info.node = ret;
6696 xmlParserAddNodeInfo(ctxt, &node_info);
6697 }
6698 return;
6699 }
6700
6701 /*
6702 * Parse the content of the element:
6703 */
6704 xmlParseContent(ctxt);
6705 if (!IS_CHAR(RAW)) {
6706 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6708 ctxt->sax->error(ctxt->userData,
6709 "Premature end of data in tag %.30s\n", openTag);
6710 ctxt->wellFormed = 0;
6711 ctxt->disableSAX = 1;
6712
6713 /*
6714 * end of parsing of this node.
6715 */
6716 nodePop(ctxt);
6717 oldname = namePop(ctxt);
6718 spacePop(ctxt);
6719 if (oldname != NULL) {
6720#ifdef DEBUG_STACK
6721 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6722#endif
6723 xmlFree(oldname);
6724 }
6725 return;
6726 }
6727
6728 /*
6729 * parse the end of tag: '</' should be here.
6730 */
6731 xmlParseEndTag(ctxt);
6732
6733 /*
6734 * Capture end position and add node
6735 */
6736 if ( ret != NULL && ctxt->record_info ) {
6737 node_info.end_pos = ctxt->input->consumed +
6738 (CUR_PTR - ctxt->input->base);
6739 node_info.end_line = ctxt->input->line;
6740 node_info.node = ret;
6741 xmlParserAddNodeInfo(ctxt, &node_info);
6742 }
6743}
6744
6745/**
6746 * xmlParseVersionNum:
6747 * @ctxt: an XML parser context
6748 *
6749 * parse the XML version value.
6750 *
6751 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6752 *
6753 * Returns the string giving the XML version number, or NULL
6754 */
6755xmlChar *
6756xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6757 xmlChar *buf = NULL;
6758 int len = 0;
6759 int size = 10;
6760 xmlChar cur;
6761
6762 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6763 if (buf == NULL) {
6764 xmlGenericError(xmlGenericErrorContext,
6765 "malloc of %d byte failed\n", size);
6766 return(NULL);
6767 }
6768 cur = CUR;
6769 while (((cur >= 'a') && (cur <= 'z')) ||
6770 ((cur >= 'A') && (cur <= 'Z')) ||
6771 ((cur >= '0') && (cur <= '9')) ||
6772 (cur == '_') || (cur == '.') ||
6773 (cur == ':') || (cur == '-')) {
6774 if (len + 1 >= size) {
6775 size *= 2;
6776 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6777 if (buf == NULL) {
6778 xmlGenericError(xmlGenericErrorContext,
6779 "realloc of %d byte failed\n", size);
6780 return(NULL);
6781 }
6782 }
6783 buf[len++] = cur;
6784 NEXT;
6785 cur=CUR;
6786 }
6787 buf[len] = 0;
6788 return(buf);
6789}
6790
6791/**
6792 * xmlParseVersionInfo:
6793 * @ctxt: an XML parser context
6794 *
6795 * parse the XML version.
6796 *
6797 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6798 *
6799 * [25] Eq ::= S? '=' S?
6800 *
6801 * Returns the version string, e.g. "1.0"
6802 */
6803
6804xmlChar *
6805xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6806 xmlChar *version = NULL;
6807 const xmlChar *q;
6808
6809 if ((RAW == 'v') && (NXT(1) == 'e') &&
6810 (NXT(2) == 'r') && (NXT(3) == 's') &&
6811 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6812 (NXT(6) == 'n')) {
6813 SKIP(7);
6814 SKIP_BLANKS;
6815 if (RAW != '=') {
6816 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6817 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6818 ctxt->sax->error(ctxt->userData,
6819 "xmlParseVersionInfo : expected '='\n");
6820 ctxt->wellFormed = 0;
6821 ctxt->disableSAX = 1;
6822 return(NULL);
6823 }
6824 NEXT;
6825 SKIP_BLANKS;
6826 if (RAW == '"') {
6827 NEXT;
6828 q = CUR_PTR;
6829 version = xmlParseVersionNum(ctxt);
6830 if (RAW != '"') {
6831 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6833 ctxt->sax->error(ctxt->userData,
6834 "String not closed\n%.50s\n", q);
6835 ctxt->wellFormed = 0;
6836 ctxt->disableSAX = 1;
6837 } else
6838 NEXT;
6839 } else if (RAW == '\''){
6840 NEXT;
6841 q = CUR_PTR;
6842 version = xmlParseVersionNum(ctxt);
6843 if (RAW != '\'') {
6844 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6846 ctxt->sax->error(ctxt->userData,
6847 "String not closed\n%.50s\n", q);
6848 ctxt->wellFormed = 0;
6849 ctxt->disableSAX = 1;
6850 } else
6851 NEXT;
6852 } else {
6853 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6855 ctxt->sax->error(ctxt->userData,
6856 "xmlParseVersionInfo : expected ' or \"\n");
6857 ctxt->wellFormed = 0;
6858 ctxt->disableSAX = 1;
6859 }
6860 }
6861 return(version);
6862}
6863
6864/**
6865 * xmlParseEncName:
6866 * @ctxt: an XML parser context
6867 *
6868 * parse the XML encoding name
6869 *
6870 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6871 *
6872 * Returns the encoding name value or NULL
6873 */
6874xmlChar *
6875xmlParseEncName(xmlParserCtxtPtr ctxt) {
6876 xmlChar *buf = NULL;
6877 int len = 0;
6878 int size = 10;
6879 xmlChar cur;
6880
6881 cur = CUR;
6882 if (((cur >= 'a') && (cur <= 'z')) ||
6883 ((cur >= 'A') && (cur <= 'Z'))) {
6884 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6885 if (buf == NULL) {
6886 xmlGenericError(xmlGenericErrorContext,
6887 "malloc of %d byte failed\n", size);
6888 return(NULL);
6889 }
6890
6891 buf[len++] = cur;
6892 NEXT;
6893 cur = CUR;
6894 while (((cur >= 'a') && (cur <= 'z')) ||
6895 ((cur >= 'A') && (cur <= 'Z')) ||
6896 ((cur >= '0') && (cur <= '9')) ||
6897 (cur == '.') || (cur == '_') ||
6898 (cur == '-')) {
6899 if (len + 1 >= size) {
6900 size *= 2;
6901 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6902 if (buf == NULL) {
6903 xmlGenericError(xmlGenericErrorContext,
6904 "realloc of %d byte failed\n", size);
6905 return(NULL);
6906 }
6907 }
6908 buf[len++] = cur;
6909 NEXT;
6910 cur = CUR;
6911 if (cur == 0) {
6912 SHRINK;
6913 GROW;
6914 cur = CUR;
6915 }
6916 }
6917 buf[len] = 0;
6918 } else {
6919 ctxt->errNo = XML_ERR_ENCODING_NAME;
6920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6921 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6922 ctxt->wellFormed = 0;
6923 ctxt->disableSAX = 1;
6924 }
6925 return(buf);
6926}
6927
6928/**
6929 * xmlParseEncodingDecl:
6930 * @ctxt: an XML parser context
6931 *
6932 * parse the XML encoding declaration
6933 *
6934 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6935 *
6936 * this setups the conversion filters.
6937 *
6938 * Returns the encoding value or NULL
6939 */
6940
6941xmlChar *
6942xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6943 xmlChar *encoding = NULL;
6944 const xmlChar *q;
6945
6946 SKIP_BLANKS;
6947 if ((RAW == 'e') && (NXT(1) == 'n') &&
6948 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6949 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6950 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6951 SKIP(8);
6952 SKIP_BLANKS;
6953 if (RAW != '=') {
6954 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6956 ctxt->sax->error(ctxt->userData,
6957 "xmlParseEncodingDecl : expected '='\n");
6958 ctxt->wellFormed = 0;
6959 ctxt->disableSAX = 1;
6960 return(NULL);
6961 }
6962 NEXT;
6963 SKIP_BLANKS;
6964 if (RAW == '"') {
6965 NEXT;
6966 q = CUR_PTR;
6967 encoding = xmlParseEncName(ctxt);
6968 if (RAW != '"') {
6969 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6971 ctxt->sax->error(ctxt->userData,
6972 "String not closed\n%.50s\n", q);
6973 ctxt->wellFormed = 0;
6974 ctxt->disableSAX = 1;
6975 } else
6976 NEXT;
6977 } else if (RAW == '\''){
6978 NEXT;
6979 q = CUR_PTR;
6980 encoding = xmlParseEncName(ctxt);
6981 if (RAW != '\'') {
6982 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6984 ctxt->sax->error(ctxt->userData,
6985 "String not closed\n%.50s\n", q);
6986 ctxt->wellFormed = 0;
6987 ctxt->disableSAX = 1;
6988 } else
6989 NEXT;
6990 } else if (RAW == '"'){
6991 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6993 ctxt->sax->error(ctxt->userData,
6994 "xmlParseEncodingDecl : expected ' or \"\n");
6995 ctxt->wellFormed = 0;
6996 ctxt->disableSAX = 1;
6997 }
6998 if (encoding != NULL) {
6999 xmlCharEncoding enc;
7000 xmlCharEncodingHandlerPtr handler;
7001
7002 if (ctxt->input->encoding != NULL)
7003 xmlFree((xmlChar *) ctxt->input->encoding);
7004 ctxt->input->encoding = encoding;
7005
7006 enc = xmlParseCharEncoding((const char *) encoding);
7007 /*
7008 * registered set of known encodings
7009 */
7010 if (enc != XML_CHAR_ENCODING_ERROR) {
7011 xmlSwitchEncoding(ctxt, enc);
7012 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7013 xmlFree(encoding);
7014 return(NULL);
7015 }
7016 } else {
7017 /*
7018 * fallback for unknown encodings
7019 */
7020 handler = xmlFindCharEncodingHandler((const char *) encoding);
7021 if (handler != NULL) {
7022 xmlSwitchToEncoding(ctxt, handler);
7023 } else {
7024 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7026 ctxt->sax->error(ctxt->userData,
7027 "Unsupported encoding %s\n", encoding);
7028 return(NULL);
7029 }
7030 }
7031 }
7032 }
7033 return(encoding);
7034}
7035
7036/**
7037 * xmlParseSDDecl:
7038 * @ctxt: an XML parser context
7039 *
7040 * parse the XML standalone declaration
7041 *
7042 * [32] SDDecl ::= S 'standalone' Eq
7043 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7044 *
7045 * [ VC: Standalone Document Declaration ]
7046 * TODO The standalone document declaration must have the value "no"
7047 * if any external markup declarations contain declarations of:
7048 * - attributes with default values, if elements to which these
7049 * attributes apply appear in the document without specifications
7050 * of values for these attributes, or
7051 * - entities (other than amp, lt, gt, apos, quot), if references
7052 * to those entities appear in the document, or
7053 * - attributes with values subject to normalization, where the
7054 * attribute appears in the document with a value which will change
7055 * as a result of normalization, or
7056 * - element types with element content, if white space occurs directly
7057 * within any instance of those types.
7058 *
7059 * Returns 1 if standalone, 0 otherwise
7060 */
7061
7062int
7063xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7064 int standalone = -1;
7065
7066 SKIP_BLANKS;
7067 if ((RAW == 's') && (NXT(1) == 't') &&
7068 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7069 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7070 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7071 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7072 SKIP(10);
7073 SKIP_BLANKS;
7074 if (RAW != '=') {
7075 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7076 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7077 ctxt->sax->error(ctxt->userData,
7078 "XML standalone declaration : expected '='\n");
7079 ctxt->wellFormed = 0;
7080 ctxt->disableSAX = 1;
7081 return(standalone);
7082 }
7083 NEXT;
7084 SKIP_BLANKS;
7085 if (RAW == '\''){
7086 NEXT;
7087 if ((RAW == 'n') && (NXT(1) == 'o')) {
7088 standalone = 0;
7089 SKIP(2);
7090 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7091 (NXT(2) == 's')) {
7092 standalone = 1;
7093 SKIP(3);
7094 } else {
7095 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7097 ctxt->sax->error(ctxt->userData,
7098 "standalone accepts only 'yes' or 'no'\n");
7099 ctxt->wellFormed = 0;
7100 ctxt->disableSAX = 1;
7101 }
7102 if (RAW != '\'') {
7103 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7105 ctxt->sax->error(ctxt->userData, "String not closed\n");
7106 ctxt->wellFormed = 0;
7107 ctxt->disableSAX = 1;
7108 } else
7109 NEXT;
7110 } else if (RAW == '"'){
7111 NEXT;
7112 if ((RAW == 'n') && (NXT(1) == 'o')) {
7113 standalone = 0;
7114 SKIP(2);
7115 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7116 (NXT(2) == 's')) {
7117 standalone = 1;
7118 SKIP(3);
7119 } else {
7120 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7122 ctxt->sax->error(ctxt->userData,
7123 "standalone accepts only 'yes' or 'no'\n");
7124 ctxt->wellFormed = 0;
7125 ctxt->disableSAX = 1;
7126 }
7127 if (RAW != '"') {
7128 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7130 ctxt->sax->error(ctxt->userData, "String not closed\n");
7131 ctxt->wellFormed = 0;
7132 ctxt->disableSAX = 1;
7133 } else
7134 NEXT;
7135 } else {
7136 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7138 ctxt->sax->error(ctxt->userData,
7139 "Standalone value not found\n");
7140 ctxt->wellFormed = 0;
7141 ctxt->disableSAX = 1;
7142 }
7143 }
7144 return(standalone);
7145}
7146
7147/**
7148 * xmlParseXMLDecl:
7149 * @ctxt: an XML parser context
7150 *
7151 * parse an XML declaration header
7152 *
7153 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7154 */
7155
7156void
7157xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7158 xmlChar *version;
7159
7160 /*
7161 * We know that '<?xml' is here.
7162 */
7163 SKIP(5);
7164
7165 if (!IS_BLANK(RAW)) {
7166 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7167 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7168 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7169 ctxt->wellFormed = 0;
7170 ctxt->disableSAX = 1;
7171 }
7172 SKIP_BLANKS;
7173
7174 /*
7175 * We should have the VersionInfo here.
7176 */
7177 version = xmlParseVersionInfo(ctxt);
7178 if (version == NULL)
7179 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7180 ctxt->version = xmlStrdup(version);
7181 xmlFree(version);
7182
7183 /*
7184 * We may have the encoding declaration
7185 */
7186 if (!IS_BLANK(RAW)) {
7187 if ((RAW == '?') && (NXT(1) == '>')) {
7188 SKIP(2);
7189 return;
7190 }
7191 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7193 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7194 ctxt->wellFormed = 0;
7195 ctxt->disableSAX = 1;
7196 }
7197 xmlParseEncodingDecl(ctxt);
7198 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7199 /*
7200 * The XML REC instructs us to stop parsing right here
7201 */
7202 return;
7203 }
7204
7205 /*
7206 * We may have the standalone status.
7207 */
7208 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7209 if ((RAW == '?') && (NXT(1) == '>')) {
7210 SKIP(2);
7211 return;
7212 }
7213 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7215 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7216 ctxt->wellFormed = 0;
7217 ctxt->disableSAX = 1;
7218 }
7219 SKIP_BLANKS;
7220 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7221
7222 SKIP_BLANKS;
7223 if ((RAW == '?') && (NXT(1) == '>')) {
7224 SKIP(2);
7225 } else if (RAW == '>') {
7226 /* Deprecated old WD ... */
7227 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7229 ctxt->sax->error(ctxt->userData,
7230 "XML declaration must end-up with '?>'\n");
7231 ctxt->wellFormed = 0;
7232 ctxt->disableSAX = 1;
7233 NEXT;
7234 } else {
7235 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7237 ctxt->sax->error(ctxt->userData,
7238 "parsing XML declaration: '?>' expected\n");
7239 ctxt->wellFormed = 0;
7240 ctxt->disableSAX = 1;
7241 MOVETO_ENDTAG(CUR_PTR);
7242 NEXT;
7243 }
7244}
7245
7246/**
7247 * xmlParseMisc:
7248 * @ctxt: an XML parser context
7249 *
7250 * parse an XML Misc* optionnal field.
7251 *
7252 * [27] Misc ::= Comment | PI | S
7253 */
7254
7255void
7256xmlParseMisc(xmlParserCtxtPtr ctxt) {
7257 while (((RAW == '<') && (NXT(1) == '?')) ||
7258 ((RAW == '<') && (NXT(1) == '!') &&
7259 (NXT(2) == '-') && (NXT(3) == '-')) ||
7260 IS_BLANK(CUR)) {
7261 if ((RAW == '<') && (NXT(1) == '?')) {
7262 xmlParsePI(ctxt);
7263 } else if (IS_BLANK(CUR)) {
7264 NEXT;
7265 } else
7266 xmlParseComment(ctxt);
7267 }
7268}
7269
7270/**
7271 * xmlParseDocument:
7272 * @ctxt: an XML parser context
7273 *
7274 * parse an XML document (and build a tree if using the standard SAX
7275 * interface).
7276 *
7277 * [1] document ::= prolog element Misc*
7278 *
7279 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7280 *
7281 * Returns 0, -1 in case of error. the parser context is augmented
7282 * as a result of the parsing.
7283 */
7284
7285int
7286xmlParseDocument(xmlParserCtxtPtr ctxt) {
7287 xmlChar start[4];
7288 xmlCharEncoding enc;
7289
7290 xmlInitParser();
7291
7292 GROW;
7293
7294 /*
7295 * SAX: beginning of the document processing.
7296 */
7297 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7298 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7299
7300 /*
7301 * Get the 4 first bytes and decode the charset
7302 * if enc != XML_CHAR_ENCODING_NONE
7303 * plug some encoding conversion routines.
7304 */
7305 start[0] = RAW;
7306 start[1] = NXT(1);
7307 start[2] = NXT(2);
7308 start[3] = NXT(3);
7309 enc = xmlDetectCharEncoding(start, 4);
7310 if (enc != XML_CHAR_ENCODING_NONE) {
7311 xmlSwitchEncoding(ctxt, enc);
7312 }
7313
7314
7315 if (CUR == 0) {
7316 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7317 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7318 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7319 ctxt->wellFormed = 0;
7320 ctxt->disableSAX = 1;
7321 }
7322
7323 /*
7324 * Check for the XMLDecl in the Prolog.
7325 */
7326 GROW;
7327 if ((RAW == '<') && (NXT(1) == '?') &&
7328 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7329 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7330
7331 /*
7332 * Note that we will switch encoding on the fly.
7333 */
7334 xmlParseXMLDecl(ctxt);
7335 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7336 /*
7337 * The XML REC instructs us to stop parsing right here
7338 */
7339 return(-1);
7340 }
7341 ctxt->standalone = ctxt->input->standalone;
7342 SKIP_BLANKS;
7343 } else {
7344 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7345 }
7346 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7347 ctxt->sax->startDocument(ctxt->userData);
7348
7349 /*
7350 * The Misc part of the Prolog
7351 */
7352 GROW;
7353 xmlParseMisc(ctxt);
7354
7355 /*
7356 * Then possibly doc type declaration(s) and more Misc
7357 * (doctypedecl Misc*)?
7358 */
7359 GROW;
7360 if ((RAW == '<') && (NXT(1) == '!') &&
7361 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7362 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7363 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7364 (NXT(8) == 'E')) {
7365
7366 ctxt->inSubset = 1;
7367 xmlParseDocTypeDecl(ctxt);
7368 if (RAW == '[') {
7369 ctxt->instate = XML_PARSER_DTD;
7370 xmlParseInternalSubset(ctxt);
7371 }
7372
7373 /*
7374 * Create and update the external subset.
7375 */
7376 ctxt->inSubset = 2;
7377 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7378 (!ctxt->disableSAX))
7379 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7380 ctxt->extSubSystem, ctxt->extSubURI);
7381 ctxt->inSubset = 0;
7382
7383
7384 ctxt->instate = XML_PARSER_PROLOG;
7385 xmlParseMisc(ctxt);
7386 }
7387
7388 /*
7389 * Time to start parsing the tree itself
7390 */
7391 GROW;
7392 if (RAW != '<') {
7393 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7394 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7395 ctxt->sax->error(ctxt->userData,
7396 "Start tag expected, '<' not found\n");
7397 ctxt->wellFormed = 0;
7398 ctxt->disableSAX = 1;
7399 ctxt->instate = XML_PARSER_EOF;
7400 } else {
7401 ctxt->instate = XML_PARSER_CONTENT;
7402 xmlParseElement(ctxt);
7403 ctxt->instate = XML_PARSER_EPILOG;
7404
7405
7406 /*
7407 * The Misc part at the end
7408 */
7409 xmlParseMisc(ctxt);
7410
7411 if (RAW != 0) {
7412 ctxt->errNo = XML_ERR_DOCUMENT_END;
7413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7414 ctxt->sax->error(ctxt->userData,
7415 "Extra content at the end of the document\n");
7416 ctxt->wellFormed = 0;
7417 ctxt->disableSAX = 1;
7418 }
7419 ctxt->instate = XML_PARSER_EOF;
7420 }
7421
7422 /*
7423 * SAX: end of the document processing.
7424 */
7425 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7426 (!ctxt->disableSAX))
7427 ctxt->sax->endDocument(ctxt->userData);
7428
7429 if (! ctxt->wellFormed) return(-1);
7430 return(0);
7431}
7432
7433/**
7434 * xmlParseExtParsedEnt:
7435 * @ctxt: an XML parser context
7436 *
7437 * parse a genreral parsed entity
7438 * An external general parsed entity is well-formed if it matches the
7439 * production labeled extParsedEnt.
7440 *
7441 * [78] extParsedEnt ::= TextDecl? content
7442 *
7443 * Returns 0, -1 in case of error. the parser context is augmented
7444 * as a result of the parsing.
7445 */
7446
7447int
7448xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7449 xmlChar start[4];
7450 xmlCharEncoding enc;
7451
7452 xmlDefaultSAXHandlerInit();
7453
7454 GROW;
7455
7456 /*
7457 * SAX: beginning of the document processing.
7458 */
7459 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7460 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7461
7462 /*
7463 * Get the 4 first bytes and decode the charset
7464 * if enc != XML_CHAR_ENCODING_NONE
7465 * plug some encoding conversion routines.
7466 */
7467 start[0] = RAW;
7468 start[1] = NXT(1);
7469 start[2] = NXT(2);
7470 start[3] = NXT(3);
7471 enc = xmlDetectCharEncoding(start, 4);
7472 if (enc != XML_CHAR_ENCODING_NONE) {
7473 xmlSwitchEncoding(ctxt, enc);
7474 }
7475
7476
7477 if (CUR == 0) {
7478 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7479 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7480 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7481 ctxt->wellFormed = 0;
7482 ctxt->disableSAX = 1;
7483 }
7484
7485 /*
7486 * Check for the XMLDecl in the Prolog.
7487 */
7488 GROW;
7489 if ((RAW == '<') && (NXT(1) == '?') &&
7490 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7491 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7492
7493 /*
7494 * Note that we will switch encoding on the fly.
7495 */
7496 xmlParseXMLDecl(ctxt);
7497 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7498 /*
7499 * The XML REC instructs us to stop parsing right here
7500 */
7501 return(-1);
7502 }
7503 SKIP_BLANKS;
7504 } else {
7505 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7506 }
7507 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7508 ctxt->sax->startDocument(ctxt->userData);
7509
7510 /*
7511 * Doing validity checking on chunk doesn't make sense
7512 */
7513 ctxt->instate = XML_PARSER_CONTENT;
7514 ctxt->validate = 0;
7515 ctxt->loadsubset = 0;
7516 ctxt->depth = 0;
7517
7518 xmlParseContent(ctxt);
7519
7520 if ((RAW == '<') && (NXT(1) == '/')) {
7521 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7522 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7523 ctxt->sax->error(ctxt->userData,
7524 "chunk is not well balanced\n");
7525 ctxt->wellFormed = 0;
7526 ctxt->disableSAX = 1;
7527 } else if (RAW != 0) {
7528 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7529 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7530 ctxt->sax->error(ctxt->userData,
7531 "extra content at the end of well balanced chunk\n");
7532 ctxt->wellFormed = 0;
7533 ctxt->disableSAX = 1;
7534 }
7535
7536 /*
7537 * SAX: end of the document processing.
7538 */
7539 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7540 (!ctxt->disableSAX))
7541 ctxt->sax->endDocument(ctxt->userData);
7542
7543 if (! ctxt->wellFormed) return(-1);
7544 return(0);
7545}
7546
7547/************************************************************************
7548 * *
7549 * Progressive parsing interfaces *
7550 * *
7551 ************************************************************************/
7552
7553/**
7554 * xmlParseLookupSequence:
7555 * @ctxt: an XML parser context
7556 * @first: the first char to lookup
7557 * @next: the next char to lookup or zero
7558 * @third: the next char to lookup or zero
7559 *
7560 * Try to find if a sequence (first, next, third) or just (first next) or
7561 * (first) is available in the input stream.
7562 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7563 * to avoid rescanning sequences of bytes, it DOES change the state of the
7564 * parser, do not use liberally.
7565 *
7566 * Returns the index to the current parsing point if the full sequence
7567 * is available, -1 otherwise.
7568 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007569static int
Owen Taylor3473f882001-02-23 17:55:21 +00007570xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7571 xmlChar next, xmlChar third) {
7572 int base, len;
7573 xmlParserInputPtr in;
7574 const xmlChar *buf;
7575
7576 in = ctxt->input;
7577 if (in == NULL) return(-1);
7578 base = in->cur - in->base;
7579 if (base < 0) return(-1);
7580 if (ctxt->checkIndex > base)
7581 base = ctxt->checkIndex;
7582 if (in->buf == NULL) {
7583 buf = in->base;
7584 len = in->length;
7585 } else {
7586 buf = in->buf->buffer->content;
7587 len = in->buf->buffer->use;
7588 }
7589 /* take into account the sequence length */
7590 if (third) len -= 2;
7591 else if (next) len --;
7592 for (;base < len;base++) {
7593 if (buf[base] == first) {
7594 if (third != 0) {
7595 if ((buf[base + 1] != next) ||
7596 (buf[base + 2] != third)) continue;
7597 } else if (next != 0) {
7598 if (buf[base + 1] != next) continue;
7599 }
7600 ctxt->checkIndex = 0;
7601#ifdef DEBUG_PUSH
7602 if (next == 0)
7603 xmlGenericError(xmlGenericErrorContext,
7604 "PP: lookup '%c' found at %d\n",
7605 first, base);
7606 else if (third == 0)
7607 xmlGenericError(xmlGenericErrorContext,
7608 "PP: lookup '%c%c' found at %d\n",
7609 first, next, base);
7610 else
7611 xmlGenericError(xmlGenericErrorContext,
7612 "PP: lookup '%c%c%c' found at %d\n",
7613 first, next, third, base);
7614#endif
7615 return(base - (in->cur - in->base));
7616 }
7617 }
7618 ctxt->checkIndex = base;
7619#ifdef DEBUG_PUSH
7620 if (next == 0)
7621 xmlGenericError(xmlGenericErrorContext,
7622 "PP: lookup '%c' failed\n", first);
7623 else if (third == 0)
7624 xmlGenericError(xmlGenericErrorContext,
7625 "PP: lookup '%c%c' failed\n", first, next);
7626 else
7627 xmlGenericError(xmlGenericErrorContext,
7628 "PP: lookup '%c%c%c' failed\n", first, next, third);
7629#endif
7630 return(-1);
7631}
7632
7633/**
7634 * xmlParseTryOrFinish:
7635 * @ctxt: an XML parser context
7636 * @terminate: last chunk indicator
7637 *
7638 * Try to progress on parsing
7639 *
7640 * Returns zero if no parsing was possible
7641 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007642static int
Owen Taylor3473f882001-02-23 17:55:21 +00007643xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7644 int ret = 0;
7645 int avail;
7646 xmlChar cur, next;
7647
7648#ifdef DEBUG_PUSH
7649 switch (ctxt->instate) {
7650 case XML_PARSER_EOF:
7651 xmlGenericError(xmlGenericErrorContext,
7652 "PP: try EOF\n"); break;
7653 case XML_PARSER_START:
7654 xmlGenericError(xmlGenericErrorContext,
7655 "PP: try START\n"); break;
7656 case XML_PARSER_MISC:
7657 xmlGenericError(xmlGenericErrorContext,
7658 "PP: try MISC\n");break;
7659 case XML_PARSER_COMMENT:
7660 xmlGenericError(xmlGenericErrorContext,
7661 "PP: try COMMENT\n");break;
7662 case XML_PARSER_PROLOG:
7663 xmlGenericError(xmlGenericErrorContext,
7664 "PP: try PROLOG\n");break;
7665 case XML_PARSER_START_TAG:
7666 xmlGenericError(xmlGenericErrorContext,
7667 "PP: try START_TAG\n");break;
7668 case XML_PARSER_CONTENT:
7669 xmlGenericError(xmlGenericErrorContext,
7670 "PP: try CONTENT\n");break;
7671 case XML_PARSER_CDATA_SECTION:
7672 xmlGenericError(xmlGenericErrorContext,
7673 "PP: try CDATA_SECTION\n");break;
7674 case XML_PARSER_END_TAG:
7675 xmlGenericError(xmlGenericErrorContext,
7676 "PP: try END_TAG\n");break;
7677 case XML_PARSER_ENTITY_DECL:
7678 xmlGenericError(xmlGenericErrorContext,
7679 "PP: try ENTITY_DECL\n");break;
7680 case XML_PARSER_ENTITY_VALUE:
7681 xmlGenericError(xmlGenericErrorContext,
7682 "PP: try ENTITY_VALUE\n");break;
7683 case XML_PARSER_ATTRIBUTE_VALUE:
7684 xmlGenericError(xmlGenericErrorContext,
7685 "PP: try ATTRIBUTE_VALUE\n");break;
7686 case XML_PARSER_DTD:
7687 xmlGenericError(xmlGenericErrorContext,
7688 "PP: try DTD\n");break;
7689 case XML_PARSER_EPILOG:
7690 xmlGenericError(xmlGenericErrorContext,
7691 "PP: try EPILOG\n");break;
7692 case XML_PARSER_PI:
7693 xmlGenericError(xmlGenericErrorContext,
7694 "PP: try PI\n");break;
7695 case XML_PARSER_IGNORE:
7696 xmlGenericError(xmlGenericErrorContext,
7697 "PP: try IGNORE\n");break;
7698 }
7699#endif
7700
7701 while (1) {
7702 /*
7703 * Pop-up of finished entities.
7704 */
7705 while ((RAW == 0) && (ctxt->inputNr > 1))
7706 xmlPopInput(ctxt);
7707
7708 if (ctxt->input ==NULL) break;
7709 if (ctxt->input->buf == NULL)
7710 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7711 else
7712 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7713 if (avail < 1)
7714 goto done;
7715 switch (ctxt->instate) {
7716 case XML_PARSER_EOF:
7717 /*
7718 * Document parsing is done !
7719 */
7720 goto done;
7721 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007722 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7723 xmlChar start[4];
7724 xmlCharEncoding enc;
7725
7726 /*
7727 * Very first chars read from the document flow.
7728 */
7729 if (avail < 4)
7730 goto done;
7731
7732 /*
7733 * Get the 4 first bytes and decode the charset
7734 * if enc != XML_CHAR_ENCODING_NONE
7735 * plug some encoding conversion routines.
7736 */
7737 start[0] = RAW;
7738 start[1] = NXT(1);
7739 start[2] = NXT(2);
7740 start[3] = NXT(3);
7741 enc = xmlDetectCharEncoding(start, 4);
7742 if (enc != XML_CHAR_ENCODING_NONE) {
7743 xmlSwitchEncoding(ctxt, enc);
7744 }
7745 break;
7746 }
Owen Taylor3473f882001-02-23 17:55:21 +00007747
7748 cur = ctxt->input->cur[0];
7749 next = ctxt->input->cur[1];
7750 if (cur == 0) {
7751 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7752 ctxt->sax->setDocumentLocator(ctxt->userData,
7753 &xmlDefaultSAXLocator);
7754 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7756 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7757 ctxt->wellFormed = 0;
7758 ctxt->disableSAX = 1;
7759 ctxt->instate = XML_PARSER_EOF;
7760#ifdef DEBUG_PUSH
7761 xmlGenericError(xmlGenericErrorContext,
7762 "PP: entering EOF\n");
7763#endif
7764 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7765 ctxt->sax->endDocument(ctxt->userData);
7766 goto done;
7767 }
7768 if ((cur == '<') && (next == '?')) {
7769 /* PI or XML decl */
7770 if (avail < 5) return(ret);
7771 if ((!terminate) &&
7772 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7773 return(ret);
7774 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7775 ctxt->sax->setDocumentLocator(ctxt->userData,
7776 &xmlDefaultSAXLocator);
7777 if ((ctxt->input->cur[2] == 'x') &&
7778 (ctxt->input->cur[3] == 'm') &&
7779 (ctxt->input->cur[4] == 'l') &&
7780 (IS_BLANK(ctxt->input->cur[5]))) {
7781 ret += 5;
7782#ifdef DEBUG_PUSH
7783 xmlGenericError(xmlGenericErrorContext,
7784 "PP: Parsing XML Decl\n");
7785#endif
7786 xmlParseXMLDecl(ctxt);
7787 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7788 /*
7789 * The XML REC instructs us to stop parsing right
7790 * here
7791 */
7792 ctxt->instate = XML_PARSER_EOF;
7793 return(0);
7794 }
7795 ctxt->standalone = ctxt->input->standalone;
7796 if ((ctxt->encoding == NULL) &&
7797 (ctxt->input->encoding != NULL))
7798 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7799 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7800 (!ctxt->disableSAX))
7801 ctxt->sax->startDocument(ctxt->userData);
7802 ctxt->instate = XML_PARSER_MISC;
7803#ifdef DEBUG_PUSH
7804 xmlGenericError(xmlGenericErrorContext,
7805 "PP: entering MISC\n");
7806#endif
7807 } else {
7808 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7809 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7810 (!ctxt->disableSAX))
7811 ctxt->sax->startDocument(ctxt->userData);
7812 ctxt->instate = XML_PARSER_MISC;
7813#ifdef DEBUG_PUSH
7814 xmlGenericError(xmlGenericErrorContext,
7815 "PP: entering MISC\n");
7816#endif
7817 }
7818 } else {
7819 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7820 ctxt->sax->setDocumentLocator(ctxt->userData,
7821 &xmlDefaultSAXLocator);
7822 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7823 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7824 (!ctxt->disableSAX))
7825 ctxt->sax->startDocument(ctxt->userData);
7826 ctxt->instate = XML_PARSER_MISC;
7827#ifdef DEBUG_PUSH
7828 xmlGenericError(xmlGenericErrorContext,
7829 "PP: entering MISC\n");
7830#endif
7831 }
7832 break;
7833 case XML_PARSER_MISC:
7834 SKIP_BLANKS;
7835 if (ctxt->input->buf == NULL)
7836 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7837 else
7838 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7839 if (avail < 2)
7840 goto done;
7841 cur = ctxt->input->cur[0];
7842 next = ctxt->input->cur[1];
7843 if ((cur == '<') && (next == '?')) {
7844 if ((!terminate) &&
7845 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7846 goto done;
7847#ifdef DEBUG_PUSH
7848 xmlGenericError(xmlGenericErrorContext,
7849 "PP: Parsing PI\n");
7850#endif
7851 xmlParsePI(ctxt);
7852 } else if ((cur == '<') && (next == '!') &&
7853 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7854 if ((!terminate) &&
7855 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7856 goto done;
7857#ifdef DEBUG_PUSH
7858 xmlGenericError(xmlGenericErrorContext,
7859 "PP: Parsing Comment\n");
7860#endif
7861 xmlParseComment(ctxt);
7862 ctxt->instate = XML_PARSER_MISC;
7863 } else if ((cur == '<') && (next == '!') &&
7864 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7865 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7866 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7867 (ctxt->input->cur[8] == 'E')) {
7868 if ((!terminate) &&
7869 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7870 goto done;
7871#ifdef DEBUG_PUSH
7872 xmlGenericError(xmlGenericErrorContext,
7873 "PP: Parsing internal subset\n");
7874#endif
7875 ctxt->inSubset = 1;
7876 xmlParseDocTypeDecl(ctxt);
7877 if (RAW == '[') {
7878 ctxt->instate = XML_PARSER_DTD;
7879#ifdef DEBUG_PUSH
7880 xmlGenericError(xmlGenericErrorContext,
7881 "PP: entering DTD\n");
7882#endif
7883 } else {
7884 /*
7885 * Create and update the external subset.
7886 */
7887 ctxt->inSubset = 2;
7888 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7889 (ctxt->sax->externalSubset != NULL))
7890 ctxt->sax->externalSubset(ctxt->userData,
7891 ctxt->intSubName, ctxt->extSubSystem,
7892 ctxt->extSubURI);
7893 ctxt->inSubset = 0;
7894 ctxt->instate = XML_PARSER_PROLOG;
7895#ifdef DEBUG_PUSH
7896 xmlGenericError(xmlGenericErrorContext,
7897 "PP: entering PROLOG\n");
7898#endif
7899 }
7900 } else if ((cur == '<') && (next == '!') &&
7901 (avail < 9)) {
7902 goto done;
7903 } else {
7904 ctxt->instate = XML_PARSER_START_TAG;
7905#ifdef DEBUG_PUSH
7906 xmlGenericError(xmlGenericErrorContext,
7907 "PP: entering START_TAG\n");
7908#endif
7909 }
7910 break;
7911 case XML_PARSER_IGNORE:
7912 xmlGenericError(xmlGenericErrorContext,
7913 "PP: internal error, state == IGNORE");
7914 ctxt->instate = XML_PARSER_DTD;
7915#ifdef DEBUG_PUSH
7916 xmlGenericError(xmlGenericErrorContext,
7917 "PP: entering DTD\n");
7918#endif
7919 break;
7920 case XML_PARSER_PROLOG:
7921 SKIP_BLANKS;
7922 if (ctxt->input->buf == NULL)
7923 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7924 else
7925 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7926 if (avail < 2)
7927 goto done;
7928 cur = ctxt->input->cur[0];
7929 next = ctxt->input->cur[1];
7930 if ((cur == '<') && (next == '?')) {
7931 if ((!terminate) &&
7932 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7933 goto done;
7934#ifdef DEBUG_PUSH
7935 xmlGenericError(xmlGenericErrorContext,
7936 "PP: Parsing PI\n");
7937#endif
7938 xmlParsePI(ctxt);
7939 } else if ((cur == '<') && (next == '!') &&
7940 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7941 if ((!terminate) &&
7942 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7943 goto done;
7944#ifdef DEBUG_PUSH
7945 xmlGenericError(xmlGenericErrorContext,
7946 "PP: Parsing Comment\n");
7947#endif
7948 xmlParseComment(ctxt);
7949 ctxt->instate = XML_PARSER_PROLOG;
7950 } else if ((cur == '<') && (next == '!') &&
7951 (avail < 4)) {
7952 goto done;
7953 } else {
7954 ctxt->instate = XML_PARSER_START_TAG;
7955#ifdef DEBUG_PUSH
7956 xmlGenericError(xmlGenericErrorContext,
7957 "PP: entering START_TAG\n");
7958#endif
7959 }
7960 break;
7961 case XML_PARSER_EPILOG:
7962 SKIP_BLANKS;
7963 if (ctxt->input->buf == NULL)
7964 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7965 else
7966 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7967 if (avail < 2)
7968 goto done;
7969 cur = ctxt->input->cur[0];
7970 next = ctxt->input->cur[1];
7971 if ((cur == '<') && (next == '?')) {
7972 if ((!terminate) &&
7973 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7974 goto done;
7975#ifdef DEBUG_PUSH
7976 xmlGenericError(xmlGenericErrorContext,
7977 "PP: Parsing PI\n");
7978#endif
7979 xmlParsePI(ctxt);
7980 ctxt->instate = XML_PARSER_EPILOG;
7981 } else if ((cur == '<') && (next == '!') &&
7982 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7983 if ((!terminate) &&
7984 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7985 goto done;
7986#ifdef DEBUG_PUSH
7987 xmlGenericError(xmlGenericErrorContext,
7988 "PP: Parsing Comment\n");
7989#endif
7990 xmlParseComment(ctxt);
7991 ctxt->instate = XML_PARSER_EPILOG;
7992 } else if ((cur == '<') && (next == '!') &&
7993 (avail < 4)) {
7994 goto done;
7995 } else {
7996 ctxt->errNo = XML_ERR_DOCUMENT_END;
7997 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7998 ctxt->sax->error(ctxt->userData,
7999 "Extra content at the end of the document\n");
8000 ctxt->wellFormed = 0;
8001 ctxt->disableSAX = 1;
8002 ctxt->instate = XML_PARSER_EOF;
8003#ifdef DEBUG_PUSH
8004 xmlGenericError(xmlGenericErrorContext,
8005 "PP: entering EOF\n");
8006#endif
8007 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8008 (!ctxt->disableSAX))
8009 ctxt->sax->endDocument(ctxt->userData);
8010 goto done;
8011 }
8012 break;
8013 case XML_PARSER_START_TAG: {
8014 xmlChar *name, *oldname;
8015
8016 if ((avail < 2) && (ctxt->inputNr == 1))
8017 goto done;
8018 cur = ctxt->input->cur[0];
8019 if (cur != '<') {
8020 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8022 ctxt->sax->error(ctxt->userData,
8023 "Start tag expect, '<' not found\n");
8024 ctxt->wellFormed = 0;
8025 ctxt->disableSAX = 1;
8026 ctxt->instate = XML_PARSER_EOF;
8027#ifdef DEBUG_PUSH
8028 xmlGenericError(xmlGenericErrorContext,
8029 "PP: entering EOF\n");
8030#endif
8031 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8032 (!ctxt->disableSAX))
8033 ctxt->sax->endDocument(ctxt->userData);
8034 goto done;
8035 }
8036 if ((!terminate) &&
8037 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8038 goto done;
8039 if (ctxt->spaceNr == 0)
8040 spacePush(ctxt, -1);
8041 else
8042 spacePush(ctxt, *ctxt->space);
8043 name = xmlParseStartTag(ctxt);
8044 if (name == NULL) {
8045 spacePop(ctxt);
8046 ctxt->instate = XML_PARSER_EOF;
8047#ifdef DEBUG_PUSH
8048 xmlGenericError(xmlGenericErrorContext,
8049 "PP: entering EOF\n");
8050#endif
8051 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8052 (!ctxt->disableSAX))
8053 ctxt->sax->endDocument(ctxt->userData);
8054 goto done;
8055 }
8056 namePush(ctxt, xmlStrdup(name));
8057
8058 /*
8059 * [ VC: Root Element Type ]
8060 * The Name in the document type declaration must match
8061 * the element type of the root element.
8062 */
8063 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8064 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8065 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8066
8067 /*
8068 * Check for an Empty Element.
8069 */
8070 if ((RAW == '/') && (NXT(1) == '>')) {
8071 SKIP(2);
8072 if ((ctxt->sax != NULL) &&
8073 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8074 ctxt->sax->endElement(ctxt->userData, name);
8075 xmlFree(name);
8076 oldname = namePop(ctxt);
8077 spacePop(ctxt);
8078 if (oldname != NULL) {
8079#ifdef DEBUG_STACK
8080 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8081#endif
8082 xmlFree(oldname);
8083 }
8084 if (ctxt->name == NULL) {
8085 ctxt->instate = XML_PARSER_EPILOG;
8086#ifdef DEBUG_PUSH
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: entering EPILOG\n");
8089#endif
8090 } else {
8091 ctxt->instate = XML_PARSER_CONTENT;
8092#ifdef DEBUG_PUSH
8093 xmlGenericError(xmlGenericErrorContext,
8094 "PP: entering CONTENT\n");
8095#endif
8096 }
8097 break;
8098 }
8099 if (RAW == '>') {
8100 NEXT;
8101 } else {
8102 ctxt->errNo = XML_ERR_GT_REQUIRED;
8103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8104 ctxt->sax->error(ctxt->userData,
8105 "Couldn't find end of Start Tag %s\n",
8106 name);
8107 ctxt->wellFormed = 0;
8108 ctxt->disableSAX = 1;
8109
8110 /*
8111 * end of parsing of this node.
8112 */
8113 nodePop(ctxt);
8114 oldname = namePop(ctxt);
8115 spacePop(ctxt);
8116 if (oldname != NULL) {
8117#ifdef DEBUG_STACK
8118 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8119#endif
8120 xmlFree(oldname);
8121 }
8122 }
8123 xmlFree(name);
8124 ctxt->instate = XML_PARSER_CONTENT;
8125#ifdef DEBUG_PUSH
8126 xmlGenericError(xmlGenericErrorContext,
8127 "PP: entering CONTENT\n");
8128#endif
8129 break;
8130 }
8131 case XML_PARSER_CONTENT: {
8132 const xmlChar *test;
8133 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008134 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008135
8136 /*
8137 * Handle preparsed entities and charRef
8138 */
8139 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008140 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008141
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008142 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008143 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8144 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008145 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008146 ctxt->token = 0;
8147 }
8148 if ((avail < 2) && (ctxt->inputNr == 1))
8149 goto done;
8150 cur = ctxt->input->cur[0];
8151 next = ctxt->input->cur[1];
8152
8153 test = CUR_PTR;
8154 cons = ctxt->input->consumed;
8155 tok = ctxt->token;
8156 if ((cur == '<') && (next == '?')) {
8157 if ((!terminate) &&
8158 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8159 goto done;
8160#ifdef DEBUG_PUSH
8161 xmlGenericError(xmlGenericErrorContext,
8162 "PP: Parsing PI\n");
8163#endif
8164 xmlParsePI(ctxt);
8165 } else if ((cur == '<') && (next == '!') &&
8166 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8167 if ((!terminate) &&
8168 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8169 goto done;
8170#ifdef DEBUG_PUSH
8171 xmlGenericError(xmlGenericErrorContext,
8172 "PP: Parsing Comment\n");
8173#endif
8174 xmlParseComment(ctxt);
8175 ctxt->instate = XML_PARSER_CONTENT;
8176 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8177 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8178 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8179 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8180 (ctxt->input->cur[8] == '[')) {
8181 SKIP(9);
8182 ctxt->instate = XML_PARSER_CDATA_SECTION;
8183#ifdef DEBUG_PUSH
8184 xmlGenericError(xmlGenericErrorContext,
8185 "PP: entering CDATA_SECTION\n");
8186#endif
8187 break;
8188 } else if ((cur == '<') && (next == '!') &&
8189 (avail < 9)) {
8190 goto done;
8191 } else if ((cur == '<') && (next == '/')) {
8192 ctxt->instate = XML_PARSER_END_TAG;
8193#ifdef DEBUG_PUSH
8194 xmlGenericError(xmlGenericErrorContext,
8195 "PP: entering END_TAG\n");
8196#endif
8197 break;
8198 } else if (cur == '<') {
8199 ctxt->instate = XML_PARSER_START_TAG;
8200#ifdef DEBUG_PUSH
8201 xmlGenericError(xmlGenericErrorContext,
8202 "PP: entering START_TAG\n");
8203#endif
8204 break;
8205 } else if (cur == '&') {
8206 if ((!terminate) &&
8207 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8208 goto done;
8209#ifdef DEBUG_PUSH
8210 xmlGenericError(xmlGenericErrorContext,
8211 "PP: Parsing Reference\n");
8212#endif
8213 xmlParseReference(ctxt);
8214 } else {
8215 /* TODO Avoid the extra copy, handle directly !!! */
8216 /*
8217 * Goal of the following test is:
8218 * - minimize calls to the SAX 'character' callback
8219 * when they are mergeable
8220 * - handle an problem for isBlank when we only parse
8221 * a sequence of blank chars and the next one is
8222 * not available to check against '<' presence.
8223 * - tries to homogenize the differences in SAX
8224 * callbacks beween the push and pull versions
8225 * of the parser.
8226 */
8227 if ((ctxt->inputNr == 1) &&
8228 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8229 if ((!terminate) &&
8230 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8231 goto done;
8232 }
8233 ctxt->checkIndex = 0;
8234#ifdef DEBUG_PUSH
8235 xmlGenericError(xmlGenericErrorContext,
8236 "PP: Parsing char data\n");
8237#endif
8238 xmlParseCharData(ctxt, 0);
8239 }
8240 /*
8241 * Pop-up of finished entities.
8242 */
8243 while ((RAW == 0) && (ctxt->inputNr > 1))
8244 xmlPopInput(ctxt);
8245 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8246 (tok == ctxt->token)) {
8247 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8249 ctxt->sax->error(ctxt->userData,
8250 "detected an error in element content\n");
8251 ctxt->wellFormed = 0;
8252 ctxt->disableSAX = 1;
8253 ctxt->instate = XML_PARSER_EOF;
8254 break;
8255 }
8256 break;
8257 }
8258 case XML_PARSER_CDATA_SECTION: {
8259 /*
8260 * The Push mode need to have the SAX callback for
8261 * cdataBlock merge back contiguous callbacks.
8262 */
8263 int base;
8264
8265 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8266 if (base < 0) {
8267 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8268 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8269 if (ctxt->sax->cdataBlock != NULL)
8270 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8271 XML_PARSER_BIG_BUFFER_SIZE);
8272 }
8273 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8274 ctxt->checkIndex = 0;
8275 }
8276 goto done;
8277 } else {
8278 if ((ctxt->sax != NULL) && (base > 0) &&
8279 (!ctxt->disableSAX)) {
8280 if (ctxt->sax->cdataBlock != NULL)
8281 ctxt->sax->cdataBlock(ctxt->userData,
8282 ctxt->input->cur, base);
8283 }
8284 SKIP(base + 3);
8285 ctxt->checkIndex = 0;
8286 ctxt->instate = XML_PARSER_CONTENT;
8287#ifdef DEBUG_PUSH
8288 xmlGenericError(xmlGenericErrorContext,
8289 "PP: entering CONTENT\n");
8290#endif
8291 }
8292 break;
8293 }
8294 case XML_PARSER_END_TAG:
8295 if (avail < 2)
8296 goto done;
8297 if ((!terminate) &&
8298 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8299 goto done;
8300 xmlParseEndTag(ctxt);
8301 if (ctxt->name == NULL) {
8302 ctxt->instate = XML_PARSER_EPILOG;
8303#ifdef DEBUG_PUSH
8304 xmlGenericError(xmlGenericErrorContext,
8305 "PP: entering EPILOG\n");
8306#endif
8307 } else {
8308 ctxt->instate = XML_PARSER_CONTENT;
8309#ifdef DEBUG_PUSH
8310 xmlGenericError(xmlGenericErrorContext,
8311 "PP: entering CONTENT\n");
8312#endif
8313 }
8314 break;
8315 case XML_PARSER_DTD: {
8316 /*
8317 * Sorry but progressive parsing of the internal subset
8318 * is not expected to be supported. We first check that
8319 * the full content of the internal subset is available and
8320 * the parsing is launched only at that point.
8321 * Internal subset ends up with "']' S? '>'" in an unescaped
8322 * section and not in a ']]>' sequence which are conditional
8323 * sections (whoever argued to keep that crap in XML deserve
8324 * a place in hell !).
8325 */
8326 int base, i;
8327 xmlChar *buf;
8328 xmlChar quote = 0;
8329
8330 base = ctxt->input->cur - ctxt->input->base;
8331 if (base < 0) return(0);
8332 if (ctxt->checkIndex > base)
8333 base = ctxt->checkIndex;
8334 buf = ctxt->input->buf->buffer->content;
8335 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8336 base++) {
8337 if (quote != 0) {
8338 if (buf[base] == quote)
8339 quote = 0;
8340 continue;
8341 }
8342 if (buf[base] == '"') {
8343 quote = '"';
8344 continue;
8345 }
8346 if (buf[base] == '\'') {
8347 quote = '\'';
8348 continue;
8349 }
8350 if (buf[base] == ']') {
8351 if ((unsigned int) base +1 >=
8352 ctxt->input->buf->buffer->use)
8353 break;
8354 if (buf[base + 1] == ']') {
8355 /* conditional crap, skip both ']' ! */
8356 base++;
8357 continue;
8358 }
8359 for (i = 0;
8360 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8361 i++) {
8362 if (buf[base + i] == '>')
8363 goto found_end_int_subset;
8364 }
8365 break;
8366 }
8367 }
8368 /*
8369 * We didn't found the end of the Internal subset
8370 */
8371 if (quote == 0)
8372 ctxt->checkIndex = base;
8373#ifdef DEBUG_PUSH
8374 if (next == 0)
8375 xmlGenericError(xmlGenericErrorContext,
8376 "PP: lookup of int subset end filed\n");
8377#endif
8378 goto done;
8379
8380found_end_int_subset:
8381 xmlParseInternalSubset(ctxt);
8382 ctxt->inSubset = 2;
8383 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8384 (ctxt->sax->externalSubset != NULL))
8385 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8386 ctxt->extSubSystem, ctxt->extSubURI);
8387 ctxt->inSubset = 0;
8388 ctxt->instate = XML_PARSER_PROLOG;
8389 ctxt->checkIndex = 0;
8390#ifdef DEBUG_PUSH
8391 xmlGenericError(xmlGenericErrorContext,
8392 "PP: entering PROLOG\n");
8393#endif
8394 break;
8395 }
8396 case XML_PARSER_COMMENT:
8397 xmlGenericError(xmlGenericErrorContext,
8398 "PP: internal error, state == COMMENT\n");
8399 ctxt->instate = XML_PARSER_CONTENT;
8400#ifdef DEBUG_PUSH
8401 xmlGenericError(xmlGenericErrorContext,
8402 "PP: entering CONTENT\n");
8403#endif
8404 break;
8405 case XML_PARSER_PI:
8406 xmlGenericError(xmlGenericErrorContext,
8407 "PP: internal error, state == PI\n");
8408 ctxt->instate = XML_PARSER_CONTENT;
8409#ifdef DEBUG_PUSH
8410 xmlGenericError(xmlGenericErrorContext,
8411 "PP: entering CONTENT\n");
8412#endif
8413 break;
8414 case XML_PARSER_ENTITY_DECL:
8415 xmlGenericError(xmlGenericErrorContext,
8416 "PP: internal error, state == ENTITY_DECL\n");
8417 ctxt->instate = XML_PARSER_DTD;
8418#ifdef DEBUG_PUSH
8419 xmlGenericError(xmlGenericErrorContext,
8420 "PP: entering DTD\n");
8421#endif
8422 break;
8423 case XML_PARSER_ENTITY_VALUE:
8424 xmlGenericError(xmlGenericErrorContext,
8425 "PP: internal error, state == ENTITY_VALUE\n");
8426 ctxt->instate = XML_PARSER_CONTENT;
8427#ifdef DEBUG_PUSH
8428 xmlGenericError(xmlGenericErrorContext,
8429 "PP: entering DTD\n");
8430#endif
8431 break;
8432 case XML_PARSER_ATTRIBUTE_VALUE:
8433 xmlGenericError(xmlGenericErrorContext,
8434 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8435 ctxt->instate = XML_PARSER_START_TAG;
8436#ifdef DEBUG_PUSH
8437 xmlGenericError(xmlGenericErrorContext,
8438 "PP: entering START_TAG\n");
8439#endif
8440 break;
8441 case XML_PARSER_SYSTEM_LITERAL:
8442 xmlGenericError(xmlGenericErrorContext,
8443 "PP: internal error, state == SYSTEM_LITERAL\n");
8444 ctxt->instate = XML_PARSER_START_TAG;
8445#ifdef DEBUG_PUSH
8446 xmlGenericError(xmlGenericErrorContext,
8447 "PP: entering START_TAG\n");
8448#endif
8449 break;
8450 }
8451 }
8452done:
8453#ifdef DEBUG_PUSH
8454 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8455#endif
8456 return(ret);
8457}
8458
8459/**
Owen Taylor3473f882001-02-23 17:55:21 +00008460 * xmlParseChunk:
8461 * @ctxt: an XML parser context
8462 * @chunk: an char array
8463 * @size: the size in byte of the chunk
8464 * @terminate: last chunk indicator
8465 *
8466 * Parse a Chunk of memory
8467 *
8468 * Returns zero if no error, the xmlParserErrors otherwise.
8469 */
8470int
8471xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8472 int terminate) {
8473 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8474 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8475 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8476 int cur = ctxt->input->cur - ctxt->input->base;
8477
8478 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8479 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8480 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008481 ctxt->input->end =
8482 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008483#ifdef DEBUG_PUSH
8484 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8485#endif
8486
8487 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8488 xmlParseTryOrFinish(ctxt, terminate);
8489 } else if (ctxt->instate != XML_PARSER_EOF) {
8490 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8491 xmlParserInputBufferPtr in = ctxt->input->buf;
8492 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8493 (in->raw != NULL)) {
8494 int nbchars;
8495
8496 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8497 if (nbchars < 0) {
8498 xmlGenericError(xmlGenericErrorContext,
8499 "xmlParseChunk: encoder error\n");
8500 return(XML_ERR_INVALID_ENCODING);
8501 }
8502 }
8503 }
8504 }
8505 xmlParseTryOrFinish(ctxt, terminate);
8506 if (terminate) {
8507 /*
8508 * Check for termination
8509 */
8510 if ((ctxt->instate != XML_PARSER_EOF) &&
8511 (ctxt->instate != XML_PARSER_EPILOG)) {
8512 ctxt->errNo = XML_ERR_DOCUMENT_END;
8513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8514 ctxt->sax->error(ctxt->userData,
8515 "Extra content at the end of the document\n");
8516 ctxt->wellFormed = 0;
8517 ctxt->disableSAX = 1;
8518 }
8519 if (ctxt->instate != XML_PARSER_EOF) {
8520 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8521 (!ctxt->disableSAX))
8522 ctxt->sax->endDocument(ctxt->userData);
8523 }
8524 ctxt->instate = XML_PARSER_EOF;
8525 }
8526 return((xmlParserErrors) ctxt->errNo);
8527}
8528
8529/************************************************************************
8530 * *
8531 * I/O front end functions to the parser *
8532 * *
8533 ************************************************************************/
8534
8535/**
8536 * xmlStopParser:
8537 * @ctxt: an XML parser context
8538 *
8539 * Blocks further parser processing
8540 */
8541void
8542xmlStopParser(xmlParserCtxtPtr ctxt) {
8543 ctxt->instate = XML_PARSER_EOF;
8544 if (ctxt->input != NULL)
8545 ctxt->input->cur = BAD_CAST"";
8546}
8547
8548/**
8549 * xmlCreatePushParserCtxt:
8550 * @sax: a SAX handler
8551 * @user_data: The user data returned on SAX callbacks
8552 * @chunk: a pointer to an array of chars
8553 * @size: number of chars in the array
8554 * @filename: an optional file name or URI
8555 *
8556 * Create a parser context for using the XML parser in push mode
8557 * To allow content encoding detection, @size should be >= 4
8558 * The value of @filename is used for fetching external entities
8559 * and error/warning reports.
8560 *
8561 * Returns the new parser context or NULL
8562 */
8563xmlParserCtxtPtr
8564xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8565 const char *chunk, int size, const char *filename) {
8566 xmlParserCtxtPtr ctxt;
8567 xmlParserInputPtr inputStream;
8568 xmlParserInputBufferPtr buf;
8569 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8570
8571 /*
8572 * plug some encoding conversion routines
8573 */
8574 if ((chunk != NULL) && (size >= 4))
8575 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8576
8577 buf = xmlAllocParserInputBuffer(enc);
8578 if (buf == NULL) return(NULL);
8579
8580 ctxt = xmlNewParserCtxt();
8581 if (ctxt == NULL) {
8582 xmlFree(buf);
8583 return(NULL);
8584 }
8585 if (sax != NULL) {
8586 if (ctxt->sax != &xmlDefaultSAXHandler)
8587 xmlFree(ctxt->sax);
8588 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8589 if (ctxt->sax == NULL) {
8590 xmlFree(buf);
8591 xmlFree(ctxt);
8592 return(NULL);
8593 }
8594 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8595 if (user_data != NULL)
8596 ctxt->userData = user_data;
8597 }
8598 if (filename == NULL) {
8599 ctxt->directory = NULL;
8600 } else {
8601 ctxt->directory = xmlParserGetDirectory(filename);
8602 }
8603
8604 inputStream = xmlNewInputStream(ctxt);
8605 if (inputStream == NULL) {
8606 xmlFreeParserCtxt(ctxt);
8607 return(NULL);
8608 }
8609
8610 if (filename == NULL)
8611 inputStream->filename = NULL;
8612 else
8613 inputStream->filename = xmlMemStrdup(filename);
8614 inputStream->buf = buf;
8615 inputStream->base = inputStream->buf->buffer->content;
8616 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008617 inputStream->end =
8618 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008619
8620 inputPush(ctxt, inputStream);
8621
8622 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8623 (ctxt->input->buf != NULL)) {
8624 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8625#ifdef DEBUG_PUSH
8626 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8627#endif
8628 }
8629
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008630 if (enc != XML_CHAR_ENCODING_NONE) {
8631 xmlSwitchEncoding(ctxt, enc);
8632 }
8633
Owen Taylor3473f882001-02-23 17:55:21 +00008634 return(ctxt);
8635}
8636
8637/**
8638 * xmlCreateIOParserCtxt:
8639 * @sax: a SAX handler
8640 * @user_data: The user data returned on SAX callbacks
8641 * @ioread: an I/O read function
8642 * @ioclose: an I/O close function
8643 * @ioctx: an I/O handler
8644 * @enc: the charset encoding if known
8645 *
8646 * Create a parser context for using the XML parser with an existing
8647 * I/O stream
8648 *
8649 * Returns the new parser context or NULL
8650 */
8651xmlParserCtxtPtr
8652xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8653 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8654 void *ioctx, xmlCharEncoding enc) {
8655 xmlParserCtxtPtr ctxt;
8656 xmlParserInputPtr inputStream;
8657 xmlParserInputBufferPtr buf;
8658
8659 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8660 if (buf == NULL) return(NULL);
8661
8662 ctxt = xmlNewParserCtxt();
8663 if (ctxt == NULL) {
8664 xmlFree(buf);
8665 return(NULL);
8666 }
8667 if (sax != NULL) {
8668 if (ctxt->sax != &xmlDefaultSAXHandler)
8669 xmlFree(ctxt->sax);
8670 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8671 if (ctxt->sax == NULL) {
8672 xmlFree(buf);
8673 xmlFree(ctxt);
8674 return(NULL);
8675 }
8676 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8677 if (user_data != NULL)
8678 ctxt->userData = user_data;
8679 }
8680
8681 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8682 if (inputStream == NULL) {
8683 xmlFreeParserCtxt(ctxt);
8684 return(NULL);
8685 }
8686 inputPush(ctxt, inputStream);
8687
8688 return(ctxt);
8689}
8690
8691/************************************************************************
8692 * *
8693 * Front ends when parsing a Dtd *
8694 * *
8695 ************************************************************************/
8696
8697/**
8698 * xmlIOParseDTD:
8699 * @sax: the SAX handler block or NULL
8700 * @input: an Input Buffer
8701 * @enc: the charset encoding if known
8702 *
8703 * Load and parse a DTD
8704 *
8705 * Returns the resulting xmlDtdPtr or NULL in case of error.
8706 * @input will be freed at parsing end.
8707 */
8708
8709xmlDtdPtr
8710xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8711 xmlCharEncoding enc) {
8712 xmlDtdPtr ret = NULL;
8713 xmlParserCtxtPtr ctxt;
8714 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008715 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008716
8717 if (input == NULL)
8718 return(NULL);
8719
8720 ctxt = xmlNewParserCtxt();
8721 if (ctxt == NULL) {
8722 return(NULL);
8723 }
8724
8725 /*
8726 * Set-up the SAX context
8727 */
8728 if (sax != NULL) {
8729 if (ctxt->sax != NULL)
8730 xmlFree(ctxt->sax);
8731 ctxt->sax = sax;
8732 ctxt->userData = NULL;
8733 }
8734
8735 /*
8736 * generate a parser input from the I/O handler
8737 */
8738
8739 pinput = xmlNewIOInputStream(ctxt, input, enc);
8740 if (pinput == NULL) {
8741 if (sax != NULL) ctxt->sax = NULL;
8742 xmlFreeParserCtxt(ctxt);
8743 return(NULL);
8744 }
8745
8746 /*
8747 * plug some encoding conversion routines here.
8748 */
8749 xmlPushInput(ctxt, pinput);
8750
8751 pinput->filename = NULL;
8752 pinput->line = 1;
8753 pinput->col = 1;
8754 pinput->base = ctxt->input->cur;
8755 pinput->cur = ctxt->input->cur;
8756 pinput->free = NULL;
8757
8758 /*
8759 * let's parse that entity knowing it's an external subset.
8760 */
8761 ctxt->inSubset = 2;
8762 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8763 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8764 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008765
8766 if (enc == XML_CHAR_ENCODING_NONE) {
8767 /*
8768 * Get the 4 first bytes and decode the charset
8769 * if enc != XML_CHAR_ENCODING_NONE
8770 * plug some encoding conversion routines.
8771 */
8772 start[0] = RAW;
8773 start[1] = NXT(1);
8774 start[2] = NXT(2);
8775 start[3] = NXT(3);
8776 enc = xmlDetectCharEncoding(start, 4);
8777 if (enc != XML_CHAR_ENCODING_NONE) {
8778 xmlSwitchEncoding(ctxt, enc);
8779 }
8780 }
8781
Owen Taylor3473f882001-02-23 17:55:21 +00008782 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8783
8784 if (ctxt->myDoc != NULL) {
8785 if (ctxt->wellFormed) {
8786 ret = ctxt->myDoc->extSubset;
8787 ctxt->myDoc->extSubset = NULL;
8788 } else {
8789 ret = NULL;
8790 }
8791 xmlFreeDoc(ctxt->myDoc);
8792 ctxt->myDoc = NULL;
8793 }
8794 if (sax != NULL) ctxt->sax = NULL;
8795 xmlFreeParserCtxt(ctxt);
8796
8797 return(ret);
8798}
8799
8800/**
8801 * xmlSAXParseDTD:
8802 * @sax: the SAX handler block
8803 * @ExternalID: a NAME* containing the External ID of the DTD
8804 * @SystemID: a NAME* containing the URL to the DTD
8805 *
8806 * Load and parse an external subset.
8807 *
8808 * Returns the resulting xmlDtdPtr or NULL in case of error.
8809 */
8810
8811xmlDtdPtr
8812xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8813 const xmlChar *SystemID) {
8814 xmlDtdPtr ret = NULL;
8815 xmlParserCtxtPtr ctxt;
8816 xmlParserInputPtr input = NULL;
8817 xmlCharEncoding enc;
8818
8819 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8820
8821 ctxt = xmlNewParserCtxt();
8822 if (ctxt == NULL) {
8823 return(NULL);
8824 }
8825
8826 /*
8827 * Set-up the SAX context
8828 */
8829 if (sax != NULL) {
8830 if (ctxt->sax != NULL)
8831 xmlFree(ctxt->sax);
8832 ctxt->sax = sax;
8833 ctxt->userData = NULL;
8834 }
8835
8836 /*
8837 * Ask the Entity resolver to load the damn thing
8838 */
8839
8840 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8841 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8842 if (input == NULL) {
8843 if (sax != NULL) ctxt->sax = NULL;
8844 xmlFreeParserCtxt(ctxt);
8845 return(NULL);
8846 }
8847
8848 /*
8849 * plug some encoding conversion routines here.
8850 */
8851 xmlPushInput(ctxt, input);
8852 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8853 xmlSwitchEncoding(ctxt, enc);
8854
8855 if (input->filename == NULL)
8856 input->filename = (char *) xmlStrdup(SystemID);
8857 input->line = 1;
8858 input->col = 1;
8859 input->base = ctxt->input->cur;
8860 input->cur = ctxt->input->cur;
8861 input->free = NULL;
8862
8863 /*
8864 * let's parse that entity knowing it's an external subset.
8865 */
8866 ctxt->inSubset = 2;
8867 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8868 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8869 ExternalID, SystemID);
8870 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8871
8872 if (ctxt->myDoc != NULL) {
8873 if (ctxt->wellFormed) {
8874 ret = ctxt->myDoc->extSubset;
8875 ctxt->myDoc->extSubset = NULL;
8876 } else {
8877 ret = NULL;
8878 }
8879 xmlFreeDoc(ctxt->myDoc);
8880 ctxt->myDoc = NULL;
8881 }
8882 if (sax != NULL) ctxt->sax = NULL;
8883 xmlFreeParserCtxt(ctxt);
8884
8885 return(ret);
8886}
8887
8888/**
8889 * xmlParseDTD:
8890 * @ExternalID: a NAME* containing the External ID of the DTD
8891 * @SystemID: a NAME* containing the URL to the DTD
8892 *
8893 * Load and parse an external subset.
8894 *
8895 * Returns the resulting xmlDtdPtr or NULL in case of error.
8896 */
8897
8898xmlDtdPtr
8899xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8900 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8901}
8902
8903/************************************************************************
8904 * *
8905 * Front ends when parsing an Entity *
8906 * *
8907 ************************************************************************/
8908
8909/**
Owen Taylor3473f882001-02-23 17:55:21 +00008910 * xmlParseCtxtExternalEntity:
8911 * @ctx: the existing parsing context
8912 * @URL: the URL for the entity to load
8913 * @ID: the System ID for the entity to load
8914 * @list: the return value for the set of parsed nodes
8915 *
8916 * Parse an external general entity within an existing parsing context
8917 * An external general parsed entity is well-formed if it matches the
8918 * production labeled extParsedEnt.
8919 *
8920 * [78] extParsedEnt ::= TextDecl? content
8921 *
8922 * Returns 0 if the entity is well formed, -1 in case of args problem and
8923 * the parser error code otherwise
8924 */
8925
8926int
8927xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8928 const xmlChar *ID, xmlNodePtr *list) {
8929 xmlParserCtxtPtr ctxt;
8930 xmlDocPtr newDoc;
8931 xmlSAXHandlerPtr oldsax = NULL;
8932 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008933 xmlChar start[4];
8934 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00008935
8936 if (ctx->depth > 40) {
8937 return(XML_ERR_ENTITY_LOOP);
8938 }
8939
8940 if (list != NULL)
8941 *list = NULL;
8942 if ((URL == NULL) && (ID == NULL))
8943 return(-1);
8944 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8945 return(-1);
8946
8947
8948 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8949 if (ctxt == NULL) return(-1);
8950 ctxt->userData = ctxt;
8951 oldsax = ctxt->sax;
8952 ctxt->sax = ctx->sax;
8953 newDoc = xmlNewDoc(BAD_CAST "1.0");
8954 if (newDoc == NULL) {
8955 xmlFreeParserCtxt(ctxt);
8956 return(-1);
8957 }
8958 if (ctx->myDoc != NULL) {
8959 newDoc->intSubset = ctx->myDoc->intSubset;
8960 newDoc->extSubset = ctx->myDoc->extSubset;
8961 }
8962 if (ctx->myDoc->URL != NULL) {
8963 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8964 }
8965 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8966 if (newDoc->children == NULL) {
8967 ctxt->sax = oldsax;
8968 xmlFreeParserCtxt(ctxt);
8969 newDoc->intSubset = NULL;
8970 newDoc->extSubset = NULL;
8971 xmlFreeDoc(newDoc);
8972 return(-1);
8973 }
8974 nodePush(ctxt, newDoc->children);
8975 if (ctx->myDoc == NULL) {
8976 ctxt->myDoc = newDoc;
8977 } else {
8978 ctxt->myDoc = ctx->myDoc;
8979 newDoc->children->doc = ctx->myDoc;
8980 }
8981
Daniel Veillard87a764e2001-06-20 17:41:10 +00008982 /*
8983 * Get the 4 first bytes and decode the charset
8984 * if enc != XML_CHAR_ENCODING_NONE
8985 * plug some encoding conversion routines.
8986 */
8987 GROW
8988 start[0] = RAW;
8989 start[1] = NXT(1);
8990 start[2] = NXT(2);
8991 start[3] = NXT(3);
8992 enc = xmlDetectCharEncoding(start, 4);
8993 if (enc != XML_CHAR_ENCODING_NONE) {
8994 xmlSwitchEncoding(ctxt, enc);
8995 }
8996
Owen Taylor3473f882001-02-23 17:55:21 +00008997 /*
8998 * Parse a possible text declaration first
8999 */
Owen Taylor3473f882001-02-23 17:55:21 +00009000 if ((RAW == '<') && (NXT(1) == '?') &&
9001 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9002 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9003 xmlParseTextDecl(ctxt);
9004 }
9005
9006 /*
9007 * Doing validity checking on chunk doesn't make sense
9008 */
9009 ctxt->instate = XML_PARSER_CONTENT;
9010 ctxt->validate = ctx->validate;
9011 ctxt->loadsubset = ctx->loadsubset;
9012 ctxt->depth = ctx->depth + 1;
9013 ctxt->replaceEntities = ctx->replaceEntities;
9014 if (ctxt->validate) {
9015 ctxt->vctxt.error = ctx->vctxt.error;
9016 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009017 } else {
9018 ctxt->vctxt.error = NULL;
9019 ctxt->vctxt.warning = NULL;
9020 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009021 ctxt->vctxt.nodeTab = NULL;
9022 ctxt->vctxt.nodeNr = 0;
9023 ctxt->vctxt.nodeMax = 0;
9024 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009025
9026 xmlParseContent(ctxt);
9027
9028 if ((RAW == '<') && (NXT(1) == '/')) {
9029 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9030 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9031 ctxt->sax->error(ctxt->userData,
9032 "chunk is not well balanced\n");
9033 ctxt->wellFormed = 0;
9034 ctxt->disableSAX = 1;
9035 } else if (RAW != 0) {
9036 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9038 ctxt->sax->error(ctxt->userData,
9039 "extra content at the end of well balanced chunk\n");
9040 ctxt->wellFormed = 0;
9041 ctxt->disableSAX = 1;
9042 }
9043 if (ctxt->node != newDoc->children) {
9044 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9046 ctxt->sax->error(ctxt->userData,
9047 "chunk is not well balanced\n");
9048 ctxt->wellFormed = 0;
9049 ctxt->disableSAX = 1;
9050 }
9051
9052 if (!ctxt->wellFormed) {
9053 if (ctxt->errNo == 0)
9054 ret = 1;
9055 else
9056 ret = ctxt->errNo;
9057 } else {
9058 if (list != NULL) {
9059 xmlNodePtr cur;
9060
9061 /*
9062 * Return the newly created nodeset after unlinking it from
9063 * they pseudo parent.
9064 */
9065 cur = newDoc->children->children;
9066 *list = cur;
9067 while (cur != NULL) {
9068 cur->parent = NULL;
9069 cur = cur->next;
9070 }
9071 newDoc->children->children = NULL;
9072 }
9073 ret = 0;
9074 }
9075 ctxt->sax = oldsax;
9076 xmlFreeParserCtxt(ctxt);
9077 newDoc->intSubset = NULL;
9078 newDoc->extSubset = NULL;
9079 xmlFreeDoc(newDoc);
9080
9081 return(ret);
9082}
9083
9084/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009085 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009086 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009087 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009088 * @sax: the SAX handler bloc (possibly NULL)
9089 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9090 * @depth: Used for loop detection, use 0
9091 * @URL: the URL for the entity to load
9092 * @ID: the System ID for the entity to load
9093 * @list: the return value for the set of parsed nodes
9094 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009095 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009096 *
9097 * Returns 0 if the entity is well formed, -1 in case of args problem and
9098 * the parser error code otherwise
9099 */
9100
Daniel Veillard257d9102001-05-08 10:41:44 +00009101static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009102xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9103 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009104 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009105 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009106 xmlParserCtxtPtr ctxt;
9107 xmlDocPtr newDoc;
9108 xmlSAXHandlerPtr oldsax = NULL;
9109 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009110 xmlChar start[4];
9111 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009112
9113 if (depth > 40) {
9114 return(XML_ERR_ENTITY_LOOP);
9115 }
9116
9117
9118
9119 if (list != NULL)
9120 *list = NULL;
9121 if ((URL == NULL) && (ID == NULL))
9122 return(-1);
9123 if (doc == NULL) /* @@ relax but check for dereferences */
9124 return(-1);
9125
9126
9127 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9128 if (ctxt == NULL) return(-1);
9129 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009130 if (oldctxt != NULL) {
9131 ctxt->_private = oldctxt->_private;
9132 ctxt->loadsubset = oldctxt->loadsubset;
9133 ctxt->validate = oldctxt->validate;
9134 ctxt->external = oldctxt->external;
9135 } else {
9136 /*
9137 * Doing validity checking on chunk without context
9138 * doesn't make sense
9139 */
9140 ctxt->_private = NULL;
9141 ctxt->validate = 0;
9142 ctxt->external = 2;
9143 ctxt->loadsubset = 0;
9144 }
Owen Taylor3473f882001-02-23 17:55:21 +00009145 if (sax != NULL) {
9146 oldsax = ctxt->sax;
9147 ctxt->sax = sax;
9148 if (user_data != NULL)
9149 ctxt->userData = user_data;
9150 }
9151 newDoc = xmlNewDoc(BAD_CAST "1.0");
9152 if (newDoc == NULL) {
9153 xmlFreeParserCtxt(ctxt);
9154 return(-1);
9155 }
9156 if (doc != NULL) {
9157 newDoc->intSubset = doc->intSubset;
9158 newDoc->extSubset = doc->extSubset;
9159 }
9160 if (doc->URL != NULL) {
9161 newDoc->URL = xmlStrdup(doc->URL);
9162 }
9163 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9164 if (newDoc->children == NULL) {
9165 if (sax != NULL)
9166 ctxt->sax = oldsax;
9167 xmlFreeParserCtxt(ctxt);
9168 newDoc->intSubset = NULL;
9169 newDoc->extSubset = NULL;
9170 xmlFreeDoc(newDoc);
9171 return(-1);
9172 }
9173 nodePush(ctxt, newDoc->children);
9174 if (doc == NULL) {
9175 ctxt->myDoc = newDoc;
9176 } else {
9177 ctxt->myDoc = doc;
9178 newDoc->children->doc = doc;
9179 }
9180
Daniel Veillard87a764e2001-06-20 17:41:10 +00009181 /*
9182 * Get the 4 first bytes and decode the charset
9183 * if enc != XML_CHAR_ENCODING_NONE
9184 * plug some encoding conversion routines.
9185 */
9186 GROW;
9187 start[0] = RAW;
9188 start[1] = NXT(1);
9189 start[2] = NXT(2);
9190 start[3] = NXT(3);
9191 enc = xmlDetectCharEncoding(start, 4);
9192 if (enc != XML_CHAR_ENCODING_NONE) {
9193 xmlSwitchEncoding(ctxt, enc);
9194 }
9195
Owen Taylor3473f882001-02-23 17:55:21 +00009196 /*
9197 * Parse a possible text declaration first
9198 */
Owen Taylor3473f882001-02-23 17:55:21 +00009199 if ((RAW == '<') && (NXT(1) == '?') &&
9200 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9201 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9202 xmlParseTextDecl(ctxt);
9203 }
9204
Owen Taylor3473f882001-02-23 17:55:21 +00009205 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009206 ctxt->depth = depth;
9207
9208 xmlParseContent(ctxt);
9209
9210 if ((RAW == '<') && (NXT(1) == '/')) {
9211 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9212 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9213 ctxt->sax->error(ctxt->userData,
9214 "chunk is not well balanced\n");
9215 ctxt->wellFormed = 0;
9216 ctxt->disableSAX = 1;
9217 } else if (RAW != 0) {
9218 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9219 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9220 ctxt->sax->error(ctxt->userData,
9221 "extra content at the end of well balanced chunk\n");
9222 ctxt->wellFormed = 0;
9223 ctxt->disableSAX = 1;
9224 }
9225 if (ctxt->node != newDoc->children) {
9226 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9227 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9228 ctxt->sax->error(ctxt->userData,
9229 "chunk is not well balanced\n");
9230 ctxt->wellFormed = 0;
9231 ctxt->disableSAX = 1;
9232 }
9233
9234 if (!ctxt->wellFormed) {
9235 if (ctxt->errNo == 0)
9236 ret = 1;
9237 else
9238 ret = ctxt->errNo;
9239 } else {
9240 if (list != NULL) {
9241 xmlNodePtr cur;
9242
9243 /*
9244 * Return the newly created nodeset after unlinking it from
9245 * they pseudo parent.
9246 */
9247 cur = newDoc->children->children;
9248 *list = cur;
9249 while (cur != NULL) {
9250 cur->parent = NULL;
9251 cur = cur->next;
9252 }
9253 newDoc->children->children = NULL;
9254 }
9255 ret = 0;
9256 }
9257 if (sax != NULL)
9258 ctxt->sax = oldsax;
9259 xmlFreeParserCtxt(ctxt);
9260 newDoc->intSubset = NULL;
9261 newDoc->extSubset = NULL;
9262 xmlFreeDoc(newDoc);
9263
9264 return(ret);
9265}
9266
9267/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009268 * xmlParseExternalEntity:
9269 * @doc: the document the chunk pertains to
9270 * @sax: the SAX handler bloc (possibly NULL)
9271 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9272 * @depth: Used for loop detection, use 0
9273 * @URL: the URL for the entity to load
9274 * @ID: the System ID for the entity to load
9275 * @list: the return value for the set of parsed nodes
9276 *
9277 * Parse an external general entity
9278 * An external general parsed entity is well-formed if it matches the
9279 * production labeled extParsedEnt.
9280 *
9281 * [78] extParsedEnt ::= TextDecl? content
9282 *
9283 * Returns 0 if the entity is well formed, -1 in case of args problem and
9284 * the parser error code otherwise
9285 */
9286
9287int
9288xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9289 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009290 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
9291 ID, list));
Daniel Veillard257d9102001-05-08 10:41:44 +00009292}
9293
9294/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009295 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009296 * @doc: the document the chunk pertains to
9297 * @sax: the SAX handler bloc (possibly NULL)
9298 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9299 * @depth: Used for loop detection, use 0
9300 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9301 * @list: the return value for the set of parsed nodes
9302 *
9303 * Parse a well-balanced chunk of an XML document
9304 * called by the parser
9305 * The allowed sequence for the Well Balanced Chunk is the one defined by
9306 * the content production in the XML grammar:
9307 *
9308 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9309 *
9310 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9311 * the parser error code otherwise
9312 */
9313
9314int
9315xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9316 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9317 xmlParserCtxtPtr ctxt;
9318 xmlDocPtr newDoc;
9319 xmlSAXHandlerPtr oldsax = NULL;
9320 int size;
9321 int ret = 0;
9322
9323 if (depth > 40) {
9324 return(XML_ERR_ENTITY_LOOP);
9325 }
9326
9327
9328 if (list != NULL)
9329 *list = NULL;
9330 if (string == NULL)
9331 return(-1);
9332
9333 size = xmlStrlen(string);
9334
9335 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9336 if (ctxt == NULL) return(-1);
9337 ctxt->userData = ctxt;
9338 if (sax != NULL) {
9339 oldsax = ctxt->sax;
9340 ctxt->sax = sax;
9341 if (user_data != NULL)
9342 ctxt->userData = user_data;
9343 }
9344 newDoc = xmlNewDoc(BAD_CAST "1.0");
9345 if (newDoc == NULL) {
9346 xmlFreeParserCtxt(ctxt);
9347 return(-1);
9348 }
9349 if (doc != NULL) {
9350 newDoc->intSubset = doc->intSubset;
9351 newDoc->extSubset = doc->extSubset;
9352 }
9353 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9354 if (newDoc->children == NULL) {
9355 if (sax != NULL)
9356 ctxt->sax = oldsax;
9357 xmlFreeParserCtxt(ctxt);
9358 newDoc->intSubset = NULL;
9359 newDoc->extSubset = NULL;
9360 xmlFreeDoc(newDoc);
9361 return(-1);
9362 }
9363 nodePush(ctxt, newDoc->children);
9364 if (doc == NULL) {
9365 ctxt->myDoc = newDoc;
9366 } else {
9367 ctxt->myDoc = doc;
9368 newDoc->children->doc = doc;
9369 }
9370 ctxt->instate = XML_PARSER_CONTENT;
9371 ctxt->depth = depth;
9372
9373 /*
9374 * Doing validity checking on chunk doesn't make sense
9375 */
9376 ctxt->validate = 0;
9377 ctxt->loadsubset = 0;
9378
9379 xmlParseContent(ctxt);
9380
9381 if ((RAW == '<') && (NXT(1) == '/')) {
9382 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9384 ctxt->sax->error(ctxt->userData,
9385 "chunk is not well balanced\n");
9386 ctxt->wellFormed = 0;
9387 ctxt->disableSAX = 1;
9388 } else if (RAW != 0) {
9389 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9390 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9391 ctxt->sax->error(ctxt->userData,
9392 "extra content at the end of well balanced chunk\n");
9393 ctxt->wellFormed = 0;
9394 ctxt->disableSAX = 1;
9395 }
9396 if (ctxt->node != newDoc->children) {
9397 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9398 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9399 ctxt->sax->error(ctxt->userData,
9400 "chunk is not well balanced\n");
9401 ctxt->wellFormed = 0;
9402 ctxt->disableSAX = 1;
9403 }
9404
9405 if (!ctxt->wellFormed) {
9406 if (ctxt->errNo == 0)
9407 ret = 1;
9408 else
9409 ret = ctxt->errNo;
9410 } else {
9411 if (list != NULL) {
9412 xmlNodePtr cur;
9413
9414 /*
9415 * Return the newly created nodeset after unlinking it from
9416 * they pseudo parent.
9417 */
9418 cur = newDoc->children->children;
9419 *list = cur;
9420 while (cur != NULL) {
9421 cur->parent = NULL;
9422 cur = cur->next;
9423 }
9424 newDoc->children->children = NULL;
9425 }
9426 ret = 0;
9427 }
9428 if (sax != NULL)
9429 ctxt->sax = oldsax;
9430 xmlFreeParserCtxt(ctxt);
9431 newDoc->intSubset = NULL;
9432 newDoc->extSubset = NULL;
9433 xmlFreeDoc(newDoc);
9434
9435 return(ret);
9436}
9437
9438/**
9439 * xmlSAXParseEntity:
9440 * @sax: the SAX handler block
9441 * @filename: the filename
9442 *
9443 * parse an XML external entity out of context and build a tree.
9444 * It use the given SAX function block to handle the parsing callback.
9445 * If sax is NULL, fallback to the default DOM tree building routines.
9446 *
9447 * [78] extParsedEnt ::= TextDecl? content
9448 *
9449 * This correspond to a "Well Balanced" chunk
9450 *
9451 * Returns the resulting document tree
9452 */
9453
9454xmlDocPtr
9455xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9456 xmlDocPtr ret;
9457 xmlParserCtxtPtr ctxt;
9458 char *directory = NULL;
9459
9460 ctxt = xmlCreateFileParserCtxt(filename);
9461 if (ctxt == NULL) {
9462 return(NULL);
9463 }
9464 if (sax != NULL) {
9465 if (ctxt->sax != NULL)
9466 xmlFree(ctxt->sax);
9467 ctxt->sax = sax;
9468 ctxt->userData = NULL;
9469 }
9470
9471 if ((ctxt->directory == NULL) && (directory == NULL))
9472 directory = xmlParserGetDirectory(filename);
9473
9474 xmlParseExtParsedEnt(ctxt);
9475
9476 if (ctxt->wellFormed)
9477 ret = ctxt->myDoc;
9478 else {
9479 ret = NULL;
9480 xmlFreeDoc(ctxt->myDoc);
9481 ctxt->myDoc = NULL;
9482 }
9483 if (sax != NULL)
9484 ctxt->sax = NULL;
9485 xmlFreeParserCtxt(ctxt);
9486
9487 return(ret);
9488}
9489
9490/**
9491 * xmlParseEntity:
9492 * @filename: the filename
9493 *
9494 * parse an XML external entity out of context and build a tree.
9495 *
9496 * [78] extParsedEnt ::= TextDecl? content
9497 *
9498 * This correspond to a "Well Balanced" chunk
9499 *
9500 * Returns the resulting document tree
9501 */
9502
9503xmlDocPtr
9504xmlParseEntity(const char *filename) {
9505 return(xmlSAXParseEntity(NULL, filename));
9506}
9507
9508/**
9509 * xmlCreateEntityParserCtxt:
9510 * @URL: the entity URL
9511 * @ID: the entity PUBLIC ID
9512 * @base: a posible base for the target URI
9513 *
9514 * Create a parser context for an external entity
9515 * Automatic support for ZLIB/Compress compressed document is provided
9516 * by default if found at compile-time.
9517 *
9518 * Returns the new parser context or NULL
9519 */
9520xmlParserCtxtPtr
9521xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9522 const xmlChar *base) {
9523 xmlParserCtxtPtr ctxt;
9524 xmlParserInputPtr inputStream;
9525 char *directory = NULL;
9526 xmlChar *uri;
9527
9528 ctxt = xmlNewParserCtxt();
9529 if (ctxt == NULL) {
9530 return(NULL);
9531 }
9532
9533 uri = xmlBuildURI(URL, base);
9534
9535 if (uri == NULL) {
9536 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9537 if (inputStream == NULL) {
9538 xmlFreeParserCtxt(ctxt);
9539 return(NULL);
9540 }
9541
9542 inputPush(ctxt, inputStream);
9543
9544 if ((ctxt->directory == NULL) && (directory == NULL))
9545 directory = xmlParserGetDirectory((char *)URL);
9546 if ((ctxt->directory == NULL) && (directory != NULL))
9547 ctxt->directory = directory;
9548 } else {
9549 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9550 if (inputStream == NULL) {
9551 xmlFree(uri);
9552 xmlFreeParserCtxt(ctxt);
9553 return(NULL);
9554 }
9555
9556 inputPush(ctxt, inputStream);
9557
9558 if ((ctxt->directory == NULL) && (directory == NULL))
9559 directory = xmlParserGetDirectory((char *)uri);
9560 if ((ctxt->directory == NULL) && (directory != NULL))
9561 ctxt->directory = directory;
9562 xmlFree(uri);
9563 }
9564
9565 return(ctxt);
9566}
9567
9568/************************************************************************
9569 * *
9570 * Front ends when parsing from a file *
9571 * *
9572 ************************************************************************/
9573
9574/**
9575 * xmlCreateFileParserCtxt:
9576 * @filename: the filename
9577 *
9578 * Create a parser context for a file content.
9579 * Automatic support for ZLIB/Compress compressed document is provided
9580 * by default if found at compile-time.
9581 *
9582 * Returns the new parser context or NULL
9583 */
9584xmlParserCtxtPtr
9585xmlCreateFileParserCtxt(const char *filename)
9586{
9587 xmlParserCtxtPtr ctxt;
9588 xmlParserInputPtr inputStream;
9589 xmlParserInputBufferPtr buf;
9590 char *directory = NULL;
9591
9592 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9593 if (buf == NULL) {
9594 return(NULL);
9595 }
9596
9597 ctxt = xmlNewParserCtxt();
9598 if (ctxt == NULL) {
9599 if (xmlDefaultSAXHandler.error != NULL) {
9600 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9601 }
9602 return(NULL);
9603 }
9604
9605 inputStream = xmlNewInputStream(ctxt);
9606 if (inputStream == NULL) {
9607 xmlFreeParserCtxt(ctxt);
9608 return(NULL);
9609 }
9610
9611 inputStream->filename = xmlMemStrdup(filename);
9612 inputStream->buf = buf;
9613 inputStream->base = inputStream->buf->buffer->content;
9614 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009615 inputStream->end =
9616 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009617
9618 inputPush(ctxt, inputStream);
9619 if ((ctxt->directory == NULL) && (directory == NULL))
9620 directory = xmlParserGetDirectory(filename);
9621 if ((ctxt->directory == NULL) && (directory != NULL))
9622 ctxt->directory = directory;
9623
9624 return(ctxt);
9625}
9626
9627/**
9628 * xmlSAXParseFile:
9629 * @sax: the SAX handler block
9630 * @filename: the filename
9631 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9632 * documents
9633 *
9634 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9635 * compressed document is provided by default if found at compile-time.
9636 * It use the given SAX function block to handle the parsing callback.
9637 * If sax is NULL, fallback to the default DOM tree building routines.
9638 *
9639 * Returns the resulting document tree
9640 */
9641
9642xmlDocPtr
9643xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9644 int recovery) {
9645 xmlDocPtr ret;
9646 xmlParserCtxtPtr ctxt;
9647 char *directory = NULL;
9648
9649 ctxt = xmlCreateFileParserCtxt(filename);
9650 if (ctxt == NULL) {
9651 return(NULL);
9652 }
9653 if (sax != NULL) {
9654 if (ctxt->sax != NULL)
9655 xmlFree(ctxt->sax);
9656 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009657 }
9658
9659 if ((ctxt->directory == NULL) && (directory == NULL))
9660 directory = xmlParserGetDirectory(filename);
9661 if ((ctxt->directory == NULL) && (directory != NULL))
9662 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9663
9664 xmlParseDocument(ctxt);
9665
9666 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9667 else {
9668 ret = NULL;
9669 xmlFreeDoc(ctxt->myDoc);
9670 ctxt->myDoc = NULL;
9671 }
9672 if (sax != NULL)
9673 ctxt->sax = NULL;
9674 xmlFreeParserCtxt(ctxt);
9675
9676 return(ret);
9677}
9678
9679/**
9680 * xmlRecoverDoc:
9681 * @cur: a pointer to an array of xmlChar
9682 *
9683 * parse an XML in-memory document and build a tree.
9684 * In the case the document is not Well Formed, a tree is built anyway
9685 *
9686 * Returns the resulting document tree
9687 */
9688
9689xmlDocPtr
9690xmlRecoverDoc(xmlChar *cur) {
9691 return(xmlSAXParseDoc(NULL, cur, 1));
9692}
9693
9694/**
9695 * xmlParseFile:
9696 * @filename: the filename
9697 *
9698 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9699 * compressed document is provided by default if found at compile-time.
9700 *
9701 * Returns the resulting document tree
9702 */
9703
9704xmlDocPtr
9705xmlParseFile(const char *filename) {
9706 return(xmlSAXParseFile(NULL, filename, 0));
9707}
9708
9709/**
9710 * xmlRecoverFile:
9711 * @filename: the filename
9712 *
9713 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9714 * compressed document is provided by default if found at compile-time.
9715 * In the case the document is not Well Formed, a tree is built anyway
9716 *
9717 * Returns the resulting document tree
9718 */
9719
9720xmlDocPtr
9721xmlRecoverFile(const char *filename) {
9722 return(xmlSAXParseFile(NULL, filename, 1));
9723}
9724
9725
9726/**
9727 * xmlSetupParserForBuffer:
9728 * @ctxt: an XML parser context
9729 * @buffer: a xmlChar * buffer
9730 * @filename: a file name
9731 *
9732 * Setup the parser context to parse a new buffer; Clears any prior
9733 * contents from the parser context. The buffer parameter must not be
9734 * NULL, but the filename parameter can be
9735 */
9736void
9737xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9738 const char* filename)
9739{
9740 xmlParserInputPtr input;
9741
9742 input = xmlNewInputStream(ctxt);
9743 if (input == NULL) {
9744 perror("malloc");
9745 xmlFree(ctxt);
9746 return;
9747 }
9748
9749 xmlClearParserCtxt(ctxt);
9750 if (filename != NULL)
9751 input->filename = xmlMemStrdup(filename);
9752 input->base = buffer;
9753 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009754 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009755 inputPush(ctxt, input);
9756}
9757
9758/**
9759 * xmlSAXUserParseFile:
9760 * @sax: a SAX handler
9761 * @user_data: The user data returned on SAX callbacks
9762 * @filename: a file name
9763 *
9764 * parse an XML file and call the given SAX handler routines.
9765 * Automatic support for ZLIB/Compress compressed document is provided
9766 *
9767 * Returns 0 in case of success or a error number otherwise
9768 */
9769int
9770xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9771 const char *filename) {
9772 int ret = 0;
9773 xmlParserCtxtPtr ctxt;
9774
9775 ctxt = xmlCreateFileParserCtxt(filename);
9776 if (ctxt == NULL) return -1;
9777 if (ctxt->sax != &xmlDefaultSAXHandler)
9778 xmlFree(ctxt->sax);
9779 ctxt->sax = sax;
9780 if (user_data != NULL)
9781 ctxt->userData = user_data;
9782
9783 xmlParseDocument(ctxt);
9784
9785 if (ctxt->wellFormed)
9786 ret = 0;
9787 else {
9788 if (ctxt->errNo != 0)
9789 ret = ctxt->errNo;
9790 else
9791 ret = -1;
9792 }
9793 if (sax != NULL)
9794 ctxt->sax = NULL;
9795 xmlFreeParserCtxt(ctxt);
9796
9797 return ret;
9798}
9799
9800/************************************************************************
9801 * *
9802 * Front ends when parsing from memory *
9803 * *
9804 ************************************************************************/
9805
9806/**
9807 * xmlCreateMemoryParserCtxt:
9808 * @buffer: a pointer to a char array
9809 * @size: the size of the array
9810 *
9811 * Create a parser context for an XML in-memory document.
9812 *
9813 * Returns the new parser context or NULL
9814 */
9815xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009816xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009817 xmlParserCtxtPtr ctxt;
9818 xmlParserInputPtr input;
9819 xmlParserInputBufferPtr buf;
9820
9821 if (buffer == NULL)
9822 return(NULL);
9823 if (size <= 0)
9824 return(NULL);
9825
9826 ctxt = xmlNewParserCtxt();
9827 if (ctxt == NULL)
9828 return(NULL);
9829
9830 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9831 if (buf == NULL) return(NULL);
9832
9833 input = xmlNewInputStream(ctxt);
9834 if (input == NULL) {
9835 xmlFreeParserCtxt(ctxt);
9836 return(NULL);
9837 }
9838
9839 input->filename = NULL;
9840 input->buf = buf;
9841 input->base = input->buf->buffer->content;
9842 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009843 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009844
9845 inputPush(ctxt, input);
9846 return(ctxt);
9847}
9848
9849/**
9850 * xmlSAXParseMemory:
9851 * @sax: the SAX handler block
9852 * @buffer: an pointer to a char array
9853 * @size: the size of the array
9854 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9855 * documents
9856 *
9857 * parse an XML in-memory block and use the given SAX function block
9858 * to handle the parsing callback. If sax is NULL, fallback to the default
9859 * DOM tree building routines.
9860 *
9861 * Returns the resulting document tree
9862 */
9863xmlDocPtr
9864xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9865 xmlDocPtr ret;
9866 xmlParserCtxtPtr ctxt;
9867
9868 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9869 if (ctxt == NULL) return(NULL);
9870 if (sax != NULL) {
9871 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009872 }
9873
9874 xmlParseDocument(ctxt);
9875
9876 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9877 else {
9878 ret = NULL;
9879 xmlFreeDoc(ctxt->myDoc);
9880 ctxt->myDoc = NULL;
9881 }
9882 if (sax != NULL)
9883 ctxt->sax = NULL;
9884 xmlFreeParserCtxt(ctxt);
9885
9886 return(ret);
9887}
9888
9889/**
9890 * xmlParseMemory:
9891 * @buffer: an pointer to a char array
9892 * @size: the size of the array
9893 *
9894 * parse an XML in-memory block and build a tree.
9895 *
9896 * Returns the resulting document tree
9897 */
9898
9899xmlDocPtr xmlParseMemory(char *buffer, int size) {
9900 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9901}
9902
9903/**
9904 * xmlRecoverMemory:
9905 * @buffer: an pointer to a char array
9906 * @size: the size of the array
9907 *
9908 * parse an XML in-memory block and build a tree.
9909 * In the case the document is not Well Formed, a tree is built anyway
9910 *
9911 * Returns the resulting document tree
9912 */
9913
9914xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9915 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9916}
9917
9918/**
9919 * xmlSAXUserParseMemory:
9920 * @sax: a SAX handler
9921 * @user_data: The user data returned on SAX callbacks
9922 * @buffer: an in-memory XML document input
9923 * @size: the length of the XML document in bytes
9924 *
9925 * A better SAX parsing routine.
9926 * parse an XML in-memory buffer and call the given SAX handler routines.
9927 *
9928 * Returns 0 in case of success or a error number otherwise
9929 */
9930int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009931 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009932 int ret = 0;
9933 xmlParserCtxtPtr ctxt;
9934 xmlSAXHandlerPtr oldsax = NULL;
9935
9936 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9937 if (ctxt == NULL) return -1;
9938 if (sax != NULL) {
9939 oldsax = ctxt->sax;
9940 ctxt->sax = sax;
9941 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009942 if (user_data != NULL)
9943 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009944
9945 xmlParseDocument(ctxt);
9946
9947 if (ctxt->wellFormed)
9948 ret = 0;
9949 else {
9950 if (ctxt->errNo != 0)
9951 ret = ctxt->errNo;
9952 else
9953 ret = -1;
9954 }
9955 if (sax != NULL) {
9956 ctxt->sax = oldsax;
9957 }
9958 xmlFreeParserCtxt(ctxt);
9959
9960 return ret;
9961}
9962
9963/**
9964 * xmlCreateDocParserCtxt:
9965 * @cur: a pointer to an array of xmlChar
9966 *
9967 * Creates a parser context for an XML in-memory document.
9968 *
9969 * Returns the new parser context or NULL
9970 */
9971xmlParserCtxtPtr
9972xmlCreateDocParserCtxt(xmlChar *cur) {
9973 int len;
9974
9975 if (cur == NULL)
9976 return(NULL);
9977 len = xmlStrlen(cur);
9978 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9979}
9980
9981/**
9982 * xmlSAXParseDoc:
9983 * @sax: the SAX handler block
9984 * @cur: a pointer to an array of xmlChar
9985 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9986 * documents
9987 *
9988 * parse an XML in-memory document and build a tree.
9989 * It use the given SAX function block to handle the parsing callback.
9990 * If sax is NULL, fallback to the default DOM tree building routines.
9991 *
9992 * Returns the resulting document tree
9993 */
9994
9995xmlDocPtr
9996xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9997 xmlDocPtr ret;
9998 xmlParserCtxtPtr ctxt;
9999
10000 if (cur == NULL) return(NULL);
10001
10002
10003 ctxt = xmlCreateDocParserCtxt(cur);
10004 if (ctxt == NULL) return(NULL);
10005 if (sax != NULL) {
10006 ctxt->sax = sax;
10007 ctxt->userData = NULL;
10008 }
10009
10010 xmlParseDocument(ctxt);
10011 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10012 else {
10013 ret = NULL;
10014 xmlFreeDoc(ctxt->myDoc);
10015 ctxt->myDoc = NULL;
10016 }
10017 if (sax != NULL)
10018 ctxt->sax = NULL;
10019 xmlFreeParserCtxt(ctxt);
10020
10021 return(ret);
10022}
10023
10024/**
10025 * xmlParseDoc:
10026 * @cur: a pointer to an array of xmlChar
10027 *
10028 * parse an XML in-memory document and build a tree.
10029 *
10030 * Returns the resulting document tree
10031 */
10032
10033xmlDocPtr
10034xmlParseDoc(xmlChar *cur) {
10035 return(xmlSAXParseDoc(NULL, cur, 0));
10036}
10037
10038
10039/************************************************************************
10040 * *
10041 * Miscellaneous *
10042 * *
10043 ************************************************************************/
10044
10045#ifdef LIBXML_XPATH_ENABLED
10046#include <libxml/xpath.h>
10047#endif
10048
/* set once xmlInitParser() has run, reset by xmlCleanupParser() */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */

void
xmlInitParser(void) {
    /* nothing to do when initialization has already been performed */
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
10077
10078/**
10079 * xmlCleanupParser:
10080 *
10081 * Cleanup function for the XML parser. It tries to reclaim all
10082 * parsing related global memory allocated for the parser processing.
10083 * It doesn't deallocate any document related memory. Calling this
10084 * function should not prevent reusing the parser.
10085 */
10086
10087void
10088xmlCleanupParser(void) {
10089 xmlParserInitialized = 0;
10090 xmlCleanupCharEncodingHandlers();
10091 xmlCleanupPredefinedEntities();
10092}
10093