blob: c49765f8b23732a18a3d4566ea9c2cb17582abca [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of characters are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * daniel@veillard.com
 *
 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
 *                  and xmlDoValidityCheckingDefaultValue for VMS
 */
35
Bjorn Reese70a9da52001-04-21 16:57:29 +000036#include "libxml.h"
37
Owen Taylor3473f882001-02-23 17:55:21 +000038#ifdef WIN32
Owen Taylor3473f882001-02-23 17:55:21 +000039#define XML_DIR_SEP '\\'
40#else
Owen Taylor3473f882001-02-23 17:55:21 +000041#define XML_DIR_SEP '/'
42#endif
43
Owen Taylor3473f882001-02-23 17:55:21 +000044#include <stdlib.h>
45#include <string.h>
46#include <libxml/xmlmemory.h>
47#include <libxml/tree.h>
48#include <libxml/parser.h>
49#include <libxml/parserInternals.h>
50#include <libxml/valid.h>
51#include <libxml/entities.h>
52#include <libxml/xmlerror.h>
53#include <libxml/encoding.h>
54#include <libxml/xmlIO.h>
55#include <libxml/uri.h>
56
57#ifdef HAVE_CTYPE_H
58#include <ctype.h>
59#endif
60#ifdef HAVE_STDLIB_H
61#include <stdlib.h>
62#endif
63#ifdef HAVE_SYS_STAT_H
64#include <sys/stat.h>
65#endif
66#ifdef HAVE_FCNTL_H
67#include <fcntl.h>
68#endif
69#ifdef HAVE_UNISTD_H
70#include <unistd.h>
71#endif
72#ifdef HAVE_ZLIB_H
73#include <zlib.h>
74#endif
75
76
/*
 * Sizes used for the translation buffers allocated in this file:
 * XML_PARSER_BIG_BUFFER_SIZE is the initial allocation, and
 * XML_PARSER_BUFFER_SIZE is the slack kept free before a buffer is grown.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Various global defaults for parsing
 *
 * xmlParserDebugEntities: when non-zero, the entity handling routines of
 * this file trace their activity through xmlGenericError().
 */
int xmlParserDebugEntities = 0;

/*
 * List of XML prefixed PI allowed by W3C specs (NULL terminated)
 */
const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
93
94/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
95void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
96xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
97 const xmlChar **str);
98
Daniel Veillard257d9102001-05-08 10:41:44 +000099static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000100xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
101 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +0000102 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +0000103 const xmlChar *ID, xmlNodePtr *list);
Owen Taylor3473f882001-02-23 17:55:21 +0000104
105/************************************************************************
106 * *
107 * Parser stacks related functions and macros *
108 * *
109 ************************************************************************/
110
111xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
112 const xmlChar ** str);
113
114/*
115 * Generic function for accessing stacks in the Parser Context
116 */
117
118#define PUSH_AND_POP(scope, type, name) \
119scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
120 if (ctxt->name##Nr >= ctxt->name##Max) { \
121 ctxt->name##Max *= 2; \
122 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
123 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
124 if (ctxt->name##Tab == NULL) { \
125 xmlGenericError(xmlGenericErrorContext, \
126 "realloc failed !\n"); \
127 return(0); \
128 } \
129 } \
130 ctxt->name##Tab[ctxt->name##Nr] = value; \
131 ctxt->name = value; \
132 return(ctxt->name##Nr++); \
133} \
134scope type name##Pop(xmlParserCtxtPtr ctxt) { \
135 type ret; \
136 if (ctxt->name##Nr <= 0) return(0); \
137 ctxt->name##Nr--; \
138 if (ctxt->name##Nr > 0) \
139 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
140 else \
141 ctxt->name = NULL; \
142 ret = ctxt->name##Tab[ctxt->name##Nr]; \
143 ctxt->name##Tab[ctxt->name##Nr] = 0; \
144 return(ret); \
145} \
146
Daniel Veillard5e2dace2001-07-18 19:30:27 +0000147/**
148 * inputPop:
149 * @ctxt: an XML parser context
150 *
151 * Pops the top parser input from the input stack
152 *
153 * Returns the input just removed
154 */
155/**
156 * inputPush:
157 * @ctxt: an XML parser context
158 * @input: the parser input
159 *
160 * Pushes a new parser input on top of the input stack
161 */
162/**
163 * namePop:
164 * @ctxt: an XML parser context
165 *
166 * Pops the top element name from the name stack
167 *
168 * Returns the name just removed
169 */
170/**
171 * namePush:
172 * @ctxt: an XML parser context
173 * @name: the element name
174 *
175 * Pushes a new element name on top of the name stack
176 */
177/**
178 * nodePop:
179 * @ctxt: an XML parser context
180 *
181 * Pops the top element node from the node stack
182 *
183 * Returns the node just removed
184 */
185/**
186 * nodePush:
187 * @ctxt: an XML parser context
188 * @node: the element node
189 *
190 * Pushes a new element node on top of the node stack
191 */
Owen Taylor3473f882001-02-23 17:55:21 +0000192/*
193 * Those macros actually generate the functions
194 */
195PUSH_AND_POP(extern, xmlParserInputPtr, input)
196PUSH_AND_POP(extern, xmlNodePtr, node)
197PUSH_AND_POP(extern, xmlChar*, name)
198
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000199static int spacePush(xmlParserCtxtPtr ctxt, int val) {
Owen Taylor3473f882001-02-23 17:55:21 +0000200 if (ctxt->spaceNr >= ctxt->spaceMax) {
201 ctxt->spaceMax *= 2;
202 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
203 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
204 if (ctxt->spaceTab == NULL) {
205 xmlGenericError(xmlGenericErrorContext,
206 "realloc failed !\n");
207 return(0);
208 }
209 }
210 ctxt->spaceTab[ctxt->spaceNr] = val;
211 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
212 return(ctxt->spaceNr++);
213}
214
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000215static int spacePop(xmlParserCtxtPtr ctxt) {
Owen Taylor3473f882001-02-23 17:55:21 +0000216 int ret;
217 if (ctxt->spaceNr <= 0) return(0);
218 ctxt->spaceNr--;
219 if (ctxt->spaceNr > 0)
220 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
221 else
222 ctxt->space = NULL;
223 ret = ctxt->spaceTab[ctxt->spaceNr];
224 ctxt->spaceTab[ctxt->spaceNr] = -1;
225 return(ret);
226}
227
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them expect a variable named ctxt in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending ctxt->token when one is set, otherwise the
 *           current xmlChar of the input buffer.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but limited to the input buffer: it evaluates to
 *           -1 while a token is pending.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXT1   Skip exactly one xmlChar without line/column accounting and
 *           grow the input when a 0 is reached.
 *   NEXTL(l) Skip l xmlChars in the input buffer
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * NOTE: SHRINK, GROW, NEXT1 and COPY_BUF are not wrapped in do/while(0);
 *       existing code relies on that form (e.g. GROW used without a
 *       trailing semicolon), so take care when using them next to an else.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &l)
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, s, &l)

#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) b[i++] = (xmlChar) (v);                               \
    else i += xmlCopyCharMultiByte(&b[i],(v))
Owen Taylor3473f882001-02-23 17:55:21 +0000314
315/**
316 * xmlSkipBlankChars:
317 * @ctxt: the XML parser context
318 *
319 * skip all blanks character found at that point in the input streams.
320 * It pops up finished entities in the process if allowable at that point.
321 *
322 * Returns the number of space chars skipped
323 */
324
325int
326xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
Daniel Veillard02141ea2001-04-30 11:46:40 +0000327 int res = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000328
Daniel Veillard02141ea2001-04-30 11:46:40 +0000329 if (ctxt->token != 0) {
330 if (!IS_BLANK(ctxt->token))
331 return(0);
332 ctxt->token = 0;
333 res++;
334 }
Owen Taylor3473f882001-02-23 17:55:21 +0000335 /*
336 * It's Okay to use CUR/NEXT here since all the blanks are on
337 * the ASCII range.
338 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000339 if ((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) {
340 const xmlChar *cur;
Owen Taylor3473f882001-02-23 17:55:21 +0000341 /*
Daniel Veillard02141ea2001-04-30 11:46:40 +0000342 * if we are in the document content, go really fast
Owen Taylor3473f882001-02-23 17:55:21 +0000343 */
Daniel Veillard02141ea2001-04-30 11:46:40 +0000344 cur = ctxt->input->cur;
345 while (IS_BLANK(*cur)) {
346 if (*cur == '\n') {
347 ctxt->input->line++; ctxt->input->col = 1;
348 }
349 cur++;
350 res++;
351 if (*cur == 0) {
352 ctxt->input->cur = cur;
353 xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
354 cur = ctxt->input->cur;
355 }
356 }
357 ctxt->input->cur = cur;
358 } else {
359 int cur;
360 do {
361 cur = CUR;
362 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
363 NEXT;
364 cur = CUR;
365 res++;
366 }
367 while ((cur == 0) && (ctxt->inputNr > 1) &&
368 (ctxt->instate != XML_PARSER_COMMENT)) {
369 xmlPopInput(ctxt);
370 cur = CUR;
371 }
372 /*
373 * Need to handle support of entities branching here
374 */
375 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
376 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
377 }
Owen Taylor3473f882001-02-23 17:55:21 +0000378 return(res);
379}
380
381/************************************************************************
382 * *
383 * Commodity functions to handle entities *
384 * *
385 ************************************************************************/
386
387/**
388 * xmlPopInput:
389 * @ctxt: an XML parser context
390 *
391 * xmlPopInput: the current input pointed by ctxt->input came to an end
392 * pop it and return the next char.
393 *
394 * Returns the current xmlChar in the parser context
395 */
396xmlChar
397xmlPopInput(xmlParserCtxtPtr ctxt) {
398 if (ctxt->inputNr == 1) return(0); /* End of main Input */
399 if (xmlParserDebugEntities)
400 xmlGenericError(xmlGenericErrorContext,
401 "Popping input %d\n", ctxt->inputNr);
402 xmlFreeInputStream(inputPop(ctxt));
403 if ((*ctxt->input->cur == 0) &&
404 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
405 return(xmlPopInput(ctxt));
406 return(CUR);
407}
408
409/**
410 * xmlPushInput:
411 * @ctxt: an XML parser context
412 * @input: an XML parser input fragment (entity, XML fragment ...).
413 *
414 * xmlPushInput: switch to a new input stream which is stacked on top
415 * of the previous one(s).
416 */
417void
418xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
419 if (input == NULL) return;
420
421 if (xmlParserDebugEntities) {
422 if ((ctxt->input != NULL) && (ctxt->input->filename))
423 xmlGenericError(xmlGenericErrorContext,
424 "%s(%d): ", ctxt->input->filename,
425 ctxt->input->line);
426 xmlGenericError(xmlGenericErrorContext,
427 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
428 }
429 inputPush(ctxt, input);
430 GROW;
431}
432
433/**
434 * xmlParseCharRef:
435 * @ctxt: an XML parser context
436 *
437 * parse Reference declarations
438 *
439 * [66] CharRef ::= '&#' [0-9]+ ';' |
440 * '&#x' [0-9a-fA-F]+ ';'
441 *
442 * [ WFC: Legal Character ]
443 * Characters referred to using character references must match the
444 * production for Char.
445 *
446 * Returns the value parsed (as an int), 0 in case of error
447 */
448int
449xmlParseCharRef(xmlParserCtxtPtr ctxt) {
Daniel Veillard50582112001-03-26 22:52:16 +0000450 unsigned int val = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000451 int count = 0;
452
453 if (ctxt->token != 0) {
454 val = ctxt->token;
455 ctxt->token = 0;
456 return(val);
457 }
458 /*
459 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
460 */
461 if ((RAW == '&') && (NXT(1) == '#') &&
462 (NXT(2) == 'x')) {
463 SKIP(3);
464 GROW;
465 while (RAW != ';') { /* loop blocked by count */
466 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
467 val = val * 16 + (CUR - '0');
468 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
469 val = val * 16 + (CUR - 'a') + 10;
470 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
471 val = val * 16 + (CUR - 'A') + 10;
472 else {
473 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
474 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
475 ctxt->sax->error(ctxt->userData,
476 "xmlParseCharRef: invalid hexadecimal value\n");
477 ctxt->wellFormed = 0;
478 ctxt->disableSAX = 1;
479 val = 0;
480 break;
481 }
482 NEXT;
483 count++;
484 }
485 if (RAW == ';') {
486 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
487 ctxt->nbChars ++;
488 ctxt->input->cur++;
489 }
490 } else if ((RAW == '&') && (NXT(1) == '#')) {
491 SKIP(2);
492 GROW;
493 while (RAW != ';') { /* loop blocked by count */
494 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
495 val = val * 10 + (CUR - '0');
496 else {
497 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
498 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
499 ctxt->sax->error(ctxt->userData,
500 "xmlParseCharRef: invalid decimal value\n");
501 ctxt->wellFormed = 0;
502 ctxt->disableSAX = 1;
503 val = 0;
504 break;
505 }
506 NEXT;
507 count++;
508 }
509 if (RAW == ';') {
510 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
511 ctxt->nbChars ++;
512 ctxt->input->cur++;
513 }
514 } else {
515 ctxt->errNo = XML_ERR_INVALID_CHARREF;
516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
517 ctxt->sax->error(ctxt->userData,
518 "xmlParseCharRef: invalid value\n");
519 ctxt->wellFormed = 0;
520 ctxt->disableSAX = 1;
521 }
522
523 /*
524 * [ WFC: Legal Character ]
525 * Characters referred to using character references must match the
526 * production for Char.
527 */
528 if (IS_CHAR(val)) {
529 return(val);
530 } else {
531 ctxt->errNo = XML_ERR_INVALID_CHAR;
532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
533 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
534 val);
535 ctxt->wellFormed = 0;
536 ctxt->disableSAX = 1;
537 }
538 return(0);
539}
540
541/**
542 * xmlParseStringCharRef:
543 * @ctxt: an XML parser context
544 * @str: a pointer to an index in the string
545 *
546 * parse Reference declarations, variant parsing from a string rather
547 * than an an input flow.
548 *
549 * [66] CharRef ::= '&#' [0-9]+ ';' |
550 * '&#x' [0-9a-fA-F]+ ';'
551 *
552 * [ WFC: Legal Character ]
553 * Characters referred to using character references must match the
554 * production for Char.
555 *
556 * Returns the value parsed (as an int), 0 in case of error, str will be
557 * updated to the current value of the index
558 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000559static int
Owen Taylor3473f882001-02-23 17:55:21 +0000560xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
561 const xmlChar *ptr;
562 xmlChar cur;
563 int val = 0;
564
565 if ((str == NULL) || (*str == NULL)) return(0);
566 ptr = *str;
567 cur = *ptr;
568 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
569 ptr += 3;
570 cur = *ptr;
571 while (cur != ';') { /* Non input consuming loop */
572 if ((cur >= '0') && (cur <= '9'))
573 val = val * 16 + (cur - '0');
574 else if ((cur >= 'a') && (cur <= 'f'))
575 val = val * 16 + (cur - 'a') + 10;
576 else if ((cur >= 'A') && (cur <= 'F'))
577 val = val * 16 + (cur - 'A') + 10;
578 else {
579 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
580 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
581 ctxt->sax->error(ctxt->userData,
582 "xmlParseStringCharRef: invalid hexadecimal value\n");
583 ctxt->wellFormed = 0;
584 ctxt->disableSAX = 1;
585 val = 0;
586 break;
587 }
588 ptr++;
589 cur = *ptr;
590 }
591 if (cur == ';')
592 ptr++;
593 } else if ((cur == '&') && (ptr[1] == '#')){
594 ptr += 2;
595 cur = *ptr;
596 while (cur != ';') { /* Non input consuming loops */
597 if ((cur >= '0') && (cur <= '9'))
598 val = val * 10 + (cur - '0');
599 else {
600 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
601 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
602 ctxt->sax->error(ctxt->userData,
603 "xmlParseStringCharRef: invalid decimal value\n");
604 ctxt->wellFormed = 0;
605 ctxt->disableSAX = 1;
606 val = 0;
607 break;
608 }
609 ptr++;
610 cur = *ptr;
611 }
612 if (cur == ';')
613 ptr++;
614 } else {
615 ctxt->errNo = XML_ERR_INVALID_CHARREF;
616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
617 ctxt->sax->error(ctxt->userData,
618 "xmlParseCharRef: invalid value\n");
619 ctxt->wellFormed = 0;
620 ctxt->disableSAX = 1;
621 return(0);
622 }
623 *str = ptr;
624
625 /*
626 * [ WFC: Legal Character ]
627 * Characters referred to using character references must match the
628 * production for Char.
629 */
630 if (IS_CHAR(val)) {
631 return(val);
632 } else {
633 ctxt->errNo = XML_ERR_INVALID_CHAR;
634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
635 ctxt->sax->error(ctxt->userData,
636 "CharRef: invalid xmlChar value %d\n", val);
637 ctxt->wellFormed = 0;
638 ctxt->disableSAX = 1;
639 }
640 return(0);
641}
642
643/**
644 * xmlParserHandlePEReference:
645 * @ctxt: the parser context
646 *
647 * [69] PEReference ::= '%' Name ';'
648 *
649 * [ WFC: No Recursion ]
650 * A parsed entity must not contain a recursive
651 * reference to itself, either directly or indirectly.
652 *
653 * [ WFC: Entity Declared ]
654 * In a document without any DTD, a document with only an internal DTD
655 * subset which contains no parameter entity references, or a document
656 * with "standalone='yes'", ... ... The declaration of a parameter
657 * entity must precede any reference to it...
658 *
659 * [ VC: Entity Declared ]
660 * In a document with an external subset or external parameter entities
661 * with "standalone='no'", ... ... The declaration of a parameter entity
662 * must precede any reference to it...
663 *
664 * [ WFC: In DTD ]
665 * Parameter-entity references may only appear in the DTD.
666 * NOTE: misleading but this is handled.
667 *
668 * A PEReference may have been detected in the current input stream
669 * the handling is done accordingly to
670 * http://www.w3.org/TR/REC-xml#entproc
671 * i.e.
672 * - Included in literal in entity values
673 * - Included as Paraemeter Entity reference within DTDs
674 */
675void
676xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
677 xmlChar *name;
678 xmlEntityPtr entity = NULL;
679 xmlParserInputPtr input;
680
681 if (ctxt->token != 0) {
682 return;
683 }
684 if (RAW != '%') return;
685 switch(ctxt->instate) {
686 case XML_PARSER_CDATA_SECTION:
687 return;
688 case XML_PARSER_COMMENT:
689 return;
690 case XML_PARSER_START_TAG:
691 return;
692 case XML_PARSER_END_TAG:
693 return;
694 case XML_PARSER_EOF:
695 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
697 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
698 ctxt->wellFormed = 0;
699 ctxt->disableSAX = 1;
700 return;
701 case XML_PARSER_PROLOG:
702 case XML_PARSER_START:
703 case XML_PARSER_MISC:
704 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
705 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
706 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
707 ctxt->wellFormed = 0;
708 ctxt->disableSAX = 1;
709 return;
710 case XML_PARSER_ENTITY_DECL:
711 case XML_PARSER_CONTENT:
712 case XML_PARSER_ATTRIBUTE_VALUE:
713 case XML_PARSER_PI:
714 case XML_PARSER_SYSTEM_LITERAL:
715 /* we just ignore it there */
716 return;
717 case XML_PARSER_EPILOG:
718 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
720 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
721 ctxt->wellFormed = 0;
722 ctxt->disableSAX = 1;
723 return;
724 case XML_PARSER_ENTITY_VALUE:
725 /*
726 * NOTE: in the case of entity values, we don't do the
727 * substitution here since we need the literal
728 * entity value to be able to save the internal
729 * subset of the document.
730 * This will be handled by xmlStringDecodeEntities
731 */
732 return;
733 case XML_PARSER_DTD:
734 /*
735 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
736 * In the internal DTD subset, parameter-entity references
737 * can occur only where markup declarations can occur, not
738 * within markup declarations.
739 * In that case this is handled in xmlParseMarkupDecl
740 */
741 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
742 return;
743 break;
744 case XML_PARSER_IGNORE:
745 return;
746 }
747
748 NEXT;
749 name = xmlParseName(ctxt);
750 if (xmlParserDebugEntities)
751 xmlGenericError(xmlGenericErrorContext,
752 "PE Reference: %s\n", name);
753 if (name == NULL) {
754 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
756 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
757 ctxt->wellFormed = 0;
758 ctxt->disableSAX = 1;
759 } else {
760 if (RAW == ';') {
761 NEXT;
762 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
763 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
764 if (entity == NULL) {
765
766 /*
767 * [ WFC: Entity Declared ]
768 * In a document without any DTD, a document with only an
769 * internal DTD subset which contains no parameter entity
770 * references, or a document with "standalone='yes'", ...
771 * ... The declaration of a parameter entity must precede
772 * any reference to it...
773 */
774 if ((ctxt->standalone == 1) ||
775 ((ctxt->hasExternalSubset == 0) &&
776 (ctxt->hasPErefs == 0))) {
777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
778 ctxt->sax->error(ctxt->userData,
779 "PEReference: %%%s; not found\n", name);
780 ctxt->wellFormed = 0;
781 ctxt->disableSAX = 1;
782 } else {
783 /*
784 * [ VC: Entity Declared ]
785 * In a document with an external subset or external
786 * parameter entities with "standalone='no'", ...
787 * ... The declaration of a parameter entity must precede
788 * any reference to it...
789 */
790 if ((!ctxt->disableSAX) &&
791 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
792 ctxt->vctxt.error(ctxt->vctxt.userData,
793 "PEReference: %%%s; not found\n", name);
794 } else if ((!ctxt->disableSAX) &&
795 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
796 ctxt->sax->warning(ctxt->userData,
797 "PEReference: %%%s; not found\n", name);
798 ctxt->valid = 0;
799 }
800 } else {
801 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
802 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
Daniel Veillard87a764e2001-06-20 17:41:10 +0000803 xmlChar start[4];
804 xmlCharEncoding enc;
805
Owen Taylor3473f882001-02-23 17:55:21 +0000806 /*
807 * handle the extra spaces added before and after
808 * c.f. http://www.w3.org/TR/REC-xml#as-PE
809 * this is done independantly.
810 */
811 input = xmlNewEntityInputStream(ctxt, entity);
812 xmlPushInput(ctxt, input);
Daniel Veillard87a764e2001-06-20 17:41:10 +0000813
814 /*
815 * Get the 4 first bytes and decode the charset
816 * if enc != XML_CHAR_ENCODING_NONE
817 * plug some encoding conversion routines.
818 */
819 GROW
820 start[0] = RAW;
821 start[1] = NXT(1);
822 start[2] = NXT(2);
823 start[3] = NXT(3);
824 enc = xmlDetectCharEncoding(start, 4);
825 if (enc != XML_CHAR_ENCODING_NONE) {
826 xmlSwitchEncoding(ctxt, enc);
827 }
828
Owen Taylor3473f882001-02-23 17:55:21 +0000829 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
830 (RAW == '<') && (NXT(1) == '?') &&
831 (NXT(2) == 'x') && (NXT(3) == 'm') &&
832 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
833 xmlParseTextDecl(ctxt);
834 }
835 if (ctxt->token == 0)
836 ctxt->token = ' ';
837 } else {
838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
839 ctxt->sax->error(ctxt->userData,
840 "xmlHandlePEReference: %s is not a parameter entity\n",
841 name);
842 ctxt->wellFormed = 0;
843 ctxt->disableSAX = 1;
844 }
845 }
846 } else {
847 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
848 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
849 ctxt->sax->error(ctxt->userData,
850 "xmlHandlePEReference: expecting ';'\n");
851 ctxt->wellFormed = 0;
852 ctxt->disableSAX = 1;
853 }
854 xmlFree(name);
855 }
856}
857
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buffer) doubles the allocation of the xmlChar array @buffer,
 * whose current size (in xmlChars) must be held by a variable named
 * <buffer>_size in the calling function. On success both are updated.
 * On allocation failure the macro reports through perror(), releases the
 * old allocation (it was leaked previously, since the only pointer to it
 * was overwritten with NULL) and makes the calling function return NULL.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *growBufferTmp = (xmlChar *)                                \
        xmlRealloc(buffer, buffer##_size * 2 * sizeof(xmlChar));        \
    if (growBufferTmp == NULL) {                                        \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = growBufferTmp;                                             \
    buffer##_size *= 2;                                                 \
}
870
871/**
872 * xmlStringDecodeEntities:
873 * @ctxt: the parser context
874 * @str: the input string
875 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
876 * @end: an end marker xmlChar, 0 if none
877 * @end2: an end marker xmlChar, 0 if none
878 * @end3: an end marker xmlChar, 0 if none
879 *
880 * Takes a entity string content and process to do the adequate subtitutions.
881 *
882 * [67] Reference ::= EntityRef | CharRef
883 *
884 * [69] PEReference ::= '%' Name ';'
885 *
886 * Returns A newly allocated string with the substitution done. The caller
887 * must deallocate it !
888 */
889xmlChar *
890xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
891 xmlChar end, xmlChar end2, xmlChar end3) {
892 xmlChar *buffer = NULL;
893 int buffer_size = 0;
894
895 xmlChar *current = NULL;
896 xmlEntityPtr ent;
897 int c,l;
898 int nbchars = 0;
899
900 if (str == NULL)
901 return(NULL);
902
903 if (ctxt->depth > 40) {
904 ctxt->errNo = XML_ERR_ENTITY_LOOP;
905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
906 ctxt->sax->error(ctxt->userData,
907 "Detected entity reference loop\n");
908 ctxt->wellFormed = 0;
909 ctxt->disableSAX = 1;
910 return(NULL);
911 }
912
913 /*
914 * allocate a translation buffer.
915 */
916 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
917 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
918 if (buffer == NULL) {
919 perror("xmlDecodeEntities: malloc failed");
920 return(NULL);
921 }
922
923 /*
924 * Ok loop until we reach one of the ending char or a size limit.
925 * we are operating on already parsed values.
926 */
927 c = CUR_SCHAR(str, l);
928 while ((c != 0) && (c != end) && /* non input consuming loop */
929 (c != end2) && (c != end3)) {
930
931 if (c == 0) break;
932 if ((c == '&') && (str[1] == '#')) {
933 int val = xmlParseStringCharRef(ctxt, &str);
934 if (val != 0) {
935 COPY_BUF(0,buffer,nbchars,val);
936 }
937 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
938 if (xmlParserDebugEntities)
939 xmlGenericError(xmlGenericErrorContext,
940 "String decoding Entity Reference: %.30s\n",
941 str);
942 ent = xmlParseStringEntityRef(ctxt, &str);
943 if ((ent != NULL) &&
944 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
945 if (ent->content != NULL) {
946 COPY_BUF(0,buffer,nbchars,ent->content[0]);
947 } else {
948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
949 ctxt->sax->error(ctxt->userData,
950 "internal error entity has no content\n");
951 }
952 } else if ((ent != NULL) && (ent->content != NULL)) {
953 xmlChar *rep;
954
955 ctxt->depth++;
956 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
957 0, 0, 0);
958 ctxt->depth--;
959 if (rep != NULL) {
960 current = rep;
961 while (*current != 0) { /* non input consuming loop */
962 buffer[nbchars++] = *current++;
963 if (nbchars >
964 buffer_size - XML_PARSER_BUFFER_SIZE) {
965 growBuffer(buffer);
966 }
967 }
968 xmlFree(rep);
969 }
970 } else if (ent != NULL) {
971 int i = xmlStrlen(ent->name);
972 const xmlChar *cur = ent->name;
973
974 buffer[nbchars++] = '&';
975 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
976 growBuffer(buffer);
977 }
978 for (;i > 0;i--)
979 buffer[nbchars++] = *cur++;
980 buffer[nbchars++] = ';';
981 }
982 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
983 if (xmlParserDebugEntities)
984 xmlGenericError(xmlGenericErrorContext,
985 "String decoding PE Reference: %.30s\n", str);
986 ent = xmlParseStringPEReference(ctxt, &str);
987 if (ent != NULL) {
988 xmlChar *rep;
989
990 ctxt->depth++;
991 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
992 0, 0, 0);
993 ctxt->depth--;
994 if (rep != NULL) {
995 current = rep;
996 while (*current != 0) { /* non input consuming loop */
997 buffer[nbchars++] = *current++;
998 if (nbchars >
999 buffer_size - XML_PARSER_BUFFER_SIZE) {
1000 growBuffer(buffer);
1001 }
1002 }
1003 xmlFree(rep);
1004 }
1005 }
1006 } else {
1007 COPY_BUF(l,buffer,nbchars,c);
1008 str += l;
1009 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
1010 growBuffer(buffer);
1011 }
1012 }
1013 c = CUR_SCHAR(str, l);
1014 }
1015 buffer[nbchars++] = 0;
1016 return(buffer);
1017}
1018
1019
1020/************************************************************************
1021 * *
1022 * Commodity functions to handle xmlChars *
1023 * *
1024 ************************************************************************/
1025
1026/**
1027 * xmlStrndup:
1028 * @cur: the input xmlChar *
1029 * @len: the len of @cur
1030 *
1031 * a strndup for array of xmlChar's
1032 *
1033 * Returns a new xmlChar * or NULL
1034 */
1035xmlChar *
1036xmlStrndup(const xmlChar *cur, int len) {
1037 xmlChar *ret;
1038
1039 if ((cur == NULL) || (len < 0)) return(NULL);
1040 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1041 if (ret == NULL) {
1042 xmlGenericError(xmlGenericErrorContext,
1043 "malloc of %ld byte failed\n",
1044 (len + 1) * (long)sizeof(xmlChar));
1045 return(NULL);
1046 }
1047 memcpy(ret, cur, len * sizeof(xmlChar));
1048 ret[len] = 0;
1049 return(ret);
1050}
1051
1052/**
1053 * xmlStrdup:
1054 * @cur: the input xmlChar *
1055 *
1056 * a strdup for array of xmlChar's. Since they are supposed to be
1057 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1058 * a termination mark of '0'.
1059 *
1060 * Returns a new xmlChar * or NULL
1061 */
1062xmlChar *
1063xmlStrdup(const xmlChar *cur) {
1064 const xmlChar *p = cur;
1065
1066 if (cur == NULL) return(NULL);
1067 while (*p != 0) p++; /* non input consuming */
1068 return(xmlStrndup(cur, p - cur));
1069}
1070
1071/**
1072 * xmlCharStrndup:
1073 * @cur: the input char *
1074 * @len: the len of @cur
1075 *
1076 * a strndup for char's to xmlChar's
1077 *
1078 * Returns a new xmlChar * or NULL
1079 */
1080
1081xmlChar *
1082xmlCharStrndup(const char *cur, int len) {
1083 int i;
1084 xmlChar *ret;
1085
1086 if ((cur == NULL) || (len < 0)) return(NULL);
1087 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1088 if (ret == NULL) {
1089 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1090 (len + 1) * (long)sizeof(xmlChar));
1091 return(NULL);
1092 }
1093 for (i = 0;i < len;i++)
1094 ret[i] = (xmlChar) cur[i];
1095 ret[len] = 0;
1096 return(ret);
1097}
1098
1099/**
1100 * xmlCharStrdup:
1101 * @cur: the input char *
1102 * @len: the len of @cur
1103 *
1104 * a strdup for char's to xmlChar's
1105 *
1106 * Returns a new xmlChar * or NULL
1107 */
1108
1109xmlChar *
1110xmlCharStrdup(const char *cur) {
1111 const char *p = cur;
1112
1113 if (cur == NULL) return(NULL);
1114 while (*p != '\0') p++; /* non input consuming */
1115 return(xmlCharStrndup(cur, p - cur));
1116}
1117
1118/**
1119 * xmlStrcmp:
1120 * @str1: the first xmlChar *
1121 * @str2: the second xmlChar *
1122 *
1123 * a strcmp for xmlChar's
1124 *
1125 * Returns the integer result of the comparison
1126 */
1127
1128int
1129xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1130 register int tmp;
1131
1132 if (str1 == str2) return(0);
1133 if (str1 == NULL) return(-1);
1134 if (str2 == NULL) return(1);
1135 do {
1136 tmp = *str1++ - *str2;
1137 if (tmp != 0) return(tmp);
1138 } while (*str2++ != 0);
1139 return 0;
1140}
1141
1142/**
1143 * xmlStrEqual:
1144 * @str1: the first xmlChar *
1145 * @str2: the second xmlChar *
1146 *
1147 * Check if both string are equal of have same content
1148 * Should be a bit more readable and faster than xmlStrEqual()
1149 *
1150 * Returns 1 if they are equal, 0 if they are different
1151 */
1152
1153int
1154xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1155 if (str1 == str2) return(1);
1156 if (str1 == NULL) return(0);
1157 if (str2 == NULL) return(0);
1158 do {
1159 if (*str1++ != *str2) return(0);
1160 } while (*str2++);
1161 return(1);
1162}
1163
1164/**
1165 * xmlStrncmp:
1166 * @str1: the first xmlChar *
1167 * @str2: the second xmlChar *
1168 * @len: the max comparison length
1169 *
1170 * a strncmp for xmlChar's
1171 *
1172 * Returns the integer result of the comparison
1173 */
1174
1175int
1176xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1177 register int tmp;
1178
1179 if (len <= 0) return(0);
1180 if (str1 == str2) return(0);
1181 if (str1 == NULL) return(-1);
1182 if (str2 == NULL) return(1);
1183 do {
1184 tmp = *str1++ - *str2;
1185 if (tmp != 0 || --len == 0) return(tmp);
1186 } while (*str2++ != 0);
1187 return 0;
1188}
1189
1190static xmlChar casemap[256] = {
1191 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1192 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1193 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1194 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1195 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1196 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1197 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1198 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1199 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1200 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1201 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1202 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1203 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1204 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1205 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1206 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1207 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1208 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1209 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1210 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1211 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1212 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1213 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1214 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1215 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1216 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1217 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1218 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1219 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1220 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1221 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1222 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1223};
1224
1225/**
1226 * xmlStrcasecmp:
1227 * @str1: the first xmlChar *
1228 * @str2: the second xmlChar *
1229 *
1230 * a strcasecmp for xmlChar's
1231 *
1232 * Returns the integer result of the comparison
1233 */
1234
1235int
1236xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1237 register int tmp;
1238
1239 if (str1 == str2) return(0);
1240 if (str1 == NULL) return(-1);
1241 if (str2 == NULL) return(1);
1242 do {
1243 tmp = casemap[*str1++] - casemap[*str2];
1244 if (tmp != 0) return(tmp);
1245 } while (*str2++ != 0);
1246 return 0;
1247}
1248
1249/**
1250 * xmlStrncasecmp:
1251 * @str1: the first xmlChar *
1252 * @str2: the second xmlChar *
1253 * @len: the max comparison length
1254 *
1255 * a strncasecmp for xmlChar's
1256 *
1257 * Returns the integer result of the comparison
1258 */
1259
1260int
1261xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1262 register int tmp;
1263
1264 if (len <= 0) return(0);
1265 if (str1 == str2) return(0);
1266 if (str1 == NULL) return(-1);
1267 if (str2 == NULL) return(1);
1268 do {
1269 tmp = casemap[*str1++] - casemap[*str2];
1270 if (tmp != 0 || --len == 0) return(tmp);
1271 } while (*str2++ != 0);
1272 return 0;
1273}
1274
1275/**
1276 * xmlStrchr:
1277 * @str: the xmlChar * array
1278 * @val: the xmlChar to search
1279 *
1280 * a strchr for xmlChar's
1281 *
1282 * Returns the xmlChar * for the first occurence or NULL.
1283 */
1284
1285const xmlChar *
1286xmlStrchr(const xmlChar *str, xmlChar val) {
1287 if (str == NULL) return(NULL);
1288 while (*str != 0) { /* non input consuming */
1289 if (*str == val) return((xmlChar *) str);
1290 str++;
1291 }
1292 return(NULL);
1293}
1294
1295/**
1296 * xmlStrstr:
1297 * @str: the xmlChar * array (haystack)
1298 * @val: the xmlChar to search (needle)
1299 *
1300 * a strstr for xmlChar's
1301 *
1302 * Returns the xmlChar * for the first occurence or NULL.
1303 */
1304
1305const xmlChar *
Daniel Veillard77044732001-06-29 21:31:07 +00001306xmlStrstr(const xmlChar *str, const xmlChar *val) {
Owen Taylor3473f882001-02-23 17:55:21 +00001307 int n;
1308
1309 if (str == NULL) return(NULL);
1310 if (val == NULL) return(NULL);
1311 n = xmlStrlen(val);
1312
1313 if (n == 0) return(str);
1314 while (*str != 0) { /* non input consuming */
1315 if (*str == *val) {
1316 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1317 }
1318 str++;
1319 }
1320 return(NULL);
1321}
1322
1323/**
1324 * xmlStrcasestr:
1325 * @str: the xmlChar * array (haystack)
1326 * @val: the xmlChar to search (needle)
1327 *
1328 * a case-ignoring strstr for xmlChar's
1329 *
1330 * Returns the xmlChar * for the first occurence or NULL.
1331 */
1332
1333const xmlChar *
1334xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1335 int n;
1336
1337 if (str == NULL) return(NULL);
1338 if (val == NULL) return(NULL);
1339 n = xmlStrlen(val);
1340
1341 if (n == 0) return(str);
1342 while (*str != 0) { /* non input consuming */
1343 if (casemap[*str] == casemap[*val])
1344 if (!xmlStrncasecmp(str, val, n)) return(str);
1345 str++;
1346 }
1347 return(NULL);
1348}
1349
1350/**
1351 * xmlStrsub:
1352 * @str: the xmlChar * array (haystack)
1353 * @start: the index of the first char (zero based)
1354 * @len: the length of the substring
1355 *
1356 * Extract a substring of a given string
1357 *
1358 * Returns the xmlChar * for the first occurence or NULL.
1359 */
1360
1361xmlChar *
1362xmlStrsub(const xmlChar *str, int start, int len) {
1363 int i;
1364
1365 if (str == NULL) return(NULL);
1366 if (start < 0) return(NULL);
1367 if (len < 0) return(NULL);
1368
1369 for (i = 0;i < start;i++) {
1370 if (*str == 0) return(NULL);
1371 str++;
1372 }
1373 if (*str == 0) return(NULL);
1374 return(xmlStrndup(str, len));
1375}
1376
1377/**
1378 * xmlStrlen:
1379 * @str: the xmlChar * array
1380 *
1381 * length of a xmlChar's string
1382 *
1383 * Returns the number of xmlChar contained in the ARRAY.
1384 */
1385
1386int
1387xmlStrlen(const xmlChar *str) {
1388 int len = 0;
1389
1390 if (str == NULL) return(0);
1391 while (*str != 0) { /* non input consuming */
1392 str++;
1393 len++;
1394 }
1395 return(len);
1396}
1397
1398/**
1399 * xmlStrncat:
1400 * @cur: the original xmlChar * array
1401 * @add: the xmlChar * array added
1402 * @len: the length of @add
1403 *
1404 * a strncat for array of xmlChar's, it will extend cur with the len
1405 * first bytes of @add.
1406 *
1407 * Returns a new xmlChar *, the original @cur is reallocated if needed
1408 * and should not be freed
1409 */
1410
1411xmlChar *
1412xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1413 int size;
1414 xmlChar *ret;
1415
1416 if ((add == NULL) || (len == 0))
1417 return(cur);
1418 if (cur == NULL)
1419 return(xmlStrndup(add, len));
1420
1421 size = xmlStrlen(cur);
1422 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1423 if (ret == NULL) {
1424 xmlGenericError(xmlGenericErrorContext,
1425 "xmlStrncat: realloc of %ld byte failed\n",
1426 (size + len + 1) * (long)sizeof(xmlChar));
1427 return(cur);
1428 }
1429 memcpy(&ret[size], add, len * sizeof(xmlChar));
1430 ret[size + len] = 0;
1431 return(ret);
1432}
1433
1434/**
1435 * xmlStrcat:
1436 * @cur: the original xmlChar * array
1437 * @add: the xmlChar * array added
1438 *
1439 * a strcat for array of xmlChar's. Since they are supposed to be
1440 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1441 * a termination mark of '0'.
1442 *
1443 * Returns a new xmlChar * containing the concatenated string.
1444 */
1445xmlChar *
1446xmlStrcat(xmlChar *cur, const xmlChar *add) {
1447 const xmlChar *p = add;
1448
1449 if (add == NULL) return(cur);
1450 if (cur == NULL)
1451 return(xmlStrdup(add));
1452
1453 while (*p != 0) p++; /* non input consuming */
1454 return(xmlStrncat(cur, add, p - add));
1455}
1456
1457/************************************************************************
1458 * *
1459 * Commodity functions, cleanup needed ? *
1460 * *
1461 ************************************************************************/
1462
1463/**
1464 * areBlanks:
1465 * @ctxt: an XML parser context
1466 * @str: a xmlChar *
1467 * @len: the size of @str
1468 *
1469 * Is this a sequence of blank chars that one can ignore ?
1470 *
1471 * Returns 1 if ignorable 0 otherwise.
1472 */
1473
1474static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1475 int i, ret;
1476 xmlNodePtr lastChild;
1477
Daniel Veillard2f362242001-03-02 17:36:21 +00001478 if (ctxt->keepBlanks)
1479 return(0);
1480
Owen Taylor3473f882001-02-23 17:55:21 +00001481 /*
1482 * Check for xml:space value.
1483 */
1484 if (*(ctxt->space) == 1)
1485 return(0);
1486
1487 /*
1488 * Check that the string is made of blanks
1489 */
1490 for (i = 0;i < len;i++)
1491 if (!(IS_BLANK(str[i]))) return(0);
1492
1493 /*
1494 * Look if the element is mixed content in the Dtd if available
1495 */
Daniel Veillard6dd398f2001-07-25 22:41:03 +00001496 if (ctxt->node == NULL) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001497 if (ctxt->myDoc != NULL) {
1498 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1499 if (ret == 0) return(1);
1500 if (ret == 1) return(0);
1501 }
1502
1503 /*
1504 * Otherwise, heuristic :-\
1505 */
Owen Taylor3473f882001-02-23 17:55:21 +00001506 if (RAW != '<') return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001507 if ((ctxt->node->children == NULL) &&
1508 (RAW == '<') && (NXT(1) == '/')) return(0);
1509
1510 lastChild = xmlGetLastChild(ctxt->node);
1511 if (lastChild == NULL) {
Daniel Veillard7db37732001-07-12 01:20:08 +00001512 if ((ctxt->node->type != XML_ELEMENT_NODE) &&
1513 (ctxt->node->content != NULL)) return(0);
Owen Taylor3473f882001-02-23 17:55:21 +00001514 } else if (xmlNodeIsText(lastChild))
1515 return(0);
1516 else if ((ctxt->node->children != NULL) &&
1517 (xmlNodeIsText(ctxt->node->children)))
1518 return(0);
1519 return(1);
1520}
1521
/*
 * Forward definition for recursive behaviour.
 */
1525void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1526void xmlParseReference(xmlParserCtxtPtr ctxt);
1527
1528/************************************************************************
1529 * *
1530 * Extra stuff for namespace support *
1531 * Relates to http://www.w3.org/TR/WD-xml-names *
1532 * *
1533 ************************************************************************/
1534
1535/**
1536 * xmlSplitQName:
1537 * @ctxt: an XML parser context
1538 * @name: an XML parser context
1539 * @prefix: a xmlChar **
1540 *
1541 * parse an UTF8 encoded XML qualified name string
1542 *
1543 * [NS 5] QName ::= (Prefix ':')? LocalPart
1544 *
1545 * [NS 6] Prefix ::= NCName
1546 *
1547 * [NS 7] LocalPart ::= NCName
1548 *
1549 * Returns the local part, and prefix is updated
1550 * to get the Prefix if any.
1551 */
1552
1553xmlChar *
1554xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1555 xmlChar buf[XML_MAX_NAMELEN + 5];
1556 xmlChar *buffer = NULL;
1557 int len = 0;
1558 int max = XML_MAX_NAMELEN;
1559 xmlChar *ret = NULL;
1560 const xmlChar *cur = name;
1561 int c;
1562
1563 *prefix = NULL;
1564
1565#ifndef XML_XML_NAMESPACE
1566 /* xml: prefix is not really a namespace */
1567 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1568 (cur[2] == 'l') && (cur[3] == ':'))
1569 return(xmlStrdup(name));
1570#endif
1571
1572 /* nasty but valid */
1573 if (cur[0] == ':')
1574 return(xmlStrdup(name));
1575
1576 c = *cur++;
1577 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1578 buf[len++] = c;
1579 c = *cur++;
1580 }
1581 if (len >= max) {
1582 /*
1583 * Okay someone managed to make a huge name, so he's ready to pay
1584 * for the processing speed.
1585 */
1586 max = len * 2;
1587
1588 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1589 if (buffer == NULL) {
1590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1591 ctxt->sax->error(ctxt->userData,
1592 "xmlSplitQName: out of memory\n");
1593 return(NULL);
1594 }
1595 memcpy(buffer, buf, len);
1596 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1597 if (len + 10 > max) {
1598 max *= 2;
1599 buffer = (xmlChar *) xmlRealloc(buffer,
1600 max * sizeof(xmlChar));
1601 if (buffer == NULL) {
1602 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1603 ctxt->sax->error(ctxt->userData,
1604 "xmlSplitQName: out of memory\n");
1605 return(NULL);
1606 }
1607 }
1608 buffer[len++] = c;
1609 c = *cur++;
1610 }
1611 buffer[len] = 0;
1612 }
1613
1614 if (buffer == NULL)
1615 ret = xmlStrndup(buf, len);
1616 else {
1617 ret = buffer;
1618 buffer = NULL;
1619 max = XML_MAX_NAMELEN;
1620 }
1621
1622
1623 if (c == ':') {
1624 c = *cur++;
1625 if (c == 0) return(ret);
1626 *prefix = ret;
1627 len = 0;
1628
1629 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1630 buf[len++] = c;
1631 c = *cur++;
1632 }
1633 if (len >= max) {
1634 /*
1635 * Okay someone managed to make a huge name, so he's ready to pay
1636 * for the processing speed.
1637 */
1638 max = len * 2;
1639
1640 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1641 if (buffer == NULL) {
1642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1643 ctxt->sax->error(ctxt->userData,
1644 "xmlSplitQName: out of memory\n");
1645 return(NULL);
1646 }
1647 memcpy(buffer, buf, len);
1648 while (c != 0) { /* tested bigname2.xml */
1649 if (len + 10 > max) {
1650 max *= 2;
1651 buffer = (xmlChar *) xmlRealloc(buffer,
1652 max * sizeof(xmlChar));
1653 if (buffer == NULL) {
1654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1655 ctxt->sax->error(ctxt->userData,
1656 "xmlSplitQName: out of memory\n");
1657 return(NULL);
1658 }
1659 }
1660 buffer[len++] = c;
1661 c = *cur++;
1662 }
1663 buffer[len] = 0;
1664 }
1665
1666 if (buffer == NULL)
1667 ret = xmlStrndup(buf, len);
1668 else {
1669 ret = buffer;
1670 }
1671 }
1672
1673 return(ret);
1674}
1675
1676/************************************************************************
1677 * *
1678 * The parser itself *
1679 * Relates to http://www.w3.org/TR/REC-xml *
1680 * *
1681 ************************************************************************/
1682
Daniel Veillard76d66f42001-05-16 21:05:17 +00001683static xmlChar * xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001684/**
1685 * xmlParseName:
1686 * @ctxt: an XML parser context
1687 *
1688 * parse an XML name.
1689 *
1690 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1691 * CombiningChar | Extender
1692 *
1693 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1694 *
1695 * [6] Names ::= Name (S Name)*
1696 *
1697 * Returns the Name parsed or NULL
1698 */
1699
1700xmlChar *
1701xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001702 const xmlChar *in;
1703 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001704 int count = 0;
1705
1706 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001707
1708 /*
1709 * Accelerator for simple ASCII names
1710 */
1711 in = ctxt->input->cur;
1712 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1713 ((*in >= 0x41) && (*in <= 0x5A)) ||
1714 (*in == '_') || (*in == ':')) {
1715 in++;
1716 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1717 ((*in >= 0x41) && (*in <= 0x5A)) ||
1718 ((*in >= 0x30) && (*in <= 0x39)) ||
Daniel Veillard76d66f42001-05-16 21:05:17 +00001719 (*in == '_') || (*in == '-') ||
1720 (*in == ':') || (*in == '.'))
Daniel Veillard48b2f892001-02-25 16:11:03 +00001721 in++;
Daniel Veillard76d66f42001-05-16 21:05:17 +00001722 if ((*in > 0) && (*in < 0x80)) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001723 count = in - ctxt->input->cur;
1724 ret = xmlStrndup(ctxt->input->cur, count);
1725 ctxt->input->cur = in;
1726 return(ret);
1727 }
1728 }
Daniel Veillard2f362242001-03-02 17:36:21 +00001729 return(xmlParseNameComplex(ctxt));
Daniel Veillard21a0f912001-02-25 19:54:14 +00001730}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001731
Daniel Veillard76d66f42001-05-16 21:05:17 +00001732static xmlChar *
Daniel Veillard21a0f912001-02-25 19:54:14 +00001733xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1734 xmlChar buf[XML_MAX_NAMELEN + 5];
1735 int len = 0, l;
1736 int c;
1737 int count = 0;
1738
1739 /*
1740 * Handler for more complex cases
1741 */
1742 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001743 c = CUR_CHAR(l);
1744 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1745 (!IS_LETTER(c) && (c != '_') &&
1746 (c != ':'))) {
1747 return(NULL);
1748 }
1749
1750 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1751 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1752 (c == '.') || (c == '-') ||
1753 (c == '_') || (c == ':') ||
1754 (IS_COMBINING(c)) ||
1755 (IS_EXTENDER(c)))) {
1756 if (count++ > 100) {
1757 count = 0;
1758 GROW;
1759 }
1760 COPY_BUF(l,buf,len,c);
1761 NEXTL(l);
1762 c = CUR_CHAR(l);
1763 if (len >= XML_MAX_NAMELEN) {
1764 /*
1765 * Okay someone managed to make a huge name, so he's ready to pay
1766 * for the processing speed.
1767 */
1768 xmlChar *buffer;
1769 int max = len * 2;
1770
1771 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1772 if (buffer == NULL) {
1773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1774 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001775 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001776 return(NULL);
1777 }
1778 memcpy(buffer, buf, len);
1779 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1780 (c == '.') || (c == '-') ||
1781 (c == '_') || (c == ':') ||
1782 (IS_COMBINING(c)) ||
1783 (IS_EXTENDER(c))) {
1784 if (count++ > 100) {
1785 count = 0;
1786 GROW;
1787 }
1788 if (len + 10 > max) {
1789 max *= 2;
1790 buffer = (xmlChar *) xmlRealloc(buffer,
1791 max * sizeof(xmlChar));
1792 if (buffer == NULL) {
1793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1794 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001795 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001796 return(NULL);
1797 }
1798 }
1799 COPY_BUF(l,buffer,len,c);
1800 NEXTL(l);
1801 c = CUR_CHAR(l);
1802 }
1803 buffer[len] = 0;
1804 return(buffer);
1805 }
1806 }
1807 return(xmlStrndup(buf, len));
1808}
1809
1810/**
1811 * xmlParseStringName:
1812 * @ctxt: an XML parser context
1813 * @str: a pointer to the string pointer (IN/OUT)
1814 *
1815 * parse an XML name.
1816 *
1817 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1818 * CombiningChar | Extender
1819 *
1820 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1821 *
1822 * [6] Names ::= Name (S Name)*
1823 *
1824 * Returns the Name parsed or NULL. The str pointer
1825 * is updated to the current location in the string.
1826 */
1827
Daniel Veillard56a4cb82001-03-24 17:00:36 +00001828static xmlChar *
Owen Taylor3473f882001-02-23 17:55:21 +00001829xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1830 xmlChar buf[XML_MAX_NAMELEN + 5];
1831 const xmlChar *cur = *str;
1832 int len = 0, l;
1833 int c;
1834
1835 c = CUR_SCHAR(cur, l);
1836 if (!IS_LETTER(c) && (c != '_') &&
1837 (c != ':')) {
1838 return(NULL);
1839 }
1840
1841 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1842 (c == '.') || (c == '-') ||
1843 (c == '_') || (c == ':') ||
1844 (IS_COMBINING(c)) ||
1845 (IS_EXTENDER(c))) {
1846 COPY_BUF(l,buf,len,c);
1847 cur += l;
1848 c = CUR_SCHAR(cur, l);
1849 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1850 /*
1851 * Okay someone managed to make a huge name, so he's ready to pay
1852 * for the processing speed.
1853 */
1854 xmlChar *buffer;
1855 int max = len * 2;
1856
1857 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1858 if (buffer == NULL) {
1859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1860 ctxt->sax->error(ctxt->userData,
1861 "xmlParseStringName: out of memory\n");
1862 return(NULL);
1863 }
1864 memcpy(buffer, buf, len);
1865 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1866 (c == '.') || (c == '-') ||
1867 (c == '_') || (c == ':') ||
1868 (IS_COMBINING(c)) ||
1869 (IS_EXTENDER(c))) {
1870 if (len + 10 > max) {
1871 max *= 2;
1872 buffer = (xmlChar *) xmlRealloc(buffer,
1873 max * sizeof(xmlChar));
1874 if (buffer == NULL) {
1875 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1876 ctxt->sax->error(ctxt->userData,
1877 "xmlParseStringName: out of memory\n");
1878 return(NULL);
1879 }
1880 }
1881 COPY_BUF(l,buffer,len,c);
1882 cur += l;
1883 c = CUR_SCHAR(cur, l);
1884 }
1885 buffer[len] = 0;
1886 *str = cur;
1887 return(buffer);
1888 }
1889 }
1890 *str = cur;
1891 return(xmlStrndup(buf, len));
1892}
1893
1894/**
1895 * xmlParseNmtoken:
1896 * @ctxt: an XML parser context
1897 *
1898 * parse an XML Nmtoken.
1899 *
1900 * [7] Nmtoken ::= (NameChar)+
1901 *
1902 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1903 *
1904 * Returns the Nmtoken parsed or NULL
1905 */
1906
1907xmlChar *
1908xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1909 xmlChar buf[XML_MAX_NAMELEN + 5];
1910 int len = 0, l;
1911 int c;
1912 int count = 0;
1913
1914 GROW;
1915 c = CUR_CHAR(l);
1916
1917 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1918 (c == '.') || (c == '-') ||
1919 (c == '_') || (c == ':') ||
1920 (IS_COMBINING(c)) ||
1921 (IS_EXTENDER(c))) {
1922 if (count++ > 100) {
1923 count = 0;
1924 GROW;
1925 }
1926 COPY_BUF(l,buf,len,c);
1927 NEXTL(l);
1928 c = CUR_CHAR(l);
1929 if (len >= XML_MAX_NAMELEN) {
1930 /*
1931 * Okay someone managed to make a huge token, so he's ready to pay
1932 * for the processing speed.
1933 */
1934 xmlChar *buffer;
1935 int max = len * 2;
1936
1937 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1938 if (buffer == NULL) {
1939 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1940 ctxt->sax->error(ctxt->userData,
1941 "xmlParseNmtoken: out of memory\n");
1942 return(NULL);
1943 }
1944 memcpy(buffer, buf, len);
1945 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1946 (c == '.') || (c == '-') ||
1947 (c == '_') || (c == ':') ||
1948 (IS_COMBINING(c)) ||
1949 (IS_EXTENDER(c))) {
1950 if (count++ > 100) {
1951 count = 0;
1952 GROW;
1953 }
1954 if (len + 10 > max) {
1955 max *= 2;
1956 buffer = (xmlChar *) xmlRealloc(buffer,
1957 max * sizeof(xmlChar));
1958 if (buffer == NULL) {
1959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1960 ctxt->sax->error(ctxt->userData,
Daniel Veillard29631a82001-03-05 09:49:20 +00001961 "xmlParseNameComplex: out of memory\n");
Owen Taylor3473f882001-02-23 17:55:21 +00001962 return(NULL);
1963 }
1964 }
1965 COPY_BUF(l,buffer,len,c);
1966 NEXTL(l);
1967 c = CUR_CHAR(l);
1968 }
1969 buffer[len] = 0;
1970 return(buffer);
1971 }
1972 }
1973 if (len == 0)
1974 return(NULL);
1975 return(xmlStrndup(buf, len));
1976}
1977
1978/**
1979 * xmlParseEntityValue:
1980 * @ctxt: an XML parser context
1981 * @orig: if non-NULL store a copy of the original entity value
1982 *
1983 * parse a value for ENTITY declarations
1984 *
1985 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1986 * "'" ([^%&'] | PEReference | Reference)* "'"
1987 *
1988 * Returns the EntityValue parsed with reference substitued or NULL
1989 */
1990
1991xmlChar *
1992xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1993 xmlChar *buf = NULL;
1994 int len = 0;
1995 int size = XML_PARSER_BUFFER_SIZE;
1996 int c, l;
1997 xmlChar stop;
1998 xmlChar *ret = NULL;
1999 const xmlChar *cur = NULL;
2000 xmlParserInputPtr input;
2001
2002 if (RAW == '"') stop = '"';
2003 else if (RAW == '\'') stop = '\'';
2004 else {
2005 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
2006 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2007 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
2008 ctxt->wellFormed = 0;
2009 ctxt->disableSAX = 1;
2010 return(NULL);
2011 }
2012 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2013 if (buf == NULL) {
2014 xmlGenericError(xmlGenericErrorContext,
2015 "malloc of %d byte failed\n", size);
2016 return(NULL);
2017 }
2018
2019 /*
2020 * The content of the entity definition is copied in a buffer.
2021 */
2022
2023 ctxt->instate = XML_PARSER_ENTITY_VALUE;
2024 input = ctxt->input;
2025 GROW;
2026 NEXT;
2027 c = CUR_CHAR(l);
2028 /*
2029 * NOTE: 4.4.5 Included in Literal
2030 * When a parameter entity reference appears in a literal entity
2031 * value, ... a single or double quote character in the replacement
2032 * text is always treated as a normal data character and will not
2033 * terminate the literal.
2034 * In practice it means we stop the loop only when back at parsing
2035 * the initial entity and the quote is found
2036 */
2037 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
2038 (ctxt->input != input))) {
2039 if (len + 5 >= size) {
2040 size *= 2;
2041 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2042 if (buf == NULL) {
2043 xmlGenericError(xmlGenericErrorContext,
2044 "realloc of %d byte failed\n", size);
2045 return(NULL);
2046 }
2047 }
2048 COPY_BUF(l,buf,len,c);
2049 NEXTL(l);
2050 /*
2051 * Pop-up of finished entities.
2052 */
2053 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
2054 xmlPopInput(ctxt);
2055
2056 GROW;
2057 c = CUR_CHAR(l);
2058 if (c == 0) {
2059 GROW;
2060 c = CUR_CHAR(l);
2061 }
2062 }
2063 buf[len] = 0;
2064
2065 /*
2066 * Raise problem w.r.t. '&' and '%' being used in non-entities
2067 * reference constructs. Note Charref will be handled in
2068 * xmlStringDecodeEntities()
2069 */
2070 cur = buf;
2071 while (*cur != 0) { /* non input consuming */
2072 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
2073 xmlChar *name;
2074 xmlChar tmp = *cur;
2075
2076 cur++;
2077 name = xmlParseStringName(ctxt, &cur);
2078 if ((name == NULL) || (*cur != ';')) {
2079 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
2080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2081 ctxt->sax->error(ctxt->userData,
2082 "EntityValue: '%c' forbidden except for entities references\n",
2083 tmp);
2084 ctxt->wellFormed = 0;
2085 ctxt->disableSAX = 1;
2086 }
2087 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2088 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2089 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2090 ctxt->sax->error(ctxt->userData,
2091 "EntityValue: PEReferences forbidden in internal subset\n",
2092 tmp);
2093 ctxt->wellFormed = 0;
2094 ctxt->disableSAX = 1;
2095 }
2096 if (name != NULL)
2097 xmlFree(name);
2098 }
2099 cur++;
2100 }
2101
2102 /*
2103 * Then PEReference entities are substituted.
2104 */
2105 if (c != stop) {
2106 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2108 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2109 ctxt->wellFormed = 0;
2110 ctxt->disableSAX = 1;
2111 xmlFree(buf);
2112 } else {
2113 NEXT;
2114 /*
2115 * NOTE: 4.4.7 Bypassed
2116 * When a general entity reference appears in the EntityValue in
2117 * an entity declaration, it is bypassed and left as is.
2118 * so XML_SUBSTITUTE_REF is not set here.
2119 */
2120 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2121 0, 0, 0);
2122 if (orig != NULL)
2123 *orig = buf;
2124 else
2125 xmlFree(buf);
2126 }
2127
2128 return(ret);
2129}
2130
2131/**
2132 * xmlParseAttValue:
2133 * @ctxt: an XML parser context
2134 *
2135 * parse a value for an attribute
2136 * Note: the parser won't do substitution of entities here, this
2137 * will be handled later in xmlStringGetNodeList
2138 *
2139 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2140 * "'" ([^<&'] | Reference)* "'"
2141 *
2142 * 3.3.3 Attribute-Value Normalization:
2143 * Before the value of an attribute is passed to the application or
2144 * checked for validity, the XML processor must normalize it as follows:
2145 * - a character reference is processed by appending the referenced
2146 * character to the attribute value
2147 * - an entity reference is processed by recursively processing the
2148 * replacement text of the entity
2149 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2150 * appending #x20 to the normalized value, except that only a single
2151 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2152 * parsed entity or the literal entity value of an internal parsed entity
2153 * - other characters are processed by appending them to the normalized value
2154 * If the declared value is not CDATA, then the XML processor must further
2155 * process the normalized attribute value by discarding any leading and
2156 * trailing space (#x20) characters, and by replacing sequences of space
2157 * (#x20) characters by a single space (#x20) character.
2158 * All attributes for which no declaration has been read should be treated
2159 * by a non-validating parser as if declared CDATA.
2160 *
2161 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2162 */
2163
2164xmlChar *
2165xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2166 xmlChar limit = 0;
2167 xmlChar *buf = NULL;
2168 int len = 0;
2169 int buf_size = 0;
2170 int c, l;
2171 xmlChar *current = NULL;
2172 xmlEntityPtr ent;
2173
2174
2175 SHRINK;
2176 if (NXT(0) == '"') {
2177 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2178 limit = '"';
2179 NEXT;
2180 } else if (NXT(0) == '\'') {
2181 limit = '\'';
2182 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2183 NEXT;
2184 } else {
2185 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2187 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2188 ctxt->wellFormed = 0;
2189 ctxt->disableSAX = 1;
2190 return(NULL);
2191 }
2192
2193 /*
2194 * allocate a translation buffer.
2195 */
2196 buf_size = XML_PARSER_BUFFER_SIZE;
2197 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2198 if (buf == NULL) {
2199 perror("xmlParseAttValue: malloc failed");
2200 return(NULL);
2201 }
2202
2203 /*
2204 * Ok loop until we reach one of the ending char or a size limit.
2205 */
2206 c = CUR_CHAR(l);
2207 while (((NXT(0) != limit) && /* checked */
2208 (c != '<')) || (ctxt->token != 0)) {
2209 if (c == 0) break;
2210 if (ctxt->token == '&') {
2211 /*
2212 * The reparsing will be done in xmlStringGetNodeList()
2213 * called by the attribute() function in SAX.c
2214 */
2215 static xmlChar buffer[6] = "&#38;";
2216
2217 if (len > buf_size - 10) {
2218 growBuffer(buf);
2219 }
2220 current = &buffer[0];
2221 while (*current != 0) { /* non input consuming */
2222 buf[len++] = *current++;
2223 }
2224 ctxt->token = 0;
2225 } else if (c == '&') {
2226 if (NXT(1) == '#') {
2227 int val = xmlParseCharRef(ctxt);
2228 if (val == '&') {
2229 /*
2230 * The reparsing will be done in xmlStringGetNodeList()
2231 * called by the attribute() function in SAX.c
2232 */
2233 static xmlChar buffer[6] = "&#38;";
2234
2235 if (len > buf_size - 10) {
2236 growBuffer(buf);
2237 }
2238 current = &buffer[0];
2239 while (*current != 0) { /* non input consuming */
2240 buf[len++] = *current++;
2241 }
2242 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002243 if (len > buf_size - 10) {
2244 growBuffer(buf);
2245 }
Owen Taylor3473f882001-02-23 17:55:21 +00002246 len += xmlCopyChar(0, &buf[len], val);
2247 }
2248 } else {
2249 ent = xmlParseEntityRef(ctxt);
2250 if ((ent != NULL) &&
2251 (ctxt->replaceEntities != 0)) {
2252 xmlChar *rep;
2253
2254 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2255 rep = xmlStringDecodeEntities(ctxt, ent->content,
2256 XML_SUBSTITUTE_REF, 0, 0, 0);
2257 if (rep != NULL) {
2258 current = rep;
2259 while (*current != 0) { /* non input consuming */
2260 buf[len++] = *current++;
2261 if (len > buf_size - 10) {
2262 growBuffer(buf);
2263 }
2264 }
2265 xmlFree(rep);
2266 }
2267 } else {
Daniel Veillard0b6b55b2001-03-20 11:27:34 +00002268 if (len > buf_size - 10) {
2269 growBuffer(buf);
2270 }
Owen Taylor3473f882001-02-23 17:55:21 +00002271 if (ent->content != NULL)
2272 buf[len++] = ent->content[0];
2273 }
2274 } else if (ent != NULL) {
2275 int i = xmlStrlen(ent->name);
2276 const xmlChar *cur = ent->name;
2277
2278 /*
2279 * This may look absurd but is needed to detect
2280 * entities problems
2281 */
2282 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2283 (ent->content != NULL)) {
2284 xmlChar *rep;
2285 rep = xmlStringDecodeEntities(ctxt, ent->content,
2286 XML_SUBSTITUTE_REF, 0, 0, 0);
2287 if (rep != NULL)
2288 xmlFree(rep);
2289 }
2290
2291 /*
2292 * Just output the reference
2293 */
2294 buf[len++] = '&';
2295 if (len > buf_size - i - 10) {
2296 growBuffer(buf);
2297 }
2298 for (;i > 0;i--)
2299 buf[len++] = *cur++;
2300 buf[len++] = ';';
2301 }
2302 }
2303 } else {
2304 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2305 COPY_BUF(l,buf,len,0x20);
2306 if (len > buf_size - 10) {
2307 growBuffer(buf);
2308 }
2309 } else {
2310 COPY_BUF(l,buf,len,c);
2311 if (len > buf_size - 10) {
2312 growBuffer(buf);
2313 }
2314 }
2315 NEXTL(l);
2316 }
2317 GROW;
2318 c = CUR_CHAR(l);
2319 }
2320 buf[len++] = 0;
2321 if (RAW == '<') {
2322 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2324 ctxt->sax->error(ctxt->userData,
2325 "Unescaped '<' not allowed in attributes values\n");
2326 ctxt->wellFormed = 0;
2327 ctxt->disableSAX = 1;
2328 } else if (RAW != limit) {
2329 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2330 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2331 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2332 ctxt->wellFormed = 0;
2333 ctxt->disableSAX = 1;
2334 } else
2335 NEXT;
2336 return(buf);
2337}
2338
2339/**
2340 * xmlParseSystemLiteral:
2341 * @ctxt: an XML parser context
2342 *
2343 * parse an XML Literal
2344 *
2345 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2346 *
2347 * Returns the SystemLiteral parsed or NULL
2348 */
2349
2350xmlChar *
2351xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2352 xmlChar *buf = NULL;
2353 int len = 0;
2354 int size = XML_PARSER_BUFFER_SIZE;
2355 int cur, l;
2356 xmlChar stop;
2357 int state = ctxt->instate;
2358 int count = 0;
2359
2360 SHRINK;
2361 if (RAW == '"') {
2362 NEXT;
2363 stop = '"';
2364 } else if (RAW == '\'') {
2365 NEXT;
2366 stop = '\'';
2367 } else {
2368 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2369 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2370 ctxt->sax->error(ctxt->userData,
2371 "SystemLiteral \" or ' expected\n");
2372 ctxt->wellFormed = 0;
2373 ctxt->disableSAX = 1;
2374 return(NULL);
2375 }
2376
2377 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2378 if (buf == NULL) {
2379 xmlGenericError(xmlGenericErrorContext,
2380 "malloc of %d byte failed\n", size);
2381 return(NULL);
2382 }
2383 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2384 cur = CUR_CHAR(l);
2385 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2386 if (len + 5 >= size) {
2387 size *= 2;
2388 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2389 if (buf == NULL) {
2390 xmlGenericError(xmlGenericErrorContext,
2391 "realloc of %d byte failed\n", size);
2392 ctxt->instate = (xmlParserInputState) state;
2393 return(NULL);
2394 }
2395 }
2396 count++;
2397 if (count > 50) {
2398 GROW;
2399 count = 0;
2400 }
2401 COPY_BUF(l,buf,len,cur);
2402 NEXTL(l);
2403 cur = CUR_CHAR(l);
2404 if (cur == 0) {
2405 GROW;
2406 SHRINK;
2407 cur = CUR_CHAR(l);
2408 }
2409 }
2410 buf[len] = 0;
2411 ctxt->instate = (xmlParserInputState) state;
2412 if (!IS_CHAR(cur)) {
2413 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2414 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2415 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2416 ctxt->wellFormed = 0;
2417 ctxt->disableSAX = 1;
2418 } else {
2419 NEXT;
2420 }
2421 return(buf);
2422}
2423
2424/**
2425 * xmlParsePubidLiteral:
2426 * @ctxt: an XML parser context
2427 *
2428 * parse an XML public literal
2429 *
2430 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2431 *
2432 * Returns the PubidLiteral parsed or NULL.
2433 */
2434
2435xmlChar *
2436xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2437 xmlChar *buf = NULL;
2438 int len = 0;
2439 int size = XML_PARSER_BUFFER_SIZE;
2440 xmlChar cur;
2441 xmlChar stop;
2442 int count = 0;
2443
2444 SHRINK;
2445 if (RAW == '"') {
2446 NEXT;
2447 stop = '"';
2448 } else if (RAW == '\'') {
2449 NEXT;
2450 stop = '\'';
2451 } else {
2452 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2454 ctxt->sax->error(ctxt->userData,
2455 "SystemLiteral \" or ' expected\n");
2456 ctxt->wellFormed = 0;
2457 ctxt->disableSAX = 1;
2458 return(NULL);
2459 }
2460 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2461 if (buf == NULL) {
2462 xmlGenericError(xmlGenericErrorContext,
2463 "malloc of %d byte failed\n", size);
2464 return(NULL);
2465 }
2466 cur = CUR;
2467 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2468 if (len + 1 >= size) {
2469 size *= 2;
2470 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2471 if (buf == NULL) {
2472 xmlGenericError(xmlGenericErrorContext,
2473 "realloc of %d byte failed\n", size);
2474 return(NULL);
2475 }
2476 }
2477 buf[len++] = cur;
2478 count++;
2479 if (count > 50) {
2480 GROW;
2481 count = 0;
2482 }
2483 NEXT;
2484 cur = CUR;
2485 if (cur == 0) {
2486 GROW;
2487 SHRINK;
2488 cur = CUR;
2489 }
2490 }
2491 buf[len] = 0;
2492 if (cur != stop) {
2493 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2495 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2496 ctxt->wellFormed = 0;
2497 ctxt->disableSAX = 1;
2498 } else {
2499 NEXT;
2500 }
2501 return(buf);
2502}
2503
Daniel Veillard48b2f892001-02-25 16:11:03 +00002504void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002505/**
2506 * xmlParseCharData:
2507 * @ctxt: an XML parser context
2508 * @cdata: int indicating whether we are within a CDATA section
2509 *
2510 * parse a CharData section.
2511 * if we are within a CDATA section ']]>' marks an end of section.
2512 *
2513 * The right angle bracket (>) may be represented using the string "&gt;",
2514 * and must, for compatibility, be escaped using "&gt;" or a character
2515 * reference when it appears in the string "]]>" in content, when that
2516 * string is not marking the end of a CDATA section.
2517 *
2518 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2519 */
2520
2521void
2522xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002523 const xmlChar *in;
2524 int nbchar = 0;
Daniel Veillard50582112001-03-26 22:52:16 +00002525 int line = ctxt->input->line;
2526 int col = ctxt->input->col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002527
2528 SHRINK;
2529 GROW;
2530 /*
2531 * Accelerated common case where input don't need to be
2532 * modified before passing it to the handler.
2533 */
2534 if ((ctxt->token == 0) && (!cdata)) {
2535 in = ctxt->input->cur;
2536 do {
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002537get_more:
Daniel Veillard48b2f892001-02-25 16:11:03 +00002538 while (((*in >= 0x20) && (*in != '<') &&
2539 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2540 in++;
2541 if (*in == 0xA) {
2542 ctxt->input->line++;
Daniel Veillard3ed155f2001-04-29 19:56:59 +00002543 in++;
2544 while (*in == 0xA) {
2545 ctxt->input->line++;
2546 in++;
2547 }
2548 goto get_more;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002549 }
2550 nbchar = in - ctxt->input->cur;
Daniel Veillard80f32572001-03-07 19:45:40 +00002551 if (nbchar > 0) {
Daniel Veillarda7374592001-05-10 14:17:55 +00002552 if (IS_BLANK(*ctxt->input->cur)) {
2553 const xmlChar *tmp = ctxt->input->cur;
2554 ctxt->input->cur = in;
2555 if (areBlanks(ctxt, tmp, nbchar)) {
2556 if (ctxt->sax->ignorableWhitespace != NULL)
2557 ctxt->sax->ignorableWhitespace(ctxt->userData,
2558 tmp, nbchar);
2559 } else {
2560 if (ctxt->sax->characters != NULL)
2561 ctxt->sax->characters(ctxt->userData,
2562 tmp, nbchar);
2563 }
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002564 line = ctxt->input->line;
2565 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002566 } else {
2567 if (ctxt->sax->characters != NULL)
2568 ctxt->sax->characters(ctxt->userData,
2569 ctxt->input->cur, nbchar);
Daniel Veillard3ed27bd2001-06-17 17:58:17 +00002570 line = ctxt->input->line;
2571 col = ctxt->input->col;
Daniel Veillard80f32572001-03-07 19:45:40 +00002572 }
Daniel Veillard48b2f892001-02-25 16:11:03 +00002573 }
2574 ctxt->input->cur = in;
2575 if (*in == 0xD) {
2576 in++;
2577 if (*in == 0xA) {
2578 ctxt->input->cur = in;
2579 in++;
2580 ctxt->input->line++;
2581 continue; /* while */
2582 }
2583 in--;
2584 }
Daniel Veillard80f32572001-03-07 19:45:40 +00002585 if (*in == '<') {
2586 return;
2587 }
2588 if (*in == '&') {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002589 return;
2590 }
2591 SHRINK;
2592 GROW;
2593 in = ctxt->input->cur;
2594 } while ((*in >= 0x20) && (*in <= 0x7F));
2595 nbchar = 0;
2596 }
Daniel Veillard50582112001-03-26 22:52:16 +00002597 ctxt->input->line = line;
2598 ctxt->input->col = col;
Daniel Veillard48b2f892001-02-25 16:11:03 +00002599 xmlParseCharDataComplex(ctxt, cdata);
2600}
2601
2602void
2603xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002604 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2605 int nbchar = 0;
2606 int cur, l;
2607 int count = 0;
2608
2609 SHRINK;
2610 GROW;
2611 cur = CUR_CHAR(l);
2612 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2613 ((cur != '&') || (ctxt->token == '&')) &&
2614 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2615 if ((cur == ']') && (NXT(1) == ']') &&
2616 (NXT(2) == '>')) {
2617 if (cdata) break;
2618 else {
2619 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2620 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2621 ctxt->sax->error(ctxt->userData,
2622 "Sequence ']]>' not allowed in content\n");
2623 /* Should this be relaxed ??? I see a "must here */
2624 ctxt->wellFormed = 0;
2625 ctxt->disableSAX = 1;
2626 }
2627 }
2628 COPY_BUF(l,buf,nbchar,cur);
2629 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2630 /*
2631 * Ok the segment is to be consumed as chars.
2632 */
2633 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2634 if (areBlanks(ctxt, buf, nbchar)) {
2635 if (ctxt->sax->ignorableWhitespace != NULL)
2636 ctxt->sax->ignorableWhitespace(ctxt->userData,
2637 buf, nbchar);
2638 } else {
2639 if (ctxt->sax->characters != NULL)
2640 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2641 }
2642 }
2643 nbchar = 0;
2644 }
2645 count++;
2646 if (count > 50) {
2647 GROW;
2648 count = 0;
2649 }
2650 NEXTL(l);
2651 cur = CUR_CHAR(l);
2652 }
2653 if (nbchar != 0) {
2654 /*
2655 * Ok the segment is to be consumed as chars.
2656 */
2657 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2658 if (areBlanks(ctxt, buf, nbchar)) {
2659 if (ctxt->sax->ignorableWhitespace != NULL)
2660 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2661 } else {
2662 if (ctxt->sax->characters != NULL)
2663 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2664 }
2665 }
2666 }
2667}
2668
2669/**
2670 * xmlParseExternalID:
2671 * @ctxt: an XML parser context
2672 * @publicID: a xmlChar** receiving PubidLiteral
2673 * @strict: indicate whether we should restrict parsing to only
2674 * production [75], see NOTE below
2675 *
2676 * Parse an External ID or a Public ID
2677 *
2678 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2679 * 'PUBLIC' S PubidLiteral S SystemLiteral
2680 *
2681 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2682 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2683 *
2684 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2685 *
2686 * Returns the function returns SystemLiteral and in the second
2687 * case publicID receives PubidLiteral, is strict is off
2688 * it is possible to return NULL and have publicID set.
2689 */
2690
2691xmlChar *
2692xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2693 xmlChar *URI = NULL;
2694
2695 SHRINK;
Daniel Veillard146c9122001-03-22 15:22:27 +00002696
2697 *publicID = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00002698 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2699 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2700 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2701 SKIP(6);
2702 if (!IS_BLANK(CUR)) {
2703 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2705 ctxt->sax->error(ctxt->userData,
2706 "Space required after 'SYSTEM'\n");
2707 ctxt->wellFormed = 0;
2708 ctxt->disableSAX = 1;
2709 }
2710 SKIP_BLANKS;
2711 URI = xmlParseSystemLiteral(ctxt);
2712 if (URI == NULL) {
2713 ctxt->errNo = XML_ERR_URI_REQUIRED;
2714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2715 ctxt->sax->error(ctxt->userData,
2716 "xmlParseExternalID: SYSTEM, no URI\n");
2717 ctxt->wellFormed = 0;
2718 ctxt->disableSAX = 1;
2719 }
2720 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2721 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2722 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2723 SKIP(6);
2724 if (!IS_BLANK(CUR)) {
2725 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2726 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2727 ctxt->sax->error(ctxt->userData,
2728 "Space required after 'PUBLIC'\n");
2729 ctxt->wellFormed = 0;
2730 ctxt->disableSAX = 1;
2731 }
2732 SKIP_BLANKS;
2733 *publicID = xmlParsePubidLiteral(ctxt);
2734 if (*publicID == NULL) {
2735 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2736 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2737 ctxt->sax->error(ctxt->userData,
2738 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2739 ctxt->wellFormed = 0;
2740 ctxt->disableSAX = 1;
2741 }
2742 if (strict) {
2743 /*
2744 * We don't handle [83] so "S SystemLiteral" is required.
2745 */
2746 if (!IS_BLANK(CUR)) {
2747 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2748 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2749 ctxt->sax->error(ctxt->userData,
2750 "Space required after the Public Identifier\n");
2751 ctxt->wellFormed = 0;
2752 ctxt->disableSAX = 1;
2753 }
2754 } else {
2755 /*
2756 * We handle [83] so we return immediately, if
2757 * "S SystemLiteral" is not detected. From a purely parsing
2758 * point of view that's a nice mess.
2759 */
2760 const xmlChar *ptr;
2761 GROW;
2762
2763 ptr = CUR_PTR;
2764 if (!IS_BLANK(*ptr)) return(NULL);
2765
2766 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2767 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2768 }
2769 SKIP_BLANKS;
2770 URI = xmlParseSystemLiteral(ctxt);
2771 if (URI == NULL) {
2772 ctxt->errNo = XML_ERR_URI_REQUIRED;
2773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2774 ctxt->sax->error(ctxt->userData,
2775 "xmlParseExternalID: PUBLIC, no URI\n");
2776 ctxt->wellFormed = 0;
2777 ctxt->disableSAX = 1;
2778 }
2779 }
2780 return(URI);
2781}
2782
2783/**
2784 * xmlParseComment:
2785 * @ctxt: an XML parser context
2786 *
2787 * Skip an XML (SGML) comment <!-- .... -->
2788 * The spec says that "For compatibility, the string "--" (double-hyphen)
2789 * must not occur within comments. "
2790 *
2791 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2792 */
2793void
2794xmlParseComment(xmlParserCtxtPtr ctxt) {
2795 xmlChar *buf = NULL;
2796 int len;
2797 int size = XML_PARSER_BUFFER_SIZE;
2798 int q, ql;
2799 int r, rl;
2800 int cur, l;
2801 xmlParserInputState state;
2802 xmlParserInputPtr input = ctxt->input;
2803 int count = 0;
2804
2805 /*
2806 * Check that there is a comment right here.
2807 */
2808 if ((RAW != '<') || (NXT(1) != '!') ||
2809 (NXT(2) != '-') || (NXT(3) != '-')) return;
2810
2811 state = ctxt->instate;
2812 ctxt->instate = XML_PARSER_COMMENT;
2813 SHRINK;
2814 SKIP(4);
2815 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2816 if (buf == NULL) {
2817 xmlGenericError(xmlGenericErrorContext,
2818 "malloc of %d byte failed\n", size);
2819 ctxt->instate = state;
2820 return;
2821 }
2822 q = CUR_CHAR(ql);
2823 NEXTL(ql);
2824 r = CUR_CHAR(rl);
2825 NEXTL(rl);
2826 cur = CUR_CHAR(l);
2827 len = 0;
2828 while (IS_CHAR(cur) && /* checked */
2829 ((cur != '>') ||
2830 (r != '-') || (q != '-'))) {
2831 if ((r == '-') && (q == '-') && (len > 1)) {
2832 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2834 ctxt->sax->error(ctxt->userData,
2835 "Comment must not contain '--' (double-hyphen)`\n");
2836 ctxt->wellFormed = 0;
2837 ctxt->disableSAX = 1;
2838 }
2839 if (len + 5 >= size) {
2840 size *= 2;
2841 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2842 if (buf == NULL) {
2843 xmlGenericError(xmlGenericErrorContext,
2844 "realloc of %d byte failed\n", size);
2845 ctxt->instate = state;
2846 return;
2847 }
2848 }
2849 COPY_BUF(ql,buf,len,q);
2850 q = r;
2851 ql = rl;
2852 r = cur;
2853 rl = l;
2854
2855 count++;
2856 if (count > 50) {
2857 GROW;
2858 count = 0;
2859 }
2860 NEXTL(l);
2861 cur = CUR_CHAR(l);
2862 if (cur == 0) {
2863 SHRINK;
2864 GROW;
2865 cur = CUR_CHAR(l);
2866 }
2867 }
2868 buf[len] = 0;
2869 if (!IS_CHAR(cur)) {
2870 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2872 ctxt->sax->error(ctxt->userData,
2873 "Comment not terminated \n<!--%.50s\n", buf);
2874 ctxt->wellFormed = 0;
2875 ctxt->disableSAX = 1;
2876 xmlFree(buf);
2877 } else {
2878 if (input != ctxt->input) {
2879 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2880 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2881 ctxt->sax->error(ctxt->userData,
2882"Comment doesn't start and stop in the same entity\n");
2883 ctxt->wellFormed = 0;
2884 ctxt->disableSAX = 1;
2885 }
2886 NEXT;
2887 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2888 (!ctxt->disableSAX))
2889 ctxt->sax->comment(ctxt->userData, buf);
2890 xmlFree(buf);
2891 }
2892 ctxt->instate = state;
2893}
2894
2895/**
2896 * xmlParsePITarget:
2897 * @ctxt: an XML parser context
2898 *
2899 * parse the name of a PI
2900 *
2901 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2902 *
2903 * Returns the PITarget name or NULL
2904 */
2905
2906xmlChar *
2907xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2908 xmlChar *name;
2909
2910 name = xmlParseName(ctxt);
2911 if ((name != NULL) &&
2912 ((name[0] == 'x') || (name[0] == 'X')) &&
2913 ((name[1] == 'm') || (name[1] == 'M')) &&
2914 ((name[2] == 'l') || (name[2] == 'L'))) {
2915 int i;
2916 if ((name[0] == 'x') && (name[1] == 'm') &&
2917 (name[2] == 'l') && (name[3] == 0)) {
2918 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2920 ctxt->sax->error(ctxt->userData,
2921 "XML declaration allowed only at the start of the document\n");
2922 ctxt->wellFormed = 0;
2923 ctxt->disableSAX = 1;
2924 return(name);
2925 } else if (name[3] == 0) {
2926 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2927 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2928 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2929 ctxt->wellFormed = 0;
2930 ctxt->disableSAX = 1;
2931 return(name);
2932 }
2933 for (i = 0;;i++) {
2934 if (xmlW3CPIs[i] == NULL) break;
2935 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2936 return(name);
2937 }
2938 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2939 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2940 ctxt->sax->warning(ctxt->userData,
2941 "xmlParsePItarget: invalid name prefix 'xml'\n");
2942 }
2943 }
2944 return(name);
2945}
2946
2947/**
2948 * xmlParsePI:
2949 * @ctxt: an XML parser context
2950 *
2951 * parse an XML Processing Instruction.
2952 *
2953 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2954 *
2955 * The processing is transfered to SAX once parsed.
2956 */
2957
2958void
2959xmlParsePI(xmlParserCtxtPtr ctxt) {
2960 xmlChar *buf = NULL;
2961 int len = 0;
2962 int size = XML_PARSER_BUFFER_SIZE;
2963 int cur, l;
2964 xmlChar *target;
2965 xmlParserInputState state;
2966 int count = 0;
2967
2968 if ((RAW == '<') && (NXT(1) == '?')) {
2969 xmlParserInputPtr input = ctxt->input;
2970 state = ctxt->instate;
2971 ctxt->instate = XML_PARSER_PI;
2972 /*
2973 * this is a Processing Instruction.
2974 */
2975 SKIP(2);
2976 SHRINK;
2977
2978 /*
2979 * Parse the target name and check for special support like
2980 * namespace.
2981 */
2982 target = xmlParsePITarget(ctxt);
2983 if (target != NULL) {
2984 if ((RAW == '?') && (NXT(1) == '>')) {
2985 if (input != ctxt->input) {
2986 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2987 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2988 ctxt->sax->error(ctxt->userData,
2989 "PI declaration doesn't start and stop in the same entity\n");
2990 ctxt->wellFormed = 0;
2991 ctxt->disableSAX = 1;
2992 }
2993 SKIP(2);
2994
2995 /*
2996 * SAX: PI detected.
2997 */
2998 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2999 (ctxt->sax->processingInstruction != NULL))
3000 ctxt->sax->processingInstruction(ctxt->userData,
3001 target, NULL);
3002 ctxt->instate = state;
3003 xmlFree(target);
3004 return;
3005 }
3006 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
3007 if (buf == NULL) {
3008 xmlGenericError(xmlGenericErrorContext,
3009 "malloc of %d byte failed\n", size);
3010 ctxt->instate = state;
3011 return;
3012 }
3013 cur = CUR;
3014 if (!IS_BLANK(cur)) {
3015 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3017 ctxt->sax->error(ctxt->userData,
3018 "xmlParsePI: PI %s space expected\n", target);
3019 ctxt->wellFormed = 0;
3020 ctxt->disableSAX = 1;
3021 }
3022 SKIP_BLANKS;
3023 cur = CUR_CHAR(l);
3024 while (IS_CHAR(cur) && /* checked */
3025 ((cur != '?') || (NXT(1) != '>'))) {
3026 if (len + 5 >= size) {
3027 size *= 2;
3028 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
3029 if (buf == NULL) {
3030 xmlGenericError(xmlGenericErrorContext,
3031 "realloc of %d byte failed\n", size);
3032 ctxt->instate = state;
3033 return;
3034 }
3035 }
3036 count++;
3037 if (count > 50) {
3038 GROW;
3039 count = 0;
3040 }
3041 COPY_BUF(l,buf,len,cur);
3042 NEXTL(l);
3043 cur = CUR_CHAR(l);
3044 if (cur == 0) {
3045 SHRINK;
3046 GROW;
3047 cur = CUR_CHAR(l);
3048 }
3049 }
3050 buf[len] = 0;
3051 if (cur != '?') {
3052 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
3053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3054 ctxt->sax->error(ctxt->userData,
3055 "xmlParsePI: PI %s never end ...\n", target);
3056 ctxt->wellFormed = 0;
3057 ctxt->disableSAX = 1;
3058 } else {
3059 if (input != ctxt->input) {
3060 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3062 ctxt->sax->error(ctxt->userData,
3063 "PI declaration doesn't start and stop in the same entity\n");
3064 ctxt->wellFormed = 0;
3065 ctxt->disableSAX = 1;
3066 }
3067 SKIP(2);
3068
3069 /*
3070 * SAX: PI detected.
3071 */
3072 if ((ctxt->sax) && (!ctxt->disableSAX) &&
3073 (ctxt->sax->processingInstruction != NULL))
3074 ctxt->sax->processingInstruction(ctxt->userData,
3075 target, buf);
3076 }
3077 xmlFree(buf);
3078 xmlFree(target);
3079 } else {
3080 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
3081 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3082 ctxt->sax->error(ctxt->userData,
3083 "xmlParsePI : no target name\n");
3084 ctxt->wellFormed = 0;
3085 ctxt->disableSAX = 1;
3086 }
3087 ctxt->instate = state;
3088 }
3089}
3090
3091/**
3092 * xmlParseNotationDecl:
3093 * @ctxt: an XML parser context
3094 *
3095 * parse a notation declaration
3096 *
3097 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
3098 *
3099 * Hence there is actually 3 choices:
3100 * 'PUBLIC' S PubidLiteral
3101 * 'PUBLIC' S PubidLiteral S SystemLiteral
3102 * and 'SYSTEM' S SystemLiteral
3103 *
3104 * See the NOTE on xmlParseExternalID().
3105 */
3106
3107void
3108xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
3109 xmlChar *name;
3110 xmlChar *Pubid;
3111 xmlChar *Systemid;
3112
3113 if ((RAW == '<') && (NXT(1) == '!') &&
3114 (NXT(2) == 'N') && (NXT(3) == 'O') &&
3115 (NXT(4) == 'T') && (NXT(5) == 'A') &&
3116 (NXT(6) == 'T') && (NXT(7) == 'I') &&
3117 (NXT(8) == 'O') && (NXT(9) == 'N')) {
3118 xmlParserInputPtr input = ctxt->input;
3119 SHRINK;
3120 SKIP(10);
3121 if (!IS_BLANK(CUR)) {
3122 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3123 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3124 ctxt->sax->error(ctxt->userData,
3125 "Space required after '<!NOTATION'\n");
3126 ctxt->wellFormed = 0;
3127 ctxt->disableSAX = 1;
3128 return;
3129 }
3130 SKIP_BLANKS;
3131
Daniel Veillard76d66f42001-05-16 21:05:17 +00003132 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003133 if (name == NULL) {
3134 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3136 ctxt->sax->error(ctxt->userData,
3137 "NOTATION: Name expected here\n");
3138 ctxt->wellFormed = 0;
3139 ctxt->disableSAX = 1;
3140 return;
3141 }
3142 if (!IS_BLANK(CUR)) {
3143 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3145 ctxt->sax->error(ctxt->userData,
3146 "Space required after the NOTATION name'\n");
3147 ctxt->wellFormed = 0;
3148 ctxt->disableSAX = 1;
3149 return;
3150 }
3151 SKIP_BLANKS;
3152
3153 /*
3154 * Parse the IDs.
3155 */
3156 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3157 SKIP_BLANKS;
3158
3159 if (RAW == '>') {
3160 if (input != ctxt->input) {
3161 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3162 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3163 ctxt->sax->error(ctxt->userData,
3164"Notation declaration doesn't start and stop in the same entity\n");
3165 ctxt->wellFormed = 0;
3166 ctxt->disableSAX = 1;
3167 }
3168 NEXT;
3169 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3170 (ctxt->sax->notationDecl != NULL))
3171 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3172 } else {
3173 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3174 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3175 ctxt->sax->error(ctxt->userData,
3176 "'>' required to close NOTATION declaration\n");
3177 ctxt->wellFormed = 0;
3178 ctxt->disableSAX = 1;
3179 }
3180 xmlFree(name);
3181 if (Systemid != NULL) xmlFree(Systemid);
3182 if (Pubid != NULL) xmlFree(Pubid);
3183 }
3184}
3185
3186/**
3187 * xmlParseEntityDecl:
3188 * @ctxt: an XML parser context
3189 *
3190 * parse <!ENTITY declarations
3191 *
3192 * [70] EntityDecl ::= GEDecl | PEDecl
3193 *
3194 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3195 *
3196 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3197 *
3198 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3199 *
3200 * [74] PEDef ::= EntityValue | ExternalID
3201 *
3202 * [76] NDataDecl ::= S 'NDATA' S Name
3203 *
3204 * [ VC: Notation Declared ]
3205 * The Name must match the declared name of a notation.
3206 */
3207
3208void
3209xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3210 xmlChar *name = NULL;
3211 xmlChar *value = NULL;
3212 xmlChar *URI = NULL, *literal = NULL;
3213 xmlChar *ndata = NULL;
3214 int isParameter = 0;
3215 xmlChar *orig = NULL;
3216
3217 GROW;
3218 if ((RAW == '<') && (NXT(1) == '!') &&
3219 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3220 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3221 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3222 xmlParserInputPtr input = ctxt->input;
3223 ctxt->instate = XML_PARSER_ENTITY_DECL;
3224 SHRINK;
3225 SKIP(8);
3226 if (!IS_BLANK(CUR)) {
3227 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3229 ctxt->sax->error(ctxt->userData,
3230 "Space required after '<!ENTITY'\n");
3231 ctxt->wellFormed = 0;
3232 ctxt->disableSAX = 1;
3233 }
3234 SKIP_BLANKS;
3235
3236 if (RAW == '%') {
3237 NEXT;
3238 if (!IS_BLANK(CUR)) {
3239 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3241 ctxt->sax->error(ctxt->userData,
3242 "Space required after '%'\n");
3243 ctxt->wellFormed = 0;
3244 ctxt->disableSAX = 1;
3245 }
3246 SKIP_BLANKS;
3247 isParameter = 1;
3248 }
3249
Daniel Veillard76d66f42001-05-16 21:05:17 +00003250 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003251 if (name == NULL) {
3252 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3253 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3254 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3255 ctxt->wellFormed = 0;
3256 ctxt->disableSAX = 1;
3257 return;
3258 }
3259 if (!IS_BLANK(CUR)) {
3260 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3262 ctxt->sax->error(ctxt->userData,
3263 "Space required after the entity name\n");
3264 ctxt->wellFormed = 0;
3265 ctxt->disableSAX = 1;
3266 }
3267 SKIP_BLANKS;
3268
3269 /*
3270 * handle the various case of definitions...
3271 */
3272 if (isParameter) {
3273 if ((RAW == '"') || (RAW == '\'')) {
3274 value = xmlParseEntityValue(ctxt, &orig);
3275 if (value) {
3276 if ((ctxt->sax != NULL) &&
3277 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3278 ctxt->sax->entityDecl(ctxt->userData, name,
3279 XML_INTERNAL_PARAMETER_ENTITY,
3280 NULL, NULL, value);
3281 }
3282 } else {
3283 URI = xmlParseExternalID(ctxt, &literal, 1);
3284 if ((URI == NULL) && (literal == NULL)) {
3285 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3286 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3287 ctxt->sax->error(ctxt->userData,
3288 "Entity value required\n");
3289 ctxt->wellFormed = 0;
3290 ctxt->disableSAX = 1;
3291 }
3292 if (URI) {
3293 xmlURIPtr uri;
3294
3295 uri = xmlParseURI((const char *) URI);
3296 if (uri == NULL) {
3297 ctxt->errNo = XML_ERR_INVALID_URI;
3298 if ((ctxt->sax != NULL) &&
3299 (!ctxt->disableSAX) &&
3300 (ctxt->sax->error != NULL))
3301 ctxt->sax->error(ctxt->userData,
3302 "Invalid URI: %s\n", URI);
3303 ctxt->wellFormed = 0;
3304 } else {
3305 if (uri->fragment != NULL) {
3306 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3307 if ((ctxt->sax != NULL) &&
3308 (!ctxt->disableSAX) &&
3309 (ctxt->sax->error != NULL))
3310 ctxt->sax->error(ctxt->userData,
3311 "Fragment not allowed: %s\n", URI);
3312 ctxt->wellFormed = 0;
3313 } else {
3314 if ((ctxt->sax != NULL) &&
3315 (!ctxt->disableSAX) &&
3316 (ctxt->sax->entityDecl != NULL))
3317 ctxt->sax->entityDecl(ctxt->userData, name,
3318 XML_EXTERNAL_PARAMETER_ENTITY,
3319 literal, URI, NULL);
3320 }
3321 xmlFreeURI(uri);
3322 }
3323 }
3324 }
3325 } else {
3326 if ((RAW == '"') || (RAW == '\'')) {
3327 value = xmlParseEntityValue(ctxt, &orig);
3328 if ((ctxt->sax != NULL) &&
3329 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3330 ctxt->sax->entityDecl(ctxt->userData, name,
3331 XML_INTERNAL_GENERAL_ENTITY,
3332 NULL, NULL, value);
3333 } else {
3334 URI = xmlParseExternalID(ctxt, &literal, 1);
3335 if ((URI == NULL) && (literal == NULL)) {
3336 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3338 ctxt->sax->error(ctxt->userData,
3339 "Entity value required\n");
3340 ctxt->wellFormed = 0;
3341 ctxt->disableSAX = 1;
3342 }
3343 if (URI) {
3344 xmlURIPtr uri;
3345
3346 uri = xmlParseURI((const char *)URI);
3347 if (uri == NULL) {
3348 ctxt->errNo = XML_ERR_INVALID_URI;
3349 if ((ctxt->sax != NULL) &&
3350 (!ctxt->disableSAX) &&
3351 (ctxt->sax->error != NULL))
3352 ctxt->sax->error(ctxt->userData,
3353 "Invalid URI: %s\n", URI);
3354 ctxt->wellFormed = 0;
3355 } else {
3356 if (uri->fragment != NULL) {
3357 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3358 if ((ctxt->sax != NULL) &&
3359 (!ctxt->disableSAX) &&
3360 (ctxt->sax->error != NULL))
3361 ctxt->sax->error(ctxt->userData,
3362 "Fragment not allowed: %s\n", URI);
3363 ctxt->wellFormed = 0;
3364 }
3365 xmlFreeURI(uri);
3366 }
3367 }
3368 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3369 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3370 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3371 ctxt->sax->error(ctxt->userData,
3372 "Space required before 'NDATA'\n");
3373 ctxt->wellFormed = 0;
3374 ctxt->disableSAX = 1;
3375 }
3376 SKIP_BLANKS;
3377 if ((RAW == 'N') && (NXT(1) == 'D') &&
3378 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3379 (NXT(4) == 'A')) {
3380 SKIP(5);
3381 if (!IS_BLANK(CUR)) {
3382 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3383 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3384 ctxt->sax->error(ctxt->userData,
3385 "Space required after 'NDATA'\n");
3386 ctxt->wellFormed = 0;
3387 ctxt->disableSAX = 1;
3388 }
3389 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003390 ndata = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003391 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3392 (ctxt->sax->unparsedEntityDecl != NULL))
3393 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3394 literal, URI, ndata);
3395 } else {
3396 if ((ctxt->sax != NULL) &&
3397 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3398 ctxt->sax->entityDecl(ctxt->userData, name,
3399 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3400 literal, URI, NULL);
3401 }
3402 }
3403 }
3404 SKIP_BLANKS;
3405 if (RAW != '>') {
3406 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3408 ctxt->sax->error(ctxt->userData,
3409 "xmlParseEntityDecl: entity %s not terminated\n", name);
3410 ctxt->wellFormed = 0;
3411 ctxt->disableSAX = 1;
3412 } else {
3413 if (input != ctxt->input) {
3414 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3416 ctxt->sax->error(ctxt->userData,
3417"Entity declaration doesn't start and stop in the same entity\n");
3418 ctxt->wellFormed = 0;
3419 ctxt->disableSAX = 1;
3420 }
3421 NEXT;
3422 }
3423 if (orig != NULL) {
3424 /*
3425 * Ugly mechanism to save the raw entity value.
3426 */
3427 xmlEntityPtr cur = NULL;
3428
3429 if (isParameter) {
3430 if ((ctxt->sax != NULL) &&
3431 (ctxt->sax->getParameterEntity != NULL))
3432 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3433 } else {
3434 if ((ctxt->sax != NULL) &&
3435 (ctxt->sax->getEntity != NULL))
3436 cur = ctxt->sax->getEntity(ctxt->userData, name);
3437 }
3438 if (cur != NULL) {
3439 if (cur->orig != NULL)
3440 xmlFree(orig);
3441 else
3442 cur->orig = orig;
3443 } else
3444 xmlFree(orig);
3445 }
3446 if (name != NULL) xmlFree(name);
3447 if (value != NULL) xmlFree(value);
3448 if (URI != NULL) xmlFree(URI);
3449 if (literal != NULL) xmlFree(literal);
3450 if (ndata != NULL) xmlFree(ndata);
3451 }
3452}
3453
3454/**
3455 * xmlParseDefaultDecl:
3456 * @ctxt: an XML parser context
3457 * @value: Receive a possible fixed default value for the attribute
3458 *
3459 * Parse an attribute default declaration
3460 *
3461 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3462 *
3463 * [ VC: Required Attribute ]
3464 * if the default declaration is the keyword #REQUIRED, then the
3465 * attribute must be specified for all elements of the type in the
3466 * attribute-list declaration.
3467 *
3468 * [ VC: Attribute Default Legal ]
3469 * The declared default value must meet the lexical constraints of
3470 * the declared attribute type c.f. xmlValidateAttributeDecl()
3471 *
3472 * [ VC: Fixed Attribute Default ]
3473 * if an attribute has a default value declared with the #FIXED
3474 * keyword, instances of that attribute must match the default value.
3475 *
3476 * [ WFC: No < in Attribute Values ]
3477 * handled in xmlParseAttValue()
3478 *
3479 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3480 * or XML_ATTRIBUTE_FIXED.
3481 */
3482
3483int
3484xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3485 int val;
3486 xmlChar *ret;
3487
3488 *value = NULL;
3489 if ((RAW == '#') && (NXT(1) == 'R') &&
3490 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3491 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3492 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3493 (NXT(8) == 'D')) {
3494 SKIP(9);
3495 return(XML_ATTRIBUTE_REQUIRED);
3496 }
3497 if ((RAW == '#') && (NXT(1) == 'I') &&
3498 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3499 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3500 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3501 SKIP(8);
3502 return(XML_ATTRIBUTE_IMPLIED);
3503 }
3504 val = XML_ATTRIBUTE_NONE;
3505 if ((RAW == '#') && (NXT(1) == 'F') &&
3506 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3507 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3508 SKIP(6);
3509 val = XML_ATTRIBUTE_FIXED;
3510 if (!IS_BLANK(CUR)) {
3511 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3513 ctxt->sax->error(ctxt->userData,
3514 "Space required after '#FIXED'\n");
3515 ctxt->wellFormed = 0;
3516 ctxt->disableSAX = 1;
3517 }
3518 SKIP_BLANKS;
3519 }
3520 ret = xmlParseAttValue(ctxt);
3521 ctxt->instate = XML_PARSER_DTD;
3522 if (ret == NULL) {
3523 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3524 ctxt->sax->error(ctxt->userData,
3525 "Attribute default value declaration error\n");
3526 ctxt->wellFormed = 0;
3527 ctxt->disableSAX = 1;
3528 } else
3529 *value = ret;
3530 return(val);
3531}
3532
3533/**
3534 * xmlParseNotationType:
3535 * @ctxt: an XML parser context
3536 *
3537 * parse an Notation attribute type.
3538 *
3539 * Note: the leading 'NOTATION' S part has already being parsed...
3540 *
3541 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3542 *
3543 * [ VC: Notation Attributes ]
3544 * Values of this type must match one of the notation names included
3545 * in the declaration; all notation names in the declaration must be declared.
3546 *
3547 * Returns: the notation attribute tree built while parsing
3548 */
3549
3550xmlEnumerationPtr
3551xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3552 xmlChar *name;
3553 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3554
3555 if (RAW != '(') {
3556 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3558 ctxt->sax->error(ctxt->userData,
3559 "'(' required to start 'NOTATION'\n");
3560 ctxt->wellFormed = 0;
3561 ctxt->disableSAX = 1;
3562 return(NULL);
3563 }
3564 SHRINK;
3565 do {
3566 NEXT;
3567 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003568 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003569 if (name == NULL) {
3570 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3572 ctxt->sax->error(ctxt->userData,
3573 "Name expected in NOTATION declaration\n");
3574 ctxt->wellFormed = 0;
3575 ctxt->disableSAX = 1;
3576 return(ret);
3577 }
3578 cur = xmlCreateEnumeration(name);
3579 xmlFree(name);
3580 if (cur == NULL) return(ret);
3581 if (last == NULL) ret = last = cur;
3582 else {
3583 last->next = cur;
3584 last = cur;
3585 }
3586 SKIP_BLANKS;
3587 } while (RAW == '|');
3588 if (RAW != ')') {
3589 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3591 ctxt->sax->error(ctxt->userData,
3592 "')' required to finish NOTATION declaration\n");
3593 ctxt->wellFormed = 0;
3594 ctxt->disableSAX = 1;
3595 if ((last != NULL) && (last != ret))
3596 xmlFreeEnumeration(last);
3597 return(ret);
3598 }
3599 NEXT;
3600 return(ret);
3601}
3602
3603/**
3604 * xmlParseEnumerationType:
3605 * @ctxt: an XML parser context
3606 *
3607 * parse an Enumeration attribute type.
3608 *
3609 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3610 *
3611 * [ VC: Enumeration ]
3612 * Values of this type must match one of the Nmtoken tokens in
3613 * the declaration
3614 *
3615 * Returns: the enumeration attribute tree built while parsing
3616 */
3617
3618xmlEnumerationPtr
3619xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3620 xmlChar *name;
3621 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3622
3623 if (RAW != '(') {
3624 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3626 ctxt->sax->error(ctxt->userData,
3627 "'(' required to start ATTLIST enumeration\n");
3628 ctxt->wellFormed = 0;
3629 ctxt->disableSAX = 1;
3630 return(NULL);
3631 }
3632 SHRINK;
3633 do {
3634 NEXT;
3635 SKIP_BLANKS;
3636 name = xmlParseNmtoken(ctxt);
3637 if (name == NULL) {
3638 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3640 ctxt->sax->error(ctxt->userData,
3641 "NmToken expected in ATTLIST enumeration\n");
3642 ctxt->wellFormed = 0;
3643 ctxt->disableSAX = 1;
3644 return(ret);
3645 }
3646 cur = xmlCreateEnumeration(name);
3647 xmlFree(name);
3648 if (cur == NULL) return(ret);
3649 if (last == NULL) ret = last = cur;
3650 else {
3651 last->next = cur;
3652 last = cur;
3653 }
3654 SKIP_BLANKS;
3655 } while (RAW == '|');
3656 if (RAW != ')') {
3657 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3658 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3659 ctxt->sax->error(ctxt->userData,
3660 "')' required to finish ATTLIST enumeration\n");
3661 ctxt->wellFormed = 0;
3662 ctxt->disableSAX = 1;
3663 return(ret);
3664 }
3665 NEXT;
3666 return(ret);
3667}
3668
3669/**
3670 * xmlParseEnumeratedType:
3671 * @ctxt: an XML parser context
3672 * @tree: the enumeration tree built while parsing
3673 *
3674 * parse an Enumerated attribute type.
3675 *
3676 * [57] EnumeratedType ::= NotationType | Enumeration
3677 *
3678 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3679 *
3680 *
3681 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3682 */
3683
3684int
3685xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3686 if ((RAW == 'N') && (NXT(1) == 'O') &&
3687 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3688 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3689 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3690 SKIP(8);
3691 if (!IS_BLANK(CUR)) {
3692 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3694 ctxt->sax->error(ctxt->userData,
3695 "Space required after 'NOTATION'\n");
3696 ctxt->wellFormed = 0;
3697 ctxt->disableSAX = 1;
3698 return(0);
3699 }
3700 SKIP_BLANKS;
3701 *tree = xmlParseNotationType(ctxt);
3702 if (*tree == NULL) return(0);
3703 return(XML_ATTRIBUTE_NOTATION);
3704 }
3705 *tree = xmlParseEnumerationType(ctxt);
3706 if (*tree == NULL) return(0);
3707 return(XML_ATTRIBUTE_ENUMERATION);
3708}
3709
3710/**
3711 * xmlParseAttributeType:
3712 * @ctxt: an XML parser context
3713 * @tree: the enumeration tree built while parsing
3714 *
3715 * parse the Attribute list def for an element
3716 *
3717 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3718 *
3719 * [55] StringType ::= 'CDATA'
3720 *
3721 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3722 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3723 *
3724 * Validity constraints for attribute values syntax are checked in
3725 * xmlValidateAttributeValue()
3726 *
3727 * [ VC: ID ]
3728 * Values of type ID must match the Name production. A name must not
3729 * appear more than once in an XML document as a value of this type;
3730 * i.e., ID values must uniquely identify the elements which bear them.
3731 *
3732 * [ VC: One ID per Element Type ]
3733 * No element type may have more than one ID attribute specified.
3734 *
3735 * [ VC: ID Attribute Default ]
3736 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3737 *
3738 * [ VC: IDREF ]
3739 * Values of type IDREF must match the Name production, and values
3740 * of type IDREFS must match Names; each IDREF Name must match the value
3741 * of an ID attribute on some element in the XML document; i.e. IDREF
3742 * values must match the value of some ID attribute.
3743 *
3744 * [ VC: Entity Name ]
3745 * Values of type ENTITY must match the Name production, values
3746 * of type ENTITIES must match Names; each Entity Name must match the
3747 * name of an unparsed entity declared in the DTD.
3748 *
3749 * [ VC: Name Token ]
3750 * Values of type NMTOKEN must match the Nmtoken production; values
3751 * of type NMTOKENS must match Nmtokens.
3752 *
3753 * Returns the attribute type
3754 */
3755int
3756xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3757 SHRINK;
3758 if ((RAW == 'C') && (NXT(1) == 'D') &&
3759 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3760 (NXT(4) == 'A')) {
3761 SKIP(5);
3762 return(XML_ATTRIBUTE_CDATA);
3763 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3764 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3765 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3766 SKIP(6);
3767 return(XML_ATTRIBUTE_IDREFS);
3768 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3769 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3770 (NXT(4) == 'F')) {
3771 SKIP(5);
3772 return(XML_ATTRIBUTE_IDREF);
3773 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3774 SKIP(2);
3775 return(XML_ATTRIBUTE_ID);
3776 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3777 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3778 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3779 SKIP(6);
3780 return(XML_ATTRIBUTE_ENTITY);
3781 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3782 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3783 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3784 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3785 SKIP(8);
3786 return(XML_ATTRIBUTE_ENTITIES);
3787 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3788 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3789 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3790 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3791 SKIP(8);
3792 return(XML_ATTRIBUTE_NMTOKENS);
3793 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3794 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3795 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3796 (NXT(6) == 'N')) {
3797 SKIP(7);
3798 return(XML_ATTRIBUTE_NMTOKEN);
3799 }
3800 return(xmlParseEnumeratedType(ctxt, tree));
3801}
3802
3803/**
3804 * xmlParseAttributeListDecl:
3805 * @ctxt: an XML parser context
3806 *
3807 * : parse the Attribute list def for an element
3808 *
3809 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3810 *
3811 * [53] AttDef ::= S Name S AttType S DefaultDecl
3812 *
3813 */
3814void
3815xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3816 xmlChar *elemName;
3817 xmlChar *attrName;
3818 xmlEnumerationPtr tree;
3819
3820 if ((RAW == '<') && (NXT(1) == '!') &&
3821 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3822 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3823 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3824 (NXT(8) == 'T')) {
3825 xmlParserInputPtr input = ctxt->input;
3826
3827 SKIP(9);
3828 if (!IS_BLANK(CUR)) {
3829 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3831 ctxt->sax->error(ctxt->userData,
3832 "Space required after '<!ATTLIST'\n");
3833 ctxt->wellFormed = 0;
3834 ctxt->disableSAX = 1;
3835 }
3836 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003837 elemName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003838 if (elemName == NULL) {
3839 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3840 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3841 ctxt->sax->error(ctxt->userData,
3842 "ATTLIST: no name for Element\n");
3843 ctxt->wellFormed = 0;
3844 ctxt->disableSAX = 1;
3845 return;
3846 }
3847 SKIP_BLANKS;
3848 GROW;
3849 while (RAW != '>') {
3850 const xmlChar *check = CUR_PTR;
3851 int type;
3852 int def;
3853 xmlChar *defaultValue = NULL;
3854
3855 GROW;
3856 tree = NULL;
Daniel Veillard76d66f42001-05-16 21:05:17 +00003857 attrName = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00003858 if (attrName == NULL) {
3859 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3860 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3861 ctxt->sax->error(ctxt->userData,
3862 "ATTLIST: no name for Attribute\n");
3863 ctxt->wellFormed = 0;
3864 ctxt->disableSAX = 1;
3865 break;
3866 }
3867 GROW;
3868 if (!IS_BLANK(CUR)) {
3869 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3870 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3871 ctxt->sax->error(ctxt->userData,
3872 "Space required after the attribute name\n");
3873 ctxt->wellFormed = 0;
3874 ctxt->disableSAX = 1;
3875 if (attrName != NULL)
3876 xmlFree(attrName);
3877 if (defaultValue != NULL)
3878 xmlFree(defaultValue);
3879 break;
3880 }
3881 SKIP_BLANKS;
3882
3883 type = xmlParseAttributeType(ctxt, &tree);
3884 if (type <= 0) {
3885 if (attrName != NULL)
3886 xmlFree(attrName);
3887 if (defaultValue != NULL)
3888 xmlFree(defaultValue);
3889 break;
3890 }
3891
3892 GROW;
3893 if (!IS_BLANK(CUR)) {
3894 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3896 ctxt->sax->error(ctxt->userData,
3897 "Space required after the attribute type\n");
3898 ctxt->wellFormed = 0;
3899 ctxt->disableSAX = 1;
3900 if (attrName != NULL)
3901 xmlFree(attrName);
3902 if (defaultValue != NULL)
3903 xmlFree(defaultValue);
3904 if (tree != NULL)
3905 xmlFreeEnumeration(tree);
3906 break;
3907 }
3908 SKIP_BLANKS;
3909
3910 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3911 if (def <= 0) {
3912 if (attrName != NULL)
3913 xmlFree(attrName);
3914 if (defaultValue != NULL)
3915 xmlFree(defaultValue);
3916 if (tree != NULL)
3917 xmlFreeEnumeration(tree);
3918 break;
3919 }
3920
3921 GROW;
3922 if (RAW != '>') {
3923 if (!IS_BLANK(CUR)) {
3924 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3926 ctxt->sax->error(ctxt->userData,
3927 "Space required after the attribute default value\n");
3928 ctxt->wellFormed = 0;
3929 ctxt->disableSAX = 1;
3930 if (attrName != NULL)
3931 xmlFree(attrName);
3932 if (defaultValue != NULL)
3933 xmlFree(defaultValue);
3934 if (tree != NULL)
3935 xmlFreeEnumeration(tree);
3936 break;
3937 }
3938 SKIP_BLANKS;
3939 }
3940 if (check == CUR_PTR) {
3941 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3943 ctxt->sax->error(ctxt->userData,
3944 "xmlParseAttributeListDecl: detected internal error\n");
3945 if (attrName != NULL)
3946 xmlFree(attrName);
3947 if (defaultValue != NULL)
3948 xmlFree(defaultValue);
3949 if (tree != NULL)
3950 xmlFreeEnumeration(tree);
3951 break;
3952 }
3953 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3954 (ctxt->sax->attributeDecl != NULL))
3955 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3956 type, def, defaultValue, tree);
3957 if (attrName != NULL)
3958 xmlFree(attrName);
3959 if (defaultValue != NULL)
3960 xmlFree(defaultValue);
3961 GROW;
3962 }
3963 if (RAW == '>') {
3964 if (input != ctxt->input) {
3965 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3966 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3967 ctxt->sax->error(ctxt->userData,
3968"Attribute list declaration doesn't start and stop in the same entity\n");
3969 ctxt->wellFormed = 0;
3970 ctxt->disableSAX = 1;
3971 }
3972 NEXT;
3973 }
3974
3975 xmlFree(elemName);
3976 }
3977}
3978
3979/**
3980 * xmlParseElementMixedContentDecl:
3981 * @ctxt: an XML parser context
3982 *
3983 * parse the declaration for a Mixed Element content
3984 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3985 *
3986 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3987 * '(' S? '#PCDATA' S? ')'
3988 *
3989 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3990 *
3991 * [ VC: No Duplicate Types ]
3992 * The same name must not appear more than once in a single
3993 * mixed-content declaration.
3994 *
3995 * returns: the list of the xmlElementContentPtr describing the element choices
3996 */
3997xmlElementContentPtr
3998xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3999 xmlElementContentPtr ret = NULL, cur = NULL, n;
4000 xmlChar *elem = NULL;
4001
4002 GROW;
4003 if ((RAW == '#') && (NXT(1) == 'P') &&
4004 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4005 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4006 (NXT(6) == 'A')) {
4007 SKIP(7);
4008 SKIP_BLANKS;
4009 SHRINK;
4010 if (RAW == ')') {
4011 ctxt->entity = ctxt->input;
4012 NEXT;
4013 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4014 if (RAW == '*') {
4015 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4016 NEXT;
4017 }
4018 return(ret);
4019 }
4020 if ((RAW == '(') || (RAW == '|')) {
4021 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
4022 if (ret == NULL) return(NULL);
4023 }
4024 while (RAW == '|') {
4025 NEXT;
4026 if (elem == NULL) {
4027 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4028 if (ret == NULL) return(NULL);
4029 ret->c1 = cur;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004030 if (cur != NULL)
4031 cur->parent = ret;
Owen Taylor3473f882001-02-23 17:55:21 +00004032 cur = ret;
4033 } else {
4034 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4035 if (n == NULL) return(NULL);
4036 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004037 if (n->c1 != NULL)
4038 n->c1->parent = n;
Owen Taylor3473f882001-02-23 17:55:21 +00004039 cur->c2 = n;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004040 if (n != NULL)
4041 n->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004042 cur = n;
4043 xmlFree(elem);
4044 }
4045 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004046 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004047 if (elem == NULL) {
4048 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4049 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4050 ctxt->sax->error(ctxt->userData,
4051 "xmlParseElementMixedContentDecl : Name expected\n");
4052 ctxt->wellFormed = 0;
4053 ctxt->disableSAX = 1;
4054 xmlFreeElementContent(cur);
4055 return(NULL);
4056 }
4057 SKIP_BLANKS;
4058 GROW;
4059 }
4060 if ((RAW == ')') && (NXT(1) == '*')) {
4061 if (elem != NULL) {
4062 cur->c2 = xmlNewElementContent(elem,
4063 XML_ELEMENT_CONTENT_ELEMENT);
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004064 if (cur->c2 != NULL)
4065 cur->c2->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004066 xmlFree(elem);
4067 }
4068 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4069 ctxt->entity = ctxt->input;
4070 SKIP(2);
4071 } else {
4072 if (elem != NULL) xmlFree(elem);
4073 xmlFreeElementContent(ret);
4074 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
4075 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4076 ctxt->sax->error(ctxt->userData,
4077 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
4078 ctxt->wellFormed = 0;
4079 ctxt->disableSAX = 1;
4080 return(NULL);
4081 }
4082
4083 } else {
4084 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
4085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4086 ctxt->sax->error(ctxt->userData,
4087 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
4088 ctxt->wellFormed = 0;
4089 ctxt->disableSAX = 1;
4090 }
4091 return(ret);
4092}
4093
4094/**
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004095 * xmlParseElementChildrenContentD:
4096 * @ctxt: an XML parser context
4097 *
4098 * VMS version of xmlParseElementChildrenContentDecl()
4099 *
4100 * Returns the tree of xmlElementContentPtr describing the element
4101 * hierarchy.
4102 */
4103/**
Owen Taylor3473f882001-02-23 17:55:21 +00004104 * xmlParseElementChildrenContentDecl:
4105 * @ctxt: an XML parser context
4106 *
4107 * parse the declaration for a Mixed Element content
4108 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
4109 *
4110 *
4111 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
4112 *
4113 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
4114 *
4115 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
4116 *
4117 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
4118 *
4119 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
4120 * TODO Parameter-entity replacement text must be properly nested
4121 * with parenthetized groups. That is to say, if either of the
4122 * opening or closing parentheses in a choice, seq, or Mixed
4123 * construct is contained in the replacement text for a parameter
4124 * entity, both must be contained in the same replacement text. For
4125 * interoperability, if a parameter-entity reference appears in a
4126 * choice, seq, or Mixed construct, its replacement text should not
4127 * be empty, and neither the first nor last non-blank character of
4128 * the replacement text should be a connector (| or ,).
4129 *
Daniel Veillard5e2dace2001-07-18 19:30:27 +00004130 * Returns the tree of xmlElementContentPtr describing the element
Owen Taylor3473f882001-02-23 17:55:21 +00004131 * hierarchy.
4132 */
4133xmlElementContentPtr
4134#ifdef VMS
4135xmlParseElementChildrenContentD
4136#else
4137xmlParseElementChildrenContentDecl
4138#endif
4139(xmlParserCtxtPtr ctxt) {
4140 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4141 xmlChar *elem;
4142 xmlChar type = 0;
4143
4144 SKIP_BLANKS;
4145 GROW;
4146 if (RAW == '(') {
4147 /* Recurse on first child */
4148 NEXT;
4149 SKIP_BLANKS;
4150 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4151 SKIP_BLANKS;
4152 GROW;
4153 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004154 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004155 if (elem == NULL) {
4156 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4157 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4158 ctxt->sax->error(ctxt->userData,
4159 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4160 ctxt->wellFormed = 0;
4161 ctxt->disableSAX = 1;
4162 return(NULL);
4163 }
4164 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4165 GROW;
4166 if (RAW == '?') {
4167 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4168 NEXT;
4169 } else if (RAW == '*') {
4170 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4171 NEXT;
4172 } else if (RAW == '+') {
4173 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4174 NEXT;
4175 } else {
4176 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4177 }
4178 xmlFree(elem);
4179 GROW;
4180 }
4181 SKIP_BLANKS;
4182 SHRINK;
4183 while (RAW != ')') {
4184 /*
4185 * Each loop we parse one separator and one element.
4186 */
4187 if (RAW == ',') {
4188 if (type == 0) type = CUR;
4189
4190 /*
4191 * Detect "Name | Name , Name" error
4192 */
4193 else if (type != CUR) {
4194 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4196 ctxt->sax->error(ctxt->userData,
4197 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4198 type);
4199 ctxt->wellFormed = 0;
4200 ctxt->disableSAX = 1;
4201 if ((op != NULL) && (op != ret))
4202 xmlFreeElementContent(op);
4203 if ((last != NULL) && (last != ret) &&
4204 (last != ret->c1) && (last != ret->c2))
4205 xmlFreeElementContent(last);
4206 if (ret != NULL)
4207 xmlFreeElementContent(ret);
4208 return(NULL);
4209 }
4210 NEXT;
4211
4212 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4213 if (op == NULL) {
4214 xmlFreeElementContent(ret);
4215 return(NULL);
4216 }
4217 if (last == NULL) {
4218 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004219 if (ret != NULL)
4220 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004221 ret = cur = op;
4222 } else {
4223 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004224 if (op != NULL)
4225 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004226 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004227 if (last != NULL)
4228 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004229 cur =op;
4230 last = NULL;
4231 }
4232 } else if (RAW == '|') {
4233 if (type == 0) type = CUR;
4234
4235 /*
4236 * Detect "Name , Name | Name" error
4237 */
4238 else if (type != CUR) {
4239 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4241 ctxt->sax->error(ctxt->userData,
4242 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4243 type);
4244 ctxt->wellFormed = 0;
4245 ctxt->disableSAX = 1;
4246 if ((op != NULL) && (op != ret) && (op != last))
4247 xmlFreeElementContent(op);
4248 if ((last != NULL) && (last != ret) &&
4249 (last != ret->c1) && (last != ret->c2))
4250 xmlFreeElementContent(last);
4251 if (ret != NULL)
4252 xmlFreeElementContent(ret);
4253 return(NULL);
4254 }
4255 NEXT;
4256
4257 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4258 if (op == NULL) {
4259 if ((op != NULL) && (op != ret))
4260 xmlFreeElementContent(op);
4261 if ((last != NULL) && (last != ret) &&
4262 (last != ret->c1) && (last != ret->c2))
4263 xmlFreeElementContent(last);
4264 if (ret != NULL)
4265 xmlFreeElementContent(ret);
4266 return(NULL);
4267 }
4268 if (last == NULL) {
4269 op->c1 = ret;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004270 if (ret != NULL)
4271 ret->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004272 ret = cur = op;
4273 } else {
4274 cur->c2 = op;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004275 if (op != NULL)
4276 op->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004277 op->c1 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004278 if (last != NULL)
4279 last->parent = op;
Owen Taylor3473f882001-02-23 17:55:21 +00004280 cur =op;
4281 last = NULL;
4282 }
4283 } else {
4284 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4286 ctxt->sax->error(ctxt->userData,
4287 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4288 ctxt->wellFormed = 0;
4289 ctxt->disableSAX = 1;
4290 if ((op != NULL) && (op != ret))
4291 xmlFreeElementContent(op);
4292 if ((last != NULL) && (last != ret) &&
4293 (last != ret->c1) && (last != ret->c2))
4294 xmlFreeElementContent(last);
4295 if (ret != NULL)
4296 xmlFreeElementContent(ret);
4297 return(NULL);
4298 }
4299 GROW;
4300 SKIP_BLANKS;
4301 GROW;
4302 if (RAW == '(') {
4303 /* Recurse on second child */
4304 NEXT;
4305 SKIP_BLANKS;
4306 last = xmlParseElementChildrenContentDecl(ctxt);
4307 SKIP_BLANKS;
4308 } else {
Daniel Veillard76d66f42001-05-16 21:05:17 +00004309 elem = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004310 if (elem == NULL) {
4311 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4313 ctxt->sax->error(ctxt->userData,
4314 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4315 ctxt->wellFormed = 0;
4316 ctxt->disableSAX = 1;
4317 if ((op != NULL) && (op != ret))
4318 xmlFreeElementContent(op);
4319 if ((last != NULL) && (last != ret) &&
4320 (last != ret->c1) && (last != ret->c2))
4321 xmlFreeElementContent(last);
4322 if (ret != NULL)
4323 xmlFreeElementContent(ret);
4324 return(NULL);
4325 }
4326 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4327 xmlFree(elem);
4328 if (RAW == '?') {
4329 last->ocur = XML_ELEMENT_CONTENT_OPT;
4330 NEXT;
4331 } else if (RAW == '*') {
4332 last->ocur = XML_ELEMENT_CONTENT_MULT;
4333 NEXT;
4334 } else if (RAW == '+') {
4335 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4336 NEXT;
4337 } else {
4338 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4339 }
4340 }
4341 SKIP_BLANKS;
4342 GROW;
4343 }
4344 if ((cur != NULL) && (last != NULL)) {
4345 cur->c2 = last;
Daniel Veillarddab4cb32001-04-20 13:03:48 +00004346 if (last != NULL)
4347 last->parent = cur;
Owen Taylor3473f882001-02-23 17:55:21 +00004348 }
4349 ctxt->entity = ctxt->input;
4350 NEXT;
4351 if (RAW == '?') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004352 if (ret != NULL)
4353 ret->ocur = XML_ELEMENT_CONTENT_OPT;
Owen Taylor3473f882001-02-23 17:55:21 +00004354 NEXT;
4355 } else if (RAW == '*') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004356 if (ret != NULL)
4357 ret->ocur = XML_ELEMENT_CONTENT_MULT;
Owen Taylor3473f882001-02-23 17:55:21 +00004358 NEXT;
4359 } else if (RAW == '+') {
Daniel Veillarde470df72001-04-18 21:41:07 +00004360 if (ret != NULL)
4361 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
Owen Taylor3473f882001-02-23 17:55:21 +00004362 NEXT;
4363 }
4364 return(ret);
4365}
4366
4367/**
4368 * xmlParseElementContentDecl:
4369 * @ctxt: an XML parser context
4370 * @name: the name of the element being defined.
4371 * @result: the Element Content pointer will be stored here if any
4372 *
4373 * parse the declaration for an Element content either Mixed or Children,
4374 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4375 *
4376 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4377 *
4378 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4379 */
4380
4381int
4382xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4383 xmlElementContentPtr *result) {
4384
4385 xmlElementContentPtr tree = NULL;
4386 xmlParserInputPtr input = ctxt->input;
4387 int res;
4388
4389 *result = NULL;
4390
4391 if (RAW != '(') {
4392 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4394 ctxt->sax->error(ctxt->userData,
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004395 "xmlParseElementContentDecl : %s '(' expected\n", name);
Owen Taylor3473f882001-02-23 17:55:21 +00004396 ctxt->wellFormed = 0;
4397 ctxt->disableSAX = 1;
4398 return(-1);
4399 }
4400 NEXT;
4401 GROW;
4402 SKIP_BLANKS;
4403 if ((RAW == '#') && (NXT(1) == 'P') &&
4404 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4405 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4406 (NXT(6) == 'A')) {
4407 tree = xmlParseElementMixedContentDecl(ctxt);
4408 res = XML_ELEMENT_TYPE_MIXED;
4409 } else {
4410 tree = xmlParseElementChildrenContentDecl(ctxt);
4411 res = XML_ELEMENT_TYPE_ELEMENT;
4412 }
4413 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4414 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4416 ctxt->sax->error(ctxt->userData,
4417"Element content declaration doesn't start and stop in the same entity\n");
4418 ctxt->wellFormed = 0;
4419 ctxt->disableSAX = 1;
4420 }
4421 SKIP_BLANKS;
4422 *result = tree;
4423 return(res);
4424}
4425
4426/**
4427 * xmlParseElementDecl:
4428 * @ctxt: an XML parser context
4429 *
4430 * parse an Element declaration.
4431 *
4432 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4433 *
4434 * [ VC: Unique Element Type Declaration ]
4435 * No element type may be declared more than once
4436 *
4437 * Returns the type of the element, or -1 in case of error
4438 */
4439int
4440xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4441 xmlChar *name;
4442 int ret = -1;
4443 xmlElementContentPtr content = NULL;
4444
4445 GROW;
4446 if ((RAW == '<') && (NXT(1) == '!') &&
4447 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4448 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4449 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4450 (NXT(8) == 'T')) {
4451 xmlParserInputPtr input = ctxt->input;
4452
4453 SKIP(9);
4454 if (!IS_BLANK(CUR)) {
4455 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4456 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4457 ctxt->sax->error(ctxt->userData,
4458 "Space required after 'ELEMENT'\n");
4459 ctxt->wellFormed = 0;
4460 ctxt->disableSAX = 1;
4461 }
4462 SKIP_BLANKS;
Daniel Veillard76d66f42001-05-16 21:05:17 +00004463 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004464 if (name == NULL) {
4465 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4466 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4467 ctxt->sax->error(ctxt->userData,
4468 "xmlParseElementDecl: no name for Element\n");
4469 ctxt->wellFormed = 0;
4470 ctxt->disableSAX = 1;
4471 return(-1);
4472 }
4473 while ((RAW == 0) && (ctxt->inputNr > 1))
4474 xmlPopInput(ctxt);
4475 if (!IS_BLANK(CUR)) {
4476 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4477 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4478 ctxt->sax->error(ctxt->userData,
4479 "Space required after the element name\n");
4480 ctxt->wellFormed = 0;
4481 ctxt->disableSAX = 1;
4482 }
4483 SKIP_BLANKS;
4484 if ((RAW == 'E') && (NXT(1) == 'M') &&
4485 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4486 (NXT(4) == 'Y')) {
4487 SKIP(5);
4488 /*
4489 * Element must always be empty.
4490 */
4491 ret = XML_ELEMENT_TYPE_EMPTY;
4492 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4493 (NXT(2) == 'Y')) {
4494 SKIP(3);
4495 /*
4496 * Element is a generic container.
4497 */
4498 ret = XML_ELEMENT_TYPE_ANY;
4499 } else if (RAW == '(') {
4500 ret = xmlParseElementContentDecl(ctxt, name, &content);
4501 } else {
4502 /*
4503 * [ WFC: PEs in Internal Subset ] error handling.
4504 */
4505 if ((RAW == '%') && (ctxt->external == 0) &&
4506 (ctxt->inputNr == 1)) {
4507 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4509 ctxt->sax->error(ctxt->userData,
4510 "PEReference: forbidden within markup decl in internal subset\n");
4511 } else {
4512 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4514 ctxt->sax->error(ctxt->userData,
4515 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4516 }
4517 ctxt->wellFormed = 0;
4518 ctxt->disableSAX = 1;
4519 if (name != NULL) xmlFree(name);
4520 return(-1);
4521 }
4522
4523 SKIP_BLANKS;
4524 /*
4525 * Pop-up of finished entities.
4526 */
4527 while ((RAW == 0) && (ctxt->inputNr > 1))
4528 xmlPopInput(ctxt);
4529 SKIP_BLANKS;
4530
4531 if (RAW != '>') {
4532 ctxt->errNo = XML_ERR_GT_REQUIRED;
4533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4534 ctxt->sax->error(ctxt->userData,
4535 "xmlParseElementDecl: expected '>' at the end\n");
4536 ctxt->wellFormed = 0;
4537 ctxt->disableSAX = 1;
4538 } else {
4539 if (input != ctxt->input) {
4540 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4541 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4542 ctxt->sax->error(ctxt->userData,
4543"Element declaration doesn't start and stop in the same entity\n");
4544 ctxt->wellFormed = 0;
4545 ctxt->disableSAX = 1;
4546 }
4547
4548 NEXT;
4549 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4550 (ctxt->sax->elementDecl != NULL))
4551 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4552 content);
4553 }
4554 if (content != NULL) {
4555 xmlFreeElementContent(content);
4556 }
4557 if (name != NULL) {
4558 xmlFree(name);
4559 }
4560 }
4561 return(ret);
4562}
4563
4564/**
4565 * xmlParseMarkupDecl:
4566 * @ctxt: an XML parser context
4567 *
4568 * parse Markup declarations
4569 *
4570 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4571 * NotationDecl | PI | Comment
4572 *
4573 * [ VC: Proper Declaration/PE Nesting ]
4574 * Parameter-entity replacement text must be properly nested with
4575 * markup declarations. That is to say, if either the first character
4576 * or the last character of a markup declaration (markupdecl above) is
4577 * contained in the replacement text for a parameter-entity reference,
4578 * both must be contained in the same replacement text.
4579 *
4580 * [ WFC: PEs in Internal Subset ]
4581 * In the internal DTD subset, parameter-entity references can occur
4582 * only where markup declarations can occur, not within markup declarations.
4583 * (This does not apply to references that occur in external parameter
4584 * entities or to the external subset.)
4585 */
4586void
4587xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4588 GROW;
4589 xmlParseElementDecl(ctxt);
4590 xmlParseAttributeListDecl(ctxt);
4591 xmlParseEntityDecl(ctxt);
4592 xmlParseNotationDecl(ctxt);
4593 xmlParsePI(ctxt);
4594 xmlParseComment(ctxt);
4595 /*
4596 * This is only for internal subset. On external entities,
4597 * the replacement is done before parsing stage
4598 */
4599 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4600 xmlParsePEReference(ctxt);
4601 ctxt->instate = XML_PARSER_DTD;
4602}
4603
4604/**
4605 * xmlParseTextDecl:
4606 * @ctxt: an XML parser context
4607 *
4608 * parse an XML declaration header for external entities
4609 *
4610 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4611 *
4612 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4613 */
4614
4615void
4616xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4617 xmlChar *version;
4618
4619 /*
4620 * We know that '<?xml' is here.
4621 */
4622 if ((RAW == '<') && (NXT(1) == '?') &&
4623 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4624 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4625 SKIP(5);
4626 } else {
4627 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4629 ctxt->sax->error(ctxt->userData,
4630 "Text declaration '<?xml' required\n");
4631 ctxt->wellFormed = 0;
4632 ctxt->disableSAX = 1;
4633
4634 return;
4635 }
4636
4637 if (!IS_BLANK(CUR)) {
4638 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4639 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4640 ctxt->sax->error(ctxt->userData,
4641 "Space needed after '<?xml'\n");
4642 ctxt->wellFormed = 0;
4643 ctxt->disableSAX = 1;
4644 }
4645 SKIP_BLANKS;
4646
4647 /*
4648 * We may have the VersionInfo here.
4649 */
4650 version = xmlParseVersionInfo(ctxt);
4651 if (version == NULL)
4652 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4653 ctxt->input->version = version;
4654
4655 /*
4656 * We must have the encoding declaration
4657 */
4658 if (!IS_BLANK(CUR)) {
4659 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4660 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4661 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4662 ctxt->wellFormed = 0;
4663 ctxt->disableSAX = 1;
4664 }
4665 xmlParseEncodingDecl(ctxt);
4666 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4667 /*
4668 * The XML REC instructs us to stop parsing right here
4669 */
4670 return;
4671 }
4672
4673 SKIP_BLANKS;
4674 if ((RAW == '?') && (NXT(1) == '>')) {
4675 SKIP(2);
4676 } else if (RAW == '>') {
4677 /* Deprecated old WD ... */
4678 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4679 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4680 ctxt->sax->error(ctxt->userData,
4681 "XML declaration must end-up with '?>'\n");
4682 ctxt->wellFormed = 0;
4683 ctxt->disableSAX = 1;
4684 NEXT;
4685 } else {
4686 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4688 ctxt->sax->error(ctxt->userData,
4689 "parsing XML declaration: '?>' expected\n");
4690 ctxt->wellFormed = 0;
4691 ctxt->disableSAX = 1;
4692 MOVETO_ENDTAG(CUR_PTR);
4693 NEXT;
4694 }
4695}
4696
4697/*
4698 * xmlParseConditionalSections
4699 * @ctxt: an XML parser context
4700 *
4701 * [61] conditionalSect ::= includeSect | ignoreSect
4702 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4703 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4704 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4705 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4706 */
4707
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004708static void
Owen Taylor3473f882001-02-23 17:55:21 +00004709xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4710 SKIP(3);
4711 SKIP_BLANKS;
4712 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4713 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4714 (NXT(6) == 'E')) {
4715 SKIP(7);
4716 SKIP_BLANKS;
4717 if (RAW != '[') {
4718 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4720 ctxt->sax->error(ctxt->userData,
4721 "XML conditional section '[' expected\n");
4722 ctxt->wellFormed = 0;
4723 ctxt->disableSAX = 1;
4724 } else {
4725 NEXT;
4726 }
4727 if (xmlParserDebugEntities) {
4728 if ((ctxt->input != NULL) && (ctxt->input->filename))
4729 xmlGenericError(xmlGenericErrorContext,
4730 "%s(%d): ", ctxt->input->filename,
4731 ctxt->input->line);
4732 xmlGenericError(xmlGenericErrorContext,
4733 "Entering INCLUDE Conditional Section\n");
4734 }
4735
4736 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4737 (NXT(2) != '>'))) {
4738 const xmlChar *check = CUR_PTR;
4739 int cons = ctxt->input->consumed;
4740 int tok = ctxt->token;
4741
4742 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4743 xmlParseConditionalSections(ctxt);
4744 } else if (IS_BLANK(CUR)) {
4745 NEXT;
4746 } else if (RAW == '%') {
4747 xmlParsePEReference(ctxt);
4748 } else
4749 xmlParseMarkupDecl(ctxt);
4750
4751 /*
4752 * Pop-up of finished entities.
4753 */
4754 while ((RAW == 0) && (ctxt->inputNr > 1))
4755 xmlPopInput(ctxt);
4756
4757 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4758 (tok == ctxt->token)) {
4759 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4760 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4761 ctxt->sax->error(ctxt->userData,
4762 "Content error in the external subset\n");
4763 ctxt->wellFormed = 0;
4764 ctxt->disableSAX = 1;
4765 break;
4766 }
4767 }
4768 if (xmlParserDebugEntities) {
4769 if ((ctxt->input != NULL) && (ctxt->input->filename))
4770 xmlGenericError(xmlGenericErrorContext,
4771 "%s(%d): ", ctxt->input->filename,
4772 ctxt->input->line);
4773 xmlGenericError(xmlGenericErrorContext,
4774 "Leaving INCLUDE Conditional Section\n");
4775 }
4776
4777 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4778 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4779 int state;
4780 int instate;
4781 int depth = 0;
4782
4783 SKIP(6);
4784 SKIP_BLANKS;
4785 if (RAW != '[') {
4786 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4788 ctxt->sax->error(ctxt->userData,
4789 "XML conditional section '[' expected\n");
4790 ctxt->wellFormed = 0;
4791 ctxt->disableSAX = 1;
4792 } else {
4793 NEXT;
4794 }
4795 if (xmlParserDebugEntities) {
4796 if ((ctxt->input != NULL) && (ctxt->input->filename))
4797 xmlGenericError(xmlGenericErrorContext,
4798 "%s(%d): ", ctxt->input->filename,
4799 ctxt->input->line);
4800 xmlGenericError(xmlGenericErrorContext,
4801 "Entering IGNORE Conditional Section\n");
4802 }
4803
4804 /*
4805 * Parse up to the end of the conditionnal section
4806 * But disable SAX event generating DTD building in the meantime
4807 */
4808 state = ctxt->disableSAX;
4809 instate = ctxt->instate;
4810 ctxt->disableSAX = 1;
4811 ctxt->instate = XML_PARSER_IGNORE;
4812
4813 while (depth >= 0) {
4814 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4815 depth++;
4816 SKIP(3);
4817 continue;
4818 }
4819 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4820 if (--depth >= 0) SKIP(3);
4821 continue;
4822 }
4823 NEXT;
4824 continue;
4825 }
4826
4827 ctxt->disableSAX = state;
4828 ctxt->instate = instate;
4829
4830 if (xmlParserDebugEntities) {
4831 if ((ctxt->input != NULL) && (ctxt->input->filename))
4832 xmlGenericError(xmlGenericErrorContext,
4833 "%s(%d): ", ctxt->input->filename,
4834 ctxt->input->line);
4835 xmlGenericError(xmlGenericErrorContext,
4836 "Leaving IGNORE Conditional Section\n");
4837 }
4838
4839 } else {
4840 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4842 ctxt->sax->error(ctxt->userData,
4843 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4844 ctxt->wellFormed = 0;
4845 ctxt->disableSAX = 1;
4846 }
4847
4848 if (RAW == 0)
4849 SHRINK;
4850
4851 if (RAW == 0) {
4852 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4853 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4854 ctxt->sax->error(ctxt->userData,
4855 "XML conditional section not closed\n");
4856 ctxt->wellFormed = 0;
4857 ctxt->disableSAX = 1;
4858 } else {
4859 SKIP(3);
4860 }
4861}
4862
4863/**
4864 * xmlParseExternalSubset:
4865 * @ctxt: an XML parser context
4866 * @ExternalID: the external identifier
4867 * @SystemID: the system identifier (or URL)
4868 *
4869 * parse Markup declarations from an external subset
4870 *
4871 * [30] extSubset ::= textDecl? extSubsetDecl
4872 *
4873 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4874 */
4875void
4876xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4877 const xmlChar *SystemID) {
4878 GROW;
4879 if ((RAW == '<') && (NXT(1) == '?') &&
4880 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4881 (NXT(4) == 'l')) {
4882 xmlParseTextDecl(ctxt);
4883 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4884 /*
4885 * The XML REC instructs us to stop parsing right here
4886 */
4887 ctxt->instate = XML_PARSER_EOF;
4888 return;
4889 }
4890 }
4891 if (ctxt->myDoc == NULL) {
4892 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4893 }
4894 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4895 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4896
4897 ctxt->instate = XML_PARSER_DTD;
4898 ctxt->external = 1;
4899 while (((RAW == '<') && (NXT(1) == '?')) ||
4900 ((RAW == '<') && (NXT(1) == '!')) ||
Daniel Veillard2454ab92001-07-25 21:39:46 +00004901 (RAW == '%') || IS_BLANK(CUR)) {
Owen Taylor3473f882001-02-23 17:55:21 +00004902 const xmlChar *check = CUR_PTR;
4903 int cons = ctxt->input->consumed;
4904 int tok = ctxt->token;
4905
4906 GROW;
4907 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4908 xmlParseConditionalSections(ctxt);
4909 } else if (IS_BLANK(CUR)) {
4910 NEXT;
4911 } else if (RAW == '%') {
4912 xmlParsePEReference(ctxt);
4913 } else
4914 xmlParseMarkupDecl(ctxt);
4915
4916 /*
4917 * Pop-up of finished entities.
4918 */
4919 while ((RAW == 0) && (ctxt->inputNr > 1))
4920 xmlPopInput(ctxt);
4921
4922 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4923 (tok == ctxt->token)) {
4924 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4926 ctxt->sax->error(ctxt->userData,
4927 "Content error in the external subset\n");
4928 ctxt->wellFormed = 0;
4929 ctxt->disableSAX = 1;
4930 break;
4931 }
4932 }
4933
4934 if (RAW != 0) {
4935 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4936 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4937 ctxt->sax->error(ctxt->userData,
4938 "Extra content at the end of the document\n");
4939 ctxt->wellFormed = 0;
4940 ctxt->disableSAX = 1;
4941 }
4942
4943}
4944
4945/**
4946 * xmlParseReference:
4947 * @ctxt: an XML parser context
4948 *
4949 * parse and handle entity references in content, depending on the SAX
4950 * interface, this may end-up in a call to character() if this is a
4951 * CharRef, a predefined entity, if there is no reference() callback.
4952 * or if the parser was asked to switch to that mode.
4953 *
4954 * [67] Reference ::= EntityRef | CharRef
4955 */
4956void
4957xmlParseReference(xmlParserCtxtPtr ctxt) {
4958 xmlEntityPtr ent;
4959 xmlChar *val;
4960 if (RAW != '&') return;
4961
4962 if (NXT(1) == '#') {
4963 int i = 0;
4964 xmlChar out[10];
4965 int hex = NXT(2);
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004966 int value = xmlParseCharRef(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00004967
4968 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4969 /*
4970 * So we are using non-UTF-8 buffers
4971 * Check that the char fit on 8bits, if not
4972 * generate a CharRef.
4973 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004974 if (value <= 0xFF) {
4975 out[0] = value;
Owen Taylor3473f882001-02-23 17:55:21 +00004976 out[1] = 0;
4977 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4978 (!ctxt->disableSAX))
4979 ctxt->sax->characters(ctxt->userData, out, 1);
4980 } else {
4981 if ((hex == 'x') || (hex == 'X'))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004982 sprintf((char *)out, "#x%X", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004983 else
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004984 sprintf((char *)out, "#%d", value);
Owen Taylor3473f882001-02-23 17:55:21 +00004985 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4986 (!ctxt->disableSAX))
4987 ctxt->sax->reference(ctxt->userData, out);
4988 }
4989 } else {
4990 /*
4991 * Just encode the value in UTF-8
4992 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00004993 COPY_BUF(0 ,out, i, value);
Owen Taylor3473f882001-02-23 17:55:21 +00004994 out[i] = 0;
4995 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4996 (!ctxt->disableSAX))
4997 ctxt->sax->characters(ctxt->userData, out, i);
4998 }
4999 } else {
5000 ent = xmlParseEntityRef(ctxt);
5001 if (ent == NULL) return;
5002 if ((ent->name != NULL) &&
5003 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
5004 xmlNodePtr list = NULL;
5005 int ret;
5006
5007
5008 /*
5009 * The first reference to the entity trigger a parsing phase
5010 * where the ent->children is filled with the result from
5011 * the parsing.
5012 */
5013 if (ent->children == NULL) {
5014 xmlChar *value;
5015 value = ent->content;
5016
5017 /*
5018 * Check that this entity is well formed
5019 */
5020 if ((value != NULL) &&
5021 (value[1] == 0) && (value[0] == '<') &&
5022 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
5023 /*
5024 * DONE: get definite answer on this !!!
5025 * Lots of entity decls are used to declare a single
5026 * char
5027 * <!ENTITY lt "<">
5028 * Which seems to be valid since
5029 * 2.4: The ampersand character (&) and the left angle
5030 * bracket (<) may appear in their literal form only
5031 * when used ... They are also legal within the literal
5032 * entity value of an internal entity declaration;i
5033 * see "4.3.2 Well-Formed Parsed Entities".
5034 * IMHO 2.4 and 4.3.2 are directly in contradiction.
5035 * Looking at the OASIS test suite and James Clark
5036 * tests, this is broken. However the XML REC uses
5037 * it. Is the XML REC not well-formed ????
5038 * This is a hack to avoid this problem
5039 *
5040 * ANSWER: since lt gt amp .. are already defined,
5041 * this is a redefinition and hence the fact that the
5042 * contentis not well balanced is not a Wf error, this
5043 * is lousy but acceptable.
5044 */
5045 list = xmlNewDocText(ctxt->myDoc, value);
5046 if (list != NULL) {
5047 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
5048 (ent->children == NULL)) {
5049 ent->children = list;
5050 ent->last = list;
5051 list->parent = (xmlNodePtr) ent;
5052 } else {
5053 xmlFreeNodeList(list);
5054 }
5055 } else if (list != NULL) {
5056 xmlFreeNodeList(list);
5057 }
5058 } else {
5059 /*
5060 * 4.3.2: An internal general parsed entity is well-formed
5061 * if its replacement text matches the production labeled
5062 * content.
5063 */
5064 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
5065 ctxt->depth++;
5066 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
5067 ctxt->sax, NULL, ctxt->depth,
5068 value, &list);
5069 ctxt->depth--;
5070 } else if (ent->etype ==
5071 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
5072 ctxt->depth++;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005073 ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
Owen Taylor3473f882001-02-23 17:55:21 +00005074 ctxt->sax, NULL, ctxt->depth,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00005075 ent->URI, ent->ExternalID, &list);
Owen Taylor3473f882001-02-23 17:55:21 +00005076 ctxt->depth--;
5077 } else {
5078 ret = -1;
5079 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5080 ctxt->sax->error(ctxt->userData,
5081 "Internal: invalid entity type\n");
5082 }
5083 if (ret == XML_ERR_ENTITY_LOOP) {
5084 ctxt->errNo = XML_ERR_ENTITY_LOOP;
5085 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5086 ctxt->sax->error(ctxt->userData,
5087 "Detected entity reference loop\n");
5088 ctxt->wellFormed = 0;
5089 ctxt->disableSAX = 1;
5090 } else if ((ret == 0) && (list != NULL)) {
Daniel Veillard76d66f42001-05-16 21:05:17 +00005091 if (((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
5092 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))&&
Owen Taylor3473f882001-02-23 17:55:21 +00005093 (ent->children == NULL)) {
5094 ent->children = list;
Daniel Veillard62f313b2001-07-04 19:49:14 +00005095 if (ctxt->replaceEntities) {
5096 /*
5097 * Prune it directly in the generated document
5098 * except for single text nodes.
5099 */
5100 if ((list->type == XML_TEXT_NODE) &&
5101 (list->next == NULL)) {
5102 list->parent = (xmlNodePtr) ent;
5103 list = NULL;
5104 } else {
5105 while (list != NULL) {
5106 list->parent = (xmlNodePtr) ctxt->node;
5107 if (list->next == NULL)
5108 ent->last = list;
5109 list = list->next;
5110 }
5111 list = ent->children;
5112 }
5113 } else {
5114 while (list != NULL) {
5115 list->parent = (xmlNodePtr) ent;
5116 if (list->next == NULL)
5117 ent->last = list;
5118 list = list->next;
5119 }
Owen Taylor3473f882001-02-23 17:55:21 +00005120 }
5121 } else {
5122 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005123 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005124 }
5125 } else if (ret > 0) {
5126 ctxt->errNo = ret;
5127 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5128 ctxt->sax->error(ctxt->userData,
5129 "Entity value required\n");
5130 ctxt->wellFormed = 0;
5131 ctxt->disableSAX = 1;
5132 } else if (list != NULL) {
5133 xmlFreeNodeList(list);
Daniel Veillard62f313b2001-07-04 19:49:14 +00005134 list = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00005135 }
5136 }
5137 }
5138 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
5139 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
5140 /*
5141 * Create a node.
5142 */
5143 ctxt->sax->reference(ctxt->userData, ent->name);
5144 return;
5145 } else if (ctxt->replaceEntities) {
5146 if ((ctxt->node != NULL) && (ent->children != NULL)) {
5147 /*
5148 * Seems we are generating the DOM content, do
Daniel Veillard62f313b2001-07-04 19:49:14 +00005149 * a simple tree copy for all references except the first
5150 * In the first occurence list contains the replacement
Owen Taylor3473f882001-02-23 17:55:21 +00005151 */
Daniel Veillard62f313b2001-07-04 19:49:14 +00005152 if (list == NULL) {
5153 xmlNodePtr new, cur;
5154 cur = ent->children;
5155 while (cur != NULL) {
5156 new = xmlCopyNode(cur, 1);
5157 xmlAddChild(ctxt->node, new);
5158 if (cur == ent->last)
5159 break;
5160 cur = cur->next;
5161 }
5162 } else {
5163 /*
5164 * the name change is to avoid coalescing of the
5165 * node with a prossible previous text one which
5166 * would make ent->children a dandling pointer
5167 */
5168 if (ent->children->type == XML_TEXT_NODE)
5169 ent->children->name = xmlStrdup(BAD_CAST "nbktext");
5170 if ((ent->last != ent->children) &&
5171 (ent->last->type == XML_TEXT_NODE))
5172 ent->last->name = xmlStrdup(BAD_CAST "nbktext");
5173 xmlAddChildList(ctxt->node, ent->children);
5174 }
5175
Owen Taylor3473f882001-02-23 17:55:21 +00005176 /*
5177 * This is to avoid a nasty side effect, see
5178 * characters() in SAX.c
5179 */
5180 ctxt->nodemem = 0;
5181 ctxt->nodelen = 0;
5182 return;
5183 } else {
5184 /*
5185 * Probably running in SAX mode
5186 */
5187 xmlParserInputPtr input;
5188
5189 input = xmlNewEntityInputStream(ctxt, ent);
5190 xmlPushInput(ctxt, input);
5191 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
5192 (RAW == '<') && (NXT(1) == '?') &&
5193 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5194 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5195 xmlParseTextDecl(ctxt);
5196 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5197 /*
5198 * The XML REC instructs us to stop parsing right here
5199 */
5200 ctxt->instate = XML_PARSER_EOF;
5201 return;
5202 }
5203 if (input->standalone == 1) {
5204 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5205 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5206 ctxt->sax->error(ctxt->userData,
5207 "external parsed entities cannot be standalone\n");
5208 ctxt->wellFormed = 0;
5209 ctxt->disableSAX = 1;
5210 }
5211 }
5212 return;
5213 }
5214 }
5215 } else {
5216 val = ent->content;
5217 if (val == NULL) return;
5218 /*
5219 * inline the entity.
5220 */
5221 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5222 (!ctxt->disableSAX))
5223 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5224 }
5225 }
5226}
5227
5228/**
5229 * xmlParseEntityRef:
5230 * @ctxt: an XML parser context
5231 *
5232 * parse ENTITY references declarations
5233 *
5234 * [68] EntityRef ::= '&' Name ';'
5235 *
5236 * [ WFC: Entity Declared ]
5237 * In a document without any DTD, a document with only an internal DTD
5238 * subset which contains no parameter entity references, or a document
5239 * with "standalone='yes'", the Name given in the entity reference
5240 * must match that in an entity declaration, except that well-formed
5241 * documents need not declare any of the following entities: amp, lt,
5242 * gt, apos, quot. The declaration of a parameter entity must precede
5243 * any reference to it. Similarly, the declaration of a general entity
5244 * must precede any reference to it which appears in a default value in an
5245 * attribute-list declaration. Note that if entities are declared in the
5246 * external subset or in external parameter entities, a non-validating
5247 * processor is not obligated to read and process their declarations;
5248 * for such documents, the rule that an entity must be declared is a
5249 * well-formedness constraint only if standalone='yes'.
5250 *
5251 * [ WFC: Parsed Entity ]
5252 * An entity reference must not contain the name of an unparsed entity
5253 *
5254 * Returns the xmlEntityPtr if found, or NULL otherwise.
5255 */
5256xmlEntityPtr
5257xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5258 xmlChar *name;
5259 xmlEntityPtr ent = NULL;
5260
5261 GROW;
5262
5263 if (RAW == '&') {
5264 NEXT;
5265 name = xmlParseName(ctxt);
5266 if (name == NULL) {
5267 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5268 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5269 ctxt->sax->error(ctxt->userData,
5270 "xmlParseEntityRef: no name\n");
5271 ctxt->wellFormed = 0;
5272 ctxt->disableSAX = 1;
5273 } else {
5274 if (RAW == ';') {
5275 NEXT;
5276 /*
5277 * Ask first SAX for entity resolution, otherwise try the
5278 * predefined set.
5279 */
5280 if (ctxt->sax != NULL) {
5281 if (ctxt->sax->getEntity != NULL)
5282 ent = ctxt->sax->getEntity(ctxt->userData, name);
5283 if (ent == NULL)
5284 ent = xmlGetPredefinedEntity(name);
5285 }
5286 /*
5287 * [ WFC: Entity Declared ]
5288 * In a document without any DTD, a document with only an
5289 * internal DTD subset which contains no parameter entity
5290 * references, or a document with "standalone='yes'", the
5291 * Name given in the entity reference must match that in an
5292 * entity declaration, except that well-formed documents
5293 * need not declare any of the following entities: amp, lt,
5294 * gt, apos, quot.
5295 * The declaration of a parameter entity must precede any
5296 * reference to it.
5297 * Similarly, the declaration of a general entity must
5298 * precede any reference to it which appears in a default
5299 * value in an attribute-list declaration. Note that if
5300 * entities are declared in the external subset or in
5301 * external parameter entities, a non-validating processor
5302 * is not obligated to read and process their declarations;
5303 * for such documents, the rule that an entity must be
5304 * declared is a well-formedness constraint only if
5305 * standalone='yes'.
5306 */
5307 if (ent == NULL) {
5308 if ((ctxt->standalone == 1) ||
5309 ((ctxt->hasExternalSubset == 0) &&
5310 (ctxt->hasPErefs == 0))) {
5311 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5312 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5313 ctxt->sax->error(ctxt->userData,
5314 "Entity '%s' not defined\n", name);
5315 ctxt->wellFormed = 0;
5316 ctxt->disableSAX = 1;
5317 } else {
5318 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5319 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
Daniel Veillard11648102001-06-26 16:08:24 +00005320 ctxt->sax->error(ctxt->userData,
Owen Taylor3473f882001-02-23 17:55:21 +00005321 "Entity '%s' not defined\n", name);
5322 }
5323 }
5324
5325 /*
5326 * [ WFC: Parsed Entity ]
5327 * An entity reference must not contain the name of an
5328 * unparsed entity
5329 */
5330 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5331 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5332 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5333 ctxt->sax->error(ctxt->userData,
5334 "Entity reference to unparsed entity %s\n", name);
5335 ctxt->wellFormed = 0;
5336 ctxt->disableSAX = 1;
5337 }
5338
5339 /*
5340 * [ WFC: No External Entity References ]
5341 * Attribute values cannot contain direct or indirect
5342 * entity references to external entities.
5343 */
5344 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5345 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5346 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5348 ctxt->sax->error(ctxt->userData,
5349 "Attribute references external entity '%s'\n", name);
5350 ctxt->wellFormed = 0;
5351 ctxt->disableSAX = 1;
5352 }
5353 /*
5354 * [ WFC: No < in Attribute Values ]
5355 * The replacement text of any entity referred to directly or
5356 * indirectly in an attribute value (other than "&lt;") must
5357 * not contain a <.
5358 */
5359 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5360 (ent != NULL) &&
5361 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5362 (ent->content != NULL) &&
5363 (xmlStrchr(ent->content, '<'))) {
5364 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5365 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5366 ctxt->sax->error(ctxt->userData,
5367 "'<' in entity '%s' is not allowed in attributes values\n", name);
5368 ctxt->wellFormed = 0;
5369 ctxt->disableSAX = 1;
5370 }
5371
5372 /*
5373 * Internal check, no parameter entities here ...
5374 */
5375 else {
5376 switch (ent->etype) {
5377 case XML_INTERNAL_PARAMETER_ENTITY:
5378 case XML_EXTERNAL_PARAMETER_ENTITY:
5379 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5381 ctxt->sax->error(ctxt->userData,
5382 "Attempt to reference the parameter entity '%s'\n", name);
5383 ctxt->wellFormed = 0;
5384 ctxt->disableSAX = 1;
5385 break;
5386 default:
5387 break;
5388 }
5389 }
5390
5391 /*
5392 * [ WFC: No Recursion ]
5393 * A parsed entity must not contain a recursive reference
5394 * to itself, either directly or indirectly.
5395 * Done somewhere else
5396 */
5397
5398 } else {
5399 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5401 ctxt->sax->error(ctxt->userData,
5402 "xmlParseEntityRef: expecting ';'\n");
5403 ctxt->wellFormed = 0;
5404 ctxt->disableSAX = 1;
5405 }
5406 xmlFree(name);
5407 }
5408 }
5409 return(ent);
5410}
5411
5412/**
5413 * xmlParseStringEntityRef:
5414 * @ctxt: an XML parser context
5415 * @str: a pointer to an index in the string
5416 *
5417 * parse ENTITY references declarations, but this version parses it from
5418 * a string value.
5419 *
5420 * [68] EntityRef ::= '&' Name ';'
5421 *
5422 * [ WFC: Entity Declared ]
5423 * In a document without any DTD, a document with only an internal DTD
5424 * subset which contains no parameter entity references, or a document
5425 * with "standalone='yes'", the Name given in the entity reference
5426 * must match that in an entity declaration, except that well-formed
5427 * documents need not declare any of the following entities: amp, lt,
5428 * gt, apos, quot. The declaration of a parameter entity must precede
5429 * any reference to it. Similarly, the declaration of a general entity
5430 * must precede any reference to it which appears in a default value in an
5431 * attribute-list declaration. Note that if entities are declared in the
5432 * external subset or in external parameter entities, a non-validating
5433 * processor is not obligated to read and process their declarations;
5434 * for such documents, the rule that an entity must be declared is a
5435 * well-formedness constraint only if standalone='yes'.
5436 *
5437 * [ WFC: Parsed Entity ]
5438 * An entity reference must not contain the name of an unparsed entity
5439 *
5440 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5441 * is updated to the current location in the string.
5442 */
5443xmlEntityPtr
5444xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5445 xmlChar *name;
5446 const xmlChar *ptr;
5447 xmlChar cur;
5448 xmlEntityPtr ent = NULL;
5449
5450 if ((str == NULL) || (*str == NULL))
5451 return(NULL);
5452 ptr = *str;
5453 cur = *ptr;
5454 if (cur == '&') {
5455 ptr++;
5456 cur = *ptr;
5457 name = xmlParseStringName(ctxt, &ptr);
5458 if (name == NULL) {
5459 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5460 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5461 ctxt->sax->error(ctxt->userData,
5462 "xmlParseEntityRef: no name\n");
5463 ctxt->wellFormed = 0;
5464 ctxt->disableSAX = 1;
5465 } else {
5466 if (*ptr == ';') {
5467 ptr++;
5468 /*
5469 * Ask first SAX for entity resolution, otherwise try the
5470 * predefined set.
5471 */
5472 if (ctxt->sax != NULL) {
5473 if (ctxt->sax->getEntity != NULL)
5474 ent = ctxt->sax->getEntity(ctxt->userData, name);
5475 if (ent == NULL)
5476 ent = xmlGetPredefinedEntity(name);
5477 }
5478 /*
5479 * [ WFC: Entity Declared ]
5480 * In a document without any DTD, a document with only an
5481 * internal DTD subset which contains no parameter entity
5482 * references, or a document with "standalone='yes'", the
5483 * Name given in the entity reference must match that in an
5484 * entity declaration, except that well-formed documents
5485 * need not declare any of the following entities: amp, lt,
5486 * gt, apos, quot.
5487 * The declaration of a parameter entity must precede any
5488 * reference to it.
5489 * Similarly, the declaration of a general entity must
5490 * precede any reference to it which appears in a default
5491 * value in an attribute-list declaration. Note that if
5492 * entities are declared in the external subset or in
5493 * external parameter entities, a non-validating processor
5494 * is not obligated to read and process their declarations;
5495 * for such documents, the rule that an entity must be
5496 * declared is a well-formedness constraint only if
5497 * standalone='yes'.
5498 */
5499 if (ent == NULL) {
5500 if ((ctxt->standalone == 1) ||
5501 ((ctxt->hasExternalSubset == 0) &&
5502 (ctxt->hasPErefs == 0))) {
5503 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5504 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5505 ctxt->sax->error(ctxt->userData,
5506 "Entity '%s' not defined\n", name);
5507 ctxt->wellFormed = 0;
5508 ctxt->disableSAX = 1;
5509 } else {
5510 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5511 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5512 ctxt->sax->warning(ctxt->userData,
5513 "Entity '%s' not defined\n", name);
5514 }
5515 }
5516
5517 /*
5518 * [ WFC: Parsed Entity ]
5519 * An entity reference must not contain the name of an
5520 * unparsed entity
5521 */
5522 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5523 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5525 ctxt->sax->error(ctxt->userData,
5526 "Entity reference to unparsed entity %s\n", name);
5527 ctxt->wellFormed = 0;
5528 ctxt->disableSAX = 1;
5529 }
5530
5531 /*
5532 * [ WFC: No External Entity References ]
5533 * Attribute values cannot contain direct or indirect
5534 * entity references to external entities.
5535 */
5536 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5537 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5538 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5539 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5540 ctxt->sax->error(ctxt->userData,
5541 "Attribute references external entity '%s'\n", name);
5542 ctxt->wellFormed = 0;
5543 ctxt->disableSAX = 1;
5544 }
5545 /*
5546 * [ WFC: No < in Attribute Values ]
5547 * The replacement text of any entity referred to directly or
5548 * indirectly in an attribute value (other than "&lt;") must
5549 * not contain a <.
5550 */
5551 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5552 (ent != NULL) &&
5553 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5554 (ent->content != NULL) &&
5555 (xmlStrchr(ent->content, '<'))) {
5556 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5557 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5558 ctxt->sax->error(ctxt->userData,
5559 "'<' in entity '%s' is not allowed in attributes values\n", name);
5560 ctxt->wellFormed = 0;
5561 ctxt->disableSAX = 1;
5562 }
5563
5564 /*
5565 * Internal check, no parameter entities here ...
5566 */
5567 else {
5568 switch (ent->etype) {
5569 case XML_INTERNAL_PARAMETER_ENTITY:
5570 case XML_EXTERNAL_PARAMETER_ENTITY:
5571 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5573 ctxt->sax->error(ctxt->userData,
5574 "Attempt to reference the parameter entity '%s'\n", name);
5575 ctxt->wellFormed = 0;
5576 ctxt->disableSAX = 1;
5577 break;
5578 default:
5579 break;
5580 }
5581 }
5582
5583 /*
5584 * [ WFC: No Recursion ]
5585 * A parsed entity must not contain a recursive reference
5586 * to itself, either directly or indirectly.
5587 * Done somewhwere else
5588 */
5589
5590 } else {
5591 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5593 ctxt->sax->error(ctxt->userData,
5594 "xmlParseEntityRef: expecting ';'\n");
5595 ctxt->wellFormed = 0;
5596 ctxt->disableSAX = 1;
5597 }
5598 xmlFree(name);
5599 }
5600 }
5601 *str = ptr;
5602 return(ent);
5603}
5604
5605/**
5606 * xmlParsePEReference:
5607 * @ctxt: an XML parser context
5608 *
5609 * parse PEReference declarations
5610 * The entity content is handled directly by pushing it's content as
5611 * a new input stream.
5612 *
5613 * [69] PEReference ::= '%' Name ';'
5614 *
5615 * [ WFC: No Recursion ]
5616 * A parsed entity must not contain a recursive
5617 * reference to itself, either directly or indirectly.
5618 *
5619 * [ WFC: Entity Declared ]
5620 * In a document without any DTD, a document with only an internal DTD
5621 * subset which contains no parameter entity references, or a document
5622 * with "standalone='yes'", ... ... The declaration of a parameter
5623 * entity must precede any reference to it...
5624 *
5625 * [ VC: Entity Declared ]
5626 * In a document with an external subset or external parameter entities
5627 * with "standalone='no'", ... ... The declaration of a parameter entity
5628 * must precede any reference to it...
5629 *
5630 * [ WFC: In DTD ]
5631 * Parameter-entity references may only appear in the DTD.
5632 * NOTE: misleading but this is handled.
5633 */
5634void
5635xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5636 xmlChar *name;
5637 xmlEntityPtr entity = NULL;
5638 xmlParserInputPtr input;
5639
5640 if (RAW == '%') {
5641 NEXT;
Daniel Veillard76d66f42001-05-16 21:05:17 +00005642 name = xmlParseName(ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00005643 if (name == NULL) {
5644 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5646 ctxt->sax->error(ctxt->userData,
5647 "xmlParsePEReference: no name\n");
5648 ctxt->wellFormed = 0;
5649 ctxt->disableSAX = 1;
5650 } else {
5651 if (RAW == ';') {
5652 NEXT;
5653 if ((ctxt->sax != NULL) &&
5654 (ctxt->sax->getParameterEntity != NULL))
5655 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5656 name);
5657 if (entity == NULL) {
5658 /*
5659 * [ WFC: Entity Declared ]
5660 * In a document without any DTD, a document with only an
5661 * internal DTD subset which contains no parameter entity
5662 * references, or a document with "standalone='yes'", ...
5663 * ... The declaration of a parameter entity must precede
5664 * any reference to it...
5665 */
5666 if ((ctxt->standalone == 1) ||
5667 ((ctxt->hasExternalSubset == 0) &&
5668 (ctxt->hasPErefs == 0))) {
5669 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5670 if ((!ctxt->disableSAX) &&
5671 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5672 ctxt->sax->error(ctxt->userData,
5673 "PEReference: %%%s; not found\n", name);
5674 ctxt->wellFormed = 0;
5675 ctxt->disableSAX = 1;
5676 } else {
5677 /*
5678 * [ VC: Entity Declared ]
5679 * In a document with an external subset or external
5680 * parameter entities with "standalone='no'", ...
5681 * ... The declaration of a parameter entity must precede
5682 * any reference to it...
5683 */
5684 if ((!ctxt->disableSAX) &&
5685 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5686 ctxt->sax->warning(ctxt->userData,
5687 "PEReference: %%%s; not found\n", name);
5688 ctxt->valid = 0;
5689 }
5690 } else {
5691 /*
5692 * Internal checking in case the entity quest barfed
5693 */
5694 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5695 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5696 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5697 ctxt->sax->warning(ctxt->userData,
5698 "Internal: %%%s; is not a parameter entity\n", name);
5699 } else {
5700 /*
5701 * TODO !!!
5702 * handle the extra spaces added before and after
5703 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5704 */
5705 input = xmlNewEntityInputStream(ctxt, entity);
5706 xmlPushInput(ctxt, input);
5707 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5708 (RAW == '<') && (NXT(1) == '?') &&
5709 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5710 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5711 xmlParseTextDecl(ctxt);
5712 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5713 /*
5714 * The XML REC instructs us to stop parsing
5715 * right here
5716 */
5717 ctxt->instate = XML_PARSER_EOF;
5718 xmlFree(name);
5719 return;
5720 }
5721 }
5722 if (ctxt->token == 0)
5723 ctxt->token = ' ';
5724 }
5725 }
5726 ctxt->hasPErefs = 1;
5727 } else {
5728 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5730 ctxt->sax->error(ctxt->userData,
5731 "xmlParsePEReference: expecting ';'\n");
5732 ctxt->wellFormed = 0;
5733 ctxt->disableSAX = 1;
5734 }
5735 xmlFree(name);
5736 }
5737 }
5738}
5739
5740/**
5741 * xmlParseStringPEReference:
5742 * @ctxt: an XML parser context
5743 * @str: a pointer to an index in the string
5744 *
5745 * parse PEReference declarations
5746 *
5747 * [69] PEReference ::= '%' Name ';'
5748 *
5749 * [ WFC: No Recursion ]
5750 * A parsed entity must not contain a recursive
5751 * reference to itself, either directly or indirectly.
5752 *
5753 * [ WFC: Entity Declared ]
5754 * In a document without any DTD, a document with only an internal DTD
5755 * subset which contains no parameter entity references, or a document
5756 * with "standalone='yes'", ... ... The declaration of a parameter
5757 * entity must precede any reference to it...
5758 *
5759 * [ VC: Entity Declared ]
5760 * In a document with an external subset or external parameter entities
5761 * with "standalone='no'", ... ... The declaration of a parameter entity
5762 * must precede any reference to it...
5763 *
5764 * [ WFC: In DTD ]
5765 * Parameter-entity references may only appear in the DTD.
5766 * NOTE: misleading but this is handled.
5767 *
5768 * Returns the string of the entity content.
5769 * str is updated to the current value of the index
5770 */
5771xmlEntityPtr
5772xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5773 const xmlChar *ptr;
5774 xmlChar cur;
5775 xmlChar *name;
5776 xmlEntityPtr entity = NULL;
5777
5778 if ((str == NULL) || (*str == NULL)) return(NULL);
5779 ptr = *str;
5780 cur = *ptr;
5781 if (cur == '%') {
5782 ptr++;
5783 cur = *ptr;
5784 name = xmlParseStringName(ctxt, &ptr);
5785 if (name == NULL) {
5786 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5788 ctxt->sax->error(ctxt->userData,
5789 "xmlParseStringPEReference: no name\n");
5790 ctxt->wellFormed = 0;
5791 ctxt->disableSAX = 1;
5792 } else {
5793 cur = *ptr;
5794 if (cur == ';') {
5795 ptr++;
5796 cur = *ptr;
5797 if ((ctxt->sax != NULL) &&
5798 (ctxt->sax->getParameterEntity != NULL))
5799 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5800 name);
5801 if (entity == NULL) {
5802 /*
5803 * [ WFC: Entity Declared ]
5804 * In a document without any DTD, a document with only an
5805 * internal DTD subset which contains no parameter entity
5806 * references, or a document with "standalone='yes'", ...
5807 * ... The declaration of a parameter entity must precede
5808 * any reference to it...
5809 */
5810 if ((ctxt->standalone == 1) ||
5811 ((ctxt->hasExternalSubset == 0) &&
5812 (ctxt->hasPErefs == 0))) {
5813 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5815 ctxt->sax->error(ctxt->userData,
5816 "PEReference: %%%s; not found\n", name);
5817 ctxt->wellFormed = 0;
5818 ctxt->disableSAX = 1;
5819 } else {
5820 /*
5821 * [ VC: Entity Declared ]
5822 * In a document with an external subset or external
5823 * parameter entities with "standalone='no'", ...
5824 * ... The declaration of a parameter entity must
5825 * precede any reference to it...
5826 */
5827 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5828 ctxt->sax->warning(ctxt->userData,
5829 "PEReference: %%%s; not found\n", name);
5830 ctxt->valid = 0;
5831 }
5832 } else {
5833 /*
5834 * Internal checking in case the entity quest barfed
5835 */
5836 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5837 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5838 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5839 ctxt->sax->warning(ctxt->userData,
5840 "Internal: %%%s; is not a parameter entity\n", name);
5841 }
5842 }
5843 ctxt->hasPErefs = 1;
5844 } else {
5845 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5847 ctxt->sax->error(ctxt->userData,
5848 "xmlParseStringPEReference: expecting ';'\n");
5849 ctxt->wellFormed = 0;
5850 ctxt->disableSAX = 1;
5851 }
5852 xmlFree(name);
5853 }
5854 }
5855 *str = ptr;
5856 return(entity);
5857}
5858
5859/**
5860 * xmlParseDocTypeDecl:
5861 * @ctxt: an XML parser context
5862 *
5863 * parse a DOCTYPE declaration
5864 *
5865 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5866 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5867 *
5868 * [ VC: Root Element Type ]
5869 * The Name in the document type declaration must match the element
5870 * type of the root element.
5871 */
5872
5873void
5874xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5875 xmlChar *name = NULL;
5876 xmlChar *ExternalID = NULL;
5877 xmlChar *URI = NULL;
5878
5879 /*
5880 * We know that '<!DOCTYPE' has been detected.
5881 */
5882 SKIP(9);
5883
5884 SKIP_BLANKS;
5885
5886 /*
5887 * Parse the DOCTYPE name.
5888 */
5889 name = xmlParseName(ctxt);
5890 if (name == NULL) {
5891 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5892 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5893 ctxt->sax->error(ctxt->userData,
5894 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5895 ctxt->wellFormed = 0;
5896 ctxt->disableSAX = 1;
5897 }
5898 ctxt->intSubName = name;
5899
5900 SKIP_BLANKS;
5901
5902 /*
5903 * Check for SystemID and ExternalID
5904 */
5905 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5906
5907 if ((URI != NULL) || (ExternalID != NULL)) {
5908 ctxt->hasExternalSubset = 1;
5909 }
5910 ctxt->extSubURI = URI;
5911 ctxt->extSubSystem = ExternalID;
5912
5913 SKIP_BLANKS;
5914
5915 /*
5916 * Create and update the internal subset.
5917 */
5918 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5919 (!ctxt->disableSAX))
5920 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5921
5922 /*
5923 * Is there any internal subset declarations ?
5924 * they are handled separately in xmlParseInternalSubset()
5925 */
5926 if (RAW == '[')
5927 return;
5928
5929 /*
5930 * We should be at the end of the DOCTYPE declaration.
5931 */
5932 if (RAW != '>') {
5933 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5935 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5936 ctxt->wellFormed = 0;
5937 ctxt->disableSAX = 1;
5938 }
5939 NEXT;
5940}
5941
5942/**
5943 * xmlParseInternalsubset:
5944 * @ctxt: an XML parser context
5945 *
5946 * parse the internal subset declaration
5947 *
5948 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5949 */
5950
Daniel Veillard56a4cb82001-03-24 17:00:36 +00005951static void
Owen Taylor3473f882001-02-23 17:55:21 +00005952xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5953 /*
5954 * Is there any DTD definition ?
5955 */
5956 if (RAW == '[') {
5957 ctxt->instate = XML_PARSER_DTD;
5958 NEXT;
5959 /*
5960 * Parse the succession of Markup declarations and
5961 * PEReferences.
5962 * Subsequence (markupdecl | PEReference | S)*
5963 */
5964 while (RAW != ']') {
5965 const xmlChar *check = CUR_PTR;
5966 int cons = ctxt->input->consumed;
5967
5968 SKIP_BLANKS;
5969 xmlParseMarkupDecl(ctxt);
5970 xmlParsePEReference(ctxt);
5971
5972 /*
5973 * Pop-up of finished entities.
5974 */
5975 while ((RAW == 0) && (ctxt->inputNr > 1))
5976 xmlPopInput(ctxt);
5977
5978 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5979 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5980 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5981 ctxt->sax->error(ctxt->userData,
5982 "xmlParseInternalSubset: error detected in Markup declaration\n");
5983 ctxt->wellFormed = 0;
5984 ctxt->disableSAX = 1;
5985 break;
5986 }
5987 }
5988 if (RAW == ']') {
5989 NEXT;
5990 SKIP_BLANKS;
5991 }
5992 }
5993
5994 /*
5995 * We should be at the end of the DOCTYPE declaration.
5996 */
5997 if (RAW != '>') {
5998 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6000 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
6001 ctxt->wellFormed = 0;
6002 ctxt->disableSAX = 1;
6003 }
6004 NEXT;
6005}
6006
6007/**
6008 * xmlParseAttribute:
6009 * @ctxt: an XML parser context
6010 * @value: a xmlChar ** used to store the value of the attribute
6011 *
6012 * parse an attribute
6013 *
6014 * [41] Attribute ::= Name Eq AttValue
6015 *
6016 * [ WFC: No External Entity References ]
6017 * Attribute values cannot contain direct or indirect entity references
6018 * to external entities.
6019 *
6020 * [ WFC: No < in Attribute Values ]
6021 * The replacement text of any entity referred to directly or indirectly in
6022 * an attribute value (other than "&lt;") must not contain a <.
6023 *
6024 * [ VC: Attribute Value Type ]
6025 * The attribute must have been declared; the value must be of the type
6026 * declared for it.
6027 *
6028 * [25] Eq ::= S? '=' S?
6029 *
6030 * With namespace:
6031 *
6032 * [NS 11] Attribute ::= QName Eq AttValue
6033 *
6034 * Also the case QName == xmlns:??? is handled independently as a namespace
6035 * definition.
6036 *
6037 * Returns the attribute name, and the value in *value.
6038 */
6039
6040xmlChar *
6041xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
6042 xmlChar *name, *val;
6043
6044 *value = NULL;
6045 name = xmlParseName(ctxt);
6046 if (name == NULL) {
6047 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6048 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6049 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
6050 ctxt->wellFormed = 0;
6051 ctxt->disableSAX = 1;
6052 return(NULL);
6053 }
6054
6055 /*
6056 * read the value
6057 */
6058 SKIP_BLANKS;
6059 if (RAW == '=') {
6060 NEXT;
6061 SKIP_BLANKS;
6062 val = xmlParseAttValue(ctxt);
6063 ctxt->instate = XML_PARSER_CONTENT;
6064 } else {
6065 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6066 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6067 ctxt->sax->error(ctxt->userData,
6068 "Specification mandate value for attribute %s\n", name);
6069 ctxt->wellFormed = 0;
6070 ctxt->disableSAX = 1;
6071 xmlFree(name);
6072 return(NULL);
6073 }
6074
6075 /*
6076 * Check that xml:lang conforms to the specification
6077 * No more registered as an error, just generate a warning now
6078 * since this was deprecated in XML second edition
6079 */
6080 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
6081 if (!xmlCheckLanguageID(val)) {
6082 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
6083 ctxt->sax->warning(ctxt->userData,
6084 "Malformed value for xml:lang : %s\n", val);
6085 }
6086 }
6087
6088 /*
6089 * Check that xml:space conforms to the specification
6090 */
6091 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
6092 if (xmlStrEqual(val, BAD_CAST "default"))
6093 *(ctxt->space) = 0;
6094 else if (xmlStrEqual(val, BAD_CAST "preserve"))
6095 *(ctxt->space) = 1;
6096 else {
6097 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
6098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6099 ctxt->sax->error(ctxt->userData,
6100"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
6101 val);
6102 ctxt->wellFormed = 0;
6103 ctxt->disableSAX = 1;
6104 }
6105 }
6106
6107 *value = val;
6108 return(name);
6109}
6110
6111/**
6112 * xmlParseStartTag:
6113 * @ctxt: an XML parser context
6114 *
6115 * parse a start of tag either for rule element or
6116 * EmptyElement. In both case we don't parse the tag closing chars.
6117 *
6118 * [40] STag ::= '<' Name (S Attribute)* S? '>'
6119 *
6120 * [ WFC: Unique Att Spec ]
6121 * No attribute name may appear more than once in the same start-tag or
6122 * empty-element tag.
6123 *
6124 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
6125 *
6126 * [ WFC: Unique Att Spec ]
6127 * No attribute name may appear more than once in the same start-tag or
6128 * empty-element tag.
6129 *
6130 * With namespace:
6131 *
6132 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
6133 *
6134 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
6135 *
6136 * Returns the element name parsed
6137 */
6138
6139xmlChar *
6140xmlParseStartTag(xmlParserCtxtPtr ctxt) {
6141 xmlChar *name;
6142 xmlChar *attname;
6143 xmlChar *attvalue;
6144 const xmlChar **atts = NULL;
6145 int nbatts = 0;
6146 int maxatts = 0;
6147 int i;
6148
6149 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00006150 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006151
6152 name = xmlParseName(ctxt);
6153 if (name == NULL) {
6154 ctxt->errNo = XML_ERR_NAME_REQUIRED;
6155 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6156 ctxt->sax->error(ctxt->userData,
6157 "xmlParseStartTag: invalid element name\n");
6158 ctxt->wellFormed = 0;
6159 ctxt->disableSAX = 1;
6160 return(NULL);
6161 }
6162
6163 /*
6164 * Now parse the attributes, it ends up with the ending
6165 *
6166 * (S Attribute)* S?
6167 */
6168 SKIP_BLANKS;
6169 GROW;
6170
Daniel Veillard21a0f912001-02-25 19:54:14 +00006171 while ((RAW != '>') &&
6172 ((RAW != '/') || (NXT(1) != '>')) &&
6173 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00006174 const xmlChar *q = CUR_PTR;
6175 int cons = ctxt->input->consumed;
6176
6177 attname = xmlParseAttribute(ctxt, &attvalue);
6178 if ((attname != NULL) && (attvalue != NULL)) {
6179 /*
6180 * [ WFC: Unique Att Spec ]
6181 * No attribute name may appear more than once in the same
6182 * start-tag or empty-element tag.
6183 */
6184 for (i = 0; i < nbatts;i += 2) {
6185 if (xmlStrEqual(atts[i], attname)) {
6186 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
6187 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6188 ctxt->sax->error(ctxt->userData,
6189 "Attribute %s redefined\n",
6190 attname);
6191 ctxt->wellFormed = 0;
6192 ctxt->disableSAX = 1;
6193 xmlFree(attname);
6194 xmlFree(attvalue);
6195 goto failed;
6196 }
6197 }
6198
6199 /*
6200 * Add the pair to atts
6201 */
6202 if (atts == NULL) {
6203 maxatts = 10;
6204 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6205 if (atts == NULL) {
6206 xmlGenericError(xmlGenericErrorContext,
6207 "malloc of %ld byte failed\n",
6208 maxatts * (long)sizeof(xmlChar *));
6209 return(NULL);
6210 }
6211 } else if (nbatts + 4 > maxatts) {
6212 maxatts *= 2;
6213 atts = (const xmlChar **) xmlRealloc((void *) atts,
6214 maxatts * sizeof(xmlChar *));
6215 if (atts == NULL) {
6216 xmlGenericError(xmlGenericErrorContext,
6217 "realloc of %ld byte failed\n",
6218 maxatts * (long)sizeof(xmlChar *));
6219 return(NULL);
6220 }
6221 }
6222 atts[nbatts++] = attname;
6223 atts[nbatts++] = attvalue;
6224 atts[nbatts] = NULL;
6225 atts[nbatts + 1] = NULL;
6226 } else {
6227 if (attname != NULL)
6228 xmlFree(attname);
6229 if (attvalue != NULL)
6230 xmlFree(attvalue);
6231 }
6232
6233failed:
6234
6235 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6236 break;
6237 if (!IS_BLANK(RAW)) {
6238 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6240 ctxt->sax->error(ctxt->userData,
6241 "attributes construct error\n");
6242 ctxt->wellFormed = 0;
6243 ctxt->disableSAX = 1;
6244 }
6245 SKIP_BLANKS;
6246 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6247 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6248 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6249 ctxt->sax->error(ctxt->userData,
6250 "xmlParseStartTag: problem parsing attributes\n");
6251 ctxt->wellFormed = 0;
6252 ctxt->disableSAX = 1;
6253 break;
6254 }
6255 GROW;
6256 }
6257
6258 /*
6259 * SAX: Start of Element !
6260 */
6261 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6262 (!ctxt->disableSAX))
6263 ctxt->sax->startElement(ctxt->userData, name, atts);
6264
6265 if (atts != NULL) {
6266 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6267 xmlFree((void *) atts);
6268 }
6269 return(name);
6270}
6271
6272/**
6273 * xmlParseEndTag:
6274 * @ctxt: an XML parser context
6275 *
6276 * parse an end of tag
6277 *
6278 * [42] ETag ::= '</' Name S? '>'
6279 *
6280 * With namespace
6281 *
6282 * [NS 9] ETag ::= '</' QName S? '>'
6283 */
6284
6285void
6286xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6287 xmlChar *name;
6288 xmlChar *oldname;
6289
6290 GROW;
6291 if ((RAW != '<') || (NXT(1) != '/')) {
6292 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6293 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6294 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6295 ctxt->wellFormed = 0;
6296 ctxt->disableSAX = 1;
6297 return;
6298 }
6299 SKIP(2);
6300
6301 name = xmlParseName(ctxt);
6302
6303 /*
6304 * We should definitely be at the ending "S? '>'" part
6305 */
6306 GROW;
6307 SKIP_BLANKS;
6308 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6309 ctxt->errNo = XML_ERR_GT_REQUIRED;
6310 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6311 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6312 ctxt->wellFormed = 0;
6313 ctxt->disableSAX = 1;
6314 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006315 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006316
6317 /*
6318 * [ WFC: Element Type Match ]
6319 * The Name in an element's end-tag must match the element type in the
6320 * start-tag.
6321 *
6322 */
6323 if ((name == NULL) || (ctxt->name == NULL) ||
6324 (!xmlStrEqual(name, ctxt->name))) {
6325 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6326 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6327 if ((name != NULL) && (ctxt->name != NULL)) {
6328 ctxt->sax->error(ctxt->userData,
6329 "Opening and ending tag mismatch: %s and %s\n",
6330 ctxt->name, name);
6331 } else if (ctxt->name != NULL) {
6332 ctxt->sax->error(ctxt->userData,
6333 "Ending tag eror for: %s\n", ctxt->name);
6334 } else {
6335 ctxt->sax->error(ctxt->userData,
6336 "Ending tag error: internal error ???\n");
6337 }
6338
6339 }
6340 ctxt->wellFormed = 0;
6341 ctxt->disableSAX = 1;
6342 }
6343
6344 /*
6345 * SAX: End of Tag
6346 */
6347 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6348 (!ctxt->disableSAX))
6349 ctxt->sax->endElement(ctxt->userData, name);
6350
6351 if (name != NULL)
6352 xmlFree(name);
6353 oldname = namePop(ctxt);
6354 spacePop(ctxt);
6355 if (oldname != NULL) {
6356#ifdef DEBUG_STACK
6357 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6358#endif
6359 xmlFree(oldname);
6360 }
6361 return;
6362}
6363
6364/**
6365 * xmlParseCDSect:
6366 * @ctxt: an XML parser context
6367 *
6368 * Parse escaped pure raw content.
6369 *
6370 * [18] CDSect ::= CDStart CData CDEnd
6371 *
6372 * [19] CDStart ::= '<![CDATA['
6373 *
6374 * [20] CData ::= (Char* - (Char* ']]>' Char*))
6375 *
6376 * [21] CDEnd ::= ']]>'
6377 */
6378void
6379xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6380 xmlChar *buf = NULL;
6381 int len = 0;
6382 int size = XML_PARSER_BUFFER_SIZE;
6383 int r, rl;
6384 int s, sl;
6385 int cur, l;
6386 int count = 0;
6387
6388 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6389 (NXT(2) == '[') && (NXT(3) == 'C') &&
6390 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6391 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6392 (NXT(8) == '[')) {
6393 SKIP(9);
6394 } else
6395 return;
6396
6397 ctxt->instate = XML_PARSER_CDATA_SECTION;
6398 r = CUR_CHAR(rl);
6399 if (!IS_CHAR(r)) {
6400 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6401 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6402 ctxt->sax->error(ctxt->userData,
6403 "CData section not finished\n");
6404 ctxt->wellFormed = 0;
6405 ctxt->disableSAX = 1;
6406 ctxt->instate = XML_PARSER_CONTENT;
6407 return;
6408 }
6409 NEXTL(rl);
6410 s = CUR_CHAR(sl);
6411 if (!IS_CHAR(s)) {
6412 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6413 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6414 ctxt->sax->error(ctxt->userData,
6415 "CData section not finished\n");
6416 ctxt->wellFormed = 0;
6417 ctxt->disableSAX = 1;
6418 ctxt->instate = XML_PARSER_CONTENT;
6419 return;
6420 }
6421 NEXTL(sl);
6422 cur = CUR_CHAR(l);
6423 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6424 if (buf == NULL) {
6425 xmlGenericError(xmlGenericErrorContext,
6426 "malloc of %d byte failed\n", size);
6427 return;
6428 }
6429 while (IS_CHAR(cur) &&
6430 ((r != ']') || (s != ']') || (cur != '>'))) {
6431 if (len + 5 >= size) {
6432 size *= 2;
6433 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6434 if (buf == NULL) {
6435 xmlGenericError(xmlGenericErrorContext,
6436 "realloc of %d byte failed\n", size);
6437 return;
6438 }
6439 }
6440 COPY_BUF(rl,buf,len,r);
6441 r = s;
6442 rl = sl;
6443 s = cur;
6444 sl = l;
6445 count++;
6446 if (count > 50) {
6447 GROW;
6448 count = 0;
6449 }
6450 NEXTL(l);
6451 cur = CUR_CHAR(l);
6452 }
6453 buf[len] = 0;
6454 ctxt->instate = XML_PARSER_CONTENT;
6455 if (cur != '>') {
6456 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6457 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6458 ctxt->sax->error(ctxt->userData,
6459 "CData section not finished\n%.50s\n", buf);
6460 ctxt->wellFormed = 0;
6461 ctxt->disableSAX = 1;
6462 xmlFree(buf);
6463 return;
6464 }
6465 NEXTL(l);
6466
6467 /*
6468 * Ok the buffer is to be consumed as cdata.
6469 */
6470 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6471 if (ctxt->sax->cdataBlock != NULL)
6472 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
Daniel Veillard7583a592001-07-08 13:15:55 +00006473 else if (ctxt->sax->characters != NULL)
6474 ctxt->sax->characters(ctxt->userData, buf, len);
Owen Taylor3473f882001-02-23 17:55:21 +00006475 }
6476 xmlFree(buf);
6477}
6478
6479/**
6480 * xmlParseContent:
6481 * @ctxt: an XML parser context
6482 *
6483 * Parse a content:
6484 *
6485 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6486 */
6487
6488void
6489xmlParseContent(xmlParserCtxtPtr ctxt) {
6490 GROW;
6491 while (((RAW != 0) || (ctxt->token != 0)) &&
6492 ((RAW != '<') || (NXT(1) != '/'))) {
6493 const xmlChar *test = CUR_PTR;
6494 int cons = ctxt->input->consumed;
Daniel Veillard04be4f52001-03-26 21:23:53 +00006495 int tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006496 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006497
6498 /*
6499 * Handle possible processed charrefs.
6500 */
6501 if (ctxt->token != 0) {
6502 xmlParseCharData(ctxt, 0);
6503 }
6504 /*
6505 * First case : a Processing Instruction.
6506 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006507 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006508 xmlParsePI(ctxt);
6509 }
6510
6511 /*
6512 * Second case : a CDSection
6513 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006514 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006515 (NXT(2) == '[') && (NXT(3) == 'C') &&
6516 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6517 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6518 (NXT(8) == '[')) {
6519 xmlParseCDSect(ctxt);
6520 }
6521
6522 /*
6523 * Third case : a comment
6524 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006525 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006526 (NXT(2) == '-') && (NXT(3) == '-')) {
6527 xmlParseComment(ctxt);
6528 ctxt->instate = XML_PARSER_CONTENT;
6529 }
6530
6531 /*
6532 * Fourth case : a sub-element.
6533 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006534 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006535 xmlParseElement(ctxt);
6536 }
6537
6538 /*
6539 * Fifth case : a reference. If if has not been resolved,
6540 * parsing returns it's Name, create the node
6541 */
6542
Daniel Veillard21a0f912001-02-25 19:54:14 +00006543 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006544 xmlParseReference(ctxt);
6545 }
6546
6547 /*
6548 * Last case, text. Note that References are handled directly.
6549 */
6550 else {
6551 xmlParseCharData(ctxt, 0);
6552 }
6553
6554 GROW;
6555 /*
6556 * Pop-up of finished entities.
6557 */
6558 while ((RAW == 0) && (ctxt->inputNr > 1))
6559 xmlPopInput(ctxt);
6560 SHRINK;
6561
6562 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6563 (tok == ctxt->token)) {
6564 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6566 ctxt->sax->error(ctxt->userData,
6567 "detected an error in element content\n");
6568 ctxt->wellFormed = 0;
6569 ctxt->disableSAX = 1;
6570 ctxt->instate = XML_PARSER_EOF;
6571 break;
6572 }
6573 }
6574}
6575
6576/**
6577 * xmlParseElement:
6578 * @ctxt: an XML parser context
6579 *
6580 * parse an XML element, this is highly recursive
6581 *
6582 * [39] element ::= EmptyElemTag | STag content ETag
6583 *
6584 * [ WFC: Element Type Match ]
6585 * The Name in an element's end-tag must match the element type in the
6586 * start-tag.
6587 *
6588 * [ VC: Element Valid ]
6589 * An element is valid if there is a declaration matching elementdecl
6590 * where the Name matches the element type and one of the following holds:
6591 * - The declaration matches EMPTY and the element has no content.
6592 * - The declaration matches children and the sequence of child elements
6593 * belongs to the language generated by the regular expression in the
6594 * content model, with optional white space (characters matching the
6595 * nonterminal S) between each pair of child elements.
6596 * - The declaration matches Mixed and the content consists of character
6597 * data and child elements whose types match names in the content model.
6598 * - The declaration matches ANY, and the types of any child elements have
6599 * been declared.
6600 */
6601
6602void
6603xmlParseElement(xmlParserCtxtPtr ctxt) {
6604 const xmlChar *openTag = CUR_PTR;
6605 xmlChar *name;
6606 xmlChar *oldname;
6607 xmlParserNodeInfo node_info;
6608 xmlNodePtr ret;
6609
6610 /* Capture start position */
6611 if (ctxt->record_info) {
6612 node_info.begin_pos = ctxt->input->consumed +
6613 (CUR_PTR - ctxt->input->base);
6614 node_info.begin_line = ctxt->input->line;
6615 }
6616
6617 if (ctxt->spaceNr == 0)
6618 spacePush(ctxt, -1);
6619 else
6620 spacePush(ctxt, *ctxt->space);
6621
6622 name = xmlParseStartTag(ctxt);
6623 if (name == NULL) {
6624 spacePop(ctxt);
6625 return;
6626 }
6627 namePush(ctxt, name);
6628 ret = ctxt->node;
6629
6630 /*
6631 * [ VC: Root Element Type ]
6632 * The Name in the document type declaration must match the element
6633 * type of the root element.
6634 */
6635 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6636 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6637 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6638
6639 /*
6640 * Check for an Empty Element.
6641 */
6642 if ((RAW == '/') && (NXT(1) == '>')) {
6643 SKIP(2);
6644 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6645 (!ctxt->disableSAX))
6646 ctxt->sax->endElement(ctxt->userData, name);
6647 oldname = namePop(ctxt);
6648 spacePop(ctxt);
6649 if (oldname != NULL) {
6650#ifdef DEBUG_STACK
6651 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6652#endif
6653 xmlFree(oldname);
6654 }
6655 if ( ret != NULL && ctxt->record_info ) {
6656 node_info.end_pos = ctxt->input->consumed +
6657 (CUR_PTR - ctxt->input->base);
6658 node_info.end_line = ctxt->input->line;
6659 node_info.node = ret;
6660 xmlParserAddNodeInfo(ctxt, &node_info);
6661 }
6662 return;
6663 }
6664 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006665 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006666 } else {
6667 ctxt->errNo = XML_ERR_GT_REQUIRED;
6668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6669 ctxt->sax->error(ctxt->userData,
6670 "Couldn't find end of Start Tag\n%.30s\n",
6671 openTag);
6672 ctxt->wellFormed = 0;
6673 ctxt->disableSAX = 1;
6674
6675 /*
6676 * end of parsing of this node.
6677 */
6678 nodePop(ctxt);
6679 oldname = namePop(ctxt);
6680 spacePop(ctxt);
6681 if (oldname != NULL) {
6682#ifdef DEBUG_STACK
6683 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6684#endif
6685 xmlFree(oldname);
6686 }
6687
6688 /*
6689 * Capture end position and add node
6690 */
6691 if ( ret != NULL && ctxt->record_info ) {
6692 node_info.end_pos = ctxt->input->consumed +
6693 (CUR_PTR - ctxt->input->base);
6694 node_info.end_line = ctxt->input->line;
6695 node_info.node = ret;
6696 xmlParserAddNodeInfo(ctxt, &node_info);
6697 }
6698 return;
6699 }
6700
6701 /*
6702 * Parse the content of the element:
6703 */
6704 xmlParseContent(ctxt);
6705 if (!IS_CHAR(RAW)) {
6706 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6708 ctxt->sax->error(ctxt->userData,
6709 "Premature end of data in tag %.30s\n", openTag);
6710 ctxt->wellFormed = 0;
6711 ctxt->disableSAX = 1;
6712
6713 /*
6714 * end of parsing of this node.
6715 */
6716 nodePop(ctxt);
6717 oldname = namePop(ctxt);
6718 spacePop(ctxt);
6719 if (oldname != NULL) {
6720#ifdef DEBUG_STACK
6721 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6722#endif
6723 xmlFree(oldname);
6724 }
6725 return;
6726 }
6727
6728 /*
6729 * parse the end of tag: '</' should be here.
6730 */
6731 xmlParseEndTag(ctxt);
6732
6733 /*
6734 * Capture end position and add node
6735 */
6736 if ( ret != NULL && ctxt->record_info ) {
6737 node_info.end_pos = ctxt->input->consumed +
6738 (CUR_PTR - ctxt->input->base);
6739 node_info.end_line = ctxt->input->line;
6740 node_info.node = ret;
6741 xmlParserAddNodeInfo(ctxt, &node_info);
6742 }
6743}
6744
6745/**
6746 * xmlParseVersionNum:
6747 * @ctxt: an XML parser context
6748 *
6749 * parse the XML version value.
6750 *
6751 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6752 *
6753 * Returns the string giving the XML version number, or NULL
6754 */
6755xmlChar *
6756xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6757 xmlChar *buf = NULL;
6758 int len = 0;
6759 int size = 10;
6760 xmlChar cur;
6761
6762 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6763 if (buf == NULL) {
6764 xmlGenericError(xmlGenericErrorContext,
6765 "malloc of %d byte failed\n", size);
6766 return(NULL);
6767 }
6768 cur = CUR;
6769 while (((cur >= 'a') && (cur <= 'z')) ||
6770 ((cur >= 'A') && (cur <= 'Z')) ||
6771 ((cur >= '0') && (cur <= '9')) ||
6772 (cur == '_') || (cur == '.') ||
6773 (cur == ':') || (cur == '-')) {
6774 if (len + 1 >= size) {
6775 size *= 2;
6776 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6777 if (buf == NULL) {
6778 xmlGenericError(xmlGenericErrorContext,
6779 "realloc of %d byte failed\n", size);
6780 return(NULL);
6781 }
6782 }
6783 buf[len++] = cur;
6784 NEXT;
6785 cur=CUR;
6786 }
6787 buf[len] = 0;
6788 return(buf);
6789}
6790
6791/**
6792 * xmlParseVersionInfo:
6793 * @ctxt: an XML parser context
6794 *
6795 * parse the XML version.
6796 *
6797 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6798 *
6799 * [25] Eq ::= S? '=' S?
6800 *
6801 * Returns the version string, e.g. "1.0"
6802 */
6803
6804xmlChar *
6805xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6806 xmlChar *version = NULL;
6807 const xmlChar *q;
6808
6809 if ((RAW == 'v') && (NXT(1) == 'e') &&
6810 (NXT(2) == 'r') && (NXT(3) == 's') &&
6811 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6812 (NXT(6) == 'n')) {
6813 SKIP(7);
6814 SKIP_BLANKS;
6815 if (RAW != '=') {
6816 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6817 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6818 ctxt->sax->error(ctxt->userData,
6819 "xmlParseVersionInfo : expected '='\n");
6820 ctxt->wellFormed = 0;
6821 ctxt->disableSAX = 1;
6822 return(NULL);
6823 }
6824 NEXT;
6825 SKIP_BLANKS;
6826 if (RAW == '"') {
6827 NEXT;
6828 q = CUR_PTR;
6829 version = xmlParseVersionNum(ctxt);
6830 if (RAW != '"') {
6831 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6832 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6833 ctxt->sax->error(ctxt->userData,
6834 "String not closed\n%.50s\n", q);
6835 ctxt->wellFormed = 0;
6836 ctxt->disableSAX = 1;
6837 } else
6838 NEXT;
6839 } else if (RAW == '\''){
6840 NEXT;
6841 q = CUR_PTR;
6842 version = xmlParseVersionNum(ctxt);
6843 if (RAW != '\'') {
6844 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6845 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6846 ctxt->sax->error(ctxt->userData,
6847 "String not closed\n%.50s\n", q);
6848 ctxt->wellFormed = 0;
6849 ctxt->disableSAX = 1;
6850 } else
6851 NEXT;
6852 } else {
6853 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6855 ctxt->sax->error(ctxt->userData,
6856 "xmlParseVersionInfo : expected ' or \"\n");
6857 ctxt->wellFormed = 0;
6858 ctxt->disableSAX = 1;
6859 }
6860 }
6861 return(version);
6862}
6863
6864/**
6865 * xmlParseEncName:
6866 * @ctxt: an XML parser context
6867 *
6868 * parse the XML encoding name
6869 *
6870 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6871 *
6872 * Returns the encoding name value or NULL
6873 */
6874xmlChar *
6875xmlParseEncName(xmlParserCtxtPtr ctxt) {
6876 xmlChar *buf = NULL;
6877 int len = 0;
6878 int size = 10;
6879 xmlChar cur;
6880
6881 cur = CUR;
6882 if (((cur >= 'a') && (cur <= 'z')) ||
6883 ((cur >= 'A') && (cur <= 'Z'))) {
6884 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6885 if (buf == NULL) {
6886 xmlGenericError(xmlGenericErrorContext,
6887 "malloc of %d byte failed\n", size);
6888 return(NULL);
6889 }
6890
6891 buf[len++] = cur;
6892 NEXT;
6893 cur = CUR;
6894 while (((cur >= 'a') && (cur <= 'z')) ||
6895 ((cur >= 'A') && (cur <= 'Z')) ||
6896 ((cur >= '0') && (cur <= '9')) ||
6897 (cur == '.') || (cur == '_') ||
6898 (cur == '-')) {
6899 if (len + 1 >= size) {
6900 size *= 2;
6901 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6902 if (buf == NULL) {
6903 xmlGenericError(xmlGenericErrorContext,
6904 "realloc of %d byte failed\n", size);
6905 return(NULL);
6906 }
6907 }
6908 buf[len++] = cur;
6909 NEXT;
6910 cur = CUR;
6911 if (cur == 0) {
6912 SHRINK;
6913 GROW;
6914 cur = CUR;
6915 }
6916 }
6917 buf[len] = 0;
6918 } else {
6919 ctxt->errNo = XML_ERR_ENCODING_NAME;
6920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6921 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6922 ctxt->wellFormed = 0;
6923 ctxt->disableSAX = 1;
6924 }
6925 return(buf);
6926}
6927
6928/**
6929 * xmlParseEncodingDecl:
6930 * @ctxt: an XML parser context
6931 *
6932 * parse the XML encoding declaration
6933 *
6934 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6935 *
6936 * This sets up the conversion filters.
6937 *
6938 * Returns the encoding value or NULL
6939 */
6940
6941xmlChar *
6942xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6943 xmlChar *encoding = NULL;
6944 const xmlChar *q;
6945
6946 SKIP_BLANKS;
6947 if ((RAW == 'e') && (NXT(1) == 'n') &&
6948 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6949 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6950 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6951 SKIP(8);
6952 SKIP_BLANKS;
6953 if (RAW != '=') {
6954 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6956 ctxt->sax->error(ctxt->userData,
6957 "xmlParseEncodingDecl : expected '='\n");
6958 ctxt->wellFormed = 0;
6959 ctxt->disableSAX = 1;
6960 return(NULL);
6961 }
6962 NEXT;
6963 SKIP_BLANKS;
6964 if (RAW == '"') {
6965 NEXT;
6966 q = CUR_PTR;
6967 encoding = xmlParseEncName(ctxt);
6968 if (RAW != '"') {
6969 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6970 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6971 ctxt->sax->error(ctxt->userData,
6972 "String not closed\n%.50s\n", q);
6973 ctxt->wellFormed = 0;
6974 ctxt->disableSAX = 1;
6975 } else
6976 NEXT;
6977 } else if (RAW == '\''){
6978 NEXT;
6979 q = CUR_PTR;
6980 encoding = xmlParseEncName(ctxt);
6981 if (RAW != '\'') {
6982 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6983 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6984 ctxt->sax->error(ctxt->userData,
6985 "String not closed\n%.50s\n", q);
6986 ctxt->wellFormed = 0;
6987 ctxt->disableSAX = 1;
6988 } else
6989 NEXT;
6990 } else if (RAW == '"'){
6991 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6992 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6993 ctxt->sax->error(ctxt->userData,
6994 "xmlParseEncodingDecl : expected ' or \"\n");
6995 ctxt->wellFormed = 0;
6996 ctxt->disableSAX = 1;
6997 }
6998 if (encoding != NULL) {
6999 xmlCharEncoding enc;
7000 xmlCharEncodingHandlerPtr handler;
7001
7002 if (ctxt->input->encoding != NULL)
7003 xmlFree((xmlChar *) ctxt->input->encoding);
7004 ctxt->input->encoding = encoding;
7005
7006 enc = xmlParseCharEncoding((const char *) encoding);
7007 /*
7008 * registered set of known encodings
7009 */
7010 if (enc != XML_CHAR_ENCODING_ERROR) {
7011 xmlSwitchEncoding(ctxt, enc);
7012 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7013 xmlFree(encoding);
7014 return(NULL);
7015 }
7016 } else {
7017 /*
7018 * fallback for unknown encodings
7019 */
7020 handler = xmlFindCharEncodingHandler((const char *) encoding);
7021 if (handler != NULL) {
7022 xmlSwitchToEncoding(ctxt, handler);
7023 } else {
7024 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
7025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7026 ctxt->sax->error(ctxt->userData,
7027 "Unsupported encoding %s\n", encoding);
7028 return(NULL);
7029 }
7030 }
7031 }
7032 }
7033 return(encoding);
7034}
7035
7036/**
7037 * xmlParseSDDecl:
7038 * @ctxt: an XML parser context
7039 *
7040 * parse the XML standalone declaration
7041 *
7042 * [32] SDDecl ::= S 'standalone' Eq
7043 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
7044 *
7045 * [ VC: Standalone Document Declaration ]
7046 * TODO The standalone document declaration must have the value "no"
7047 * if any external markup declarations contain declarations of:
7048 * - attributes with default values, if elements to which these
7049 * attributes apply appear in the document without specifications
7050 * of values for these attributes, or
7051 * - entities (other than amp, lt, gt, apos, quot), if references
7052 * to those entities appear in the document, or
7053 * - attributes with values subject to normalization, where the
7054 * attribute appears in the document with a value which will change
7055 * as a result of normalization, or
7056 * - element types with element content, if white space occurs directly
7057 * within any instance of those types.
7058 *
7059 * Returns 1 if standalone is "yes", 0 if it is "no", and -1 when no
7059 * valid standalone declaration was parsed
7060 */
7061
7062int
7063xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
7064 int standalone = -1;
7065
7066 SKIP_BLANKS;
7067 if ((RAW == 's') && (NXT(1) == 't') &&
7068 (NXT(2) == 'a') && (NXT(3) == 'n') &&
7069 (NXT(4) == 'd') && (NXT(5) == 'a') &&
7070 (NXT(6) == 'l') && (NXT(7) == 'o') &&
7071 (NXT(8) == 'n') && (NXT(9) == 'e')) {
7072 SKIP(10);
7073 SKIP_BLANKS;
7074 if (RAW != '=') {
7075 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
7076 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7077 ctxt->sax->error(ctxt->userData,
7078 "XML standalone declaration : expected '='\n");
7079 ctxt->wellFormed = 0;
7080 ctxt->disableSAX = 1;
7081 return(standalone);
7082 }
7083 NEXT;
7084 SKIP_BLANKS;
7085 if (RAW == '\''){
7086 NEXT;
7087 if ((RAW == 'n') && (NXT(1) == 'o')) {
7088 standalone = 0;
7089 SKIP(2);
7090 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7091 (NXT(2) == 's')) {
7092 standalone = 1;
7093 SKIP(3);
7094 } else {
7095 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7097 ctxt->sax->error(ctxt->userData,
7098 "standalone accepts only 'yes' or 'no'\n");
7099 ctxt->wellFormed = 0;
7100 ctxt->disableSAX = 1;
7101 }
7102 if (RAW != '\'') {
7103 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7104 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7105 ctxt->sax->error(ctxt->userData, "String not closed\n");
7106 ctxt->wellFormed = 0;
7107 ctxt->disableSAX = 1;
7108 } else
7109 NEXT;
7110 } else if (RAW == '"'){
7111 NEXT;
7112 if ((RAW == 'n') && (NXT(1) == 'o')) {
7113 standalone = 0;
7114 SKIP(2);
7115 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
7116 (NXT(2) == 's')) {
7117 standalone = 1;
7118 SKIP(3);
7119 } else {
7120 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
7121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7122 ctxt->sax->error(ctxt->userData,
7123 "standalone accepts only 'yes' or 'no'\n");
7124 ctxt->wellFormed = 0;
7125 ctxt->disableSAX = 1;
7126 }
7127 if (RAW != '"') {
7128 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
7129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7130 ctxt->sax->error(ctxt->userData, "String not closed\n");
7131 ctxt->wellFormed = 0;
7132 ctxt->disableSAX = 1;
7133 } else
7134 NEXT;
7135 } else {
7136 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
7137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7138 ctxt->sax->error(ctxt->userData,
7139 "Standalone value not found\n");
7140 ctxt->wellFormed = 0;
7141 ctxt->disableSAX = 1;
7142 }
7143 }
7144 return(standalone);
7145}
7146
7147/**
7148 * xmlParseXMLDecl:
7149 * @ctxt: an XML parser context
7150 *
7151 * parse an XML declaration header
7152 *
7153 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
7154 */
7155
7156void
7157xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
7158 xmlChar *version;
7159
7160 /*
7161 * We know that '<?xml' is here.
7162 */
7163 SKIP(5);
7164
7165 if (!IS_BLANK(RAW)) {
7166 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7167 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7168 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
7169 ctxt->wellFormed = 0;
7170 ctxt->disableSAX = 1;
7171 }
7172 SKIP_BLANKS;
7173
7174 /*
7175 * We should have the VersionInfo here.
7176 */
7177 version = xmlParseVersionInfo(ctxt);
7178 if (version == NULL)
7179 version = xmlCharStrdup(XML_DEFAULT_VERSION);
7180 ctxt->version = xmlStrdup(version);
7181 xmlFree(version);
7182
7183 /*
7184 * We may have the encoding declaration
7185 */
7186 if (!IS_BLANK(RAW)) {
7187 if ((RAW == '?') && (NXT(1) == '>')) {
7188 SKIP(2);
7189 return;
7190 }
7191 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7193 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7194 ctxt->wellFormed = 0;
7195 ctxt->disableSAX = 1;
7196 }
7197 xmlParseEncodingDecl(ctxt);
7198 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7199 /*
7200 * The XML REC instructs us to stop parsing right here
7201 */
7202 return;
7203 }
7204
7205 /*
7206 * We may have the standalone status.
7207 */
7208 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7209 if ((RAW == '?') && (NXT(1) == '>')) {
7210 SKIP(2);
7211 return;
7212 }
7213 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7215 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7216 ctxt->wellFormed = 0;
7217 ctxt->disableSAX = 1;
7218 }
7219 SKIP_BLANKS;
7220 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7221
7222 SKIP_BLANKS;
7223 if ((RAW == '?') && (NXT(1) == '>')) {
7224 SKIP(2);
7225 } else if (RAW == '>') {
7226 /* Deprecated old WD ... */
7227 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7229 ctxt->sax->error(ctxt->userData,
7230 "XML declaration must end-up with '?>'\n");
7231 ctxt->wellFormed = 0;
7232 ctxt->disableSAX = 1;
7233 NEXT;
7234 } else {
7235 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7236 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7237 ctxt->sax->error(ctxt->userData,
7238 "parsing XML declaration: '?>' expected\n");
7239 ctxt->wellFormed = 0;
7240 ctxt->disableSAX = 1;
7241 MOVETO_ENDTAG(CUR_PTR);
7242 NEXT;
7243 }
7244}
7245
7246/**
7247 * xmlParseMisc:
7248 * @ctxt: an XML parser context
7249 *
7250 * parse an XML Misc* optional field.
7251 *
7252 * [27] Misc ::= Comment | PI | S
7253 */
7254
7255void
7256xmlParseMisc(xmlParserCtxtPtr ctxt) {
7257 while (((RAW == '<') && (NXT(1) == '?')) ||
7258 ((RAW == '<') && (NXT(1) == '!') &&
7259 (NXT(2) == '-') && (NXT(3) == '-')) ||
7260 IS_BLANK(CUR)) {
7261 if ((RAW == '<') && (NXT(1) == '?')) {
7262 xmlParsePI(ctxt);
7263 } else if (IS_BLANK(CUR)) {
7264 NEXT;
7265 } else
7266 xmlParseComment(ctxt);
7267 }
7268}
7269
7270/**
7271 * xmlParseDocument:
7272 * @ctxt: an XML parser context
7273 *
7274 * parse an XML document (and build a tree if using the standard SAX
7275 * interface).
7276 *
7277 * [1] document ::= prolog element Misc*
7278 *
7279 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7280 *
7281 * Returns 0, -1 in case of error. the parser context is augmented
7282 * as a result of the parsing.
7283 */
7284
7285int
7286xmlParseDocument(xmlParserCtxtPtr ctxt) {
7287 xmlChar start[4];
7288 xmlCharEncoding enc;
7289
7290 xmlInitParser();
7291
7292 GROW;
7293
7294 /*
7295 * SAX: beginning of the document processing.
7296 */
7297 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7298 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7299
Daniel Veillard4aafa792001-07-28 17:21:12 +00007300 if (ctxt->encoding == XML_CHAR_ENCODING_NONE) {
7301 /*
7302 * Get the 4 first bytes and decode the charset
7303 * if enc != XML_CHAR_ENCODING_NONE
7304 * plug some encoding conversion routines.
7305 */
7306 start[0] = RAW;
7307 start[1] = NXT(1);
7308 start[2] = NXT(2);
7309 start[3] = NXT(3);
7310 enc = xmlDetectCharEncoding(start, 4);
7311 if (enc != XML_CHAR_ENCODING_NONE) {
7312 xmlSwitchEncoding(ctxt, enc);
7313 }
Owen Taylor3473f882001-02-23 17:55:21 +00007314 }
7315
7316
7317 if (CUR == 0) {
7318 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7319 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7320 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7321 ctxt->wellFormed = 0;
7322 ctxt->disableSAX = 1;
7323 }
7324
7325 /*
7326 * Check for the XMLDecl in the Prolog.
7327 */
7328 GROW;
7329 if ((RAW == '<') && (NXT(1) == '?') &&
7330 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7331 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7332
7333 /*
7334 * Note that we will switch encoding on the fly.
7335 */
7336 xmlParseXMLDecl(ctxt);
7337 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7338 /*
7339 * The XML REC instructs us to stop parsing right here
7340 */
7341 return(-1);
7342 }
7343 ctxt->standalone = ctxt->input->standalone;
7344 SKIP_BLANKS;
7345 } else {
7346 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7347 }
7348 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7349 ctxt->sax->startDocument(ctxt->userData);
7350
7351 /*
7352 * The Misc part of the Prolog
7353 */
7354 GROW;
7355 xmlParseMisc(ctxt);
7356
7357 /*
7358 * Then possibly doc type declaration(s) and more Misc
7359 * (doctypedecl Misc*)?
7360 */
7361 GROW;
7362 if ((RAW == '<') && (NXT(1) == '!') &&
7363 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7364 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7365 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7366 (NXT(8) == 'E')) {
7367
7368 ctxt->inSubset = 1;
7369 xmlParseDocTypeDecl(ctxt);
7370 if (RAW == '[') {
7371 ctxt->instate = XML_PARSER_DTD;
7372 xmlParseInternalSubset(ctxt);
7373 }
7374
7375 /*
7376 * Create and update the external subset.
7377 */
7378 ctxt->inSubset = 2;
7379 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7380 (!ctxt->disableSAX))
7381 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7382 ctxt->extSubSystem, ctxt->extSubURI);
7383 ctxt->inSubset = 0;
7384
7385
7386 ctxt->instate = XML_PARSER_PROLOG;
7387 xmlParseMisc(ctxt);
7388 }
7389
7390 /*
7391 * Time to start parsing the tree itself
7392 */
7393 GROW;
7394 if (RAW != '<') {
7395 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7396 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7397 ctxt->sax->error(ctxt->userData,
7398 "Start tag expected, '<' not found\n");
7399 ctxt->wellFormed = 0;
7400 ctxt->disableSAX = 1;
7401 ctxt->instate = XML_PARSER_EOF;
7402 } else {
7403 ctxt->instate = XML_PARSER_CONTENT;
7404 xmlParseElement(ctxt);
7405 ctxt->instate = XML_PARSER_EPILOG;
7406
7407
7408 /*
7409 * The Misc part at the end
7410 */
7411 xmlParseMisc(ctxt);
7412
7413 if (RAW != 0) {
7414 ctxt->errNo = XML_ERR_DOCUMENT_END;
7415 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7416 ctxt->sax->error(ctxt->userData,
7417 "Extra content at the end of the document\n");
7418 ctxt->wellFormed = 0;
7419 ctxt->disableSAX = 1;
7420 }
7421 ctxt->instate = XML_PARSER_EOF;
7422 }
7423
7424 /*
7425 * SAX: end of the document processing.
7426 */
7427 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7428 (!ctxt->disableSAX))
7429 ctxt->sax->endDocument(ctxt->userData);
7430
7431 if (! ctxt->wellFormed) return(-1);
7432 return(0);
7433}
7434
7435/**
7436 * xmlParseExtParsedEnt:
7437 * @ctxt: an XML parser context
7438 *
7439 * parse a genreral parsed entity
7440 * An external general parsed entity is well-formed if it matches the
7441 * production labeled extParsedEnt.
7442 *
7443 * [78] extParsedEnt ::= TextDecl? content
7444 *
7445 * Returns 0, -1 in case of error. the parser context is augmented
7446 * as a result of the parsing.
7447 */
7448
7449int
7450xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7451 xmlChar start[4];
7452 xmlCharEncoding enc;
7453
7454 xmlDefaultSAXHandlerInit();
7455
7456 GROW;
7457
7458 /*
7459 * SAX: beginning of the document processing.
7460 */
7461 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7462 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7463
7464 /*
7465 * Get the 4 first bytes and decode the charset
7466 * if enc != XML_CHAR_ENCODING_NONE
7467 * plug some encoding conversion routines.
7468 */
7469 start[0] = RAW;
7470 start[1] = NXT(1);
7471 start[2] = NXT(2);
7472 start[3] = NXT(3);
7473 enc = xmlDetectCharEncoding(start, 4);
7474 if (enc != XML_CHAR_ENCODING_NONE) {
7475 xmlSwitchEncoding(ctxt, enc);
7476 }
7477
7478
7479 if (CUR == 0) {
7480 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7481 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7482 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7483 ctxt->wellFormed = 0;
7484 ctxt->disableSAX = 1;
7485 }
7486
7487 /*
7488 * Check for the XMLDecl in the Prolog.
7489 */
7490 GROW;
7491 if ((RAW == '<') && (NXT(1) == '?') &&
7492 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7493 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7494
7495 /*
7496 * Note that we will switch encoding on the fly.
7497 */
7498 xmlParseXMLDecl(ctxt);
7499 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7500 /*
7501 * The XML REC instructs us to stop parsing right here
7502 */
7503 return(-1);
7504 }
7505 SKIP_BLANKS;
7506 } else {
7507 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7508 }
7509 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7510 ctxt->sax->startDocument(ctxt->userData);
7511
7512 /*
7513 * Doing validity checking on chunk doesn't make sense
7514 */
7515 ctxt->instate = XML_PARSER_CONTENT;
7516 ctxt->validate = 0;
7517 ctxt->loadsubset = 0;
7518 ctxt->depth = 0;
7519
7520 xmlParseContent(ctxt);
7521
7522 if ((RAW == '<') && (NXT(1) == '/')) {
7523 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7524 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7525 ctxt->sax->error(ctxt->userData,
7526 "chunk is not well balanced\n");
7527 ctxt->wellFormed = 0;
7528 ctxt->disableSAX = 1;
7529 } else if (RAW != 0) {
7530 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7532 ctxt->sax->error(ctxt->userData,
7533 "extra content at the end of well balanced chunk\n");
7534 ctxt->wellFormed = 0;
7535 ctxt->disableSAX = 1;
7536 }
7537
7538 /*
7539 * SAX: end of the document processing.
7540 */
7541 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7542 (!ctxt->disableSAX))
7543 ctxt->sax->endDocument(ctxt->userData);
7544
7545 if (! ctxt->wellFormed) return(-1);
7546 return(0);
7547}
7548
7549/************************************************************************
7550 * *
7551 * Progressive parsing interfaces *
7552 * *
7553 ************************************************************************/
7554
7555/**
7556 * xmlParseLookupSequence:
7557 * @ctxt: an XML parser context
7558 * @first: the first char to lookup
7559 * @next: the next char to lookup or zero
7560 * @third: the next char to lookup or zero
7561 *
7562 * Try to find if a sequence (first, next, third) or just (first next) or
7563 * (first) is available in the input stream.
7564 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7565 * to avoid rescanning sequences of bytes, it DOES change the state of the
7566 * parser, do not use liberally.
7567 *
7568 * Returns the index to the current parsing point if the full sequence
7569 * is available, -1 otherwise.
7570 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007571static int
Owen Taylor3473f882001-02-23 17:55:21 +00007572xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7573 xmlChar next, xmlChar third) {
7574 int base, len;
7575 xmlParserInputPtr in;
7576 const xmlChar *buf;
7577
7578 in = ctxt->input;
7579 if (in == NULL) return(-1);
7580 base = in->cur - in->base;
7581 if (base < 0) return(-1);
7582 if (ctxt->checkIndex > base)
7583 base = ctxt->checkIndex;
7584 if (in->buf == NULL) {
7585 buf = in->base;
7586 len = in->length;
7587 } else {
7588 buf = in->buf->buffer->content;
7589 len = in->buf->buffer->use;
7590 }
7591 /* take into account the sequence length */
7592 if (third) len -= 2;
7593 else if (next) len --;
7594 for (;base < len;base++) {
7595 if (buf[base] == first) {
7596 if (third != 0) {
7597 if ((buf[base + 1] != next) ||
7598 (buf[base + 2] != third)) continue;
7599 } else if (next != 0) {
7600 if (buf[base + 1] != next) continue;
7601 }
7602 ctxt->checkIndex = 0;
7603#ifdef DEBUG_PUSH
7604 if (next == 0)
7605 xmlGenericError(xmlGenericErrorContext,
7606 "PP: lookup '%c' found at %d\n",
7607 first, base);
7608 else if (third == 0)
7609 xmlGenericError(xmlGenericErrorContext,
7610 "PP: lookup '%c%c' found at %d\n",
7611 first, next, base);
7612 else
7613 xmlGenericError(xmlGenericErrorContext,
7614 "PP: lookup '%c%c%c' found at %d\n",
7615 first, next, third, base);
7616#endif
7617 return(base - (in->cur - in->base));
7618 }
7619 }
7620 ctxt->checkIndex = base;
7621#ifdef DEBUG_PUSH
7622 if (next == 0)
7623 xmlGenericError(xmlGenericErrorContext,
7624 "PP: lookup '%c' failed\n", first);
7625 else if (third == 0)
7626 xmlGenericError(xmlGenericErrorContext,
7627 "PP: lookup '%c%c' failed\n", first, next);
7628 else
7629 xmlGenericError(xmlGenericErrorContext,
7630 "PP: lookup '%c%c%c' failed\n", first, next, third);
7631#endif
7632 return(-1);
7633}
7634
7635/**
7636 * xmlParseTryOrFinish:
7637 * @ctxt: an XML parser context
7638 * @terminate: last chunk indicator
7639 *
7640 * Try to progress on parsing
7641 *
7642 * Returns zero if no parsing was possible
7643 */
Daniel Veillard56a4cb82001-03-24 17:00:36 +00007644static int
Owen Taylor3473f882001-02-23 17:55:21 +00007645xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7646 int ret = 0;
7647 int avail;
7648 xmlChar cur, next;
7649
7650#ifdef DEBUG_PUSH
7651 switch (ctxt->instate) {
7652 case XML_PARSER_EOF:
7653 xmlGenericError(xmlGenericErrorContext,
7654 "PP: try EOF\n"); break;
7655 case XML_PARSER_START:
7656 xmlGenericError(xmlGenericErrorContext,
7657 "PP: try START\n"); break;
7658 case XML_PARSER_MISC:
7659 xmlGenericError(xmlGenericErrorContext,
7660 "PP: try MISC\n");break;
7661 case XML_PARSER_COMMENT:
7662 xmlGenericError(xmlGenericErrorContext,
7663 "PP: try COMMENT\n");break;
7664 case XML_PARSER_PROLOG:
7665 xmlGenericError(xmlGenericErrorContext,
7666 "PP: try PROLOG\n");break;
7667 case XML_PARSER_START_TAG:
7668 xmlGenericError(xmlGenericErrorContext,
7669 "PP: try START_TAG\n");break;
7670 case XML_PARSER_CONTENT:
7671 xmlGenericError(xmlGenericErrorContext,
7672 "PP: try CONTENT\n");break;
7673 case XML_PARSER_CDATA_SECTION:
7674 xmlGenericError(xmlGenericErrorContext,
7675 "PP: try CDATA_SECTION\n");break;
7676 case XML_PARSER_END_TAG:
7677 xmlGenericError(xmlGenericErrorContext,
7678 "PP: try END_TAG\n");break;
7679 case XML_PARSER_ENTITY_DECL:
7680 xmlGenericError(xmlGenericErrorContext,
7681 "PP: try ENTITY_DECL\n");break;
7682 case XML_PARSER_ENTITY_VALUE:
7683 xmlGenericError(xmlGenericErrorContext,
7684 "PP: try ENTITY_VALUE\n");break;
7685 case XML_PARSER_ATTRIBUTE_VALUE:
7686 xmlGenericError(xmlGenericErrorContext,
7687 "PP: try ATTRIBUTE_VALUE\n");break;
7688 case XML_PARSER_DTD:
7689 xmlGenericError(xmlGenericErrorContext,
7690 "PP: try DTD\n");break;
7691 case XML_PARSER_EPILOG:
7692 xmlGenericError(xmlGenericErrorContext,
7693 "PP: try EPILOG\n");break;
7694 case XML_PARSER_PI:
7695 xmlGenericError(xmlGenericErrorContext,
7696 "PP: try PI\n");break;
7697 case XML_PARSER_IGNORE:
7698 xmlGenericError(xmlGenericErrorContext,
7699 "PP: try IGNORE\n");break;
7700 }
7701#endif
7702
7703 while (1) {
7704 /*
7705 * Pop-up of finished entities.
7706 */
7707 while ((RAW == 0) && (ctxt->inputNr > 1))
7708 xmlPopInput(ctxt);
7709
7710 if (ctxt->input ==NULL) break;
7711 if (ctxt->input->buf == NULL)
7712 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7713 else
7714 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7715 if (avail < 1)
7716 goto done;
7717 switch (ctxt->instate) {
7718 case XML_PARSER_EOF:
7719 /*
7720 * Document parsing is done !
7721 */
7722 goto done;
7723 case XML_PARSER_START:
Daniel Veillard0e4cd172001-06-28 12:13:56 +00007724 if (ctxt->charset == XML_CHAR_ENCODING_NONE) {
7725 xmlChar start[4];
7726 xmlCharEncoding enc;
7727
7728 /*
7729 * Very first chars read from the document flow.
7730 */
7731 if (avail < 4)
7732 goto done;
7733
7734 /*
7735 * Get the 4 first bytes and decode the charset
7736 * if enc != XML_CHAR_ENCODING_NONE
7737 * plug some encoding conversion routines.
7738 */
7739 start[0] = RAW;
7740 start[1] = NXT(1);
7741 start[2] = NXT(2);
7742 start[3] = NXT(3);
7743 enc = xmlDetectCharEncoding(start, 4);
7744 if (enc != XML_CHAR_ENCODING_NONE) {
7745 xmlSwitchEncoding(ctxt, enc);
7746 }
7747 break;
7748 }
Owen Taylor3473f882001-02-23 17:55:21 +00007749
7750 cur = ctxt->input->cur[0];
7751 next = ctxt->input->cur[1];
7752 if (cur == 0) {
7753 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7754 ctxt->sax->setDocumentLocator(ctxt->userData,
7755 &xmlDefaultSAXLocator);
7756 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7757 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7758 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7759 ctxt->wellFormed = 0;
7760 ctxt->disableSAX = 1;
7761 ctxt->instate = XML_PARSER_EOF;
7762#ifdef DEBUG_PUSH
7763 xmlGenericError(xmlGenericErrorContext,
7764 "PP: entering EOF\n");
7765#endif
7766 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7767 ctxt->sax->endDocument(ctxt->userData);
7768 goto done;
7769 }
7770 if ((cur == '<') && (next == '?')) {
7771 /* PI or XML decl */
7772 if (avail < 5) return(ret);
7773 if ((!terminate) &&
7774 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7775 return(ret);
7776 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7777 ctxt->sax->setDocumentLocator(ctxt->userData,
7778 &xmlDefaultSAXLocator);
7779 if ((ctxt->input->cur[2] == 'x') &&
7780 (ctxt->input->cur[3] == 'm') &&
7781 (ctxt->input->cur[4] == 'l') &&
7782 (IS_BLANK(ctxt->input->cur[5]))) {
7783 ret += 5;
7784#ifdef DEBUG_PUSH
7785 xmlGenericError(xmlGenericErrorContext,
7786 "PP: Parsing XML Decl\n");
7787#endif
7788 xmlParseXMLDecl(ctxt);
7789 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7790 /*
7791 * The XML REC instructs us to stop parsing right
7792 * here
7793 */
7794 ctxt->instate = XML_PARSER_EOF;
7795 return(0);
7796 }
7797 ctxt->standalone = ctxt->input->standalone;
7798 if ((ctxt->encoding == NULL) &&
7799 (ctxt->input->encoding != NULL))
7800 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7801 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7802 (!ctxt->disableSAX))
7803 ctxt->sax->startDocument(ctxt->userData);
7804 ctxt->instate = XML_PARSER_MISC;
7805#ifdef DEBUG_PUSH
7806 xmlGenericError(xmlGenericErrorContext,
7807 "PP: entering MISC\n");
7808#endif
7809 } else {
7810 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7811 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7812 (!ctxt->disableSAX))
7813 ctxt->sax->startDocument(ctxt->userData);
7814 ctxt->instate = XML_PARSER_MISC;
7815#ifdef DEBUG_PUSH
7816 xmlGenericError(xmlGenericErrorContext,
7817 "PP: entering MISC\n");
7818#endif
7819 }
7820 } else {
7821 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7822 ctxt->sax->setDocumentLocator(ctxt->userData,
7823 &xmlDefaultSAXLocator);
7824 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7825 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7826 (!ctxt->disableSAX))
7827 ctxt->sax->startDocument(ctxt->userData);
7828 ctxt->instate = XML_PARSER_MISC;
7829#ifdef DEBUG_PUSH
7830 xmlGenericError(xmlGenericErrorContext,
7831 "PP: entering MISC\n");
7832#endif
7833 }
7834 break;
7835 case XML_PARSER_MISC:
7836 SKIP_BLANKS;
7837 if (ctxt->input->buf == NULL)
7838 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7839 else
7840 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7841 if (avail < 2)
7842 goto done;
7843 cur = ctxt->input->cur[0];
7844 next = ctxt->input->cur[1];
7845 if ((cur == '<') && (next == '?')) {
7846 if ((!terminate) &&
7847 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7848 goto done;
7849#ifdef DEBUG_PUSH
7850 xmlGenericError(xmlGenericErrorContext,
7851 "PP: Parsing PI\n");
7852#endif
7853 xmlParsePI(ctxt);
7854 } else if ((cur == '<') && (next == '!') &&
7855 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7856 if ((!terminate) &&
7857 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7858 goto done;
7859#ifdef DEBUG_PUSH
7860 xmlGenericError(xmlGenericErrorContext,
7861 "PP: Parsing Comment\n");
7862#endif
7863 xmlParseComment(ctxt);
7864 ctxt->instate = XML_PARSER_MISC;
7865 } else if ((cur == '<') && (next == '!') &&
7866 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7867 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7868 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7869 (ctxt->input->cur[8] == 'E')) {
7870 if ((!terminate) &&
7871 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7872 goto done;
7873#ifdef DEBUG_PUSH
7874 xmlGenericError(xmlGenericErrorContext,
7875 "PP: Parsing internal subset\n");
7876#endif
7877 ctxt->inSubset = 1;
7878 xmlParseDocTypeDecl(ctxt);
7879 if (RAW == '[') {
7880 ctxt->instate = XML_PARSER_DTD;
7881#ifdef DEBUG_PUSH
7882 xmlGenericError(xmlGenericErrorContext,
7883 "PP: entering DTD\n");
7884#endif
7885 } else {
7886 /*
7887 * Create and update the external subset.
7888 */
7889 ctxt->inSubset = 2;
7890 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7891 (ctxt->sax->externalSubset != NULL))
7892 ctxt->sax->externalSubset(ctxt->userData,
7893 ctxt->intSubName, ctxt->extSubSystem,
7894 ctxt->extSubURI);
7895 ctxt->inSubset = 0;
7896 ctxt->instate = XML_PARSER_PROLOG;
7897#ifdef DEBUG_PUSH
7898 xmlGenericError(xmlGenericErrorContext,
7899 "PP: entering PROLOG\n");
7900#endif
7901 }
7902 } else if ((cur == '<') && (next == '!') &&
7903 (avail < 9)) {
7904 goto done;
7905 } else {
7906 ctxt->instate = XML_PARSER_START_TAG;
7907#ifdef DEBUG_PUSH
7908 xmlGenericError(xmlGenericErrorContext,
7909 "PP: entering START_TAG\n");
7910#endif
7911 }
7912 break;
7913 case XML_PARSER_IGNORE:
7914 xmlGenericError(xmlGenericErrorContext,
7915 "PP: internal error, state == IGNORE");
7916 ctxt->instate = XML_PARSER_DTD;
7917#ifdef DEBUG_PUSH
7918 xmlGenericError(xmlGenericErrorContext,
7919 "PP: entering DTD\n");
7920#endif
7921 break;
7922 case XML_PARSER_PROLOG:
7923 SKIP_BLANKS;
7924 if (ctxt->input->buf == NULL)
7925 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7926 else
7927 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7928 if (avail < 2)
7929 goto done;
7930 cur = ctxt->input->cur[0];
7931 next = ctxt->input->cur[1];
7932 if ((cur == '<') && (next == '?')) {
7933 if ((!terminate) &&
7934 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7935 goto done;
7936#ifdef DEBUG_PUSH
7937 xmlGenericError(xmlGenericErrorContext,
7938 "PP: Parsing PI\n");
7939#endif
7940 xmlParsePI(ctxt);
7941 } else if ((cur == '<') && (next == '!') &&
7942 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7943 if ((!terminate) &&
7944 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7945 goto done;
7946#ifdef DEBUG_PUSH
7947 xmlGenericError(xmlGenericErrorContext,
7948 "PP: Parsing Comment\n");
7949#endif
7950 xmlParseComment(ctxt);
7951 ctxt->instate = XML_PARSER_PROLOG;
7952 } else if ((cur == '<') && (next == '!') &&
7953 (avail < 4)) {
7954 goto done;
7955 } else {
7956 ctxt->instate = XML_PARSER_START_TAG;
7957#ifdef DEBUG_PUSH
7958 xmlGenericError(xmlGenericErrorContext,
7959 "PP: entering START_TAG\n");
7960#endif
7961 }
7962 break;
7963 case XML_PARSER_EPILOG:
7964 SKIP_BLANKS;
7965 if (ctxt->input->buf == NULL)
7966 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7967 else
7968 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7969 if (avail < 2)
7970 goto done;
7971 cur = ctxt->input->cur[0];
7972 next = ctxt->input->cur[1];
7973 if ((cur == '<') && (next == '?')) {
7974 if ((!terminate) &&
7975 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7976 goto done;
7977#ifdef DEBUG_PUSH
7978 xmlGenericError(xmlGenericErrorContext,
7979 "PP: Parsing PI\n");
7980#endif
7981 xmlParsePI(ctxt);
7982 ctxt->instate = XML_PARSER_EPILOG;
7983 } else if ((cur == '<') && (next == '!') &&
7984 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7985 if ((!terminate) &&
7986 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7987 goto done;
7988#ifdef DEBUG_PUSH
7989 xmlGenericError(xmlGenericErrorContext,
7990 "PP: Parsing Comment\n");
7991#endif
7992 xmlParseComment(ctxt);
7993 ctxt->instate = XML_PARSER_EPILOG;
7994 } else if ((cur == '<') && (next == '!') &&
7995 (avail < 4)) {
7996 goto done;
7997 } else {
7998 ctxt->errNo = XML_ERR_DOCUMENT_END;
7999 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8000 ctxt->sax->error(ctxt->userData,
8001 "Extra content at the end of the document\n");
8002 ctxt->wellFormed = 0;
8003 ctxt->disableSAX = 1;
8004 ctxt->instate = XML_PARSER_EOF;
8005#ifdef DEBUG_PUSH
8006 xmlGenericError(xmlGenericErrorContext,
8007 "PP: entering EOF\n");
8008#endif
8009 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8010 (!ctxt->disableSAX))
8011 ctxt->sax->endDocument(ctxt->userData);
8012 goto done;
8013 }
8014 break;
8015 case XML_PARSER_START_TAG: {
8016 xmlChar *name, *oldname;
8017
8018 if ((avail < 2) && (ctxt->inputNr == 1))
8019 goto done;
8020 cur = ctxt->input->cur[0];
8021 if (cur != '<') {
8022 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
8023 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8024 ctxt->sax->error(ctxt->userData,
8025 "Start tag expect, '<' not found\n");
8026 ctxt->wellFormed = 0;
8027 ctxt->disableSAX = 1;
8028 ctxt->instate = XML_PARSER_EOF;
8029#ifdef DEBUG_PUSH
8030 xmlGenericError(xmlGenericErrorContext,
8031 "PP: entering EOF\n");
8032#endif
8033 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8034 (!ctxt->disableSAX))
8035 ctxt->sax->endDocument(ctxt->userData);
8036 goto done;
8037 }
8038 if ((!terminate) &&
8039 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8040 goto done;
8041 if (ctxt->spaceNr == 0)
8042 spacePush(ctxt, -1);
8043 else
8044 spacePush(ctxt, *ctxt->space);
8045 name = xmlParseStartTag(ctxt);
8046 if (name == NULL) {
8047 spacePop(ctxt);
8048 ctxt->instate = XML_PARSER_EOF;
8049#ifdef DEBUG_PUSH
8050 xmlGenericError(xmlGenericErrorContext,
8051 "PP: entering EOF\n");
8052#endif
8053 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8054 (!ctxt->disableSAX))
8055 ctxt->sax->endDocument(ctxt->userData);
8056 goto done;
8057 }
8058 namePush(ctxt, xmlStrdup(name));
8059
8060 /*
8061 * [ VC: Root Element Type ]
8062 * The Name in the document type declaration must match
8063 * the element type of the root element.
8064 */
8065 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
8066 ctxt->node && (ctxt->node == ctxt->myDoc->children))
8067 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
8068
8069 /*
8070 * Check for an Empty Element.
8071 */
8072 if ((RAW == '/') && (NXT(1) == '>')) {
8073 SKIP(2);
8074 if ((ctxt->sax != NULL) &&
8075 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
8076 ctxt->sax->endElement(ctxt->userData, name);
8077 xmlFree(name);
8078 oldname = namePop(ctxt);
8079 spacePop(ctxt);
8080 if (oldname != NULL) {
8081#ifdef DEBUG_STACK
8082 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8083#endif
8084 xmlFree(oldname);
8085 }
8086 if (ctxt->name == NULL) {
8087 ctxt->instate = XML_PARSER_EPILOG;
8088#ifdef DEBUG_PUSH
8089 xmlGenericError(xmlGenericErrorContext,
8090 "PP: entering EPILOG\n");
8091#endif
8092 } else {
8093 ctxt->instate = XML_PARSER_CONTENT;
8094#ifdef DEBUG_PUSH
8095 xmlGenericError(xmlGenericErrorContext,
8096 "PP: entering CONTENT\n");
8097#endif
8098 }
8099 break;
8100 }
8101 if (RAW == '>') {
8102 NEXT;
8103 } else {
8104 ctxt->errNo = XML_ERR_GT_REQUIRED;
8105 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8106 ctxt->sax->error(ctxt->userData,
8107 "Couldn't find end of Start Tag %s\n",
8108 name);
8109 ctxt->wellFormed = 0;
8110 ctxt->disableSAX = 1;
8111
8112 /*
8113 * end of parsing of this node.
8114 */
8115 nodePop(ctxt);
8116 oldname = namePop(ctxt);
8117 spacePop(ctxt);
8118 if (oldname != NULL) {
8119#ifdef DEBUG_STACK
8120 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
8121#endif
8122 xmlFree(oldname);
8123 }
8124 }
8125 xmlFree(name);
8126 ctxt->instate = XML_PARSER_CONTENT;
8127#ifdef DEBUG_PUSH
8128 xmlGenericError(xmlGenericErrorContext,
8129 "PP: entering CONTENT\n");
8130#endif
8131 break;
8132 }
8133 case XML_PARSER_CONTENT: {
8134 const xmlChar *test;
8135 int cons;
Daniel Veillard04be4f52001-03-26 21:23:53 +00008136 int tok;
Owen Taylor3473f882001-02-23 17:55:21 +00008137
8138 /*
8139 * Handle preparsed entities and charRef
8140 */
8141 if (ctxt->token != 0) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008142 xmlChar current[2] = { 0 , 0 } ;
Owen Taylor3473f882001-02-23 17:55:21 +00008143
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008144 current[0] = (xmlChar) ctxt->token;
Owen Taylor3473f882001-02-23 17:55:21 +00008145 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8146 (ctxt->sax->characters != NULL))
Daniel Veillard56a4cb82001-03-24 17:00:36 +00008147 ctxt->sax->characters(ctxt->userData, current, 1);
Owen Taylor3473f882001-02-23 17:55:21 +00008148 ctxt->token = 0;
8149 }
8150 if ((avail < 2) && (ctxt->inputNr == 1))
8151 goto done;
8152 cur = ctxt->input->cur[0];
8153 next = ctxt->input->cur[1];
8154
8155 test = CUR_PTR;
8156 cons = ctxt->input->consumed;
8157 tok = ctxt->token;
8158 if ((cur == '<') && (next == '?')) {
8159 if ((!terminate) &&
8160 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
8161 goto done;
8162#ifdef DEBUG_PUSH
8163 xmlGenericError(xmlGenericErrorContext,
8164 "PP: Parsing PI\n");
8165#endif
8166 xmlParsePI(ctxt);
8167 } else if ((cur == '<') && (next == '!') &&
8168 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
8169 if ((!terminate) &&
8170 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
8171 goto done;
8172#ifdef DEBUG_PUSH
8173 xmlGenericError(xmlGenericErrorContext,
8174 "PP: Parsing Comment\n");
8175#endif
8176 xmlParseComment(ctxt);
8177 ctxt->instate = XML_PARSER_CONTENT;
8178 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
8179 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
8180 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
8181 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
8182 (ctxt->input->cur[8] == '[')) {
8183 SKIP(9);
8184 ctxt->instate = XML_PARSER_CDATA_SECTION;
8185#ifdef DEBUG_PUSH
8186 xmlGenericError(xmlGenericErrorContext,
8187 "PP: entering CDATA_SECTION\n");
8188#endif
8189 break;
8190 } else if ((cur == '<') && (next == '!') &&
8191 (avail < 9)) {
8192 goto done;
8193 } else if ((cur == '<') && (next == '/')) {
8194 ctxt->instate = XML_PARSER_END_TAG;
8195#ifdef DEBUG_PUSH
8196 xmlGenericError(xmlGenericErrorContext,
8197 "PP: entering END_TAG\n");
8198#endif
8199 break;
8200 } else if (cur == '<') {
8201 ctxt->instate = XML_PARSER_START_TAG;
8202#ifdef DEBUG_PUSH
8203 xmlGenericError(xmlGenericErrorContext,
8204 "PP: entering START_TAG\n");
8205#endif
8206 break;
8207 } else if (cur == '&') {
8208 if ((!terminate) &&
8209 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8210 goto done;
8211#ifdef DEBUG_PUSH
8212 xmlGenericError(xmlGenericErrorContext,
8213 "PP: Parsing Reference\n");
8214#endif
8215 xmlParseReference(ctxt);
8216 } else {
8217 /* TODO Avoid the extra copy, handle directly !!! */
8218 /*
8219 * Goal of the following test is:
8220 * - minimize calls to the SAX 'character' callback
8221 * when they are mergeable
8222 * - handle an problem for isBlank when we only parse
8223 * a sequence of blank chars and the next one is
8224 * not available to check against '<' presence.
8225 * - tries to homogenize the differences in SAX
8226 * callbacks beween the push and pull versions
8227 * of the parser.
8228 */
8229 if ((ctxt->inputNr == 1) &&
8230 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8231 if ((!terminate) &&
8232 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8233 goto done;
8234 }
8235 ctxt->checkIndex = 0;
8236#ifdef DEBUG_PUSH
8237 xmlGenericError(xmlGenericErrorContext,
8238 "PP: Parsing char data\n");
8239#endif
8240 xmlParseCharData(ctxt, 0);
8241 }
8242 /*
8243 * Pop-up of finished entities.
8244 */
8245 while ((RAW == 0) && (ctxt->inputNr > 1))
8246 xmlPopInput(ctxt);
8247 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8248 (tok == ctxt->token)) {
8249 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8250 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8251 ctxt->sax->error(ctxt->userData,
8252 "detected an error in element content\n");
8253 ctxt->wellFormed = 0;
8254 ctxt->disableSAX = 1;
8255 ctxt->instate = XML_PARSER_EOF;
8256 break;
8257 }
8258 break;
8259 }
8260 case XML_PARSER_CDATA_SECTION: {
8261 /*
8262 * The Push mode need to have the SAX callback for
8263 * cdataBlock merge back contiguous callbacks.
8264 */
8265 int base;
8266
8267 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8268 if (base < 0) {
8269 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8270 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8271 if (ctxt->sax->cdataBlock != NULL)
8272 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8273 XML_PARSER_BIG_BUFFER_SIZE);
8274 }
8275 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8276 ctxt->checkIndex = 0;
8277 }
8278 goto done;
8279 } else {
8280 if ((ctxt->sax != NULL) && (base > 0) &&
8281 (!ctxt->disableSAX)) {
8282 if (ctxt->sax->cdataBlock != NULL)
8283 ctxt->sax->cdataBlock(ctxt->userData,
8284 ctxt->input->cur, base);
8285 }
8286 SKIP(base + 3);
8287 ctxt->checkIndex = 0;
8288 ctxt->instate = XML_PARSER_CONTENT;
8289#ifdef DEBUG_PUSH
8290 xmlGenericError(xmlGenericErrorContext,
8291 "PP: entering CONTENT\n");
8292#endif
8293 }
8294 break;
8295 }
8296 case XML_PARSER_END_TAG:
8297 if (avail < 2)
8298 goto done;
8299 if ((!terminate) &&
8300 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8301 goto done;
8302 xmlParseEndTag(ctxt);
8303 if (ctxt->name == NULL) {
8304 ctxt->instate = XML_PARSER_EPILOG;
8305#ifdef DEBUG_PUSH
8306 xmlGenericError(xmlGenericErrorContext,
8307 "PP: entering EPILOG\n");
8308#endif
8309 } else {
8310 ctxt->instate = XML_PARSER_CONTENT;
8311#ifdef DEBUG_PUSH
8312 xmlGenericError(xmlGenericErrorContext,
8313 "PP: entering CONTENT\n");
8314#endif
8315 }
8316 break;
8317 case XML_PARSER_DTD: {
8318 /*
8319 * Sorry but progressive parsing of the internal subset
8320 * is not expected to be supported. We first check that
8321 * the full content of the internal subset is available and
8322 * the parsing is launched only at that point.
8323 * Internal subset ends up with "']' S? '>'" in an unescaped
8324 * section and not in a ']]>' sequence which are conditional
8325 * sections (whoever argued to keep that crap in XML deserve
8326 * a place in hell !).
8327 */
8328 int base, i;
8329 xmlChar *buf;
8330 xmlChar quote = 0;
8331
8332 base = ctxt->input->cur - ctxt->input->base;
8333 if (base < 0) return(0);
8334 if (ctxt->checkIndex > base)
8335 base = ctxt->checkIndex;
8336 buf = ctxt->input->buf->buffer->content;
8337 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8338 base++) {
8339 if (quote != 0) {
8340 if (buf[base] == quote)
8341 quote = 0;
8342 continue;
8343 }
8344 if (buf[base] == '"') {
8345 quote = '"';
8346 continue;
8347 }
8348 if (buf[base] == '\'') {
8349 quote = '\'';
8350 continue;
8351 }
8352 if (buf[base] == ']') {
8353 if ((unsigned int) base +1 >=
8354 ctxt->input->buf->buffer->use)
8355 break;
8356 if (buf[base + 1] == ']') {
8357 /* conditional crap, skip both ']' ! */
8358 base++;
8359 continue;
8360 }
8361 for (i = 0;
8362 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8363 i++) {
8364 if (buf[base + i] == '>')
8365 goto found_end_int_subset;
8366 }
8367 break;
8368 }
8369 }
8370 /*
8371 * We didn't found the end of the Internal subset
8372 */
8373 if (quote == 0)
8374 ctxt->checkIndex = base;
8375#ifdef DEBUG_PUSH
8376 if (next == 0)
8377 xmlGenericError(xmlGenericErrorContext,
8378 "PP: lookup of int subset end filed\n");
8379#endif
8380 goto done;
8381
8382found_end_int_subset:
8383 xmlParseInternalSubset(ctxt);
8384 ctxt->inSubset = 2;
8385 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8386 (ctxt->sax->externalSubset != NULL))
8387 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8388 ctxt->extSubSystem, ctxt->extSubURI);
8389 ctxt->inSubset = 0;
8390 ctxt->instate = XML_PARSER_PROLOG;
8391 ctxt->checkIndex = 0;
8392#ifdef DEBUG_PUSH
8393 xmlGenericError(xmlGenericErrorContext,
8394 "PP: entering PROLOG\n");
8395#endif
8396 break;
8397 }
8398 case XML_PARSER_COMMENT:
8399 xmlGenericError(xmlGenericErrorContext,
8400 "PP: internal error, state == COMMENT\n");
8401 ctxt->instate = XML_PARSER_CONTENT;
8402#ifdef DEBUG_PUSH
8403 xmlGenericError(xmlGenericErrorContext,
8404 "PP: entering CONTENT\n");
8405#endif
8406 break;
8407 case XML_PARSER_PI:
8408 xmlGenericError(xmlGenericErrorContext,
8409 "PP: internal error, state == PI\n");
8410 ctxt->instate = XML_PARSER_CONTENT;
8411#ifdef DEBUG_PUSH
8412 xmlGenericError(xmlGenericErrorContext,
8413 "PP: entering CONTENT\n");
8414#endif
8415 break;
8416 case XML_PARSER_ENTITY_DECL:
8417 xmlGenericError(xmlGenericErrorContext,
8418 "PP: internal error, state == ENTITY_DECL\n");
8419 ctxt->instate = XML_PARSER_DTD;
8420#ifdef DEBUG_PUSH
8421 xmlGenericError(xmlGenericErrorContext,
8422 "PP: entering DTD\n");
8423#endif
8424 break;
8425 case XML_PARSER_ENTITY_VALUE:
8426 xmlGenericError(xmlGenericErrorContext,
8427 "PP: internal error, state == ENTITY_VALUE\n");
8428 ctxt->instate = XML_PARSER_CONTENT;
8429#ifdef DEBUG_PUSH
8430 xmlGenericError(xmlGenericErrorContext,
8431 "PP: entering DTD\n");
8432#endif
8433 break;
8434 case XML_PARSER_ATTRIBUTE_VALUE:
8435 xmlGenericError(xmlGenericErrorContext,
8436 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8437 ctxt->instate = XML_PARSER_START_TAG;
8438#ifdef DEBUG_PUSH
8439 xmlGenericError(xmlGenericErrorContext,
8440 "PP: entering START_TAG\n");
8441#endif
8442 break;
8443 case XML_PARSER_SYSTEM_LITERAL:
8444 xmlGenericError(xmlGenericErrorContext,
8445 "PP: internal error, state == SYSTEM_LITERAL\n");
8446 ctxt->instate = XML_PARSER_START_TAG;
8447#ifdef DEBUG_PUSH
8448 xmlGenericError(xmlGenericErrorContext,
8449 "PP: entering START_TAG\n");
8450#endif
8451 break;
8452 }
8453 }
8454done:
8455#ifdef DEBUG_PUSH
8456 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8457#endif
8458 return(ret);
8459}
8460
8461/**
Owen Taylor3473f882001-02-23 17:55:21 +00008462 * xmlParseChunk:
8463 * @ctxt: an XML parser context
8464 * @chunk: an char array
8465 * @size: the size in byte of the chunk
8466 * @terminate: last chunk indicator
8467 *
8468 * Parse a Chunk of memory
8469 *
8470 * Returns zero if no error, the xmlParserErrors otherwise.
8471 */
8472int
8473xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8474 int terminate) {
8475 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8476 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8477 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8478 int cur = ctxt->input->cur - ctxt->input->base;
8479
8480 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8481 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8482 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008483 ctxt->input->end =
8484 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008485#ifdef DEBUG_PUSH
8486 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8487#endif
8488
8489 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8490 xmlParseTryOrFinish(ctxt, terminate);
8491 } else if (ctxt->instate != XML_PARSER_EOF) {
8492 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8493 xmlParserInputBufferPtr in = ctxt->input->buf;
8494 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8495 (in->raw != NULL)) {
8496 int nbchars;
8497
8498 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8499 if (nbchars < 0) {
8500 xmlGenericError(xmlGenericErrorContext,
8501 "xmlParseChunk: encoder error\n");
8502 return(XML_ERR_INVALID_ENCODING);
8503 }
8504 }
8505 }
8506 }
8507 xmlParseTryOrFinish(ctxt, terminate);
8508 if (terminate) {
8509 /*
8510 * Check for termination
8511 */
8512 if ((ctxt->instate != XML_PARSER_EOF) &&
8513 (ctxt->instate != XML_PARSER_EPILOG)) {
8514 ctxt->errNo = XML_ERR_DOCUMENT_END;
8515 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8516 ctxt->sax->error(ctxt->userData,
8517 "Extra content at the end of the document\n");
8518 ctxt->wellFormed = 0;
8519 ctxt->disableSAX = 1;
8520 }
8521 if (ctxt->instate != XML_PARSER_EOF) {
8522 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8523 (!ctxt->disableSAX))
8524 ctxt->sax->endDocument(ctxt->userData);
8525 }
8526 ctxt->instate = XML_PARSER_EOF;
8527 }
8528 return((xmlParserErrors) ctxt->errNo);
8529}
8530
8531/************************************************************************
8532 * *
8533 * I/O front end functions to the parser *
8534 * *
8535 ************************************************************************/
8536
8537/**
8538 * xmlStopParser:
8539 * @ctxt: an XML parser context
8540 *
8541 * Blocks further parser processing
8542 */
8543void
8544xmlStopParser(xmlParserCtxtPtr ctxt) {
8545 ctxt->instate = XML_PARSER_EOF;
8546 if (ctxt->input != NULL)
8547 ctxt->input->cur = BAD_CAST"";
8548}
8549
8550/**
8551 * xmlCreatePushParserCtxt:
8552 * @sax: a SAX handler
8553 * @user_data: The user data returned on SAX callbacks
8554 * @chunk: a pointer to an array of chars
8555 * @size: number of chars in the array
8556 * @filename: an optional file name or URI
8557 *
8558 * Create a parser context for using the XML parser in push mode
8559 * To allow content encoding detection, @size should be >= 4
8560 * The value of @filename is used for fetching external entities
8561 * and error/warning reports.
8562 *
8563 * Returns the new parser context or NULL
8564 */
8565xmlParserCtxtPtr
8566xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8567 const char *chunk, int size, const char *filename) {
8568 xmlParserCtxtPtr ctxt;
8569 xmlParserInputPtr inputStream;
8570 xmlParserInputBufferPtr buf;
8571 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8572
8573 /*
8574 * plug some encoding conversion routines
8575 */
8576 if ((chunk != NULL) && (size >= 4))
8577 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8578
8579 buf = xmlAllocParserInputBuffer(enc);
8580 if (buf == NULL) return(NULL);
8581
8582 ctxt = xmlNewParserCtxt();
8583 if (ctxt == NULL) {
8584 xmlFree(buf);
8585 return(NULL);
8586 }
8587 if (sax != NULL) {
8588 if (ctxt->sax != &xmlDefaultSAXHandler)
8589 xmlFree(ctxt->sax);
8590 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8591 if (ctxt->sax == NULL) {
8592 xmlFree(buf);
8593 xmlFree(ctxt);
8594 return(NULL);
8595 }
8596 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8597 if (user_data != NULL)
8598 ctxt->userData = user_data;
8599 }
8600 if (filename == NULL) {
8601 ctxt->directory = NULL;
8602 } else {
8603 ctxt->directory = xmlParserGetDirectory(filename);
8604 }
8605
8606 inputStream = xmlNewInputStream(ctxt);
8607 if (inputStream == NULL) {
8608 xmlFreeParserCtxt(ctxt);
8609 return(NULL);
8610 }
8611
8612 if (filename == NULL)
8613 inputStream->filename = NULL;
8614 else
8615 inputStream->filename = xmlMemStrdup(filename);
8616 inputStream->buf = buf;
8617 inputStream->base = inputStream->buf->buffer->content;
8618 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008619 inputStream->end =
8620 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008621
8622 inputPush(ctxt, inputStream);
8623
8624 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8625 (ctxt->input->buf != NULL)) {
8626 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8627#ifdef DEBUG_PUSH
8628 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8629#endif
8630 }
8631
Daniel Veillard0e4cd172001-06-28 12:13:56 +00008632 if (enc != XML_CHAR_ENCODING_NONE) {
8633 xmlSwitchEncoding(ctxt, enc);
8634 }
8635
Owen Taylor3473f882001-02-23 17:55:21 +00008636 return(ctxt);
8637}
8638
8639/**
8640 * xmlCreateIOParserCtxt:
8641 * @sax: a SAX handler
8642 * @user_data: The user data returned on SAX callbacks
8643 * @ioread: an I/O read function
8644 * @ioclose: an I/O close function
8645 * @ioctx: an I/O handler
8646 * @enc: the charset encoding if known
8647 *
8648 * Create a parser context for using the XML parser with an existing
8649 * I/O stream
8650 *
8651 * Returns the new parser context or NULL
8652 */
8653xmlParserCtxtPtr
8654xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8655 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8656 void *ioctx, xmlCharEncoding enc) {
8657 xmlParserCtxtPtr ctxt;
8658 xmlParserInputPtr inputStream;
8659 xmlParserInputBufferPtr buf;
8660
8661 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8662 if (buf == NULL) return(NULL);
8663
8664 ctxt = xmlNewParserCtxt();
8665 if (ctxt == NULL) {
8666 xmlFree(buf);
8667 return(NULL);
8668 }
8669 if (sax != NULL) {
8670 if (ctxt->sax != &xmlDefaultSAXHandler)
8671 xmlFree(ctxt->sax);
8672 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8673 if (ctxt->sax == NULL) {
8674 xmlFree(buf);
8675 xmlFree(ctxt);
8676 return(NULL);
8677 }
8678 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8679 if (user_data != NULL)
8680 ctxt->userData = user_data;
8681 }
8682
8683 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8684 if (inputStream == NULL) {
8685 xmlFreeParserCtxt(ctxt);
8686 return(NULL);
8687 }
8688 inputPush(ctxt, inputStream);
8689
8690 return(ctxt);
8691}
8692
8693/************************************************************************
8694 * *
8695 * Front ends when parsing a Dtd *
8696 * *
8697 ************************************************************************/
8698
8699/**
8700 * xmlIOParseDTD:
8701 * @sax: the SAX handler block or NULL
8702 * @input: an Input Buffer
8703 * @enc: the charset encoding if known
8704 *
8705 * Load and parse a DTD
8706 *
8707 * Returns the resulting xmlDtdPtr or NULL in case of error.
8708 * @input will be freed at parsing end.
8709 */
8710
8711xmlDtdPtr
8712xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8713 xmlCharEncoding enc) {
8714 xmlDtdPtr ret = NULL;
8715 xmlParserCtxtPtr ctxt;
8716 xmlParserInputPtr pinput = NULL;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008717 xmlChar start[4];
Owen Taylor3473f882001-02-23 17:55:21 +00008718
8719 if (input == NULL)
8720 return(NULL);
8721
8722 ctxt = xmlNewParserCtxt();
8723 if (ctxt == NULL) {
8724 return(NULL);
8725 }
8726
8727 /*
8728 * Set-up the SAX context
8729 */
8730 if (sax != NULL) {
8731 if (ctxt->sax != NULL)
8732 xmlFree(ctxt->sax);
8733 ctxt->sax = sax;
8734 ctxt->userData = NULL;
8735 }
8736
8737 /*
8738 * generate a parser input from the I/O handler
8739 */
8740
8741 pinput = xmlNewIOInputStream(ctxt, input, enc);
8742 if (pinput == NULL) {
8743 if (sax != NULL) ctxt->sax = NULL;
8744 xmlFreeParserCtxt(ctxt);
8745 return(NULL);
8746 }
8747
8748 /*
8749 * plug some encoding conversion routines here.
8750 */
8751 xmlPushInput(ctxt, pinput);
8752
8753 pinput->filename = NULL;
8754 pinput->line = 1;
8755 pinput->col = 1;
8756 pinput->base = ctxt->input->cur;
8757 pinput->cur = ctxt->input->cur;
8758 pinput->free = NULL;
8759
8760 /*
8761 * let's parse that entity knowing it's an external subset.
8762 */
8763 ctxt->inSubset = 2;
8764 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8765 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8766 BAD_CAST "none", BAD_CAST "none");
Daniel Veillard87a764e2001-06-20 17:41:10 +00008767
8768 if (enc == XML_CHAR_ENCODING_NONE) {
8769 /*
8770 * Get the 4 first bytes and decode the charset
8771 * if enc != XML_CHAR_ENCODING_NONE
8772 * plug some encoding conversion routines.
8773 */
8774 start[0] = RAW;
8775 start[1] = NXT(1);
8776 start[2] = NXT(2);
8777 start[3] = NXT(3);
8778 enc = xmlDetectCharEncoding(start, 4);
8779 if (enc != XML_CHAR_ENCODING_NONE) {
8780 xmlSwitchEncoding(ctxt, enc);
8781 }
8782 }
8783
Owen Taylor3473f882001-02-23 17:55:21 +00008784 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8785
8786 if (ctxt->myDoc != NULL) {
8787 if (ctxt->wellFormed) {
8788 ret = ctxt->myDoc->extSubset;
8789 ctxt->myDoc->extSubset = NULL;
8790 } else {
8791 ret = NULL;
8792 }
8793 xmlFreeDoc(ctxt->myDoc);
8794 ctxt->myDoc = NULL;
8795 }
8796 if (sax != NULL) ctxt->sax = NULL;
8797 xmlFreeParserCtxt(ctxt);
8798
8799 return(ret);
8800}
8801
8802/**
8803 * xmlSAXParseDTD:
8804 * @sax: the SAX handler block
8805 * @ExternalID: a NAME* containing the External ID of the DTD
8806 * @SystemID: a NAME* containing the URL to the DTD
8807 *
8808 * Load and parse an external subset.
8809 *
8810 * Returns the resulting xmlDtdPtr or NULL in case of error.
8811 */
8812
8813xmlDtdPtr
8814xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8815 const xmlChar *SystemID) {
8816 xmlDtdPtr ret = NULL;
8817 xmlParserCtxtPtr ctxt;
8818 xmlParserInputPtr input = NULL;
8819 xmlCharEncoding enc;
8820
8821 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8822
8823 ctxt = xmlNewParserCtxt();
8824 if (ctxt == NULL) {
8825 return(NULL);
8826 }
8827
8828 /*
8829 * Set-up the SAX context
8830 */
8831 if (sax != NULL) {
8832 if (ctxt->sax != NULL)
8833 xmlFree(ctxt->sax);
8834 ctxt->sax = sax;
8835 ctxt->userData = NULL;
8836 }
8837
8838 /*
8839 * Ask the Entity resolver to load the damn thing
8840 */
8841
8842 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8843 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8844 if (input == NULL) {
8845 if (sax != NULL) ctxt->sax = NULL;
8846 xmlFreeParserCtxt(ctxt);
8847 return(NULL);
8848 }
8849
8850 /*
8851 * plug some encoding conversion routines here.
8852 */
8853 xmlPushInput(ctxt, input);
8854 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8855 xmlSwitchEncoding(ctxt, enc);
8856
8857 if (input->filename == NULL)
8858 input->filename = (char *) xmlStrdup(SystemID);
8859 input->line = 1;
8860 input->col = 1;
8861 input->base = ctxt->input->cur;
8862 input->cur = ctxt->input->cur;
8863 input->free = NULL;
8864
8865 /*
8866 * let's parse that entity knowing it's an external subset.
8867 */
8868 ctxt->inSubset = 2;
8869 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8870 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8871 ExternalID, SystemID);
8872 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8873
8874 if (ctxt->myDoc != NULL) {
8875 if (ctxt->wellFormed) {
8876 ret = ctxt->myDoc->extSubset;
8877 ctxt->myDoc->extSubset = NULL;
8878 } else {
8879 ret = NULL;
8880 }
8881 xmlFreeDoc(ctxt->myDoc);
8882 ctxt->myDoc = NULL;
8883 }
8884 if (sax != NULL) ctxt->sax = NULL;
8885 xmlFreeParserCtxt(ctxt);
8886
8887 return(ret);
8888}
8889
8890/**
8891 * xmlParseDTD:
8892 * @ExternalID: a NAME* containing the External ID of the DTD
8893 * @SystemID: a NAME* containing the URL to the DTD
8894 *
8895 * Load and parse an external subset.
8896 *
8897 * Returns the resulting xmlDtdPtr or NULL in case of error.
8898 */
8899
8900xmlDtdPtr
8901xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8902 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8903}
8904
8905/************************************************************************
8906 * *
8907 * Front ends when parsing an Entity *
8908 * *
8909 ************************************************************************/
8910
8911/**
Owen Taylor3473f882001-02-23 17:55:21 +00008912 * xmlParseCtxtExternalEntity:
8913 * @ctx: the existing parsing context
8914 * @URL: the URL for the entity to load
8915 * @ID: the System ID for the entity to load
8916 * @list: the return value for the set of parsed nodes
8917 *
8918 * Parse an external general entity within an existing parsing context
8919 * An external general parsed entity is well-formed if it matches the
8920 * production labeled extParsedEnt.
8921 *
8922 * [78] extParsedEnt ::= TextDecl? content
8923 *
8924 * Returns 0 if the entity is well formed, -1 in case of args problem and
8925 * the parser error code otherwise
8926 */
8927
8928int
8929xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8930 const xmlChar *ID, xmlNodePtr *list) {
8931 xmlParserCtxtPtr ctxt;
8932 xmlDocPtr newDoc;
8933 xmlSAXHandlerPtr oldsax = NULL;
8934 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00008935 xmlChar start[4];
8936 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00008937
8938 if (ctx->depth > 40) {
8939 return(XML_ERR_ENTITY_LOOP);
8940 }
8941
8942 if (list != NULL)
8943 *list = NULL;
8944 if ((URL == NULL) && (ID == NULL))
8945 return(-1);
8946 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8947 return(-1);
8948
8949
8950 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8951 if (ctxt == NULL) return(-1);
8952 ctxt->userData = ctxt;
8953 oldsax = ctxt->sax;
8954 ctxt->sax = ctx->sax;
8955 newDoc = xmlNewDoc(BAD_CAST "1.0");
8956 if (newDoc == NULL) {
8957 xmlFreeParserCtxt(ctxt);
8958 return(-1);
8959 }
8960 if (ctx->myDoc != NULL) {
8961 newDoc->intSubset = ctx->myDoc->intSubset;
8962 newDoc->extSubset = ctx->myDoc->extSubset;
8963 }
8964 if (ctx->myDoc->URL != NULL) {
8965 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8966 }
8967 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8968 if (newDoc->children == NULL) {
8969 ctxt->sax = oldsax;
8970 xmlFreeParserCtxt(ctxt);
8971 newDoc->intSubset = NULL;
8972 newDoc->extSubset = NULL;
8973 xmlFreeDoc(newDoc);
8974 return(-1);
8975 }
8976 nodePush(ctxt, newDoc->children);
8977 if (ctx->myDoc == NULL) {
8978 ctxt->myDoc = newDoc;
8979 } else {
8980 ctxt->myDoc = ctx->myDoc;
8981 newDoc->children->doc = ctx->myDoc;
8982 }
8983
Daniel Veillard87a764e2001-06-20 17:41:10 +00008984 /*
8985 * Get the 4 first bytes and decode the charset
8986 * if enc != XML_CHAR_ENCODING_NONE
8987 * plug some encoding conversion routines.
8988 */
8989 GROW
8990 start[0] = RAW;
8991 start[1] = NXT(1);
8992 start[2] = NXT(2);
8993 start[3] = NXT(3);
8994 enc = xmlDetectCharEncoding(start, 4);
8995 if (enc != XML_CHAR_ENCODING_NONE) {
8996 xmlSwitchEncoding(ctxt, enc);
8997 }
8998
Owen Taylor3473f882001-02-23 17:55:21 +00008999 /*
9000 * Parse a possible text declaration first
9001 */
Owen Taylor3473f882001-02-23 17:55:21 +00009002 if ((RAW == '<') && (NXT(1) == '?') &&
9003 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9004 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9005 xmlParseTextDecl(ctxt);
9006 }
9007
9008 /*
9009 * Doing validity checking on chunk doesn't make sense
9010 */
9011 ctxt->instate = XML_PARSER_CONTENT;
9012 ctxt->validate = ctx->validate;
9013 ctxt->loadsubset = ctx->loadsubset;
9014 ctxt->depth = ctx->depth + 1;
9015 ctxt->replaceEntities = ctx->replaceEntities;
9016 if (ctxt->validate) {
9017 ctxt->vctxt.error = ctx->vctxt.error;
9018 ctxt->vctxt.warning = ctx->vctxt.warning;
Owen Taylor3473f882001-02-23 17:55:21 +00009019 } else {
9020 ctxt->vctxt.error = NULL;
9021 ctxt->vctxt.warning = NULL;
9022 }
Daniel Veillarda9142e72001-06-19 11:07:54 +00009023 ctxt->vctxt.nodeTab = NULL;
9024 ctxt->vctxt.nodeNr = 0;
9025 ctxt->vctxt.nodeMax = 0;
9026 ctxt->vctxt.node = NULL;
Owen Taylor3473f882001-02-23 17:55:21 +00009027
9028 xmlParseContent(ctxt);
9029
9030 if ((RAW == '<') && (NXT(1) == '/')) {
9031 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9032 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9033 ctxt->sax->error(ctxt->userData,
9034 "chunk is not well balanced\n");
9035 ctxt->wellFormed = 0;
9036 ctxt->disableSAX = 1;
9037 } else if (RAW != 0) {
9038 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9040 ctxt->sax->error(ctxt->userData,
9041 "extra content at the end of well balanced chunk\n");
9042 ctxt->wellFormed = 0;
9043 ctxt->disableSAX = 1;
9044 }
9045 if (ctxt->node != newDoc->children) {
9046 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9048 ctxt->sax->error(ctxt->userData,
9049 "chunk is not well balanced\n");
9050 ctxt->wellFormed = 0;
9051 ctxt->disableSAX = 1;
9052 }
9053
9054 if (!ctxt->wellFormed) {
9055 if (ctxt->errNo == 0)
9056 ret = 1;
9057 else
9058 ret = ctxt->errNo;
9059 } else {
9060 if (list != NULL) {
9061 xmlNodePtr cur;
9062
9063 /*
9064 * Return the newly created nodeset after unlinking it from
9065 * they pseudo parent.
9066 */
9067 cur = newDoc->children->children;
9068 *list = cur;
9069 while (cur != NULL) {
9070 cur->parent = NULL;
9071 cur = cur->next;
9072 }
9073 newDoc->children->children = NULL;
9074 }
9075 ret = 0;
9076 }
9077 ctxt->sax = oldsax;
9078 xmlFreeParserCtxt(ctxt);
9079 newDoc->intSubset = NULL;
9080 newDoc->extSubset = NULL;
9081 xmlFreeDoc(newDoc);
9082
9083 return(ret);
9084}
9085
9086/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009087 * xmlParseExternalEntityPrivate:
Owen Taylor3473f882001-02-23 17:55:21 +00009088 * @doc: the document the chunk pertains to
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009089 * @oldctxt: the previous parser context if available
Owen Taylor3473f882001-02-23 17:55:21 +00009090 * @sax: the SAX handler bloc (possibly NULL)
9091 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9092 * @depth: Used for loop detection, use 0
9093 * @URL: the URL for the entity to load
9094 * @ID: the System ID for the entity to load
9095 * @list: the return value for the set of parsed nodes
9096 *
Daniel Veillard257d9102001-05-08 10:41:44 +00009097 * Private version of xmlParseExternalEntity()
Owen Taylor3473f882001-02-23 17:55:21 +00009098 *
9099 * Returns 0 if the entity is well formed, -1 in case of args problem and
9100 * the parser error code otherwise
9101 */
9102
Daniel Veillard257d9102001-05-08 10:41:44 +00009103static int
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009104xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
9105 xmlSAXHandlerPtr sax,
Daniel Veillard257d9102001-05-08 10:41:44 +00009106 void *user_data, int depth, const xmlChar *URL,
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009107 const xmlChar *ID, xmlNodePtr *list) {
Owen Taylor3473f882001-02-23 17:55:21 +00009108 xmlParserCtxtPtr ctxt;
9109 xmlDocPtr newDoc;
9110 xmlSAXHandlerPtr oldsax = NULL;
9111 int ret = 0;
Daniel Veillard87a764e2001-06-20 17:41:10 +00009112 xmlChar start[4];
9113 xmlCharEncoding enc;
Owen Taylor3473f882001-02-23 17:55:21 +00009114
9115 if (depth > 40) {
9116 return(XML_ERR_ENTITY_LOOP);
9117 }
9118
9119
9120
9121 if (list != NULL)
9122 *list = NULL;
9123 if ((URL == NULL) && (ID == NULL))
9124 return(-1);
9125 if (doc == NULL) /* @@ relax but check for dereferences */
9126 return(-1);
9127
9128
9129 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
9130 if (ctxt == NULL) return(-1);
9131 ctxt->userData = ctxt;
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009132 if (oldctxt != NULL) {
9133 ctxt->_private = oldctxt->_private;
9134 ctxt->loadsubset = oldctxt->loadsubset;
9135 ctxt->validate = oldctxt->validate;
9136 ctxt->external = oldctxt->external;
9137 } else {
9138 /*
9139 * Doing validity checking on chunk without context
9140 * doesn't make sense
9141 */
9142 ctxt->_private = NULL;
9143 ctxt->validate = 0;
9144 ctxt->external = 2;
9145 ctxt->loadsubset = 0;
9146 }
Owen Taylor3473f882001-02-23 17:55:21 +00009147 if (sax != NULL) {
9148 oldsax = ctxt->sax;
9149 ctxt->sax = sax;
9150 if (user_data != NULL)
9151 ctxt->userData = user_data;
9152 }
9153 newDoc = xmlNewDoc(BAD_CAST "1.0");
9154 if (newDoc == NULL) {
9155 xmlFreeParserCtxt(ctxt);
9156 return(-1);
9157 }
9158 if (doc != NULL) {
9159 newDoc->intSubset = doc->intSubset;
9160 newDoc->extSubset = doc->extSubset;
9161 }
9162 if (doc->URL != NULL) {
9163 newDoc->URL = xmlStrdup(doc->URL);
9164 }
9165 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9166 if (newDoc->children == NULL) {
9167 if (sax != NULL)
9168 ctxt->sax = oldsax;
9169 xmlFreeParserCtxt(ctxt);
9170 newDoc->intSubset = NULL;
9171 newDoc->extSubset = NULL;
9172 xmlFreeDoc(newDoc);
9173 return(-1);
9174 }
9175 nodePush(ctxt, newDoc->children);
9176 if (doc == NULL) {
9177 ctxt->myDoc = newDoc;
9178 } else {
9179 ctxt->myDoc = doc;
9180 newDoc->children->doc = doc;
9181 }
9182
Daniel Veillard87a764e2001-06-20 17:41:10 +00009183 /*
9184 * Get the 4 first bytes and decode the charset
9185 * if enc != XML_CHAR_ENCODING_NONE
9186 * plug some encoding conversion routines.
9187 */
9188 GROW;
9189 start[0] = RAW;
9190 start[1] = NXT(1);
9191 start[2] = NXT(2);
9192 start[3] = NXT(3);
9193 enc = xmlDetectCharEncoding(start, 4);
9194 if (enc != XML_CHAR_ENCODING_NONE) {
9195 xmlSwitchEncoding(ctxt, enc);
9196 }
9197
Owen Taylor3473f882001-02-23 17:55:21 +00009198 /*
9199 * Parse a possible text declaration first
9200 */
Owen Taylor3473f882001-02-23 17:55:21 +00009201 if ((RAW == '<') && (NXT(1) == '?') &&
9202 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9203 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9204 xmlParseTextDecl(ctxt);
9205 }
9206
Owen Taylor3473f882001-02-23 17:55:21 +00009207 ctxt->instate = XML_PARSER_CONTENT;
Owen Taylor3473f882001-02-23 17:55:21 +00009208 ctxt->depth = depth;
9209
9210 xmlParseContent(ctxt);
9211
9212 if ((RAW == '<') && (NXT(1) == '/')) {
9213 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9215 ctxt->sax->error(ctxt->userData,
9216 "chunk is not well balanced\n");
9217 ctxt->wellFormed = 0;
9218 ctxt->disableSAX = 1;
9219 } else if (RAW != 0) {
9220 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9221 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9222 ctxt->sax->error(ctxt->userData,
9223 "extra content at the end of well balanced chunk\n");
9224 ctxt->wellFormed = 0;
9225 ctxt->disableSAX = 1;
9226 }
9227 if (ctxt->node != newDoc->children) {
9228 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9229 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9230 ctxt->sax->error(ctxt->userData,
9231 "chunk is not well balanced\n");
9232 ctxt->wellFormed = 0;
9233 ctxt->disableSAX = 1;
9234 }
9235
9236 if (!ctxt->wellFormed) {
9237 if (ctxt->errNo == 0)
9238 ret = 1;
9239 else
9240 ret = ctxt->errNo;
9241 } else {
9242 if (list != NULL) {
9243 xmlNodePtr cur;
9244
9245 /*
9246 * Return the newly created nodeset after unlinking it from
9247 * they pseudo parent.
9248 */
9249 cur = newDoc->children->children;
9250 *list = cur;
9251 while (cur != NULL) {
9252 cur->parent = NULL;
9253 cur = cur->next;
9254 }
9255 newDoc->children->children = NULL;
9256 }
9257 ret = 0;
9258 }
9259 if (sax != NULL)
9260 ctxt->sax = oldsax;
9261 xmlFreeParserCtxt(ctxt);
9262 newDoc->intSubset = NULL;
9263 newDoc->extSubset = NULL;
9264 xmlFreeDoc(newDoc);
9265
9266 return(ret);
9267}
9268
9269/**
Daniel Veillard257d9102001-05-08 10:41:44 +00009270 * xmlParseExternalEntity:
9271 * @doc: the document the chunk pertains to
9272 * @sax: the SAX handler bloc (possibly NULL)
9273 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9274 * @depth: Used for loop detection, use 0
9275 * @URL: the URL for the entity to load
9276 * @ID: the System ID for the entity to load
9277 * @list: the return value for the set of parsed nodes
9278 *
9279 * Parse an external general entity
9280 * An external general parsed entity is well-formed if it matches the
9281 * production labeled extParsedEnt.
9282 *
9283 * [78] extParsedEnt ::= TextDecl? content
9284 *
9285 * Returns 0 if the entity is well formed, -1 in case of args problem and
9286 * the parser error code otherwise
9287 */
9288
9289int
9290xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
9291 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
Daniel Veillarda97a19b2001-05-20 13:19:52 +00009292 return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
9293 ID, list));
Daniel Veillard257d9102001-05-08 10:41:44 +00009294}
9295
9296/**
Daniel Veillarde020c3a2001-03-21 18:06:15 +00009297 * xmlParseBalancedChunkMemory:
Owen Taylor3473f882001-02-23 17:55:21 +00009298 * @doc: the document the chunk pertains to
9299 * @sax: the SAX handler bloc (possibly NULL)
9300 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9301 * @depth: Used for loop detection, use 0
9302 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9303 * @list: the return value for the set of parsed nodes
9304 *
9305 * Parse a well-balanced chunk of an XML document
9306 * called by the parser
9307 * The allowed sequence for the Well Balanced Chunk is the one defined by
9308 * the content production in the XML grammar:
9309 *
9310 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9311 *
9312 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9313 * the parser error code otherwise
9314 */
9315
9316int
9317xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9318 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9319 xmlParserCtxtPtr ctxt;
9320 xmlDocPtr newDoc;
9321 xmlSAXHandlerPtr oldsax = NULL;
9322 int size;
9323 int ret = 0;
9324
9325 if (depth > 40) {
9326 return(XML_ERR_ENTITY_LOOP);
9327 }
9328
9329
9330 if (list != NULL)
9331 *list = NULL;
9332 if (string == NULL)
9333 return(-1);
9334
9335 size = xmlStrlen(string);
9336
9337 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9338 if (ctxt == NULL) return(-1);
9339 ctxt->userData = ctxt;
9340 if (sax != NULL) {
9341 oldsax = ctxt->sax;
9342 ctxt->sax = sax;
9343 if (user_data != NULL)
9344 ctxt->userData = user_data;
9345 }
9346 newDoc = xmlNewDoc(BAD_CAST "1.0");
9347 if (newDoc == NULL) {
9348 xmlFreeParserCtxt(ctxt);
9349 return(-1);
9350 }
9351 if (doc != NULL) {
9352 newDoc->intSubset = doc->intSubset;
9353 newDoc->extSubset = doc->extSubset;
9354 }
9355 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9356 if (newDoc->children == NULL) {
9357 if (sax != NULL)
9358 ctxt->sax = oldsax;
9359 xmlFreeParserCtxt(ctxt);
9360 newDoc->intSubset = NULL;
9361 newDoc->extSubset = NULL;
9362 xmlFreeDoc(newDoc);
9363 return(-1);
9364 }
9365 nodePush(ctxt, newDoc->children);
9366 if (doc == NULL) {
9367 ctxt->myDoc = newDoc;
9368 } else {
9369 ctxt->myDoc = doc;
9370 newDoc->children->doc = doc;
9371 }
9372 ctxt->instate = XML_PARSER_CONTENT;
9373 ctxt->depth = depth;
9374
9375 /*
9376 * Doing validity checking on chunk doesn't make sense
9377 */
9378 ctxt->validate = 0;
9379 ctxt->loadsubset = 0;
9380
9381 xmlParseContent(ctxt);
9382
9383 if ((RAW == '<') && (NXT(1) == '/')) {
9384 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9386 ctxt->sax->error(ctxt->userData,
9387 "chunk is not well balanced\n");
9388 ctxt->wellFormed = 0;
9389 ctxt->disableSAX = 1;
9390 } else if (RAW != 0) {
9391 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9392 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9393 ctxt->sax->error(ctxt->userData,
9394 "extra content at the end of well balanced chunk\n");
9395 ctxt->wellFormed = 0;
9396 ctxt->disableSAX = 1;
9397 }
9398 if (ctxt->node != newDoc->children) {
9399 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9400 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9401 ctxt->sax->error(ctxt->userData,
9402 "chunk is not well balanced\n");
9403 ctxt->wellFormed = 0;
9404 ctxt->disableSAX = 1;
9405 }
9406
9407 if (!ctxt->wellFormed) {
9408 if (ctxt->errNo == 0)
9409 ret = 1;
9410 else
9411 ret = ctxt->errNo;
9412 } else {
9413 if (list != NULL) {
9414 xmlNodePtr cur;
9415
9416 /*
9417 * Return the newly created nodeset after unlinking it from
9418 * they pseudo parent.
9419 */
9420 cur = newDoc->children->children;
9421 *list = cur;
9422 while (cur != NULL) {
9423 cur->parent = NULL;
9424 cur = cur->next;
9425 }
9426 newDoc->children->children = NULL;
9427 }
9428 ret = 0;
9429 }
9430 if (sax != NULL)
9431 ctxt->sax = oldsax;
9432 xmlFreeParserCtxt(ctxt);
9433 newDoc->intSubset = NULL;
9434 newDoc->extSubset = NULL;
9435 xmlFreeDoc(newDoc);
9436
9437 return(ret);
9438}
9439
9440/**
9441 * xmlSAXParseEntity:
9442 * @sax: the SAX handler block
9443 * @filename: the filename
9444 *
9445 * parse an XML external entity out of context and build a tree.
9446 * It use the given SAX function block to handle the parsing callback.
9447 * If sax is NULL, fallback to the default DOM tree building routines.
9448 *
9449 * [78] extParsedEnt ::= TextDecl? content
9450 *
9451 * This correspond to a "Well Balanced" chunk
9452 *
9453 * Returns the resulting document tree
9454 */
9455
9456xmlDocPtr
9457xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9458 xmlDocPtr ret;
9459 xmlParserCtxtPtr ctxt;
9460 char *directory = NULL;
9461
9462 ctxt = xmlCreateFileParserCtxt(filename);
9463 if (ctxt == NULL) {
9464 return(NULL);
9465 }
9466 if (sax != NULL) {
9467 if (ctxt->sax != NULL)
9468 xmlFree(ctxt->sax);
9469 ctxt->sax = sax;
9470 ctxt->userData = NULL;
9471 }
9472
9473 if ((ctxt->directory == NULL) && (directory == NULL))
9474 directory = xmlParserGetDirectory(filename);
9475
9476 xmlParseExtParsedEnt(ctxt);
9477
9478 if (ctxt->wellFormed)
9479 ret = ctxt->myDoc;
9480 else {
9481 ret = NULL;
9482 xmlFreeDoc(ctxt->myDoc);
9483 ctxt->myDoc = NULL;
9484 }
9485 if (sax != NULL)
9486 ctxt->sax = NULL;
9487 xmlFreeParserCtxt(ctxt);
9488
9489 return(ret);
9490}
9491
9492/**
9493 * xmlParseEntity:
9494 * @filename: the filename
9495 *
9496 * parse an XML external entity out of context and build a tree.
9497 *
9498 * [78] extParsedEnt ::= TextDecl? content
9499 *
9500 * This correspond to a "Well Balanced" chunk
9501 *
9502 * Returns the resulting document tree
9503 */
9504
9505xmlDocPtr
9506xmlParseEntity(const char *filename) {
9507 return(xmlSAXParseEntity(NULL, filename));
9508}
9509
9510/**
9511 * xmlCreateEntityParserCtxt:
9512 * @URL: the entity URL
9513 * @ID: the entity PUBLIC ID
9514 * @base: a posible base for the target URI
9515 *
9516 * Create a parser context for an external entity
9517 * Automatic support for ZLIB/Compress compressed document is provided
9518 * by default if found at compile-time.
9519 *
9520 * Returns the new parser context or NULL
9521 */
9522xmlParserCtxtPtr
9523xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9524 const xmlChar *base) {
9525 xmlParserCtxtPtr ctxt;
9526 xmlParserInputPtr inputStream;
9527 char *directory = NULL;
9528 xmlChar *uri;
9529
9530 ctxt = xmlNewParserCtxt();
9531 if (ctxt == NULL) {
9532 return(NULL);
9533 }
9534
9535 uri = xmlBuildURI(URL, base);
9536
9537 if (uri == NULL) {
9538 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9539 if (inputStream == NULL) {
9540 xmlFreeParserCtxt(ctxt);
9541 return(NULL);
9542 }
9543
9544 inputPush(ctxt, inputStream);
9545
9546 if ((ctxt->directory == NULL) && (directory == NULL))
9547 directory = xmlParserGetDirectory((char *)URL);
9548 if ((ctxt->directory == NULL) && (directory != NULL))
9549 ctxt->directory = directory;
9550 } else {
9551 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9552 if (inputStream == NULL) {
9553 xmlFree(uri);
9554 xmlFreeParserCtxt(ctxt);
9555 return(NULL);
9556 }
9557
9558 inputPush(ctxt, inputStream);
9559
9560 if ((ctxt->directory == NULL) && (directory == NULL))
9561 directory = xmlParserGetDirectory((char *)uri);
9562 if ((ctxt->directory == NULL) && (directory != NULL))
9563 ctxt->directory = directory;
9564 xmlFree(uri);
9565 }
9566
9567 return(ctxt);
9568}
9569
9570/************************************************************************
9571 * *
9572 * Front ends when parsing from a file *
9573 * *
9574 ************************************************************************/
9575
9576/**
9577 * xmlCreateFileParserCtxt:
9578 * @filename: the filename
9579 *
9580 * Create a parser context for a file content.
9581 * Automatic support for ZLIB/Compress compressed document is provided
9582 * by default if found at compile-time.
9583 *
9584 * Returns the new parser context or NULL
9585 */
9586xmlParserCtxtPtr
9587xmlCreateFileParserCtxt(const char *filename)
9588{
9589 xmlParserCtxtPtr ctxt;
9590 xmlParserInputPtr inputStream;
9591 xmlParserInputBufferPtr buf;
9592 char *directory = NULL;
9593
9594 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9595 if (buf == NULL) {
9596 return(NULL);
9597 }
9598
9599 ctxt = xmlNewParserCtxt();
9600 if (ctxt == NULL) {
9601 if (xmlDefaultSAXHandler.error != NULL) {
9602 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9603 }
9604 return(NULL);
9605 }
9606
9607 inputStream = xmlNewInputStream(ctxt);
9608 if (inputStream == NULL) {
9609 xmlFreeParserCtxt(ctxt);
9610 return(NULL);
9611 }
9612
9613 inputStream->filename = xmlMemStrdup(filename);
9614 inputStream->buf = buf;
9615 inputStream->base = inputStream->buf->buffer->content;
9616 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009617 inputStream->end =
9618 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009619
9620 inputPush(ctxt, inputStream);
9621 if ((ctxt->directory == NULL) && (directory == NULL))
9622 directory = xmlParserGetDirectory(filename);
9623 if ((ctxt->directory == NULL) && (directory != NULL))
9624 ctxt->directory = directory;
9625
9626 return(ctxt);
9627}
9628
9629/**
9630 * xmlSAXParseFile:
9631 * @sax: the SAX handler block
9632 * @filename: the filename
9633 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9634 * documents
9635 *
9636 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9637 * compressed document is provided by default if found at compile-time.
9638 * It use the given SAX function block to handle the parsing callback.
9639 * If sax is NULL, fallback to the default DOM tree building routines.
9640 *
9641 * Returns the resulting document tree
9642 */
9643
9644xmlDocPtr
9645xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9646 int recovery) {
9647 xmlDocPtr ret;
9648 xmlParserCtxtPtr ctxt;
9649 char *directory = NULL;
9650
9651 ctxt = xmlCreateFileParserCtxt(filename);
9652 if (ctxt == NULL) {
9653 return(NULL);
9654 }
9655 if (sax != NULL) {
9656 if (ctxt->sax != NULL)
9657 xmlFree(ctxt->sax);
9658 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009659 }
9660
9661 if ((ctxt->directory == NULL) && (directory == NULL))
9662 directory = xmlParserGetDirectory(filename);
9663 if ((ctxt->directory == NULL) && (directory != NULL))
9664 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9665
9666 xmlParseDocument(ctxt);
9667
9668 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9669 else {
9670 ret = NULL;
9671 xmlFreeDoc(ctxt->myDoc);
9672 ctxt->myDoc = NULL;
9673 }
9674 if (sax != NULL)
9675 ctxt->sax = NULL;
9676 xmlFreeParserCtxt(ctxt);
9677
9678 return(ret);
9679}
9680
9681/**
9682 * xmlRecoverDoc:
9683 * @cur: a pointer to an array of xmlChar
9684 *
9685 * parse an XML in-memory document and build a tree.
9686 * In the case the document is not Well Formed, a tree is built anyway
9687 *
9688 * Returns the resulting document tree
9689 */
9690
9691xmlDocPtr
9692xmlRecoverDoc(xmlChar *cur) {
9693 return(xmlSAXParseDoc(NULL, cur, 1));
9694}
9695
9696/**
9697 * xmlParseFile:
9698 * @filename: the filename
9699 *
9700 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9701 * compressed document is provided by default if found at compile-time.
9702 *
9703 * Returns the resulting document tree
9704 */
9705
9706xmlDocPtr
9707xmlParseFile(const char *filename) {
9708 return(xmlSAXParseFile(NULL, filename, 0));
9709}
9710
9711/**
9712 * xmlRecoverFile:
9713 * @filename: the filename
9714 *
9715 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9716 * compressed document is provided by default if found at compile-time.
9717 * In the case the document is not Well Formed, a tree is built anyway
9718 *
9719 * Returns the resulting document tree
9720 */
9721
9722xmlDocPtr
9723xmlRecoverFile(const char *filename) {
9724 return(xmlSAXParseFile(NULL, filename, 1));
9725}
9726
9727
9728/**
9729 * xmlSetupParserForBuffer:
9730 * @ctxt: an XML parser context
9731 * @buffer: a xmlChar * buffer
9732 * @filename: a file name
9733 *
9734 * Setup the parser context to parse a new buffer; Clears any prior
9735 * contents from the parser context. The buffer parameter must not be
9736 * NULL, but the filename parameter can be
9737 */
9738void
9739xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9740 const char* filename)
9741{
9742 xmlParserInputPtr input;
9743
9744 input = xmlNewInputStream(ctxt);
9745 if (input == NULL) {
9746 perror("malloc");
9747 xmlFree(ctxt);
9748 return;
9749 }
9750
9751 xmlClearParserCtxt(ctxt);
9752 if (filename != NULL)
9753 input->filename = xmlMemStrdup(filename);
9754 input->base = buffer;
9755 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009756 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009757 inputPush(ctxt, input);
9758}
9759
9760/**
9761 * xmlSAXUserParseFile:
9762 * @sax: a SAX handler
9763 * @user_data: The user data returned on SAX callbacks
9764 * @filename: a file name
9765 *
9766 * parse an XML file and call the given SAX handler routines.
9767 * Automatic support for ZLIB/Compress compressed document is provided
9768 *
9769 * Returns 0 in case of success or a error number otherwise
9770 */
9771int
9772xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9773 const char *filename) {
9774 int ret = 0;
9775 xmlParserCtxtPtr ctxt;
9776
9777 ctxt = xmlCreateFileParserCtxt(filename);
9778 if (ctxt == NULL) return -1;
9779 if (ctxt->sax != &xmlDefaultSAXHandler)
9780 xmlFree(ctxt->sax);
9781 ctxt->sax = sax;
9782 if (user_data != NULL)
9783 ctxt->userData = user_data;
9784
9785 xmlParseDocument(ctxt);
9786
9787 if (ctxt->wellFormed)
9788 ret = 0;
9789 else {
9790 if (ctxt->errNo != 0)
9791 ret = ctxt->errNo;
9792 else
9793 ret = -1;
9794 }
9795 if (sax != NULL)
9796 ctxt->sax = NULL;
9797 xmlFreeParserCtxt(ctxt);
9798
9799 return ret;
9800}
9801
9802/************************************************************************
9803 * *
9804 * Front ends when parsing from memory *
9805 * *
9806 ************************************************************************/
9807
9808/**
9809 * xmlCreateMemoryParserCtxt:
9810 * @buffer: a pointer to a char array
9811 * @size: the size of the array
9812 *
9813 * Create a parser context for an XML in-memory document.
9814 *
9815 * Returns the new parser context or NULL
9816 */
9817xmlParserCtxtPtr
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009818xmlCreateMemoryParserCtxt(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009819 xmlParserCtxtPtr ctxt;
9820 xmlParserInputPtr input;
9821 xmlParserInputBufferPtr buf;
9822
9823 if (buffer == NULL)
9824 return(NULL);
9825 if (size <= 0)
9826 return(NULL);
9827
9828 ctxt = xmlNewParserCtxt();
9829 if (ctxt == NULL)
9830 return(NULL);
9831
9832 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9833 if (buf == NULL) return(NULL);
9834
9835 input = xmlNewInputStream(ctxt);
9836 if (input == NULL) {
9837 xmlFreeParserCtxt(ctxt);
9838 return(NULL);
9839 }
9840
9841 input->filename = NULL;
9842 input->buf = buf;
9843 input->base = input->buf->buffer->content;
9844 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009845 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009846
9847 inputPush(ctxt, input);
9848 return(ctxt);
9849}
9850
9851/**
9852 * xmlSAXParseMemory:
9853 * @sax: the SAX handler block
9854 * @buffer: an pointer to a char array
9855 * @size: the size of the array
9856 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9857 * documents
9858 *
9859 * parse an XML in-memory block and use the given SAX function block
9860 * to handle the parsing callback. If sax is NULL, fallback to the default
9861 * DOM tree building routines.
9862 *
9863 * Returns the resulting document tree
9864 */
9865xmlDocPtr
Daniel Veillard50822cb2001-07-26 20:05:51 +00009866xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
9867 int size, int recovery) {
Owen Taylor3473f882001-02-23 17:55:21 +00009868 xmlDocPtr ret;
9869 xmlParserCtxtPtr ctxt;
9870
9871 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9872 if (ctxt == NULL) return(NULL);
9873 if (sax != NULL) {
9874 ctxt->sax = sax;
Owen Taylor3473f882001-02-23 17:55:21 +00009875 }
9876
9877 xmlParseDocument(ctxt);
9878
9879 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9880 else {
9881 ret = NULL;
9882 xmlFreeDoc(ctxt->myDoc);
9883 ctxt->myDoc = NULL;
9884 }
9885 if (sax != NULL)
9886 ctxt->sax = NULL;
9887 xmlFreeParserCtxt(ctxt);
9888
9889 return(ret);
9890}
9891
9892/**
9893 * xmlParseMemory:
9894 * @buffer: an pointer to a char array
9895 * @size: the size of the array
9896 *
9897 * parse an XML in-memory block and build a tree.
9898 *
9899 * Returns the resulting document tree
9900 */
9901
Daniel Veillard50822cb2001-07-26 20:05:51 +00009902xmlDocPtr xmlParseMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009903 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9904}
9905
9906/**
9907 * xmlRecoverMemory:
9908 * @buffer: an pointer to a char array
9909 * @size: the size of the array
9910 *
9911 * parse an XML in-memory block and build a tree.
9912 * In the case the document is not Well Formed, a tree is built anyway
9913 *
9914 * Returns the resulting document tree
9915 */
9916
Daniel Veillard50822cb2001-07-26 20:05:51 +00009917xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009918 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9919}
9920
9921/**
9922 * xmlSAXUserParseMemory:
9923 * @sax: a SAX handler
9924 * @user_data: The user data returned on SAX callbacks
9925 * @buffer: an in-memory XML document input
9926 * @size: the length of the XML document in bytes
9927 *
9928 * A better SAX parsing routine.
9929 * parse an XML in-memory buffer and call the given SAX handler routines.
9930 *
9931 * Returns 0 in case of success or a error number otherwise
9932 */
9933int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
Daniel Veillardfd7ddca2001-05-16 10:57:35 +00009934 const char *buffer, int size) {
Owen Taylor3473f882001-02-23 17:55:21 +00009935 int ret = 0;
9936 xmlParserCtxtPtr ctxt;
9937 xmlSAXHandlerPtr oldsax = NULL;
9938
9939 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9940 if (ctxt == NULL) return -1;
9941 if (sax != NULL) {
9942 oldsax = ctxt->sax;
9943 ctxt->sax = sax;
9944 }
Daniel Veillard30211a02001-04-26 09:33:18 +00009945 if (user_data != NULL)
9946 ctxt->userData = user_data;
Owen Taylor3473f882001-02-23 17:55:21 +00009947
9948 xmlParseDocument(ctxt);
9949
9950 if (ctxt->wellFormed)
9951 ret = 0;
9952 else {
9953 if (ctxt->errNo != 0)
9954 ret = ctxt->errNo;
9955 else
9956 ret = -1;
9957 }
9958 if (sax != NULL) {
9959 ctxt->sax = oldsax;
9960 }
9961 xmlFreeParserCtxt(ctxt);
9962
9963 return ret;
9964}
9965
9966/**
9967 * xmlCreateDocParserCtxt:
9968 * @cur: a pointer to an array of xmlChar
9969 *
9970 * Creates a parser context for an XML in-memory document.
9971 *
9972 * Returns the new parser context or NULL
9973 */
9974xmlParserCtxtPtr
9975xmlCreateDocParserCtxt(xmlChar *cur) {
9976 int len;
9977
9978 if (cur == NULL)
9979 return(NULL);
9980 len = xmlStrlen(cur);
9981 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9982}
9983
9984/**
9985 * xmlSAXParseDoc:
9986 * @sax: the SAX handler block
9987 * @cur: a pointer to an array of xmlChar
9988 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9989 * documents
9990 *
9991 * parse an XML in-memory document and build a tree.
9992 * It use the given SAX function block to handle the parsing callback.
9993 * If sax is NULL, fallback to the default DOM tree building routines.
9994 *
9995 * Returns the resulting document tree
9996 */
9997
9998xmlDocPtr
9999xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
10000 xmlDocPtr ret;
10001 xmlParserCtxtPtr ctxt;
10002
10003 if (cur == NULL) return(NULL);
10004
10005
10006 ctxt = xmlCreateDocParserCtxt(cur);
10007 if (ctxt == NULL) return(NULL);
10008 if (sax != NULL) {
10009 ctxt->sax = sax;
10010 ctxt->userData = NULL;
10011 }
10012
10013 xmlParseDocument(ctxt);
10014 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
10015 else {
10016 ret = NULL;
10017 xmlFreeDoc(ctxt->myDoc);
10018 ctxt->myDoc = NULL;
10019 }
10020 if (sax != NULL)
10021 ctxt->sax = NULL;
10022 xmlFreeParserCtxt(ctxt);
10023
10024 return(ret);
10025}
10026
10027/**
10028 * xmlParseDoc:
10029 * @cur: a pointer to an array of xmlChar
10030 *
10031 * parse an XML in-memory document and build a tree.
10032 *
10033 * Returns the resulting document tree
10034 */
10035
10036xmlDocPtr
10037xmlParseDoc(xmlChar *cur) {
10038 return(xmlSAXParseDoc(NULL, cur, 0));
10039}
10040
10041
10042/************************************************************************
10043 * *
10044 * Miscellaneous *
10045 * *
10046 ************************************************************************/
10047
10048#ifdef LIBXML_XPATH_ENABLED
10049#include <libxml/xpath.h>
10050#endif
10051
/* Non-zero once xmlInitParser() has run; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 * Calling it again while the parser is initialized does nothing.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* Only flag the parser as ready once every step has run. */
        xmlParserInitialized = 1;
    }
}
10080
10081/**
10082 * xmlCleanupParser:
10083 *
10084 * Cleanup function for the XML parser. It tries to reclaim all
10085 * parsing related global memory allocated for the parser processing.
10086 * It doesn't deallocate any document related memory. Calling this
10087 * function should not prevent reusing the parser.
10088 */
10089
10090void
10091xmlCleanupParser(void) {
10092 xmlParserInitialized = 0;
10093 xmlCleanupCharEncodingHandlers();
10094 xmlCleanupPredefinedEntities();
10095}
10096