blob: 5541628851929449fb7c08eff55fd1016680fc12 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3 * implemented on top of the SAX interfaces
4 *
5 * References:
6 * The XML specification:
7 * http://www.w3.org/TR/REC-xml
8 * Original 1.0 version:
9 * http://www.w3.org/TR/1998/REC-xml-19980210
10 * XML second edition working draft
11 * http://www.w3.org/TR/2000/WD-xml-2e-20000814
12 *
13 * Okay this is a big file, the parser core is around 7000 lines, then it
14 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
16 * A number of helper functions and deprecated ones have been moved to
17 * parserInternals.c to reduce this file size.
18 * As much as possible the functions are associated with their relative
19 * production in the XML specification. A few productions defining the
20 * different ranges of character are actually implanted either in
21 * parserInternals.h or parserInternals.c
22 * The DOM tree build is realized from the default SAX callbacks in
23 * the module SAX.c.
24 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
26 * document.
27 *
28 * See Copyright for the status of this software.
29 *
30 * Daniel.Veillard@w3.org
31 *
32 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
33 * and xmlDoValidityCheckingDefaultValue for VMS
34 */
35
36#ifdef WIN32
37#include "win32config.h"
38#define XML_DIR_SEP '\\'
39#else
40#include "config.h"
41#define XML_DIR_SEP '/'
42#endif
43
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <libxml/xmlmemory.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57
58#ifdef HAVE_CTYPE_H
59#include <ctype.h>
60#endif
61#ifdef HAVE_STDLIB_H
62#include <stdlib.h>
63#endif
64#ifdef HAVE_SYS_STAT_H
65#include <sys/stat.h>
66#endif
67#ifdef HAVE_FCNTL_H
68#include <fcntl.h>
69#endif
70#ifdef HAVE_UNISTD_H
71#include <unistd.h>
72#endif
73#ifdef HAVE_ZLIB_H
74#include <zlib.h>
75#endif
76
77
78#define XML_PARSER_BIG_BUFFER_SIZE 1000
79#define XML_PARSER_BUFFER_SIZE 100
80
/*
 * Global defaults used by the parser, each defined once with its initial
 * value.
 *
 * On VMS the two longest identifiers are truncated: the macros below map
 * the regular names onto the shortened ones, so the definitions further
 * down create xmlSubstituteEntitiesDefaultVal and
 * xmlDoValidityCheckingDefaultVal on that platform.
 */
#ifdef VMS
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#endif

int xmlGetWarningsDefaultValue = 1;
int xmlParserDebugEntities = 0;   /* non-zero: trace entity input push/pop */
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;

/*
 * List of the "xml" prefixed PI targets allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */
const char *xmlW3CPIs[] = { "xml-stylesheet", NULL };
107
108/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
109void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
113
114/************************************************************************
115 * *
116 * Parser stacks related functions and macros *
117 * *
118 ************************************************************************/
119
120xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
121 const xmlChar ** str);
122
123/*
124 * Generic function for accessing stacks in the Parser Context
125 */
126
127#define PUSH_AND_POP(scope, type, name) \
128scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
129 if (ctxt->name##Nr >= ctxt->name##Max) { \
130 ctxt->name##Max *= 2; \
131 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
132 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
133 if (ctxt->name##Tab == NULL) { \
134 xmlGenericError(xmlGenericErrorContext, \
135 "realloc failed !\n"); \
136 return(0); \
137 } \
138 } \
139 ctxt->name##Tab[ctxt->name##Nr] = value; \
140 ctxt->name = value; \
141 return(ctxt->name##Nr++); \
142} \
143scope type name##Pop(xmlParserCtxtPtr ctxt) { \
144 type ret; \
145 if (ctxt->name##Nr <= 0) return(0); \
146 ctxt->name##Nr--; \
147 if (ctxt->name##Nr > 0) \
148 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
149 else \
150 ctxt->name = NULL; \
151 ret = ctxt->name##Tab[ctxt->name##Nr]; \
152 ctxt->name##Tab[ctxt->name##Nr] = 0; \
153 return(ret); \
154} \
155
156/*
157 * Those macros actually generate the functions
158 */
159PUSH_AND_POP(extern, xmlParserInputPtr, input)
160PUSH_AND_POP(extern, xmlNodePtr, node)
161PUSH_AND_POP(extern, xmlChar*, name)
162
163int spacePush(xmlParserCtxtPtr ctxt, int val) {
164 if (ctxt->spaceNr >= ctxt->spaceMax) {
165 ctxt->spaceMax *= 2;
166 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
167 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
168 if (ctxt->spaceTab == NULL) {
169 xmlGenericError(xmlGenericErrorContext,
170 "realloc failed !\n");
171 return(0);
172 }
173 }
174 ctxt->spaceTab[ctxt->spaceNr] = val;
175 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
176 return(ctxt->spaceNr++);
177}
178
179int spacePop(xmlParserCtxtPtr ctxt) {
180 int ret;
181 if (ctxt->spaceNr <= 0) return(0);
182 ctxt->spaceNr--;
183 if (ctxt->spaceNr > 0)
184 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
185 else
186 ctxt->space = NULL;
187 ret = ctxt->spaceTab[ctxt->spaceNr];
188 ctxt->spaceTab[ctxt->spaceNr] = -1;
189 return(ret);
190}
191
192/*
193 * Macros for accessing the content. Those should be used only by the parser,
194 * and not exported.
195 *
196 * Dirty macros, i.e. one often need to make assumption on the context to
197 * use them
198 *
199 * CUR_PTR return the current pointer to the xmlChar to be parsed.
200 * To be used with extreme caution since operations consuming
201 * characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value, or the
 *           pending ctxt->token when one is set.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
206 * RAW same as CUR but in the input buffer, bypass any token
207 * extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substrings.
210 * SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
211 * strings within the parser.
212 *
213 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
214 *
215 * NEXT Skip to the next character, this does the proper decoding
216 * in UTF-8 mode. It also pop-up unfinished entities on the fly.
217 * NEXTL(l) Skip l xmlChars in the input buffer
218 * CUR_CHAR(l) returns the current unicode character (int), set l
219 * to the number of xmlChars used for the encoding [0-5].
220 * CUR_SCHAR same but operate on a string instead of the context
221 * COPY_BUF copy the current unicode char to the target buffer, increment
222 * the index
223 * GROW, SHRINK handling of input buffers
224 */
225
/*
 * All the macros below expect a variable named ctxt (the parser context)
 * to be in scope at the point of use.
 */

/* RAW: current byte of the input buffer, -1 while ctxt->token is pending */
#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
/* CUR: the pending ctxt->token if there is one, else the current byte */
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
/* NXT: the byte located val positions after the current one */
#define NXT(val) ctxt->input->cur[(val)]
/* CUR_PTR: the current position in the input buffer */
#define CUR_PTR ctxt->input->cur

/*
 * SKIP: advance by val bytes, hand a '%' found at the new position to
 * xmlParserHandlePEReference() and pop the input if it is exhausted and
 * cannot be grown.
 */
#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val);                                             \
    ctxt->input->cur += (val);                                          \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/* SHRINK: shrink the input buffer, then pop the input if it is exhausted */
#define SHRINK do {                                                     \
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

/* GROW: grow the input buffer, then pop the input if it is exhausted */
#define GROW do {                                                       \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/*
 * NEXTL: update the line/column accounting for the current byte, drop any
 * pending token, advance by l bytes and hand a '%' found at the new
 * position to xmlParserHandlePEReference().
 */
#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/*
 * COPY_BUF: append the character v to buffer b at index i and advance i.
 * A length l of 1 is stored directly as a single byte, anything else goes
 * through xmlCopyChar().
 * NOTE: this expands to a bare if/else statement, so always use it as a
 * full statement inside braces.
 */
#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyChar((l),&(b)[(i)],(v))
273
274/**
275 * xmlSkipBlankChars:
276 * @ctxt: the XML parser context
277 *
278 * skip all blanks character found at that point in the input streams.
279 * It pops up finished entities in the process if allowable at that point.
280 *
281 * Returns the number of space chars skipped
282 */
283
284int
285xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
286 int cur, res = 0;
287
288 /*
289 * It's Okay to use CUR/NEXT here since all the blanks are on
290 * the ASCII range.
291 */
292 do {
293 cur = CUR;
294 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
295 NEXT;
296 cur = CUR;
297 res++;
298 }
299 while ((cur == 0) && (ctxt->inputNr > 1) &&
300 (ctxt->instate != XML_PARSER_COMMENT)) {
301 xmlPopInput(ctxt);
302 cur = CUR;
303 }
304 /*
305 * Need to handle support of entities branching here
306 */
307 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
308 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
309 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
310 return(res);
311}
312
313/************************************************************************
314 * *
315 * Commodity functions to handle entities *
316 * *
317 ************************************************************************/
318
319/**
320 * xmlPopInput:
321 * @ctxt: an XML parser context
322 *
323 * xmlPopInput: the current input pointed by ctxt->input came to an end
324 * pop it and return the next char.
325 *
326 * Returns the current xmlChar in the parser context
327 */
328xmlChar
329xmlPopInput(xmlParserCtxtPtr ctxt) {
330 if (ctxt->inputNr == 1) return(0); /* End of main Input */
331 if (xmlParserDebugEntities)
332 xmlGenericError(xmlGenericErrorContext,
333 "Popping input %d\n", ctxt->inputNr);
334 xmlFreeInputStream(inputPop(ctxt));
335 if ((*ctxt->input->cur == 0) &&
336 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
337 return(xmlPopInput(ctxt));
338 return(CUR);
339}
340
341/**
342 * xmlPushInput:
343 * @ctxt: an XML parser context
344 * @input: an XML parser input fragment (entity, XML fragment ...).
345 *
346 * xmlPushInput: switch to a new input stream which is stacked on top
347 * of the previous one(s).
348 */
349void
350xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
351 if (input == NULL) return;
352
353 if (xmlParserDebugEntities) {
354 if ((ctxt->input != NULL) && (ctxt->input->filename))
355 xmlGenericError(xmlGenericErrorContext,
356 "%s(%d): ", ctxt->input->filename,
357 ctxt->input->line);
358 xmlGenericError(xmlGenericErrorContext,
359 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
360 }
361 inputPush(ctxt, input);
362 GROW;
363}
364
365/**
366 * xmlParseCharRef:
367 * @ctxt: an XML parser context
368 *
369 * parse Reference declarations
370 *
371 * [66] CharRef ::= '&#' [0-9]+ ';' |
372 * '&#x' [0-9a-fA-F]+ ';'
373 *
374 * [ WFC: Legal Character ]
375 * Characters referred to using character references must match the
376 * production for Char.
377 *
378 * Returns the value parsed (as an int), 0 in case of error
379 */
380int
381xmlParseCharRef(xmlParserCtxtPtr ctxt) {
382 int val = 0;
383 int count = 0;
384
385 if (ctxt->token != 0) {
386 val = ctxt->token;
387 ctxt->token = 0;
388 return(val);
389 }
390 /*
391 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
392 */
393 if ((RAW == '&') && (NXT(1) == '#') &&
394 (NXT(2) == 'x')) {
395 SKIP(3);
396 GROW;
397 while (RAW != ';') { /* loop blocked by count */
398 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
399 val = val * 16 + (CUR - '0');
400 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
401 val = val * 16 + (CUR - 'a') + 10;
402 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
403 val = val * 16 + (CUR - 'A') + 10;
404 else {
405 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
406 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
407 ctxt->sax->error(ctxt->userData,
408 "xmlParseCharRef: invalid hexadecimal value\n");
409 ctxt->wellFormed = 0;
410 ctxt->disableSAX = 1;
411 val = 0;
412 break;
413 }
414 NEXT;
415 count++;
416 }
417 if (RAW == ';') {
418 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
419 ctxt->nbChars ++;
420 ctxt->input->cur++;
421 }
422 } else if ((RAW == '&') && (NXT(1) == '#')) {
423 SKIP(2);
424 GROW;
425 while (RAW != ';') { /* loop blocked by count */
426 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
427 val = val * 10 + (CUR - '0');
428 else {
429 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
431 ctxt->sax->error(ctxt->userData,
432 "xmlParseCharRef: invalid decimal value\n");
433 ctxt->wellFormed = 0;
434 ctxt->disableSAX = 1;
435 val = 0;
436 break;
437 }
438 NEXT;
439 count++;
440 }
441 if (RAW == ';') {
442 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
443 ctxt->nbChars ++;
444 ctxt->input->cur++;
445 }
446 } else {
447 ctxt->errNo = XML_ERR_INVALID_CHARREF;
448 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
449 ctxt->sax->error(ctxt->userData,
450 "xmlParseCharRef: invalid value\n");
451 ctxt->wellFormed = 0;
452 ctxt->disableSAX = 1;
453 }
454
455 /*
456 * [ WFC: Legal Character ]
457 * Characters referred to using character references must match the
458 * production for Char.
459 */
460 if (IS_CHAR(val)) {
461 return(val);
462 } else {
463 ctxt->errNo = XML_ERR_INVALID_CHAR;
464 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
465 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
466 val);
467 ctxt->wellFormed = 0;
468 ctxt->disableSAX = 1;
469 }
470 return(0);
471}
472
473/**
474 * xmlParseStringCharRef:
475 * @ctxt: an XML parser context
476 * @str: a pointer to an index in the string
477 *
478 * parse Reference declarations, variant parsing from a string rather
479 * than an an input flow.
480 *
481 * [66] CharRef ::= '&#' [0-9]+ ';' |
482 * '&#x' [0-9a-fA-F]+ ';'
483 *
484 * [ WFC: Legal Character ]
485 * Characters referred to using character references must match the
486 * production for Char.
487 *
488 * Returns the value parsed (as an int), 0 in case of error, str will be
489 * updated to the current value of the index
490 */
491int
492xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
493 const xmlChar *ptr;
494 xmlChar cur;
495 int val = 0;
496
497 if ((str == NULL) || (*str == NULL)) return(0);
498 ptr = *str;
499 cur = *ptr;
500 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
501 ptr += 3;
502 cur = *ptr;
503 while (cur != ';') { /* Non input consuming loop */
504 if ((cur >= '0') && (cur <= '9'))
505 val = val * 16 + (cur - '0');
506 else if ((cur >= 'a') && (cur <= 'f'))
507 val = val * 16 + (cur - 'a') + 10;
508 else if ((cur >= 'A') && (cur <= 'F'))
509 val = val * 16 + (cur - 'A') + 10;
510 else {
511 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
512 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
513 ctxt->sax->error(ctxt->userData,
514 "xmlParseStringCharRef: invalid hexadecimal value\n");
515 ctxt->wellFormed = 0;
516 ctxt->disableSAX = 1;
517 val = 0;
518 break;
519 }
520 ptr++;
521 cur = *ptr;
522 }
523 if (cur == ';')
524 ptr++;
525 } else if ((cur == '&') && (ptr[1] == '#')){
526 ptr += 2;
527 cur = *ptr;
528 while (cur != ';') { /* Non input consuming loops */
529 if ((cur >= '0') && (cur <= '9'))
530 val = val * 10 + (cur - '0');
531 else {
532 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
534 ctxt->sax->error(ctxt->userData,
535 "xmlParseStringCharRef: invalid decimal value\n");
536 ctxt->wellFormed = 0;
537 ctxt->disableSAX = 1;
538 val = 0;
539 break;
540 }
541 ptr++;
542 cur = *ptr;
543 }
544 if (cur == ';')
545 ptr++;
546 } else {
547 ctxt->errNo = XML_ERR_INVALID_CHARREF;
548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
549 ctxt->sax->error(ctxt->userData,
550 "xmlParseCharRef: invalid value\n");
551 ctxt->wellFormed = 0;
552 ctxt->disableSAX = 1;
553 return(0);
554 }
555 *str = ptr;
556
557 /*
558 * [ WFC: Legal Character ]
559 * Characters referred to using character references must match the
560 * production for Char.
561 */
562 if (IS_CHAR(val)) {
563 return(val);
564 } else {
565 ctxt->errNo = XML_ERR_INVALID_CHAR;
566 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
567 ctxt->sax->error(ctxt->userData,
568 "CharRef: invalid xmlChar value %d\n", val);
569 ctxt->wellFormed = 0;
570 ctxt->disableSAX = 1;
571 }
572 return(0);
573}
574
575/**
576 * xmlParserHandlePEReference:
577 * @ctxt: the parser context
578 *
579 * [69] PEReference ::= '%' Name ';'
580 *
581 * [ WFC: No Recursion ]
582 * A parsed entity must not contain a recursive
583 * reference to itself, either directly or indirectly.
584 *
585 * [ WFC: Entity Declared ]
586 * In a document without any DTD, a document with only an internal DTD
587 * subset which contains no parameter entity references, or a document
588 * with "standalone='yes'", ... ... The declaration of a parameter
589 * entity must precede any reference to it...
590 *
591 * [ VC: Entity Declared ]
592 * In a document with an external subset or external parameter entities
593 * with "standalone='no'", ... ... The declaration of a parameter entity
594 * must precede any reference to it...
595 *
596 * [ WFC: In DTD ]
597 * Parameter-entity references may only appear in the DTD.
598 * NOTE: misleading but this is handled.
599 *
600 * A PEReference may have been detected in the current input stream
601 * the handling is done accordingly to
602 * http://www.w3.org/TR/REC-xml#entproc
603 * i.e.
604 * - Included in literal in entity values
605 * - Included as Paraemeter Entity reference within DTDs
606 */
607void
608xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
609 xmlChar *name;
610 xmlEntityPtr entity = NULL;
611 xmlParserInputPtr input;
612
613 if (ctxt->token != 0) {
614 return;
615 }
616 if (RAW != '%') return;
617 switch(ctxt->instate) {
618 case XML_PARSER_CDATA_SECTION:
619 return;
620 case XML_PARSER_COMMENT:
621 return;
622 case XML_PARSER_START_TAG:
623 return;
624 case XML_PARSER_END_TAG:
625 return;
626 case XML_PARSER_EOF:
627 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
628 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
629 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
630 ctxt->wellFormed = 0;
631 ctxt->disableSAX = 1;
632 return;
633 case XML_PARSER_PROLOG:
634 case XML_PARSER_START:
635 case XML_PARSER_MISC:
636 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
637 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
638 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
639 ctxt->wellFormed = 0;
640 ctxt->disableSAX = 1;
641 return;
642 case XML_PARSER_ENTITY_DECL:
643 case XML_PARSER_CONTENT:
644 case XML_PARSER_ATTRIBUTE_VALUE:
645 case XML_PARSER_PI:
646 case XML_PARSER_SYSTEM_LITERAL:
647 /* we just ignore it there */
648 return;
649 case XML_PARSER_EPILOG:
650 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
651 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
652 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
653 ctxt->wellFormed = 0;
654 ctxt->disableSAX = 1;
655 return;
656 case XML_PARSER_ENTITY_VALUE:
657 /*
658 * NOTE: in the case of entity values, we don't do the
659 * substitution here since we need the literal
660 * entity value to be able to save the internal
661 * subset of the document.
662 * This will be handled by xmlStringDecodeEntities
663 */
664 return;
665 case XML_PARSER_DTD:
666 /*
667 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
668 * In the internal DTD subset, parameter-entity references
669 * can occur only where markup declarations can occur, not
670 * within markup declarations.
671 * In that case this is handled in xmlParseMarkupDecl
672 */
673 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
674 return;
675 break;
676 case XML_PARSER_IGNORE:
677 return;
678 }
679
680 NEXT;
681 name = xmlParseName(ctxt);
682 if (xmlParserDebugEntities)
683 xmlGenericError(xmlGenericErrorContext,
684 "PE Reference: %s\n", name);
685 if (name == NULL) {
686 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
688 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
689 ctxt->wellFormed = 0;
690 ctxt->disableSAX = 1;
691 } else {
692 if (RAW == ';') {
693 NEXT;
694 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
695 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
696 if (entity == NULL) {
697
698 /*
699 * [ WFC: Entity Declared ]
700 * In a document without any DTD, a document with only an
701 * internal DTD subset which contains no parameter entity
702 * references, or a document with "standalone='yes'", ...
703 * ... The declaration of a parameter entity must precede
704 * any reference to it...
705 */
706 if ((ctxt->standalone == 1) ||
707 ((ctxt->hasExternalSubset == 0) &&
708 (ctxt->hasPErefs == 0))) {
709 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
710 ctxt->sax->error(ctxt->userData,
711 "PEReference: %%%s; not found\n", name);
712 ctxt->wellFormed = 0;
713 ctxt->disableSAX = 1;
714 } else {
715 /*
716 * [ VC: Entity Declared ]
717 * In a document with an external subset or external
718 * parameter entities with "standalone='no'", ...
719 * ... The declaration of a parameter entity must precede
720 * any reference to it...
721 */
722 if ((!ctxt->disableSAX) &&
723 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
724 ctxt->vctxt.error(ctxt->vctxt.userData,
725 "PEReference: %%%s; not found\n", name);
726 } else if ((!ctxt->disableSAX) &&
727 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
728 ctxt->sax->warning(ctxt->userData,
729 "PEReference: %%%s; not found\n", name);
730 ctxt->valid = 0;
731 }
732 } else {
733 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
734 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
735 /*
736 * handle the extra spaces added before and after
737 * c.f. http://www.w3.org/TR/REC-xml#as-PE
738 * this is done independantly.
739 */
740 input = xmlNewEntityInputStream(ctxt, entity);
741 xmlPushInput(ctxt, input);
742 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
743 (RAW == '<') && (NXT(1) == '?') &&
744 (NXT(2) == 'x') && (NXT(3) == 'm') &&
745 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
746 xmlParseTextDecl(ctxt);
747 }
748 if (ctxt->token == 0)
749 ctxt->token = ' ';
750 } else {
751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
752 ctxt->sax->error(ctxt->userData,
753 "xmlHandlePEReference: %s is not a parameter entity\n",
754 name);
755 ctxt->wellFormed = 0;
756 ctxt->disableSAX = 1;
757 }
758 }
759 } else {
760 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
762 ctxt->sax->error(ctxt->userData,
763 "xmlHandlePEReference: expecting ';'\n");
764 ctxt->wellFormed = 0;
765 ctxt->disableSAX = 1;
766 }
767 xmlFree(name);
768 }
769}
770
/*
 * Macro used to grow the current buffer.
 *
 * Doubles buffer##_size and reallocates @buffer accordingly. It must be
 * used from a function returning a pointer: when the reallocation fails
 * the error is reported with perror(), the old buffer is released and the
 * enclosing function returns NULL. The result of xmlRealloc() goes through
 * a temporary so that the old buffer is still known, and can be freed
 * rather than leaked, on failure.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *grown_##buffer;                                            \
    buffer##_size *= 2;                                                 \
    grown_##buffer = (xmlChar *)                                        \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (grown_##buffer == NULL) {                                       \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = grown_##buffer;                                            \
}
783
784/**
785 * xmlStringDecodeEntities:
786 * @ctxt: the parser context
787 * @str: the input string
788 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
789 * @end: an end marker xmlChar, 0 if none
790 * @end2: an end marker xmlChar, 0 if none
791 * @end3: an end marker xmlChar, 0 if none
792 *
793 * Takes a entity string content and process to do the adequate subtitutions.
794 *
795 * [67] Reference ::= EntityRef | CharRef
796 *
797 * [69] PEReference ::= '%' Name ';'
798 *
799 * Returns A newly allocated string with the substitution done. The caller
800 * must deallocate it !
801 */
802xmlChar *
803xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
804 xmlChar end, xmlChar end2, xmlChar end3) {
805 xmlChar *buffer = NULL;
806 int buffer_size = 0;
807
808 xmlChar *current = NULL;
809 xmlEntityPtr ent;
810 int c,l;
811 int nbchars = 0;
812
813 if (str == NULL)
814 return(NULL);
815
816 if (ctxt->depth > 40) {
817 ctxt->errNo = XML_ERR_ENTITY_LOOP;
818 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
819 ctxt->sax->error(ctxt->userData,
820 "Detected entity reference loop\n");
821 ctxt->wellFormed = 0;
822 ctxt->disableSAX = 1;
823 return(NULL);
824 }
825
826 /*
827 * allocate a translation buffer.
828 */
829 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
830 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
831 if (buffer == NULL) {
832 perror("xmlDecodeEntities: malloc failed");
833 return(NULL);
834 }
835
836 /*
837 * Ok loop until we reach one of the ending char or a size limit.
838 * we are operating on already parsed values.
839 */
840 c = CUR_SCHAR(str, l);
841 while ((c != 0) && (c != end) && /* non input consuming loop */
842 (c != end2) && (c != end3)) {
843
844 if (c == 0) break;
845 if ((c == '&') && (str[1] == '#')) {
846 int val = xmlParseStringCharRef(ctxt, &str);
847 if (val != 0) {
848 COPY_BUF(0,buffer,nbchars,val);
849 }
850 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
851 if (xmlParserDebugEntities)
852 xmlGenericError(xmlGenericErrorContext,
853 "String decoding Entity Reference: %.30s\n",
854 str);
855 ent = xmlParseStringEntityRef(ctxt, &str);
856 if ((ent != NULL) &&
857 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
858 if (ent->content != NULL) {
859 COPY_BUF(0,buffer,nbchars,ent->content[0]);
860 } else {
861 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
862 ctxt->sax->error(ctxt->userData,
863 "internal error entity has no content\n");
864 }
865 } else if ((ent != NULL) && (ent->content != NULL)) {
866 xmlChar *rep;
867
868 ctxt->depth++;
869 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
870 0, 0, 0);
871 ctxt->depth--;
872 if (rep != NULL) {
873 current = rep;
874 while (*current != 0) { /* non input consuming loop */
875 buffer[nbchars++] = *current++;
876 if (nbchars >
877 buffer_size - XML_PARSER_BUFFER_SIZE) {
878 growBuffer(buffer);
879 }
880 }
881 xmlFree(rep);
882 }
883 } else if (ent != NULL) {
884 int i = xmlStrlen(ent->name);
885 const xmlChar *cur = ent->name;
886
887 buffer[nbchars++] = '&';
888 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
889 growBuffer(buffer);
890 }
891 for (;i > 0;i--)
892 buffer[nbchars++] = *cur++;
893 buffer[nbchars++] = ';';
894 }
895 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
896 if (xmlParserDebugEntities)
897 xmlGenericError(xmlGenericErrorContext,
898 "String decoding PE Reference: %.30s\n", str);
899 ent = xmlParseStringPEReference(ctxt, &str);
900 if (ent != NULL) {
901 xmlChar *rep;
902
903 ctxt->depth++;
904 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
905 0, 0, 0);
906 ctxt->depth--;
907 if (rep != NULL) {
908 current = rep;
909 while (*current != 0) { /* non input consuming loop */
910 buffer[nbchars++] = *current++;
911 if (nbchars >
912 buffer_size - XML_PARSER_BUFFER_SIZE) {
913 growBuffer(buffer);
914 }
915 }
916 xmlFree(rep);
917 }
918 }
919 } else {
920 COPY_BUF(l,buffer,nbchars,c);
921 str += l;
922 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
923 growBuffer(buffer);
924 }
925 }
926 c = CUR_SCHAR(str, l);
927 }
928 buffer[nbchars++] = 0;
929 return(buffer);
930}
931
932
933/************************************************************************
934 * *
935 * Commodity functions to handle xmlChars *
936 * *
937 ************************************************************************/
938
939/**
940 * xmlStrndup:
941 * @cur: the input xmlChar *
942 * @len: the len of @cur
943 *
944 * a strndup for array of xmlChar's
945 *
946 * Returns a new xmlChar * or NULL
947 */
948xmlChar *
949xmlStrndup(const xmlChar *cur, int len) {
950 xmlChar *ret;
951
952 if ((cur == NULL) || (len < 0)) return(NULL);
953 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
954 if (ret == NULL) {
955 xmlGenericError(xmlGenericErrorContext,
956 "malloc of %ld byte failed\n",
957 (len + 1) * (long)sizeof(xmlChar));
958 return(NULL);
959 }
960 memcpy(ret, cur, len * sizeof(xmlChar));
961 ret[len] = 0;
962 return(ret);
963}
964
965/**
966 * xmlStrdup:
967 * @cur: the input xmlChar *
968 *
969 * a strdup for array of xmlChar's. Since they are supposed to be
970 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
971 * a termination mark of '0'.
972 *
973 * Returns a new xmlChar * or NULL
974 */
975xmlChar *
976xmlStrdup(const xmlChar *cur) {
977 const xmlChar *p = cur;
978
979 if (cur == NULL) return(NULL);
980 while (*p != 0) p++; /* non input consuming */
981 return(xmlStrndup(cur, p - cur));
982}
983
984/**
985 * xmlCharStrndup:
986 * @cur: the input char *
987 * @len: the len of @cur
988 *
989 * a strndup for char's to xmlChar's
990 *
991 * Returns a new xmlChar * or NULL
992 */
993
994xmlChar *
995xmlCharStrndup(const char *cur, int len) {
996 int i;
997 xmlChar *ret;
998
999 if ((cur == NULL) || (len < 0)) return(NULL);
1000 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1001 if (ret == NULL) {
1002 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1003 (len + 1) * (long)sizeof(xmlChar));
1004 return(NULL);
1005 }
1006 for (i = 0;i < len;i++)
1007 ret[i] = (xmlChar) cur[i];
1008 ret[len] = 0;
1009 return(ret);
1010}
1011
1012/**
1013 * xmlCharStrdup:
1014 * @cur: the input char *
1015 * @len: the len of @cur
1016 *
1017 * a strdup for char's to xmlChar's
1018 *
1019 * Returns a new xmlChar * or NULL
1020 */
1021
1022xmlChar *
1023xmlCharStrdup(const char *cur) {
1024 const char *p = cur;
1025
1026 if (cur == NULL) return(NULL);
1027 while (*p != '\0') p++; /* non input consuming */
1028 return(xmlCharStrndup(cur, p - cur));
1029}
1030
1031/**
1032 * xmlStrcmp:
1033 * @str1: the first xmlChar *
1034 * @str2: the second xmlChar *
1035 *
1036 * a strcmp for xmlChar's
1037 *
1038 * Returns the integer result of the comparison
1039 */
1040
1041int
1042xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1043 register int tmp;
1044
1045 if (str1 == str2) return(0);
1046 if (str1 == NULL) return(-1);
1047 if (str2 == NULL) return(1);
1048 do {
1049 tmp = *str1++ - *str2;
1050 if (tmp != 0) return(tmp);
1051 } while (*str2++ != 0);
1052 return 0;
1053}
1054
1055/**
1056 * xmlStrEqual:
1057 * @str1: the first xmlChar *
1058 * @str2: the second xmlChar *
1059 *
1060 * Check if both string are equal of have same content
1061 * Should be a bit more readable and faster than xmlStrEqual()
1062 *
1063 * Returns 1 if they are equal, 0 if they are different
1064 */
1065
1066int
1067xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1068 if (str1 == str2) return(1);
1069 if (str1 == NULL) return(0);
1070 if (str2 == NULL) return(0);
1071 do {
1072 if (*str1++ != *str2) return(0);
1073 } while (*str2++);
1074 return(1);
1075}
1076
1077/**
1078 * xmlStrncmp:
1079 * @str1: the first xmlChar *
1080 * @str2: the second xmlChar *
1081 * @len: the max comparison length
1082 *
1083 * a strncmp for xmlChar's
1084 *
1085 * Returns the integer result of the comparison
1086 */
1087
1088int
1089xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1090 register int tmp;
1091
1092 if (len <= 0) return(0);
1093 if (str1 == str2) return(0);
1094 if (str1 == NULL) return(-1);
1095 if (str2 == NULL) return(1);
1096 do {
1097 tmp = *str1++ - *str2;
1098 if (tmp != 0 || --len == 0) return(tmp);
1099 } while (*str2++ != 0);
1100 return 0;
1101}
1102
1103static xmlChar casemap[256] = {
1104 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1105 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1106 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1107 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1108 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1109 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1110 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1111 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1112 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1113 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1114 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1115 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1116 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1117 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1118 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1119 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1120 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1121 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1122 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1123 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1124 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1125 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1126 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1127 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1128 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1129 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1130 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1131 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1132 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1133 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1134 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1135 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1136};
1137
1138/**
1139 * xmlStrcasecmp:
1140 * @str1: the first xmlChar *
1141 * @str2: the second xmlChar *
1142 *
1143 * a strcasecmp for xmlChar's
1144 *
1145 * Returns the integer result of the comparison
1146 */
1147
1148int
1149xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1150 register int tmp;
1151
1152 if (str1 == str2) return(0);
1153 if (str1 == NULL) return(-1);
1154 if (str2 == NULL) return(1);
1155 do {
1156 tmp = casemap[*str1++] - casemap[*str2];
1157 if (tmp != 0) return(tmp);
1158 } while (*str2++ != 0);
1159 return 0;
1160}
1161
1162/**
1163 * xmlStrncasecmp:
1164 * @str1: the first xmlChar *
1165 * @str2: the second xmlChar *
1166 * @len: the max comparison length
1167 *
1168 * a strncasecmp for xmlChar's
1169 *
1170 * Returns the integer result of the comparison
1171 */
1172
1173int
1174xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1175 register int tmp;
1176
1177 if (len <= 0) return(0);
1178 if (str1 == str2) return(0);
1179 if (str1 == NULL) return(-1);
1180 if (str2 == NULL) return(1);
1181 do {
1182 tmp = casemap[*str1++] - casemap[*str2];
1183 if (tmp != 0 || --len == 0) return(tmp);
1184 } while (*str2++ != 0);
1185 return 0;
1186}
1187
1188/**
1189 * xmlStrchr:
1190 * @str: the xmlChar * array
1191 * @val: the xmlChar to search
1192 *
1193 * a strchr for xmlChar's
1194 *
1195 * Returns the xmlChar * for the first occurence or NULL.
1196 */
1197
1198const xmlChar *
1199xmlStrchr(const xmlChar *str, xmlChar val) {
1200 if (str == NULL) return(NULL);
1201 while (*str != 0) { /* non input consuming */
1202 if (*str == val) return((xmlChar *) str);
1203 str++;
1204 }
1205 return(NULL);
1206}
1207
1208/**
1209 * xmlStrstr:
1210 * @str: the xmlChar * array (haystack)
1211 * @val: the xmlChar to search (needle)
1212 *
1213 * a strstr for xmlChar's
1214 *
1215 * Returns the xmlChar * for the first occurence or NULL.
1216 */
1217
1218const xmlChar *
1219xmlStrstr(const xmlChar *str, xmlChar *val) {
1220 int n;
1221
1222 if (str == NULL) return(NULL);
1223 if (val == NULL) return(NULL);
1224 n = xmlStrlen(val);
1225
1226 if (n == 0) return(str);
1227 while (*str != 0) { /* non input consuming */
1228 if (*str == *val) {
1229 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1230 }
1231 str++;
1232 }
1233 return(NULL);
1234}
1235
1236/**
1237 * xmlStrcasestr:
1238 * @str: the xmlChar * array (haystack)
1239 * @val: the xmlChar to search (needle)
1240 *
1241 * a case-ignoring strstr for xmlChar's
1242 *
1243 * Returns the xmlChar * for the first occurence or NULL.
1244 */
1245
1246const xmlChar *
1247xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1248 int n;
1249
1250 if (str == NULL) return(NULL);
1251 if (val == NULL) return(NULL);
1252 n = xmlStrlen(val);
1253
1254 if (n == 0) return(str);
1255 while (*str != 0) { /* non input consuming */
1256 if (casemap[*str] == casemap[*val])
1257 if (!xmlStrncasecmp(str, val, n)) return(str);
1258 str++;
1259 }
1260 return(NULL);
1261}
1262
1263/**
1264 * xmlStrsub:
1265 * @str: the xmlChar * array (haystack)
1266 * @start: the index of the first char (zero based)
1267 * @len: the length of the substring
1268 *
1269 * Extract a substring of a given string
1270 *
1271 * Returns the xmlChar * for the first occurence or NULL.
1272 */
1273
1274xmlChar *
1275xmlStrsub(const xmlChar *str, int start, int len) {
1276 int i;
1277
1278 if (str == NULL) return(NULL);
1279 if (start < 0) return(NULL);
1280 if (len < 0) return(NULL);
1281
1282 for (i = 0;i < start;i++) {
1283 if (*str == 0) return(NULL);
1284 str++;
1285 }
1286 if (*str == 0) return(NULL);
1287 return(xmlStrndup(str, len));
1288}
1289
1290/**
1291 * xmlStrlen:
1292 * @str: the xmlChar * array
1293 *
1294 * length of a xmlChar's string
1295 *
1296 * Returns the number of xmlChar contained in the ARRAY.
1297 */
1298
1299int
1300xmlStrlen(const xmlChar *str) {
1301 int len = 0;
1302
1303 if (str == NULL) return(0);
1304 while (*str != 0) { /* non input consuming */
1305 str++;
1306 len++;
1307 }
1308 return(len);
1309}
1310
1311/**
1312 * xmlStrncat:
1313 * @cur: the original xmlChar * array
1314 * @add: the xmlChar * array added
1315 * @len: the length of @add
1316 *
1317 * a strncat for array of xmlChar's, it will extend cur with the len
1318 * first bytes of @add.
1319 *
1320 * Returns a new xmlChar *, the original @cur is reallocated if needed
1321 * and should not be freed
1322 */
1323
1324xmlChar *
1325xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1326 int size;
1327 xmlChar *ret;
1328
1329 if ((add == NULL) || (len == 0))
1330 return(cur);
1331 if (cur == NULL)
1332 return(xmlStrndup(add, len));
1333
1334 size = xmlStrlen(cur);
1335 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1336 if (ret == NULL) {
1337 xmlGenericError(xmlGenericErrorContext,
1338 "xmlStrncat: realloc of %ld byte failed\n",
1339 (size + len + 1) * (long)sizeof(xmlChar));
1340 return(cur);
1341 }
1342 memcpy(&ret[size], add, len * sizeof(xmlChar));
1343 ret[size + len] = 0;
1344 return(ret);
1345}
1346
1347/**
1348 * xmlStrcat:
1349 * @cur: the original xmlChar * array
1350 * @add: the xmlChar * array added
1351 *
1352 * a strcat for array of xmlChar's. Since they are supposed to be
1353 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1354 * a termination mark of '0'.
1355 *
1356 * Returns a new xmlChar * containing the concatenated string.
1357 */
1358xmlChar *
1359xmlStrcat(xmlChar *cur, const xmlChar *add) {
1360 const xmlChar *p = add;
1361
1362 if (add == NULL) return(cur);
1363 if (cur == NULL)
1364 return(xmlStrdup(add));
1365
1366 while (*p != 0) p++; /* non input consuming */
1367 return(xmlStrncat(cur, add, p - add));
1368}
1369
1370/************************************************************************
1371 * *
1372 * Commodity functions, cleanup needed ? *
1373 * *
1374 ************************************************************************/
1375
1376/**
1377 * areBlanks:
1378 * @ctxt: an XML parser context
1379 * @str: a xmlChar *
1380 * @len: the size of @str
1381 *
1382 * Is this a sequence of blank chars that one can ignore ?
1383 *
1384 * Returns 1 if ignorable 0 otherwise.
1385 */
1386
1387static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1388 int i, ret;
1389 xmlNodePtr lastChild;
1390
1391 /*
1392 * Check for xml:space value.
1393 */
1394 if (*(ctxt->space) == 1)
1395 return(0);
1396
1397 /*
1398 * Check that the string is made of blanks
1399 */
1400 for (i = 0;i < len;i++)
1401 if (!(IS_BLANK(str[i]))) return(0);
1402
1403 /*
1404 * Look if the element is mixed content in the Dtd if available
1405 */
1406 if (ctxt->myDoc != NULL) {
1407 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1408 if (ret == 0) return(1);
1409 if (ret == 1) return(0);
1410 }
1411
1412 /*
1413 * Otherwise, heuristic :-\
1414 */
1415 if (ctxt->keepBlanks)
1416 return(0);
1417 if (RAW != '<') return(0);
1418 if (ctxt->node == NULL) return(0);
1419 if ((ctxt->node->children == NULL) &&
1420 (RAW == '<') && (NXT(1) == '/')) return(0);
1421
1422 lastChild = xmlGetLastChild(ctxt->node);
1423 if (lastChild == NULL) {
1424 if (ctxt->node->content != NULL) return(0);
1425 } else if (xmlNodeIsText(lastChild))
1426 return(0);
1427 else if ((ctxt->node->children != NULL) &&
1428 (xmlNodeIsText(ctxt->node->children)))
1429 return(0);
1430 return(1);
1431}
1432
1433/*
 * Forward declarations for recursive behaviour.
1435 */
1436void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1437void xmlParseReference(xmlParserCtxtPtr ctxt);
1438
1439/************************************************************************
1440 * *
1441 * Extra stuff for namespace support *
1442 * Relates to http://www.w3.org/TR/WD-xml-names *
1443 * *
1444 ************************************************************************/
1445
1446/**
1447 * xmlSplitQName:
1448 * @ctxt: an XML parser context
1449 * @name: an XML parser context
1450 * @prefix: a xmlChar **
1451 *
1452 * parse an UTF8 encoded XML qualified name string
1453 *
1454 * [NS 5] QName ::= (Prefix ':')? LocalPart
1455 *
1456 * [NS 6] Prefix ::= NCName
1457 *
1458 * [NS 7] LocalPart ::= NCName
1459 *
1460 * Returns the local part, and prefix is updated
1461 * to get the Prefix if any.
1462 */
1463
1464xmlChar *
1465xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1466 xmlChar buf[XML_MAX_NAMELEN + 5];
1467 xmlChar *buffer = NULL;
1468 int len = 0;
1469 int max = XML_MAX_NAMELEN;
1470 xmlChar *ret = NULL;
1471 const xmlChar *cur = name;
1472 int c;
1473
1474 *prefix = NULL;
1475
1476#ifndef XML_XML_NAMESPACE
1477 /* xml: prefix is not really a namespace */
1478 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1479 (cur[2] == 'l') && (cur[3] == ':'))
1480 return(xmlStrdup(name));
1481#endif
1482
1483 /* nasty but valid */
1484 if (cur[0] == ':')
1485 return(xmlStrdup(name));
1486
1487 c = *cur++;
1488 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1489 buf[len++] = c;
1490 c = *cur++;
1491 }
1492 if (len >= max) {
1493 /*
1494 * Okay someone managed to make a huge name, so he's ready to pay
1495 * for the processing speed.
1496 */
1497 max = len * 2;
1498
1499 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1500 if (buffer == NULL) {
1501 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1502 ctxt->sax->error(ctxt->userData,
1503 "xmlSplitQName: out of memory\n");
1504 return(NULL);
1505 }
1506 memcpy(buffer, buf, len);
1507 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1508 if (len + 10 > max) {
1509 max *= 2;
1510 buffer = (xmlChar *) xmlRealloc(buffer,
1511 max * sizeof(xmlChar));
1512 if (buffer == NULL) {
1513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1514 ctxt->sax->error(ctxt->userData,
1515 "xmlSplitQName: out of memory\n");
1516 return(NULL);
1517 }
1518 }
1519 buffer[len++] = c;
1520 c = *cur++;
1521 }
1522 buffer[len] = 0;
1523 }
1524
1525 if (buffer == NULL)
1526 ret = xmlStrndup(buf, len);
1527 else {
1528 ret = buffer;
1529 buffer = NULL;
1530 max = XML_MAX_NAMELEN;
1531 }
1532
1533
1534 if (c == ':') {
1535 c = *cur++;
1536 if (c == 0) return(ret);
1537 *prefix = ret;
1538 len = 0;
1539
1540 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1541 buf[len++] = c;
1542 c = *cur++;
1543 }
1544 if (len >= max) {
1545 /*
1546 * Okay someone managed to make a huge name, so he's ready to pay
1547 * for the processing speed.
1548 */
1549 max = len * 2;
1550
1551 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1552 if (buffer == NULL) {
1553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1554 ctxt->sax->error(ctxt->userData,
1555 "xmlSplitQName: out of memory\n");
1556 return(NULL);
1557 }
1558 memcpy(buffer, buf, len);
1559 while (c != 0) { /* tested bigname2.xml */
1560 if (len + 10 > max) {
1561 max *= 2;
1562 buffer = (xmlChar *) xmlRealloc(buffer,
1563 max * sizeof(xmlChar));
1564 if (buffer == NULL) {
1565 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1566 ctxt->sax->error(ctxt->userData,
1567 "xmlSplitQName: out of memory\n");
1568 return(NULL);
1569 }
1570 }
1571 buffer[len++] = c;
1572 c = *cur++;
1573 }
1574 buffer[len] = 0;
1575 }
1576
1577 if (buffer == NULL)
1578 ret = xmlStrndup(buf, len);
1579 else {
1580 ret = buffer;
1581 }
1582 }
1583
1584 return(ret);
1585}
1586
1587/************************************************************************
1588 * *
1589 * The parser itself *
1590 * Relates to http://www.w3.org/TR/REC-xml *
1591 * *
1592 ************************************************************************/
1593
1594/**
1595 * xmlParseName:
1596 * @ctxt: an XML parser context
1597 *
1598 * parse an XML name.
1599 *
1600 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1601 * CombiningChar | Extender
1602 *
1603 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1604 *
1605 * [6] Names ::= Name (S Name)*
1606 *
1607 * Returns the Name parsed or NULL
1608 */
1609
1610xmlChar *
1611xmlParseName(xmlParserCtxtPtr ctxt) {
1612 xmlChar buf[XML_MAX_NAMELEN + 5];
1613 int len = 0, l;
1614 int c;
1615 int count = 0;
1616
1617 GROW;
1618 c = CUR_CHAR(l);
1619 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1620 (!IS_LETTER(c) && (c != '_') &&
1621 (c != ':'))) {
1622 return(NULL);
1623 }
1624
1625 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1626 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1627 (c == '.') || (c == '-') ||
1628 (c == '_') || (c == ':') ||
1629 (IS_COMBINING(c)) ||
1630 (IS_EXTENDER(c)))) {
1631 if (count++ > 100) {
1632 count = 0;
1633 GROW;
1634 }
1635 COPY_BUF(l,buf,len,c);
1636 NEXTL(l);
1637 c = CUR_CHAR(l);
1638 if (len >= XML_MAX_NAMELEN) {
1639 /*
1640 * Okay someone managed to make a huge name, so he's ready to pay
1641 * for the processing speed.
1642 */
1643 xmlChar *buffer;
1644 int max = len * 2;
1645
1646 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1647 if (buffer == NULL) {
1648 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1649 ctxt->sax->error(ctxt->userData,
1650 "xmlParseName: out of memory\n");
1651 return(NULL);
1652 }
1653 memcpy(buffer, buf, len);
1654 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1655 (c == '.') || (c == '-') ||
1656 (c == '_') || (c == ':') ||
1657 (IS_COMBINING(c)) ||
1658 (IS_EXTENDER(c))) {
1659 if (count++ > 100) {
1660 count = 0;
1661 GROW;
1662 }
1663 if (len + 10 > max) {
1664 max *= 2;
1665 buffer = (xmlChar *) xmlRealloc(buffer,
1666 max * sizeof(xmlChar));
1667 if (buffer == NULL) {
1668 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1669 ctxt->sax->error(ctxt->userData,
1670 "xmlParseName: out of memory\n");
1671 return(NULL);
1672 }
1673 }
1674 COPY_BUF(l,buffer,len,c);
1675 NEXTL(l);
1676 c = CUR_CHAR(l);
1677 }
1678 buffer[len] = 0;
1679 return(buffer);
1680 }
1681 }
1682 return(xmlStrndup(buf, len));
1683}
1684
1685/**
1686 * xmlParseStringName:
1687 * @ctxt: an XML parser context
1688 * @str: a pointer to the string pointer (IN/OUT)
1689 *
1690 * parse an XML name.
1691 *
1692 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1693 * CombiningChar | Extender
1694 *
1695 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1696 *
1697 * [6] Names ::= Name (S Name)*
1698 *
1699 * Returns the Name parsed or NULL. The str pointer
1700 * is updated to the current location in the string.
1701 */
1702
1703xmlChar *
1704xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1705 xmlChar buf[XML_MAX_NAMELEN + 5];
1706 const xmlChar *cur = *str;
1707 int len = 0, l;
1708 int c;
1709
1710 c = CUR_SCHAR(cur, l);
1711 if (!IS_LETTER(c) && (c != '_') &&
1712 (c != ':')) {
1713 return(NULL);
1714 }
1715
1716 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1717 (c == '.') || (c == '-') ||
1718 (c == '_') || (c == ':') ||
1719 (IS_COMBINING(c)) ||
1720 (IS_EXTENDER(c))) {
1721 COPY_BUF(l,buf,len,c);
1722 cur += l;
1723 c = CUR_SCHAR(cur, l);
1724 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1725 /*
1726 * Okay someone managed to make a huge name, so he's ready to pay
1727 * for the processing speed.
1728 */
1729 xmlChar *buffer;
1730 int max = len * 2;
1731
1732 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1733 if (buffer == NULL) {
1734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1735 ctxt->sax->error(ctxt->userData,
1736 "xmlParseStringName: out of memory\n");
1737 return(NULL);
1738 }
1739 memcpy(buffer, buf, len);
1740 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1741 (c == '.') || (c == '-') ||
1742 (c == '_') || (c == ':') ||
1743 (IS_COMBINING(c)) ||
1744 (IS_EXTENDER(c))) {
1745 if (len + 10 > max) {
1746 max *= 2;
1747 buffer = (xmlChar *) xmlRealloc(buffer,
1748 max * sizeof(xmlChar));
1749 if (buffer == NULL) {
1750 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1751 ctxt->sax->error(ctxt->userData,
1752 "xmlParseStringName: out of memory\n");
1753 return(NULL);
1754 }
1755 }
1756 COPY_BUF(l,buffer,len,c);
1757 cur += l;
1758 c = CUR_SCHAR(cur, l);
1759 }
1760 buffer[len] = 0;
1761 *str = cur;
1762 return(buffer);
1763 }
1764 }
1765 *str = cur;
1766 return(xmlStrndup(buf, len));
1767}
1768
1769/**
1770 * xmlParseNmtoken:
1771 * @ctxt: an XML parser context
1772 *
1773 * parse an XML Nmtoken.
1774 *
1775 * [7] Nmtoken ::= (NameChar)+
1776 *
1777 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1778 *
1779 * Returns the Nmtoken parsed or NULL
1780 */
1781
1782xmlChar *
1783xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1784 xmlChar buf[XML_MAX_NAMELEN + 5];
1785 int len = 0, l;
1786 int c;
1787 int count = 0;
1788
1789 GROW;
1790 c = CUR_CHAR(l);
1791
1792 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1793 (c == '.') || (c == '-') ||
1794 (c == '_') || (c == ':') ||
1795 (IS_COMBINING(c)) ||
1796 (IS_EXTENDER(c))) {
1797 if (count++ > 100) {
1798 count = 0;
1799 GROW;
1800 }
1801 COPY_BUF(l,buf,len,c);
1802 NEXTL(l);
1803 c = CUR_CHAR(l);
1804 if (len >= XML_MAX_NAMELEN) {
1805 /*
1806 * Okay someone managed to make a huge token, so he's ready to pay
1807 * for the processing speed.
1808 */
1809 xmlChar *buffer;
1810 int max = len * 2;
1811
1812 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1813 if (buffer == NULL) {
1814 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1815 ctxt->sax->error(ctxt->userData,
1816 "xmlParseNmtoken: out of memory\n");
1817 return(NULL);
1818 }
1819 memcpy(buffer, buf, len);
1820 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1821 (c == '.') || (c == '-') ||
1822 (c == '_') || (c == ':') ||
1823 (IS_COMBINING(c)) ||
1824 (IS_EXTENDER(c))) {
1825 if (count++ > 100) {
1826 count = 0;
1827 GROW;
1828 }
1829 if (len + 10 > max) {
1830 max *= 2;
1831 buffer = (xmlChar *) xmlRealloc(buffer,
1832 max * sizeof(xmlChar));
1833 if (buffer == NULL) {
1834 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1835 ctxt->sax->error(ctxt->userData,
1836 "xmlParseName: out of memory\n");
1837 return(NULL);
1838 }
1839 }
1840 COPY_BUF(l,buffer,len,c);
1841 NEXTL(l);
1842 c = CUR_CHAR(l);
1843 }
1844 buffer[len] = 0;
1845 return(buffer);
1846 }
1847 }
1848 if (len == 0)
1849 return(NULL);
1850 return(xmlStrndup(buf, len));
1851}
1852
1853/**
1854 * xmlParseEntityValue:
1855 * @ctxt: an XML parser context
1856 * @orig: if non-NULL store a copy of the original entity value
1857 *
1858 * parse a value for ENTITY declarations
1859 *
1860 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1861 * "'" ([^%&'] | PEReference | Reference)* "'"
1862 *
1863 * Returns the EntityValue parsed with reference substitued or NULL
1864 */
1865
1866xmlChar *
1867xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1868 xmlChar *buf = NULL;
1869 int len = 0;
1870 int size = XML_PARSER_BUFFER_SIZE;
1871 int c, l;
1872 xmlChar stop;
1873 xmlChar *ret = NULL;
1874 const xmlChar *cur = NULL;
1875 xmlParserInputPtr input;
1876
1877 if (RAW == '"') stop = '"';
1878 else if (RAW == '\'') stop = '\'';
1879 else {
1880 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1881 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1882 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1883 ctxt->wellFormed = 0;
1884 ctxt->disableSAX = 1;
1885 return(NULL);
1886 }
1887 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1888 if (buf == NULL) {
1889 xmlGenericError(xmlGenericErrorContext,
1890 "malloc of %d byte failed\n", size);
1891 return(NULL);
1892 }
1893
1894 /*
1895 * The content of the entity definition is copied in a buffer.
1896 */
1897
1898 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1899 input = ctxt->input;
1900 GROW;
1901 NEXT;
1902 c = CUR_CHAR(l);
1903 /*
1904 * NOTE: 4.4.5 Included in Literal
1905 * When a parameter entity reference appears in a literal entity
1906 * value, ... a single or double quote character in the replacement
1907 * text is always treated as a normal data character and will not
1908 * terminate the literal.
1909 * In practice it means we stop the loop only when back at parsing
1910 * the initial entity and the quote is found
1911 */
1912 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1913 (ctxt->input != input))) {
1914 if (len + 5 >= size) {
1915 size *= 2;
1916 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1917 if (buf == NULL) {
1918 xmlGenericError(xmlGenericErrorContext,
1919 "realloc of %d byte failed\n", size);
1920 return(NULL);
1921 }
1922 }
1923 COPY_BUF(l,buf,len,c);
1924 NEXTL(l);
1925 /*
1926 * Pop-up of finished entities.
1927 */
1928 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1929 xmlPopInput(ctxt);
1930
1931 GROW;
1932 c = CUR_CHAR(l);
1933 if (c == 0) {
1934 GROW;
1935 c = CUR_CHAR(l);
1936 }
1937 }
1938 buf[len] = 0;
1939
1940 /*
1941 * Raise problem w.r.t. '&' and '%' being used in non-entities
1942 * reference constructs. Note Charref will be handled in
1943 * xmlStringDecodeEntities()
1944 */
1945 cur = buf;
1946 while (*cur != 0) { /* non input consuming */
1947 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1948 xmlChar *name;
1949 xmlChar tmp = *cur;
1950
1951 cur++;
1952 name = xmlParseStringName(ctxt, &cur);
1953 if ((name == NULL) || (*cur != ';')) {
1954 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1956 ctxt->sax->error(ctxt->userData,
1957 "EntityValue: '%c' forbidden except for entities references\n",
1958 tmp);
1959 ctxt->wellFormed = 0;
1960 ctxt->disableSAX = 1;
1961 }
1962 if ((ctxt->inSubset == 1) && (tmp == '%')) {
1963 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
1964 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1965 ctxt->sax->error(ctxt->userData,
1966 "EntityValue: PEReferences forbidden in internal subset\n",
1967 tmp);
1968 ctxt->wellFormed = 0;
1969 ctxt->disableSAX = 1;
1970 }
1971 if (name != NULL)
1972 xmlFree(name);
1973 }
1974 cur++;
1975 }
1976
1977 /*
1978 * Then PEReference entities are substituted.
1979 */
1980 if (c != stop) {
1981 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
1982 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1983 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
1984 ctxt->wellFormed = 0;
1985 ctxt->disableSAX = 1;
1986 xmlFree(buf);
1987 } else {
1988 NEXT;
1989 /*
1990 * NOTE: 4.4.7 Bypassed
1991 * When a general entity reference appears in the EntityValue in
1992 * an entity declaration, it is bypassed and left as is.
1993 * so XML_SUBSTITUTE_REF is not set here.
1994 */
1995 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
1996 0, 0, 0);
1997 if (orig != NULL)
1998 *orig = buf;
1999 else
2000 xmlFree(buf);
2001 }
2002
2003 return(ret);
2004}
2005
2006/**
2007 * xmlParseAttValue:
2008 * @ctxt: an XML parser context
2009 *
2010 * parse a value for an attribute
2011 * Note: the parser won't do substitution of entities here, this
2012 * will be handled later in xmlStringGetNodeList
2013 *
2014 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2015 * "'" ([^<&'] | Reference)* "'"
2016 *
2017 * 3.3.3 Attribute-Value Normalization:
2018 * Before the value of an attribute is passed to the application or
2019 * checked for validity, the XML processor must normalize it as follows:
2020 * - a character reference is processed by appending the referenced
2021 * character to the attribute value
2022 * - an entity reference is processed by recursively processing the
2023 * replacement text of the entity
2024 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2025 * appending #x20 to the normalized value, except that only a single
2026 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2027 * parsed entity or the literal entity value of an internal parsed entity
2028 * - other characters are processed by appending them to the normalized value
2029 * If the declared value is not CDATA, then the XML processor must further
2030 * process the normalized attribute value by discarding any leading and
2031 * trailing space (#x20) characters, and by replacing sequences of space
2032 * (#x20) characters by a single space (#x20) character.
2033 * All attributes for which no declaration has been read should be treated
2034 * by a non-validating parser as if declared CDATA.
2035 *
2036 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2037 */
2038
2039xmlChar *
2040xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2041 xmlChar limit = 0;
2042 xmlChar *buf = NULL;
2043 int len = 0;
2044 int buf_size = 0;
2045 int c, l;
2046 xmlChar *current = NULL;
2047 xmlEntityPtr ent;
2048
2049
2050 SHRINK;
2051 if (NXT(0) == '"') {
2052 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2053 limit = '"';
2054 NEXT;
2055 } else if (NXT(0) == '\'') {
2056 limit = '\'';
2057 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2058 NEXT;
2059 } else {
2060 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2061 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2062 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2063 ctxt->wellFormed = 0;
2064 ctxt->disableSAX = 1;
2065 return(NULL);
2066 }
2067
2068 /*
2069 * allocate a translation buffer.
2070 */
2071 buf_size = XML_PARSER_BUFFER_SIZE;
2072 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2073 if (buf == NULL) {
2074 perror("xmlParseAttValue: malloc failed");
2075 return(NULL);
2076 }
2077
2078 /*
2079 * Ok loop until we reach one of the ending char or a size limit.
2080 */
2081 c = CUR_CHAR(l);
2082 while (((NXT(0) != limit) && /* checked */
2083 (c != '<')) || (ctxt->token != 0)) {
2084 if (c == 0) break;
2085 if (ctxt->token == '&') {
2086 /*
2087 * The reparsing will be done in xmlStringGetNodeList()
2088 * called by the attribute() function in SAX.c
2089 */
2090 static xmlChar buffer[6] = "&#38;";
2091
2092 if (len > buf_size - 10) {
2093 growBuffer(buf);
2094 }
2095 current = &buffer[0];
2096 while (*current != 0) { /* non input consuming */
2097 buf[len++] = *current++;
2098 }
2099 ctxt->token = 0;
2100 } else if (c == '&') {
2101 if (NXT(1) == '#') {
2102 int val = xmlParseCharRef(ctxt);
2103 if (val == '&') {
2104 /*
2105 * The reparsing will be done in xmlStringGetNodeList()
2106 * called by the attribute() function in SAX.c
2107 */
2108 static xmlChar buffer[6] = "&#38;";
2109
2110 if (len > buf_size - 10) {
2111 growBuffer(buf);
2112 }
2113 current = &buffer[0];
2114 while (*current != 0) { /* non input consuming */
2115 buf[len++] = *current++;
2116 }
2117 } else {
2118 len += xmlCopyChar(0, &buf[len], val);
2119 }
2120 } else {
2121 ent = xmlParseEntityRef(ctxt);
2122 if ((ent != NULL) &&
2123 (ctxt->replaceEntities != 0)) {
2124 xmlChar *rep;
2125
2126 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2127 rep = xmlStringDecodeEntities(ctxt, ent->content,
2128 XML_SUBSTITUTE_REF, 0, 0, 0);
2129 if (rep != NULL) {
2130 current = rep;
2131 while (*current != 0) { /* non input consuming */
2132 buf[len++] = *current++;
2133 if (len > buf_size - 10) {
2134 growBuffer(buf);
2135 }
2136 }
2137 xmlFree(rep);
2138 }
2139 } else {
2140 if (ent->content != NULL)
2141 buf[len++] = ent->content[0];
2142 }
2143 } else if (ent != NULL) {
2144 int i = xmlStrlen(ent->name);
2145 const xmlChar *cur = ent->name;
2146
2147 /*
2148 * This may look absurd but is needed to detect
2149 * entities problems
2150 */
2151 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2152 (ent->content != NULL)) {
2153 xmlChar *rep;
2154 rep = xmlStringDecodeEntities(ctxt, ent->content,
2155 XML_SUBSTITUTE_REF, 0, 0, 0);
2156 if (rep != NULL)
2157 xmlFree(rep);
2158 }
2159
2160 /*
2161 * Just output the reference
2162 */
2163 buf[len++] = '&';
2164 if (len > buf_size - i - 10) {
2165 growBuffer(buf);
2166 }
2167 for (;i > 0;i--)
2168 buf[len++] = *cur++;
2169 buf[len++] = ';';
2170 }
2171 }
2172 } else {
2173 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2174 COPY_BUF(l,buf,len,0x20);
2175 if (len > buf_size - 10) {
2176 growBuffer(buf);
2177 }
2178 } else {
2179 COPY_BUF(l,buf,len,c);
2180 if (len > buf_size - 10) {
2181 growBuffer(buf);
2182 }
2183 }
2184 NEXTL(l);
2185 }
2186 GROW;
2187 c = CUR_CHAR(l);
2188 }
2189 buf[len++] = 0;
2190 if (RAW == '<') {
2191 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2193 ctxt->sax->error(ctxt->userData,
2194 "Unescaped '<' not allowed in attributes values\n");
2195 ctxt->wellFormed = 0;
2196 ctxt->disableSAX = 1;
2197 } else if (RAW != limit) {
2198 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2199 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2200 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2201 ctxt->wellFormed = 0;
2202 ctxt->disableSAX = 1;
2203 } else
2204 NEXT;
2205 return(buf);
2206}
2207
2208/**
2209 * xmlParseSystemLiteral:
2210 * @ctxt: an XML parser context
2211 *
2212 * parse an XML Literal
2213 *
2214 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2215 *
2216 * Returns the SystemLiteral parsed or NULL
2217 */
2218
2219xmlChar *
2220xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2221 xmlChar *buf = NULL;
2222 int len = 0;
2223 int size = XML_PARSER_BUFFER_SIZE;
2224 int cur, l;
2225 xmlChar stop;
2226 int state = ctxt->instate;
2227 int count = 0;
2228
2229 SHRINK;
2230 if (RAW == '"') {
2231 NEXT;
2232 stop = '"';
2233 } else if (RAW == '\'') {
2234 NEXT;
2235 stop = '\'';
2236 } else {
2237 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2238 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2239 ctxt->sax->error(ctxt->userData,
2240 "SystemLiteral \" or ' expected\n");
2241 ctxt->wellFormed = 0;
2242 ctxt->disableSAX = 1;
2243 return(NULL);
2244 }
2245
2246 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2247 if (buf == NULL) {
2248 xmlGenericError(xmlGenericErrorContext,
2249 "malloc of %d byte failed\n", size);
2250 return(NULL);
2251 }
2252 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2253 cur = CUR_CHAR(l);
2254 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2255 if (len + 5 >= size) {
2256 size *= 2;
2257 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2258 if (buf == NULL) {
2259 xmlGenericError(xmlGenericErrorContext,
2260 "realloc of %d byte failed\n", size);
2261 ctxt->instate = (xmlParserInputState) state;
2262 return(NULL);
2263 }
2264 }
2265 count++;
2266 if (count > 50) {
2267 GROW;
2268 count = 0;
2269 }
2270 COPY_BUF(l,buf,len,cur);
2271 NEXTL(l);
2272 cur = CUR_CHAR(l);
2273 if (cur == 0) {
2274 GROW;
2275 SHRINK;
2276 cur = CUR_CHAR(l);
2277 }
2278 }
2279 buf[len] = 0;
2280 ctxt->instate = (xmlParserInputState) state;
2281 if (!IS_CHAR(cur)) {
2282 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2283 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2284 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2285 ctxt->wellFormed = 0;
2286 ctxt->disableSAX = 1;
2287 } else {
2288 NEXT;
2289 }
2290 return(buf);
2291}
2292
2293/**
2294 * xmlParsePubidLiteral:
2295 * @ctxt: an XML parser context
2296 *
2297 * parse an XML public literal
2298 *
2299 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2300 *
2301 * Returns the PubidLiteral parsed or NULL.
2302 */
2303
2304xmlChar *
2305xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2306 xmlChar *buf = NULL;
2307 int len = 0;
2308 int size = XML_PARSER_BUFFER_SIZE;
2309 xmlChar cur;
2310 xmlChar stop;
2311 int count = 0;
2312
2313 SHRINK;
2314 if (RAW == '"') {
2315 NEXT;
2316 stop = '"';
2317 } else if (RAW == '\'') {
2318 NEXT;
2319 stop = '\'';
2320 } else {
2321 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2322 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2323 ctxt->sax->error(ctxt->userData,
2324 "SystemLiteral \" or ' expected\n");
2325 ctxt->wellFormed = 0;
2326 ctxt->disableSAX = 1;
2327 return(NULL);
2328 }
2329 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2330 if (buf == NULL) {
2331 xmlGenericError(xmlGenericErrorContext,
2332 "malloc of %d byte failed\n", size);
2333 return(NULL);
2334 }
2335 cur = CUR;
2336 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2337 if (len + 1 >= size) {
2338 size *= 2;
2339 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2340 if (buf == NULL) {
2341 xmlGenericError(xmlGenericErrorContext,
2342 "realloc of %d byte failed\n", size);
2343 return(NULL);
2344 }
2345 }
2346 buf[len++] = cur;
2347 count++;
2348 if (count > 50) {
2349 GROW;
2350 count = 0;
2351 }
2352 NEXT;
2353 cur = CUR;
2354 if (cur == 0) {
2355 GROW;
2356 SHRINK;
2357 cur = CUR;
2358 }
2359 }
2360 buf[len] = 0;
2361 if (cur != stop) {
2362 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2363 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2364 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2365 ctxt->wellFormed = 0;
2366 ctxt->disableSAX = 1;
2367 } else {
2368 NEXT;
2369 }
2370 return(buf);
2371}
2372
2373/**
2374 * xmlParseCharData:
2375 * @ctxt: an XML parser context
2376 * @cdata: int indicating whether we are within a CDATA section
2377 *
2378 * parse a CharData section.
2379 * if we are within a CDATA section ']]>' marks an end of section.
2380 *
2381 * The right angle bracket (>) may be represented using the string "&gt;",
2382 * and must, for compatibility, be escaped using "&gt;" or a character
2383 * reference when it appears in the string "]]>" in content, when that
2384 * string is not marking the end of a CDATA section.
2385 *
2386 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2387 */
2388
2389void
2390xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
2391 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2392 int nbchar = 0;
2393 int cur, l;
2394 int count = 0;
2395
2396 SHRINK;
2397 GROW;
2398 cur = CUR_CHAR(l);
2399 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2400 ((cur != '&') || (ctxt->token == '&')) &&
2401 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2402 if ((cur == ']') && (NXT(1) == ']') &&
2403 (NXT(2) == '>')) {
2404 if (cdata) break;
2405 else {
2406 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2407 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2408 ctxt->sax->error(ctxt->userData,
2409 "Sequence ']]>' not allowed in content\n");
2410 /* Should this be relaxed ??? I see a "must here */
2411 ctxt->wellFormed = 0;
2412 ctxt->disableSAX = 1;
2413 }
2414 }
2415 COPY_BUF(l,buf,nbchar,cur);
2416 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2417 /*
2418 * Ok the segment is to be consumed as chars.
2419 */
2420 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2421 if (areBlanks(ctxt, buf, nbchar)) {
2422 if (ctxt->sax->ignorableWhitespace != NULL)
2423 ctxt->sax->ignorableWhitespace(ctxt->userData,
2424 buf, nbchar);
2425 } else {
2426 if (ctxt->sax->characters != NULL)
2427 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2428 }
2429 }
2430 nbchar = 0;
2431 }
2432 count++;
2433 if (count > 50) {
2434 GROW;
2435 count = 0;
2436 }
2437 NEXTL(l);
2438 cur = CUR_CHAR(l);
2439 }
2440 if (nbchar != 0) {
2441 /*
2442 * Ok the segment is to be consumed as chars.
2443 */
2444 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2445 if (areBlanks(ctxt, buf, nbchar)) {
2446 if (ctxt->sax->ignorableWhitespace != NULL)
2447 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2448 } else {
2449 if (ctxt->sax->characters != NULL)
2450 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2451 }
2452 }
2453 }
2454}
2455
2456/**
2457 * xmlParseExternalID:
2458 * @ctxt: an XML parser context
2459 * @publicID: a xmlChar** receiving PubidLiteral
2460 * @strict: indicate whether we should restrict parsing to only
2461 * production [75], see NOTE below
2462 *
2463 * Parse an External ID or a Public ID
2464 *
2465 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2466 * 'PUBLIC' S PubidLiteral S SystemLiteral
2467 *
2468 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2469 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2470 *
2471 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2472 *
2473 * Returns the function returns SystemLiteral and in the second
2474 * case publicID receives PubidLiteral, is strict is off
2475 * it is possible to return NULL and have publicID set.
2476 */
2477
2478xmlChar *
2479xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2480 xmlChar *URI = NULL;
2481
2482 SHRINK;
2483 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2484 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2485 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2486 SKIP(6);
2487 if (!IS_BLANK(CUR)) {
2488 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2490 ctxt->sax->error(ctxt->userData,
2491 "Space required after 'SYSTEM'\n");
2492 ctxt->wellFormed = 0;
2493 ctxt->disableSAX = 1;
2494 }
2495 SKIP_BLANKS;
2496 URI = xmlParseSystemLiteral(ctxt);
2497 if (URI == NULL) {
2498 ctxt->errNo = XML_ERR_URI_REQUIRED;
2499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2500 ctxt->sax->error(ctxt->userData,
2501 "xmlParseExternalID: SYSTEM, no URI\n");
2502 ctxt->wellFormed = 0;
2503 ctxt->disableSAX = 1;
2504 }
2505 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2506 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2507 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2508 SKIP(6);
2509 if (!IS_BLANK(CUR)) {
2510 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2512 ctxt->sax->error(ctxt->userData,
2513 "Space required after 'PUBLIC'\n");
2514 ctxt->wellFormed = 0;
2515 ctxt->disableSAX = 1;
2516 }
2517 SKIP_BLANKS;
2518 *publicID = xmlParsePubidLiteral(ctxt);
2519 if (*publicID == NULL) {
2520 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2522 ctxt->sax->error(ctxt->userData,
2523 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2524 ctxt->wellFormed = 0;
2525 ctxt->disableSAX = 1;
2526 }
2527 if (strict) {
2528 /*
2529 * We don't handle [83] so "S SystemLiteral" is required.
2530 */
2531 if (!IS_BLANK(CUR)) {
2532 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2533 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2534 ctxt->sax->error(ctxt->userData,
2535 "Space required after the Public Identifier\n");
2536 ctxt->wellFormed = 0;
2537 ctxt->disableSAX = 1;
2538 }
2539 } else {
2540 /*
2541 * We handle [83] so we return immediately, if
2542 * "S SystemLiteral" is not detected. From a purely parsing
2543 * point of view that's a nice mess.
2544 */
2545 const xmlChar *ptr;
2546 GROW;
2547
2548 ptr = CUR_PTR;
2549 if (!IS_BLANK(*ptr)) return(NULL);
2550
2551 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2552 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2553 }
2554 SKIP_BLANKS;
2555 URI = xmlParseSystemLiteral(ctxt);
2556 if (URI == NULL) {
2557 ctxt->errNo = XML_ERR_URI_REQUIRED;
2558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2559 ctxt->sax->error(ctxt->userData,
2560 "xmlParseExternalID: PUBLIC, no URI\n");
2561 ctxt->wellFormed = 0;
2562 ctxt->disableSAX = 1;
2563 }
2564 }
2565 return(URI);
2566}
2567
2568/**
2569 * xmlParseComment:
2570 * @ctxt: an XML parser context
2571 *
2572 * Skip an XML (SGML) comment <!-- .... -->
2573 * The spec says that "For compatibility, the string "--" (double-hyphen)
2574 * must not occur within comments. "
2575 *
2576 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2577 */
2578void
2579xmlParseComment(xmlParserCtxtPtr ctxt) {
2580 xmlChar *buf = NULL;
2581 int len;
2582 int size = XML_PARSER_BUFFER_SIZE;
2583 int q, ql;
2584 int r, rl;
2585 int cur, l;
2586 xmlParserInputState state;
2587 xmlParserInputPtr input = ctxt->input;
2588 int count = 0;
2589
2590 /*
2591 * Check that there is a comment right here.
2592 */
2593 if ((RAW != '<') || (NXT(1) != '!') ||
2594 (NXT(2) != '-') || (NXT(3) != '-')) return;
2595
2596 state = ctxt->instate;
2597 ctxt->instate = XML_PARSER_COMMENT;
2598 SHRINK;
2599 SKIP(4);
2600 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2601 if (buf == NULL) {
2602 xmlGenericError(xmlGenericErrorContext,
2603 "malloc of %d byte failed\n", size);
2604 ctxt->instate = state;
2605 return;
2606 }
2607 q = CUR_CHAR(ql);
2608 NEXTL(ql);
2609 r = CUR_CHAR(rl);
2610 NEXTL(rl);
2611 cur = CUR_CHAR(l);
2612 len = 0;
2613 while (IS_CHAR(cur) && /* checked */
2614 ((cur != '>') ||
2615 (r != '-') || (q != '-'))) {
2616 if ((r == '-') && (q == '-') && (len > 1)) {
2617 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2619 ctxt->sax->error(ctxt->userData,
2620 "Comment must not contain '--' (double-hyphen)`\n");
2621 ctxt->wellFormed = 0;
2622 ctxt->disableSAX = 1;
2623 }
2624 if (len + 5 >= size) {
2625 size *= 2;
2626 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2627 if (buf == NULL) {
2628 xmlGenericError(xmlGenericErrorContext,
2629 "realloc of %d byte failed\n", size);
2630 ctxt->instate = state;
2631 return;
2632 }
2633 }
2634 COPY_BUF(ql,buf,len,q);
2635 q = r;
2636 ql = rl;
2637 r = cur;
2638 rl = l;
2639
2640 count++;
2641 if (count > 50) {
2642 GROW;
2643 count = 0;
2644 }
2645 NEXTL(l);
2646 cur = CUR_CHAR(l);
2647 if (cur == 0) {
2648 SHRINK;
2649 GROW;
2650 cur = CUR_CHAR(l);
2651 }
2652 }
2653 buf[len] = 0;
2654 if (!IS_CHAR(cur)) {
2655 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2657 ctxt->sax->error(ctxt->userData,
2658 "Comment not terminated \n<!--%.50s\n", buf);
2659 ctxt->wellFormed = 0;
2660 ctxt->disableSAX = 1;
2661 xmlFree(buf);
2662 } else {
2663 if (input != ctxt->input) {
2664 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2665 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2666 ctxt->sax->error(ctxt->userData,
2667"Comment doesn't start and stop in the same entity\n");
2668 ctxt->wellFormed = 0;
2669 ctxt->disableSAX = 1;
2670 }
2671 NEXT;
2672 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2673 (!ctxt->disableSAX))
2674 ctxt->sax->comment(ctxt->userData, buf);
2675 xmlFree(buf);
2676 }
2677 ctxt->instate = state;
2678}
2679
2680/**
2681 * xmlParsePITarget:
2682 * @ctxt: an XML parser context
2683 *
2684 * parse the name of a PI
2685 *
2686 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2687 *
2688 * Returns the PITarget name or NULL
2689 */
2690
2691xmlChar *
2692xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2693 xmlChar *name;
2694
2695 name = xmlParseName(ctxt);
2696 if ((name != NULL) &&
2697 ((name[0] == 'x') || (name[0] == 'X')) &&
2698 ((name[1] == 'm') || (name[1] == 'M')) &&
2699 ((name[2] == 'l') || (name[2] == 'L'))) {
2700 int i;
2701 if ((name[0] == 'x') && (name[1] == 'm') &&
2702 (name[2] == 'l') && (name[3] == 0)) {
2703 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2704 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2705 ctxt->sax->error(ctxt->userData,
2706 "XML declaration allowed only at the start of the document\n");
2707 ctxt->wellFormed = 0;
2708 ctxt->disableSAX = 1;
2709 return(name);
2710 } else if (name[3] == 0) {
2711 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2712 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2713 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2714 ctxt->wellFormed = 0;
2715 ctxt->disableSAX = 1;
2716 return(name);
2717 }
2718 for (i = 0;;i++) {
2719 if (xmlW3CPIs[i] == NULL) break;
2720 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2721 return(name);
2722 }
2723 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2724 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2725 ctxt->sax->warning(ctxt->userData,
2726 "xmlParsePItarget: invalid name prefix 'xml'\n");
2727 }
2728 }
2729 return(name);
2730}
2731
2732/**
2733 * xmlParsePI:
2734 * @ctxt: an XML parser context
2735 *
2736 * parse an XML Processing Instruction.
2737 *
2738 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2739 *
2740 * The processing is transfered to SAX once parsed.
2741 */
2742
2743void
2744xmlParsePI(xmlParserCtxtPtr ctxt) {
2745 xmlChar *buf = NULL;
2746 int len = 0;
2747 int size = XML_PARSER_BUFFER_SIZE;
2748 int cur, l;
2749 xmlChar *target;
2750 xmlParserInputState state;
2751 int count = 0;
2752
2753 if ((RAW == '<') && (NXT(1) == '?')) {
2754 xmlParserInputPtr input = ctxt->input;
2755 state = ctxt->instate;
2756 ctxt->instate = XML_PARSER_PI;
2757 /*
2758 * this is a Processing Instruction.
2759 */
2760 SKIP(2);
2761 SHRINK;
2762
2763 /*
2764 * Parse the target name and check for special support like
2765 * namespace.
2766 */
2767 target = xmlParsePITarget(ctxt);
2768 if (target != NULL) {
2769 if ((RAW == '?') && (NXT(1) == '>')) {
2770 if (input != ctxt->input) {
2771 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2773 ctxt->sax->error(ctxt->userData,
2774 "PI declaration doesn't start and stop in the same entity\n");
2775 ctxt->wellFormed = 0;
2776 ctxt->disableSAX = 1;
2777 }
2778 SKIP(2);
2779
2780 /*
2781 * SAX: PI detected.
2782 */
2783 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2784 (ctxt->sax->processingInstruction != NULL))
2785 ctxt->sax->processingInstruction(ctxt->userData,
2786 target, NULL);
2787 ctxt->instate = state;
2788 xmlFree(target);
2789 return;
2790 }
2791 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2792 if (buf == NULL) {
2793 xmlGenericError(xmlGenericErrorContext,
2794 "malloc of %d byte failed\n", size);
2795 ctxt->instate = state;
2796 return;
2797 }
2798 cur = CUR;
2799 if (!IS_BLANK(cur)) {
2800 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2802 ctxt->sax->error(ctxt->userData,
2803 "xmlParsePI: PI %s space expected\n", target);
2804 ctxt->wellFormed = 0;
2805 ctxt->disableSAX = 1;
2806 }
2807 SKIP_BLANKS;
2808 cur = CUR_CHAR(l);
2809 while (IS_CHAR(cur) && /* checked */
2810 ((cur != '?') || (NXT(1) != '>'))) {
2811 if (len + 5 >= size) {
2812 size *= 2;
2813 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2814 if (buf == NULL) {
2815 xmlGenericError(xmlGenericErrorContext,
2816 "realloc of %d byte failed\n", size);
2817 ctxt->instate = state;
2818 return;
2819 }
2820 }
2821 count++;
2822 if (count > 50) {
2823 GROW;
2824 count = 0;
2825 }
2826 COPY_BUF(l,buf,len,cur);
2827 NEXTL(l);
2828 cur = CUR_CHAR(l);
2829 if (cur == 0) {
2830 SHRINK;
2831 GROW;
2832 cur = CUR_CHAR(l);
2833 }
2834 }
2835 buf[len] = 0;
2836 if (cur != '?') {
2837 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2838 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2839 ctxt->sax->error(ctxt->userData,
2840 "xmlParsePI: PI %s never end ...\n", target);
2841 ctxt->wellFormed = 0;
2842 ctxt->disableSAX = 1;
2843 } else {
2844 if (input != ctxt->input) {
2845 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2846 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2847 ctxt->sax->error(ctxt->userData,
2848 "PI declaration doesn't start and stop in the same entity\n");
2849 ctxt->wellFormed = 0;
2850 ctxt->disableSAX = 1;
2851 }
2852 SKIP(2);
2853
2854 /*
2855 * SAX: PI detected.
2856 */
2857 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2858 (ctxt->sax->processingInstruction != NULL))
2859 ctxt->sax->processingInstruction(ctxt->userData,
2860 target, buf);
2861 }
2862 xmlFree(buf);
2863 xmlFree(target);
2864 } else {
2865 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
2866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2867 ctxt->sax->error(ctxt->userData,
2868 "xmlParsePI : no target name\n");
2869 ctxt->wellFormed = 0;
2870 ctxt->disableSAX = 1;
2871 }
2872 ctxt->instate = state;
2873 }
2874}
2875
2876/**
2877 * xmlParseNotationDecl:
2878 * @ctxt: an XML parser context
2879 *
2880 * parse a notation declaration
2881 *
2882 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2883 *
2884 * Hence there is actually 3 choices:
2885 * 'PUBLIC' S PubidLiteral
2886 * 'PUBLIC' S PubidLiteral S SystemLiteral
2887 * and 'SYSTEM' S SystemLiteral
2888 *
2889 * See the NOTE on xmlParseExternalID().
2890 */
2891
2892void
2893xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
2894 xmlChar *name;
2895 xmlChar *Pubid;
2896 xmlChar *Systemid;
2897
2898 if ((RAW == '<') && (NXT(1) == '!') &&
2899 (NXT(2) == 'N') && (NXT(3) == 'O') &&
2900 (NXT(4) == 'T') && (NXT(5) == 'A') &&
2901 (NXT(6) == 'T') && (NXT(7) == 'I') &&
2902 (NXT(8) == 'O') && (NXT(9) == 'N')) {
2903 xmlParserInputPtr input = ctxt->input;
2904 SHRINK;
2905 SKIP(10);
2906 if (!IS_BLANK(CUR)) {
2907 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2909 ctxt->sax->error(ctxt->userData,
2910 "Space required after '<!NOTATION'\n");
2911 ctxt->wellFormed = 0;
2912 ctxt->disableSAX = 1;
2913 return;
2914 }
2915 SKIP_BLANKS;
2916
2917 name = xmlParseName(ctxt);
2918 if (name == NULL) {
2919 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
2920 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2921 ctxt->sax->error(ctxt->userData,
2922 "NOTATION: Name expected here\n");
2923 ctxt->wellFormed = 0;
2924 ctxt->disableSAX = 1;
2925 return;
2926 }
2927 if (!IS_BLANK(CUR)) {
2928 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2929 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2930 ctxt->sax->error(ctxt->userData,
2931 "Space required after the NOTATION name'\n");
2932 ctxt->wellFormed = 0;
2933 ctxt->disableSAX = 1;
2934 return;
2935 }
2936 SKIP_BLANKS;
2937
2938 /*
2939 * Parse the IDs.
2940 */
2941 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
2942 SKIP_BLANKS;
2943
2944 if (RAW == '>') {
2945 if (input != ctxt->input) {
2946 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2947 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2948 ctxt->sax->error(ctxt->userData,
2949"Notation declaration doesn't start and stop in the same entity\n");
2950 ctxt->wellFormed = 0;
2951 ctxt->disableSAX = 1;
2952 }
2953 NEXT;
2954 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
2955 (ctxt->sax->notationDecl != NULL))
2956 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
2957 } else {
2958 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
2959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2960 ctxt->sax->error(ctxt->userData,
2961 "'>' required to close NOTATION declaration\n");
2962 ctxt->wellFormed = 0;
2963 ctxt->disableSAX = 1;
2964 }
2965 xmlFree(name);
2966 if (Systemid != NULL) xmlFree(Systemid);
2967 if (Pubid != NULL) xmlFree(Pubid);
2968 }
2969}
2970
2971/**
2972 * xmlParseEntityDecl:
2973 * @ctxt: an XML parser context
2974 *
2975 * parse <!ENTITY declarations
2976 *
2977 * [70] EntityDecl ::= GEDecl | PEDecl
2978 *
2979 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
2980 *
2981 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
2982 *
2983 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
2984 *
2985 * [74] PEDef ::= EntityValue | ExternalID
2986 *
2987 * [76] NDataDecl ::= S 'NDATA' S Name
2988 *
2989 * [ VC: Notation Declared ]
2990 * The Name must match the declared name of a notation.
2991 */
2992
2993void
2994xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
2995 xmlChar *name = NULL;
2996 xmlChar *value = NULL;
2997 xmlChar *URI = NULL, *literal = NULL;
2998 xmlChar *ndata = NULL;
2999 int isParameter = 0;
3000 xmlChar *orig = NULL;
3001
3002 GROW;
3003 if ((RAW == '<') && (NXT(1) == '!') &&
3004 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3005 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3006 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3007 xmlParserInputPtr input = ctxt->input;
3008 ctxt->instate = XML_PARSER_ENTITY_DECL;
3009 SHRINK;
3010 SKIP(8);
3011 if (!IS_BLANK(CUR)) {
3012 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3013 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3014 ctxt->sax->error(ctxt->userData,
3015 "Space required after '<!ENTITY'\n");
3016 ctxt->wellFormed = 0;
3017 ctxt->disableSAX = 1;
3018 }
3019 SKIP_BLANKS;
3020
3021 if (RAW == '%') {
3022 NEXT;
3023 if (!IS_BLANK(CUR)) {
3024 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3026 ctxt->sax->error(ctxt->userData,
3027 "Space required after '%'\n");
3028 ctxt->wellFormed = 0;
3029 ctxt->disableSAX = 1;
3030 }
3031 SKIP_BLANKS;
3032 isParameter = 1;
3033 }
3034
3035 name = xmlParseName(ctxt);
3036 if (name == NULL) {
3037 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3038 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3039 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3040 ctxt->wellFormed = 0;
3041 ctxt->disableSAX = 1;
3042 return;
3043 }
3044 if (!IS_BLANK(CUR)) {
3045 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3046 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3047 ctxt->sax->error(ctxt->userData,
3048 "Space required after the entity name\n");
3049 ctxt->wellFormed = 0;
3050 ctxt->disableSAX = 1;
3051 }
3052 SKIP_BLANKS;
3053
3054 /*
3055 * handle the various case of definitions...
3056 */
3057 if (isParameter) {
3058 if ((RAW == '"') || (RAW == '\'')) {
3059 value = xmlParseEntityValue(ctxt, &orig);
3060 if (value) {
3061 if ((ctxt->sax != NULL) &&
3062 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3063 ctxt->sax->entityDecl(ctxt->userData, name,
3064 XML_INTERNAL_PARAMETER_ENTITY,
3065 NULL, NULL, value);
3066 }
3067 } else {
3068 URI = xmlParseExternalID(ctxt, &literal, 1);
3069 if ((URI == NULL) && (literal == NULL)) {
3070 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3072 ctxt->sax->error(ctxt->userData,
3073 "Entity value required\n");
3074 ctxt->wellFormed = 0;
3075 ctxt->disableSAX = 1;
3076 }
3077 if (URI) {
3078 xmlURIPtr uri;
3079
3080 uri = xmlParseURI((const char *) URI);
3081 if (uri == NULL) {
3082 ctxt->errNo = XML_ERR_INVALID_URI;
3083 if ((ctxt->sax != NULL) &&
3084 (!ctxt->disableSAX) &&
3085 (ctxt->sax->error != NULL))
3086 ctxt->sax->error(ctxt->userData,
3087 "Invalid URI: %s\n", URI);
3088 ctxt->wellFormed = 0;
3089 } else {
3090 if (uri->fragment != NULL) {
3091 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3092 if ((ctxt->sax != NULL) &&
3093 (!ctxt->disableSAX) &&
3094 (ctxt->sax->error != NULL))
3095 ctxt->sax->error(ctxt->userData,
3096 "Fragment not allowed: %s\n", URI);
3097 ctxt->wellFormed = 0;
3098 } else {
3099 if ((ctxt->sax != NULL) &&
3100 (!ctxt->disableSAX) &&
3101 (ctxt->sax->entityDecl != NULL))
3102 ctxt->sax->entityDecl(ctxt->userData, name,
3103 XML_EXTERNAL_PARAMETER_ENTITY,
3104 literal, URI, NULL);
3105 }
3106 xmlFreeURI(uri);
3107 }
3108 }
3109 }
3110 } else {
3111 if ((RAW == '"') || (RAW == '\'')) {
3112 value = xmlParseEntityValue(ctxt, &orig);
3113 if ((ctxt->sax != NULL) &&
3114 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3115 ctxt->sax->entityDecl(ctxt->userData, name,
3116 XML_INTERNAL_GENERAL_ENTITY,
3117 NULL, NULL, value);
3118 } else {
3119 URI = xmlParseExternalID(ctxt, &literal, 1);
3120 if ((URI == NULL) && (literal == NULL)) {
3121 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3122 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3123 ctxt->sax->error(ctxt->userData,
3124 "Entity value required\n");
3125 ctxt->wellFormed = 0;
3126 ctxt->disableSAX = 1;
3127 }
3128 if (URI) {
3129 xmlURIPtr uri;
3130
3131 uri = xmlParseURI((const char *)URI);
3132 if (uri == NULL) {
3133 ctxt->errNo = XML_ERR_INVALID_URI;
3134 if ((ctxt->sax != NULL) &&
3135 (!ctxt->disableSAX) &&
3136 (ctxt->sax->error != NULL))
3137 ctxt->sax->error(ctxt->userData,
3138 "Invalid URI: %s\n", URI);
3139 ctxt->wellFormed = 0;
3140 } else {
3141 if (uri->fragment != NULL) {
3142 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3143 if ((ctxt->sax != NULL) &&
3144 (!ctxt->disableSAX) &&
3145 (ctxt->sax->error != NULL))
3146 ctxt->sax->error(ctxt->userData,
3147 "Fragment not allowed: %s\n", URI);
3148 ctxt->wellFormed = 0;
3149 }
3150 xmlFreeURI(uri);
3151 }
3152 }
3153 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3154 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3155 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3156 ctxt->sax->error(ctxt->userData,
3157 "Space required before 'NDATA'\n");
3158 ctxt->wellFormed = 0;
3159 ctxt->disableSAX = 1;
3160 }
3161 SKIP_BLANKS;
3162 if ((RAW == 'N') && (NXT(1) == 'D') &&
3163 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3164 (NXT(4) == 'A')) {
3165 SKIP(5);
3166 if (!IS_BLANK(CUR)) {
3167 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3168 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3169 ctxt->sax->error(ctxt->userData,
3170 "Space required after 'NDATA'\n");
3171 ctxt->wellFormed = 0;
3172 ctxt->disableSAX = 1;
3173 }
3174 SKIP_BLANKS;
3175 ndata = xmlParseName(ctxt);
3176 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3177 (ctxt->sax->unparsedEntityDecl != NULL))
3178 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3179 literal, URI, ndata);
3180 } else {
3181 if ((ctxt->sax != NULL) &&
3182 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3183 ctxt->sax->entityDecl(ctxt->userData, name,
3184 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3185 literal, URI, NULL);
3186 }
3187 }
3188 }
3189 SKIP_BLANKS;
3190 if (RAW != '>') {
3191 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3192 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3193 ctxt->sax->error(ctxt->userData,
3194 "xmlParseEntityDecl: entity %s not terminated\n", name);
3195 ctxt->wellFormed = 0;
3196 ctxt->disableSAX = 1;
3197 } else {
3198 if (input != ctxt->input) {
3199 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3201 ctxt->sax->error(ctxt->userData,
3202"Entity declaration doesn't start and stop in the same entity\n");
3203 ctxt->wellFormed = 0;
3204 ctxt->disableSAX = 1;
3205 }
3206 NEXT;
3207 }
3208 if (orig != NULL) {
3209 /*
3210 * Ugly mechanism to save the raw entity value.
3211 */
3212 xmlEntityPtr cur = NULL;
3213
3214 if (isParameter) {
3215 if ((ctxt->sax != NULL) &&
3216 (ctxt->sax->getParameterEntity != NULL))
3217 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3218 } else {
3219 if ((ctxt->sax != NULL) &&
3220 (ctxt->sax->getEntity != NULL))
3221 cur = ctxt->sax->getEntity(ctxt->userData, name);
3222 }
3223 if (cur != NULL) {
3224 if (cur->orig != NULL)
3225 xmlFree(orig);
3226 else
3227 cur->orig = orig;
3228 } else
3229 xmlFree(orig);
3230 }
3231 if (name != NULL) xmlFree(name);
3232 if (value != NULL) xmlFree(value);
3233 if (URI != NULL) xmlFree(URI);
3234 if (literal != NULL) xmlFree(literal);
3235 if (ndata != NULL) xmlFree(ndata);
3236 }
3237}
3238
3239/**
3240 * xmlParseDefaultDecl:
3241 * @ctxt: an XML parser context
3242 * @value: Receive a possible fixed default value for the attribute
3243 *
3244 * Parse an attribute default declaration
3245 *
3246 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3247 *
3248 * [ VC: Required Attribute ]
3249 * if the default declaration is the keyword #REQUIRED, then the
3250 * attribute must be specified for all elements of the type in the
3251 * attribute-list declaration.
3252 *
3253 * [ VC: Attribute Default Legal ]
3254 * The declared default value must meet the lexical constraints of
3255 * the declared attribute type c.f. xmlValidateAttributeDecl()
3256 *
3257 * [ VC: Fixed Attribute Default ]
3258 * if an attribute has a default value declared with the #FIXED
3259 * keyword, instances of that attribute must match the default value.
3260 *
3261 * [ WFC: No < in Attribute Values ]
3262 * handled in xmlParseAttValue()
3263 *
3264 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3265 * or XML_ATTRIBUTE_FIXED.
3266 */
3267
3268int
3269xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3270 int val;
3271 xmlChar *ret;
3272
3273 *value = NULL;
3274 if ((RAW == '#') && (NXT(1) == 'R') &&
3275 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3276 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3277 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3278 (NXT(8) == 'D')) {
3279 SKIP(9);
3280 return(XML_ATTRIBUTE_REQUIRED);
3281 }
3282 if ((RAW == '#') && (NXT(1) == 'I') &&
3283 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3284 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3285 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3286 SKIP(8);
3287 return(XML_ATTRIBUTE_IMPLIED);
3288 }
3289 val = XML_ATTRIBUTE_NONE;
3290 if ((RAW == '#') && (NXT(1) == 'F') &&
3291 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3292 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3293 SKIP(6);
3294 val = XML_ATTRIBUTE_FIXED;
3295 if (!IS_BLANK(CUR)) {
3296 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3297 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3298 ctxt->sax->error(ctxt->userData,
3299 "Space required after '#FIXED'\n");
3300 ctxt->wellFormed = 0;
3301 ctxt->disableSAX = 1;
3302 }
3303 SKIP_BLANKS;
3304 }
3305 ret = xmlParseAttValue(ctxt);
3306 ctxt->instate = XML_PARSER_DTD;
3307 if (ret == NULL) {
3308 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3309 ctxt->sax->error(ctxt->userData,
3310 "Attribute default value declaration error\n");
3311 ctxt->wellFormed = 0;
3312 ctxt->disableSAX = 1;
3313 } else
3314 *value = ret;
3315 return(val);
3316}
3317
3318/**
3319 * xmlParseNotationType:
3320 * @ctxt: an XML parser context
3321 *
3322 * parse an Notation attribute type.
3323 *
3324 * Note: the leading 'NOTATION' S part has already being parsed...
3325 *
3326 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3327 *
3328 * [ VC: Notation Attributes ]
3329 * Values of this type must match one of the notation names included
3330 * in the declaration; all notation names in the declaration must be declared.
3331 *
3332 * Returns: the notation attribute tree built while parsing
3333 */
3334
3335xmlEnumerationPtr
3336xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3337 xmlChar *name;
3338 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3339
3340 if (RAW != '(') {
3341 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3343 ctxt->sax->error(ctxt->userData,
3344 "'(' required to start 'NOTATION'\n");
3345 ctxt->wellFormed = 0;
3346 ctxt->disableSAX = 1;
3347 return(NULL);
3348 }
3349 SHRINK;
3350 do {
3351 NEXT;
3352 SKIP_BLANKS;
3353 name = xmlParseName(ctxt);
3354 if (name == NULL) {
3355 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3356 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3357 ctxt->sax->error(ctxt->userData,
3358 "Name expected in NOTATION declaration\n");
3359 ctxt->wellFormed = 0;
3360 ctxt->disableSAX = 1;
3361 return(ret);
3362 }
3363 cur = xmlCreateEnumeration(name);
3364 xmlFree(name);
3365 if (cur == NULL) return(ret);
3366 if (last == NULL) ret = last = cur;
3367 else {
3368 last->next = cur;
3369 last = cur;
3370 }
3371 SKIP_BLANKS;
3372 } while (RAW == '|');
3373 if (RAW != ')') {
3374 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3376 ctxt->sax->error(ctxt->userData,
3377 "')' required to finish NOTATION declaration\n");
3378 ctxt->wellFormed = 0;
3379 ctxt->disableSAX = 1;
3380 if ((last != NULL) && (last != ret))
3381 xmlFreeEnumeration(last);
3382 return(ret);
3383 }
3384 NEXT;
3385 return(ret);
3386}
3387
3388/**
3389 * xmlParseEnumerationType:
3390 * @ctxt: an XML parser context
3391 *
3392 * parse an Enumeration attribute type.
3393 *
3394 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3395 *
3396 * [ VC: Enumeration ]
3397 * Values of this type must match one of the Nmtoken tokens in
3398 * the declaration
3399 *
3400 * Returns: the enumeration attribute tree built while parsing
3401 */
3402
3403xmlEnumerationPtr
3404xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3405 xmlChar *name;
3406 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3407
3408 if (RAW != '(') {
3409 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3410 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3411 ctxt->sax->error(ctxt->userData,
3412 "'(' required to start ATTLIST enumeration\n");
3413 ctxt->wellFormed = 0;
3414 ctxt->disableSAX = 1;
3415 return(NULL);
3416 }
3417 SHRINK;
3418 do {
3419 NEXT;
3420 SKIP_BLANKS;
3421 name = xmlParseNmtoken(ctxt);
3422 if (name == NULL) {
3423 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3424 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3425 ctxt->sax->error(ctxt->userData,
3426 "NmToken expected in ATTLIST enumeration\n");
3427 ctxt->wellFormed = 0;
3428 ctxt->disableSAX = 1;
3429 return(ret);
3430 }
3431 cur = xmlCreateEnumeration(name);
3432 xmlFree(name);
3433 if (cur == NULL) return(ret);
3434 if (last == NULL) ret = last = cur;
3435 else {
3436 last->next = cur;
3437 last = cur;
3438 }
3439 SKIP_BLANKS;
3440 } while (RAW == '|');
3441 if (RAW != ')') {
3442 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3443 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3444 ctxt->sax->error(ctxt->userData,
3445 "')' required to finish ATTLIST enumeration\n");
3446 ctxt->wellFormed = 0;
3447 ctxt->disableSAX = 1;
3448 return(ret);
3449 }
3450 NEXT;
3451 return(ret);
3452}
3453
3454/**
3455 * xmlParseEnumeratedType:
3456 * @ctxt: an XML parser context
3457 * @tree: the enumeration tree built while parsing
3458 *
3459 * parse an Enumerated attribute type.
3460 *
3461 * [57] EnumeratedType ::= NotationType | Enumeration
3462 *
3463 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3464 *
3465 *
3466 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3467 */
3468
3469int
3470xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3471 if ((RAW == 'N') && (NXT(1) == 'O') &&
3472 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3473 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3474 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3475 SKIP(8);
3476 if (!IS_BLANK(CUR)) {
3477 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3478 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3479 ctxt->sax->error(ctxt->userData,
3480 "Space required after 'NOTATION'\n");
3481 ctxt->wellFormed = 0;
3482 ctxt->disableSAX = 1;
3483 return(0);
3484 }
3485 SKIP_BLANKS;
3486 *tree = xmlParseNotationType(ctxt);
3487 if (*tree == NULL) return(0);
3488 return(XML_ATTRIBUTE_NOTATION);
3489 }
3490 *tree = xmlParseEnumerationType(ctxt);
3491 if (*tree == NULL) return(0);
3492 return(XML_ATTRIBUTE_ENUMERATION);
3493}
3494
3495/**
3496 * xmlParseAttributeType:
3497 * @ctxt: an XML parser context
3498 * @tree: the enumeration tree built while parsing
3499 *
3500 * parse the Attribute list def for an element
3501 *
3502 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3503 *
3504 * [55] StringType ::= 'CDATA'
3505 *
3506 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3507 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3508 *
3509 * Validity constraints for attribute values syntax are checked in
3510 * xmlValidateAttributeValue()
3511 *
3512 * [ VC: ID ]
3513 * Values of type ID must match the Name production. A name must not
3514 * appear more than once in an XML document as a value of this type;
3515 * i.e., ID values must uniquely identify the elements which bear them.
3516 *
3517 * [ VC: One ID per Element Type ]
3518 * No element type may have more than one ID attribute specified.
3519 *
3520 * [ VC: ID Attribute Default ]
3521 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3522 *
3523 * [ VC: IDREF ]
3524 * Values of type IDREF must match the Name production, and values
3525 * of type IDREFS must match Names; each IDREF Name must match the value
3526 * of an ID attribute on some element in the XML document; i.e. IDREF
3527 * values must match the value of some ID attribute.
3528 *
3529 * [ VC: Entity Name ]
3530 * Values of type ENTITY must match the Name production, values
3531 * of type ENTITIES must match Names; each Entity Name must match the
3532 * name of an unparsed entity declared in the DTD.
3533 *
3534 * [ VC: Name Token ]
3535 * Values of type NMTOKEN must match the Nmtoken production; values
3536 * of type NMTOKENS must match Nmtokens.
3537 *
3538 * Returns the attribute type
3539 */
3540int
3541xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3542 SHRINK;
3543 if ((RAW == 'C') && (NXT(1) == 'D') &&
3544 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3545 (NXT(4) == 'A')) {
3546 SKIP(5);
3547 return(XML_ATTRIBUTE_CDATA);
3548 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3549 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3550 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3551 SKIP(6);
3552 return(XML_ATTRIBUTE_IDREFS);
3553 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3554 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3555 (NXT(4) == 'F')) {
3556 SKIP(5);
3557 return(XML_ATTRIBUTE_IDREF);
3558 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3559 SKIP(2);
3560 return(XML_ATTRIBUTE_ID);
3561 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3562 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3563 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3564 SKIP(6);
3565 return(XML_ATTRIBUTE_ENTITY);
3566 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3567 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3568 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3569 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3570 SKIP(8);
3571 return(XML_ATTRIBUTE_ENTITIES);
3572 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3573 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3574 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3575 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3576 SKIP(8);
3577 return(XML_ATTRIBUTE_NMTOKENS);
3578 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3579 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3580 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3581 (NXT(6) == 'N')) {
3582 SKIP(7);
3583 return(XML_ATTRIBUTE_NMTOKEN);
3584 }
3585 return(xmlParseEnumeratedType(ctxt, tree));
3586}
3587
3588/**
3589 * xmlParseAttributeListDecl:
3590 * @ctxt: an XML parser context
3591 *
3592 * : parse the Attribute list def for an element
3593 *
3594 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3595 *
3596 * [53] AttDef ::= S Name S AttType S DefaultDecl
3597 *
3598 */
3599void
3600xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3601 xmlChar *elemName;
3602 xmlChar *attrName;
3603 xmlEnumerationPtr tree;
3604
3605 if ((RAW == '<') && (NXT(1) == '!') &&
3606 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3607 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3608 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3609 (NXT(8) == 'T')) {
3610 xmlParserInputPtr input = ctxt->input;
3611
3612 SKIP(9);
3613 if (!IS_BLANK(CUR)) {
3614 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3615 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3616 ctxt->sax->error(ctxt->userData,
3617 "Space required after '<!ATTLIST'\n");
3618 ctxt->wellFormed = 0;
3619 ctxt->disableSAX = 1;
3620 }
3621 SKIP_BLANKS;
3622 elemName = xmlParseName(ctxt);
3623 if (elemName == NULL) {
3624 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3625 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3626 ctxt->sax->error(ctxt->userData,
3627 "ATTLIST: no name for Element\n");
3628 ctxt->wellFormed = 0;
3629 ctxt->disableSAX = 1;
3630 return;
3631 }
3632 SKIP_BLANKS;
3633 GROW;
3634 while (RAW != '>') {
3635 const xmlChar *check = CUR_PTR;
3636 int type;
3637 int def;
3638 xmlChar *defaultValue = NULL;
3639
3640 GROW;
3641 tree = NULL;
3642 attrName = xmlParseName(ctxt);
3643 if (attrName == NULL) {
3644 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3645 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3646 ctxt->sax->error(ctxt->userData,
3647 "ATTLIST: no name for Attribute\n");
3648 ctxt->wellFormed = 0;
3649 ctxt->disableSAX = 1;
3650 break;
3651 }
3652 GROW;
3653 if (!IS_BLANK(CUR)) {
3654 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3656 ctxt->sax->error(ctxt->userData,
3657 "Space required after the attribute name\n");
3658 ctxt->wellFormed = 0;
3659 ctxt->disableSAX = 1;
3660 if (attrName != NULL)
3661 xmlFree(attrName);
3662 if (defaultValue != NULL)
3663 xmlFree(defaultValue);
3664 break;
3665 }
3666 SKIP_BLANKS;
3667
3668 type = xmlParseAttributeType(ctxt, &tree);
3669 if (type <= 0) {
3670 if (attrName != NULL)
3671 xmlFree(attrName);
3672 if (defaultValue != NULL)
3673 xmlFree(defaultValue);
3674 break;
3675 }
3676
3677 GROW;
3678 if (!IS_BLANK(CUR)) {
3679 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3680 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3681 ctxt->sax->error(ctxt->userData,
3682 "Space required after the attribute type\n");
3683 ctxt->wellFormed = 0;
3684 ctxt->disableSAX = 1;
3685 if (attrName != NULL)
3686 xmlFree(attrName);
3687 if (defaultValue != NULL)
3688 xmlFree(defaultValue);
3689 if (tree != NULL)
3690 xmlFreeEnumeration(tree);
3691 break;
3692 }
3693 SKIP_BLANKS;
3694
3695 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3696 if (def <= 0) {
3697 if (attrName != NULL)
3698 xmlFree(attrName);
3699 if (defaultValue != NULL)
3700 xmlFree(defaultValue);
3701 if (tree != NULL)
3702 xmlFreeEnumeration(tree);
3703 break;
3704 }
3705
3706 GROW;
3707 if (RAW != '>') {
3708 if (!IS_BLANK(CUR)) {
3709 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3710 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3711 ctxt->sax->error(ctxt->userData,
3712 "Space required after the attribute default value\n");
3713 ctxt->wellFormed = 0;
3714 ctxt->disableSAX = 1;
3715 if (attrName != NULL)
3716 xmlFree(attrName);
3717 if (defaultValue != NULL)
3718 xmlFree(defaultValue);
3719 if (tree != NULL)
3720 xmlFreeEnumeration(tree);
3721 break;
3722 }
3723 SKIP_BLANKS;
3724 }
3725 if (check == CUR_PTR) {
3726 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3727 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3728 ctxt->sax->error(ctxt->userData,
3729 "xmlParseAttributeListDecl: detected internal error\n");
3730 if (attrName != NULL)
3731 xmlFree(attrName);
3732 if (defaultValue != NULL)
3733 xmlFree(defaultValue);
3734 if (tree != NULL)
3735 xmlFreeEnumeration(tree);
3736 break;
3737 }
3738 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3739 (ctxt->sax->attributeDecl != NULL))
3740 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3741 type, def, defaultValue, tree);
3742 if (attrName != NULL)
3743 xmlFree(attrName);
3744 if (defaultValue != NULL)
3745 xmlFree(defaultValue);
3746 GROW;
3747 }
3748 if (RAW == '>') {
3749 if (input != ctxt->input) {
3750 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3752 ctxt->sax->error(ctxt->userData,
3753"Attribute list declaration doesn't start and stop in the same entity\n");
3754 ctxt->wellFormed = 0;
3755 ctxt->disableSAX = 1;
3756 }
3757 NEXT;
3758 }
3759
3760 xmlFree(elemName);
3761 }
3762}
3763
3764/**
3765 * xmlParseElementMixedContentDecl:
3766 * @ctxt: an XML parser context
3767 *
3768 * parse the declaration for a Mixed Element content
3769 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3770 *
3771 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3772 * '(' S? '#PCDATA' S? ')'
3773 *
3774 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3775 *
3776 * [ VC: No Duplicate Types ]
3777 * The same name must not appear more than once in a single
3778 * mixed-content declaration.
3779 *
3780 * returns: the list of the xmlElementContentPtr describing the element choices
3781 */
3782xmlElementContentPtr
3783xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3784 xmlElementContentPtr ret = NULL, cur = NULL, n;
3785 xmlChar *elem = NULL;
3786
3787 GROW;
3788 if ((RAW == '#') && (NXT(1) == 'P') &&
3789 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3790 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3791 (NXT(6) == 'A')) {
3792 SKIP(7);
3793 SKIP_BLANKS;
3794 SHRINK;
3795 if (RAW == ')') {
3796 ctxt->entity = ctxt->input;
3797 NEXT;
3798 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3799 if (RAW == '*') {
3800 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3801 NEXT;
3802 }
3803 return(ret);
3804 }
3805 if ((RAW == '(') || (RAW == '|')) {
3806 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3807 if (ret == NULL) return(NULL);
3808 }
3809 while (RAW == '|') {
3810 NEXT;
3811 if (elem == NULL) {
3812 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3813 if (ret == NULL) return(NULL);
3814 ret->c1 = cur;
3815 cur = ret;
3816 } else {
3817 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3818 if (n == NULL) return(NULL);
3819 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3820 cur->c2 = n;
3821 cur = n;
3822 xmlFree(elem);
3823 }
3824 SKIP_BLANKS;
3825 elem = xmlParseName(ctxt);
3826 if (elem == NULL) {
3827 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3828 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3829 ctxt->sax->error(ctxt->userData,
3830 "xmlParseElementMixedContentDecl : Name expected\n");
3831 ctxt->wellFormed = 0;
3832 ctxt->disableSAX = 1;
3833 xmlFreeElementContent(cur);
3834 return(NULL);
3835 }
3836 SKIP_BLANKS;
3837 GROW;
3838 }
3839 if ((RAW == ')') && (NXT(1) == '*')) {
3840 if (elem != NULL) {
3841 cur->c2 = xmlNewElementContent(elem,
3842 XML_ELEMENT_CONTENT_ELEMENT);
3843 xmlFree(elem);
3844 }
3845 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3846 ctxt->entity = ctxt->input;
3847 SKIP(2);
3848 } else {
3849 if (elem != NULL) xmlFree(elem);
3850 xmlFreeElementContent(ret);
3851 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
3852 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3853 ctxt->sax->error(ctxt->userData,
3854 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
3855 ctxt->wellFormed = 0;
3856 ctxt->disableSAX = 1;
3857 return(NULL);
3858 }
3859
3860 } else {
3861 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
3862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3863 ctxt->sax->error(ctxt->userData,
3864 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3865 ctxt->wellFormed = 0;
3866 ctxt->disableSAX = 1;
3867 }
3868 return(ret);
3869}
3870
3871/**
3872 * xmlParseElementChildrenContentDecl:
3873 * @ctxt: an XML parser context
3874 *
3875 * parse the declaration for a Mixed Element content
3876 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3877 *
3878 *
3879 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3880 *
3881 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3882 *
3883 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3884 *
3885 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3886 *
3887 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3888 * TODO Parameter-entity replacement text must be properly nested
3889 * with parenthetized groups. That is to say, if either of the
3890 * opening or closing parentheses in a choice, seq, or Mixed
3891 * construct is contained in the replacement text for a parameter
3892 * entity, both must be contained in the same replacement text. For
3893 * interoperability, if a parameter-entity reference appears in a
3894 * choice, seq, or Mixed construct, its replacement text should not
3895 * be empty, and neither the first nor last non-blank character of
3896 * the replacement text should be a connector (| or ,).
3897 *
3898 * returns: the tree of xmlElementContentPtr describing the element
3899 * hierarchy.
3900 */
3901xmlElementContentPtr
3902#ifdef VMS
3903xmlParseElementChildrenContentD
3904#else
3905xmlParseElementChildrenContentDecl
3906#endif
3907(xmlParserCtxtPtr ctxt) {
3908 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
3909 xmlChar *elem;
3910 xmlChar type = 0;
3911
3912 SKIP_BLANKS;
3913 GROW;
3914 if (RAW == '(') {
3915 /* Recurse on first child */
3916 NEXT;
3917 SKIP_BLANKS;
3918 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3919 SKIP_BLANKS;
3920 GROW;
3921 } else {
3922 elem = xmlParseName(ctxt);
3923 if (elem == NULL) {
3924 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
3925 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3926 ctxt->sax->error(ctxt->userData,
3927 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
3928 ctxt->wellFormed = 0;
3929 ctxt->disableSAX = 1;
3930 return(NULL);
3931 }
3932 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3933 GROW;
3934 if (RAW == '?') {
3935 cur->ocur = XML_ELEMENT_CONTENT_OPT;
3936 NEXT;
3937 } else if (RAW == '*') {
3938 cur->ocur = XML_ELEMENT_CONTENT_MULT;
3939 NEXT;
3940 } else if (RAW == '+') {
3941 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
3942 NEXT;
3943 } else {
3944 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
3945 }
3946 xmlFree(elem);
3947 GROW;
3948 }
3949 SKIP_BLANKS;
3950 SHRINK;
3951 while (RAW != ')') {
3952 /*
3953 * Each loop we parse one separator and one element.
3954 */
3955 if (RAW == ',') {
3956 if (type == 0) type = CUR;
3957
3958 /*
3959 * Detect "Name | Name , Name" error
3960 */
3961 else if (type != CUR) {
3962 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
3963 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3964 ctxt->sax->error(ctxt->userData,
3965 "xmlParseElementChildrenContentDecl : '%c' expected\n",
3966 type);
3967 ctxt->wellFormed = 0;
3968 ctxt->disableSAX = 1;
3969 if ((op != NULL) && (op != ret))
3970 xmlFreeElementContent(op);
3971 if ((last != NULL) && (last != ret) &&
3972 (last != ret->c1) && (last != ret->c2))
3973 xmlFreeElementContent(last);
3974 if (ret != NULL)
3975 xmlFreeElementContent(ret);
3976 return(NULL);
3977 }
3978 NEXT;
3979
3980 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
3981 if (op == NULL) {
3982 xmlFreeElementContent(ret);
3983 return(NULL);
3984 }
3985 if (last == NULL) {
3986 op->c1 = ret;
3987 ret = cur = op;
3988 } else {
3989 cur->c2 = op;
3990 op->c1 = last;
3991 cur =op;
3992 last = NULL;
3993 }
3994 } else if (RAW == '|') {
3995 if (type == 0) type = CUR;
3996
3997 /*
3998 * Detect "Name , Name | Name" error
3999 */
4000 else if (type != CUR) {
4001 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4002 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4003 ctxt->sax->error(ctxt->userData,
4004 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4005 type);
4006 ctxt->wellFormed = 0;
4007 ctxt->disableSAX = 1;
4008 if ((op != NULL) && (op != ret) && (op != last))
4009 xmlFreeElementContent(op);
4010 if ((last != NULL) && (last != ret) &&
4011 (last != ret->c1) && (last != ret->c2))
4012 xmlFreeElementContent(last);
4013 if (ret != NULL)
4014 xmlFreeElementContent(ret);
4015 return(NULL);
4016 }
4017 NEXT;
4018
4019 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4020 if (op == NULL) {
4021 if ((op != NULL) && (op != ret))
4022 xmlFreeElementContent(op);
4023 if ((last != NULL) && (last != ret) &&
4024 (last != ret->c1) && (last != ret->c2))
4025 xmlFreeElementContent(last);
4026 if (ret != NULL)
4027 xmlFreeElementContent(ret);
4028 return(NULL);
4029 }
4030 if (last == NULL) {
4031 op->c1 = ret;
4032 ret = cur = op;
4033 } else {
4034 cur->c2 = op;
4035 op->c1 = last;
4036 cur =op;
4037 last = NULL;
4038 }
4039 } else {
4040 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4042 ctxt->sax->error(ctxt->userData,
4043 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4044 ctxt->wellFormed = 0;
4045 ctxt->disableSAX = 1;
4046 if ((op != NULL) && (op != ret))
4047 xmlFreeElementContent(op);
4048 if ((last != NULL) && (last != ret) &&
4049 (last != ret->c1) && (last != ret->c2))
4050 xmlFreeElementContent(last);
4051 if (ret != NULL)
4052 xmlFreeElementContent(ret);
4053 return(NULL);
4054 }
4055 GROW;
4056 SKIP_BLANKS;
4057 GROW;
4058 if (RAW == '(') {
4059 /* Recurse on second child */
4060 NEXT;
4061 SKIP_BLANKS;
4062 last = xmlParseElementChildrenContentDecl(ctxt);
4063 SKIP_BLANKS;
4064 } else {
4065 elem = xmlParseName(ctxt);
4066 if (elem == NULL) {
4067 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4068 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4069 ctxt->sax->error(ctxt->userData,
4070 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4071 ctxt->wellFormed = 0;
4072 ctxt->disableSAX = 1;
4073 if ((op != NULL) && (op != ret))
4074 xmlFreeElementContent(op);
4075 if ((last != NULL) && (last != ret) &&
4076 (last != ret->c1) && (last != ret->c2))
4077 xmlFreeElementContent(last);
4078 if (ret != NULL)
4079 xmlFreeElementContent(ret);
4080 return(NULL);
4081 }
4082 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4083 xmlFree(elem);
4084 if (RAW == '?') {
4085 last->ocur = XML_ELEMENT_CONTENT_OPT;
4086 NEXT;
4087 } else if (RAW == '*') {
4088 last->ocur = XML_ELEMENT_CONTENT_MULT;
4089 NEXT;
4090 } else if (RAW == '+') {
4091 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4092 NEXT;
4093 } else {
4094 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4095 }
4096 }
4097 SKIP_BLANKS;
4098 GROW;
4099 }
4100 if ((cur != NULL) && (last != NULL)) {
4101 cur->c2 = last;
4102 }
4103 ctxt->entity = ctxt->input;
4104 NEXT;
4105 if (RAW == '?') {
4106 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4107 NEXT;
4108 } else if (RAW == '*') {
4109 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4110 NEXT;
4111 } else if (RAW == '+') {
4112 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4113 NEXT;
4114 }
4115 return(ret);
4116}
4117
4118/**
4119 * xmlParseElementContentDecl:
4120 * @ctxt: an XML parser context
4121 * @name: the name of the element being defined.
4122 * @result: the Element Content pointer will be stored here if any
4123 *
4124 * parse the declaration for an Element content either Mixed or Children,
4125 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4126 *
4127 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4128 *
4129 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4130 */
4131
4132int
4133xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4134 xmlElementContentPtr *result) {
4135
4136 xmlElementContentPtr tree = NULL;
4137 xmlParserInputPtr input = ctxt->input;
4138 int res;
4139
4140 *result = NULL;
4141
4142 if (RAW != '(') {
4143 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4144 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4145 ctxt->sax->error(ctxt->userData,
4146 "xmlParseElementContentDecl : '(' expected\n");
4147 ctxt->wellFormed = 0;
4148 ctxt->disableSAX = 1;
4149 return(-1);
4150 }
4151 NEXT;
4152 GROW;
4153 SKIP_BLANKS;
4154 if ((RAW == '#') && (NXT(1) == 'P') &&
4155 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4156 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4157 (NXT(6) == 'A')) {
4158 tree = xmlParseElementMixedContentDecl(ctxt);
4159 res = XML_ELEMENT_TYPE_MIXED;
4160 } else {
4161 tree = xmlParseElementChildrenContentDecl(ctxt);
4162 res = XML_ELEMENT_TYPE_ELEMENT;
4163 }
4164 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4165 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4166 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4167 ctxt->sax->error(ctxt->userData,
4168"Element content declaration doesn't start and stop in the same entity\n");
4169 ctxt->wellFormed = 0;
4170 ctxt->disableSAX = 1;
4171 }
4172 SKIP_BLANKS;
4173 *result = tree;
4174 return(res);
4175}
4176
4177/**
4178 * xmlParseElementDecl:
4179 * @ctxt: an XML parser context
4180 *
4181 * parse an Element declaration.
4182 *
4183 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4184 *
4185 * [ VC: Unique Element Type Declaration ]
4186 * No element type may be declared more than once
4187 *
4188 * Returns the type of the element, or -1 in case of error
4189 */
4190int
4191xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4192 xmlChar *name;
4193 int ret = -1;
4194 xmlElementContentPtr content = NULL;
4195
4196 GROW;
4197 if ((RAW == '<') && (NXT(1) == '!') &&
4198 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4199 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4200 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4201 (NXT(8) == 'T')) {
4202 xmlParserInputPtr input = ctxt->input;
4203
4204 SKIP(9);
4205 if (!IS_BLANK(CUR)) {
4206 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4207 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4208 ctxt->sax->error(ctxt->userData,
4209 "Space required after 'ELEMENT'\n");
4210 ctxt->wellFormed = 0;
4211 ctxt->disableSAX = 1;
4212 }
4213 SKIP_BLANKS;
4214 name = xmlParseName(ctxt);
4215 if (name == NULL) {
4216 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4217 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4218 ctxt->sax->error(ctxt->userData,
4219 "xmlParseElementDecl: no name for Element\n");
4220 ctxt->wellFormed = 0;
4221 ctxt->disableSAX = 1;
4222 return(-1);
4223 }
4224 while ((RAW == 0) && (ctxt->inputNr > 1))
4225 xmlPopInput(ctxt);
4226 if (!IS_BLANK(CUR)) {
4227 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4228 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4229 ctxt->sax->error(ctxt->userData,
4230 "Space required after the element name\n");
4231 ctxt->wellFormed = 0;
4232 ctxt->disableSAX = 1;
4233 }
4234 SKIP_BLANKS;
4235 if ((RAW == 'E') && (NXT(1) == 'M') &&
4236 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4237 (NXT(4) == 'Y')) {
4238 SKIP(5);
4239 /*
4240 * Element must always be empty.
4241 */
4242 ret = XML_ELEMENT_TYPE_EMPTY;
4243 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4244 (NXT(2) == 'Y')) {
4245 SKIP(3);
4246 /*
4247 * Element is a generic container.
4248 */
4249 ret = XML_ELEMENT_TYPE_ANY;
4250 } else if (RAW == '(') {
4251 ret = xmlParseElementContentDecl(ctxt, name, &content);
4252 } else {
4253 /*
4254 * [ WFC: PEs in Internal Subset ] error handling.
4255 */
4256 if ((RAW == '%') && (ctxt->external == 0) &&
4257 (ctxt->inputNr == 1)) {
4258 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4259 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4260 ctxt->sax->error(ctxt->userData,
4261 "PEReference: forbidden within markup decl in internal subset\n");
4262 } else {
4263 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4265 ctxt->sax->error(ctxt->userData,
4266 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4267 }
4268 ctxt->wellFormed = 0;
4269 ctxt->disableSAX = 1;
4270 if (name != NULL) xmlFree(name);
4271 return(-1);
4272 }
4273
4274 SKIP_BLANKS;
4275 /*
4276 * Pop-up of finished entities.
4277 */
4278 while ((RAW == 0) && (ctxt->inputNr > 1))
4279 xmlPopInput(ctxt);
4280 SKIP_BLANKS;
4281
4282 if (RAW != '>') {
4283 ctxt->errNo = XML_ERR_GT_REQUIRED;
4284 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4285 ctxt->sax->error(ctxt->userData,
4286 "xmlParseElementDecl: expected '>' at the end\n");
4287 ctxt->wellFormed = 0;
4288 ctxt->disableSAX = 1;
4289 } else {
4290 if (input != ctxt->input) {
4291 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4292 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4293 ctxt->sax->error(ctxt->userData,
4294"Element declaration doesn't start and stop in the same entity\n");
4295 ctxt->wellFormed = 0;
4296 ctxt->disableSAX = 1;
4297 }
4298
4299 NEXT;
4300 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4301 (ctxt->sax->elementDecl != NULL))
4302 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4303 content);
4304 }
4305 if (content != NULL) {
4306 xmlFreeElementContent(content);
4307 }
4308 if (name != NULL) {
4309 xmlFree(name);
4310 }
4311 }
4312 return(ret);
4313}
4314
4315/**
4316 * xmlParseMarkupDecl:
4317 * @ctxt: an XML parser context
4318 *
4319 * parse Markup declarations
4320 *
4321 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4322 * NotationDecl | PI | Comment
4323 *
4324 * [ VC: Proper Declaration/PE Nesting ]
4325 * Parameter-entity replacement text must be properly nested with
4326 * markup declarations. That is to say, if either the first character
4327 * or the last character of a markup declaration (markupdecl above) is
4328 * contained in the replacement text for a parameter-entity reference,
4329 * both must be contained in the same replacement text.
4330 *
4331 * [ WFC: PEs in Internal Subset ]
4332 * In the internal DTD subset, parameter-entity references can occur
4333 * only where markup declarations can occur, not within markup declarations.
4334 * (This does not apply to references that occur in external parameter
4335 * entities or to the external subset.)
4336 */
4337void
4338xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4339 GROW;
4340 xmlParseElementDecl(ctxt);
4341 xmlParseAttributeListDecl(ctxt);
4342 xmlParseEntityDecl(ctxt);
4343 xmlParseNotationDecl(ctxt);
4344 xmlParsePI(ctxt);
4345 xmlParseComment(ctxt);
4346 /*
4347 * This is only for internal subset. On external entities,
4348 * the replacement is done before parsing stage
4349 */
4350 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4351 xmlParsePEReference(ctxt);
4352 ctxt->instate = XML_PARSER_DTD;
4353}
4354
4355/**
4356 * xmlParseTextDecl:
4357 * @ctxt: an XML parser context
4358 *
4359 * parse an XML declaration header for external entities
4360 *
4361 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4362 *
4363 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4364 */
4365
4366void
4367xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4368 xmlChar *version;
4369
4370 /*
4371 * We know that '<?xml' is here.
4372 */
4373 if ((RAW == '<') && (NXT(1) == '?') &&
4374 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4375 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4376 SKIP(5);
4377 } else {
4378 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4379 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4380 ctxt->sax->error(ctxt->userData,
4381 "Text declaration '<?xml' required\n");
4382 ctxt->wellFormed = 0;
4383 ctxt->disableSAX = 1;
4384
4385 return;
4386 }
4387
4388 if (!IS_BLANK(CUR)) {
4389 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4390 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4391 ctxt->sax->error(ctxt->userData,
4392 "Space needed after '<?xml'\n");
4393 ctxt->wellFormed = 0;
4394 ctxt->disableSAX = 1;
4395 }
4396 SKIP_BLANKS;
4397
4398 /*
4399 * We may have the VersionInfo here.
4400 */
4401 version = xmlParseVersionInfo(ctxt);
4402 if (version == NULL)
4403 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4404 ctxt->input->version = version;
4405
4406 /*
4407 * We must have the encoding declaration
4408 */
4409 if (!IS_BLANK(CUR)) {
4410 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4412 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4413 ctxt->wellFormed = 0;
4414 ctxt->disableSAX = 1;
4415 }
4416 xmlParseEncodingDecl(ctxt);
4417 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4418 /*
4419 * The XML REC instructs us to stop parsing right here
4420 */
4421 return;
4422 }
4423
4424 SKIP_BLANKS;
4425 if ((RAW == '?') && (NXT(1) == '>')) {
4426 SKIP(2);
4427 } else if (RAW == '>') {
4428 /* Deprecated old WD ... */
4429 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4431 ctxt->sax->error(ctxt->userData,
4432 "XML declaration must end-up with '?>'\n");
4433 ctxt->wellFormed = 0;
4434 ctxt->disableSAX = 1;
4435 NEXT;
4436 } else {
4437 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4438 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4439 ctxt->sax->error(ctxt->userData,
4440 "parsing XML declaration: '?>' expected\n");
4441 ctxt->wellFormed = 0;
4442 ctxt->disableSAX = 1;
4443 MOVETO_ENDTAG(CUR_PTR);
4444 NEXT;
4445 }
4446}
4447
4448/*
4449 * xmlParseConditionalSections
4450 * @ctxt: an XML parser context
4451 *
4452 * [61] conditionalSect ::= includeSect | ignoreSect
4453 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4454 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4455 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4456 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4457 */
4458
4459void
4460xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4461 SKIP(3);
4462 SKIP_BLANKS;
4463 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4464 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4465 (NXT(6) == 'E')) {
4466 SKIP(7);
4467 SKIP_BLANKS;
4468 if (RAW != '[') {
4469 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4470 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4471 ctxt->sax->error(ctxt->userData,
4472 "XML conditional section '[' expected\n");
4473 ctxt->wellFormed = 0;
4474 ctxt->disableSAX = 1;
4475 } else {
4476 NEXT;
4477 }
4478 if (xmlParserDebugEntities) {
4479 if ((ctxt->input != NULL) && (ctxt->input->filename))
4480 xmlGenericError(xmlGenericErrorContext,
4481 "%s(%d): ", ctxt->input->filename,
4482 ctxt->input->line);
4483 xmlGenericError(xmlGenericErrorContext,
4484 "Entering INCLUDE Conditional Section\n");
4485 }
4486
4487 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4488 (NXT(2) != '>'))) {
4489 const xmlChar *check = CUR_PTR;
4490 int cons = ctxt->input->consumed;
4491 int tok = ctxt->token;
4492
4493 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4494 xmlParseConditionalSections(ctxt);
4495 } else if (IS_BLANK(CUR)) {
4496 NEXT;
4497 } else if (RAW == '%') {
4498 xmlParsePEReference(ctxt);
4499 } else
4500 xmlParseMarkupDecl(ctxt);
4501
4502 /*
4503 * Pop-up of finished entities.
4504 */
4505 while ((RAW == 0) && (ctxt->inputNr > 1))
4506 xmlPopInput(ctxt);
4507
4508 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4509 (tok == ctxt->token)) {
4510 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4512 ctxt->sax->error(ctxt->userData,
4513 "Content error in the external subset\n");
4514 ctxt->wellFormed = 0;
4515 ctxt->disableSAX = 1;
4516 break;
4517 }
4518 }
4519 if (xmlParserDebugEntities) {
4520 if ((ctxt->input != NULL) && (ctxt->input->filename))
4521 xmlGenericError(xmlGenericErrorContext,
4522 "%s(%d): ", ctxt->input->filename,
4523 ctxt->input->line);
4524 xmlGenericError(xmlGenericErrorContext,
4525 "Leaving INCLUDE Conditional Section\n");
4526 }
4527
4528 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4529 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4530 int state;
4531 int instate;
4532 int depth = 0;
4533
4534 SKIP(6);
4535 SKIP_BLANKS;
4536 if (RAW != '[') {
4537 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4539 ctxt->sax->error(ctxt->userData,
4540 "XML conditional section '[' expected\n");
4541 ctxt->wellFormed = 0;
4542 ctxt->disableSAX = 1;
4543 } else {
4544 NEXT;
4545 }
4546 if (xmlParserDebugEntities) {
4547 if ((ctxt->input != NULL) && (ctxt->input->filename))
4548 xmlGenericError(xmlGenericErrorContext,
4549 "%s(%d): ", ctxt->input->filename,
4550 ctxt->input->line);
4551 xmlGenericError(xmlGenericErrorContext,
4552 "Entering IGNORE Conditional Section\n");
4553 }
4554
4555 /*
4556 * Parse up to the end of the conditionnal section
4557 * But disable SAX event generating DTD building in the meantime
4558 */
4559 state = ctxt->disableSAX;
4560 instate = ctxt->instate;
4561 ctxt->disableSAX = 1;
4562 ctxt->instate = XML_PARSER_IGNORE;
4563
4564 while (depth >= 0) {
4565 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4566 depth++;
4567 SKIP(3);
4568 continue;
4569 }
4570 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4571 if (--depth >= 0) SKIP(3);
4572 continue;
4573 }
4574 NEXT;
4575 continue;
4576 }
4577
4578 ctxt->disableSAX = state;
4579 ctxt->instate = instate;
4580
4581 if (xmlParserDebugEntities) {
4582 if ((ctxt->input != NULL) && (ctxt->input->filename))
4583 xmlGenericError(xmlGenericErrorContext,
4584 "%s(%d): ", ctxt->input->filename,
4585 ctxt->input->line);
4586 xmlGenericError(xmlGenericErrorContext,
4587 "Leaving IGNORE Conditional Section\n");
4588 }
4589
4590 } else {
4591 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4592 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4593 ctxt->sax->error(ctxt->userData,
4594 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4595 ctxt->wellFormed = 0;
4596 ctxt->disableSAX = 1;
4597 }
4598
4599 if (RAW == 0)
4600 SHRINK;
4601
4602 if (RAW == 0) {
4603 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4604 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4605 ctxt->sax->error(ctxt->userData,
4606 "XML conditional section not closed\n");
4607 ctxt->wellFormed = 0;
4608 ctxt->disableSAX = 1;
4609 } else {
4610 SKIP(3);
4611 }
4612}
4613
4614/**
4615 * xmlParseExternalSubset:
4616 * @ctxt: an XML parser context
4617 * @ExternalID: the external identifier
4618 * @SystemID: the system identifier (or URL)
4619 *
4620 * parse Markup declarations from an external subset
4621 *
4622 * [30] extSubset ::= textDecl? extSubsetDecl
4623 *
4624 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4625 */
4626void
4627xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4628 const xmlChar *SystemID) {
4629 GROW;
4630 if ((RAW == '<') && (NXT(1) == '?') &&
4631 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4632 (NXT(4) == 'l')) {
4633 xmlParseTextDecl(ctxt);
4634 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4635 /*
4636 * The XML REC instructs us to stop parsing right here
4637 */
4638 ctxt->instate = XML_PARSER_EOF;
4639 return;
4640 }
4641 }
4642 if (ctxt->myDoc == NULL) {
4643 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4644 }
4645 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4646 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4647
4648 ctxt->instate = XML_PARSER_DTD;
4649 ctxt->external = 1;
4650 while (((RAW == '<') && (NXT(1) == '?')) ||
4651 ((RAW == '<') && (NXT(1) == '!')) ||
4652 IS_BLANK(CUR)) {
4653 const xmlChar *check = CUR_PTR;
4654 int cons = ctxt->input->consumed;
4655 int tok = ctxt->token;
4656
4657 GROW;
4658 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4659 xmlParseConditionalSections(ctxt);
4660 } else if (IS_BLANK(CUR)) {
4661 NEXT;
4662 } else if (RAW == '%') {
4663 xmlParsePEReference(ctxt);
4664 } else
4665 xmlParseMarkupDecl(ctxt);
4666
4667 /*
4668 * Pop-up of finished entities.
4669 */
4670 while ((RAW == 0) && (ctxt->inputNr > 1))
4671 xmlPopInput(ctxt);
4672
4673 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4674 (tok == ctxt->token)) {
4675 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4676 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4677 ctxt->sax->error(ctxt->userData,
4678 "Content error in the external subset\n");
4679 ctxt->wellFormed = 0;
4680 ctxt->disableSAX = 1;
4681 break;
4682 }
4683 }
4684
4685 if (RAW != 0) {
4686 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4688 ctxt->sax->error(ctxt->userData,
4689 "Extra content at the end of the document\n");
4690 ctxt->wellFormed = 0;
4691 ctxt->disableSAX = 1;
4692 }
4693
4694}
4695
4696/**
4697 * xmlParseReference:
4698 * @ctxt: an XML parser context
4699 *
4700 * parse and handle entity references in content, depending on the SAX
4701 * interface, this may end-up in a call to character() if this is a
4702 * CharRef, a predefined entity, if there is no reference() callback.
4703 * or if the parser was asked to switch to that mode.
4704 *
4705 * [67] Reference ::= EntityRef | CharRef
4706 */
4707void
4708xmlParseReference(xmlParserCtxtPtr ctxt) {
4709 xmlEntityPtr ent;
4710 xmlChar *val;
4711 if (RAW != '&') return;
4712
4713 if (NXT(1) == '#') {
4714 int i = 0;
4715 xmlChar out[10];
4716 int hex = NXT(2);
4717 int val = xmlParseCharRef(ctxt);
4718
4719 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4720 /*
4721 * So we are using non-UTF-8 buffers
4722 * Check that the char fit on 8bits, if not
4723 * generate a CharRef.
4724 */
4725 if (val <= 0xFF) {
4726 out[0] = val;
4727 out[1] = 0;
4728 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4729 (!ctxt->disableSAX))
4730 ctxt->sax->characters(ctxt->userData, out, 1);
4731 } else {
4732 if ((hex == 'x') || (hex == 'X'))
4733 sprintf((char *)out, "#x%X", val);
4734 else
4735 sprintf((char *)out, "#%d", val);
4736 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4737 (!ctxt->disableSAX))
4738 ctxt->sax->reference(ctxt->userData, out);
4739 }
4740 } else {
4741 /*
4742 * Just encode the value in UTF-8
4743 */
4744 COPY_BUF(0 ,out, i, val);
4745 out[i] = 0;
4746 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4747 (!ctxt->disableSAX))
4748 ctxt->sax->characters(ctxt->userData, out, i);
4749 }
4750 } else {
4751 ent = xmlParseEntityRef(ctxt);
4752 if (ent == NULL) return;
4753 if ((ent->name != NULL) &&
4754 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4755 xmlNodePtr list = NULL;
4756 int ret;
4757
4758
4759 /*
4760 * The first reference to the entity trigger a parsing phase
4761 * where the ent->children is filled with the result from
4762 * the parsing.
4763 */
4764 if (ent->children == NULL) {
4765 xmlChar *value;
4766 value = ent->content;
4767
4768 /*
4769 * Check that this entity is well formed
4770 */
4771 if ((value != NULL) &&
4772 (value[1] == 0) && (value[0] == '<') &&
4773 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4774 /*
4775 * DONE: get definite answer on this !!!
4776 * Lots of entity decls are used to declare a single
4777 * char
4778 * <!ENTITY lt "<">
4779 * Which seems to be valid since
4780 * 2.4: The ampersand character (&) and the left angle
4781 * bracket (<) may appear in their literal form only
4782 * when used ... They are also legal within the literal
4783 * entity value of an internal entity declaration;i
4784 * see "4.3.2 Well-Formed Parsed Entities".
4785 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4786 * Looking at the OASIS test suite and James Clark
4787 * tests, this is broken. However the XML REC uses
4788 * it. Is the XML REC not well-formed ????
4789 * This is a hack to avoid this problem
4790 *
4791 * ANSWER: since lt gt amp .. are already defined,
4792 * this is a redefinition and hence the fact that the
4793 * contentis not well balanced is not a Wf error, this
4794 * is lousy but acceptable.
4795 */
4796 list = xmlNewDocText(ctxt->myDoc, value);
4797 if (list != NULL) {
4798 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4799 (ent->children == NULL)) {
4800 ent->children = list;
4801 ent->last = list;
4802 list->parent = (xmlNodePtr) ent;
4803 } else {
4804 xmlFreeNodeList(list);
4805 }
4806 } else if (list != NULL) {
4807 xmlFreeNodeList(list);
4808 }
4809 } else {
4810 /*
4811 * 4.3.2: An internal general parsed entity is well-formed
4812 * if its replacement text matches the production labeled
4813 * content.
4814 */
4815 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4816 ctxt->depth++;
4817 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4818 ctxt->sax, NULL, ctxt->depth,
4819 value, &list);
4820 ctxt->depth--;
4821 } else if (ent->etype ==
4822 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4823 ctxt->depth++;
4824 ret = xmlParseExternalEntity(ctxt->myDoc,
4825 ctxt->sax, NULL, ctxt->depth,
4826 ent->URI, ent->ExternalID, &list);
4827 ctxt->depth--;
4828 } else {
4829 ret = -1;
4830 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4831 ctxt->sax->error(ctxt->userData,
4832 "Internal: invalid entity type\n");
4833 }
4834 if (ret == XML_ERR_ENTITY_LOOP) {
4835 ctxt->errNo = XML_ERR_ENTITY_LOOP;
4836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4837 ctxt->sax->error(ctxt->userData,
4838 "Detected entity reference loop\n");
4839 ctxt->wellFormed = 0;
4840 ctxt->disableSAX = 1;
4841 } else if ((ret == 0) && (list != NULL)) {
4842 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4843 (ent->children == NULL)) {
4844 ent->children = list;
4845 while (list != NULL) {
4846 list->parent = (xmlNodePtr) ent;
4847 if (list->next == NULL)
4848 ent->last = list;
4849 list = list->next;
4850 }
4851 } else {
4852 xmlFreeNodeList(list);
4853 }
4854 } else if (ret > 0) {
4855 ctxt->errNo = ret;
4856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4857 ctxt->sax->error(ctxt->userData,
4858 "Entity value required\n");
4859 ctxt->wellFormed = 0;
4860 ctxt->disableSAX = 1;
4861 } else if (list != NULL) {
4862 xmlFreeNodeList(list);
4863 }
4864 }
4865 }
4866 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4867 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
4868 /*
4869 * Create a node.
4870 */
4871 ctxt->sax->reference(ctxt->userData, ent->name);
4872 return;
4873 } else if (ctxt->replaceEntities) {
4874 if ((ctxt->node != NULL) && (ent->children != NULL)) {
4875 /*
4876 * Seems we are generating the DOM content, do
4877 * a simple tree copy
4878 */
4879 xmlNodePtr new;
4880 new = xmlCopyNodeList(ent->children);
4881
4882 xmlAddChildList(ctxt->node, new);
4883 /*
4884 * This is to avoid a nasty side effect, see
4885 * characters() in SAX.c
4886 */
4887 ctxt->nodemem = 0;
4888 ctxt->nodelen = 0;
4889 return;
4890 } else {
4891 /*
4892 * Probably running in SAX mode
4893 */
4894 xmlParserInputPtr input;
4895
4896 input = xmlNewEntityInputStream(ctxt, ent);
4897 xmlPushInput(ctxt, input);
4898 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
4899 (RAW == '<') && (NXT(1) == '?') &&
4900 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4901 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4902 xmlParseTextDecl(ctxt);
4903 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4904 /*
4905 * The XML REC instructs us to stop parsing right here
4906 */
4907 ctxt->instate = XML_PARSER_EOF;
4908 return;
4909 }
4910 if (input->standalone == 1) {
4911 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
4912 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4913 ctxt->sax->error(ctxt->userData,
4914 "external parsed entities cannot be standalone\n");
4915 ctxt->wellFormed = 0;
4916 ctxt->disableSAX = 1;
4917 }
4918 }
4919 return;
4920 }
4921 }
4922 } else {
4923 val = ent->content;
4924 if (val == NULL) return;
4925 /*
4926 * inline the entity.
4927 */
4928 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4929 (!ctxt->disableSAX))
4930 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
4931 }
4932 }
4933}
4934
4935/**
4936 * xmlParseEntityRef:
4937 * @ctxt: an XML parser context
4938 *
4939 * parse ENTITY references declarations
4940 *
4941 * [68] EntityRef ::= '&' Name ';'
4942 *
4943 * [ WFC: Entity Declared ]
4944 * In a document without any DTD, a document with only an internal DTD
4945 * subset which contains no parameter entity references, or a document
4946 * with "standalone='yes'", the Name given in the entity reference
4947 * must match that in an entity declaration, except that well-formed
4948 * documents need not declare any of the following entities: amp, lt,
4949 * gt, apos, quot. The declaration of a parameter entity must precede
4950 * any reference to it. Similarly, the declaration of a general entity
4951 * must precede any reference to it which appears in a default value in an
4952 * attribute-list declaration. Note that if entities are declared in the
4953 * external subset or in external parameter entities, a non-validating
4954 * processor is not obligated to read and process their declarations;
4955 * for such documents, the rule that an entity must be declared is a
4956 * well-formedness constraint only if standalone='yes'.
4957 *
4958 * [ WFC: Parsed Entity ]
4959 * An entity reference must not contain the name of an unparsed entity
4960 *
4961 * Returns the xmlEntityPtr if found, or NULL otherwise.
4962 */
4963xmlEntityPtr
4964xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
4965 xmlChar *name;
4966 xmlEntityPtr ent = NULL;
4967
4968 GROW;
4969
4970 if (RAW == '&') {
4971 NEXT;
4972 name = xmlParseName(ctxt);
4973 if (name == NULL) {
4974 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4975 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4976 ctxt->sax->error(ctxt->userData,
4977 "xmlParseEntityRef: no name\n");
4978 ctxt->wellFormed = 0;
4979 ctxt->disableSAX = 1;
4980 } else {
4981 if (RAW == ';') {
4982 NEXT;
4983 /*
4984 * Ask first SAX for entity resolution, otherwise try the
4985 * predefined set.
4986 */
4987 if (ctxt->sax != NULL) {
4988 if (ctxt->sax->getEntity != NULL)
4989 ent = ctxt->sax->getEntity(ctxt->userData, name);
4990 if (ent == NULL)
4991 ent = xmlGetPredefinedEntity(name);
4992 }
4993 /*
4994 * [ WFC: Entity Declared ]
4995 * In a document without any DTD, a document with only an
4996 * internal DTD subset which contains no parameter entity
4997 * references, or a document with "standalone='yes'", the
4998 * Name given in the entity reference must match that in an
4999 * entity declaration, except that well-formed documents
5000 * need not declare any of the following entities: amp, lt,
5001 * gt, apos, quot.
5002 * The declaration of a parameter entity must precede any
5003 * reference to it.
5004 * Similarly, the declaration of a general entity must
5005 * precede any reference to it which appears in a default
5006 * value in an attribute-list declaration. Note that if
5007 * entities are declared in the external subset or in
5008 * external parameter entities, a non-validating processor
5009 * is not obligated to read and process their declarations;
5010 * for such documents, the rule that an entity must be
5011 * declared is a well-formedness constraint only if
5012 * standalone='yes'.
5013 */
5014 if (ent == NULL) {
5015 if ((ctxt->standalone == 1) ||
5016 ((ctxt->hasExternalSubset == 0) &&
5017 (ctxt->hasPErefs == 0))) {
5018 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5019 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5020 ctxt->sax->error(ctxt->userData,
5021 "Entity '%s' not defined\n", name);
5022 ctxt->wellFormed = 0;
5023 ctxt->disableSAX = 1;
5024 } else {
5025 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5026 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5027 ctxt->sax->warning(ctxt->userData,
5028 "Entity '%s' not defined\n", name);
5029 }
5030 }
5031
5032 /*
5033 * [ WFC: Parsed Entity ]
5034 * An entity reference must not contain the name of an
5035 * unparsed entity
5036 */
5037 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5038 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5039 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5040 ctxt->sax->error(ctxt->userData,
5041 "Entity reference to unparsed entity %s\n", name);
5042 ctxt->wellFormed = 0;
5043 ctxt->disableSAX = 1;
5044 }
5045
5046 /*
5047 * [ WFC: No External Entity References ]
5048 * Attribute values cannot contain direct or indirect
5049 * entity references to external entities.
5050 */
5051 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5052 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5053 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5054 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5055 ctxt->sax->error(ctxt->userData,
5056 "Attribute references external entity '%s'\n", name);
5057 ctxt->wellFormed = 0;
5058 ctxt->disableSAX = 1;
5059 }
5060 /*
5061 * [ WFC: No < in Attribute Values ]
5062 * The replacement text of any entity referred to directly or
5063 * indirectly in an attribute value (other than "&lt;") must
5064 * not contain a <.
5065 */
5066 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5067 (ent != NULL) &&
5068 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5069 (ent->content != NULL) &&
5070 (xmlStrchr(ent->content, '<'))) {
5071 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5072 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5073 ctxt->sax->error(ctxt->userData,
5074 "'<' in entity '%s' is not allowed in attributes values\n", name);
5075 ctxt->wellFormed = 0;
5076 ctxt->disableSAX = 1;
5077 }
5078
5079 /*
5080 * Internal check, no parameter entities here ...
5081 */
5082 else {
5083 switch (ent->etype) {
5084 case XML_INTERNAL_PARAMETER_ENTITY:
5085 case XML_EXTERNAL_PARAMETER_ENTITY:
5086 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5087 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5088 ctxt->sax->error(ctxt->userData,
5089 "Attempt to reference the parameter entity '%s'\n", name);
5090 ctxt->wellFormed = 0;
5091 ctxt->disableSAX = 1;
5092 break;
5093 default:
5094 break;
5095 }
5096 }
5097
5098 /*
5099 * [ WFC: No Recursion ]
5100 * A parsed entity must not contain a recursive reference
5101 * to itself, either directly or indirectly.
5102 * Done somewhere else
5103 */
5104
5105 } else {
5106 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5107 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5108 ctxt->sax->error(ctxt->userData,
5109 "xmlParseEntityRef: expecting ';'\n");
5110 ctxt->wellFormed = 0;
5111 ctxt->disableSAX = 1;
5112 }
5113 xmlFree(name);
5114 }
5115 }
5116 return(ent);
5117}
5118
5119/**
5120 * xmlParseStringEntityRef:
5121 * @ctxt: an XML parser context
5122 * @str: a pointer to an index in the string
5123 *
5124 * parse ENTITY references declarations, but this version parses it from
5125 * a string value.
5126 *
5127 * [68] EntityRef ::= '&' Name ';'
5128 *
5129 * [ WFC: Entity Declared ]
5130 * In a document without any DTD, a document with only an internal DTD
5131 * subset which contains no parameter entity references, or a document
5132 * with "standalone='yes'", the Name given in the entity reference
5133 * must match that in an entity declaration, except that well-formed
5134 * documents need not declare any of the following entities: amp, lt,
5135 * gt, apos, quot. The declaration of a parameter entity must precede
5136 * any reference to it. Similarly, the declaration of a general entity
5137 * must precede any reference to it which appears in a default value in an
5138 * attribute-list declaration. Note that if entities are declared in the
5139 * external subset or in external parameter entities, a non-validating
5140 * processor is not obligated to read and process their declarations;
5141 * for such documents, the rule that an entity must be declared is a
5142 * well-formedness constraint only if standalone='yes'.
5143 *
5144 * [ WFC: Parsed Entity ]
5145 * An entity reference must not contain the name of an unparsed entity
5146 *
5147 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5148 * is updated to the current location in the string.
5149 */
5150xmlEntityPtr
5151xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5152 xmlChar *name;
5153 const xmlChar *ptr;
5154 xmlChar cur;
5155 xmlEntityPtr ent = NULL;
5156
5157 if ((str == NULL) || (*str == NULL))
5158 return(NULL);
5159 ptr = *str;
5160 cur = *ptr;
5161 if (cur == '&') {
5162 ptr++;
5163 cur = *ptr;
5164 name = xmlParseStringName(ctxt, &ptr);
5165 if (name == NULL) {
5166 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5167 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5168 ctxt->sax->error(ctxt->userData,
5169 "xmlParseEntityRef: no name\n");
5170 ctxt->wellFormed = 0;
5171 ctxt->disableSAX = 1;
5172 } else {
5173 if (*ptr == ';') {
5174 ptr++;
5175 /*
5176 * Ask first SAX for entity resolution, otherwise try the
5177 * predefined set.
5178 */
5179 if (ctxt->sax != NULL) {
5180 if (ctxt->sax->getEntity != NULL)
5181 ent = ctxt->sax->getEntity(ctxt->userData, name);
5182 if (ent == NULL)
5183 ent = xmlGetPredefinedEntity(name);
5184 }
5185 /*
5186 * [ WFC: Entity Declared ]
5187 * In a document without any DTD, a document with only an
5188 * internal DTD subset which contains no parameter entity
5189 * references, or a document with "standalone='yes'", the
5190 * Name given in the entity reference must match that in an
5191 * entity declaration, except that well-formed documents
5192 * need not declare any of the following entities: amp, lt,
5193 * gt, apos, quot.
5194 * The declaration of a parameter entity must precede any
5195 * reference to it.
5196 * Similarly, the declaration of a general entity must
5197 * precede any reference to it which appears in a default
5198 * value in an attribute-list declaration. Note that if
5199 * entities are declared in the external subset or in
5200 * external parameter entities, a non-validating processor
5201 * is not obligated to read and process their declarations;
5202 * for such documents, the rule that an entity must be
5203 * declared is a well-formedness constraint only if
5204 * standalone='yes'.
5205 */
5206 if (ent == NULL) {
5207 if ((ctxt->standalone == 1) ||
5208 ((ctxt->hasExternalSubset == 0) &&
5209 (ctxt->hasPErefs == 0))) {
5210 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5211 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5212 ctxt->sax->error(ctxt->userData,
5213 "Entity '%s' not defined\n", name);
5214 ctxt->wellFormed = 0;
5215 ctxt->disableSAX = 1;
5216 } else {
5217 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5218 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5219 ctxt->sax->warning(ctxt->userData,
5220 "Entity '%s' not defined\n", name);
5221 }
5222 }
5223
5224 /*
5225 * [ WFC: Parsed Entity ]
5226 * An entity reference must not contain the name of an
5227 * unparsed entity
5228 */
5229 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5230 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5231 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5232 ctxt->sax->error(ctxt->userData,
5233 "Entity reference to unparsed entity %s\n", name);
5234 ctxt->wellFormed = 0;
5235 ctxt->disableSAX = 1;
5236 }
5237
5238 /*
5239 * [ WFC: No External Entity References ]
5240 * Attribute values cannot contain direct or indirect
5241 * entity references to external entities.
5242 */
5243 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5244 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5245 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5247 ctxt->sax->error(ctxt->userData,
5248 "Attribute references external entity '%s'\n", name);
5249 ctxt->wellFormed = 0;
5250 ctxt->disableSAX = 1;
5251 }
5252 /*
5253 * [ WFC: No < in Attribute Values ]
5254 * The replacement text of any entity referred to directly or
5255 * indirectly in an attribute value (other than "&lt;") must
5256 * not contain a <.
5257 */
5258 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5259 (ent != NULL) &&
5260 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5261 (ent->content != NULL) &&
5262 (xmlStrchr(ent->content, '<'))) {
5263 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5265 ctxt->sax->error(ctxt->userData,
5266 "'<' in entity '%s' is not allowed in attributes values\n", name);
5267 ctxt->wellFormed = 0;
5268 ctxt->disableSAX = 1;
5269 }
5270
5271 /*
5272 * Internal check, no parameter entities here ...
5273 */
5274 else {
5275 switch (ent->etype) {
5276 case XML_INTERNAL_PARAMETER_ENTITY:
5277 case XML_EXTERNAL_PARAMETER_ENTITY:
5278 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5279 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5280 ctxt->sax->error(ctxt->userData,
5281 "Attempt to reference the parameter entity '%s'\n", name);
5282 ctxt->wellFormed = 0;
5283 ctxt->disableSAX = 1;
5284 break;
5285 default:
5286 break;
5287 }
5288 }
5289
5290 /*
5291 * [ WFC: No Recursion ]
5292 * A parsed entity must not contain a recursive reference
5293 * to itself, either directly or indirectly.
5294 * Done somewhwere else
5295 */
5296
5297 } else {
5298 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5299 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5300 ctxt->sax->error(ctxt->userData,
5301 "xmlParseEntityRef: expecting ';'\n");
5302 ctxt->wellFormed = 0;
5303 ctxt->disableSAX = 1;
5304 }
5305 xmlFree(name);
5306 }
5307 }
5308 *str = ptr;
5309 return(ent);
5310}
5311
5312/**
5313 * xmlParsePEReference:
5314 * @ctxt: an XML parser context
5315 *
5316 * parse PEReference declarations
5317 * The entity content is handled directly by pushing it's content as
5318 * a new input stream.
5319 *
5320 * [69] PEReference ::= '%' Name ';'
5321 *
5322 * [ WFC: No Recursion ]
5323 * A parsed entity must not contain a recursive
5324 * reference to itself, either directly or indirectly.
5325 *
5326 * [ WFC: Entity Declared ]
5327 * In a document without any DTD, a document with only an internal DTD
5328 * subset which contains no parameter entity references, or a document
5329 * with "standalone='yes'", ... ... The declaration of a parameter
5330 * entity must precede any reference to it...
5331 *
5332 * [ VC: Entity Declared ]
5333 * In a document with an external subset or external parameter entities
5334 * with "standalone='no'", ... ... The declaration of a parameter entity
5335 * must precede any reference to it...
5336 *
5337 * [ WFC: In DTD ]
5338 * Parameter-entity references may only appear in the DTD.
5339 * NOTE: misleading but this is handled.
5340 */
5341void
5342xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5343 xmlChar *name;
5344 xmlEntityPtr entity = NULL;
5345 xmlParserInputPtr input;
5346
5347 if (RAW == '%') {
5348 NEXT;
5349 name = xmlParseName(ctxt);
5350 if (name == NULL) {
5351 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5352 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5353 ctxt->sax->error(ctxt->userData,
5354 "xmlParsePEReference: no name\n");
5355 ctxt->wellFormed = 0;
5356 ctxt->disableSAX = 1;
5357 } else {
5358 if (RAW == ';') {
5359 NEXT;
5360 if ((ctxt->sax != NULL) &&
5361 (ctxt->sax->getParameterEntity != NULL))
5362 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5363 name);
5364 if (entity == NULL) {
5365 /*
5366 * [ WFC: Entity Declared ]
5367 * In a document without any DTD, a document with only an
5368 * internal DTD subset which contains no parameter entity
5369 * references, or a document with "standalone='yes'", ...
5370 * ... The declaration of a parameter entity must precede
5371 * any reference to it...
5372 */
5373 if ((ctxt->standalone == 1) ||
5374 ((ctxt->hasExternalSubset == 0) &&
5375 (ctxt->hasPErefs == 0))) {
5376 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5377 if ((!ctxt->disableSAX) &&
5378 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5379 ctxt->sax->error(ctxt->userData,
5380 "PEReference: %%%s; not found\n", name);
5381 ctxt->wellFormed = 0;
5382 ctxt->disableSAX = 1;
5383 } else {
5384 /*
5385 * [ VC: Entity Declared ]
5386 * In a document with an external subset or external
5387 * parameter entities with "standalone='no'", ...
5388 * ... The declaration of a parameter entity must precede
5389 * any reference to it...
5390 */
5391 if ((!ctxt->disableSAX) &&
5392 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5393 ctxt->sax->warning(ctxt->userData,
5394 "PEReference: %%%s; not found\n", name);
5395 ctxt->valid = 0;
5396 }
5397 } else {
5398 /*
5399 * Internal checking in case the entity quest barfed
5400 */
5401 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5402 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5403 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5404 ctxt->sax->warning(ctxt->userData,
5405 "Internal: %%%s; is not a parameter entity\n", name);
5406 } else {
5407 /*
5408 * TODO !!!
5409 * handle the extra spaces added before and after
5410 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5411 */
5412 input = xmlNewEntityInputStream(ctxt, entity);
5413 xmlPushInput(ctxt, input);
5414 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5415 (RAW == '<') && (NXT(1) == '?') &&
5416 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5417 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5418 xmlParseTextDecl(ctxt);
5419 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5420 /*
5421 * The XML REC instructs us to stop parsing
5422 * right here
5423 */
5424 ctxt->instate = XML_PARSER_EOF;
5425 xmlFree(name);
5426 return;
5427 }
5428 }
5429 if (ctxt->token == 0)
5430 ctxt->token = ' ';
5431 }
5432 }
5433 ctxt->hasPErefs = 1;
5434 } else {
5435 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5436 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5437 ctxt->sax->error(ctxt->userData,
5438 "xmlParsePEReference: expecting ';'\n");
5439 ctxt->wellFormed = 0;
5440 ctxt->disableSAX = 1;
5441 }
5442 xmlFree(name);
5443 }
5444 }
5445}
5446
5447/**
5448 * xmlParseStringPEReference:
5449 * @ctxt: an XML parser context
5450 * @str: a pointer to an index in the string
5451 *
5452 * parse PEReference declarations
5453 *
5454 * [69] PEReference ::= '%' Name ';'
5455 *
5456 * [ WFC: No Recursion ]
5457 * A parsed entity must not contain a recursive
5458 * reference to itself, either directly or indirectly.
5459 *
5460 * [ WFC: Entity Declared ]
5461 * In a document without any DTD, a document with only an internal DTD
5462 * subset which contains no parameter entity references, or a document
5463 * with "standalone='yes'", ... ... The declaration of a parameter
5464 * entity must precede any reference to it...
5465 *
5466 * [ VC: Entity Declared ]
5467 * In a document with an external subset or external parameter entities
5468 * with "standalone='no'", ... ... The declaration of a parameter entity
5469 * must precede any reference to it...
5470 *
5471 * [ WFC: In DTD ]
5472 * Parameter-entity references may only appear in the DTD.
5473 * NOTE: misleading but this is handled.
5474 *
5475 * Returns the string of the entity content.
5476 * str is updated to the current value of the index
5477 */
5478xmlEntityPtr
5479xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5480 const xmlChar *ptr;
5481 xmlChar cur;
5482 xmlChar *name;
5483 xmlEntityPtr entity = NULL;
5484
5485 if ((str == NULL) || (*str == NULL)) return(NULL);
5486 ptr = *str;
5487 cur = *ptr;
5488 if (cur == '%') {
5489 ptr++;
5490 cur = *ptr;
5491 name = xmlParseStringName(ctxt, &ptr);
5492 if (name == NULL) {
5493 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5494 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5495 ctxt->sax->error(ctxt->userData,
5496 "xmlParseStringPEReference: no name\n");
5497 ctxt->wellFormed = 0;
5498 ctxt->disableSAX = 1;
5499 } else {
5500 cur = *ptr;
5501 if (cur == ';') {
5502 ptr++;
5503 cur = *ptr;
5504 if ((ctxt->sax != NULL) &&
5505 (ctxt->sax->getParameterEntity != NULL))
5506 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5507 name);
5508 if (entity == NULL) {
5509 /*
5510 * [ WFC: Entity Declared ]
5511 * In a document without any DTD, a document with only an
5512 * internal DTD subset which contains no parameter entity
5513 * references, or a document with "standalone='yes'", ...
5514 * ... The declaration of a parameter entity must precede
5515 * any reference to it...
5516 */
5517 if ((ctxt->standalone == 1) ||
5518 ((ctxt->hasExternalSubset == 0) &&
5519 (ctxt->hasPErefs == 0))) {
5520 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5522 ctxt->sax->error(ctxt->userData,
5523 "PEReference: %%%s; not found\n", name);
5524 ctxt->wellFormed = 0;
5525 ctxt->disableSAX = 1;
5526 } else {
5527 /*
5528 * [ VC: Entity Declared ]
5529 * In a document with an external subset or external
5530 * parameter entities with "standalone='no'", ...
5531 * ... The declaration of a parameter entity must
5532 * precede any reference to it...
5533 */
5534 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5535 ctxt->sax->warning(ctxt->userData,
5536 "PEReference: %%%s; not found\n", name);
5537 ctxt->valid = 0;
5538 }
5539 } else {
5540 /*
5541 * Internal checking in case the entity quest barfed
5542 */
5543 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5544 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5545 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5546 ctxt->sax->warning(ctxt->userData,
5547 "Internal: %%%s; is not a parameter entity\n", name);
5548 }
5549 }
5550 ctxt->hasPErefs = 1;
5551 } else {
5552 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5554 ctxt->sax->error(ctxt->userData,
5555 "xmlParseStringPEReference: expecting ';'\n");
5556 ctxt->wellFormed = 0;
5557 ctxt->disableSAX = 1;
5558 }
5559 xmlFree(name);
5560 }
5561 }
5562 *str = ptr;
5563 return(entity);
5564}
5565
5566/**
5567 * xmlParseDocTypeDecl:
5568 * @ctxt: an XML parser context
5569 *
5570 * parse a DOCTYPE declaration
5571 *
5572 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5573 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5574 *
5575 * [ VC: Root Element Type ]
5576 * The Name in the document type declaration must match the element
5577 * type of the root element.
5578 */
5579
5580void
5581xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5582 xmlChar *name = NULL;
5583 xmlChar *ExternalID = NULL;
5584 xmlChar *URI = NULL;
5585
5586 /*
5587 * We know that '<!DOCTYPE' has been detected.
5588 */
5589 SKIP(9);
5590
5591 SKIP_BLANKS;
5592
5593 /*
5594 * Parse the DOCTYPE name.
5595 */
5596 name = xmlParseName(ctxt);
5597 if (name == NULL) {
5598 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5600 ctxt->sax->error(ctxt->userData,
5601 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5602 ctxt->wellFormed = 0;
5603 ctxt->disableSAX = 1;
5604 }
5605 ctxt->intSubName = name;
5606
5607 SKIP_BLANKS;
5608
5609 /*
5610 * Check for SystemID and ExternalID
5611 */
5612 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5613
5614 if ((URI != NULL) || (ExternalID != NULL)) {
5615 ctxt->hasExternalSubset = 1;
5616 }
5617 ctxt->extSubURI = URI;
5618 ctxt->extSubSystem = ExternalID;
5619
5620 SKIP_BLANKS;
5621
5622 /*
5623 * Create and update the internal subset.
5624 */
5625 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5626 (!ctxt->disableSAX))
5627 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5628
5629 /*
5630 * Is there any internal subset declarations ?
5631 * they are handled separately in xmlParseInternalSubset()
5632 */
5633 if (RAW == '[')
5634 return;
5635
5636 /*
5637 * We should be at the end of the DOCTYPE declaration.
5638 */
5639 if (RAW != '>') {
5640 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5641 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5642 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5643 ctxt->wellFormed = 0;
5644 ctxt->disableSAX = 1;
5645 }
5646 NEXT;
5647}
5648
5649/**
5650 * xmlParseInternalsubset:
5651 * @ctxt: an XML parser context
5652 *
5653 * parse the internal subset declaration
5654 *
5655 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5656 */
5657
5658void
5659xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5660 /*
5661 * Is there any DTD definition ?
5662 */
5663 if (RAW == '[') {
5664 ctxt->instate = XML_PARSER_DTD;
5665 NEXT;
5666 /*
5667 * Parse the succession of Markup declarations and
5668 * PEReferences.
5669 * Subsequence (markupdecl | PEReference | S)*
5670 */
5671 while (RAW != ']') {
5672 const xmlChar *check = CUR_PTR;
5673 int cons = ctxt->input->consumed;
5674
5675 SKIP_BLANKS;
5676 xmlParseMarkupDecl(ctxt);
5677 xmlParsePEReference(ctxt);
5678
5679 /*
5680 * Pop-up of finished entities.
5681 */
5682 while ((RAW == 0) && (ctxt->inputNr > 1))
5683 xmlPopInput(ctxt);
5684
5685 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5686 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5688 ctxt->sax->error(ctxt->userData,
5689 "xmlParseInternalSubset: error detected in Markup declaration\n");
5690 ctxt->wellFormed = 0;
5691 ctxt->disableSAX = 1;
5692 break;
5693 }
5694 }
5695 if (RAW == ']') {
5696 NEXT;
5697 SKIP_BLANKS;
5698 }
5699 }
5700
5701 /*
5702 * We should be at the end of the DOCTYPE declaration.
5703 */
5704 if (RAW != '>') {
5705 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5706 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5707 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5708 ctxt->wellFormed = 0;
5709 ctxt->disableSAX = 1;
5710 }
5711 NEXT;
5712}
5713
5714/**
5715 * xmlParseAttribute:
5716 * @ctxt: an XML parser context
5717 * @value: a xmlChar ** used to store the value of the attribute
5718 *
5719 * parse an attribute
5720 *
5721 * [41] Attribute ::= Name Eq AttValue
5722 *
5723 * [ WFC: No External Entity References ]
5724 * Attribute values cannot contain direct or indirect entity references
5725 * to external entities.
5726 *
5727 * [ WFC: No < in Attribute Values ]
5728 * The replacement text of any entity referred to directly or indirectly in
5729 * an attribute value (other than "&lt;") must not contain a <.
5730 *
5731 * [ VC: Attribute Value Type ]
5732 * The attribute must have been declared; the value must be of the type
5733 * declared for it.
5734 *
5735 * [25] Eq ::= S? '=' S?
5736 *
5737 * With namespace:
5738 *
5739 * [NS 11] Attribute ::= QName Eq AttValue
5740 *
5741 * Also the case QName == xmlns:??? is handled independently as a namespace
5742 * definition.
5743 *
5744 * Returns the attribute name, and the value in *value.
5745 */
5746
5747xmlChar *
5748xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5749 xmlChar *name, *val;
5750
5751 *value = NULL;
5752 name = xmlParseName(ctxt);
5753 if (name == NULL) {
5754 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5755 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5756 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5757 ctxt->wellFormed = 0;
5758 ctxt->disableSAX = 1;
5759 return(NULL);
5760 }
5761
5762 /*
5763 * read the value
5764 */
5765 SKIP_BLANKS;
5766 if (RAW == '=') {
5767 NEXT;
5768 SKIP_BLANKS;
5769 val = xmlParseAttValue(ctxt);
5770 ctxt->instate = XML_PARSER_CONTENT;
5771 } else {
5772 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5773 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5774 ctxt->sax->error(ctxt->userData,
5775 "Specification mandate value for attribute %s\n", name);
5776 ctxt->wellFormed = 0;
5777 ctxt->disableSAX = 1;
5778 xmlFree(name);
5779 return(NULL);
5780 }
5781
5782 /*
5783 * Check that xml:lang conforms to the specification
5784 * No more registered as an error, just generate a warning now
5785 * since this was deprecated in XML second edition
5786 */
5787 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5788 if (!xmlCheckLanguageID(val)) {
5789 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5790 ctxt->sax->warning(ctxt->userData,
5791 "Malformed value for xml:lang : %s\n", val);
5792 }
5793 }
5794
5795 /*
5796 * Check that xml:space conforms to the specification
5797 */
5798 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5799 if (xmlStrEqual(val, BAD_CAST "default"))
5800 *(ctxt->space) = 0;
5801 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5802 *(ctxt->space) = 1;
5803 else {
5804 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5806 ctxt->sax->error(ctxt->userData,
5807"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5808 val);
5809 ctxt->wellFormed = 0;
5810 ctxt->disableSAX = 1;
5811 }
5812 }
5813
5814 *value = val;
5815 return(name);
5816}
5817
5818/**
5819 * xmlParseStartTag:
5820 * @ctxt: an XML parser context
5821 *
5822 * parse a start of tag either for rule element or
5823 * EmptyElement. In both case we don't parse the tag closing chars.
5824 *
5825 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5826 *
5827 * [ WFC: Unique Att Spec ]
5828 * No attribute name may appear more than once in the same start-tag or
5829 * empty-element tag.
5830 *
5831 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5832 *
5833 * [ WFC: Unique Att Spec ]
5834 * No attribute name may appear more than once in the same start-tag or
5835 * empty-element tag.
5836 *
5837 * With namespace:
5838 *
5839 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5840 *
5841 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
5842 *
5843 * Returns the element name parsed
5844 */
5845
5846xmlChar *
5847xmlParseStartTag(xmlParserCtxtPtr ctxt) {
5848 xmlChar *name;
5849 xmlChar *attname;
5850 xmlChar *attvalue;
5851 const xmlChar **atts = NULL;
5852 int nbatts = 0;
5853 int maxatts = 0;
5854 int i;
5855
5856 if (RAW != '<') return(NULL);
5857 NEXT;
5858
5859 name = xmlParseName(ctxt);
5860 if (name == NULL) {
5861 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5862 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5863 ctxt->sax->error(ctxt->userData,
5864 "xmlParseStartTag: invalid element name\n");
5865 ctxt->wellFormed = 0;
5866 ctxt->disableSAX = 1;
5867 return(NULL);
5868 }
5869
5870 /*
5871 * Now parse the attributes, it ends up with the ending
5872 *
5873 * (S Attribute)* S?
5874 */
5875 SKIP_BLANKS;
5876 GROW;
5877
5878 while ((IS_CHAR(RAW)) &&
5879 (RAW != '>') &&
5880 ((RAW != '/') || (NXT(1) != '>'))) {
5881 const xmlChar *q = CUR_PTR;
5882 int cons = ctxt->input->consumed;
5883
5884 attname = xmlParseAttribute(ctxt, &attvalue);
5885 if ((attname != NULL) && (attvalue != NULL)) {
5886 /*
5887 * [ WFC: Unique Att Spec ]
5888 * No attribute name may appear more than once in the same
5889 * start-tag or empty-element tag.
5890 */
5891 for (i = 0; i < nbatts;i += 2) {
5892 if (xmlStrEqual(atts[i], attname)) {
5893 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
5894 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5895 ctxt->sax->error(ctxt->userData,
5896 "Attribute %s redefined\n",
5897 attname);
5898 ctxt->wellFormed = 0;
5899 ctxt->disableSAX = 1;
5900 xmlFree(attname);
5901 xmlFree(attvalue);
5902 goto failed;
5903 }
5904 }
5905
5906 /*
5907 * Add the pair to atts
5908 */
5909 if (atts == NULL) {
5910 maxatts = 10;
5911 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
5912 if (atts == NULL) {
5913 xmlGenericError(xmlGenericErrorContext,
5914 "malloc of %ld byte failed\n",
5915 maxatts * (long)sizeof(xmlChar *));
5916 return(NULL);
5917 }
5918 } else if (nbatts + 4 > maxatts) {
5919 maxatts *= 2;
5920 atts = (const xmlChar **) xmlRealloc((void *) atts,
5921 maxatts * sizeof(xmlChar *));
5922 if (atts == NULL) {
5923 xmlGenericError(xmlGenericErrorContext,
5924 "realloc of %ld byte failed\n",
5925 maxatts * (long)sizeof(xmlChar *));
5926 return(NULL);
5927 }
5928 }
5929 atts[nbatts++] = attname;
5930 atts[nbatts++] = attvalue;
5931 atts[nbatts] = NULL;
5932 atts[nbatts + 1] = NULL;
5933 } else {
5934 if (attname != NULL)
5935 xmlFree(attname);
5936 if (attvalue != NULL)
5937 xmlFree(attvalue);
5938 }
5939
5940failed:
5941
5942 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
5943 break;
5944 if (!IS_BLANK(RAW)) {
5945 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
5946 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5947 ctxt->sax->error(ctxt->userData,
5948 "attributes construct error\n");
5949 ctxt->wellFormed = 0;
5950 ctxt->disableSAX = 1;
5951 }
5952 SKIP_BLANKS;
5953 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
5954 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5955 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5956 ctxt->sax->error(ctxt->userData,
5957 "xmlParseStartTag: problem parsing attributes\n");
5958 ctxt->wellFormed = 0;
5959 ctxt->disableSAX = 1;
5960 break;
5961 }
5962 GROW;
5963 }
5964
5965 /*
5966 * SAX: Start of Element !
5967 */
5968 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
5969 (!ctxt->disableSAX))
5970 ctxt->sax->startElement(ctxt->userData, name, atts);
5971
5972 if (atts != NULL) {
5973 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
5974 xmlFree((void *) atts);
5975 }
5976 return(name);
5977}
5978
5979/**
5980 * xmlParseEndTag:
5981 * @ctxt: an XML parser context
5982 *
5983 * parse an end of tag
5984 *
5985 * [42] ETag ::= '</' Name S? '>'
5986 *
5987 * With namespace
5988 *
5989 * [NS 9] ETag ::= '</' QName S? '>'
5990 */
5991
5992void
5993xmlParseEndTag(xmlParserCtxtPtr ctxt) {
5994 xmlChar *name;
5995 xmlChar *oldname;
5996
5997 GROW;
5998 if ((RAW != '<') || (NXT(1) != '/')) {
5999 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6000 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6001 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6002 ctxt->wellFormed = 0;
6003 ctxt->disableSAX = 1;
6004 return;
6005 }
6006 SKIP(2);
6007
6008 name = xmlParseName(ctxt);
6009
6010 /*
6011 * We should definitely be at the ending "S? '>'" part
6012 */
6013 GROW;
6014 SKIP_BLANKS;
6015 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6016 ctxt->errNo = XML_ERR_GT_REQUIRED;
6017 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6018 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6019 ctxt->wellFormed = 0;
6020 ctxt->disableSAX = 1;
6021 } else
6022 NEXT;
6023
6024 /*
6025 * [ WFC: Element Type Match ]
6026 * The Name in an element's end-tag must match the element type in the
6027 * start-tag.
6028 *
6029 */
6030 if ((name == NULL) || (ctxt->name == NULL) ||
6031 (!xmlStrEqual(name, ctxt->name))) {
6032 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6034 if ((name != NULL) && (ctxt->name != NULL)) {
6035 ctxt->sax->error(ctxt->userData,
6036 "Opening and ending tag mismatch: %s and %s\n",
6037 ctxt->name, name);
6038 } else if (ctxt->name != NULL) {
6039 ctxt->sax->error(ctxt->userData,
6040 "Ending tag eror for: %s\n", ctxt->name);
6041 } else {
6042 ctxt->sax->error(ctxt->userData,
6043 "Ending tag error: internal error ???\n");
6044 }
6045
6046 }
6047 ctxt->wellFormed = 0;
6048 ctxt->disableSAX = 1;
6049 }
6050
6051 /*
6052 * SAX: End of Tag
6053 */
6054 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6055 (!ctxt->disableSAX))
6056 ctxt->sax->endElement(ctxt->userData, name);
6057
6058 if (name != NULL)
6059 xmlFree(name);
6060 oldname = namePop(ctxt);
6061 spacePop(ctxt);
6062 if (oldname != NULL) {
6063#ifdef DEBUG_STACK
6064 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6065#endif
6066 xmlFree(oldname);
6067 }
6068 return;
6069}
6070
6071/**
6072 * xmlParseCDSect:
6073 * @ctxt: an XML parser context
6074 *
6075 * Parse escaped pure raw content.
6076 *
6077 * [18] CDSect ::= CDStart CData CDEnd
6078 *
6079 * [19] CDStart ::= '<![CDATA['
6080 *
6081 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6082 *
6083 * [21] CDEnd ::= ']]>'
6084 */
6085void
6086xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6087 xmlChar *buf = NULL;
6088 int len = 0;
6089 int size = XML_PARSER_BUFFER_SIZE;
6090 int r, rl;
6091 int s, sl;
6092 int cur, l;
6093 int count = 0;
6094
6095 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6096 (NXT(2) == '[') && (NXT(3) == 'C') &&
6097 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6098 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6099 (NXT(8) == '[')) {
6100 SKIP(9);
6101 } else
6102 return;
6103
6104 ctxt->instate = XML_PARSER_CDATA_SECTION;
6105 r = CUR_CHAR(rl);
6106 if (!IS_CHAR(r)) {
6107 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6108 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6109 ctxt->sax->error(ctxt->userData,
6110 "CData section not finished\n");
6111 ctxt->wellFormed = 0;
6112 ctxt->disableSAX = 1;
6113 ctxt->instate = XML_PARSER_CONTENT;
6114 return;
6115 }
6116 NEXTL(rl);
6117 s = CUR_CHAR(sl);
6118 if (!IS_CHAR(s)) {
6119 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6120 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6121 ctxt->sax->error(ctxt->userData,
6122 "CData section not finished\n");
6123 ctxt->wellFormed = 0;
6124 ctxt->disableSAX = 1;
6125 ctxt->instate = XML_PARSER_CONTENT;
6126 return;
6127 }
6128 NEXTL(sl);
6129 cur = CUR_CHAR(l);
6130 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6131 if (buf == NULL) {
6132 xmlGenericError(xmlGenericErrorContext,
6133 "malloc of %d byte failed\n", size);
6134 return;
6135 }
6136 while (IS_CHAR(cur) &&
6137 ((r != ']') || (s != ']') || (cur != '>'))) {
6138 if (len + 5 >= size) {
6139 size *= 2;
6140 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6141 if (buf == NULL) {
6142 xmlGenericError(xmlGenericErrorContext,
6143 "realloc of %d byte failed\n", size);
6144 return;
6145 }
6146 }
6147 COPY_BUF(rl,buf,len,r);
6148 r = s;
6149 rl = sl;
6150 s = cur;
6151 sl = l;
6152 count++;
6153 if (count > 50) {
6154 GROW;
6155 count = 0;
6156 }
6157 NEXTL(l);
6158 cur = CUR_CHAR(l);
6159 }
6160 buf[len] = 0;
6161 ctxt->instate = XML_PARSER_CONTENT;
6162 if (cur != '>') {
6163 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6165 ctxt->sax->error(ctxt->userData,
6166 "CData section not finished\n%.50s\n", buf);
6167 ctxt->wellFormed = 0;
6168 ctxt->disableSAX = 1;
6169 xmlFree(buf);
6170 return;
6171 }
6172 NEXTL(l);
6173
6174 /*
6175 * Ok the buffer is to be consumed as cdata.
6176 */
6177 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6178 if (ctxt->sax->cdataBlock != NULL)
6179 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6180 }
6181 xmlFree(buf);
6182}
6183
6184/**
6185 * xmlParseContent:
6186 * @ctxt: an XML parser context
6187 *
6188 * Parse a content:
6189 *
6190 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6191 */
6192
6193void
6194xmlParseContent(xmlParserCtxtPtr ctxt) {
6195 GROW;
6196 while (((RAW != 0) || (ctxt->token != 0)) &&
6197 ((RAW != '<') || (NXT(1) != '/'))) {
6198 const xmlChar *test = CUR_PTR;
6199 int cons = ctxt->input->consumed;
6200 xmlChar tok = ctxt->token;
6201
6202 /*
6203 * Handle possible processed charrefs.
6204 */
6205 if (ctxt->token != 0) {
6206 xmlParseCharData(ctxt, 0);
6207 }
6208 /*
6209 * First case : a Processing Instruction.
6210 */
6211 else if ((RAW == '<') && (NXT(1) == '?')) {
6212 xmlParsePI(ctxt);
6213 }
6214
6215 /*
6216 * Second case : a CDSection
6217 */
6218 else if ((RAW == '<') && (NXT(1) == '!') &&
6219 (NXT(2) == '[') && (NXT(3) == 'C') &&
6220 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6221 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6222 (NXT(8) == '[')) {
6223 xmlParseCDSect(ctxt);
6224 }
6225
6226 /*
6227 * Third case : a comment
6228 */
6229 else if ((RAW == '<') && (NXT(1) == '!') &&
6230 (NXT(2) == '-') && (NXT(3) == '-')) {
6231 xmlParseComment(ctxt);
6232 ctxt->instate = XML_PARSER_CONTENT;
6233 }
6234
6235 /*
6236 * Fourth case : a sub-element.
6237 */
6238 else if (RAW == '<') {
6239 xmlParseElement(ctxt);
6240 }
6241
6242 /*
6243 * Fifth case : a reference. If if has not been resolved,
6244 * parsing returns it's Name, create the node
6245 */
6246
6247 else if (RAW == '&') {
6248 xmlParseReference(ctxt);
6249 }
6250
6251 /*
6252 * Last case, text. Note that References are handled directly.
6253 */
6254 else {
6255 xmlParseCharData(ctxt, 0);
6256 }
6257
6258 GROW;
6259 /*
6260 * Pop-up of finished entities.
6261 */
6262 while ((RAW == 0) && (ctxt->inputNr > 1))
6263 xmlPopInput(ctxt);
6264 SHRINK;
6265
6266 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6267 (tok == ctxt->token)) {
6268 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6269 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6270 ctxt->sax->error(ctxt->userData,
6271 "detected an error in element content\n");
6272 ctxt->wellFormed = 0;
6273 ctxt->disableSAX = 1;
6274 ctxt->instate = XML_PARSER_EOF;
6275 break;
6276 }
6277 }
6278}
6279
6280/**
6281 * xmlParseElement:
6282 * @ctxt: an XML parser context
6283 *
6284 * parse an XML element, this is highly recursive
6285 *
6286 * [39] element ::= EmptyElemTag | STag content ETag
6287 *
6288 * [ WFC: Element Type Match ]
6289 * The Name in an element's end-tag must match the element type in the
6290 * start-tag.
6291 *
6292 * [ VC: Element Valid ]
6293 * An element is valid if there is a declaration matching elementdecl
6294 * where the Name matches the element type and one of the following holds:
6295 * - The declaration matches EMPTY and the element has no content.
6296 * - The declaration matches children and the sequence of child elements
6297 * belongs to the language generated by the regular expression in the
6298 * content model, with optional white space (characters matching the
6299 * nonterminal S) between each pair of child elements.
6300 * - The declaration matches Mixed and the content consists of character
6301 * data and child elements whose types match names in the content model.
6302 * - The declaration matches ANY, and the types of any child elements have
6303 * been declared.
6304 */
6305
6306void
6307xmlParseElement(xmlParserCtxtPtr ctxt) {
6308 const xmlChar *openTag = CUR_PTR;
6309 xmlChar *name;
6310 xmlChar *oldname;
6311 xmlParserNodeInfo node_info;
6312 xmlNodePtr ret;
6313
6314 /* Capture start position */
6315 if (ctxt->record_info) {
6316 node_info.begin_pos = ctxt->input->consumed +
6317 (CUR_PTR - ctxt->input->base);
6318 node_info.begin_line = ctxt->input->line;
6319 }
6320
6321 if (ctxt->spaceNr == 0)
6322 spacePush(ctxt, -1);
6323 else
6324 spacePush(ctxt, *ctxt->space);
6325
6326 name = xmlParseStartTag(ctxt);
6327 if (name == NULL) {
6328 spacePop(ctxt);
6329 return;
6330 }
6331 namePush(ctxt, name);
6332 ret = ctxt->node;
6333
6334 /*
6335 * [ VC: Root Element Type ]
6336 * The Name in the document type declaration must match the element
6337 * type of the root element.
6338 */
6339 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6340 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6341 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6342
6343 /*
6344 * Check for an Empty Element.
6345 */
6346 if ((RAW == '/') && (NXT(1) == '>')) {
6347 SKIP(2);
6348 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6349 (!ctxt->disableSAX))
6350 ctxt->sax->endElement(ctxt->userData, name);
6351 oldname = namePop(ctxt);
6352 spacePop(ctxt);
6353 if (oldname != NULL) {
6354#ifdef DEBUG_STACK
6355 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6356#endif
6357 xmlFree(oldname);
6358 }
6359 if ( ret != NULL && ctxt->record_info ) {
6360 node_info.end_pos = ctxt->input->consumed +
6361 (CUR_PTR - ctxt->input->base);
6362 node_info.end_line = ctxt->input->line;
6363 node_info.node = ret;
6364 xmlParserAddNodeInfo(ctxt, &node_info);
6365 }
6366 return;
6367 }
6368 if (RAW == '>') {
6369 NEXT;
6370 } else {
6371 ctxt->errNo = XML_ERR_GT_REQUIRED;
6372 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6373 ctxt->sax->error(ctxt->userData,
6374 "Couldn't find end of Start Tag\n%.30s\n",
6375 openTag);
6376 ctxt->wellFormed = 0;
6377 ctxt->disableSAX = 1;
6378
6379 /*
6380 * end of parsing of this node.
6381 */
6382 nodePop(ctxt);
6383 oldname = namePop(ctxt);
6384 spacePop(ctxt);
6385 if (oldname != NULL) {
6386#ifdef DEBUG_STACK
6387 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6388#endif
6389 xmlFree(oldname);
6390 }
6391
6392 /*
6393 * Capture end position and add node
6394 */
6395 if ( ret != NULL && ctxt->record_info ) {
6396 node_info.end_pos = ctxt->input->consumed +
6397 (CUR_PTR - ctxt->input->base);
6398 node_info.end_line = ctxt->input->line;
6399 node_info.node = ret;
6400 xmlParserAddNodeInfo(ctxt, &node_info);
6401 }
6402 return;
6403 }
6404
6405 /*
6406 * Parse the content of the element:
6407 */
6408 xmlParseContent(ctxt);
6409 if (!IS_CHAR(RAW)) {
6410 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6412 ctxt->sax->error(ctxt->userData,
6413 "Premature end of data in tag %.30s\n", openTag);
6414 ctxt->wellFormed = 0;
6415 ctxt->disableSAX = 1;
6416
6417 /*
6418 * end of parsing of this node.
6419 */
6420 nodePop(ctxt);
6421 oldname = namePop(ctxt);
6422 spacePop(ctxt);
6423 if (oldname != NULL) {
6424#ifdef DEBUG_STACK
6425 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6426#endif
6427 xmlFree(oldname);
6428 }
6429 return;
6430 }
6431
6432 /*
6433 * parse the end of tag: '</' should be here.
6434 */
6435 xmlParseEndTag(ctxt);
6436
6437 /*
6438 * Capture end position and add node
6439 */
6440 if ( ret != NULL && ctxt->record_info ) {
6441 node_info.end_pos = ctxt->input->consumed +
6442 (CUR_PTR - ctxt->input->base);
6443 node_info.end_line = ctxt->input->line;
6444 node_info.node = ret;
6445 xmlParserAddNodeInfo(ctxt, &node_info);
6446 }
6447}
6448
6449/**
6450 * xmlParseVersionNum:
6451 * @ctxt: an XML parser context
6452 *
6453 * parse the XML version value.
6454 *
6455 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6456 *
6457 * Returns the string giving the XML version number, or NULL
6458 */
6459xmlChar *
6460xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6461 xmlChar *buf = NULL;
6462 int len = 0;
6463 int size = 10;
6464 xmlChar cur;
6465
6466 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6467 if (buf == NULL) {
6468 xmlGenericError(xmlGenericErrorContext,
6469 "malloc of %d byte failed\n", size);
6470 return(NULL);
6471 }
6472 cur = CUR;
6473 while (((cur >= 'a') && (cur <= 'z')) ||
6474 ((cur >= 'A') && (cur <= 'Z')) ||
6475 ((cur >= '0') && (cur <= '9')) ||
6476 (cur == '_') || (cur == '.') ||
6477 (cur == ':') || (cur == '-')) {
6478 if (len + 1 >= size) {
6479 size *= 2;
6480 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6481 if (buf == NULL) {
6482 xmlGenericError(xmlGenericErrorContext,
6483 "realloc of %d byte failed\n", size);
6484 return(NULL);
6485 }
6486 }
6487 buf[len++] = cur;
6488 NEXT;
6489 cur=CUR;
6490 }
6491 buf[len] = 0;
6492 return(buf);
6493}
6494
6495/**
6496 * xmlParseVersionInfo:
6497 * @ctxt: an XML parser context
6498 *
6499 * parse the XML version.
6500 *
6501 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6502 *
6503 * [25] Eq ::= S? '=' S?
6504 *
6505 * Returns the version string, e.g. "1.0"
6506 */
6507
6508xmlChar *
6509xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6510 xmlChar *version = NULL;
6511 const xmlChar *q;
6512
6513 if ((RAW == 'v') && (NXT(1) == 'e') &&
6514 (NXT(2) == 'r') && (NXT(3) == 's') &&
6515 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6516 (NXT(6) == 'n')) {
6517 SKIP(7);
6518 SKIP_BLANKS;
6519 if (RAW != '=') {
6520 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6522 ctxt->sax->error(ctxt->userData,
6523 "xmlParseVersionInfo : expected '='\n");
6524 ctxt->wellFormed = 0;
6525 ctxt->disableSAX = 1;
6526 return(NULL);
6527 }
6528 NEXT;
6529 SKIP_BLANKS;
6530 if (RAW == '"') {
6531 NEXT;
6532 q = CUR_PTR;
6533 version = xmlParseVersionNum(ctxt);
6534 if (RAW != '"') {
6535 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6536 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6537 ctxt->sax->error(ctxt->userData,
6538 "String not closed\n%.50s\n", q);
6539 ctxt->wellFormed = 0;
6540 ctxt->disableSAX = 1;
6541 } else
6542 NEXT;
6543 } else if (RAW == '\''){
6544 NEXT;
6545 q = CUR_PTR;
6546 version = xmlParseVersionNum(ctxt);
6547 if (RAW != '\'') {
6548 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6549 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6550 ctxt->sax->error(ctxt->userData,
6551 "String not closed\n%.50s\n", q);
6552 ctxt->wellFormed = 0;
6553 ctxt->disableSAX = 1;
6554 } else
6555 NEXT;
6556 } else {
6557 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6559 ctxt->sax->error(ctxt->userData,
6560 "xmlParseVersionInfo : expected ' or \"\n");
6561 ctxt->wellFormed = 0;
6562 ctxt->disableSAX = 1;
6563 }
6564 }
6565 return(version);
6566}
6567
6568/**
6569 * xmlParseEncName:
6570 * @ctxt: an XML parser context
6571 *
6572 * parse the XML encoding name
6573 *
6574 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6575 *
6576 * Returns the encoding name value or NULL
6577 */
6578xmlChar *
6579xmlParseEncName(xmlParserCtxtPtr ctxt) {
6580 xmlChar *buf = NULL;
6581 int len = 0;
6582 int size = 10;
6583 xmlChar cur;
6584
6585 cur = CUR;
6586 if (((cur >= 'a') && (cur <= 'z')) ||
6587 ((cur >= 'A') && (cur <= 'Z'))) {
6588 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6589 if (buf == NULL) {
6590 xmlGenericError(xmlGenericErrorContext,
6591 "malloc of %d byte failed\n", size);
6592 return(NULL);
6593 }
6594
6595 buf[len++] = cur;
6596 NEXT;
6597 cur = CUR;
6598 while (((cur >= 'a') && (cur <= 'z')) ||
6599 ((cur >= 'A') && (cur <= 'Z')) ||
6600 ((cur >= '0') && (cur <= '9')) ||
6601 (cur == '.') || (cur == '_') ||
6602 (cur == '-')) {
6603 if (len + 1 >= size) {
6604 size *= 2;
6605 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6606 if (buf == NULL) {
6607 xmlGenericError(xmlGenericErrorContext,
6608 "realloc of %d byte failed\n", size);
6609 return(NULL);
6610 }
6611 }
6612 buf[len++] = cur;
6613 NEXT;
6614 cur = CUR;
6615 if (cur == 0) {
6616 SHRINK;
6617 GROW;
6618 cur = CUR;
6619 }
6620 }
6621 buf[len] = 0;
6622 } else {
6623 ctxt->errNo = XML_ERR_ENCODING_NAME;
6624 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6625 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6626 ctxt->wellFormed = 0;
6627 ctxt->disableSAX = 1;
6628 }
6629 return(buf);
6630}
6631
6632/**
6633 * xmlParseEncodingDecl:
6634 * @ctxt: an XML parser context
6635 *
6636 * parse the XML encoding declaration
6637 *
6638 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6639 *
6640 * this setups the conversion filters.
6641 *
6642 * Returns the encoding value or NULL
6643 */
6644
6645xmlChar *
6646xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6647 xmlChar *encoding = NULL;
6648 const xmlChar *q;
6649
6650 SKIP_BLANKS;
6651 if ((RAW == 'e') && (NXT(1) == 'n') &&
6652 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6653 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6654 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6655 SKIP(8);
6656 SKIP_BLANKS;
6657 if (RAW != '=') {
6658 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6659 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6660 ctxt->sax->error(ctxt->userData,
6661 "xmlParseEncodingDecl : expected '='\n");
6662 ctxt->wellFormed = 0;
6663 ctxt->disableSAX = 1;
6664 return(NULL);
6665 }
6666 NEXT;
6667 SKIP_BLANKS;
6668 if (RAW == '"') {
6669 NEXT;
6670 q = CUR_PTR;
6671 encoding = xmlParseEncName(ctxt);
6672 if (RAW != '"') {
6673 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6674 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6675 ctxt->sax->error(ctxt->userData,
6676 "String not closed\n%.50s\n", q);
6677 ctxt->wellFormed = 0;
6678 ctxt->disableSAX = 1;
6679 } else
6680 NEXT;
6681 } else if (RAW == '\''){
6682 NEXT;
6683 q = CUR_PTR;
6684 encoding = xmlParseEncName(ctxt);
6685 if (RAW != '\'') {
6686 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6687 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6688 ctxt->sax->error(ctxt->userData,
6689 "String not closed\n%.50s\n", q);
6690 ctxt->wellFormed = 0;
6691 ctxt->disableSAX = 1;
6692 } else
6693 NEXT;
6694 } else if (RAW == '"'){
6695 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6697 ctxt->sax->error(ctxt->userData,
6698 "xmlParseEncodingDecl : expected ' or \"\n");
6699 ctxt->wellFormed = 0;
6700 ctxt->disableSAX = 1;
6701 }
6702 if (encoding != NULL) {
6703 xmlCharEncoding enc;
6704 xmlCharEncodingHandlerPtr handler;
6705
6706 if (ctxt->input->encoding != NULL)
6707 xmlFree((xmlChar *) ctxt->input->encoding);
6708 ctxt->input->encoding = encoding;
6709
6710 enc = xmlParseCharEncoding((const char *) encoding);
6711 /*
6712 * registered set of known encodings
6713 */
6714 if (enc != XML_CHAR_ENCODING_ERROR) {
6715 xmlSwitchEncoding(ctxt, enc);
6716 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6717 xmlFree(encoding);
6718 return(NULL);
6719 }
6720 } else {
6721 /*
6722 * fallback for unknown encodings
6723 */
6724 handler = xmlFindCharEncodingHandler((const char *) encoding);
6725 if (handler != NULL) {
6726 xmlSwitchToEncoding(ctxt, handler);
6727 } else {
6728 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6729 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6730 ctxt->sax->error(ctxt->userData,
6731 "Unsupported encoding %s\n", encoding);
6732 return(NULL);
6733 }
6734 }
6735 }
6736 }
6737 return(encoding);
6738}
6739
6740/**
6741 * xmlParseSDDecl:
6742 * @ctxt: an XML parser context
6743 *
6744 * parse the XML standalone declaration
6745 *
6746 * [32] SDDecl ::= S 'standalone' Eq
6747 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6748 *
6749 * [ VC: Standalone Document Declaration ]
6750 * TODO The standalone document declaration must have the value "no"
6751 * if any external markup declarations contain declarations of:
6752 * - attributes with default values, if elements to which these
6753 * attributes apply appear in the document without specifications
6754 * of values for these attributes, or
6755 * - entities (other than amp, lt, gt, apos, quot), if references
6756 * to those entities appear in the document, or
6757 * - attributes with values subject to normalization, where the
6758 * attribute appears in the document with a value which will change
6759 * as a result of normalization, or
6760 * - element types with element content, if white space occurs directly
6761 * within any instance of those types.
6762 *
6763 * Returns 1 if standalone, 0 otherwise
6764 */
6765
6766int
6767xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6768 int standalone = -1;
6769
6770 SKIP_BLANKS;
6771 if ((RAW == 's') && (NXT(1) == 't') &&
6772 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6773 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6774 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6775 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6776 SKIP(10);
6777 SKIP_BLANKS;
6778 if (RAW != '=') {
6779 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6780 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6781 ctxt->sax->error(ctxt->userData,
6782 "XML standalone declaration : expected '='\n");
6783 ctxt->wellFormed = 0;
6784 ctxt->disableSAX = 1;
6785 return(standalone);
6786 }
6787 NEXT;
6788 SKIP_BLANKS;
6789 if (RAW == '\''){
6790 NEXT;
6791 if ((RAW == 'n') && (NXT(1) == 'o')) {
6792 standalone = 0;
6793 SKIP(2);
6794 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6795 (NXT(2) == 's')) {
6796 standalone = 1;
6797 SKIP(3);
6798 } else {
6799 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6801 ctxt->sax->error(ctxt->userData,
6802 "standalone accepts only 'yes' or 'no'\n");
6803 ctxt->wellFormed = 0;
6804 ctxt->disableSAX = 1;
6805 }
6806 if (RAW != '\'') {
6807 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6809 ctxt->sax->error(ctxt->userData, "String not closed\n");
6810 ctxt->wellFormed = 0;
6811 ctxt->disableSAX = 1;
6812 } else
6813 NEXT;
6814 } else if (RAW == '"'){
6815 NEXT;
6816 if ((RAW == 'n') && (NXT(1) == 'o')) {
6817 standalone = 0;
6818 SKIP(2);
6819 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6820 (NXT(2) == 's')) {
6821 standalone = 1;
6822 SKIP(3);
6823 } else {
6824 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6825 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6826 ctxt->sax->error(ctxt->userData,
6827 "standalone accepts only 'yes' or 'no'\n");
6828 ctxt->wellFormed = 0;
6829 ctxt->disableSAX = 1;
6830 }
6831 if (RAW != '"') {
6832 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6834 ctxt->sax->error(ctxt->userData, "String not closed\n");
6835 ctxt->wellFormed = 0;
6836 ctxt->disableSAX = 1;
6837 } else
6838 NEXT;
6839 } else {
6840 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6841 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6842 ctxt->sax->error(ctxt->userData,
6843 "Standalone value not found\n");
6844 ctxt->wellFormed = 0;
6845 ctxt->disableSAX = 1;
6846 }
6847 }
6848 return(standalone);
6849}
6850
6851/**
6852 * xmlParseXMLDecl:
6853 * @ctxt: an XML parser context
6854 *
6855 * parse an XML declaration header
6856 *
6857 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
6858 */
6859
6860void
6861xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
6862 xmlChar *version;
6863
6864 /*
6865 * We know that '<?xml' is here.
6866 */
6867 SKIP(5);
6868
6869 if (!IS_BLANK(RAW)) {
6870 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6871 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6872 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
6873 ctxt->wellFormed = 0;
6874 ctxt->disableSAX = 1;
6875 }
6876 SKIP_BLANKS;
6877
6878 /*
6879 * We should have the VersionInfo here.
6880 */
6881 version = xmlParseVersionInfo(ctxt);
6882 if (version == NULL)
6883 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6884 ctxt->version = xmlStrdup(version);
6885 xmlFree(version);
6886
6887 /*
6888 * We may have the encoding declaration
6889 */
6890 if (!IS_BLANK(RAW)) {
6891 if ((RAW == '?') && (NXT(1) == '>')) {
6892 SKIP(2);
6893 return;
6894 }
6895 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6896 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6897 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
6898 ctxt->wellFormed = 0;
6899 ctxt->disableSAX = 1;
6900 }
6901 xmlParseEncodingDecl(ctxt);
6902 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6903 /*
6904 * The XML REC instructs us to stop parsing right here
6905 */
6906 return;
6907 }
6908
6909 /*
6910 * We may have the standalone status.
6911 */
6912 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
6913 if ((RAW == '?') && (NXT(1) == '>')) {
6914 SKIP(2);
6915 return;
6916 }
6917 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6918 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6919 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
6920 ctxt->wellFormed = 0;
6921 ctxt->disableSAX = 1;
6922 }
6923 SKIP_BLANKS;
6924 ctxt->input->standalone = xmlParseSDDecl(ctxt);
6925
6926 SKIP_BLANKS;
6927 if ((RAW == '?') && (NXT(1) == '>')) {
6928 SKIP(2);
6929 } else if (RAW == '>') {
6930 /* Deprecated old WD ... */
6931 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
6932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6933 ctxt->sax->error(ctxt->userData,
6934 "XML declaration must end-up with '?>'\n");
6935 ctxt->wellFormed = 0;
6936 ctxt->disableSAX = 1;
6937 NEXT;
6938 } else {
6939 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
6940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6941 ctxt->sax->error(ctxt->userData,
6942 "parsing XML declaration: '?>' expected\n");
6943 ctxt->wellFormed = 0;
6944 ctxt->disableSAX = 1;
6945 MOVETO_ENDTAG(CUR_PTR);
6946 NEXT;
6947 }
6948}
6949
6950/**
6951 * xmlParseMisc:
6952 * @ctxt: an XML parser context
6953 *
6954 * parse an XML Misc* optionnal field.
6955 *
6956 * [27] Misc ::= Comment | PI | S
6957 */
6958
6959void
6960xmlParseMisc(xmlParserCtxtPtr ctxt) {
6961 while (((RAW == '<') && (NXT(1) == '?')) ||
6962 ((RAW == '<') && (NXT(1) == '!') &&
6963 (NXT(2) == '-') && (NXT(3) == '-')) ||
6964 IS_BLANK(CUR)) {
6965 if ((RAW == '<') && (NXT(1) == '?')) {
6966 xmlParsePI(ctxt);
6967 } else if (IS_BLANK(CUR)) {
6968 NEXT;
6969 } else
6970 xmlParseComment(ctxt);
6971 }
6972}
6973
6974/**
6975 * xmlParseDocument:
6976 * @ctxt: an XML parser context
6977 *
6978 * parse an XML document (and build a tree if using the standard SAX
6979 * interface).
6980 *
6981 * [1] document ::= prolog element Misc*
6982 *
6983 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
6984 *
6985 * Returns 0, -1 in case of error. the parser context is augmented
6986 * as a result of the parsing.
6987 */
6988
6989int
6990xmlParseDocument(xmlParserCtxtPtr ctxt) {
6991 xmlChar start[4];
6992 xmlCharEncoding enc;
6993
6994 xmlInitParser();
6995
6996 GROW;
6997
6998 /*
6999 * SAX: beginning of the document processing.
7000 */
7001 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7002 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7003
7004 /*
7005 * Get the 4 first bytes and decode the charset
7006 * if enc != XML_CHAR_ENCODING_NONE
7007 * plug some encoding conversion routines.
7008 */
7009 start[0] = RAW;
7010 start[1] = NXT(1);
7011 start[2] = NXT(2);
7012 start[3] = NXT(3);
7013 enc = xmlDetectCharEncoding(start, 4);
7014 if (enc != XML_CHAR_ENCODING_NONE) {
7015 xmlSwitchEncoding(ctxt, enc);
7016 }
7017
7018
7019 if (CUR == 0) {
7020 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7022 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7023 ctxt->wellFormed = 0;
7024 ctxt->disableSAX = 1;
7025 }
7026
7027 /*
7028 * Check for the XMLDecl in the Prolog.
7029 */
7030 GROW;
7031 if ((RAW == '<') && (NXT(1) == '?') &&
7032 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7033 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7034
7035 /*
7036 * Note that we will switch encoding on the fly.
7037 */
7038 xmlParseXMLDecl(ctxt);
7039 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7040 /*
7041 * The XML REC instructs us to stop parsing right here
7042 */
7043 return(-1);
7044 }
7045 ctxt->standalone = ctxt->input->standalone;
7046 SKIP_BLANKS;
7047 } else {
7048 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7049 }
7050 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7051 ctxt->sax->startDocument(ctxt->userData);
7052
7053 /*
7054 * The Misc part of the Prolog
7055 */
7056 GROW;
7057 xmlParseMisc(ctxt);
7058
7059 /*
7060 * Then possibly doc type declaration(s) and more Misc
7061 * (doctypedecl Misc*)?
7062 */
7063 GROW;
7064 if ((RAW == '<') && (NXT(1) == '!') &&
7065 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7066 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7067 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7068 (NXT(8) == 'E')) {
7069
7070 ctxt->inSubset = 1;
7071 xmlParseDocTypeDecl(ctxt);
7072 if (RAW == '[') {
7073 ctxt->instate = XML_PARSER_DTD;
7074 xmlParseInternalSubset(ctxt);
7075 }
7076
7077 /*
7078 * Create and update the external subset.
7079 */
7080 ctxt->inSubset = 2;
7081 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7082 (!ctxt->disableSAX))
7083 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7084 ctxt->extSubSystem, ctxt->extSubURI);
7085 ctxt->inSubset = 0;
7086
7087
7088 ctxt->instate = XML_PARSER_PROLOG;
7089 xmlParseMisc(ctxt);
7090 }
7091
7092 /*
7093 * Time to start parsing the tree itself
7094 */
7095 GROW;
7096 if (RAW != '<') {
7097 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7099 ctxt->sax->error(ctxt->userData,
7100 "Start tag expected, '<' not found\n");
7101 ctxt->wellFormed = 0;
7102 ctxt->disableSAX = 1;
7103 ctxt->instate = XML_PARSER_EOF;
7104 } else {
7105 ctxt->instate = XML_PARSER_CONTENT;
7106 xmlParseElement(ctxt);
7107 ctxt->instate = XML_PARSER_EPILOG;
7108
7109
7110 /*
7111 * The Misc part at the end
7112 */
7113 xmlParseMisc(ctxt);
7114
7115 if (RAW != 0) {
7116 ctxt->errNo = XML_ERR_DOCUMENT_END;
7117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7118 ctxt->sax->error(ctxt->userData,
7119 "Extra content at the end of the document\n");
7120 ctxt->wellFormed = 0;
7121 ctxt->disableSAX = 1;
7122 }
7123 ctxt->instate = XML_PARSER_EOF;
7124 }
7125
7126 /*
7127 * SAX: end of the document processing.
7128 */
7129 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7130 (!ctxt->disableSAX))
7131 ctxt->sax->endDocument(ctxt->userData);
7132
7133 if (! ctxt->wellFormed) return(-1);
7134 return(0);
7135}
7136
7137/**
7138 * xmlParseExtParsedEnt:
7139 * @ctxt: an XML parser context
7140 *
7141 * parse a genreral parsed entity
7142 * An external general parsed entity is well-formed if it matches the
7143 * production labeled extParsedEnt.
7144 *
7145 * [78] extParsedEnt ::= TextDecl? content
7146 *
7147 * Returns 0, -1 in case of error. the parser context is augmented
7148 * as a result of the parsing.
7149 */
7150
7151int
7152xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7153 xmlChar start[4];
7154 xmlCharEncoding enc;
7155
7156 xmlDefaultSAXHandlerInit();
7157
7158 GROW;
7159
7160 /*
7161 * SAX: beginning of the document processing.
7162 */
7163 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7164 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7165
7166 /*
7167 * Get the 4 first bytes and decode the charset
7168 * if enc != XML_CHAR_ENCODING_NONE
7169 * plug some encoding conversion routines.
7170 */
7171 start[0] = RAW;
7172 start[1] = NXT(1);
7173 start[2] = NXT(2);
7174 start[3] = NXT(3);
7175 enc = xmlDetectCharEncoding(start, 4);
7176 if (enc != XML_CHAR_ENCODING_NONE) {
7177 xmlSwitchEncoding(ctxt, enc);
7178 }
7179
7180
7181 if (CUR == 0) {
7182 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7184 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7185 ctxt->wellFormed = 0;
7186 ctxt->disableSAX = 1;
7187 }
7188
7189 /*
7190 * Check for the XMLDecl in the Prolog.
7191 */
7192 GROW;
7193 if ((RAW == '<') && (NXT(1) == '?') &&
7194 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7195 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7196
7197 /*
7198 * Note that we will switch encoding on the fly.
7199 */
7200 xmlParseXMLDecl(ctxt);
7201 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7202 /*
7203 * The XML REC instructs us to stop parsing right here
7204 */
7205 return(-1);
7206 }
7207 SKIP_BLANKS;
7208 } else {
7209 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7210 }
7211 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7212 ctxt->sax->startDocument(ctxt->userData);
7213
7214 /*
7215 * Doing validity checking on chunk doesn't make sense
7216 */
7217 ctxt->instate = XML_PARSER_CONTENT;
7218 ctxt->validate = 0;
7219 ctxt->loadsubset = 0;
7220 ctxt->depth = 0;
7221
7222 xmlParseContent(ctxt);
7223
7224 if ((RAW == '<') && (NXT(1) == '/')) {
7225 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7227 ctxt->sax->error(ctxt->userData,
7228 "chunk is not well balanced\n");
7229 ctxt->wellFormed = 0;
7230 ctxt->disableSAX = 1;
7231 } else if (RAW != 0) {
7232 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7234 ctxt->sax->error(ctxt->userData,
7235 "extra content at the end of well balanced chunk\n");
7236 ctxt->wellFormed = 0;
7237 ctxt->disableSAX = 1;
7238 }
7239
7240 /*
7241 * SAX: end of the document processing.
7242 */
7243 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7244 (!ctxt->disableSAX))
7245 ctxt->sax->endDocument(ctxt->userData);
7246
7247 if (! ctxt->wellFormed) return(-1);
7248 return(0);
7249}
7250
7251/************************************************************************
7252 * *
7253 * Progressive parsing interfaces *
7254 * *
7255 ************************************************************************/
7256
7257/**
7258 * xmlParseLookupSequence:
7259 * @ctxt: an XML parser context
7260 * @first: the first char to lookup
7261 * @next: the next char to lookup or zero
7262 * @third: the next char to lookup or zero
7263 *
7264 * Try to find if a sequence (first, next, third) or just (first next) or
7265 * (first) is available in the input stream.
7266 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7267 * to avoid rescanning sequences of bytes, it DOES change the state of the
7268 * parser, do not use liberally.
7269 *
7270 * Returns the index to the current parsing point if the full sequence
7271 * is available, -1 otherwise.
7272 */
7273int
7274xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7275 xmlChar next, xmlChar third) {
7276 int base, len;
7277 xmlParserInputPtr in;
7278 const xmlChar *buf;
7279
7280 in = ctxt->input;
7281 if (in == NULL) return(-1);
7282 base = in->cur - in->base;
7283 if (base < 0) return(-1);
7284 if (ctxt->checkIndex > base)
7285 base = ctxt->checkIndex;
7286 if (in->buf == NULL) {
7287 buf = in->base;
7288 len = in->length;
7289 } else {
7290 buf = in->buf->buffer->content;
7291 len = in->buf->buffer->use;
7292 }
7293 /* take into account the sequence length */
7294 if (third) len -= 2;
7295 else if (next) len --;
7296 for (;base < len;base++) {
7297 if (buf[base] == first) {
7298 if (third != 0) {
7299 if ((buf[base + 1] != next) ||
7300 (buf[base + 2] != third)) continue;
7301 } else if (next != 0) {
7302 if (buf[base + 1] != next) continue;
7303 }
7304 ctxt->checkIndex = 0;
7305#ifdef DEBUG_PUSH
7306 if (next == 0)
7307 xmlGenericError(xmlGenericErrorContext,
7308 "PP: lookup '%c' found at %d\n",
7309 first, base);
7310 else if (third == 0)
7311 xmlGenericError(xmlGenericErrorContext,
7312 "PP: lookup '%c%c' found at %d\n",
7313 first, next, base);
7314 else
7315 xmlGenericError(xmlGenericErrorContext,
7316 "PP: lookup '%c%c%c' found at %d\n",
7317 first, next, third, base);
7318#endif
7319 return(base - (in->cur - in->base));
7320 }
7321 }
7322 ctxt->checkIndex = base;
7323#ifdef DEBUG_PUSH
7324 if (next == 0)
7325 xmlGenericError(xmlGenericErrorContext,
7326 "PP: lookup '%c' failed\n", first);
7327 else if (third == 0)
7328 xmlGenericError(xmlGenericErrorContext,
7329 "PP: lookup '%c%c' failed\n", first, next);
7330 else
7331 xmlGenericError(xmlGenericErrorContext,
7332 "PP: lookup '%c%c%c' failed\n", first, next, third);
7333#endif
7334 return(-1);
7335}
7336
7337/**
7338 * xmlParseTryOrFinish:
7339 * @ctxt: an XML parser context
7340 * @terminate: last chunk indicator
7341 *
7342 * Try to progress on parsing
7343 *
7344 * Returns zero if no parsing was possible
7345 */
7346int
7347xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7348 int ret = 0;
7349 int avail;
7350 xmlChar cur, next;
7351
7352#ifdef DEBUG_PUSH
7353 switch (ctxt->instate) {
7354 case XML_PARSER_EOF:
7355 xmlGenericError(xmlGenericErrorContext,
7356 "PP: try EOF\n"); break;
7357 case XML_PARSER_START:
7358 xmlGenericError(xmlGenericErrorContext,
7359 "PP: try START\n"); break;
7360 case XML_PARSER_MISC:
7361 xmlGenericError(xmlGenericErrorContext,
7362 "PP: try MISC\n");break;
7363 case XML_PARSER_COMMENT:
7364 xmlGenericError(xmlGenericErrorContext,
7365 "PP: try COMMENT\n");break;
7366 case XML_PARSER_PROLOG:
7367 xmlGenericError(xmlGenericErrorContext,
7368 "PP: try PROLOG\n");break;
7369 case XML_PARSER_START_TAG:
7370 xmlGenericError(xmlGenericErrorContext,
7371 "PP: try START_TAG\n");break;
7372 case XML_PARSER_CONTENT:
7373 xmlGenericError(xmlGenericErrorContext,
7374 "PP: try CONTENT\n");break;
7375 case XML_PARSER_CDATA_SECTION:
7376 xmlGenericError(xmlGenericErrorContext,
7377 "PP: try CDATA_SECTION\n");break;
7378 case XML_PARSER_END_TAG:
7379 xmlGenericError(xmlGenericErrorContext,
7380 "PP: try END_TAG\n");break;
7381 case XML_PARSER_ENTITY_DECL:
7382 xmlGenericError(xmlGenericErrorContext,
7383 "PP: try ENTITY_DECL\n");break;
7384 case XML_PARSER_ENTITY_VALUE:
7385 xmlGenericError(xmlGenericErrorContext,
7386 "PP: try ENTITY_VALUE\n");break;
7387 case XML_PARSER_ATTRIBUTE_VALUE:
7388 xmlGenericError(xmlGenericErrorContext,
7389 "PP: try ATTRIBUTE_VALUE\n");break;
7390 case XML_PARSER_DTD:
7391 xmlGenericError(xmlGenericErrorContext,
7392 "PP: try DTD\n");break;
7393 case XML_PARSER_EPILOG:
7394 xmlGenericError(xmlGenericErrorContext,
7395 "PP: try EPILOG\n");break;
7396 case XML_PARSER_PI:
7397 xmlGenericError(xmlGenericErrorContext,
7398 "PP: try PI\n");break;
7399 case XML_PARSER_IGNORE:
7400 xmlGenericError(xmlGenericErrorContext,
7401 "PP: try IGNORE\n");break;
7402 }
7403#endif
7404
7405 while (1) {
7406 /*
7407 * Pop-up of finished entities.
7408 */
7409 while ((RAW == 0) && (ctxt->inputNr > 1))
7410 xmlPopInput(ctxt);
7411
7412 if (ctxt->input ==NULL) break;
7413 if (ctxt->input->buf == NULL)
7414 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7415 else
7416 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7417 if (avail < 1)
7418 goto done;
7419 switch (ctxt->instate) {
7420 case XML_PARSER_EOF:
7421 /*
7422 * Document parsing is done !
7423 */
7424 goto done;
7425 case XML_PARSER_START:
7426 /*
7427 * Very first chars read from the document flow.
7428 */
7429 cur = ctxt->input->cur[0];
7430 if (IS_BLANK(cur)) {
7431 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7432 ctxt->sax->setDocumentLocator(ctxt->userData,
7433 &xmlDefaultSAXLocator);
7434 ctxt->errNo = XML_ERR_DOCUMENT_START;
7435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7436 ctxt->sax->error(ctxt->userData,
7437 "Extra spaces at the beginning of the document are not allowed\n");
7438 ctxt->wellFormed = 0;
7439 ctxt->disableSAX = 1;
7440 SKIP_BLANKS;
7441 ret++;
7442 if (ctxt->input->buf == NULL)
7443 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7444 else
7445 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7446 }
7447 if (avail < 2)
7448 goto done;
7449
7450 cur = ctxt->input->cur[0];
7451 next = ctxt->input->cur[1];
7452 if (cur == 0) {
7453 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7454 ctxt->sax->setDocumentLocator(ctxt->userData,
7455 &xmlDefaultSAXLocator);
7456 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7457 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7458 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7459 ctxt->wellFormed = 0;
7460 ctxt->disableSAX = 1;
7461 ctxt->instate = XML_PARSER_EOF;
7462#ifdef DEBUG_PUSH
7463 xmlGenericError(xmlGenericErrorContext,
7464 "PP: entering EOF\n");
7465#endif
7466 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7467 ctxt->sax->endDocument(ctxt->userData);
7468 goto done;
7469 }
7470 if ((cur == '<') && (next == '?')) {
7471 /* PI or XML decl */
7472 if (avail < 5) return(ret);
7473 if ((!terminate) &&
7474 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7475 return(ret);
7476 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7477 ctxt->sax->setDocumentLocator(ctxt->userData,
7478 &xmlDefaultSAXLocator);
7479 if ((ctxt->input->cur[2] == 'x') &&
7480 (ctxt->input->cur[3] == 'm') &&
7481 (ctxt->input->cur[4] == 'l') &&
7482 (IS_BLANK(ctxt->input->cur[5]))) {
7483 ret += 5;
7484#ifdef DEBUG_PUSH
7485 xmlGenericError(xmlGenericErrorContext,
7486 "PP: Parsing XML Decl\n");
7487#endif
7488 xmlParseXMLDecl(ctxt);
7489 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7490 /*
7491 * The XML REC instructs us to stop parsing right
7492 * here
7493 */
7494 ctxt->instate = XML_PARSER_EOF;
7495 return(0);
7496 }
7497 ctxt->standalone = ctxt->input->standalone;
7498 if ((ctxt->encoding == NULL) &&
7499 (ctxt->input->encoding != NULL))
7500 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7501 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7502 (!ctxt->disableSAX))
7503 ctxt->sax->startDocument(ctxt->userData);
7504 ctxt->instate = XML_PARSER_MISC;
7505#ifdef DEBUG_PUSH
7506 xmlGenericError(xmlGenericErrorContext,
7507 "PP: entering MISC\n");
7508#endif
7509 } else {
7510 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7511 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7512 (!ctxt->disableSAX))
7513 ctxt->sax->startDocument(ctxt->userData);
7514 ctxt->instate = XML_PARSER_MISC;
7515#ifdef DEBUG_PUSH
7516 xmlGenericError(xmlGenericErrorContext,
7517 "PP: entering MISC\n");
7518#endif
7519 }
7520 } else {
7521 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7522 ctxt->sax->setDocumentLocator(ctxt->userData,
7523 &xmlDefaultSAXLocator);
7524 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7525 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7526 (!ctxt->disableSAX))
7527 ctxt->sax->startDocument(ctxt->userData);
7528 ctxt->instate = XML_PARSER_MISC;
7529#ifdef DEBUG_PUSH
7530 xmlGenericError(xmlGenericErrorContext,
7531 "PP: entering MISC\n");
7532#endif
7533 }
7534 break;
7535 case XML_PARSER_MISC:
7536 SKIP_BLANKS;
7537 if (ctxt->input->buf == NULL)
7538 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7539 else
7540 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7541 if (avail < 2)
7542 goto done;
7543 cur = ctxt->input->cur[0];
7544 next = ctxt->input->cur[1];
7545 if ((cur == '<') && (next == '?')) {
7546 if ((!terminate) &&
7547 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7548 goto done;
7549#ifdef DEBUG_PUSH
7550 xmlGenericError(xmlGenericErrorContext,
7551 "PP: Parsing PI\n");
7552#endif
7553 xmlParsePI(ctxt);
7554 } else if ((cur == '<') && (next == '!') &&
7555 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7556 if ((!terminate) &&
7557 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7558 goto done;
7559#ifdef DEBUG_PUSH
7560 xmlGenericError(xmlGenericErrorContext,
7561 "PP: Parsing Comment\n");
7562#endif
7563 xmlParseComment(ctxt);
7564 ctxt->instate = XML_PARSER_MISC;
7565 } else if ((cur == '<') && (next == '!') &&
7566 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7567 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7568 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7569 (ctxt->input->cur[8] == 'E')) {
7570 if ((!terminate) &&
7571 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7572 goto done;
7573#ifdef DEBUG_PUSH
7574 xmlGenericError(xmlGenericErrorContext,
7575 "PP: Parsing internal subset\n");
7576#endif
7577 ctxt->inSubset = 1;
7578 xmlParseDocTypeDecl(ctxt);
7579 if (RAW == '[') {
7580 ctxt->instate = XML_PARSER_DTD;
7581#ifdef DEBUG_PUSH
7582 xmlGenericError(xmlGenericErrorContext,
7583 "PP: entering DTD\n");
7584#endif
7585 } else {
7586 /*
7587 * Create and update the external subset.
7588 */
7589 ctxt->inSubset = 2;
7590 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7591 (ctxt->sax->externalSubset != NULL))
7592 ctxt->sax->externalSubset(ctxt->userData,
7593 ctxt->intSubName, ctxt->extSubSystem,
7594 ctxt->extSubURI);
7595 ctxt->inSubset = 0;
7596 ctxt->instate = XML_PARSER_PROLOG;
7597#ifdef DEBUG_PUSH
7598 xmlGenericError(xmlGenericErrorContext,
7599 "PP: entering PROLOG\n");
7600#endif
7601 }
7602 } else if ((cur == '<') && (next == '!') &&
7603 (avail < 9)) {
7604 goto done;
7605 } else {
7606 ctxt->instate = XML_PARSER_START_TAG;
7607#ifdef DEBUG_PUSH
7608 xmlGenericError(xmlGenericErrorContext,
7609 "PP: entering START_TAG\n");
7610#endif
7611 }
7612 break;
7613 case XML_PARSER_IGNORE:
7614 xmlGenericError(xmlGenericErrorContext,
7615 "PP: internal error, state == IGNORE");
7616 ctxt->instate = XML_PARSER_DTD;
7617#ifdef DEBUG_PUSH
7618 xmlGenericError(xmlGenericErrorContext,
7619 "PP: entering DTD\n");
7620#endif
7621 break;
7622 case XML_PARSER_PROLOG:
7623 SKIP_BLANKS;
7624 if (ctxt->input->buf == NULL)
7625 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7626 else
7627 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7628 if (avail < 2)
7629 goto done;
7630 cur = ctxt->input->cur[0];
7631 next = ctxt->input->cur[1];
7632 if ((cur == '<') && (next == '?')) {
7633 if ((!terminate) &&
7634 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7635 goto done;
7636#ifdef DEBUG_PUSH
7637 xmlGenericError(xmlGenericErrorContext,
7638 "PP: Parsing PI\n");
7639#endif
7640 xmlParsePI(ctxt);
7641 } else if ((cur == '<') && (next == '!') &&
7642 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7643 if ((!terminate) &&
7644 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7645 goto done;
7646#ifdef DEBUG_PUSH
7647 xmlGenericError(xmlGenericErrorContext,
7648 "PP: Parsing Comment\n");
7649#endif
7650 xmlParseComment(ctxt);
7651 ctxt->instate = XML_PARSER_PROLOG;
7652 } else if ((cur == '<') && (next == '!') &&
7653 (avail < 4)) {
7654 goto done;
7655 } else {
7656 ctxt->instate = XML_PARSER_START_TAG;
7657#ifdef DEBUG_PUSH
7658 xmlGenericError(xmlGenericErrorContext,
7659 "PP: entering START_TAG\n");
7660#endif
7661 }
7662 break;
7663 case XML_PARSER_EPILOG:
7664 SKIP_BLANKS;
7665 if (ctxt->input->buf == NULL)
7666 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7667 else
7668 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7669 if (avail < 2)
7670 goto done;
7671 cur = ctxt->input->cur[0];
7672 next = ctxt->input->cur[1];
7673 if ((cur == '<') && (next == '?')) {
7674 if ((!terminate) &&
7675 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7676 goto done;
7677#ifdef DEBUG_PUSH
7678 xmlGenericError(xmlGenericErrorContext,
7679 "PP: Parsing PI\n");
7680#endif
7681 xmlParsePI(ctxt);
7682 ctxt->instate = XML_PARSER_EPILOG;
7683 } else if ((cur == '<') && (next == '!') &&
7684 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7685 if ((!terminate) &&
7686 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7687 goto done;
7688#ifdef DEBUG_PUSH
7689 xmlGenericError(xmlGenericErrorContext,
7690 "PP: Parsing Comment\n");
7691#endif
7692 xmlParseComment(ctxt);
7693 ctxt->instate = XML_PARSER_EPILOG;
7694 } else if ((cur == '<') && (next == '!') &&
7695 (avail < 4)) {
7696 goto done;
7697 } else {
7698 ctxt->errNo = XML_ERR_DOCUMENT_END;
7699 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7700 ctxt->sax->error(ctxt->userData,
7701 "Extra content at the end of the document\n");
7702 ctxt->wellFormed = 0;
7703 ctxt->disableSAX = 1;
7704 ctxt->instate = XML_PARSER_EOF;
7705#ifdef DEBUG_PUSH
7706 xmlGenericError(xmlGenericErrorContext,
7707 "PP: entering EOF\n");
7708#endif
7709 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7710 (!ctxt->disableSAX))
7711 ctxt->sax->endDocument(ctxt->userData);
7712 goto done;
7713 }
7714 break;
7715 case XML_PARSER_START_TAG: {
7716 xmlChar *name, *oldname;
7717
7718 if ((avail < 2) && (ctxt->inputNr == 1))
7719 goto done;
7720 cur = ctxt->input->cur[0];
7721 if (cur != '<') {
7722 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7724 ctxt->sax->error(ctxt->userData,
7725 "Start tag expect, '<' not found\n");
7726 ctxt->wellFormed = 0;
7727 ctxt->disableSAX = 1;
7728 ctxt->instate = XML_PARSER_EOF;
7729#ifdef DEBUG_PUSH
7730 xmlGenericError(xmlGenericErrorContext,
7731 "PP: entering EOF\n");
7732#endif
7733 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7734 (!ctxt->disableSAX))
7735 ctxt->sax->endDocument(ctxt->userData);
7736 goto done;
7737 }
7738 if ((!terminate) &&
7739 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7740 goto done;
7741 if (ctxt->spaceNr == 0)
7742 spacePush(ctxt, -1);
7743 else
7744 spacePush(ctxt, *ctxt->space);
7745 name = xmlParseStartTag(ctxt);
7746 if (name == NULL) {
7747 spacePop(ctxt);
7748 ctxt->instate = XML_PARSER_EOF;
7749#ifdef DEBUG_PUSH
7750 xmlGenericError(xmlGenericErrorContext,
7751 "PP: entering EOF\n");
7752#endif
7753 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7754 (!ctxt->disableSAX))
7755 ctxt->sax->endDocument(ctxt->userData);
7756 goto done;
7757 }
7758 namePush(ctxt, xmlStrdup(name));
7759
7760 /*
7761 * [ VC: Root Element Type ]
7762 * The Name in the document type declaration must match
7763 * the element type of the root element.
7764 */
7765 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7766 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7767 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7768
7769 /*
7770 * Check for an Empty Element.
7771 */
7772 if ((RAW == '/') && (NXT(1) == '>')) {
7773 SKIP(2);
7774 if ((ctxt->sax != NULL) &&
7775 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7776 ctxt->sax->endElement(ctxt->userData, name);
7777 xmlFree(name);
7778 oldname = namePop(ctxt);
7779 spacePop(ctxt);
7780 if (oldname != NULL) {
7781#ifdef DEBUG_STACK
7782 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7783#endif
7784 xmlFree(oldname);
7785 }
7786 if (ctxt->name == NULL) {
7787 ctxt->instate = XML_PARSER_EPILOG;
7788#ifdef DEBUG_PUSH
7789 xmlGenericError(xmlGenericErrorContext,
7790 "PP: entering EPILOG\n");
7791#endif
7792 } else {
7793 ctxt->instate = XML_PARSER_CONTENT;
7794#ifdef DEBUG_PUSH
7795 xmlGenericError(xmlGenericErrorContext,
7796 "PP: entering CONTENT\n");
7797#endif
7798 }
7799 break;
7800 }
7801 if (RAW == '>') {
7802 NEXT;
7803 } else {
7804 ctxt->errNo = XML_ERR_GT_REQUIRED;
7805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7806 ctxt->sax->error(ctxt->userData,
7807 "Couldn't find end of Start Tag %s\n",
7808 name);
7809 ctxt->wellFormed = 0;
7810 ctxt->disableSAX = 1;
7811
7812 /*
7813 * end of parsing of this node.
7814 */
7815 nodePop(ctxt);
7816 oldname = namePop(ctxt);
7817 spacePop(ctxt);
7818 if (oldname != NULL) {
7819#ifdef DEBUG_STACK
7820 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7821#endif
7822 xmlFree(oldname);
7823 }
7824 }
7825 xmlFree(name);
7826 ctxt->instate = XML_PARSER_CONTENT;
7827#ifdef DEBUG_PUSH
7828 xmlGenericError(xmlGenericErrorContext,
7829 "PP: entering CONTENT\n");
7830#endif
7831 break;
7832 }
7833 case XML_PARSER_CONTENT: {
7834 const xmlChar *test;
7835 int cons;
7836 xmlChar tok;
7837
7838 /*
7839 * Handle preparsed entities and charRef
7840 */
7841 if (ctxt->token != 0) {
7842 xmlChar cur[2] = { 0 , 0 } ;
7843
7844 cur[0] = (xmlChar) ctxt->token;
7845 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7846 (ctxt->sax->characters != NULL))
7847 ctxt->sax->characters(ctxt->userData, cur, 1);
7848 ctxt->token = 0;
7849 }
7850 if ((avail < 2) && (ctxt->inputNr == 1))
7851 goto done;
7852 cur = ctxt->input->cur[0];
7853 next = ctxt->input->cur[1];
7854
7855 test = CUR_PTR;
7856 cons = ctxt->input->consumed;
7857 tok = ctxt->token;
7858 if ((cur == '<') && (next == '?')) {
7859 if ((!terminate) &&
7860 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7861 goto done;
7862#ifdef DEBUG_PUSH
7863 xmlGenericError(xmlGenericErrorContext,
7864 "PP: Parsing PI\n");
7865#endif
7866 xmlParsePI(ctxt);
7867 } else if ((cur == '<') && (next == '!') &&
7868 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7869 if ((!terminate) &&
7870 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7871 goto done;
7872#ifdef DEBUG_PUSH
7873 xmlGenericError(xmlGenericErrorContext,
7874 "PP: Parsing Comment\n");
7875#endif
7876 xmlParseComment(ctxt);
7877 ctxt->instate = XML_PARSER_CONTENT;
7878 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7879 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7880 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7881 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7882 (ctxt->input->cur[8] == '[')) {
7883 SKIP(9);
7884 ctxt->instate = XML_PARSER_CDATA_SECTION;
7885#ifdef DEBUG_PUSH
7886 xmlGenericError(xmlGenericErrorContext,
7887 "PP: entering CDATA_SECTION\n");
7888#endif
7889 break;
7890 } else if ((cur == '<') && (next == '!') &&
7891 (avail < 9)) {
7892 goto done;
7893 } else if ((cur == '<') && (next == '/')) {
7894 ctxt->instate = XML_PARSER_END_TAG;
7895#ifdef DEBUG_PUSH
7896 xmlGenericError(xmlGenericErrorContext,
7897 "PP: entering END_TAG\n");
7898#endif
7899 break;
7900 } else if (cur == '<') {
7901 ctxt->instate = XML_PARSER_START_TAG;
7902#ifdef DEBUG_PUSH
7903 xmlGenericError(xmlGenericErrorContext,
7904 "PP: entering START_TAG\n");
7905#endif
7906 break;
7907 } else if (cur == '&') {
7908 if ((!terminate) &&
7909 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
7910 goto done;
7911#ifdef DEBUG_PUSH
7912 xmlGenericError(xmlGenericErrorContext,
7913 "PP: Parsing Reference\n");
7914#endif
7915 xmlParseReference(ctxt);
7916 } else {
7917 /* TODO Avoid the extra copy, handle directly !!! */
7918 /*
7919 * Goal of the following test is:
7920 * - minimize calls to the SAX 'character' callback
7921 * when they are mergeable
7922 * - handle an problem for isBlank when we only parse
7923 * a sequence of blank chars and the next one is
7924 * not available to check against '<' presence.
7925 * - tries to homogenize the differences in SAX
7926 * callbacks beween the push and pull versions
7927 * of the parser.
7928 */
7929 if ((ctxt->inputNr == 1) &&
7930 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
7931 if ((!terminate) &&
7932 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
7933 goto done;
7934 }
7935 ctxt->checkIndex = 0;
7936#ifdef DEBUG_PUSH
7937 xmlGenericError(xmlGenericErrorContext,
7938 "PP: Parsing char data\n");
7939#endif
7940 xmlParseCharData(ctxt, 0);
7941 }
7942 /*
7943 * Pop-up of finished entities.
7944 */
7945 while ((RAW == 0) && (ctxt->inputNr > 1))
7946 xmlPopInput(ctxt);
7947 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
7948 (tok == ctxt->token)) {
7949 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
7950 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7951 ctxt->sax->error(ctxt->userData,
7952 "detected an error in element content\n");
7953 ctxt->wellFormed = 0;
7954 ctxt->disableSAX = 1;
7955 ctxt->instate = XML_PARSER_EOF;
7956 break;
7957 }
7958 break;
7959 }
7960 case XML_PARSER_CDATA_SECTION: {
7961 /*
7962 * The Push mode need to have the SAX callback for
7963 * cdataBlock merge back contiguous callbacks.
7964 */
7965 int base;
7966
7967 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
7968 if (base < 0) {
7969 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
7970 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
7971 if (ctxt->sax->cdataBlock != NULL)
7972 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
7973 XML_PARSER_BIG_BUFFER_SIZE);
7974 }
7975 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
7976 ctxt->checkIndex = 0;
7977 }
7978 goto done;
7979 } else {
7980 if ((ctxt->sax != NULL) && (base > 0) &&
7981 (!ctxt->disableSAX)) {
7982 if (ctxt->sax->cdataBlock != NULL)
7983 ctxt->sax->cdataBlock(ctxt->userData,
7984 ctxt->input->cur, base);
7985 }
7986 SKIP(base + 3);
7987 ctxt->checkIndex = 0;
7988 ctxt->instate = XML_PARSER_CONTENT;
7989#ifdef DEBUG_PUSH
7990 xmlGenericError(xmlGenericErrorContext,
7991 "PP: entering CONTENT\n");
7992#endif
7993 }
7994 break;
7995 }
7996 case XML_PARSER_END_TAG:
7997 if (avail < 2)
7998 goto done;
7999 if ((!terminate) &&
8000 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8001 goto done;
8002 xmlParseEndTag(ctxt);
8003 if (ctxt->name == NULL) {
8004 ctxt->instate = XML_PARSER_EPILOG;
8005#ifdef DEBUG_PUSH
8006 xmlGenericError(xmlGenericErrorContext,
8007 "PP: entering EPILOG\n");
8008#endif
8009 } else {
8010 ctxt->instate = XML_PARSER_CONTENT;
8011#ifdef DEBUG_PUSH
8012 xmlGenericError(xmlGenericErrorContext,
8013 "PP: entering CONTENT\n");
8014#endif
8015 }
8016 break;
8017 case XML_PARSER_DTD: {
8018 /*
8019 * Sorry but progressive parsing of the internal subset
8020 * is not expected to be supported. We first check that
8021 * the full content of the internal subset is available and
8022 * the parsing is launched only at that point.
8023 * Internal subset ends up with "']' S? '>'" in an unescaped
8024 * section and not in a ']]>' sequence which are conditional
8025 * sections (whoever argued to keep that crap in XML deserve
8026 * a place in hell !).
8027 */
8028 int base, i;
8029 xmlChar *buf;
8030 xmlChar quote = 0;
8031
8032 base = ctxt->input->cur - ctxt->input->base;
8033 if (base < 0) return(0);
8034 if (ctxt->checkIndex > base)
8035 base = ctxt->checkIndex;
8036 buf = ctxt->input->buf->buffer->content;
8037 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8038 base++) {
8039 if (quote != 0) {
8040 if (buf[base] == quote)
8041 quote = 0;
8042 continue;
8043 }
8044 if (buf[base] == '"') {
8045 quote = '"';
8046 continue;
8047 }
8048 if (buf[base] == '\'') {
8049 quote = '\'';
8050 continue;
8051 }
8052 if (buf[base] == ']') {
8053 if ((unsigned int) base +1 >=
8054 ctxt->input->buf->buffer->use)
8055 break;
8056 if (buf[base + 1] == ']') {
8057 /* conditional crap, skip both ']' ! */
8058 base++;
8059 continue;
8060 }
8061 for (i = 0;
8062 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8063 i++) {
8064 if (buf[base + i] == '>')
8065 goto found_end_int_subset;
8066 }
8067 break;
8068 }
8069 }
8070 /*
8071 * We didn't found the end of the Internal subset
8072 */
8073 if (quote == 0)
8074 ctxt->checkIndex = base;
8075#ifdef DEBUG_PUSH
8076 if (next == 0)
8077 xmlGenericError(xmlGenericErrorContext,
8078 "PP: lookup of int subset end filed\n");
8079#endif
8080 goto done;
8081
8082found_end_int_subset:
8083 xmlParseInternalSubset(ctxt);
8084 ctxt->inSubset = 2;
8085 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8086 (ctxt->sax->externalSubset != NULL))
8087 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8088 ctxt->extSubSystem, ctxt->extSubURI);
8089 ctxt->inSubset = 0;
8090 ctxt->instate = XML_PARSER_PROLOG;
8091 ctxt->checkIndex = 0;
8092#ifdef DEBUG_PUSH
8093 xmlGenericError(xmlGenericErrorContext,
8094 "PP: entering PROLOG\n");
8095#endif
8096 break;
8097 }
8098 case XML_PARSER_COMMENT:
8099 xmlGenericError(xmlGenericErrorContext,
8100 "PP: internal error, state == COMMENT\n");
8101 ctxt->instate = XML_PARSER_CONTENT;
8102#ifdef DEBUG_PUSH
8103 xmlGenericError(xmlGenericErrorContext,
8104 "PP: entering CONTENT\n");
8105#endif
8106 break;
8107 case XML_PARSER_PI:
8108 xmlGenericError(xmlGenericErrorContext,
8109 "PP: internal error, state == PI\n");
8110 ctxt->instate = XML_PARSER_CONTENT;
8111#ifdef DEBUG_PUSH
8112 xmlGenericError(xmlGenericErrorContext,
8113 "PP: entering CONTENT\n");
8114#endif
8115 break;
8116 case XML_PARSER_ENTITY_DECL:
8117 xmlGenericError(xmlGenericErrorContext,
8118 "PP: internal error, state == ENTITY_DECL\n");
8119 ctxt->instate = XML_PARSER_DTD;
8120#ifdef DEBUG_PUSH
8121 xmlGenericError(xmlGenericErrorContext,
8122 "PP: entering DTD\n");
8123#endif
8124 break;
8125 case XML_PARSER_ENTITY_VALUE:
8126 xmlGenericError(xmlGenericErrorContext,
8127 "PP: internal error, state == ENTITY_VALUE\n");
8128 ctxt->instate = XML_PARSER_CONTENT;
8129#ifdef DEBUG_PUSH
8130 xmlGenericError(xmlGenericErrorContext,
8131 "PP: entering DTD\n");
8132#endif
8133 break;
8134 case XML_PARSER_ATTRIBUTE_VALUE:
8135 xmlGenericError(xmlGenericErrorContext,
8136 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8137 ctxt->instate = XML_PARSER_START_TAG;
8138#ifdef DEBUG_PUSH
8139 xmlGenericError(xmlGenericErrorContext,
8140 "PP: entering START_TAG\n");
8141#endif
8142 break;
8143 case XML_PARSER_SYSTEM_LITERAL:
8144 xmlGenericError(xmlGenericErrorContext,
8145 "PP: internal error, state == SYSTEM_LITERAL\n");
8146 ctxt->instate = XML_PARSER_START_TAG;
8147#ifdef DEBUG_PUSH
8148 xmlGenericError(xmlGenericErrorContext,
8149 "PP: entering START_TAG\n");
8150#endif
8151 break;
8152 }
8153 }
8154done:
8155#ifdef DEBUG_PUSH
8156 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8157#endif
8158 return(ret);
8159}
8160
8161/**
8162 * xmlParseTry:
8163 * @ctxt: an XML parser context
8164 *
8165 * Try to progress on parsing
8166 *
8167 * Returns zero if no parsing was possible
8168 */
8169int
8170xmlParseTry(xmlParserCtxtPtr ctxt) {
8171 return(xmlParseTryOrFinish(ctxt, 0));
8172}
8173
8174/**
8175 * xmlParseChunk:
8176 * @ctxt: an XML parser context
8177 * @chunk: an char array
8178 * @size: the size in byte of the chunk
8179 * @terminate: last chunk indicator
8180 *
8181 * Parse a Chunk of memory
8182 *
8183 * Returns zero if no error, the xmlParserErrors otherwise.
8184 */
8185int
8186xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8187 int terminate) {
8188 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8189 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8190 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8191 int cur = ctxt->input->cur - ctxt->input->base;
8192
8193 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8194 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8195 ctxt->input->cur = ctxt->input->base + cur;
8196#ifdef DEBUG_PUSH
8197 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8198#endif
8199
8200 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8201 xmlParseTryOrFinish(ctxt, terminate);
8202 } else if (ctxt->instate != XML_PARSER_EOF) {
8203 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8204 xmlParserInputBufferPtr in = ctxt->input->buf;
8205 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8206 (in->raw != NULL)) {
8207 int nbchars;
8208
8209 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8210 if (nbchars < 0) {
8211 xmlGenericError(xmlGenericErrorContext,
8212 "xmlParseChunk: encoder error\n");
8213 return(XML_ERR_INVALID_ENCODING);
8214 }
8215 }
8216 }
8217 }
8218 xmlParseTryOrFinish(ctxt, terminate);
8219 if (terminate) {
8220 /*
8221 * Check for termination
8222 */
8223 if ((ctxt->instate != XML_PARSER_EOF) &&
8224 (ctxt->instate != XML_PARSER_EPILOG)) {
8225 ctxt->errNo = XML_ERR_DOCUMENT_END;
8226 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8227 ctxt->sax->error(ctxt->userData,
8228 "Extra content at the end of the document\n");
8229 ctxt->wellFormed = 0;
8230 ctxt->disableSAX = 1;
8231 }
8232 if (ctxt->instate != XML_PARSER_EOF) {
8233 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8234 (!ctxt->disableSAX))
8235 ctxt->sax->endDocument(ctxt->userData);
8236 }
8237 ctxt->instate = XML_PARSER_EOF;
8238 }
8239 return((xmlParserErrors) ctxt->errNo);
8240}
8241
8242/************************************************************************
8243 * *
8244 * I/O front end functions to the parser *
8245 * *
8246 ************************************************************************/
8247
8248/**
8249 * xmlStopParser:
8250 * @ctxt: an XML parser context
8251 *
8252 * Blocks further parser processing
8253 */
8254void
8255xmlStopParser(xmlParserCtxtPtr ctxt) {
8256 ctxt->instate = XML_PARSER_EOF;
8257 if (ctxt->input != NULL)
8258 ctxt->input->cur = BAD_CAST"";
8259}
8260
8261/**
8262 * xmlCreatePushParserCtxt:
8263 * @sax: a SAX handler
8264 * @user_data: The user data returned on SAX callbacks
8265 * @chunk: a pointer to an array of chars
8266 * @size: number of chars in the array
8267 * @filename: an optional file name or URI
8268 *
8269 * Create a parser context for using the XML parser in push mode
8270 * To allow content encoding detection, @size should be >= 4
8271 * The value of @filename is used for fetching external entities
8272 * and error/warning reports.
8273 *
8274 * Returns the new parser context or NULL
8275 */
8276xmlParserCtxtPtr
8277xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8278 const char *chunk, int size, const char *filename) {
8279 xmlParserCtxtPtr ctxt;
8280 xmlParserInputPtr inputStream;
8281 xmlParserInputBufferPtr buf;
8282 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8283
8284 /*
8285 * plug some encoding conversion routines
8286 */
8287 if ((chunk != NULL) && (size >= 4))
8288 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8289
8290 buf = xmlAllocParserInputBuffer(enc);
8291 if (buf == NULL) return(NULL);
8292
8293 ctxt = xmlNewParserCtxt();
8294 if (ctxt == NULL) {
8295 xmlFree(buf);
8296 return(NULL);
8297 }
8298 if (sax != NULL) {
8299 if (ctxt->sax != &xmlDefaultSAXHandler)
8300 xmlFree(ctxt->sax);
8301 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8302 if (ctxt->sax == NULL) {
8303 xmlFree(buf);
8304 xmlFree(ctxt);
8305 return(NULL);
8306 }
8307 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8308 if (user_data != NULL)
8309 ctxt->userData = user_data;
8310 }
8311 if (filename == NULL) {
8312 ctxt->directory = NULL;
8313 } else {
8314 ctxt->directory = xmlParserGetDirectory(filename);
8315 }
8316
8317 inputStream = xmlNewInputStream(ctxt);
8318 if (inputStream == NULL) {
8319 xmlFreeParserCtxt(ctxt);
8320 return(NULL);
8321 }
8322
8323 if (filename == NULL)
8324 inputStream->filename = NULL;
8325 else
8326 inputStream->filename = xmlMemStrdup(filename);
8327 inputStream->buf = buf;
8328 inputStream->base = inputStream->buf->buffer->content;
8329 inputStream->cur = inputStream->buf->buffer->content;
8330 if (enc != XML_CHAR_ENCODING_NONE) {
8331 xmlSwitchEncoding(ctxt, enc);
8332 }
8333
8334 inputPush(ctxt, inputStream);
8335
8336 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8337 (ctxt->input->buf != NULL)) {
8338 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8339#ifdef DEBUG_PUSH
8340 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8341#endif
8342 }
8343
8344 return(ctxt);
8345}
8346
8347/**
8348 * xmlCreateIOParserCtxt:
8349 * @sax: a SAX handler
8350 * @user_data: The user data returned on SAX callbacks
8351 * @ioread: an I/O read function
8352 * @ioclose: an I/O close function
8353 * @ioctx: an I/O handler
8354 * @enc: the charset encoding if known
8355 *
8356 * Create a parser context for using the XML parser with an existing
8357 * I/O stream
8358 *
8359 * Returns the new parser context or NULL
8360 */
8361xmlParserCtxtPtr
8362xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8363 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8364 void *ioctx, xmlCharEncoding enc) {
8365 xmlParserCtxtPtr ctxt;
8366 xmlParserInputPtr inputStream;
8367 xmlParserInputBufferPtr buf;
8368
8369 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8370 if (buf == NULL) return(NULL);
8371
8372 ctxt = xmlNewParserCtxt();
8373 if (ctxt == NULL) {
8374 xmlFree(buf);
8375 return(NULL);
8376 }
8377 if (sax != NULL) {
8378 if (ctxt->sax != &xmlDefaultSAXHandler)
8379 xmlFree(ctxt->sax);
8380 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8381 if (ctxt->sax == NULL) {
8382 xmlFree(buf);
8383 xmlFree(ctxt);
8384 return(NULL);
8385 }
8386 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8387 if (user_data != NULL)
8388 ctxt->userData = user_data;
8389 }
8390
8391 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8392 if (inputStream == NULL) {
8393 xmlFreeParserCtxt(ctxt);
8394 return(NULL);
8395 }
8396 inputPush(ctxt, inputStream);
8397
8398 return(ctxt);
8399}
8400
8401/************************************************************************
8402 * *
8403 * Front ends when parsing a Dtd *
8404 * *
8405 ************************************************************************/
8406
8407/**
8408 * xmlIOParseDTD:
8409 * @sax: the SAX handler block or NULL
8410 * @input: an Input Buffer
8411 * @enc: the charset encoding if known
8412 *
8413 * Load and parse a DTD
8414 *
8415 * Returns the resulting xmlDtdPtr or NULL in case of error.
8416 * @input will be freed at parsing end.
8417 */
8418
8419xmlDtdPtr
8420xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8421 xmlCharEncoding enc) {
8422 xmlDtdPtr ret = NULL;
8423 xmlParserCtxtPtr ctxt;
8424 xmlParserInputPtr pinput = NULL;
8425
8426 if (input == NULL)
8427 return(NULL);
8428
8429 ctxt = xmlNewParserCtxt();
8430 if (ctxt == NULL) {
8431 return(NULL);
8432 }
8433
8434 /*
8435 * Set-up the SAX context
8436 */
8437 if (sax != NULL) {
8438 if (ctxt->sax != NULL)
8439 xmlFree(ctxt->sax);
8440 ctxt->sax = sax;
8441 ctxt->userData = NULL;
8442 }
8443
8444 /*
8445 * generate a parser input from the I/O handler
8446 */
8447
8448 pinput = xmlNewIOInputStream(ctxt, input, enc);
8449 if (pinput == NULL) {
8450 if (sax != NULL) ctxt->sax = NULL;
8451 xmlFreeParserCtxt(ctxt);
8452 return(NULL);
8453 }
8454
8455 /*
8456 * plug some encoding conversion routines here.
8457 */
8458 xmlPushInput(ctxt, pinput);
8459
8460 pinput->filename = NULL;
8461 pinput->line = 1;
8462 pinput->col = 1;
8463 pinput->base = ctxt->input->cur;
8464 pinput->cur = ctxt->input->cur;
8465 pinput->free = NULL;
8466
8467 /*
8468 * let's parse that entity knowing it's an external subset.
8469 */
8470 ctxt->inSubset = 2;
8471 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8472 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8473 BAD_CAST "none", BAD_CAST "none");
8474 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8475
8476 if (ctxt->myDoc != NULL) {
8477 if (ctxt->wellFormed) {
8478 ret = ctxt->myDoc->extSubset;
8479 ctxt->myDoc->extSubset = NULL;
8480 } else {
8481 ret = NULL;
8482 }
8483 xmlFreeDoc(ctxt->myDoc);
8484 ctxt->myDoc = NULL;
8485 }
8486 if (sax != NULL) ctxt->sax = NULL;
8487 xmlFreeParserCtxt(ctxt);
8488
8489 return(ret);
8490}
8491
8492/**
8493 * xmlSAXParseDTD:
8494 * @sax: the SAX handler block
8495 * @ExternalID: a NAME* containing the External ID of the DTD
8496 * @SystemID: a NAME* containing the URL to the DTD
8497 *
8498 * Load and parse an external subset.
8499 *
8500 * Returns the resulting xmlDtdPtr or NULL in case of error.
8501 */
8502
8503xmlDtdPtr
8504xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8505 const xmlChar *SystemID) {
8506 xmlDtdPtr ret = NULL;
8507 xmlParserCtxtPtr ctxt;
8508 xmlParserInputPtr input = NULL;
8509 xmlCharEncoding enc;
8510
8511 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8512
8513 ctxt = xmlNewParserCtxt();
8514 if (ctxt == NULL) {
8515 return(NULL);
8516 }
8517
8518 /*
8519 * Set-up the SAX context
8520 */
8521 if (sax != NULL) {
8522 if (ctxt->sax != NULL)
8523 xmlFree(ctxt->sax);
8524 ctxt->sax = sax;
8525 ctxt->userData = NULL;
8526 }
8527
8528 /*
8529 * Ask the Entity resolver to load the damn thing
8530 */
8531
8532 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8533 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8534 if (input == NULL) {
8535 if (sax != NULL) ctxt->sax = NULL;
8536 xmlFreeParserCtxt(ctxt);
8537 return(NULL);
8538 }
8539
8540 /*
8541 * plug some encoding conversion routines here.
8542 */
8543 xmlPushInput(ctxt, input);
8544 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8545 xmlSwitchEncoding(ctxt, enc);
8546
8547 if (input->filename == NULL)
8548 input->filename = (char *) xmlStrdup(SystemID);
8549 input->line = 1;
8550 input->col = 1;
8551 input->base = ctxt->input->cur;
8552 input->cur = ctxt->input->cur;
8553 input->free = NULL;
8554
8555 /*
8556 * let's parse that entity knowing it's an external subset.
8557 */
8558 ctxt->inSubset = 2;
8559 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8560 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8561 ExternalID, SystemID);
8562 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8563
8564 if (ctxt->myDoc != NULL) {
8565 if (ctxt->wellFormed) {
8566 ret = ctxt->myDoc->extSubset;
8567 ctxt->myDoc->extSubset = NULL;
8568 } else {
8569 ret = NULL;
8570 }
8571 xmlFreeDoc(ctxt->myDoc);
8572 ctxt->myDoc = NULL;
8573 }
8574 if (sax != NULL) ctxt->sax = NULL;
8575 xmlFreeParserCtxt(ctxt);
8576
8577 return(ret);
8578}
8579
8580/**
8581 * xmlParseDTD:
8582 * @ExternalID: a NAME* containing the External ID of the DTD
8583 * @SystemID: a NAME* containing the URL to the DTD
8584 *
8585 * Load and parse an external subset.
8586 *
8587 * Returns the resulting xmlDtdPtr or NULL in case of error.
8588 */
8589
8590xmlDtdPtr
8591xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8592 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8593}
8594
8595/************************************************************************
8596 * *
8597 * Front ends when parsing an Entity *
8598 * *
8599 ************************************************************************/
8600
8601/**
8602 * xmlSAXParseBalancedChunk:
8603 * @ctx: an XML parser context (possibly NULL)
8604 * @sax: the SAX handler bloc (possibly NULL)
8605 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8606 * @input: a parser input stream
8607 * @enc: the encoding
8608 *
8609 * Parse a well-balanced chunk of an XML document
8610 * The user has to provide SAX callback block whose routines will be
8611 * called by the parser
8612 * The allowed sequence for the Well Balanced Chunk is the one defined by
8613 * the content production in the XML grammar:
8614 *
8615 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8616 *
8617 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8618 * the error code otherwise
8619 */
8620
8621int
8622xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8623 void *user_data, xmlParserInputPtr input,
8624 xmlCharEncoding enc) {
8625 xmlParserCtxtPtr ctxt;
8626 int ret;
8627
8628 if (input == NULL) return(-1);
8629
8630 if (ctx != NULL)
8631 ctxt = ctx;
8632 else {
8633 ctxt = xmlNewParserCtxt();
8634 if (ctxt == NULL)
8635 return(-1);
8636 if (sax == NULL)
8637 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8638 }
8639
8640 /*
8641 * Set-up the SAX context
8642 */
8643 if (sax != NULL) {
8644 if (ctxt->sax != NULL)
8645 xmlFree(ctxt->sax);
8646 ctxt->sax = sax;
8647 ctxt->userData = user_data;
8648 }
8649
8650 /*
8651 * plug some encoding conversion routines here.
8652 */
8653 xmlPushInput(ctxt, input);
8654 if (enc != XML_CHAR_ENCODING_NONE)
8655 xmlSwitchEncoding(ctxt, enc);
8656
8657 /*
8658 * let's parse that entity knowing it's an external subset.
8659 */
8660 xmlParseContent(ctxt);
8661 ret = ctxt->errNo;
8662
8663 if (ctx == NULL) {
8664 if (sax != NULL)
8665 ctxt->sax = NULL;
8666 else
8667 xmlFreeDoc(ctxt->myDoc);
8668 xmlFreeParserCtxt(ctxt);
8669 }
8670 return(ret);
8671}
8672
8673/**
8674 * xmlParseCtxtExternalEntity:
8675 * @ctx: the existing parsing context
8676 * @URL: the URL for the entity to load
8677 * @ID: the System ID for the entity to load
8678 * @list: the return value for the set of parsed nodes
8679 *
8680 * Parse an external general entity within an existing parsing context
8681 * An external general parsed entity is well-formed if it matches the
8682 * production labeled extParsedEnt.
8683 *
8684 * [78] extParsedEnt ::= TextDecl? content
8685 *
8686 * Returns 0 if the entity is well formed, -1 in case of args problem and
8687 * the parser error code otherwise
8688 */
8689
8690int
8691xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8692 const xmlChar *ID, xmlNodePtr *list) {
8693 xmlParserCtxtPtr ctxt;
8694 xmlDocPtr newDoc;
8695 xmlSAXHandlerPtr oldsax = NULL;
8696 int ret = 0;
8697
8698 if (ctx->depth > 40) {
8699 return(XML_ERR_ENTITY_LOOP);
8700 }
8701
8702 if (list != NULL)
8703 *list = NULL;
8704 if ((URL == NULL) && (ID == NULL))
8705 return(-1);
8706 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8707 return(-1);
8708
8709
8710 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8711 if (ctxt == NULL) return(-1);
8712 ctxt->userData = ctxt;
8713 oldsax = ctxt->sax;
8714 ctxt->sax = ctx->sax;
8715 newDoc = xmlNewDoc(BAD_CAST "1.0");
8716 if (newDoc == NULL) {
8717 xmlFreeParserCtxt(ctxt);
8718 return(-1);
8719 }
8720 if (ctx->myDoc != NULL) {
8721 newDoc->intSubset = ctx->myDoc->intSubset;
8722 newDoc->extSubset = ctx->myDoc->extSubset;
8723 }
8724 if (ctx->myDoc->URL != NULL) {
8725 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8726 }
8727 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8728 if (newDoc->children == NULL) {
8729 ctxt->sax = oldsax;
8730 xmlFreeParserCtxt(ctxt);
8731 newDoc->intSubset = NULL;
8732 newDoc->extSubset = NULL;
8733 xmlFreeDoc(newDoc);
8734 return(-1);
8735 }
8736 nodePush(ctxt, newDoc->children);
8737 if (ctx->myDoc == NULL) {
8738 ctxt->myDoc = newDoc;
8739 } else {
8740 ctxt->myDoc = ctx->myDoc;
8741 newDoc->children->doc = ctx->myDoc;
8742 }
8743
8744 /*
8745 * Parse a possible text declaration first
8746 */
8747 GROW;
8748 if ((RAW == '<') && (NXT(1) == '?') &&
8749 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8750 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8751 xmlParseTextDecl(ctxt);
8752 }
8753
8754 /*
8755 * Doing validity checking on chunk doesn't make sense
8756 */
8757 ctxt->instate = XML_PARSER_CONTENT;
8758 ctxt->validate = ctx->validate;
8759 ctxt->loadsubset = ctx->loadsubset;
8760 ctxt->depth = ctx->depth + 1;
8761 ctxt->replaceEntities = ctx->replaceEntities;
8762 if (ctxt->validate) {
8763 ctxt->vctxt.error = ctx->vctxt.error;
8764 ctxt->vctxt.warning = ctx->vctxt.warning;
8765 /* Allocate the Node stack */
8766 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8767 if (ctxt->vctxt.nodeTab == NULL) {
8768 xmlGenericError(xmlGenericErrorContext,
8769 "xmlParseCtxtExternalEntity: out of memory\n");
8770 ctxt->validate = 0;
8771 ctxt->vctxt.error = NULL;
8772 ctxt->vctxt.warning = NULL;
8773 } else {
8774 ctxt->vctxt.nodeNr = 0;
8775 ctxt->vctxt.nodeMax = 4;
8776 ctxt->vctxt.node = NULL;
8777 }
8778 } else {
8779 ctxt->vctxt.error = NULL;
8780 ctxt->vctxt.warning = NULL;
8781 }
8782
8783 xmlParseContent(ctxt);
8784
8785 if ((RAW == '<') && (NXT(1) == '/')) {
8786 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8787 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8788 ctxt->sax->error(ctxt->userData,
8789 "chunk is not well balanced\n");
8790 ctxt->wellFormed = 0;
8791 ctxt->disableSAX = 1;
8792 } else if (RAW != 0) {
8793 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8794 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8795 ctxt->sax->error(ctxt->userData,
8796 "extra content at the end of well balanced chunk\n");
8797 ctxt->wellFormed = 0;
8798 ctxt->disableSAX = 1;
8799 }
8800 if (ctxt->node != newDoc->children) {
8801 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8803 ctxt->sax->error(ctxt->userData,
8804 "chunk is not well balanced\n");
8805 ctxt->wellFormed = 0;
8806 ctxt->disableSAX = 1;
8807 }
8808
8809 if (!ctxt->wellFormed) {
8810 if (ctxt->errNo == 0)
8811 ret = 1;
8812 else
8813 ret = ctxt->errNo;
8814 } else {
8815 if (list != NULL) {
8816 xmlNodePtr cur;
8817
8818 /*
8819 * Return the newly created nodeset after unlinking it from
8820 * they pseudo parent.
8821 */
8822 cur = newDoc->children->children;
8823 *list = cur;
8824 while (cur != NULL) {
8825 cur->parent = NULL;
8826 cur = cur->next;
8827 }
8828 newDoc->children->children = NULL;
8829 }
8830 ret = 0;
8831 }
8832 ctxt->sax = oldsax;
8833 xmlFreeParserCtxt(ctxt);
8834 newDoc->intSubset = NULL;
8835 newDoc->extSubset = NULL;
8836 xmlFreeDoc(newDoc);
8837
8838 return(ret);
8839}
8840
8841/**
8842 * xmlParseExternalEntity:
8843 * @doc: the document the chunk pertains to
8844 * @sax: the SAX handler bloc (possibly NULL)
8845 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8846 * @depth: Used for loop detection, use 0
8847 * @URL: the URL for the entity to load
8848 * @ID: the System ID for the entity to load
8849 * @list: the return value for the set of parsed nodes
8850 *
8851 * Parse an external general entity
8852 * An external general parsed entity is well-formed if it matches the
8853 * production labeled extParsedEnt.
8854 *
8855 * [78] extParsedEnt ::= TextDecl? content
8856 *
8857 * Returns 0 if the entity is well formed, -1 in case of args problem and
8858 * the parser error code otherwise
8859 */
8860
8861int
8862xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8863 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8864 xmlParserCtxtPtr ctxt;
8865 xmlDocPtr newDoc;
8866 xmlSAXHandlerPtr oldsax = NULL;
8867 int ret = 0;
8868
8869 if (depth > 40) {
8870 return(XML_ERR_ENTITY_LOOP);
8871 }
8872
8873
8874
8875 if (list != NULL)
8876 *list = NULL;
8877 if ((URL == NULL) && (ID == NULL))
8878 return(-1);
8879 if (doc == NULL) /* @@ relax but check for dereferences */
8880 return(-1);
8881
8882
8883 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8884 if (ctxt == NULL) return(-1);
8885 ctxt->userData = ctxt;
8886 if (sax != NULL) {
8887 oldsax = ctxt->sax;
8888 ctxt->sax = sax;
8889 if (user_data != NULL)
8890 ctxt->userData = user_data;
8891 }
8892 newDoc = xmlNewDoc(BAD_CAST "1.0");
8893 if (newDoc == NULL) {
8894 xmlFreeParserCtxt(ctxt);
8895 return(-1);
8896 }
8897 if (doc != NULL) {
8898 newDoc->intSubset = doc->intSubset;
8899 newDoc->extSubset = doc->extSubset;
8900 }
8901 if (doc->URL != NULL) {
8902 newDoc->URL = xmlStrdup(doc->URL);
8903 }
8904 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8905 if (newDoc->children == NULL) {
8906 if (sax != NULL)
8907 ctxt->sax = oldsax;
8908 xmlFreeParserCtxt(ctxt);
8909 newDoc->intSubset = NULL;
8910 newDoc->extSubset = NULL;
8911 xmlFreeDoc(newDoc);
8912 return(-1);
8913 }
8914 nodePush(ctxt, newDoc->children);
8915 if (doc == NULL) {
8916 ctxt->myDoc = newDoc;
8917 } else {
8918 ctxt->myDoc = doc;
8919 newDoc->children->doc = doc;
8920 }
8921
8922 /*
8923 * Parse a possible text declaration first
8924 */
8925 GROW;
8926 if ((RAW == '<') && (NXT(1) == '?') &&
8927 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8928 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8929 xmlParseTextDecl(ctxt);
8930 }
8931
8932 /*
8933 * Doing validity checking on chunk doesn't make sense
8934 */
8935 ctxt->instate = XML_PARSER_CONTENT;
8936 ctxt->validate = 0;
8937 ctxt->loadsubset = 0;
8938 ctxt->depth = depth;
8939
8940 xmlParseContent(ctxt);
8941
8942 if ((RAW == '<') && (NXT(1) == '/')) {
8943 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8945 ctxt->sax->error(ctxt->userData,
8946 "chunk is not well balanced\n");
8947 ctxt->wellFormed = 0;
8948 ctxt->disableSAX = 1;
8949 } else if (RAW != 0) {
8950 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8951 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8952 ctxt->sax->error(ctxt->userData,
8953 "extra content at the end of well balanced chunk\n");
8954 ctxt->wellFormed = 0;
8955 ctxt->disableSAX = 1;
8956 }
8957 if (ctxt->node != newDoc->children) {
8958 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8959 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8960 ctxt->sax->error(ctxt->userData,
8961 "chunk is not well balanced\n");
8962 ctxt->wellFormed = 0;
8963 ctxt->disableSAX = 1;
8964 }
8965
8966 if (!ctxt->wellFormed) {
8967 if (ctxt->errNo == 0)
8968 ret = 1;
8969 else
8970 ret = ctxt->errNo;
8971 } else {
8972 if (list != NULL) {
8973 xmlNodePtr cur;
8974
8975 /*
8976 * Return the newly created nodeset after unlinking it from
8977 * they pseudo parent.
8978 */
8979 cur = newDoc->children->children;
8980 *list = cur;
8981 while (cur != NULL) {
8982 cur->parent = NULL;
8983 cur = cur->next;
8984 }
8985 newDoc->children->children = NULL;
8986 }
8987 ret = 0;
8988 }
8989 if (sax != NULL)
8990 ctxt->sax = oldsax;
8991 xmlFreeParserCtxt(ctxt);
8992 newDoc->intSubset = NULL;
8993 newDoc->extSubset = NULL;
8994 xmlFreeDoc(newDoc);
8995
8996 return(ret);
8997}
8998
8999/**
9000 * xmlParseBalancedChunk:
9001 * @doc: the document the chunk pertains to
9002 * @sax: the SAX handler bloc (possibly NULL)
9003 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9004 * @depth: Used for loop detection, use 0
9005 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9006 * @list: the return value for the set of parsed nodes
9007 *
9008 * Parse a well-balanced chunk of an XML document
9009 * called by the parser
9010 * The allowed sequence for the Well Balanced Chunk is the one defined by
9011 * the content production in the XML grammar:
9012 *
9013 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9014 *
9015 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9016 * the parser error code otherwise
9017 */
9018
9019int
9020xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9021 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9022 xmlParserCtxtPtr ctxt;
9023 xmlDocPtr newDoc;
9024 xmlSAXHandlerPtr oldsax = NULL;
9025 int size;
9026 int ret = 0;
9027
9028 if (depth > 40) {
9029 return(XML_ERR_ENTITY_LOOP);
9030 }
9031
9032
9033 if (list != NULL)
9034 *list = NULL;
9035 if (string == NULL)
9036 return(-1);
9037
9038 size = xmlStrlen(string);
9039
9040 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9041 if (ctxt == NULL) return(-1);
9042 ctxt->userData = ctxt;
9043 if (sax != NULL) {
9044 oldsax = ctxt->sax;
9045 ctxt->sax = sax;
9046 if (user_data != NULL)
9047 ctxt->userData = user_data;
9048 }
9049 newDoc = xmlNewDoc(BAD_CAST "1.0");
9050 if (newDoc == NULL) {
9051 xmlFreeParserCtxt(ctxt);
9052 return(-1);
9053 }
9054 if (doc != NULL) {
9055 newDoc->intSubset = doc->intSubset;
9056 newDoc->extSubset = doc->extSubset;
9057 }
9058 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9059 if (newDoc->children == NULL) {
9060 if (sax != NULL)
9061 ctxt->sax = oldsax;
9062 xmlFreeParserCtxt(ctxt);
9063 newDoc->intSubset = NULL;
9064 newDoc->extSubset = NULL;
9065 xmlFreeDoc(newDoc);
9066 return(-1);
9067 }
9068 nodePush(ctxt, newDoc->children);
9069 if (doc == NULL) {
9070 ctxt->myDoc = newDoc;
9071 } else {
9072 ctxt->myDoc = doc;
9073 newDoc->children->doc = doc;
9074 }
9075 ctxt->instate = XML_PARSER_CONTENT;
9076 ctxt->depth = depth;
9077
9078 /*
9079 * Doing validity checking on chunk doesn't make sense
9080 */
9081 ctxt->validate = 0;
9082 ctxt->loadsubset = 0;
9083
9084 xmlParseContent(ctxt);
9085
9086 if ((RAW == '<') && (NXT(1) == '/')) {
9087 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9088 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9089 ctxt->sax->error(ctxt->userData,
9090 "chunk is not well balanced\n");
9091 ctxt->wellFormed = 0;
9092 ctxt->disableSAX = 1;
9093 } else if (RAW != 0) {
9094 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9096 ctxt->sax->error(ctxt->userData,
9097 "extra content at the end of well balanced chunk\n");
9098 ctxt->wellFormed = 0;
9099 ctxt->disableSAX = 1;
9100 }
9101 if (ctxt->node != newDoc->children) {
9102 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9104 ctxt->sax->error(ctxt->userData,
9105 "chunk is not well balanced\n");
9106 ctxt->wellFormed = 0;
9107 ctxt->disableSAX = 1;
9108 }
9109
9110 if (!ctxt->wellFormed) {
9111 if (ctxt->errNo == 0)
9112 ret = 1;
9113 else
9114 ret = ctxt->errNo;
9115 } else {
9116 if (list != NULL) {
9117 xmlNodePtr cur;
9118
9119 /*
9120 * Return the newly created nodeset after unlinking it from
9121 * they pseudo parent.
9122 */
9123 cur = newDoc->children->children;
9124 *list = cur;
9125 while (cur != NULL) {
9126 cur->parent = NULL;
9127 cur = cur->next;
9128 }
9129 newDoc->children->children = NULL;
9130 }
9131 ret = 0;
9132 }
9133 if (sax != NULL)
9134 ctxt->sax = oldsax;
9135 xmlFreeParserCtxt(ctxt);
9136 newDoc->intSubset = NULL;
9137 newDoc->extSubset = NULL;
9138 xmlFreeDoc(newDoc);
9139
9140 return(ret);
9141}
9142
9143/**
9144 * xmlSAXParseEntity:
9145 * @sax: the SAX handler block
9146 * @filename: the filename
9147 *
9148 * parse an XML external entity out of context and build a tree.
9149 * It use the given SAX function block to handle the parsing callback.
9150 * If sax is NULL, fallback to the default DOM tree building routines.
9151 *
9152 * [78] extParsedEnt ::= TextDecl? content
9153 *
9154 * This correspond to a "Well Balanced" chunk
9155 *
9156 * Returns the resulting document tree
9157 */
9158
9159xmlDocPtr
9160xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9161 xmlDocPtr ret;
9162 xmlParserCtxtPtr ctxt;
9163 char *directory = NULL;
9164
9165 ctxt = xmlCreateFileParserCtxt(filename);
9166 if (ctxt == NULL) {
9167 return(NULL);
9168 }
9169 if (sax != NULL) {
9170 if (ctxt->sax != NULL)
9171 xmlFree(ctxt->sax);
9172 ctxt->sax = sax;
9173 ctxt->userData = NULL;
9174 }
9175
9176 if ((ctxt->directory == NULL) && (directory == NULL))
9177 directory = xmlParserGetDirectory(filename);
9178
9179 xmlParseExtParsedEnt(ctxt);
9180
9181 if (ctxt->wellFormed)
9182 ret = ctxt->myDoc;
9183 else {
9184 ret = NULL;
9185 xmlFreeDoc(ctxt->myDoc);
9186 ctxt->myDoc = NULL;
9187 }
9188 if (sax != NULL)
9189 ctxt->sax = NULL;
9190 xmlFreeParserCtxt(ctxt);
9191
9192 return(ret);
9193}
9194
9195/**
9196 * xmlParseEntity:
9197 * @filename: the filename
9198 *
9199 * parse an XML external entity out of context and build a tree.
9200 *
9201 * [78] extParsedEnt ::= TextDecl? content
9202 *
9203 * This correspond to a "Well Balanced" chunk
9204 *
9205 * Returns the resulting document tree
9206 */
9207
9208xmlDocPtr
9209xmlParseEntity(const char *filename) {
9210 return(xmlSAXParseEntity(NULL, filename));
9211}
9212
9213/**
9214 * xmlCreateEntityParserCtxt:
9215 * @URL: the entity URL
9216 * @ID: the entity PUBLIC ID
9217 * @base: a posible base for the target URI
9218 *
9219 * Create a parser context for an external entity
9220 * Automatic support for ZLIB/Compress compressed document is provided
9221 * by default if found at compile-time.
9222 *
9223 * Returns the new parser context or NULL
9224 */
9225xmlParserCtxtPtr
9226xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9227 const xmlChar *base) {
9228 xmlParserCtxtPtr ctxt;
9229 xmlParserInputPtr inputStream;
9230 char *directory = NULL;
9231 xmlChar *uri;
9232
9233 ctxt = xmlNewParserCtxt();
9234 if (ctxt == NULL) {
9235 return(NULL);
9236 }
9237
9238 uri = xmlBuildURI(URL, base);
9239
9240 if (uri == NULL) {
9241 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9242 if (inputStream == NULL) {
9243 xmlFreeParserCtxt(ctxt);
9244 return(NULL);
9245 }
9246
9247 inputPush(ctxt, inputStream);
9248
9249 if ((ctxt->directory == NULL) && (directory == NULL))
9250 directory = xmlParserGetDirectory((char *)URL);
9251 if ((ctxt->directory == NULL) && (directory != NULL))
9252 ctxt->directory = directory;
9253 } else {
9254 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9255 if (inputStream == NULL) {
9256 xmlFree(uri);
9257 xmlFreeParserCtxt(ctxt);
9258 return(NULL);
9259 }
9260
9261 inputPush(ctxt, inputStream);
9262
9263 if ((ctxt->directory == NULL) && (directory == NULL))
9264 directory = xmlParserGetDirectory((char *)uri);
9265 if ((ctxt->directory == NULL) && (directory != NULL))
9266 ctxt->directory = directory;
9267 xmlFree(uri);
9268 }
9269
9270 return(ctxt);
9271}
9272
9273/************************************************************************
9274 * *
9275 * Front ends when parsing from a file *
9276 * *
9277 ************************************************************************/
9278
9279/**
9280 * xmlCreateFileParserCtxt:
9281 * @filename: the filename
9282 *
9283 * Create a parser context for a file content.
9284 * Automatic support for ZLIB/Compress compressed document is provided
9285 * by default if found at compile-time.
9286 *
9287 * Returns the new parser context or NULL
9288 */
9289xmlParserCtxtPtr
9290xmlCreateFileParserCtxt(const char *filename)
9291{
9292 xmlParserCtxtPtr ctxt;
9293 xmlParserInputPtr inputStream;
9294 xmlParserInputBufferPtr buf;
9295 char *directory = NULL;
9296
9297 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9298 if (buf == NULL) {
9299 return(NULL);
9300 }
9301
9302 ctxt = xmlNewParserCtxt();
9303 if (ctxt == NULL) {
9304 if (xmlDefaultSAXHandler.error != NULL) {
9305 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9306 }
9307 return(NULL);
9308 }
9309
9310 inputStream = xmlNewInputStream(ctxt);
9311 if (inputStream == NULL) {
9312 xmlFreeParserCtxt(ctxt);
9313 return(NULL);
9314 }
9315
9316 inputStream->filename = xmlMemStrdup(filename);
9317 inputStream->buf = buf;
9318 inputStream->base = inputStream->buf->buffer->content;
9319 inputStream->cur = inputStream->buf->buffer->content;
9320
9321 inputPush(ctxt, inputStream);
9322 if ((ctxt->directory == NULL) && (directory == NULL))
9323 directory = xmlParserGetDirectory(filename);
9324 if ((ctxt->directory == NULL) && (directory != NULL))
9325 ctxt->directory = directory;
9326
9327 return(ctxt);
9328}
9329
9330/**
9331 * xmlSAXParseFile:
9332 * @sax: the SAX handler block
9333 * @filename: the filename
9334 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9335 * documents
9336 *
9337 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9338 * compressed document is provided by default if found at compile-time.
9339 * It use the given SAX function block to handle the parsing callback.
9340 * If sax is NULL, fallback to the default DOM tree building routines.
9341 *
9342 * Returns the resulting document tree
9343 */
9344
9345xmlDocPtr
9346xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9347 int recovery) {
9348 xmlDocPtr ret;
9349 xmlParserCtxtPtr ctxt;
9350 char *directory = NULL;
9351
9352 ctxt = xmlCreateFileParserCtxt(filename);
9353 if (ctxt == NULL) {
9354 return(NULL);
9355 }
9356 if (sax != NULL) {
9357 if (ctxt->sax != NULL)
9358 xmlFree(ctxt->sax);
9359 ctxt->sax = sax;
9360 ctxt->userData = NULL;
9361 }
9362
9363 if ((ctxt->directory == NULL) && (directory == NULL))
9364 directory = xmlParserGetDirectory(filename);
9365 if ((ctxt->directory == NULL) && (directory != NULL))
9366 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9367
9368 xmlParseDocument(ctxt);
9369
9370 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9371 else {
9372 ret = NULL;
9373 xmlFreeDoc(ctxt->myDoc);
9374 ctxt->myDoc = NULL;
9375 }
9376 if (sax != NULL)
9377 ctxt->sax = NULL;
9378 xmlFreeParserCtxt(ctxt);
9379
9380 return(ret);
9381}
9382
9383/**
9384 * xmlRecoverDoc:
9385 * @cur: a pointer to an array of xmlChar
9386 *
9387 * parse an XML in-memory document and build a tree.
9388 * In the case the document is not Well Formed, a tree is built anyway
9389 *
9390 * Returns the resulting document tree
9391 */
9392
9393xmlDocPtr
9394xmlRecoverDoc(xmlChar *cur) {
9395 return(xmlSAXParseDoc(NULL, cur, 1));
9396}
9397
9398/**
9399 * xmlParseFile:
9400 * @filename: the filename
9401 *
9402 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9403 * compressed document is provided by default if found at compile-time.
9404 *
9405 * Returns the resulting document tree
9406 */
9407
9408xmlDocPtr
9409xmlParseFile(const char *filename) {
9410 return(xmlSAXParseFile(NULL, filename, 0));
9411}
9412
9413/**
9414 * xmlRecoverFile:
9415 * @filename: the filename
9416 *
9417 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9418 * compressed document is provided by default if found at compile-time.
9419 * In the case the document is not Well Formed, a tree is built anyway
9420 *
9421 * Returns the resulting document tree
9422 */
9423
9424xmlDocPtr
9425xmlRecoverFile(const char *filename) {
9426 return(xmlSAXParseFile(NULL, filename, 1));
9427}
9428
9429
9430/**
9431 * xmlSetupParserForBuffer:
9432 * @ctxt: an XML parser context
9433 * @buffer: a xmlChar * buffer
9434 * @filename: a file name
9435 *
9436 * Setup the parser context to parse a new buffer; Clears any prior
9437 * contents from the parser context. The buffer parameter must not be
9438 * NULL, but the filename parameter can be
9439 */
9440void
9441xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9442 const char* filename)
9443{
9444 xmlParserInputPtr input;
9445
9446 input = xmlNewInputStream(ctxt);
9447 if (input == NULL) {
9448 perror("malloc");
9449 xmlFree(ctxt);
9450 return;
9451 }
9452
9453 xmlClearParserCtxt(ctxt);
9454 if (filename != NULL)
9455 input->filename = xmlMemStrdup(filename);
9456 input->base = buffer;
9457 input->cur = buffer;
9458 inputPush(ctxt, input);
9459}
9460
9461/**
9462 * xmlSAXUserParseFile:
9463 * @sax: a SAX handler
9464 * @user_data: The user data returned on SAX callbacks
9465 * @filename: a file name
9466 *
9467 * parse an XML file and call the given SAX handler routines.
9468 * Automatic support for ZLIB/Compress compressed document is provided
9469 *
9470 * Returns 0 in case of success or a error number otherwise
9471 */
9472int
9473xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9474 const char *filename) {
9475 int ret = 0;
9476 xmlParserCtxtPtr ctxt;
9477
9478 ctxt = xmlCreateFileParserCtxt(filename);
9479 if (ctxt == NULL) return -1;
9480 if (ctxt->sax != &xmlDefaultSAXHandler)
9481 xmlFree(ctxt->sax);
9482 ctxt->sax = sax;
9483 if (user_data != NULL)
9484 ctxt->userData = user_data;
9485
9486 xmlParseDocument(ctxt);
9487
9488 if (ctxt->wellFormed)
9489 ret = 0;
9490 else {
9491 if (ctxt->errNo != 0)
9492 ret = ctxt->errNo;
9493 else
9494 ret = -1;
9495 }
9496 if (sax != NULL)
9497 ctxt->sax = NULL;
9498 xmlFreeParserCtxt(ctxt);
9499
9500 return ret;
9501}
9502
9503/************************************************************************
9504 * *
9505 * Front ends when parsing from memory *
9506 * *
9507 ************************************************************************/
9508
9509/**
9510 * xmlCreateMemoryParserCtxt:
9511 * @buffer: a pointer to a char array
9512 * @size: the size of the array
9513 *
9514 * Create a parser context for an XML in-memory document.
9515 *
9516 * Returns the new parser context or NULL
9517 */
9518xmlParserCtxtPtr
9519xmlCreateMemoryParserCtxt(char *buffer, int size) {
9520 xmlParserCtxtPtr ctxt;
9521 xmlParserInputPtr input;
9522 xmlParserInputBufferPtr buf;
9523
9524 if (buffer == NULL)
9525 return(NULL);
9526 if (size <= 0)
9527 return(NULL);
9528
9529 ctxt = xmlNewParserCtxt();
9530 if (ctxt == NULL)
9531 return(NULL);
9532
9533 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9534 if (buf == NULL) return(NULL);
9535
9536 input = xmlNewInputStream(ctxt);
9537 if (input == NULL) {
9538 xmlFreeParserCtxt(ctxt);
9539 return(NULL);
9540 }
9541
9542 input->filename = NULL;
9543 input->buf = buf;
9544 input->base = input->buf->buffer->content;
9545 input->cur = input->buf->buffer->content;
9546
9547 inputPush(ctxt, input);
9548 return(ctxt);
9549}
9550
9551/**
9552 * xmlSAXParseMemory:
9553 * @sax: the SAX handler block
9554 * @buffer: an pointer to a char array
9555 * @size: the size of the array
9556 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9557 * documents
9558 *
9559 * parse an XML in-memory block and use the given SAX function block
9560 * to handle the parsing callback. If sax is NULL, fallback to the default
9561 * DOM tree building routines.
9562 *
9563 * Returns the resulting document tree
9564 */
9565xmlDocPtr
9566xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9567 xmlDocPtr ret;
9568 xmlParserCtxtPtr ctxt;
9569
9570 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9571 if (ctxt == NULL) return(NULL);
9572 if (sax != NULL) {
9573 ctxt->sax = sax;
9574 ctxt->userData = NULL;
9575 }
9576
9577 xmlParseDocument(ctxt);
9578
9579 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9580 else {
9581 ret = NULL;
9582 xmlFreeDoc(ctxt->myDoc);
9583 ctxt->myDoc = NULL;
9584 }
9585 if (sax != NULL)
9586 ctxt->sax = NULL;
9587 xmlFreeParserCtxt(ctxt);
9588
9589 return(ret);
9590}
9591
9592/**
9593 * xmlParseMemory:
9594 * @buffer: an pointer to a char array
9595 * @size: the size of the array
9596 *
9597 * parse an XML in-memory block and build a tree.
9598 *
9599 * Returns the resulting document tree
9600 */
9601
9602xmlDocPtr xmlParseMemory(char *buffer, int size) {
9603 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9604}
9605
9606/**
9607 * xmlRecoverMemory:
9608 * @buffer: an pointer to a char array
9609 * @size: the size of the array
9610 *
9611 * parse an XML in-memory block and build a tree.
9612 * In the case the document is not Well Formed, a tree is built anyway
9613 *
9614 * Returns the resulting document tree
9615 */
9616
9617xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9618 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9619}
9620
9621/**
9622 * xmlSAXUserParseMemory:
9623 * @sax: a SAX handler
9624 * @user_data: The user data returned on SAX callbacks
9625 * @buffer: an in-memory XML document input
9626 * @size: the length of the XML document in bytes
9627 *
9628 * A better SAX parsing routine.
9629 * parse an XML in-memory buffer and call the given SAX handler routines.
9630 *
9631 * Returns 0 in case of success or a error number otherwise
9632 */
9633int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9634 char *buffer, int size) {
9635 int ret = 0;
9636 xmlParserCtxtPtr ctxt;
9637 xmlSAXHandlerPtr oldsax = NULL;
9638
9639 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9640 if (ctxt == NULL) return -1;
9641 if (sax != NULL) {
9642 oldsax = ctxt->sax;
9643 ctxt->sax = sax;
9644 }
9645 ctxt->userData = user_data;
9646
9647 xmlParseDocument(ctxt);
9648
9649 if (ctxt->wellFormed)
9650 ret = 0;
9651 else {
9652 if (ctxt->errNo != 0)
9653 ret = ctxt->errNo;
9654 else
9655 ret = -1;
9656 }
9657 if (sax != NULL) {
9658 ctxt->sax = oldsax;
9659 }
9660 xmlFreeParserCtxt(ctxt);
9661
9662 return ret;
9663}
9664
9665/**
9666 * xmlCreateDocParserCtxt:
9667 * @cur: a pointer to an array of xmlChar
9668 *
9669 * Creates a parser context for an XML in-memory document.
9670 *
9671 * Returns the new parser context or NULL
9672 */
9673xmlParserCtxtPtr
9674xmlCreateDocParserCtxt(xmlChar *cur) {
9675 int len;
9676
9677 if (cur == NULL)
9678 return(NULL);
9679 len = xmlStrlen(cur);
9680 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9681}
9682
9683/**
9684 * xmlSAXParseDoc:
9685 * @sax: the SAX handler block
9686 * @cur: a pointer to an array of xmlChar
9687 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9688 * documents
9689 *
9690 * parse an XML in-memory document and build a tree.
9691 * It use the given SAX function block to handle the parsing callback.
9692 * If sax is NULL, fallback to the default DOM tree building routines.
9693 *
9694 * Returns the resulting document tree
9695 */
9696
9697xmlDocPtr
9698xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9699 xmlDocPtr ret;
9700 xmlParserCtxtPtr ctxt;
9701
9702 if (cur == NULL) return(NULL);
9703
9704
9705 ctxt = xmlCreateDocParserCtxt(cur);
9706 if (ctxt == NULL) return(NULL);
9707 if (sax != NULL) {
9708 ctxt->sax = sax;
9709 ctxt->userData = NULL;
9710 }
9711
9712 xmlParseDocument(ctxt);
9713 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9714 else {
9715 ret = NULL;
9716 xmlFreeDoc(ctxt->myDoc);
9717 ctxt->myDoc = NULL;
9718 }
9719 if (sax != NULL)
9720 ctxt->sax = NULL;
9721 xmlFreeParserCtxt(ctxt);
9722
9723 return(ret);
9724}
9725
9726/**
9727 * xmlParseDoc:
9728 * @cur: a pointer to an array of xmlChar
9729 *
9730 * parse an XML in-memory document and build a tree.
9731 *
9732 * Returns the resulting document tree
9733 */
9734
9735xmlDocPtr
9736xmlParseDoc(xmlChar *cur) {
9737 return(xmlSAXParseDoc(NULL, cur, 0));
9738}
9739
9740
9741/************************************************************************
9742 * *
9743 * Miscellaneous *
9744 * *
9745 ************************************************************************/
9746
9747#ifdef LIBXML_XPATH_ENABLED
9748#include <libxml/xpath.h>
9749#endif
9750
/*
 * Set to 1 by xmlInitParser() once its initialization sequence ran,
 * reset to 0 by xmlCleanupParser().
 */
static int xmlParserInitialized = 0;
9752
9753/**
9754 * xmlInitParser:
9755 *
9756 * Initialization function for the XML parser.
9757 * This is not reentrant. Call once before processing in case of
9758 * use in multithreaded programs.
9759 */
9760
9761void
9762xmlInitParser(void) {
9763 if (xmlParserInitialized) return;
9764
9765 xmlInitCharEncodingHandlers();
9766 xmlInitializePredefinedEntities();
9767 xmlDefaultSAXHandlerInit();
9768 xmlRegisterDefaultInputCallbacks();
9769 xmlRegisterDefaultOutputCallbacks();
9770#ifdef LIBXML_HTML_ENABLED
9771 htmlInitAutoClose();
9772 htmlDefaultSAXHandlerInit();
9773#endif
9774#ifdef LIBXML_XPATH_ENABLED
9775 xmlXPathInit();
9776#endif
9777 xmlParserInitialized = 1;
9778}
9779
/**
 * xmlCleanupParser:
 *
 * Cleanup function for the XML parser. It tries to reclaim all
 * parsing related global memory allocated for the parser processing.
 * It doesn't deallocate any document related memory. Calling this
 * function should not prevent reusing the parser.
 *
 * The initialization flag is reset, so a later xmlInitParser() call
 * runs the whole initialization sequence again.
 */

void
xmlCleanupParser(void) {
    /* allow xmlInitParser() to run again */
    xmlParserInitialized = 0;
    xmlCleanupCharEncodingHandlers();
    xmlCleanupPredefinedEntities();
}
9795
9796/**
9797 * xmlPedanticParserDefault:
9798 * @val: int 0 or 1
9799 *
9800 * Set and return the previous value for enabling pedantic warnings.
9801 *
9802 * Returns the last value for 0 for no substitution, 1 for substitution.
9803 */
9804
9805int
9806xmlPedanticParserDefault(int val) {
9807 int old = xmlPedanticParserDefaultValue;
9808
9809 xmlPedanticParserDefaultValue = val;
9810 return(old);
9811}
9812
9813/**
9814 * xmlSubstituteEntitiesDefault:
9815 * @val: int 0 or 1
9816 *
9817 * Set and return the previous value for default entity support.
9818 * Initially the parser always keep entity references instead of substituting
9819 * entity values in the output. This function has to be used to change the
9820 * default parser behaviour
9821 * SAX::subtituteEntities() has to be used for changing that on a file by
9822 * file basis.
9823 *
9824 * Returns the last value for 0 for no substitution, 1 for substitution.
9825 */
9826
9827int
9828xmlSubstituteEntitiesDefault(int val) {
9829 int old = xmlSubstituteEntitiesDefaultValue;
9830
9831 xmlSubstituteEntitiesDefaultValue = val;
9832 return(old);
9833}
9834
9835/**
9836 * xmlKeepBlanksDefault:
9837 * @val: int 0 or 1
9838 *
9839 * Set and return the previous value for default blanks text nodes support.
9840 * The 1.x version of the parser used an heuristic to try to detect
9841 * ignorable white spaces. As a result the SAX callback was generating
9842 * ignorableWhitespace() callbacks instead of characters() one, and when
9843 * using the DOM output text nodes containing those blanks were not generated.
9844 * The 2.x and later version will switch to the XML standard way and
9845 * ignorableWhitespace() are only generated when running the parser in
9846 * validating mode and when the current element doesn't allow CDATA or
9847 * mixed content.
9848 * This function is provided as a way to force the standard behaviour
9849 * on 1.X libs and to switch back to the old mode for compatibility when
9850 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9851 * by using xmlIsBlankNode() commodity function to detect the "empty"
9852 * nodes generated.
9853 * This value also affect autogeneration of indentation when saving code
9854 * if blanks sections are kept, indentation is not generated.
9855 *
9856 * Returns the last value for 0 for no substitution, 1 for substitution.
9857 */
9858
9859int
9860xmlKeepBlanksDefault(int val) {
9861 int old = xmlKeepBlanksDefaultValue;
9862
9863 xmlKeepBlanksDefaultValue = val;
9864 xmlIndentTreeOutput = !val;
9865 return(old);
9866}
9867