blob: 0077ea5fa56ef6d6448c220f690f23733649dfb5 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 *
 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
 * and xmlDoValidityCheckingDefaultValue for VMS
 */
35
36#ifdef WIN32
37#include "win32config.h"
38#define XML_DIR_SEP '\\'
39#else
40#include "config.h"
41#define XML_DIR_SEP '/'
42#endif
43
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <libxml/xmlmemory.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57
58#ifdef HAVE_CTYPE_H
59#include <ctype.h>
60#endif
61#ifdef HAVE_STDLIB_H
62#include <stdlib.h>
63#endif
64#ifdef HAVE_SYS_STAT_H
65#include <sys/stat.h>
66#endif
67#ifdef HAVE_FCNTL_H
68#include <fcntl.h>
69#endif
70#ifdef HAVE_UNISTD_H
71#include <unistd.h>
72#endif
73#ifdef HAVE_ZLIB_H
74#include <zlib.h>
75#endif
76
77
/*
 * Sizes used for the scratch buffers of this file: the first one is the
 * initial allocation of a translation buffer, the second one is the slack
 * kept free at the end of such a buffer before it gets enlarged.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 1000
#define XML_PARSER_BUFFER_SIZE 100

/*
 * On VMS the two longest global names are truncated: alias them first so
 * that the definitions below can be written only once for all platforms.
 */
#ifdef VMS
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#endif

/*
 * Various global defaults for parsing
 */
int xmlGetWarningsDefaultValue = 1;
/* when non-zero, pushes/pops of entity inputs are traced via xmlGenericError */
int xmlParserDebugEntities = 0;
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;

/*
 * List of XML prefixed PI allowed by W3C specs (NULL terminated)
 */

const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
108/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
109void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
113
114/************************************************************************
115 * *
116 * Parser stacks related functions and macros *
117 * *
118 ************************************************************************/
119
120xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
121 const xmlChar ** str);
122
123/*
124 * Generic function for accessing stacks in the Parser Context
125 */
126
127#define PUSH_AND_POP(scope, type, name) \
128scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
129 if (ctxt->name##Nr >= ctxt->name##Max) { \
130 ctxt->name##Max *= 2; \
131 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
132 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
133 if (ctxt->name##Tab == NULL) { \
134 xmlGenericError(xmlGenericErrorContext, \
135 "realloc failed !\n"); \
136 return(0); \
137 } \
138 } \
139 ctxt->name##Tab[ctxt->name##Nr] = value; \
140 ctxt->name = value; \
141 return(ctxt->name##Nr++); \
142} \
143scope type name##Pop(xmlParserCtxtPtr ctxt) { \
144 type ret; \
145 if (ctxt->name##Nr <= 0) return(0); \
146 ctxt->name##Nr--; \
147 if (ctxt->name##Nr > 0) \
148 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
149 else \
150 ctxt->name = NULL; \
151 ret = ctxt->name##Tab[ctxt->name##Nr]; \
152 ctxt->name##Tab[ctxt->name##Nr] = 0; \
153 return(ret); \
154} \
155
156/*
157 * Those macros actually generate the functions
158 */
159PUSH_AND_POP(extern, xmlParserInputPtr, input)
160PUSH_AND_POP(extern, xmlNodePtr, node)
161PUSH_AND_POP(extern, xmlChar*, name)
162
163int spacePush(xmlParserCtxtPtr ctxt, int val) {
164 if (ctxt->spaceNr >= ctxt->spaceMax) {
165 ctxt->spaceMax *= 2;
166 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
167 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
168 if (ctxt->spaceTab == NULL) {
169 xmlGenericError(xmlGenericErrorContext,
170 "realloc failed !\n");
171 return(0);
172 }
173 }
174 ctxt->spaceTab[ctxt->spaceNr] = val;
175 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
176 return(ctxt->spaceNr++);
177}
178
179int spacePop(xmlParserCtxtPtr ctxt) {
180 int ret;
181 if (ctxt->spaceNr <= 0) return(0);
182 ctxt->spaceNr--;
183 if (ctxt->spaceNr > 0)
184 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
185 else
186 ctxt->space = NULL;
187 ret = ctxt->spaceTab[ctxt->spaceNr];
188 ctxt->spaceTab[ctxt->spaceNr] = -1;
189 return(ret);
190}
191
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported. All of them rely on a variable named ctxt being in scope.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the pending ctxt->token if there is one, otherwise the
 *           current xmlChar of the input.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but only looks at the input buffer: it yields -1
 *           while a token is pending instead of returning that token.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR it should be used only
 *           to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings within the parser. Hands over to
 *           xmlParserHandlePEReference() when landing on a '%' and pops the
 *           input when it is exhausted.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip l xmlChars in the input buffer, updating line/col and
 *           dropping any pending token.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * Macro arguments are parenthesized in the expansions so that expressions
 * can safely be passed. COPY_BUF is wrapped in do { } while (0) to behave as
 * a single statement. SHRINK and GROW intentionally keep their historical
 * form of a bare "if (...) { ... }" block: never place them directly in
 * front of an "else".
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {							\
    ctxt->nbChars += (val),ctxt->input->cur += (val);			\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);					\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {	\
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);			\
    if ((*ctxt->input->cur == 0) &&					\
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))		\
	    xmlPopInput(ctxt);						\
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->token = 0; ctxt->input->cur += (l);				\
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);	\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v) do {						\
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);				\
    else (i) += xmlCopyChar((l),&(b)[(i)],(v));				\
  } while (0)
271
272/**
273 * xmlSkipBlankChars:
274 * @ctxt: the XML parser context
275 *
276 * skip all blanks character found at that point in the input streams.
277 * It pops up finished entities in the process if allowable at that point.
278 *
279 * Returns the number of space chars skipped
280 */
281
282int
283xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
284 int cur, res = 0;
285
286 /*
287 * It's Okay to use CUR/NEXT here since all the blanks are on
288 * the ASCII range.
289 */
290 do {
291 cur = CUR;
292 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
293 NEXT;
294 cur = CUR;
295 res++;
296 }
297 while ((cur == 0) && (ctxt->inputNr > 1) &&
298 (ctxt->instate != XML_PARSER_COMMENT)) {
299 xmlPopInput(ctxt);
300 cur = CUR;
301 }
302 /*
303 * Need to handle support of entities branching here
304 */
305 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
306 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
307 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
308 return(res);
309}
310
311/************************************************************************
312 * *
313 * Commodity functions to handle entities *
314 * *
315 ************************************************************************/
316
317/**
318 * xmlPopInput:
319 * @ctxt: an XML parser context
320 *
321 * xmlPopInput: the current input pointed by ctxt->input came to an end
322 * pop it and return the next char.
323 *
324 * Returns the current xmlChar in the parser context
325 */
326xmlChar
327xmlPopInput(xmlParserCtxtPtr ctxt) {
328 if (ctxt->inputNr == 1) return(0); /* End of main Input */
329 if (xmlParserDebugEntities)
330 xmlGenericError(xmlGenericErrorContext,
331 "Popping input %d\n", ctxt->inputNr);
332 xmlFreeInputStream(inputPop(ctxt));
333 if ((*ctxt->input->cur == 0) &&
334 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
335 return(xmlPopInput(ctxt));
336 return(CUR);
337}
338
339/**
340 * xmlPushInput:
341 * @ctxt: an XML parser context
342 * @input: an XML parser input fragment (entity, XML fragment ...).
343 *
344 * xmlPushInput: switch to a new input stream which is stacked on top
345 * of the previous one(s).
346 */
347void
348xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
349 if (input == NULL) return;
350
351 if (xmlParserDebugEntities) {
352 if ((ctxt->input != NULL) && (ctxt->input->filename))
353 xmlGenericError(xmlGenericErrorContext,
354 "%s(%d): ", ctxt->input->filename,
355 ctxt->input->line);
356 xmlGenericError(xmlGenericErrorContext,
357 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
358 }
359 inputPush(ctxt, input);
360 GROW;
361}
362
363/**
364 * xmlParseCharRef:
365 * @ctxt: an XML parser context
366 *
367 * parse Reference declarations
368 *
369 * [66] CharRef ::= '&#' [0-9]+ ';' |
370 * '&#x' [0-9a-fA-F]+ ';'
371 *
372 * [ WFC: Legal Character ]
373 * Characters referred to using character references must match the
374 * production for Char.
375 *
376 * Returns the value parsed (as an int), 0 in case of error
377 */
378int
379xmlParseCharRef(xmlParserCtxtPtr ctxt) {
380 int val = 0;
381 int count = 0;
382
383 if (ctxt->token != 0) {
384 val = ctxt->token;
385 ctxt->token = 0;
386 return(val);
387 }
388 /*
389 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
390 */
391 if ((RAW == '&') && (NXT(1) == '#') &&
392 (NXT(2) == 'x')) {
393 SKIP(3);
394 GROW;
395 while (RAW != ';') { /* loop blocked by count */
396 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
397 val = val * 16 + (CUR - '0');
398 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
399 val = val * 16 + (CUR - 'a') + 10;
400 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
401 val = val * 16 + (CUR - 'A') + 10;
402 else {
403 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
405 ctxt->sax->error(ctxt->userData,
406 "xmlParseCharRef: invalid hexadecimal value\n");
407 ctxt->wellFormed = 0;
408 ctxt->disableSAX = 1;
409 val = 0;
410 break;
411 }
412 NEXT;
413 count++;
414 }
415 if (RAW == ';') {
416 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
417 ctxt->nbChars ++;
418 ctxt->input->cur++;
419 }
420 } else if ((RAW == '&') && (NXT(1) == '#')) {
421 SKIP(2);
422 GROW;
423 while (RAW != ';') { /* loop blocked by count */
424 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
425 val = val * 10 + (CUR - '0');
426 else {
427 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
428 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
429 ctxt->sax->error(ctxt->userData,
430 "xmlParseCharRef: invalid decimal value\n");
431 ctxt->wellFormed = 0;
432 ctxt->disableSAX = 1;
433 val = 0;
434 break;
435 }
436 NEXT;
437 count++;
438 }
439 if (RAW == ';') {
440 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
441 ctxt->nbChars ++;
442 ctxt->input->cur++;
443 }
444 } else {
445 ctxt->errNo = XML_ERR_INVALID_CHARREF;
446 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
447 ctxt->sax->error(ctxt->userData,
448 "xmlParseCharRef: invalid value\n");
449 ctxt->wellFormed = 0;
450 ctxt->disableSAX = 1;
451 }
452
453 /*
454 * [ WFC: Legal Character ]
455 * Characters referred to using character references must match the
456 * production for Char.
457 */
458 if (IS_CHAR(val)) {
459 return(val);
460 } else {
461 ctxt->errNo = XML_ERR_INVALID_CHAR;
462 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
463 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
464 val);
465 ctxt->wellFormed = 0;
466 ctxt->disableSAX = 1;
467 }
468 return(0);
469}
470
471/**
472 * xmlParseStringCharRef:
473 * @ctxt: an XML parser context
474 * @str: a pointer to an index in the string
475 *
476 * parse Reference declarations, variant parsing from a string rather
477 * than an an input flow.
478 *
479 * [66] CharRef ::= '&#' [0-9]+ ';' |
480 * '&#x' [0-9a-fA-F]+ ';'
481 *
482 * [ WFC: Legal Character ]
483 * Characters referred to using character references must match the
484 * production for Char.
485 *
486 * Returns the value parsed (as an int), 0 in case of error, str will be
487 * updated to the current value of the index
488 */
489int
490xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
491 const xmlChar *ptr;
492 xmlChar cur;
493 int val = 0;
494
495 if ((str == NULL) || (*str == NULL)) return(0);
496 ptr = *str;
497 cur = *ptr;
498 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
499 ptr += 3;
500 cur = *ptr;
501 while (cur != ';') { /* Non input consuming loop */
502 if ((cur >= '0') && (cur <= '9'))
503 val = val * 16 + (cur - '0');
504 else if ((cur >= 'a') && (cur <= 'f'))
505 val = val * 16 + (cur - 'a') + 10;
506 else if ((cur >= 'A') && (cur <= 'F'))
507 val = val * 16 + (cur - 'A') + 10;
508 else {
509 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
510 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
511 ctxt->sax->error(ctxt->userData,
512 "xmlParseStringCharRef: invalid hexadecimal value\n");
513 ctxt->wellFormed = 0;
514 ctxt->disableSAX = 1;
515 val = 0;
516 break;
517 }
518 ptr++;
519 cur = *ptr;
520 }
521 if (cur == ';')
522 ptr++;
523 } else if ((cur == '&') && (ptr[1] == '#')){
524 ptr += 2;
525 cur = *ptr;
526 while (cur != ';') { /* Non input consuming loops */
527 if ((cur >= '0') && (cur <= '9'))
528 val = val * 10 + (cur - '0');
529 else {
530 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
531 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
532 ctxt->sax->error(ctxt->userData,
533 "xmlParseStringCharRef: invalid decimal value\n");
534 ctxt->wellFormed = 0;
535 ctxt->disableSAX = 1;
536 val = 0;
537 break;
538 }
539 ptr++;
540 cur = *ptr;
541 }
542 if (cur == ';')
543 ptr++;
544 } else {
545 ctxt->errNo = XML_ERR_INVALID_CHARREF;
546 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
547 ctxt->sax->error(ctxt->userData,
548 "xmlParseCharRef: invalid value\n");
549 ctxt->wellFormed = 0;
550 ctxt->disableSAX = 1;
551 return(0);
552 }
553 *str = ptr;
554
555 /*
556 * [ WFC: Legal Character ]
557 * Characters referred to using character references must match the
558 * production for Char.
559 */
560 if (IS_CHAR(val)) {
561 return(val);
562 } else {
563 ctxt->errNo = XML_ERR_INVALID_CHAR;
564 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
565 ctxt->sax->error(ctxt->userData,
566 "CharRef: invalid xmlChar value %d\n", val);
567 ctxt->wellFormed = 0;
568 ctxt->disableSAX = 1;
569 }
570 return(0);
571}
572
573/**
574 * xmlParserHandlePEReference:
575 * @ctxt: the parser context
576 *
577 * [69] PEReference ::= '%' Name ';'
578 *
579 * [ WFC: No Recursion ]
580 * A parsed entity must not contain a recursive
581 * reference to itself, either directly or indirectly.
582 *
583 * [ WFC: Entity Declared ]
584 * In a document without any DTD, a document with only an internal DTD
585 * subset which contains no parameter entity references, or a document
586 * with "standalone='yes'", ... ... The declaration of a parameter
587 * entity must precede any reference to it...
588 *
589 * [ VC: Entity Declared ]
590 * In a document with an external subset or external parameter entities
591 * with "standalone='no'", ... ... The declaration of a parameter entity
592 * must precede any reference to it...
593 *
594 * [ WFC: In DTD ]
595 * Parameter-entity references may only appear in the DTD.
596 * NOTE: misleading but this is handled.
597 *
598 * A PEReference may have been detected in the current input stream
599 * the handling is done accordingly to
600 * http://www.w3.org/TR/REC-xml#entproc
601 * i.e.
602 * - Included in literal in entity values
603 * - Included as Paraemeter Entity reference within DTDs
604 */
605void
606xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
607 xmlChar *name;
608 xmlEntityPtr entity = NULL;
609 xmlParserInputPtr input;
610
611 if (ctxt->token != 0) {
612 return;
613 }
614 if (RAW != '%') return;
615 switch(ctxt->instate) {
616 case XML_PARSER_CDATA_SECTION:
617 return;
618 case XML_PARSER_COMMENT:
619 return;
620 case XML_PARSER_START_TAG:
621 return;
622 case XML_PARSER_END_TAG:
623 return;
624 case XML_PARSER_EOF:
625 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
626 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
627 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
628 ctxt->wellFormed = 0;
629 ctxt->disableSAX = 1;
630 return;
631 case XML_PARSER_PROLOG:
632 case XML_PARSER_START:
633 case XML_PARSER_MISC:
634 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
635 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
636 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
637 ctxt->wellFormed = 0;
638 ctxt->disableSAX = 1;
639 return;
640 case XML_PARSER_ENTITY_DECL:
641 case XML_PARSER_CONTENT:
642 case XML_PARSER_ATTRIBUTE_VALUE:
643 case XML_PARSER_PI:
644 case XML_PARSER_SYSTEM_LITERAL:
645 /* we just ignore it there */
646 return;
647 case XML_PARSER_EPILOG:
648 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
650 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
651 ctxt->wellFormed = 0;
652 ctxt->disableSAX = 1;
653 return;
654 case XML_PARSER_ENTITY_VALUE:
655 /*
656 * NOTE: in the case of entity values, we don't do the
657 * substitution here since we need the literal
658 * entity value to be able to save the internal
659 * subset of the document.
660 * This will be handled by xmlStringDecodeEntities
661 */
662 return;
663 case XML_PARSER_DTD:
664 /*
665 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
666 * In the internal DTD subset, parameter-entity references
667 * can occur only where markup declarations can occur, not
668 * within markup declarations.
669 * In that case this is handled in xmlParseMarkupDecl
670 */
671 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
672 return;
673 break;
674 case XML_PARSER_IGNORE:
675 return;
676 }
677
678 NEXT;
679 name = xmlParseName(ctxt);
680 if (xmlParserDebugEntities)
681 xmlGenericError(xmlGenericErrorContext,
682 "PE Reference: %s\n", name);
683 if (name == NULL) {
684 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
685 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
686 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
687 ctxt->wellFormed = 0;
688 ctxt->disableSAX = 1;
689 } else {
690 if (RAW == ';') {
691 NEXT;
692 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
693 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
694 if (entity == NULL) {
695
696 /*
697 * [ WFC: Entity Declared ]
698 * In a document without any DTD, a document with only an
699 * internal DTD subset which contains no parameter entity
700 * references, or a document with "standalone='yes'", ...
701 * ... The declaration of a parameter entity must precede
702 * any reference to it...
703 */
704 if ((ctxt->standalone == 1) ||
705 ((ctxt->hasExternalSubset == 0) &&
706 (ctxt->hasPErefs == 0))) {
707 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
708 ctxt->sax->error(ctxt->userData,
709 "PEReference: %%%s; not found\n", name);
710 ctxt->wellFormed = 0;
711 ctxt->disableSAX = 1;
712 } else {
713 /*
714 * [ VC: Entity Declared ]
715 * In a document with an external subset or external
716 * parameter entities with "standalone='no'", ...
717 * ... The declaration of a parameter entity must precede
718 * any reference to it...
719 */
720 if ((!ctxt->disableSAX) &&
721 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
722 ctxt->vctxt.error(ctxt->vctxt.userData,
723 "PEReference: %%%s; not found\n", name);
724 } else if ((!ctxt->disableSAX) &&
725 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
726 ctxt->sax->warning(ctxt->userData,
727 "PEReference: %%%s; not found\n", name);
728 ctxt->valid = 0;
729 }
730 } else {
731 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
732 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
733 /*
734 * handle the extra spaces added before and after
735 * c.f. http://www.w3.org/TR/REC-xml#as-PE
736 * this is done independantly.
737 */
738 input = xmlNewEntityInputStream(ctxt, entity);
739 xmlPushInput(ctxt, input);
740 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
741 (RAW == '<') && (NXT(1) == '?') &&
742 (NXT(2) == 'x') && (NXT(3) == 'm') &&
743 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
744 xmlParseTextDecl(ctxt);
745 }
746 if (ctxt->token == 0)
747 ctxt->token = ' ';
748 } else {
749 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
750 ctxt->sax->error(ctxt->userData,
751 "xmlHandlePEReference: %s is not a parameter entity\n",
752 name);
753 ctxt->wellFormed = 0;
754 ctxt->disableSAX = 1;
755 }
756 }
757 } else {
758 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
759 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
760 ctxt->sax->error(ctxt->userData,
761 "xmlHandlePEReference: expecting ';'\n");
762 ctxt->wellFormed = 0;
763 ctxt->disableSAX = 1;
764 }
765 xmlFree(name);
766 }
767}
768
/*
 * Macro used to grow the current buffer.
 *
 * It doubles buffer##_size and reallocates buffer accordingly, so the
 * calling function must own two variables named that way and must return a
 * pointer. When the reallocation fails the macro reports it, releases the
 * old block (which xmlRealloc left untouched and which would otherwise be
 * lost) and makes the calling function return NULL.
 */
#define growBuffer(buffer) {						\
    xmlChar *buffer##_grown;						\
    buffer##_size *= 2;							\
    buffer##_grown = (xmlChar *)					\
            xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));	\
    if (buffer##_grown == NULL) {					\
        perror("realloc failed");					\
        xmlFree(buffer);						\
        buffer = NULL;							\
        return(NULL);							\
    }									\
    buffer = buffer##_grown;						\
}
781
782/**
783 * xmlStringDecodeEntities:
784 * @ctxt: the parser context
785 * @str: the input string
786 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
787 * @end: an end marker xmlChar, 0 if none
788 * @end2: an end marker xmlChar, 0 if none
789 * @end3: an end marker xmlChar, 0 if none
790 *
791 * Takes a entity string content and process to do the adequate subtitutions.
792 *
793 * [67] Reference ::= EntityRef | CharRef
794 *
795 * [69] PEReference ::= '%' Name ';'
796 *
797 * Returns A newly allocated string with the substitution done. The caller
798 * must deallocate it !
799 */
800xmlChar *
801xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
802 xmlChar end, xmlChar end2, xmlChar end3) {
803 xmlChar *buffer = NULL;
804 int buffer_size = 0;
805
806 xmlChar *current = NULL;
807 xmlEntityPtr ent;
808 int c,l;
809 int nbchars = 0;
810
811 if (str == NULL)
812 return(NULL);
813
814 if (ctxt->depth > 40) {
815 ctxt->errNo = XML_ERR_ENTITY_LOOP;
816 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
817 ctxt->sax->error(ctxt->userData,
818 "Detected entity reference loop\n");
819 ctxt->wellFormed = 0;
820 ctxt->disableSAX = 1;
821 return(NULL);
822 }
823
824 /*
825 * allocate a translation buffer.
826 */
827 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
828 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
829 if (buffer == NULL) {
830 perror("xmlDecodeEntities: malloc failed");
831 return(NULL);
832 }
833
834 /*
835 * Ok loop until we reach one of the ending char or a size limit.
836 * we are operating on already parsed values.
837 */
838 c = CUR_SCHAR(str, l);
839 while ((c != 0) && (c != end) && /* non input consuming loop */
840 (c != end2) && (c != end3)) {
841
842 if (c == 0) break;
843 if ((c == '&') && (str[1] == '#')) {
844 int val = xmlParseStringCharRef(ctxt, &str);
845 if (val != 0) {
846 COPY_BUF(0,buffer,nbchars,val);
847 }
848 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
849 if (xmlParserDebugEntities)
850 xmlGenericError(xmlGenericErrorContext,
851 "String decoding Entity Reference: %.30s\n",
852 str);
853 ent = xmlParseStringEntityRef(ctxt, &str);
854 if ((ent != NULL) &&
855 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
856 if (ent->content != NULL) {
857 COPY_BUF(0,buffer,nbchars,ent->content[0]);
858 } else {
859 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
860 ctxt->sax->error(ctxt->userData,
861 "internal error entity has no content\n");
862 }
863 } else if ((ent != NULL) && (ent->content != NULL)) {
864 xmlChar *rep;
865
866 ctxt->depth++;
867 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
868 0, 0, 0);
869 ctxt->depth--;
870 if (rep != NULL) {
871 current = rep;
872 while (*current != 0) { /* non input consuming loop */
873 buffer[nbchars++] = *current++;
874 if (nbchars >
875 buffer_size - XML_PARSER_BUFFER_SIZE) {
876 growBuffer(buffer);
877 }
878 }
879 xmlFree(rep);
880 }
881 } else if (ent != NULL) {
882 int i = xmlStrlen(ent->name);
883 const xmlChar *cur = ent->name;
884
885 buffer[nbchars++] = '&';
886 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
887 growBuffer(buffer);
888 }
889 for (;i > 0;i--)
890 buffer[nbchars++] = *cur++;
891 buffer[nbchars++] = ';';
892 }
893 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
894 if (xmlParserDebugEntities)
895 xmlGenericError(xmlGenericErrorContext,
896 "String decoding PE Reference: %.30s\n", str);
897 ent = xmlParseStringPEReference(ctxt, &str);
898 if (ent != NULL) {
899 xmlChar *rep;
900
901 ctxt->depth++;
902 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
903 0, 0, 0);
904 ctxt->depth--;
905 if (rep != NULL) {
906 current = rep;
907 while (*current != 0) { /* non input consuming loop */
908 buffer[nbchars++] = *current++;
909 if (nbchars >
910 buffer_size - XML_PARSER_BUFFER_SIZE) {
911 growBuffer(buffer);
912 }
913 }
914 xmlFree(rep);
915 }
916 }
917 } else {
918 COPY_BUF(l,buffer,nbchars,c);
919 str += l;
920 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
921 growBuffer(buffer);
922 }
923 }
924 c = CUR_SCHAR(str, l);
925 }
926 buffer[nbchars++] = 0;
927 return(buffer);
928}
929
930
931/************************************************************************
932 * *
933 * Commodity functions to handle xmlChars *
934 * *
935 ************************************************************************/
936
937/**
938 * xmlStrndup:
939 * @cur: the input xmlChar *
940 * @len: the len of @cur
941 *
942 * a strndup for array of xmlChar's
943 *
944 * Returns a new xmlChar * or NULL
945 */
946xmlChar *
947xmlStrndup(const xmlChar *cur, int len) {
948 xmlChar *ret;
949
950 if ((cur == NULL) || (len < 0)) return(NULL);
951 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
952 if (ret == NULL) {
953 xmlGenericError(xmlGenericErrorContext,
954 "malloc of %ld byte failed\n",
955 (len + 1) * (long)sizeof(xmlChar));
956 return(NULL);
957 }
958 memcpy(ret, cur, len * sizeof(xmlChar));
959 ret[len] = 0;
960 return(ret);
961}
962
963/**
964 * xmlStrdup:
965 * @cur: the input xmlChar *
966 *
967 * a strdup for array of xmlChar's. Since they are supposed to be
968 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
969 * a termination mark of '0'.
970 *
971 * Returns a new xmlChar * or NULL
972 */
973xmlChar *
974xmlStrdup(const xmlChar *cur) {
975 const xmlChar *p = cur;
976
977 if (cur == NULL) return(NULL);
978 while (*p != 0) p++; /* non input consuming */
979 return(xmlStrndup(cur, p - cur));
980}
981
982/**
983 * xmlCharStrndup:
984 * @cur: the input char *
985 * @len: the len of @cur
986 *
987 * a strndup for char's to xmlChar's
988 *
989 * Returns a new xmlChar * or NULL
990 */
991
992xmlChar *
993xmlCharStrndup(const char *cur, int len) {
994 int i;
995 xmlChar *ret;
996
997 if ((cur == NULL) || (len < 0)) return(NULL);
998 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
999 if (ret == NULL) {
1000 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1001 (len + 1) * (long)sizeof(xmlChar));
1002 return(NULL);
1003 }
1004 for (i = 0;i < len;i++)
1005 ret[i] = (xmlChar) cur[i];
1006 ret[len] = 0;
1007 return(ret);
1008}
1009
1010/**
1011 * xmlCharStrdup:
1012 * @cur: the input char *
1013 * @len: the len of @cur
1014 *
1015 * a strdup for char's to xmlChar's
1016 *
1017 * Returns a new xmlChar * or NULL
1018 */
1019
1020xmlChar *
1021xmlCharStrdup(const char *cur) {
1022 const char *p = cur;
1023
1024 if (cur == NULL) return(NULL);
1025 while (*p != '\0') p++; /* non input consuming */
1026 return(xmlCharStrndup(cur, p - cur));
1027}
1028
1029/**
1030 * xmlStrcmp:
1031 * @str1: the first xmlChar *
1032 * @str2: the second xmlChar *
1033 *
1034 * a strcmp for xmlChar's
1035 *
1036 * Returns the integer result of the comparison
1037 */
1038
1039int
1040xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1041 register int tmp;
1042
1043 if (str1 == str2) return(0);
1044 if (str1 == NULL) return(-1);
1045 if (str2 == NULL) return(1);
1046 do {
1047 tmp = *str1++ - *str2;
1048 if (tmp != 0) return(tmp);
1049 } while (*str2++ != 0);
1050 return 0;
1051}
1052
1053/**
1054 * xmlStrEqual:
1055 * @str1: the first xmlChar *
1056 * @str2: the second xmlChar *
1057 *
1058 * Check if both string are equal of have same content
1059 * Should be a bit more readable and faster than xmlStrEqual()
1060 *
1061 * Returns 1 if they are equal, 0 if they are different
1062 */
1063
1064int
1065xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1066 if (str1 == str2) return(1);
1067 if (str1 == NULL) return(0);
1068 if (str2 == NULL) return(0);
1069 do {
1070 if (*str1++ != *str2) return(0);
1071 } while (*str2++);
1072 return(1);
1073}
1074
1075/**
1076 * xmlStrncmp:
1077 * @str1: the first xmlChar *
1078 * @str2: the second xmlChar *
1079 * @len: the max comparison length
1080 *
1081 * a strncmp for xmlChar's
1082 *
1083 * Returns the integer result of the comparison
1084 */
1085
1086int
1087xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1088 register int tmp;
1089
1090 if (len <= 0) return(0);
1091 if (str1 == str2) return(0);
1092 if (str1 == NULL) return(-1);
1093 if (str2 == NULL) return(1);
1094 do {
1095 tmp = *str1++ - *str2;
1096 if (tmp != 0 || --len == 0) return(tmp);
1097 } while (*str2++ != 0);
1098 return 0;
1099}
1100
1101static xmlChar casemap[256] = {
1102 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1103 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1104 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1105 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1106 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1107 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1108 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1109 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1110 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1111 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1112 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1113 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1114 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1115 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1116 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1117 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1118 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1119 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1120 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1121 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1122 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1123 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1124 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1125 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1126 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1127 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1128 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1129 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1130 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1131 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1132 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1133 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1134};
1135
1136/**
1137 * xmlStrcasecmp:
1138 * @str1: the first xmlChar *
1139 * @str2: the second xmlChar *
1140 *
1141 * a strcasecmp for xmlChar's
1142 *
1143 * Returns the integer result of the comparison
1144 */
1145
1146int
1147xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1148 register int tmp;
1149
1150 if (str1 == str2) return(0);
1151 if (str1 == NULL) return(-1);
1152 if (str2 == NULL) return(1);
1153 do {
1154 tmp = casemap[*str1++] - casemap[*str2];
1155 if (tmp != 0) return(tmp);
1156 } while (*str2++ != 0);
1157 return 0;
1158}
1159
1160/**
1161 * xmlStrncasecmp:
1162 * @str1: the first xmlChar *
1163 * @str2: the second xmlChar *
1164 * @len: the max comparison length
1165 *
1166 * a strncasecmp for xmlChar's
1167 *
1168 * Returns the integer result of the comparison
1169 */
1170
1171int
1172xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1173 register int tmp;
1174
1175 if (len <= 0) return(0);
1176 if (str1 == str2) return(0);
1177 if (str1 == NULL) return(-1);
1178 if (str2 == NULL) return(1);
1179 do {
1180 tmp = casemap[*str1++] - casemap[*str2];
1181 if (tmp != 0 || --len == 0) return(tmp);
1182 } while (*str2++ != 0);
1183 return 0;
1184}
1185
1186/**
1187 * xmlStrchr:
1188 * @str: the xmlChar * array
1189 * @val: the xmlChar to search
1190 *
1191 * a strchr for xmlChar's
1192 *
1193 * Returns the xmlChar * for the first occurence or NULL.
1194 */
1195
1196const xmlChar *
1197xmlStrchr(const xmlChar *str, xmlChar val) {
1198 if (str == NULL) return(NULL);
1199 while (*str != 0) { /* non input consuming */
1200 if (*str == val) return((xmlChar *) str);
1201 str++;
1202 }
1203 return(NULL);
1204}
1205
1206/**
1207 * xmlStrstr:
1208 * @str: the xmlChar * array (haystack)
1209 * @val: the xmlChar to search (needle)
1210 *
1211 * a strstr for xmlChar's
1212 *
1213 * Returns the xmlChar * for the first occurence or NULL.
1214 */
1215
1216const xmlChar *
1217xmlStrstr(const xmlChar *str, xmlChar *val) {
1218 int n;
1219
1220 if (str == NULL) return(NULL);
1221 if (val == NULL) return(NULL);
1222 n = xmlStrlen(val);
1223
1224 if (n == 0) return(str);
1225 while (*str != 0) { /* non input consuming */
1226 if (*str == *val) {
1227 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1228 }
1229 str++;
1230 }
1231 return(NULL);
1232}
1233
1234/**
1235 * xmlStrcasestr:
1236 * @str: the xmlChar * array (haystack)
1237 * @val: the xmlChar to search (needle)
1238 *
1239 * a case-ignoring strstr for xmlChar's
1240 *
1241 * Returns the xmlChar * for the first occurence or NULL.
1242 */
1243
1244const xmlChar *
1245xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1246 int n;
1247
1248 if (str == NULL) return(NULL);
1249 if (val == NULL) return(NULL);
1250 n = xmlStrlen(val);
1251
1252 if (n == 0) return(str);
1253 while (*str != 0) { /* non input consuming */
1254 if (casemap[*str] == casemap[*val])
1255 if (!xmlStrncasecmp(str, val, n)) return(str);
1256 str++;
1257 }
1258 return(NULL);
1259}
1260
1261/**
1262 * xmlStrsub:
1263 * @str: the xmlChar * array (haystack)
1264 * @start: the index of the first char (zero based)
1265 * @len: the length of the substring
1266 *
1267 * Extract a substring of a given string
1268 *
1269 * Returns the xmlChar * for the first occurence or NULL.
1270 */
1271
1272xmlChar *
1273xmlStrsub(const xmlChar *str, int start, int len) {
1274 int i;
1275
1276 if (str == NULL) return(NULL);
1277 if (start < 0) return(NULL);
1278 if (len < 0) return(NULL);
1279
1280 for (i = 0;i < start;i++) {
1281 if (*str == 0) return(NULL);
1282 str++;
1283 }
1284 if (*str == 0) return(NULL);
1285 return(xmlStrndup(str, len));
1286}
1287
1288/**
1289 * xmlStrlen:
1290 * @str: the xmlChar * array
1291 *
1292 * length of a xmlChar's string
1293 *
1294 * Returns the number of xmlChar contained in the ARRAY.
1295 */
1296
1297int
1298xmlStrlen(const xmlChar *str) {
1299 int len = 0;
1300
1301 if (str == NULL) return(0);
1302 while (*str != 0) { /* non input consuming */
1303 str++;
1304 len++;
1305 }
1306 return(len);
1307}
1308
1309/**
1310 * xmlStrncat:
1311 * @cur: the original xmlChar * array
1312 * @add: the xmlChar * array added
1313 * @len: the length of @add
1314 *
1315 * a strncat for array of xmlChar's, it will extend cur with the len
1316 * first bytes of @add.
1317 *
1318 * Returns a new xmlChar *, the original @cur is reallocated if needed
1319 * and should not be freed
1320 */
1321
1322xmlChar *
1323xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1324 int size;
1325 xmlChar *ret;
1326
1327 if ((add == NULL) || (len == 0))
1328 return(cur);
1329 if (cur == NULL)
1330 return(xmlStrndup(add, len));
1331
1332 size = xmlStrlen(cur);
1333 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1334 if (ret == NULL) {
1335 xmlGenericError(xmlGenericErrorContext,
1336 "xmlStrncat: realloc of %ld byte failed\n",
1337 (size + len + 1) * (long)sizeof(xmlChar));
1338 return(cur);
1339 }
1340 memcpy(&ret[size], add, len * sizeof(xmlChar));
1341 ret[size + len] = 0;
1342 return(ret);
1343}
1344
1345/**
1346 * xmlStrcat:
1347 * @cur: the original xmlChar * array
1348 * @add: the xmlChar * array added
1349 *
1350 * a strcat for array of xmlChar's. Since they are supposed to be
1351 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1352 * a termination mark of '0'.
1353 *
1354 * Returns a new xmlChar * containing the concatenated string.
1355 */
1356xmlChar *
1357xmlStrcat(xmlChar *cur, const xmlChar *add) {
1358 const xmlChar *p = add;
1359
1360 if (add == NULL) return(cur);
1361 if (cur == NULL)
1362 return(xmlStrdup(add));
1363
1364 while (*p != 0) p++; /* non input consuming */
1365 return(xmlStrncat(cur, add, p - add));
1366}
1367
1368/************************************************************************
1369 * *
1370 * Commodity functions, cleanup needed ? *
1371 * *
1372 ************************************************************************/
1373
1374/**
1375 * areBlanks:
1376 * @ctxt: an XML parser context
1377 * @str: a xmlChar *
1378 * @len: the size of @str
1379 *
1380 * Is this a sequence of blank chars that one can ignore ?
1381 *
1382 * Returns 1 if ignorable 0 otherwise.
1383 */
1384
1385static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1386 int i, ret;
1387 xmlNodePtr lastChild;
1388
1389 /*
1390 * Check for xml:space value.
1391 */
1392 if (*(ctxt->space) == 1)
1393 return(0);
1394
1395 /*
1396 * Check that the string is made of blanks
1397 */
1398 for (i = 0;i < len;i++)
1399 if (!(IS_BLANK(str[i]))) return(0);
1400
1401 /*
1402 * Look if the element is mixed content in the Dtd if available
1403 */
1404 if (ctxt->myDoc != NULL) {
1405 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1406 if (ret == 0) return(1);
1407 if (ret == 1) return(0);
1408 }
1409
1410 /*
1411 * Otherwise, heuristic :-\
1412 */
1413 if (ctxt->keepBlanks)
1414 return(0);
1415 if (RAW != '<') return(0);
1416 if (ctxt->node == NULL) return(0);
1417 if ((ctxt->node->children == NULL) &&
1418 (RAW == '<') && (NXT(1) == '/')) return(0);
1419
1420 lastChild = xmlGetLastChild(ctxt->node);
1421 if (lastChild == NULL) {
1422 if (ctxt->node->content != NULL) return(0);
1423 } else if (xmlNodeIsText(lastChild))
1424 return(0);
1425 else if ((ctxt->node->children != NULL) &&
1426 (xmlNodeIsText(ctxt->node->children)))
1427 return(0);
1428 return(1);
1429}
1430
1431/*
 * Forward declarations for recursive behaviour.
1433 */
1434void xmlParsePEReference(xmlParserCtxtPtr ctxt);
1435void xmlParseReference(xmlParserCtxtPtr ctxt);
1436
1437/************************************************************************
1438 * *
1439 * Extra stuff for namespace support *
1440 * Relates to http://www.w3.org/TR/WD-xml-names *
1441 * *
1442 ************************************************************************/
1443
1444/**
1445 * xmlSplitQName:
1446 * @ctxt: an XML parser context
1447 * @name: an XML parser context
1448 * @prefix: a xmlChar **
1449 *
1450 * parse an UTF8 encoded XML qualified name string
1451 *
1452 * [NS 5] QName ::= (Prefix ':')? LocalPart
1453 *
1454 * [NS 6] Prefix ::= NCName
1455 *
1456 * [NS 7] LocalPart ::= NCName
1457 *
1458 * Returns the local part, and prefix is updated
1459 * to get the Prefix if any.
1460 */
1461
1462xmlChar *
1463xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1464 xmlChar buf[XML_MAX_NAMELEN + 5];
1465 xmlChar *buffer = NULL;
1466 int len = 0;
1467 int max = XML_MAX_NAMELEN;
1468 xmlChar *ret = NULL;
1469 const xmlChar *cur = name;
1470 int c;
1471
1472 *prefix = NULL;
1473
1474#ifndef XML_XML_NAMESPACE
1475 /* xml: prefix is not really a namespace */
1476 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1477 (cur[2] == 'l') && (cur[3] == ':'))
1478 return(xmlStrdup(name));
1479#endif
1480
1481 /* nasty but valid */
1482 if (cur[0] == ':')
1483 return(xmlStrdup(name));
1484
1485 c = *cur++;
1486 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1487 buf[len++] = c;
1488 c = *cur++;
1489 }
1490 if (len >= max) {
1491 /*
1492 * Okay someone managed to make a huge name, so he's ready to pay
1493 * for the processing speed.
1494 */
1495 max = len * 2;
1496
1497 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1498 if (buffer == NULL) {
1499 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1500 ctxt->sax->error(ctxt->userData,
1501 "xmlSplitQName: out of memory\n");
1502 return(NULL);
1503 }
1504 memcpy(buffer, buf, len);
1505 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1506 if (len + 10 > max) {
1507 max *= 2;
1508 buffer = (xmlChar *) xmlRealloc(buffer,
1509 max * sizeof(xmlChar));
1510 if (buffer == NULL) {
1511 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1512 ctxt->sax->error(ctxt->userData,
1513 "xmlSplitQName: out of memory\n");
1514 return(NULL);
1515 }
1516 }
1517 buffer[len++] = c;
1518 c = *cur++;
1519 }
1520 buffer[len] = 0;
1521 }
1522
1523 if (buffer == NULL)
1524 ret = xmlStrndup(buf, len);
1525 else {
1526 ret = buffer;
1527 buffer = NULL;
1528 max = XML_MAX_NAMELEN;
1529 }
1530
1531
1532 if (c == ':') {
1533 c = *cur++;
1534 if (c == 0) return(ret);
1535 *prefix = ret;
1536 len = 0;
1537
1538 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1539 buf[len++] = c;
1540 c = *cur++;
1541 }
1542 if (len >= max) {
1543 /*
1544 * Okay someone managed to make a huge name, so he's ready to pay
1545 * for the processing speed.
1546 */
1547 max = len * 2;
1548
1549 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1550 if (buffer == NULL) {
1551 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1552 ctxt->sax->error(ctxt->userData,
1553 "xmlSplitQName: out of memory\n");
1554 return(NULL);
1555 }
1556 memcpy(buffer, buf, len);
1557 while (c != 0) { /* tested bigname2.xml */
1558 if (len + 10 > max) {
1559 max *= 2;
1560 buffer = (xmlChar *) xmlRealloc(buffer,
1561 max * sizeof(xmlChar));
1562 if (buffer == NULL) {
1563 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1564 ctxt->sax->error(ctxt->userData,
1565 "xmlSplitQName: out of memory\n");
1566 return(NULL);
1567 }
1568 }
1569 buffer[len++] = c;
1570 c = *cur++;
1571 }
1572 buffer[len] = 0;
1573 }
1574
1575 if (buffer == NULL)
1576 ret = xmlStrndup(buf, len);
1577 else {
1578 ret = buffer;
1579 }
1580 }
1581
1582 return(ret);
1583}
1584
1585/************************************************************************
1586 * *
1587 * The parser itself *
1588 * Relates to http://www.w3.org/TR/REC-xml *
1589 * *
1590 ************************************************************************/
1591
1592/**
1593 * xmlParseName:
1594 * @ctxt: an XML parser context
1595 *
1596 * parse an XML name.
1597 *
1598 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1599 * CombiningChar | Extender
1600 *
1601 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1602 *
1603 * [6] Names ::= Name (S Name)*
1604 *
1605 * Returns the Name parsed or NULL
1606 */
1607
1608xmlChar *
1609xmlParseName(xmlParserCtxtPtr ctxt) {
1610 xmlChar buf[XML_MAX_NAMELEN + 5];
Daniel Veillard48b2f892001-02-25 16:11:03 +00001611 const xmlChar *in;
1612 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001613 int len = 0, l;
1614 int c;
1615 int count = 0;
1616
1617 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001618
1619 /*
1620 * Accelerator for simple ASCII names
1621 */
1622 in = ctxt->input->cur;
1623 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1624 ((*in >= 0x41) && (*in <= 0x5A)) ||
1625 (*in == '_') || (*in == ':')) {
1626 in++;
1627 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1628 ((*in >= 0x41) && (*in <= 0x5A)) ||
1629 ((*in >= 0x30) && (*in <= 0x39)) ||
1630 (*in == '_') || (*in == ':'))
1631 in++;
1632 if ((*in == ' ') || (*in == '>') || (*in == '/')) {
1633 count = in - ctxt->input->cur;
1634 ret = xmlStrndup(ctxt->input->cur, count);
1635 ctxt->input->cur = in;
1636 return(ret);
1637 }
1638 }
1639
Owen Taylor3473f882001-02-23 17:55:21 +00001640 c = CUR_CHAR(l);
1641 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1642 (!IS_LETTER(c) && (c != '_') &&
1643 (c != ':'))) {
1644 return(NULL);
1645 }
1646
1647 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1648 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1649 (c == '.') || (c == '-') ||
1650 (c == '_') || (c == ':') ||
1651 (IS_COMBINING(c)) ||
1652 (IS_EXTENDER(c)))) {
1653 if (count++ > 100) {
1654 count = 0;
1655 GROW;
1656 }
1657 COPY_BUF(l,buf,len,c);
1658 NEXTL(l);
1659 c = CUR_CHAR(l);
1660 if (len >= XML_MAX_NAMELEN) {
1661 /*
1662 * Okay someone managed to make a huge name, so he's ready to pay
1663 * for the processing speed.
1664 */
1665 xmlChar *buffer;
1666 int max = len * 2;
1667
1668 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1669 if (buffer == NULL) {
1670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1671 ctxt->sax->error(ctxt->userData,
1672 "xmlParseName: out of memory\n");
1673 return(NULL);
1674 }
1675 memcpy(buffer, buf, len);
1676 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1677 (c == '.') || (c == '-') ||
1678 (c == '_') || (c == ':') ||
1679 (IS_COMBINING(c)) ||
1680 (IS_EXTENDER(c))) {
1681 if (count++ > 100) {
1682 count = 0;
1683 GROW;
1684 }
1685 if (len + 10 > max) {
1686 max *= 2;
1687 buffer = (xmlChar *) xmlRealloc(buffer,
1688 max * sizeof(xmlChar));
1689 if (buffer == NULL) {
1690 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1691 ctxt->sax->error(ctxt->userData,
1692 "xmlParseName: out of memory\n");
1693 return(NULL);
1694 }
1695 }
1696 COPY_BUF(l,buffer,len,c);
1697 NEXTL(l);
1698 c = CUR_CHAR(l);
1699 }
1700 buffer[len] = 0;
1701 return(buffer);
1702 }
1703 }
1704 return(xmlStrndup(buf, len));
1705}
1706
1707/**
1708 * xmlParseStringName:
1709 * @ctxt: an XML parser context
1710 * @str: a pointer to the string pointer (IN/OUT)
1711 *
1712 * parse an XML name.
1713 *
1714 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1715 * CombiningChar | Extender
1716 *
1717 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1718 *
1719 * [6] Names ::= Name (S Name)*
1720 *
1721 * Returns the Name parsed or NULL. The str pointer
1722 * is updated to the current location in the string.
1723 */
1724
1725xmlChar *
1726xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1727 xmlChar buf[XML_MAX_NAMELEN + 5];
1728 const xmlChar *cur = *str;
1729 int len = 0, l;
1730 int c;
1731
1732 c = CUR_SCHAR(cur, l);
1733 if (!IS_LETTER(c) && (c != '_') &&
1734 (c != ':')) {
1735 return(NULL);
1736 }
1737
1738 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1739 (c == '.') || (c == '-') ||
1740 (c == '_') || (c == ':') ||
1741 (IS_COMBINING(c)) ||
1742 (IS_EXTENDER(c))) {
1743 COPY_BUF(l,buf,len,c);
1744 cur += l;
1745 c = CUR_SCHAR(cur, l);
1746 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1747 /*
1748 * Okay someone managed to make a huge name, so he's ready to pay
1749 * for the processing speed.
1750 */
1751 xmlChar *buffer;
1752 int max = len * 2;
1753
1754 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1755 if (buffer == NULL) {
1756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1757 ctxt->sax->error(ctxt->userData,
1758 "xmlParseStringName: out of memory\n");
1759 return(NULL);
1760 }
1761 memcpy(buffer, buf, len);
1762 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1763 (c == '.') || (c == '-') ||
1764 (c == '_') || (c == ':') ||
1765 (IS_COMBINING(c)) ||
1766 (IS_EXTENDER(c))) {
1767 if (len + 10 > max) {
1768 max *= 2;
1769 buffer = (xmlChar *) xmlRealloc(buffer,
1770 max * sizeof(xmlChar));
1771 if (buffer == NULL) {
1772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1773 ctxt->sax->error(ctxt->userData,
1774 "xmlParseStringName: out of memory\n");
1775 return(NULL);
1776 }
1777 }
1778 COPY_BUF(l,buffer,len,c);
1779 cur += l;
1780 c = CUR_SCHAR(cur, l);
1781 }
1782 buffer[len] = 0;
1783 *str = cur;
1784 return(buffer);
1785 }
1786 }
1787 *str = cur;
1788 return(xmlStrndup(buf, len));
1789}
1790
1791/**
1792 * xmlParseNmtoken:
1793 * @ctxt: an XML parser context
1794 *
1795 * parse an XML Nmtoken.
1796 *
1797 * [7] Nmtoken ::= (NameChar)+
1798 *
1799 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1800 *
1801 * Returns the Nmtoken parsed or NULL
1802 */
1803
1804xmlChar *
1805xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1806 xmlChar buf[XML_MAX_NAMELEN + 5];
1807 int len = 0, l;
1808 int c;
1809 int count = 0;
1810
1811 GROW;
1812 c = CUR_CHAR(l);
1813
1814 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1815 (c == '.') || (c == '-') ||
1816 (c == '_') || (c == ':') ||
1817 (IS_COMBINING(c)) ||
1818 (IS_EXTENDER(c))) {
1819 if (count++ > 100) {
1820 count = 0;
1821 GROW;
1822 }
1823 COPY_BUF(l,buf,len,c);
1824 NEXTL(l);
1825 c = CUR_CHAR(l);
1826 if (len >= XML_MAX_NAMELEN) {
1827 /*
1828 * Okay someone managed to make a huge token, so he's ready to pay
1829 * for the processing speed.
1830 */
1831 xmlChar *buffer;
1832 int max = len * 2;
1833
1834 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1835 if (buffer == NULL) {
1836 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1837 ctxt->sax->error(ctxt->userData,
1838 "xmlParseNmtoken: out of memory\n");
1839 return(NULL);
1840 }
1841 memcpy(buffer, buf, len);
1842 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1843 (c == '.') || (c == '-') ||
1844 (c == '_') || (c == ':') ||
1845 (IS_COMBINING(c)) ||
1846 (IS_EXTENDER(c))) {
1847 if (count++ > 100) {
1848 count = 0;
1849 GROW;
1850 }
1851 if (len + 10 > max) {
1852 max *= 2;
1853 buffer = (xmlChar *) xmlRealloc(buffer,
1854 max * sizeof(xmlChar));
1855 if (buffer == NULL) {
1856 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1857 ctxt->sax->error(ctxt->userData,
1858 "xmlParseName: out of memory\n");
1859 return(NULL);
1860 }
1861 }
1862 COPY_BUF(l,buffer,len,c);
1863 NEXTL(l);
1864 c = CUR_CHAR(l);
1865 }
1866 buffer[len] = 0;
1867 return(buffer);
1868 }
1869 }
1870 if (len == 0)
1871 return(NULL);
1872 return(xmlStrndup(buf, len));
1873}
1874
1875/**
1876 * xmlParseEntityValue:
1877 * @ctxt: an XML parser context
1878 * @orig: if non-NULL store a copy of the original entity value
1879 *
1880 * parse a value for ENTITY declarations
1881 *
1882 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1883 * "'" ([^%&'] | PEReference | Reference)* "'"
1884 *
1885 * Returns the EntityValue parsed with reference substitued or NULL
1886 */
1887
1888xmlChar *
1889xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1890 xmlChar *buf = NULL;
1891 int len = 0;
1892 int size = XML_PARSER_BUFFER_SIZE;
1893 int c, l;
1894 xmlChar stop;
1895 xmlChar *ret = NULL;
1896 const xmlChar *cur = NULL;
1897 xmlParserInputPtr input;
1898
1899 if (RAW == '"') stop = '"';
1900 else if (RAW == '\'') stop = '\'';
1901 else {
1902 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1904 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1905 ctxt->wellFormed = 0;
1906 ctxt->disableSAX = 1;
1907 return(NULL);
1908 }
1909 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1910 if (buf == NULL) {
1911 xmlGenericError(xmlGenericErrorContext,
1912 "malloc of %d byte failed\n", size);
1913 return(NULL);
1914 }
1915
1916 /*
1917 * The content of the entity definition is copied in a buffer.
1918 */
1919
1920 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1921 input = ctxt->input;
1922 GROW;
1923 NEXT;
1924 c = CUR_CHAR(l);
1925 /*
1926 * NOTE: 4.4.5 Included in Literal
1927 * When a parameter entity reference appears in a literal entity
1928 * value, ... a single or double quote character in the replacement
1929 * text is always treated as a normal data character and will not
1930 * terminate the literal.
1931 * In practice it means we stop the loop only when back at parsing
1932 * the initial entity and the quote is found
1933 */
1934 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1935 (ctxt->input != input))) {
1936 if (len + 5 >= size) {
1937 size *= 2;
1938 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1939 if (buf == NULL) {
1940 xmlGenericError(xmlGenericErrorContext,
1941 "realloc of %d byte failed\n", size);
1942 return(NULL);
1943 }
1944 }
1945 COPY_BUF(l,buf,len,c);
1946 NEXTL(l);
1947 /*
1948 * Pop-up of finished entities.
1949 */
1950 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1951 xmlPopInput(ctxt);
1952
1953 GROW;
1954 c = CUR_CHAR(l);
1955 if (c == 0) {
1956 GROW;
1957 c = CUR_CHAR(l);
1958 }
1959 }
1960 buf[len] = 0;
1961
1962 /*
1963 * Raise problem w.r.t. '&' and '%' being used in non-entities
1964 * reference constructs. Note Charref will be handled in
1965 * xmlStringDecodeEntities()
1966 */
1967 cur = buf;
1968 while (*cur != 0) { /* non input consuming */
1969 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1970 xmlChar *name;
1971 xmlChar tmp = *cur;
1972
1973 cur++;
1974 name = xmlParseStringName(ctxt, &cur);
1975 if ((name == NULL) || (*cur != ';')) {
1976 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1977 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1978 ctxt->sax->error(ctxt->userData,
1979 "EntityValue: '%c' forbidden except for entities references\n",
1980 tmp);
1981 ctxt->wellFormed = 0;
1982 ctxt->disableSAX = 1;
1983 }
1984 if ((ctxt->inSubset == 1) && (tmp == '%')) {
1985 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
1986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1987 ctxt->sax->error(ctxt->userData,
1988 "EntityValue: PEReferences forbidden in internal subset\n",
1989 tmp);
1990 ctxt->wellFormed = 0;
1991 ctxt->disableSAX = 1;
1992 }
1993 if (name != NULL)
1994 xmlFree(name);
1995 }
1996 cur++;
1997 }
1998
1999 /*
2000 * Then PEReference entities are substituted.
2001 */
2002 if (c != stop) {
2003 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2005 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2006 ctxt->wellFormed = 0;
2007 ctxt->disableSAX = 1;
2008 xmlFree(buf);
2009 } else {
2010 NEXT;
2011 /*
2012 * NOTE: 4.4.7 Bypassed
2013 * When a general entity reference appears in the EntityValue in
2014 * an entity declaration, it is bypassed and left as is.
2015 * so XML_SUBSTITUTE_REF is not set here.
2016 */
2017 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2018 0, 0, 0);
2019 if (orig != NULL)
2020 *orig = buf;
2021 else
2022 xmlFree(buf);
2023 }
2024
2025 return(ret);
2026}
2027
2028/**
2029 * xmlParseAttValue:
2030 * @ctxt: an XML parser context
2031 *
2032 * parse a value for an attribute
2033 * Note: the parser won't do substitution of entities here, this
2034 * will be handled later in xmlStringGetNodeList
2035 *
2036 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2037 * "'" ([^<&'] | Reference)* "'"
2038 *
2039 * 3.3.3 Attribute-Value Normalization:
2040 * Before the value of an attribute is passed to the application or
2041 * checked for validity, the XML processor must normalize it as follows:
2042 * - a character reference is processed by appending the referenced
2043 * character to the attribute value
2044 * - an entity reference is processed by recursively processing the
2045 * replacement text of the entity
2046 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2047 * appending #x20 to the normalized value, except that only a single
2048 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2049 * parsed entity or the literal entity value of an internal parsed entity
2050 * - other characters are processed by appending them to the normalized value
2051 * If the declared value is not CDATA, then the XML processor must further
2052 * process the normalized attribute value by discarding any leading and
2053 * trailing space (#x20) characters, and by replacing sequences of space
2054 * (#x20) characters by a single space (#x20) character.
2055 * All attributes for which no declaration has been read should be treated
2056 * by a non-validating parser as if declared CDATA.
2057 *
2058 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2059 */
2060
2061xmlChar *
2062xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2063 xmlChar limit = 0;
2064 xmlChar *buf = NULL;
2065 int len = 0;
2066 int buf_size = 0;
2067 int c, l;
2068 xmlChar *current = NULL;
2069 xmlEntityPtr ent;
2070
2071
2072 SHRINK;
2073 if (NXT(0) == '"') {
2074 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2075 limit = '"';
2076 NEXT;
2077 } else if (NXT(0) == '\'') {
2078 limit = '\'';
2079 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2080 NEXT;
2081 } else {
2082 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2083 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2084 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2085 ctxt->wellFormed = 0;
2086 ctxt->disableSAX = 1;
2087 return(NULL);
2088 }
2089
2090 /*
2091 * allocate a translation buffer.
2092 */
2093 buf_size = XML_PARSER_BUFFER_SIZE;
2094 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2095 if (buf == NULL) {
2096 perror("xmlParseAttValue: malloc failed");
2097 return(NULL);
2098 }
2099
2100 /*
2101 * Ok loop until we reach one of the ending char or a size limit.
2102 */
2103 c = CUR_CHAR(l);
2104 while (((NXT(0) != limit) && /* checked */
2105 (c != '<')) || (ctxt->token != 0)) {
2106 if (c == 0) break;
2107 if (ctxt->token == '&') {
2108 /*
2109 * The reparsing will be done in xmlStringGetNodeList()
2110 * called by the attribute() function in SAX.c
2111 */
2112 static xmlChar buffer[6] = "&#38;";
2113
2114 if (len > buf_size - 10) {
2115 growBuffer(buf);
2116 }
2117 current = &buffer[0];
2118 while (*current != 0) { /* non input consuming */
2119 buf[len++] = *current++;
2120 }
2121 ctxt->token = 0;
2122 } else if (c == '&') {
2123 if (NXT(1) == '#') {
2124 int val = xmlParseCharRef(ctxt);
2125 if (val == '&') {
2126 /*
2127 * The reparsing will be done in xmlStringGetNodeList()
2128 * called by the attribute() function in SAX.c
2129 */
2130 static xmlChar buffer[6] = "&#38;";
2131
2132 if (len > buf_size - 10) {
2133 growBuffer(buf);
2134 }
2135 current = &buffer[0];
2136 while (*current != 0) { /* non input consuming */
2137 buf[len++] = *current++;
2138 }
2139 } else {
2140 len += xmlCopyChar(0, &buf[len], val);
2141 }
2142 } else {
2143 ent = xmlParseEntityRef(ctxt);
2144 if ((ent != NULL) &&
2145 (ctxt->replaceEntities != 0)) {
2146 xmlChar *rep;
2147
2148 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2149 rep = xmlStringDecodeEntities(ctxt, ent->content,
2150 XML_SUBSTITUTE_REF, 0, 0, 0);
2151 if (rep != NULL) {
2152 current = rep;
2153 while (*current != 0) { /* non input consuming */
2154 buf[len++] = *current++;
2155 if (len > buf_size - 10) {
2156 growBuffer(buf);
2157 }
2158 }
2159 xmlFree(rep);
2160 }
2161 } else {
2162 if (ent->content != NULL)
2163 buf[len++] = ent->content[0];
2164 }
2165 } else if (ent != NULL) {
2166 int i = xmlStrlen(ent->name);
2167 const xmlChar *cur = ent->name;
2168
2169 /*
2170 * This may look absurd but is needed to detect
2171 * entities problems
2172 */
2173 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2174 (ent->content != NULL)) {
2175 xmlChar *rep;
2176 rep = xmlStringDecodeEntities(ctxt, ent->content,
2177 XML_SUBSTITUTE_REF, 0, 0, 0);
2178 if (rep != NULL)
2179 xmlFree(rep);
2180 }
2181
2182 /*
2183 * Just output the reference
2184 */
2185 buf[len++] = '&';
2186 if (len > buf_size - i - 10) {
2187 growBuffer(buf);
2188 }
2189 for (;i > 0;i--)
2190 buf[len++] = *cur++;
2191 buf[len++] = ';';
2192 }
2193 }
2194 } else {
2195 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2196 COPY_BUF(l,buf,len,0x20);
2197 if (len > buf_size - 10) {
2198 growBuffer(buf);
2199 }
2200 } else {
2201 COPY_BUF(l,buf,len,c);
2202 if (len > buf_size - 10) {
2203 growBuffer(buf);
2204 }
2205 }
2206 NEXTL(l);
2207 }
2208 GROW;
2209 c = CUR_CHAR(l);
2210 }
2211 buf[len++] = 0;
2212 if (RAW == '<') {
2213 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2214 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2215 ctxt->sax->error(ctxt->userData,
2216 "Unescaped '<' not allowed in attributes values\n");
2217 ctxt->wellFormed = 0;
2218 ctxt->disableSAX = 1;
2219 } else if (RAW != limit) {
2220 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2221 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2222 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2223 ctxt->wellFormed = 0;
2224 ctxt->disableSAX = 1;
2225 } else
2226 NEXT;
2227 return(buf);
2228}
2229
2230/**
2231 * xmlParseSystemLiteral:
2232 * @ctxt: an XML parser context
2233 *
2234 * parse an XML Literal
2235 *
2236 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2237 *
2238 * Returns the SystemLiteral parsed or NULL
2239 */
2240
2241xmlChar *
2242xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2243 xmlChar *buf = NULL;
2244 int len = 0;
2245 int size = XML_PARSER_BUFFER_SIZE;
2246 int cur, l;
2247 xmlChar stop;
2248 int state = ctxt->instate;
2249 int count = 0;
2250
2251 SHRINK;
2252 if (RAW == '"') {
2253 NEXT;
2254 stop = '"';
2255 } else if (RAW == '\'') {
2256 NEXT;
2257 stop = '\'';
2258 } else {
2259 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2260 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2261 ctxt->sax->error(ctxt->userData,
2262 "SystemLiteral \" or ' expected\n");
2263 ctxt->wellFormed = 0;
2264 ctxt->disableSAX = 1;
2265 return(NULL);
2266 }
2267
2268 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2269 if (buf == NULL) {
2270 xmlGenericError(xmlGenericErrorContext,
2271 "malloc of %d byte failed\n", size);
2272 return(NULL);
2273 }
2274 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2275 cur = CUR_CHAR(l);
2276 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2277 if (len + 5 >= size) {
2278 size *= 2;
2279 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2280 if (buf == NULL) {
2281 xmlGenericError(xmlGenericErrorContext,
2282 "realloc of %d byte failed\n", size);
2283 ctxt->instate = (xmlParserInputState) state;
2284 return(NULL);
2285 }
2286 }
2287 count++;
2288 if (count > 50) {
2289 GROW;
2290 count = 0;
2291 }
2292 COPY_BUF(l,buf,len,cur);
2293 NEXTL(l);
2294 cur = CUR_CHAR(l);
2295 if (cur == 0) {
2296 GROW;
2297 SHRINK;
2298 cur = CUR_CHAR(l);
2299 }
2300 }
2301 buf[len] = 0;
2302 ctxt->instate = (xmlParserInputState) state;
2303 if (!IS_CHAR(cur)) {
2304 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2305 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2306 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2307 ctxt->wellFormed = 0;
2308 ctxt->disableSAX = 1;
2309 } else {
2310 NEXT;
2311 }
2312 return(buf);
2313}
2314
2315/**
2316 * xmlParsePubidLiteral:
2317 * @ctxt: an XML parser context
2318 *
2319 * parse an XML public literal
2320 *
2321 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2322 *
2323 * Returns the PubidLiteral parsed or NULL.
2324 */
2325
2326xmlChar *
2327xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2328 xmlChar *buf = NULL;
2329 int len = 0;
2330 int size = XML_PARSER_BUFFER_SIZE;
2331 xmlChar cur;
2332 xmlChar stop;
2333 int count = 0;
2334
2335 SHRINK;
2336 if (RAW == '"') {
2337 NEXT;
2338 stop = '"';
2339 } else if (RAW == '\'') {
2340 NEXT;
2341 stop = '\'';
2342 } else {
2343 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2344 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2345 ctxt->sax->error(ctxt->userData,
2346 "SystemLiteral \" or ' expected\n");
2347 ctxt->wellFormed = 0;
2348 ctxt->disableSAX = 1;
2349 return(NULL);
2350 }
2351 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2352 if (buf == NULL) {
2353 xmlGenericError(xmlGenericErrorContext,
2354 "malloc of %d byte failed\n", size);
2355 return(NULL);
2356 }
2357 cur = CUR;
2358 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2359 if (len + 1 >= size) {
2360 size *= 2;
2361 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2362 if (buf == NULL) {
2363 xmlGenericError(xmlGenericErrorContext,
2364 "realloc of %d byte failed\n", size);
2365 return(NULL);
2366 }
2367 }
2368 buf[len++] = cur;
2369 count++;
2370 if (count > 50) {
2371 GROW;
2372 count = 0;
2373 }
2374 NEXT;
2375 cur = CUR;
2376 if (cur == 0) {
2377 GROW;
2378 SHRINK;
2379 cur = CUR;
2380 }
2381 }
2382 buf[len] = 0;
2383 if (cur != stop) {
2384 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2385 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2386 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2387 ctxt->wellFormed = 0;
2388 ctxt->disableSAX = 1;
2389 } else {
2390 NEXT;
2391 }
2392 return(buf);
2393}
2394
Daniel Veillard48b2f892001-02-25 16:11:03 +00002395void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002396/**
2397 * xmlParseCharData:
2398 * @ctxt: an XML parser context
2399 * @cdata: int indicating whether we are within a CDATA section
2400 *
2401 * parse a CharData section.
2402 * if we are within a CDATA section ']]>' marks an end of section.
2403 *
2404 * The right angle bracket (>) may be represented using the string "&gt;",
2405 * and must, for compatibility, be escaped using "&gt;" or a character
2406 * reference when it appears in the string "]]>" in content, when that
2407 * string is not marking the end of a CDATA section.
2408 *
2409 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2410 */
2411
2412void
2413xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002414 const xmlChar *in;
2415 int nbchar = 0;
2416
2417 SHRINK;
2418 GROW;
2419 /*
2420 * Accelerated common case where input don't need to be
2421 * modified before passing it to the handler.
2422 */
2423 if ((ctxt->token == 0) && (!cdata)) {
2424 in = ctxt->input->cur;
2425 do {
2426 while (((*in >= 0x20) && (*in != '<') &&
2427 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2428 in++;
2429 if (*in == 0xA) {
2430 ctxt->input->line++;
2431 continue; /* while */
2432 }
2433 nbchar = in - ctxt->input->cur;
2434 if (IS_BLANK(*ctxt->input->cur) &&
2435 areBlanks(ctxt, ctxt->input->cur, nbchar)) {
2436 if (ctxt->sax->ignorableWhitespace != NULL)
2437 ctxt->sax->ignorableWhitespace(ctxt->userData,
2438 ctxt->input->cur, nbchar);
2439 } else {
2440 if (ctxt->sax->characters != NULL)
2441 ctxt->sax->characters(ctxt->userData,
2442 ctxt->input->cur, nbchar);
2443 }
2444 ctxt->input->cur = in;
2445 if (*in == 0xD) {
2446 in++;
2447 if (*in == 0xA) {
2448 ctxt->input->cur = in;
2449 in++;
2450 ctxt->input->line++;
2451 continue; /* while */
2452 }
2453 in--;
2454 }
2455 if ((*in == '<') || (*in == '&')) {
2456 return;
2457 }
2458 SHRINK;
2459 GROW;
2460 in = ctxt->input->cur;
2461 } while ((*in >= 0x20) && (*in <= 0x7F));
2462 nbchar = 0;
2463 }
2464 xmlParseCharDataComplex(ctxt, cdata);
2465}
2466
2467void
2468xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002469 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2470 int nbchar = 0;
2471 int cur, l;
2472 int count = 0;
2473
2474 SHRINK;
2475 GROW;
2476 cur = CUR_CHAR(l);
2477 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2478 ((cur != '&') || (ctxt->token == '&')) &&
2479 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2480 if ((cur == ']') && (NXT(1) == ']') &&
2481 (NXT(2) == '>')) {
2482 if (cdata) break;
2483 else {
2484 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2485 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2486 ctxt->sax->error(ctxt->userData,
2487 "Sequence ']]>' not allowed in content\n");
2488 /* Should this be relaxed ??? I see a "must here */
2489 ctxt->wellFormed = 0;
2490 ctxt->disableSAX = 1;
2491 }
2492 }
2493 COPY_BUF(l,buf,nbchar,cur);
2494 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2495 /*
2496 * Ok the segment is to be consumed as chars.
2497 */
2498 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2499 if (areBlanks(ctxt, buf, nbchar)) {
2500 if (ctxt->sax->ignorableWhitespace != NULL)
2501 ctxt->sax->ignorableWhitespace(ctxt->userData,
2502 buf, nbchar);
2503 } else {
2504 if (ctxt->sax->characters != NULL)
2505 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2506 }
2507 }
2508 nbchar = 0;
2509 }
2510 count++;
2511 if (count > 50) {
2512 GROW;
2513 count = 0;
2514 }
2515 NEXTL(l);
2516 cur = CUR_CHAR(l);
2517 }
2518 if (nbchar != 0) {
2519 /*
2520 * Ok the segment is to be consumed as chars.
2521 */
2522 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2523 if (areBlanks(ctxt, buf, nbchar)) {
2524 if (ctxt->sax->ignorableWhitespace != NULL)
2525 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2526 } else {
2527 if (ctxt->sax->characters != NULL)
2528 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2529 }
2530 }
2531 }
2532}
2533
2534/**
2535 * xmlParseExternalID:
2536 * @ctxt: an XML parser context
2537 * @publicID: a xmlChar** receiving PubidLiteral
2538 * @strict: indicate whether we should restrict parsing to only
2539 * production [75], see NOTE below
2540 *
2541 * Parse an External ID or a Public ID
2542 *
2543 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2544 * 'PUBLIC' S PubidLiteral S SystemLiteral
2545 *
2546 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2547 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2548 *
2549 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2550 *
2551 * Returns the function returns SystemLiteral and in the second
2552 * case publicID receives PubidLiteral, is strict is off
2553 * it is possible to return NULL and have publicID set.
2554 */
2555
2556xmlChar *
2557xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2558 xmlChar *URI = NULL;
2559
2560 SHRINK;
2561 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2562 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2563 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2564 SKIP(6);
2565 if (!IS_BLANK(CUR)) {
2566 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2567 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2568 ctxt->sax->error(ctxt->userData,
2569 "Space required after 'SYSTEM'\n");
2570 ctxt->wellFormed = 0;
2571 ctxt->disableSAX = 1;
2572 }
2573 SKIP_BLANKS;
2574 URI = xmlParseSystemLiteral(ctxt);
2575 if (URI == NULL) {
2576 ctxt->errNo = XML_ERR_URI_REQUIRED;
2577 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2578 ctxt->sax->error(ctxt->userData,
2579 "xmlParseExternalID: SYSTEM, no URI\n");
2580 ctxt->wellFormed = 0;
2581 ctxt->disableSAX = 1;
2582 }
2583 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2584 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2585 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2586 SKIP(6);
2587 if (!IS_BLANK(CUR)) {
2588 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2590 ctxt->sax->error(ctxt->userData,
2591 "Space required after 'PUBLIC'\n");
2592 ctxt->wellFormed = 0;
2593 ctxt->disableSAX = 1;
2594 }
2595 SKIP_BLANKS;
2596 *publicID = xmlParsePubidLiteral(ctxt);
2597 if (*publicID == NULL) {
2598 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2600 ctxt->sax->error(ctxt->userData,
2601 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2602 ctxt->wellFormed = 0;
2603 ctxt->disableSAX = 1;
2604 }
2605 if (strict) {
2606 /*
2607 * We don't handle [83] so "S SystemLiteral" is required.
2608 */
2609 if (!IS_BLANK(CUR)) {
2610 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2611 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2612 ctxt->sax->error(ctxt->userData,
2613 "Space required after the Public Identifier\n");
2614 ctxt->wellFormed = 0;
2615 ctxt->disableSAX = 1;
2616 }
2617 } else {
2618 /*
2619 * We handle [83] so we return immediately, if
2620 * "S SystemLiteral" is not detected. From a purely parsing
2621 * point of view that's a nice mess.
2622 */
2623 const xmlChar *ptr;
2624 GROW;
2625
2626 ptr = CUR_PTR;
2627 if (!IS_BLANK(*ptr)) return(NULL);
2628
2629 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2630 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2631 }
2632 SKIP_BLANKS;
2633 URI = xmlParseSystemLiteral(ctxt);
2634 if (URI == NULL) {
2635 ctxt->errNo = XML_ERR_URI_REQUIRED;
2636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2637 ctxt->sax->error(ctxt->userData,
2638 "xmlParseExternalID: PUBLIC, no URI\n");
2639 ctxt->wellFormed = 0;
2640 ctxt->disableSAX = 1;
2641 }
2642 }
2643 return(URI);
2644}
2645
2646/**
2647 * xmlParseComment:
2648 * @ctxt: an XML parser context
2649 *
2650 * Skip an XML (SGML) comment <!-- .... -->
2651 * The spec says that "For compatibility, the string "--" (double-hyphen)
2652 * must not occur within comments. "
2653 *
2654 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2655 */
2656void
2657xmlParseComment(xmlParserCtxtPtr ctxt) {
2658 xmlChar *buf = NULL;
2659 int len;
2660 int size = XML_PARSER_BUFFER_SIZE;
2661 int q, ql;
2662 int r, rl;
2663 int cur, l;
2664 xmlParserInputState state;
2665 xmlParserInputPtr input = ctxt->input;
2666 int count = 0;
2667
2668 /*
2669 * Check that there is a comment right here.
2670 */
2671 if ((RAW != '<') || (NXT(1) != '!') ||
2672 (NXT(2) != '-') || (NXT(3) != '-')) return;
2673
2674 state = ctxt->instate;
2675 ctxt->instate = XML_PARSER_COMMENT;
2676 SHRINK;
2677 SKIP(4);
2678 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2679 if (buf == NULL) {
2680 xmlGenericError(xmlGenericErrorContext,
2681 "malloc of %d byte failed\n", size);
2682 ctxt->instate = state;
2683 return;
2684 }
2685 q = CUR_CHAR(ql);
2686 NEXTL(ql);
2687 r = CUR_CHAR(rl);
2688 NEXTL(rl);
2689 cur = CUR_CHAR(l);
2690 len = 0;
2691 while (IS_CHAR(cur) && /* checked */
2692 ((cur != '>') ||
2693 (r != '-') || (q != '-'))) {
2694 if ((r == '-') && (q == '-') && (len > 1)) {
2695 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2696 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2697 ctxt->sax->error(ctxt->userData,
2698 "Comment must not contain '--' (double-hyphen)`\n");
2699 ctxt->wellFormed = 0;
2700 ctxt->disableSAX = 1;
2701 }
2702 if (len + 5 >= size) {
2703 size *= 2;
2704 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2705 if (buf == NULL) {
2706 xmlGenericError(xmlGenericErrorContext,
2707 "realloc of %d byte failed\n", size);
2708 ctxt->instate = state;
2709 return;
2710 }
2711 }
2712 COPY_BUF(ql,buf,len,q);
2713 q = r;
2714 ql = rl;
2715 r = cur;
2716 rl = l;
2717
2718 count++;
2719 if (count > 50) {
2720 GROW;
2721 count = 0;
2722 }
2723 NEXTL(l);
2724 cur = CUR_CHAR(l);
2725 if (cur == 0) {
2726 SHRINK;
2727 GROW;
2728 cur = CUR_CHAR(l);
2729 }
2730 }
2731 buf[len] = 0;
2732 if (!IS_CHAR(cur)) {
2733 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2734 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2735 ctxt->sax->error(ctxt->userData,
2736 "Comment not terminated \n<!--%.50s\n", buf);
2737 ctxt->wellFormed = 0;
2738 ctxt->disableSAX = 1;
2739 xmlFree(buf);
2740 } else {
2741 if (input != ctxt->input) {
2742 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2743 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2744 ctxt->sax->error(ctxt->userData,
2745"Comment doesn't start and stop in the same entity\n");
2746 ctxt->wellFormed = 0;
2747 ctxt->disableSAX = 1;
2748 }
2749 NEXT;
2750 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2751 (!ctxt->disableSAX))
2752 ctxt->sax->comment(ctxt->userData, buf);
2753 xmlFree(buf);
2754 }
2755 ctxt->instate = state;
2756}
2757
2758/**
2759 * xmlParsePITarget:
2760 * @ctxt: an XML parser context
2761 *
2762 * parse the name of a PI
2763 *
2764 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2765 *
2766 * Returns the PITarget name or NULL
2767 */
2768
2769xmlChar *
2770xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2771 xmlChar *name;
2772
2773 name = xmlParseName(ctxt);
2774 if ((name != NULL) &&
2775 ((name[0] == 'x') || (name[0] == 'X')) &&
2776 ((name[1] == 'm') || (name[1] == 'M')) &&
2777 ((name[2] == 'l') || (name[2] == 'L'))) {
2778 int i;
2779 if ((name[0] == 'x') && (name[1] == 'm') &&
2780 (name[2] == 'l') && (name[3] == 0)) {
2781 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2782 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2783 ctxt->sax->error(ctxt->userData,
2784 "XML declaration allowed only at the start of the document\n");
2785 ctxt->wellFormed = 0;
2786 ctxt->disableSAX = 1;
2787 return(name);
2788 } else if (name[3] == 0) {
2789 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2791 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2792 ctxt->wellFormed = 0;
2793 ctxt->disableSAX = 1;
2794 return(name);
2795 }
2796 for (i = 0;;i++) {
2797 if (xmlW3CPIs[i] == NULL) break;
2798 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2799 return(name);
2800 }
2801 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2802 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2803 ctxt->sax->warning(ctxt->userData,
2804 "xmlParsePItarget: invalid name prefix 'xml'\n");
2805 }
2806 }
2807 return(name);
2808}
2809
2810/**
2811 * xmlParsePI:
2812 * @ctxt: an XML parser context
2813 *
2814 * parse an XML Processing Instruction.
2815 *
2816 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2817 *
2818 * The processing is transfered to SAX once parsed.
2819 */
2820
2821void
2822xmlParsePI(xmlParserCtxtPtr ctxt) {
2823 xmlChar *buf = NULL;
2824 int len = 0;
2825 int size = XML_PARSER_BUFFER_SIZE;
2826 int cur, l;
2827 xmlChar *target;
2828 xmlParserInputState state;
2829 int count = 0;
2830
2831 if ((RAW == '<') && (NXT(1) == '?')) {
2832 xmlParserInputPtr input = ctxt->input;
2833 state = ctxt->instate;
2834 ctxt->instate = XML_PARSER_PI;
2835 /*
2836 * this is a Processing Instruction.
2837 */
2838 SKIP(2);
2839 SHRINK;
2840
2841 /*
2842 * Parse the target name and check for special support like
2843 * namespace.
2844 */
2845 target = xmlParsePITarget(ctxt);
2846 if (target != NULL) {
2847 if ((RAW == '?') && (NXT(1) == '>')) {
2848 if (input != ctxt->input) {
2849 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2850 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2851 ctxt->sax->error(ctxt->userData,
2852 "PI declaration doesn't start and stop in the same entity\n");
2853 ctxt->wellFormed = 0;
2854 ctxt->disableSAX = 1;
2855 }
2856 SKIP(2);
2857
2858 /*
2859 * SAX: PI detected.
2860 */
2861 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2862 (ctxt->sax->processingInstruction != NULL))
2863 ctxt->sax->processingInstruction(ctxt->userData,
2864 target, NULL);
2865 ctxt->instate = state;
2866 xmlFree(target);
2867 return;
2868 }
2869 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2870 if (buf == NULL) {
2871 xmlGenericError(xmlGenericErrorContext,
2872 "malloc of %d byte failed\n", size);
2873 ctxt->instate = state;
2874 return;
2875 }
2876 cur = CUR;
2877 if (!IS_BLANK(cur)) {
2878 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2879 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2880 ctxt->sax->error(ctxt->userData,
2881 "xmlParsePI: PI %s space expected\n", target);
2882 ctxt->wellFormed = 0;
2883 ctxt->disableSAX = 1;
2884 }
2885 SKIP_BLANKS;
2886 cur = CUR_CHAR(l);
2887 while (IS_CHAR(cur) && /* checked */
2888 ((cur != '?') || (NXT(1) != '>'))) {
2889 if (len + 5 >= size) {
2890 size *= 2;
2891 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2892 if (buf == NULL) {
2893 xmlGenericError(xmlGenericErrorContext,
2894 "realloc of %d byte failed\n", size);
2895 ctxt->instate = state;
2896 return;
2897 }
2898 }
2899 count++;
2900 if (count > 50) {
2901 GROW;
2902 count = 0;
2903 }
2904 COPY_BUF(l,buf,len,cur);
2905 NEXTL(l);
2906 cur = CUR_CHAR(l);
2907 if (cur == 0) {
2908 SHRINK;
2909 GROW;
2910 cur = CUR_CHAR(l);
2911 }
2912 }
2913 buf[len] = 0;
2914 if (cur != '?') {
2915 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2916 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2917 ctxt->sax->error(ctxt->userData,
2918 "xmlParsePI: PI %s never end ...\n", target);
2919 ctxt->wellFormed = 0;
2920 ctxt->disableSAX = 1;
2921 } else {
2922 if (input != ctxt->input) {
2923 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2925 ctxt->sax->error(ctxt->userData,
2926 "PI declaration doesn't start and stop in the same entity\n");
2927 ctxt->wellFormed = 0;
2928 ctxt->disableSAX = 1;
2929 }
2930 SKIP(2);
2931
2932 /*
2933 * SAX: PI detected.
2934 */
2935 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2936 (ctxt->sax->processingInstruction != NULL))
2937 ctxt->sax->processingInstruction(ctxt->userData,
2938 target, buf);
2939 }
2940 xmlFree(buf);
2941 xmlFree(target);
2942 } else {
2943 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
2944 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2945 ctxt->sax->error(ctxt->userData,
2946 "xmlParsePI : no target name\n");
2947 ctxt->wellFormed = 0;
2948 ctxt->disableSAX = 1;
2949 }
2950 ctxt->instate = state;
2951 }
2952}
2953
2954/**
2955 * xmlParseNotationDecl:
2956 * @ctxt: an XML parser context
2957 *
2958 * parse a notation declaration
2959 *
2960 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2961 *
2962 * Hence there is actually 3 choices:
2963 * 'PUBLIC' S PubidLiteral
2964 * 'PUBLIC' S PubidLiteral S SystemLiteral
2965 * and 'SYSTEM' S SystemLiteral
2966 *
2967 * See the NOTE on xmlParseExternalID().
2968 */
2969
2970void
2971xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
2972 xmlChar *name;
2973 xmlChar *Pubid;
2974 xmlChar *Systemid;
2975
2976 if ((RAW == '<') && (NXT(1) == '!') &&
2977 (NXT(2) == 'N') && (NXT(3) == 'O') &&
2978 (NXT(4) == 'T') && (NXT(5) == 'A') &&
2979 (NXT(6) == 'T') && (NXT(7) == 'I') &&
2980 (NXT(8) == 'O') && (NXT(9) == 'N')) {
2981 xmlParserInputPtr input = ctxt->input;
2982 SHRINK;
2983 SKIP(10);
2984 if (!IS_BLANK(CUR)) {
2985 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2986 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2987 ctxt->sax->error(ctxt->userData,
2988 "Space required after '<!NOTATION'\n");
2989 ctxt->wellFormed = 0;
2990 ctxt->disableSAX = 1;
2991 return;
2992 }
2993 SKIP_BLANKS;
2994
2995 name = xmlParseName(ctxt);
2996 if (name == NULL) {
2997 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
2998 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2999 ctxt->sax->error(ctxt->userData,
3000 "NOTATION: Name expected here\n");
3001 ctxt->wellFormed = 0;
3002 ctxt->disableSAX = 1;
3003 return;
3004 }
3005 if (!IS_BLANK(CUR)) {
3006 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3007 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3008 ctxt->sax->error(ctxt->userData,
3009 "Space required after the NOTATION name'\n");
3010 ctxt->wellFormed = 0;
3011 ctxt->disableSAX = 1;
3012 return;
3013 }
3014 SKIP_BLANKS;
3015
3016 /*
3017 * Parse the IDs.
3018 */
3019 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3020 SKIP_BLANKS;
3021
3022 if (RAW == '>') {
3023 if (input != ctxt->input) {
3024 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3026 ctxt->sax->error(ctxt->userData,
3027"Notation declaration doesn't start and stop in the same entity\n");
3028 ctxt->wellFormed = 0;
3029 ctxt->disableSAX = 1;
3030 }
3031 NEXT;
3032 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3033 (ctxt->sax->notationDecl != NULL))
3034 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3035 } else {
3036 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3038 ctxt->sax->error(ctxt->userData,
3039 "'>' required to close NOTATION declaration\n");
3040 ctxt->wellFormed = 0;
3041 ctxt->disableSAX = 1;
3042 }
3043 xmlFree(name);
3044 if (Systemid != NULL) xmlFree(Systemid);
3045 if (Pubid != NULL) xmlFree(Pubid);
3046 }
3047}
3048
3049/**
3050 * xmlParseEntityDecl:
3051 * @ctxt: an XML parser context
3052 *
3053 * parse <!ENTITY declarations
3054 *
3055 * [70] EntityDecl ::= GEDecl | PEDecl
3056 *
3057 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3058 *
3059 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3060 *
3061 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3062 *
3063 * [74] PEDef ::= EntityValue | ExternalID
3064 *
3065 * [76] NDataDecl ::= S 'NDATA' S Name
3066 *
3067 * [ VC: Notation Declared ]
3068 * The Name must match the declared name of a notation.
3069 */
3070
3071void
3072xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3073 xmlChar *name = NULL;
3074 xmlChar *value = NULL;
3075 xmlChar *URI = NULL, *literal = NULL;
3076 xmlChar *ndata = NULL;
3077 int isParameter = 0;
3078 xmlChar *orig = NULL;
3079
3080 GROW;
3081 if ((RAW == '<') && (NXT(1) == '!') &&
3082 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3083 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3084 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3085 xmlParserInputPtr input = ctxt->input;
3086 ctxt->instate = XML_PARSER_ENTITY_DECL;
3087 SHRINK;
3088 SKIP(8);
3089 if (!IS_BLANK(CUR)) {
3090 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3091 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3092 ctxt->sax->error(ctxt->userData,
3093 "Space required after '<!ENTITY'\n");
3094 ctxt->wellFormed = 0;
3095 ctxt->disableSAX = 1;
3096 }
3097 SKIP_BLANKS;
3098
3099 if (RAW == '%') {
3100 NEXT;
3101 if (!IS_BLANK(CUR)) {
3102 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3103 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3104 ctxt->sax->error(ctxt->userData,
3105 "Space required after '%'\n");
3106 ctxt->wellFormed = 0;
3107 ctxt->disableSAX = 1;
3108 }
3109 SKIP_BLANKS;
3110 isParameter = 1;
3111 }
3112
3113 name = xmlParseName(ctxt);
3114 if (name == NULL) {
3115 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3116 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3117 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3118 ctxt->wellFormed = 0;
3119 ctxt->disableSAX = 1;
3120 return;
3121 }
3122 if (!IS_BLANK(CUR)) {
3123 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3124 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3125 ctxt->sax->error(ctxt->userData,
3126 "Space required after the entity name\n");
3127 ctxt->wellFormed = 0;
3128 ctxt->disableSAX = 1;
3129 }
3130 SKIP_BLANKS;
3131
3132 /*
3133 * handle the various case of definitions...
3134 */
3135 if (isParameter) {
3136 if ((RAW == '"') || (RAW == '\'')) {
3137 value = xmlParseEntityValue(ctxt, &orig);
3138 if (value) {
3139 if ((ctxt->sax != NULL) &&
3140 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3141 ctxt->sax->entityDecl(ctxt->userData, name,
3142 XML_INTERNAL_PARAMETER_ENTITY,
3143 NULL, NULL, value);
3144 }
3145 } else {
3146 URI = xmlParseExternalID(ctxt, &literal, 1);
3147 if ((URI == NULL) && (literal == NULL)) {
3148 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3149 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3150 ctxt->sax->error(ctxt->userData,
3151 "Entity value required\n");
3152 ctxt->wellFormed = 0;
3153 ctxt->disableSAX = 1;
3154 }
3155 if (URI) {
3156 xmlURIPtr uri;
3157
3158 uri = xmlParseURI((const char *) URI);
3159 if (uri == NULL) {
3160 ctxt->errNo = XML_ERR_INVALID_URI;
3161 if ((ctxt->sax != NULL) &&
3162 (!ctxt->disableSAX) &&
3163 (ctxt->sax->error != NULL))
3164 ctxt->sax->error(ctxt->userData,
3165 "Invalid URI: %s\n", URI);
3166 ctxt->wellFormed = 0;
3167 } else {
3168 if (uri->fragment != NULL) {
3169 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3170 if ((ctxt->sax != NULL) &&
3171 (!ctxt->disableSAX) &&
3172 (ctxt->sax->error != NULL))
3173 ctxt->sax->error(ctxt->userData,
3174 "Fragment not allowed: %s\n", URI);
3175 ctxt->wellFormed = 0;
3176 } else {
3177 if ((ctxt->sax != NULL) &&
3178 (!ctxt->disableSAX) &&
3179 (ctxt->sax->entityDecl != NULL))
3180 ctxt->sax->entityDecl(ctxt->userData, name,
3181 XML_EXTERNAL_PARAMETER_ENTITY,
3182 literal, URI, NULL);
3183 }
3184 xmlFreeURI(uri);
3185 }
3186 }
3187 }
3188 } else {
3189 if ((RAW == '"') || (RAW == '\'')) {
3190 value = xmlParseEntityValue(ctxt, &orig);
3191 if ((ctxt->sax != NULL) &&
3192 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3193 ctxt->sax->entityDecl(ctxt->userData, name,
3194 XML_INTERNAL_GENERAL_ENTITY,
3195 NULL, NULL, value);
3196 } else {
3197 URI = xmlParseExternalID(ctxt, &literal, 1);
3198 if ((URI == NULL) && (literal == NULL)) {
3199 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3200 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3201 ctxt->sax->error(ctxt->userData,
3202 "Entity value required\n");
3203 ctxt->wellFormed = 0;
3204 ctxt->disableSAX = 1;
3205 }
3206 if (URI) {
3207 xmlURIPtr uri;
3208
3209 uri = xmlParseURI((const char *)URI);
3210 if (uri == NULL) {
3211 ctxt->errNo = XML_ERR_INVALID_URI;
3212 if ((ctxt->sax != NULL) &&
3213 (!ctxt->disableSAX) &&
3214 (ctxt->sax->error != NULL))
3215 ctxt->sax->error(ctxt->userData,
3216 "Invalid URI: %s\n", URI);
3217 ctxt->wellFormed = 0;
3218 } else {
3219 if (uri->fragment != NULL) {
3220 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3221 if ((ctxt->sax != NULL) &&
3222 (!ctxt->disableSAX) &&
3223 (ctxt->sax->error != NULL))
3224 ctxt->sax->error(ctxt->userData,
3225 "Fragment not allowed: %s\n", URI);
3226 ctxt->wellFormed = 0;
3227 }
3228 xmlFreeURI(uri);
3229 }
3230 }
3231 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3232 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3233 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3234 ctxt->sax->error(ctxt->userData,
3235 "Space required before 'NDATA'\n");
3236 ctxt->wellFormed = 0;
3237 ctxt->disableSAX = 1;
3238 }
3239 SKIP_BLANKS;
3240 if ((RAW == 'N') && (NXT(1) == 'D') &&
3241 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3242 (NXT(4) == 'A')) {
3243 SKIP(5);
3244 if (!IS_BLANK(CUR)) {
3245 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3246 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3247 ctxt->sax->error(ctxt->userData,
3248 "Space required after 'NDATA'\n");
3249 ctxt->wellFormed = 0;
3250 ctxt->disableSAX = 1;
3251 }
3252 SKIP_BLANKS;
3253 ndata = xmlParseName(ctxt);
3254 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3255 (ctxt->sax->unparsedEntityDecl != NULL))
3256 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3257 literal, URI, ndata);
3258 } else {
3259 if ((ctxt->sax != NULL) &&
3260 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3261 ctxt->sax->entityDecl(ctxt->userData, name,
3262 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3263 literal, URI, NULL);
3264 }
3265 }
3266 }
3267 SKIP_BLANKS;
3268 if (RAW != '>') {
3269 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3270 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3271 ctxt->sax->error(ctxt->userData,
3272 "xmlParseEntityDecl: entity %s not terminated\n", name);
3273 ctxt->wellFormed = 0;
3274 ctxt->disableSAX = 1;
3275 } else {
3276 if (input != ctxt->input) {
3277 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3279 ctxt->sax->error(ctxt->userData,
3280"Entity declaration doesn't start and stop in the same entity\n");
3281 ctxt->wellFormed = 0;
3282 ctxt->disableSAX = 1;
3283 }
3284 NEXT;
3285 }
3286 if (orig != NULL) {
3287 /*
3288 * Ugly mechanism to save the raw entity value.
3289 */
3290 xmlEntityPtr cur = NULL;
3291
3292 if (isParameter) {
3293 if ((ctxt->sax != NULL) &&
3294 (ctxt->sax->getParameterEntity != NULL))
3295 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3296 } else {
3297 if ((ctxt->sax != NULL) &&
3298 (ctxt->sax->getEntity != NULL))
3299 cur = ctxt->sax->getEntity(ctxt->userData, name);
3300 }
3301 if (cur != NULL) {
3302 if (cur->orig != NULL)
3303 xmlFree(orig);
3304 else
3305 cur->orig = orig;
3306 } else
3307 xmlFree(orig);
3308 }
3309 if (name != NULL) xmlFree(name);
3310 if (value != NULL) xmlFree(value);
3311 if (URI != NULL) xmlFree(URI);
3312 if (literal != NULL) xmlFree(literal);
3313 if (ndata != NULL) xmlFree(ndata);
3314 }
3315}
3316
3317/**
3318 * xmlParseDefaultDecl:
3319 * @ctxt: an XML parser context
3320 * @value: Receive a possible fixed default value for the attribute
3321 *
3322 * Parse an attribute default declaration
3323 *
3324 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3325 *
3326 * [ VC: Required Attribute ]
3327 * if the default declaration is the keyword #REQUIRED, then the
3328 * attribute must be specified for all elements of the type in the
3329 * attribute-list declaration.
3330 *
3331 * [ VC: Attribute Default Legal ]
3332 * The declared default value must meet the lexical constraints of
3333 * the declared attribute type c.f. xmlValidateAttributeDecl()
3334 *
3335 * [ VC: Fixed Attribute Default ]
3336 * if an attribute has a default value declared with the #FIXED
3337 * keyword, instances of that attribute must match the default value.
3338 *
3339 * [ WFC: No < in Attribute Values ]
3340 * handled in xmlParseAttValue()
3341 *
3342 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3343 * or XML_ATTRIBUTE_FIXED.
3344 */
3345
3346int
3347xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3348 int val;
3349 xmlChar *ret;
3350
3351 *value = NULL;
3352 if ((RAW == '#') && (NXT(1) == 'R') &&
3353 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3354 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3355 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3356 (NXT(8) == 'D')) {
3357 SKIP(9);
3358 return(XML_ATTRIBUTE_REQUIRED);
3359 }
3360 if ((RAW == '#') && (NXT(1) == 'I') &&
3361 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3362 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3363 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3364 SKIP(8);
3365 return(XML_ATTRIBUTE_IMPLIED);
3366 }
3367 val = XML_ATTRIBUTE_NONE;
3368 if ((RAW == '#') && (NXT(1) == 'F') &&
3369 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3370 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3371 SKIP(6);
3372 val = XML_ATTRIBUTE_FIXED;
3373 if (!IS_BLANK(CUR)) {
3374 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3376 ctxt->sax->error(ctxt->userData,
3377 "Space required after '#FIXED'\n");
3378 ctxt->wellFormed = 0;
3379 ctxt->disableSAX = 1;
3380 }
3381 SKIP_BLANKS;
3382 }
3383 ret = xmlParseAttValue(ctxt);
3384 ctxt->instate = XML_PARSER_DTD;
3385 if (ret == NULL) {
3386 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3387 ctxt->sax->error(ctxt->userData,
3388 "Attribute default value declaration error\n");
3389 ctxt->wellFormed = 0;
3390 ctxt->disableSAX = 1;
3391 } else
3392 *value = ret;
3393 return(val);
3394}
3395
3396/**
3397 * xmlParseNotationType:
3398 * @ctxt: an XML parser context
3399 *
3400 * parse an Notation attribute type.
3401 *
3402 * Note: the leading 'NOTATION' S part has already being parsed...
3403 *
3404 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3405 *
3406 * [ VC: Notation Attributes ]
3407 * Values of this type must match one of the notation names included
3408 * in the declaration; all notation names in the declaration must be declared.
3409 *
3410 * Returns: the notation attribute tree built while parsing
3411 */
3412
3413xmlEnumerationPtr
3414xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3415 xmlChar *name;
3416 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3417
3418 if (RAW != '(') {
3419 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3420 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3421 ctxt->sax->error(ctxt->userData,
3422 "'(' required to start 'NOTATION'\n");
3423 ctxt->wellFormed = 0;
3424 ctxt->disableSAX = 1;
3425 return(NULL);
3426 }
3427 SHRINK;
3428 do {
3429 NEXT;
3430 SKIP_BLANKS;
3431 name = xmlParseName(ctxt);
3432 if (name == NULL) {
3433 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3434 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3435 ctxt->sax->error(ctxt->userData,
3436 "Name expected in NOTATION declaration\n");
3437 ctxt->wellFormed = 0;
3438 ctxt->disableSAX = 1;
3439 return(ret);
3440 }
3441 cur = xmlCreateEnumeration(name);
3442 xmlFree(name);
3443 if (cur == NULL) return(ret);
3444 if (last == NULL) ret = last = cur;
3445 else {
3446 last->next = cur;
3447 last = cur;
3448 }
3449 SKIP_BLANKS;
3450 } while (RAW == '|');
3451 if (RAW != ')') {
3452 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3454 ctxt->sax->error(ctxt->userData,
3455 "')' required to finish NOTATION declaration\n");
3456 ctxt->wellFormed = 0;
3457 ctxt->disableSAX = 1;
3458 if ((last != NULL) && (last != ret))
3459 xmlFreeEnumeration(last);
3460 return(ret);
3461 }
3462 NEXT;
3463 return(ret);
3464}
3465
3466/**
3467 * xmlParseEnumerationType:
3468 * @ctxt: an XML parser context
3469 *
3470 * parse an Enumeration attribute type.
3471 *
3472 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3473 *
3474 * [ VC: Enumeration ]
3475 * Values of this type must match one of the Nmtoken tokens in
3476 * the declaration
3477 *
3478 * Returns: the enumeration attribute tree built while parsing
3479 */
3480
3481xmlEnumerationPtr
3482xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3483 xmlChar *name;
3484 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3485
3486 if (RAW != '(') {
3487 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3488 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3489 ctxt->sax->error(ctxt->userData,
3490 "'(' required to start ATTLIST enumeration\n");
3491 ctxt->wellFormed = 0;
3492 ctxt->disableSAX = 1;
3493 return(NULL);
3494 }
3495 SHRINK;
3496 do {
3497 NEXT;
3498 SKIP_BLANKS;
3499 name = xmlParseNmtoken(ctxt);
3500 if (name == NULL) {
3501 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3502 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3503 ctxt->sax->error(ctxt->userData,
3504 "NmToken expected in ATTLIST enumeration\n");
3505 ctxt->wellFormed = 0;
3506 ctxt->disableSAX = 1;
3507 return(ret);
3508 }
3509 cur = xmlCreateEnumeration(name);
3510 xmlFree(name);
3511 if (cur == NULL) return(ret);
3512 if (last == NULL) ret = last = cur;
3513 else {
3514 last->next = cur;
3515 last = cur;
3516 }
3517 SKIP_BLANKS;
3518 } while (RAW == '|');
3519 if (RAW != ')') {
3520 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3521 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3522 ctxt->sax->error(ctxt->userData,
3523 "')' required to finish ATTLIST enumeration\n");
3524 ctxt->wellFormed = 0;
3525 ctxt->disableSAX = 1;
3526 return(ret);
3527 }
3528 NEXT;
3529 return(ret);
3530}
3531
3532/**
3533 * xmlParseEnumeratedType:
3534 * @ctxt: an XML parser context
3535 * @tree: the enumeration tree built while parsing
3536 *
3537 * parse an Enumerated attribute type.
3538 *
3539 * [57] EnumeratedType ::= NotationType | Enumeration
3540 *
3541 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3542 *
3543 *
3544 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3545 */
3546
3547int
3548xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3549 if ((RAW == 'N') && (NXT(1) == 'O') &&
3550 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3551 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3552 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3553 SKIP(8);
3554 if (!IS_BLANK(CUR)) {
3555 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3556 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3557 ctxt->sax->error(ctxt->userData,
3558 "Space required after 'NOTATION'\n");
3559 ctxt->wellFormed = 0;
3560 ctxt->disableSAX = 1;
3561 return(0);
3562 }
3563 SKIP_BLANKS;
3564 *tree = xmlParseNotationType(ctxt);
3565 if (*tree == NULL) return(0);
3566 return(XML_ATTRIBUTE_NOTATION);
3567 }
3568 *tree = xmlParseEnumerationType(ctxt);
3569 if (*tree == NULL) return(0);
3570 return(XML_ATTRIBUTE_ENUMERATION);
3571}
3572
3573/**
3574 * xmlParseAttributeType:
3575 * @ctxt: an XML parser context
3576 * @tree: the enumeration tree built while parsing
3577 *
3578 * parse the Attribute list def for an element
3579 *
3580 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3581 *
3582 * [55] StringType ::= 'CDATA'
3583 *
3584 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3585 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3586 *
3587 * Validity constraints for attribute values syntax are checked in
3588 * xmlValidateAttributeValue()
3589 *
3590 * [ VC: ID ]
3591 * Values of type ID must match the Name production. A name must not
3592 * appear more than once in an XML document as a value of this type;
3593 * i.e., ID values must uniquely identify the elements which bear them.
3594 *
3595 * [ VC: One ID per Element Type ]
3596 * No element type may have more than one ID attribute specified.
3597 *
3598 * [ VC: ID Attribute Default ]
3599 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3600 *
3601 * [ VC: IDREF ]
3602 * Values of type IDREF must match the Name production, and values
3603 * of type IDREFS must match Names; each IDREF Name must match the value
3604 * of an ID attribute on some element in the XML document; i.e. IDREF
3605 * values must match the value of some ID attribute.
3606 *
3607 * [ VC: Entity Name ]
3608 * Values of type ENTITY must match the Name production, values
3609 * of type ENTITIES must match Names; each Entity Name must match the
3610 * name of an unparsed entity declared in the DTD.
3611 *
3612 * [ VC: Name Token ]
3613 * Values of type NMTOKEN must match the Nmtoken production; values
3614 * of type NMTOKENS must match Nmtokens.
3615 *
3616 * Returns the attribute type
3617 */
3618int
3619xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3620 SHRINK;
3621 if ((RAW == 'C') && (NXT(1) == 'D') &&
3622 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3623 (NXT(4) == 'A')) {
3624 SKIP(5);
3625 return(XML_ATTRIBUTE_CDATA);
3626 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3627 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3628 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3629 SKIP(6);
3630 return(XML_ATTRIBUTE_IDREFS);
3631 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3632 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3633 (NXT(4) == 'F')) {
3634 SKIP(5);
3635 return(XML_ATTRIBUTE_IDREF);
3636 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3637 SKIP(2);
3638 return(XML_ATTRIBUTE_ID);
3639 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3640 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3641 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3642 SKIP(6);
3643 return(XML_ATTRIBUTE_ENTITY);
3644 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3645 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3646 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3647 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3648 SKIP(8);
3649 return(XML_ATTRIBUTE_ENTITIES);
3650 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3651 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3652 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3653 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3654 SKIP(8);
3655 return(XML_ATTRIBUTE_NMTOKENS);
3656 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3657 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3658 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3659 (NXT(6) == 'N')) {
3660 SKIP(7);
3661 return(XML_ATTRIBUTE_NMTOKEN);
3662 }
3663 return(xmlParseEnumeratedType(ctxt, tree));
3664}
3665
3666/**
3667 * xmlParseAttributeListDecl:
3668 * @ctxt: an XML parser context
3669 *
3670 * : parse the Attribute list def for an element
3671 *
3672 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3673 *
3674 * [53] AttDef ::= S Name S AttType S DefaultDecl
3675 *
3676 */
3677void
3678xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3679 xmlChar *elemName;
3680 xmlChar *attrName;
3681 xmlEnumerationPtr tree;
3682
3683 if ((RAW == '<') && (NXT(1) == '!') &&
3684 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3685 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3686 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3687 (NXT(8) == 'T')) {
3688 xmlParserInputPtr input = ctxt->input;
3689
3690 SKIP(9);
3691 if (!IS_BLANK(CUR)) {
3692 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3693 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3694 ctxt->sax->error(ctxt->userData,
3695 "Space required after '<!ATTLIST'\n");
3696 ctxt->wellFormed = 0;
3697 ctxt->disableSAX = 1;
3698 }
3699 SKIP_BLANKS;
3700 elemName = xmlParseName(ctxt);
3701 if (elemName == NULL) {
3702 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3703 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3704 ctxt->sax->error(ctxt->userData,
3705 "ATTLIST: no name for Element\n");
3706 ctxt->wellFormed = 0;
3707 ctxt->disableSAX = 1;
3708 return;
3709 }
3710 SKIP_BLANKS;
3711 GROW;
3712 while (RAW != '>') {
3713 const xmlChar *check = CUR_PTR;
3714 int type;
3715 int def;
3716 xmlChar *defaultValue = NULL;
3717
3718 GROW;
3719 tree = NULL;
3720 attrName = xmlParseName(ctxt);
3721 if (attrName == NULL) {
3722 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3723 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3724 ctxt->sax->error(ctxt->userData,
3725 "ATTLIST: no name for Attribute\n");
3726 ctxt->wellFormed = 0;
3727 ctxt->disableSAX = 1;
3728 break;
3729 }
3730 GROW;
3731 if (!IS_BLANK(CUR)) {
3732 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3733 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3734 ctxt->sax->error(ctxt->userData,
3735 "Space required after the attribute name\n");
3736 ctxt->wellFormed = 0;
3737 ctxt->disableSAX = 1;
3738 if (attrName != NULL)
3739 xmlFree(attrName);
3740 if (defaultValue != NULL)
3741 xmlFree(defaultValue);
3742 break;
3743 }
3744 SKIP_BLANKS;
3745
3746 type = xmlParseAttributeType(ctxt, &tree);
3747 if (type <= 0) {
3748 if (attrName != NULL)
3749 xmlFree(attrName);
3750 if (defaultValue != NULL)
3751 xmlFree(defaultValue);
3752 break;
3753 }
3754
3755 GROW;
3756 if (!IS_BLANK(CUR)) {
3757 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3758 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3759 ctxt->sax->error(ctxt->userData,
3760 "Space required after the attribute type\n");
3761 ctxt->wellFormed = 0;
3762 ctxt->disableSAX = 1;
3763 if (attrName != NULL)
3764 xmlFree(attrName);
3765 if (defaultValue != NULL)
3766 xmlFree(defaultValue);
3767 if (tree != NULL)
3768 xmlFreeEnumeration(tree);
3769 break;
3770 }
3771 SKIP_BLANKS;
3772
3773 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3774 if (def <= 0) {
3775 if (attrName != NULL)
3776 xmlFree(attrName);
3777 if (defaultValue != NULL)
3778 xmlFree(defaultValue);
3779 if (tree != NULL)
3780 xmlFreeEnumeration(tree);
3781 break;
3782 }
3783
3784 GROW;
3785 if (RAW != '>') {
3786 if (!IS_BLANK(CUR)) {
3787 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3788 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3789 ctxt->sax->error(ctxt->userData,
3790 "Space required after the attribute default value\n");
3791 ctxt->wellFormed = 0;
3792 ctxt->disableSAX = 1;
3793 if (attrName != NULL)
3794 xmlFree(attrName);
3795 if (defaultValue != NULL)
3796 xmlFree(defaultValue);
3797 if (tree != NULL)
3798 xmlFreeEnumeration(tree);
3799 break;
3800 }
3801 SKIP_BLANKS;
3802 }
3803 if (check == CUR_PTR) {
3804 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3805 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3806 ctxt->sax->error(ctxt->userData,
3807 "xmlParseAttributeListDecl: detected internal error\n");
3808 if (attrName != NULL)
3809 xmlFree(attrName);
3810 if (defaultValue != NULL)
3811 xmlFree(defaultValue);
3812 if (tree != NULL)
3813 xmlFreeEnumeration(tree);
3814 break;
3815 }
3816 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3817 (ctxt->sax->attributeDecl != NULL))
3818 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3819 type, def, defaultValue, tree);
3820 if (attrName != NULL)
3821 xmlFree(attrName);
3822 if (defaultValue != NULL)
3823 xmlFree(defaultValue);
3824 GROW;
3825 }
3826 if (RAW == '>') {
3827 if (input != ctxt->input) {
3828 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3829 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3830 ctxt->sax->error(ctxt->userData,
3831"Attribute list declaration doesn't start and stop in the same entity\n");
3832 ctxt->wellFormed = 0;
3833 ctxt->disableSAX = 1;
3834 }
3835 NEXT;
3836 }
3837
3838 xmlFree(elemName);
3839 }
3840}
3841
3842/**
3843 * xmlParseElementMixedContentDecl:
3844 * @ctxt: an XML parser context
3845 *
3846 * parse the declaration for a Mixed Element content
3847 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3848 *
3849 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3850 * '(' S? '#PCDATA' S? ')'
3851 *
3852 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3853 *
3854 * [ VC: No Duplicate Types ]
3855 * The same name must not appear more than once in a single
3856 * mixed-content declaration.
3857 *
3858 * returns: the list of the xmlElementContentPtr describing the element choices
3859 */
3860xmlElementContentPtr
3861xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3862 xmlElementContentPtr ret = NULL, cur = NULL, n;
3863 xmlChar *elem = NULL;
3864
3865 GROW;
3866 if ((RAW == '#') && (NXT(1) == 'P') &&
3867 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3868 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3869 (NXT(6) == 'A')) {
3870 SKIP(7);
3871 SKIP_BLANKS;
3872 SHRINK;
3873 if (RAW == ')') {
3874 ctxt->entity = ctxt->input;
3875 NEXT;
3876 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3877 if (RAW == '*') {
3878 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3879 NEXT;
3880 }
3881 return(ret);
3882 }
3883 if ((RAW == '(') || (RAW == '|')) {
3884 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3885 if (ret == NULL) return(NULL);
3886 }
3887 while (RAW == '|') {
3888 NEXT;
3889 if (elem == NULL) {
3890 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3891 if (ret == NULL) return(NULL);
3892 ret->c1 = cur;
3893 cur = ret;
3894 } else {
3895 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3896 if (n == NULL) return(NULL);
3897 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3898 cur->c2 = n;
3899 cur = n;
3900 xmlFree(elem);
3901 }
3902 SKIP_BLANKS;
3903 elem = xmlParseName(ctxt);
3904 if (elem == NULL) {
3905 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3906 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3907 ctxt->sax->error(ctxt->userData,
3908 "xmlParseElementMixedContentDecl : Name expected\n");
3909 ctxt->wellFormed = 0;
3910 ctxt->disableSAX = 1;
3911 xmlFreeElementContent(cur);
3912 return(NULL);
3913 }
3914 SKIP_BLANKS;
3915 GROW;
3916 }
3917 if ((RAW == ')') && (NXT(1) == '*')) {
3918 if (elem != NULL) {
3919 cur->c2 = xmlNewElementContent(elem,
3920 XML_ELEMENT_CONTENT_ELEMENT);
3921 xmlFree(elem);
3922 }
3923 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3924 ctxt->entity = ctxt->input;
3925 SKIP(2);
3926 } else {
3927 if (elem != NULL) xmlFree(elem);
3928 xmlFreeElementContent(ret);
3929 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
3930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3931 ctxt->sax->error(ctxt->userData,
3932 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
3933 ctxt->wellFormed = 0;
3934 ctxt->disableSAX = 1;
3935 return(NULL);
3936 }
3937
3938 } else {
3939 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
3940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3941 ctxt->sax->error(ctxt->userData,
3942 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3943 ctxt->wellFormed = 0;
3944 ctxt->disableSAX = 1;
3945 }
3946 return(ret);
3947}
3948
3949/**
3950 * xmlParseElementChildrenContentDecl:
3951 * @ctxt: an XML parser context
3952 *
3953 * parse the declaration for a Mixed Element content
3954 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3955 *
3956 *
3957 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3958 *
3959 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3960 *
3961 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3962 *
3963 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3964 *
3965 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3966 * TODO Parameter-entity replacement text must be properly nested
3967 * with parenthetized groups. That is to say, if either of the
3968 * opening or closing parentheses in a choice, seq, or Mixed
3969 * construct is contained in the replacement text for a parameter
3970 * entity, both must be contained in the same replacement text. For
3971 * interoperability, if a parameter-entity reference appears in a
3972 * choice, seq, or Mixed construct, its replacement text should not
3973 * be empty, and neither the first nor last non-blank character of
3974 * the replacement text should be a connector (| or ,).
3975 *
3976 * returns: the tree of xmlElementContentPtr describing the element
3977 * hierarchy.
3978 */
3979xmlElementContentPtr
3980#ifdef VMS
3981xmlParseElementChildrenContentD
3982#else
3983xmlParseElementChildrenContentDecl
3984#endif
3985(xmlParserCtxtPtr ctxt) {
3986 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
3987 xmlChar *elem;
3988 xmlChar type = 0;
3989
3990 SKIP_BLANKS;
3991 GROW;
3992 if (RAW == '(') {
3993 /* Recurse on first child */
3994 NEXT;
3995 SKIP_BLANKS;
3996 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
3997 SKIP_BLANKS;
3998 GROW;
3999 } else {
4000 elem = xmlParseName(ctxt);
4001 if (elem == NULL) {
4002 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4003 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4004 ctxt->sax->error(ctxt->userData,
4005 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4006 ctxt->wellFormed = 0;
4007 ctxt->disableSAX = 1;
4008 return(NULL);
4009 }
4010 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4011 GROW;
4012 if (RAW == '?') {
4013 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4014 NEXT;
4015 } else if (RAW == '*') {
4016 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4017 NEXT;
4018 } else if (RAW == '+') {
4019 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4020 NEXT;
4021 } else {
4022 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4023 }
4024 xmlFree(elem);
4025 GROW;
4026 }
4027 SKIP_BLANKS;
4028 SHRINK;
4029 while (RAW != ')') {
4030 /*
4031 * Each loop we parse one separator and one element.
4032 */
4033 if (RAW == ',') {
4034 if (type == 0) type = CUR;
4035
4036 /*
4037 * Detect "Name | Name , Name" error
4038 */
4039 else if (type != CUR) {
4040 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4042 ctxt->sax->error(ctxt->userData,
4043 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4044 type);
4045 ctxt->wellFormed = 0;
4046 ctxt->disableSAX = 1;
4047 if ((op != NULL) && (op != ret))
4048 xmlFreeElementContent(op);
4049 if ((last != NULL) && (last != ret) &&
4050 (last != ret->c1) && (last != ret->c2))
4051 xmlFreeElementContent(last);
4052 if (ret != NULL)
4053 xmlFreeElementContent(ret);
4054 return(NULL);
4055 }
4056 NEXT;
4057
4058 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4059 if (op == NULL) {
4060 xmlFreeElementContent(ret);
4061 return(NULL);
4062 }
4063 if (last == NULL) {
4064 op->c1 = ret;
4065 ret = cur = op;
4066 } else {
4067 cur->c2 = op;
4068 op->c1 = last;
4069 cur =op;
4070 last = NULL;
4071 }
4072 } else if (RAW == '|') {
4073 if (type == 0) type = CUR;
4074
4075 /*
4076 * Detect "Name , Name | Name" error
4077 */
4078 else if (type != CUR) {
4079 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4080 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4081 ctxt->sax->error(ctxt->userData,
4082 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4083 type);
4084 ctxt->wellFormed = 0;
4085 ctxt->disableSAX = 1;
4086 if ((op != NULL) && (op != ret) && (op != last))
4087 xmlFreeElementContent(op);
4088 if ((last != NULL) && (last != ret) &&
4089 (last != ret->c1) && (last != ret->c2))
4090 xmlFreeElementContent(last);
4091 if (ret != NULL)
4092 xmlFreeElementContent(ret);
4093 return(NULL);
4094 }
4095 NEXT;
4096
4097 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4098 if (op == NULL) {
4099 if ((op != NULL) && (op != ret))
4100 xmlFreeElementContent(op);
4101 if ((last != NULL) && (last != ret) &&
4102 (last != ret->c1) && (last != ret->c2))
4103 xmlFreeElementContent(last);
4104 if (ret != NULL)
4105 xmlFreeElementContent(ret);
4106 return(NULL);
4107 }
4108 if (last == NULL) {
4109 op->c1 = ret;
4110 ret = cur = op;
4111 } else {
4112 cur->c2 = op;
4113 op->c1 = last;
4114 cur =op;
4115 last = NULL;
4116 }
4117 } else {
4118 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4119 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4120 ctxt->sax->error(ctxt->userData,
4121 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4122 ctxt->wellFormed = 0;
4123 ctxt->disableSAX = 1;
4124 if ((op != NULL) && (op != ret))
4125 xmlFreeElementContent(op);
4126 if ((last != NULL) && (last != ret) &&
4127 (last != ret->c1) && (last != ret->c2))
4128 xmlFreeElementContent(last);
4129 if (ret != NULL)
4130 xmlFreeElementContent(ret);
4131 return(NULL);
4132 }
4133 GROW;
4134 SKIP_BLANKS;
4135 GROW;
4136 if (RAW == '(') {
4137 /* Recurse on second child */
4138 NEXT;
4139 SKIP_BLANKS;
4140 last = xmlParseElementChildrenContentDecl(ctxt);
4141 SKIP_BLANKS;
4142 } else {
4143 elem = xmlParseName(ctxt);
4144 if (elem == NULL) {
4145 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4146 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4147 ctxt->sax->error(ctxt->userData,
4148 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4149 ctxt->wellFormed = 0;
4150 ctxt->disableSAX = 1;
4151 if ((op != NULL) && (op != ret))
4152 xmlFreeElementContent(op);
4153 if ((last != NULL) && (last != ret) &&
4154 (last != ret->c1) && (last != ret->c2))
4155 xmlFreeElementContent(last);
4156 if (ret != NULL)
4157 xmlFreeElementContent(ret);
4158 return(NULL);
4159 }
4160 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4161 xmlFree(elem);
4162 if (RAW == '?') {
4163 last->ocur = XML_ELEMENT_CONTENT_OPT;
4164 NEXT;
4165 } else if (RAW == '*') {
4166 last->ocur = XML_ELEMENT_CONTENT_MULT;
4167 NEXT;
4168 } else if (RAW == '+') {
4169 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4170 NEXT;
4171 } else {
4172 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4173 }
4174 }
4175 SKIP_BLANKS;
4176 GROW;
4177 }
4178 if ((cur != NULL) && (last != NULL)) {
4179 cur->c2 = last;
4180 }
4181 ctxt->entity = ctxt->input;
4182 NEXT;
4183 if (RAW == '?') {
4184 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4185 NEXT;
4186 } else if (RAW == '*') {
4187 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4188 NEXT;
4189 } else if (RAW == '+') {
4190 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4191 NEXT;
4192 }
4193 return(ret);
4194}
4195
4196/**
4197 * xmlParseElementContentDecl:
4198 * @ctxt: an XML parser context
4199 * @name: the name of the element being defined.
4200 * @result: the Element Content pointer will be stored here if any
4201 *
4202 * parse the declaration for an Element content either Mixed or Children,
4203 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4204 *
4205 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4206 *
4207 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4208 */
4209
4210int
4211xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4212 xmlElementContentPtr *result) {
4213
4214 xmlElementContentPtr tree = NULL;
4215 xmlParserInputPtr input = ctxt->input;
4216 int res;
4217
4218 *result = NULL;
4219
4220 if (RAW != '(') {
4221 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4222 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4223 ctxt->sax->error(ctxt->userData,
4224 "xmlParseElementContentDecl : '(' expected\n");
4225 ctxt->wellFormed = 0;
4226 ctxt->disableSAX = 1;
4227 return(-1);
4228 }
4229 NEXT;
4230 GROW;
4231 SKIP_BLANKS;
4232 if ((RAW == '#') && (NXT(1) == 'P') &&
4233 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4234 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4235 (NXT(6) == 'A')) {
4236 tree = xmlParseElementMixedContentDecl(ctxt);
4237 res = XML_ELEMENT_TYPE_MIXED;
4238 } else {
4239 tree = xmlParseElementChildrenContentDecl(ctxt);
4240 res = XML_ELEMENT_TYPE_ELEMENT;
4241 }
4242 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4243 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4244 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4245 ctxt->sax->error(ctxt->userData,
4246"Element content declaration doesn't start and stop in the same entity\n");
4247 ctxt->wellFormed = 0;
4248 ctxt->disableSAX = 1;
4249 }
4250 SKIP_BLANKS;
4251 *result = tree;
4252 return(res);
4253}
4254
4255/**
4256 * xmlParseElementDecl:
4257 * @ctxt: an XML parser context
4258 *
4259 * parse an Element declaration.
4260 *
4261 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4262 *
4263 * [ VC: Unique Element Type Declaration ]
4264 * No element type may be declared more than once
4265 *
4266 * Returns the type of the element, or -1 in case of error
4267 */
4268int
4269xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4270 xmlChar *name;
4271 int ret = -1;
4272 xmlElementContentPtr content = NULL;
4273
4274 GROW;
4275 if ((RAW == '<') && (NXT(1) == '!') &&
4276 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4277 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4278 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4279 (NXT(8) == 'T')) {
4280 xmlParserInputPtr input = ctxt->input;
4281
4282 SKIP(9);
4283 if (!IS_BLANK(CUR)) {
4284 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4285 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4286 ctxt->sax->error(ctxt->userData,
4287 "Space required after 'ELEMENT'\n");
4288 ctxt->wellFormed = 0;
4289 ctxt->disableSAX = 1;
4290 }
4291 SKIP_BLANKS;
4292 name = xmlParseName(ctxt);
4293 if (name == NULL) {
4294 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4295 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4296 ctxt->sax->error(ctxt->userData,
4297 "xmlParseElementDecl: no name for Element\n");
4298 ctxt->wellFormed = 0;
4299 ctxt->disableSAX = 1;
4300 return(-1);
4301 }
4302 while ((RAW == 0) && (ctxt->inputNr > 1))
4303 xmlPopInput(ctxt);
4304 if (!IS_BLANK(CUR)) {
4305 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4307 ctxt->sax->error(ctxt->userData,
4308 "Space required after the element name\n");
4309 ctxt->wellFormed = 0;
4310 ctxt->disableSAX = 1;
4311 }
4312 SKIP_BLANKS;
4313 if ((RAW == 'E') && (NXT(1) == 'M') &&
4314 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4315 (NXT(4) == 'Y')) {
4316 SKIP(5);
4317 /*
4318 * Element must always be empty.
4319 */
4320 ret = XML_ELEMENT_TYPE_EMPTY;
4321 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4322 (NXT(2) == 'Y')) {
4323 SKIP(3);
4324 /*
4325 * Element is a generic container.
4326 */
4327 ret = XML_ELEMENT_TYPE_ANY;
4328 } else if (RAW == '(') {
4329 ret = xmlParseElementContentDecl(ctxt, name, &content);
4330 } else {
4331 /*
4332 * [ WFC: PEs in Internal Subset ] error handling.
4333 */
4334 if ((RAW == '%') && (ctxt->external == 0) &&
4335 (ctxt->inputNr == 1)) {
4336 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4337 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4338 ctxt->sax->error(ctxt->userData,
4339 "PEReference: forbidden within markup decl in internal subset\n");
4340 } else {
4341 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4343 ctxt->sax->error(ctxt->userData,
4344 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4345 }
4346 ctxt->wellFormed = 0;
4347 ctxt->disableSAX = 1;
4348 if (name != NULL) xmlFree(name);
4349 return(-1);
4350 }
4351
4352 SKIP_BLANKS;
4353 /*
4354 * Pop-up of finished entities.
4355 */
4356 while ((RAW == 0) && (ctxt->inputNr > 1))
4357 xmlPopInput(ctxt);
4358 SKIP_BLANKS;
4359
4360 if (RAW != '>') {
4361 ctxt->errNo = XML_ERR_GT_REQUIRED;
4362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4363 ctxt->sax->error(ctxt->userData,
4364 "xmlParseElementDecl: expected '>' at the end\n");
4365 ctxt->wellFormed = 0;
4366 ctxt->disableSAX = 1;
4367 } else {
4368 if (input != ctxt->input) {
4369 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4370 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4371 ctxt->sax->error(ctxt->userData,
4372"Element declaration doesn't start and stop in the same entity\n");
4373 ctxt->wellFormed = 0;
4374 ctxt->disableSAX = 1;
4375 }
4376
4377 NEXT;
4378 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4379 (ctxt->sax->elementDecl != NULL))
4380 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4381 content);
4382 }
4383 if (content != NULL) {
4384 xmlFreeElementContent(content);
4385 }
4386 if (name != NULL) {
4387 xmlFree(name);
4388 }
4389 }
4390 return(ret);
4391}
4392
4393/**
4394 * xmlParseMarkupDecl:
4395 * @ctxt: an XML parser context
4396 *
4397 * parse Markup declarations
4398 *
4399 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4400 * NotationDecl | PI | Comment
4401 *
4402 * [ VC: Proper Declaration/PE Nesting ]
4403 * Parameter-entity replacement text must be properly nested with
4404 * markup declarations. That is to say, if either the first character
4405 * or the last character of a markup declaration (markupdecl above) is
4406 * contained in the replacement text for a parameter-entity reference,
4407 * both must be contained in the same replacement text.
4408 *
4409 * [ WFC: PEs in Internal Subset ]
4410 * In the internal DTD subset, parameter-entity references can occur
4411 * only where markup declarations can occur, not within markup declarations.
4412 * (This does not apply to references that occur in external parameter
4413 * entities or to the external subset.)
4414 */
4415void
4416xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4417 GROW;
4418 xmlParseElementDecl(ctxt);
4419 xmlParseAttributeListDecl(ctxt);
4420 xmlParseEntityDecl(ctxt);
4421 xmlParseNotationDecl(ctxt);
4422 xmlParsePI(ctxt);
4423 xmlParseComment(ctxt);
4424 /*
4425 * This is only for internal subset. On external entities,
4426 * the replacement is done before parsing stage
4427 */
4428 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4429 xmlParsePEReference(ctxt);
4430 ctxt->instate = XML_PARSER_DTD;
4431}
4432
4433/**
4434 * xmlParseTextDecl:
4435 * @ctxt: an XML parser context
4436 *
4437 * parse an XML declaration header for external entities
4438 *
4439 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4440 *
4441 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4442 */
4443
4444void
4445xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4446 xmlChar *version;
4447
4448 /*
4449 * We know that '<?xml' is here.
4450 */
4451 if ((RAW == '<') && (NXT(1) == '?') &&
4452 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4453 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4454 SKIP(5);
4455 } else {
4456 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4457 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4458 ctxt->sax->error(ctxt->userData,
4459 "Text declaration '<?xml' required\n");
4460 ctxt->wellFormed = 0;
4461 ctxt->disableSAX = 1;
4462
4463 return;
4464 }
4465
4466 if (!IS_BLANK(CUR)) {
4467 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4468 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4469 ctxt->sax->error(ctxt->userData,
4470 "Space needed after '<?xml'\n");
4471 ctxt->wellFormed = 0;
4472 ctxt->disableSAX = 1;
4473 }
4474 SKIP_BLANKS;
4475
4476 /*
4477 * We may have the VersionInfo here.
4478 */
4479 version = xmlParseVersionInfo(ctxt);
4480 if (version == NULL)
4481 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4482 ctxt->input->version = version;
4483
4484 /*
4485 * We must have the encoding declaration
4486 */
4487 if (!IS_BLANK(CUR)) {
4488 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4490 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4491 ctxt->wellFormed = 0;
4492 ctxt->disableSAX = 1;
4493 }
4494 xmlParseEncodingDecl(ctxt);
4495 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4496 /*
4497 * The XML REC instructs us to stop parsing right here
4498 */
4499 return;
4500 }
4501
4502 SKIP_BLANKS;
4503 if ((RAW == '?') && (NXT(1) == '>')) {
4504 SKIP(2);
4505 } else if (RAW == '>') {
4506 /* Deprecated old WD ... */
4507 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4509 ctxt->sax->error(ctxt->userData,
4510 "XML declaration must end-up with '?>'\n");
4511 ctxt->wellFormed = 0;
4512 ctxt->disableSAX = 1;
4513 NEXT;
4514 } else {
4515 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4516 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4517 ctxt->sax->error(ctxt->userData,
4518 "parsing XML declaration: '?>' expected\n");
4519 ctxt->wellFormed = 0;
4520 ctxt->disableSAX = 1;
4521 MOVETO_ENDTAG(CUR_PTR);
4522 NEXT;
4523 }
4524}
4525
4526/*
4527 * xmlParseConditionalSections
4528 * @ctxt: an XML parser context
4529 *
4530 * [61] conditionalSect ::= includeSect | ignoreSect
4531 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4532 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4533 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4534 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4535 */
4536
4537void
4538xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4539 SKIP(3);
4540 SKIP_BLANKS;
4541 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4542 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4543 (NXT(6) == 'E')) {
4544 SKIP(7);
4545 SKIP_BLANKS;
4546 if (RAW != '[') {
4547 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4548 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4549 ctxt->sax->error(ctxt->userData,
4550 "XML conditional section '[' expected\n");
4551 ctxt->wellFormed = 0;
4552 ctxt->disableSAX = 1;
4553 } else {
4554 NEXT;
4555 }
4556 if (xmlParserDebugEntities) {
4557 if ((ctxt->input != NULL) && (ctxt->input->filename))
4558 xmlGenericError(xmlGenericErrorContext,
4559 "%s(%d): ", ctxt->input->filename,
4560 ctxt->input->line);
4561 xmlGenericError(xmlGenericErrorContext,
4562 "Entering INCLUDE Conditional Section\n");
4563 }
4564
4565 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4566 (NXT(2) != '>'))) {
4567 const xmlChar *check = CUR_PTR;
4568 int cons = ctxt->input->consumed;
4569 int tok = ctxt->token;
4570
4571 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4572 xmlParseConditionalSections(ctxt);
4573 } else if (IS_BLANK(CUR)) {
4574 NEXT;
4575 } else if (RAW == '%') {
4576 xmlParsePEReference(ctxt);
4577 } else
4578 xmlParseMarkupDecl(ctxt);
4579
4580 /*
4581 * Pop-up of finished entities.
4582 */
4583 while ((RAW == 0) && (ctxt->inputNr > 1))
4584 xmlPopInput(ctxt);
4585
4586 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4587 (tok == ctxt->token)) {
4588 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4589 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4590 ctxt->sax->error(ctxt->userData,
4591 "Content error in the external subset\n");
4592 ctxt->wellFormed = 0;
4593 ctxt->disableSAX = 1;
4594 break;
4595 }
4596 }
4597 if (xmlParserDebugEntities) {
4598 if ((ctxt->input != NULL) && (ctxt->input->filename))
4599 xmlGenericError(xmlGenericErrorContext,
4600 "%s(%d): ", ctxt->input->filename,
4601 ctxt->input->line);
4602 xmlGenericError(xmlGenericErrorContext,
4603 "Leaving INCLUDE Conditional Section\n");
4604 }
4605
4606 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4607 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4608 int state;
4609 int instate;
4610 int depth = 0;
4611
4612 SKIP(6);
4613 SKIP_BLANKS;
4614 if (RAW != '[') {
4615 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4616 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4617 ctxt->sax->error(ctxt->userData,
4618 "XML conditional section '[' expected\n");
4619 ctxt->wellFormed = 0;
4620 ctxt->disableSAX = 1;
4621 } else {
4622 NEXT;
4623 }
4624 if (xmlParserDebugEntities) {
4625 if ((ctxt->input != NULL) && (ctxt->input->filename))
4626 xmlGenericError(xmlGenericErrorContext,
4627 "%s(%d): ", ctxt->input->filename,
4628 ctxt->input->line);
4629 xmlGenericError(xmlGenericErrorContext,
4630 "Entering IGNORE Conditional Section\n");
4631 }
4632
4633 /*
4634 * Parse up to the end of the conditionnal section
4635 * But disable SAX event generating DTD building in the meantime
4636 */
4637 state = ctxt->disableSAX;
4638 instate = ctxt->instate;
4639 ctxt->disableSAX = 1;
4640 ctxt->instate = XML_PARSER_IGNORE;
4641
4642 while (depth >= 0) {
4643 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4644 depth++;
4645 SKIP(3);
4646 continue;
4647 }
4648 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4649 if (--depth >= 0) SKIP(3);
4650 continue;
4651 }
4652 NEXT;
4653 continue;
4654 }
4655
4656 ctxt->disableSAX = state;
4657 ctxt->instate = instate;
4658
4659 if (xmlParserDebugEntities) {
4660 if ((ctxt->input != NULL) && (ctxt->input->filename))
4661 xmlGenericError(xmlGenericErrorContext,
4662 "%s(%d): ", ctxt->input->filename,
4663 ctxt->input->line);
4664 xmlGenericError(xmlGenericErrorContext,
4665 "Leaving IGNORE Conditional Section\n");
4666 }
4667
4668 } else {
4669 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4670 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4671 ctxt->sax->error(ctxt->userData,
4672 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4673 ctxt->wellFormed = 0;
4674 ctxt->disableSAX = 1;
4675 }
4676
4677 if (RAW == 0)
4678 SHRINK;
4679
4680 if (RAW == 0) {
4681 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4682 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4683 ctxt->sax->error(ctxt->userData,
4684 "XML conditional section not closed\n");
4685 ctxt->wellFormed = 0;
4686 ctxt->disableSAX = 1;
4687 } else {
4688 SKIP(3);
4689 }
4690}
4691
4692/**
4693 * xmlParseExternalSubset:
4694 * @ctxt: an XML parser context
4695 * @ExternalID: the external identifier
4696 * @SystemID: the system identifier (or URL)
4697 *
4698 * parse Markup declarations from an external subset
4699 *
4700 * [30] extSubset ::= textDecl? extSubsetDecl
4701 *
4702 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4703 */
4704void
4705xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4706 const xmlChar *SystemID) {
4707 GROW;
4708 if ((RAW == '<') && (NXT(1) == '?') &&
4709 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4710 (NXT(4) == 'l')) {
4711 xmlParseTextDecl(ctxt);
4712 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4713 /*
4714 * The XML REC instructs us to stop parsing right here
4715 */
4716 ctxt->instate = XML_PARSER_EOF;
4717 return;
4718 }
4719 }
4720 if (ctxt->myDoc == NULL) {
4721 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4722 }
4723 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4724 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4725
4726 ctxt->instate = XML_PARSER_DTD;
4727 ctxt->external = 1;
4728 while (((RAW == '<') && (NXT(1) == '?')) ||
4729 ((RAW == '<') && (NXT(1) == '!')) ||
4730 IS_BLANK(CUR)) {
4731 const xmlChar *check = CUR_PTR;
4732 int cons = ctxt->input->consumed;
4733 int tok = ctxt->token;
4734
4735 GROW;
4736 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4737 xmlParseConditionalSections(ctxt);
4738 } else if (IS_BLANK(CUR)) {
4739 NEXT;
4740 } else if (RAW == '%') {
4741 xmlParsePEReference(ctxt);
4742 } else
4743 xmlParseMarkupDecl(ctxt);
4744
4745 /*
4746 * Pop-up of finished entities.
4747 */
4748 while ((RAW == 0) && (ctxt->inputNr > 1))
4749 xmlPopInput(ctxt);
4750
4751 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4752 (tok == ctxt->token)) {
4753 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4754 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4755 ctxt->sax->error(ctxt->userData,
4756 "Content error in the external subset\n");
4757 ctxt->wellFormed = 0;
4758 ctxt->disableSAX = 1;
4759 break;
4760 }
4761 }
4762
4763 if (RAW != 0) {
4764 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4766 ctxt->sax->error(ctxt->userData,
4767 "Extra content at the end of the document\n");
4768 ctxt->wellFormed = 0;
4769 ctxt->disableSAX = 1;
4770 }
4771
4772}
4773
4774/**
4775 * xmlParseReference:
4776 * @ctxt: an XML parser context
4777 *
4778 * parse and handle entity references in content, depending on the SAX
4779 * interface, this may end-up in a call to character() if this is a
4780 * CharRef, a predefined entity, if there is no reference() callback.
4781 * or if the parser was asked to switch to that mode.
4782 *
4783 * [67] Reference ::= EntityRef | CharRef
4784 */
4785void
4786xmlParseReference(xmlParserCtxtPtr ctxt) {
4787 xmlEntityPtr ent;
4788 xmlChar *val;
4789 if (RAW != '&') return;
4790
4791 if (NXT(1) == '#') {
4792 int i = 0;
4793 xmlChar out[10];
4794 int hex = NXT(2);
4795 int val = xmlParseCharRef(ctxt);
4796
4797 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4798 /*
4799 * So we are using non-UTF-8 buffers
4800 * Check that the char fit on 8bits, if not
4801 * generate a CharRef.
4802 */
4803 if (val <= 0xFF) {
4804 out[0] = val;
4805 out[1] = 0;
4806 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4807 (!ctxt->disableSAX))
4808 ctxt->sax->characters(ctxt->userData, out, 1);
4809 } else {
4810 if ((hex == 'x') || (hex == 'X'))
4811 sprintf((char *)out, "#x%X", val);
4812 else
4813 sprintf((char *)out, "#%d", val);
4814 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4815 (!ctxt->disableSAX))
4816 ctxt->sax->reference(ctxt->userData, out);
4817 }
4818 } else {
4819 /*
4820 * Just encode the value in UTF-8
4821 */
4822 COPY_BUF(0 ,out, i, val);
4823 out[i] = 0;
4824 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4825 (!ctxt->disableSAX))
4826 ctxt->sax->characters(ctxt->userData, out, i);
4827 }
4828 } else {
4829 ent = xmlParseEntityRef(ctxt);
4830 if (ent == NULL) return;
4831 if ((ent->name != NULL) &&
4832 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4833 xmlNodePtr list = NULL;
4834 int ret;
4835
4836
4837 /*
4838 * The first reference to the entity trigger a parsing phase
4839 * where the ent->children is filled with the result from
4840 * the parsing.
4841 */
4842 if (ent->children == NULL) {
4843 xmlChar *value;
4844 value = ent->content;
4845
4846 /*
4847 * Check that this entity is well formed
4848 */
4849 if ((value != NULL) &&
4850 (value[1] == 0) && (value[0] == '<') &&
4851 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4852 /*
4853 * DONE: get definite answer on this !!!
4854 * Lots of entity decls are used to declare a single
4855 * char
4856 * <!ENTITY lt "<">
4857 * Which seems to be valid since
4858 * 2.4: The ampersand character (&) and the left angle
4859 * bracket (<) may appear in their literal form only
4860 * when used ... They are also legal within the literal
4861 * entity value of an internal entity declaration;i
4862 * see "4.3.2 Well-Formed Parsed Entities".
4863 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4864 * Looking at the OASIS test suite and James Clark
4865 * tests, this is broken. However the XML REC uses
4866 * it. Is the XML REC not well-formed ????
4867 * This is a hack to avoid this problem
4868 *
4869 * ANSWER: since lt gt amp .. are already defined,
4870 * this is a redefinition and hence the fact that the
4871 * contentis not well balanced is not a Wf error, this
4872 * is lousy but acceptable.
4873 */
4874 list = xmlNewDocText(ctxt->myDoc, value);
4875 if (list != NULL) {
4876 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4877 (ent->children == NULL)) {
4878 ent->children = list;
4879 ent->last = list;
4880 list->parent = (xmlNodePtr) ent;
4881 } else {
4882 xmlFreeNodeList(list);
4883 }
4884 } else if (list != NULL) {
4885 xmlFreeNodeList(list);
4886 }
4887 } else {
4888 /*
4889 * 4.3.2: An internal general parsed entity is well-formed
4890 * if its replacement text matches the production labeled
4891 * content.
4892 */
4893 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4894 ctxt->depth++;
4895 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4896 ctxt->sax, NULL, ctxt->depth,
4897 value, &list);
4898 ctxt->depth--;
4899 } else if (ent->etype ==
4900 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4901 ctxt->depth++;
4902 ret = xmlParseExternalEntity(ctxt->myDoc,
4903 ctxt->sax, NULL, ctxt->depth,
4904 ent->URI, ent->ExternalID, &list);
4905 ctxt->depth--;
4906 } else {
4907 ret = -1;
4908 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4909 ctxt->sax->error(ctxt->userData,
4910 "Internal: invalid entity type\n");
4911 }
4912 if (ret == XML_ERR_ENTITY_LOOP) {
4913 ctxt->errNo = XML_ERR_ENTITY_LOOP;
4914 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4915 ctxt->sax->error(ctxt->userData,
4916 "Detected entity reference loop\n");
4917 ctxt->wellFormed = 0;
4918 ctxt->disableSAX = 1;
4919 } else if ((ret == 0) && (list != NULL)) {
4920 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4921 (ent->children == NULL)) {
4922 ent->children = list;
4923 while (list != NULL) {
4924 list->parent = (xmlNodePtr) ent;
4925 if (list->next == NULL)
4926 ent->last = list;
4927 list = list->next;
4928 }
4929 } else {
4930 xmlFreeNodeList(list);
4931 }
4932 } else if (ret > 0) {
4933 ctxt->errNo = ret;
4934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4935 ctxt->sax->error(ctxt->userData,
4936 "Entity value required\n");
4937 ctxt->wellFormed = 0;
4938 ctxt->disableSAX = 1;
4939 } else if (list != NULL) {
4940 xmlFreeNodeList(list);
4941 }
4942 }
4943 }
4944 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4945 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
4946 /*
4947 * Create a node.
4948 */
4949 ctxt->sax->reference(ctxt->userData, ent->name);
4950 return;
4951 } else if (ctxt->replaceEntities) {
4952 if ((ctxt->node != NULL) && (ent->children != NULL)) {
4953 /*
4954 * Seems we are generating the DOM content, do
4955 * a simple tree copy
4956 */
4957 xmlNodePtr new;
4958 new = xmlCopyNodeList(ent->children);
4959
4960 xmlAddChildList(ctxt->node, new);
4961 /*
4962 * This is to avoid a nasty side effect, see
4963 * characters() in SAX.c
4964 */
4965 ctxt->nodemem = 0;
4966 ctxt->nodelen = 0;
4967 return;
4968 } else {
4969 /*
4970 * Probably running in SAX mode
4971 */
4972 xmlParserInputPtr input;
4973
4974 input = xmlNewEntityInputStream(ctxt, ent);
4975 xmlPushInput(ctxt, input);
4976 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
4977 (RAW == '<') && (NXT(1) == '?') &&
4978 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4979 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4980 xmlParseTextDecl(ctxt);
4981 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4982 /*
4983 * The XML REC instructs us to stop parsing right here
4984 */
4985 ctxt->instate = XML_PARSER_EOF;
4986 return;
4987 }
4988 if (input->standalone == 1) {
4989 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
4990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4991 ctxt->sax->error(ctxt->userData,
4992 "external parsed entities cannot be standalone\n");
4993 ctxt->wellFormed = 0;
4994 ctxt->disableSAX = 1;
4995 }
4996 }
4997 return;
4998 }
4999 }
5000 } else {
5001 val = ent->content;
5002 if (val == NULL) return;
5003 /*
5004 * inline the entity.
5005 */
5006 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5007 (!ctxt->disableSAX))
5008 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5009 }
5010 }
5011}
5012
5013/**
5014 * xmlParseEntityRef:
5015 * @ctxt: an XML parser context
5016 *
5017 * parse ENTITY references declarations
5018 *
5019 * [68] EntityRef ::= '&' Name ';'
5020 *
5021 * [ WFC: Entity Declared ]
5022 * In a document without any DTD, a document with only an internal DTD
5023 * subset which contains no parameter entity references, or a document
5024 * with "standalone='yes'", the Name given in the entity reference
5025 * must match that in an entity declaration, except that well-formed
5026 * documents need not declare any of the following entities: amp, lt,
5027 * gt, apos, quot. The declaration of a parameter entity must precede
5028 * any reference to it. Similarly, the declaration of a general entity
5029 * must precede any reference to it which appears in a default value in an
5030 * attribute-list declaration. Note that if entities are declared in the
5031 * external subset or in external parameter entities, a non-validating
5032 * processor is not obligated to read and process their declarations;
5033 * for such documents, the rule that an entity must be declared is a
5034 * well-formedness constraint only if standalone='yes'.
5035 *
5036 * [ WFC: Parsed Entity ]
5037 * An entity reference must not contain the name of an unparsed entity
5038 *
5039 * Returns the xmlEntityPtr if found, or NULL otherwise.
5040 */
5041xmlEntityPtr
5042xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5043 xmlChar *name;
5044 xmlEntityPtr ent = NULL;
5045
5046 GROW;
5047
5048 if (RAW == '&') {
5049 NEXT;
5050 name = xmlParseName(ctxt);
5051 if (name == NULL) {
5052 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5053 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5054 ctxt->sax->error(ctxt->userData,
5055 "xmlParseEntityRef: no name\n");
5056 ctxt->wellFormed = 0;
5057 ctxt->disableSAX = 1;
5058 } else {
5059 if (RAW == ';') {
5060 NEXT;
5061 /*
5062 * Ask first SAX for entity resolution, otherwise try the
5063 * predefined set.
5064 */
5065 if (ctxt->sax != NULL) {
5066 if (ctxt->sax->getEntity != NULL)
5067 ent = ctxt->sax->getEntity(ctxt->userData, name);
5068 if (ent == NULL)
5069 ent = xmlGetPredefinedEntity(name);
5070 }
5071 /*
5072 * [ WFC: Entity Declared ]
5073 * In a document without any DTD, a document with only an
5074 * internal DTD subset which contains no parameter entity
5075 * references, or a document with "standalone='yes'", the
5076 * Name given in the entity reference must match that in an
5077 * entity declaration, except that well-formed documents
5078 * need not declare any of the following entities: amp, lt,
5079 * gt, apos, quot.
5080 * The declaration of a parameter entity must precede any
5081 * reference to it.
5082 * Similarly, the declaration of a general entity must
5083 * precede any reference to it which appears in a default
5084 * value in an attribute-list declaration. Note that if
5085 * entities are declared in the external subset or in
5086 * external parameter entities, a non-validating processor
5087 * is not obligated to read and process their declarations;
5088 * for such documents, the rule that an entity must be
5089 * declared is a well-formedness constraint only if
5090 * standalone='yes'.
5091 */
5092 if (ent == NULL) {
5093 if ((ctxt->standalone == 1) ||
5094 ((ctxt->hasExternalSubset == 0) &&
5095 (ctxt->hasPErefs == 0))) {
5096 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5097 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5098 ctxt->sax->error(ctxt->userData,
5099 "Entity '%s' not defined\n", name);
5100 ctxt->wellFormed = 0;
5101 ctxt->disableSAX = 1;
5102 } else {
5103 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5104 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5105 ctxt->sax->warning(ctxt->userData,
5106 "Entity '%s' not defined\n", name);
5107 }
5108 }
5109
5110 /*
5111 * [ WFC: Parsed Entity ]
5112 * An entity reference must not contain the name of an
5113 * unparsed entity
5114 */
5115 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5116 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5117 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5118 ctxt->sax->error(ctxt->userData,
5119 "Entity reference to unparsed entity %s\n", name);
5120 ctxt->wellFormed = 0;
5121 ctxt->disableSAX = 1;
5122 }
5123
5124 /*
5125 * [ WFC: No External Entity References ]
5126 * Attribute values cannot contain direct or indirect
5127 * entity references to external entities.
5128 */
5129 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5130 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5131 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5132 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5133 ctxt->sax->error(ctxt->userData,
5134 "Attribute references external entity '%s'\n", name);
5135 ctxt->wellFormed = 0;
5136 ctxt->disableSAX = 1;
5137 }
5138 /*
5139 * [ WFC: No < in Attribute Values ]
5140 * The replacement text of any entity referred to directly or
5141 * indirectly in an attribute value (other than "&lt;") must
5142 * not contain a <.
5143 */
5144 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5145 (ent != NULL) &&
5146 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5147 (ent->content != NULL) &&
5148 (xmlStrchr(ent->content, '<'))) {
5149 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5150 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5151 ctxt->sax->error(ctxt->userData,
5152 "'<' in entity '%s' is not allowed in attributes values\n", name);
5153 ctxt->wellFormed = 0;
5154 ctxt->disableSAX = 1;
5155 }
5156
5157 /*
5158 * Internal check, no parameter entities here ...
5159 */
5160 else {
5161 switch (ent->etype) {
5162 case XML_INTERNAL_PARAMETER_ENTITY:
5163 case XML_EXTERNAL_PARAMETER_ENTITY:
5164 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5165 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5166 ctxt->sax->error(ctxt->userData,
5167 "Attempt to reference the parameter entity '%s'\n", name);
5168 ctxt->wellFormed = 0;
5169 ctxt->disableSAX = 1;
5170 break;
5171 default:
5172 break;
5173 }
5174 }
5175
5176 /*
5177 * [ WFC: No Recursion ]
5178 * A parsed entity must not contain a recursive reference
5179 * to itself, either directly or indirectly.
5180 * Done somewhere else
5181 */
5182
5183 } else {
5184 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5186 ctxt->sax->error(ctxt->userData,
5187 "xmlParseEntityRef: expecting ';'\n");
5188 ctxt->wellFormed = 0;
5189 ctxt->disableSAX = 1;
5190 }
5191 xmlFree(name);
5192 }
5193 }
5194 return(ent);
5195}
5196
5197/**
5198 * xmlParseStringEntityRef:
5199 * @ctxt: an XML parser context
5200 * @str: a pointer to an index in the string
5201 *
5202 * parse ENTITY references declarations, but this version parses it from
5203 * a string value.
5204 *
5205 * [68] EntityRef ::= '&' Name ';'
5206 *
5207 * [ WFC: Entity Declared ]
5208 * In a document without any DTD, a document with only an internal DTD
5209 * subset which contains no parameter entity references, or a document
5210 * with "standalone='yes'", the Name given in the entity reference
5211 * must match that in an entity declaration, except that well-formed
5212 * documents need not declare any of the following entities: amp, lt,
5213 * gt, apos, quot. The declaration of a parameter entity must precede
5214 * any reference to it. Similarly, the declaration of a general entity
5215 * must precede any reference to it which appears in a default value in an
5216 * attribute-list declaration. Note that if entities are declared in the
5217 * external subset or in external parameter entities, a non-validating
5218 * processor is not obligated to read and process their declarations;
5219 * for such documents, the rule that an entity must be declared is a
5220 * well-formedness constraint only if standalone='yes'.
5221 *
5222 * [ WFC: Parsed Entity ]
5223 * An entity reference must not contain the name of an unparsed entity
5224 *
5225 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5226 * is updated to the current location in the string.
5227 */
5228xmlEntityPtr
5229xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5230 xmlChar *name;
5231 const xmlChar *ptr;
5232 xmlChar cur;
5233 xmlEntityPtr ent = NULL;
5234
5235 if ((str == NULL) || (*str == NULL))
5236 return(NULL);
5237 ptr = *str;
5238 cur = *ptr;
5239 if (cur == '&') {
5240 ptr++;
5241 cur = *ptr;
5242 name = xmlParseStringName(ctxt, &ptr);
5243 if (name == NULL) {
5244 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5245 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5246 ctxt->sax->error(ctxt->userData,
5247 "xmlParseEntityRef: no name\n");
5248 ctxt->wellFormed = 0;
5249 ctxt->disableSAX = 1;
5250 } else {
5251 if (*ptr == ';') {
5252 ptr++;
5253 /*
5254 * Ask first SAX for entity resolution, otherwise try the
5255 * predefined set.
5256 */
5257 if (ctxt->sax != NULL) {
5258 if (ctxt->sax->getEntity != NULL)
5259 ent = ctxt->sax->getEntity(ctxt->userData, name);
5260 if (ent == NULL)
5261 ent = xmlGetPredefinedEntity(name);
5262 }
5263 /*
5264 * [ WFC: Entity Declared ]
5265 * In a document without any DTD, a document with only an
5266 * internal DTD subset which contains no parameter entity
5267 * references, or a document with "standalone='yes'", the
5268 * Name given in the entity reference must match that in an
5269 * entity declaration, except that well-formed documents
5270 * need not declare any of the following entities: amp, lt,
5271 * gt, apos, quot.
5272 * The declaration of a parameter entity must precede any
5273 * reference to it.
5274 * Similarly, the declaration of a general entity must
5275 * precede any reference to it which appears in a default
5276 * value in an attribute-list declaration. Note that if
5277 * entities are declared in the external subset or in
5278 * external parameter entities, a non-validating processor
5279 * is not obligated to read and process their declarations;
5280 * for such documents, the rule that an entity must be
5281 * declared is a well-formedness constraint only if
5282 * standalone='yes'.
5283 */
5284 if (ent == NULL) {
5285 if ((ctxt->standalone == 1) ||
5286 ((ctxt->hasExternalSubset == 0) &&
5287 (ctxt->hasPErefs == 0))) {
5288 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5289 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5290 ctxt->sax->error(ctxt->userData,
5291 "Entity '%s' not defined\n", name);
5292 ctxt->wellFormed = 0;
5293 ctxt->disableSAX = 1;
5294 } else {
5295 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5296 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5297 ctxt->sax->warning(ctxt->userData,
5298 "Entity '%s' not defined\n", name);
5299 }
5300 }
5301
5302 /*
5303 * [ WFC: Parsed Entity ]
5304 * An entity reference must not contain the name of an
5305 * unparsed entity
5306 */
5307 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5308 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5309 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5310 ctxt->sax->error(ctxt->userData,
5311 "Entity reference to unparsed entity %s\n", name);
5312 ctxt->wellFormed = 0;
5313 ctxt->disableSAX = 1;
5314 }
5315
5316 /*
5317 * [ WFC: No External Entity References ]
5318 * Attribute values cannot contain direct or indirect
5319 * entity references to external entities.
5320 */
5321 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5322 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5323 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5325 ctxt->sax->error(ctxt->userData,
5326 "Attribute references external entity '%s'\n", name);
5327 ctxt->wellFormed = 0;
5328 ctxt->disableSAX = 1;
5329 }
5330 /*
5331 * [ WFC: No < in Attribute Values ]
5332 * The replacement text of any entity referred to directly or
5333 * indirectly in an attribute value (other than "&lt;") must
5334 * not contain a <.
5335 */
5336 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5337 (ent != NULL) &&
5338 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5339 (ent->content != NULL) &&
5340 (xmlStrchr(ent->content, '<'))) {
5341 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5343 ctxt->sax->error(ctxt->userData,
5344 "'<' in entity '%s' is not allowed in attributes values\n", name);
5345 ctxt->wellFormed = 0;
5346 ctxt->disableSAX = 1;
5347 }
5348
5349 /*
5350 * Internal check, no parameter entities here ...
5351 */
5352 else {
5353 switch (ent->etype) {
5354 case XML_INTERNAL_PARAMETER_ENTITY:
5355 case XML_EXTERNAL_PARAMETER_ENTITY:
5356 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5357 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5358 ctxt->sax->error(ctxt->userData,
5359 "Attempt to reference the parameter entity '%s'\n", name);
5360 ctxt->wellFormed = 0;
5361 ctxt->disableSAX = 1;
5362 break;
5363 default:
5364 break;
5365 }
5366 }
5367
5368 /*
5369 * [ WFC: No Recursion ]
5370 * A parsed entity must not contain a recursive reference
5371 * to itself, either directly or indirectly.
5372 * Done somewhwere else
5373 */
5374
5375 } else {
5376 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5377 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5378 ctxt->sax->error(ctxt->userData,
5379 "xmlParseEntityRef: expecting ';'\n");
5380 ctxt->wellFormed = 0;
5381 ctxt->disableSAX = 1;
5382 }
5383 xmlFree(name);
5384 }
5385 }
5386 *str = ptr;
5387 return(ent);
5388}
5389
5390/**
5391 * xmlParsePEReference:
5392 * @ctxt: an XML parser context
5393 *
5394 * parse PEReference declarations
5395 * The entity content is handled directly by pushing it's content as
5396 * a new input stream.
5397 *
5398 * [69] PEReference ::= '%' Name ';'
5399 *
5400 * [ WFC: No Recursion ]
5401 * A parsed entity must not contain a recursive
5402 * reference to itself, either directly or indirectly.
5403 *
5404 * [ WFC: Entity Declared ]
5405 * In a document without any DTD, a document with only an internal DTD
5406 * subset which contains no parameter entity references, or a document
5407 * with "standalone='yes'", ... ... The declaration of a parameter
5408 * entity must precede any reference to it...
5409 *
5410 * [ VC: Entity Declared ]
5411 * In a document with an external subset or external parameter entities
5412 * with "standalone='no'", ... ... The declaration of a parameter entity
5413 * must precede any reference to it...
5414 *
5415 * [ WFC: In DTD ]
5416 * Parameter-entity references may only appear in the DTD.
5417 * NOTE: misleading but this is handled.
5418 */
5419void
5420xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5421 xmlChar *name;
5422 xmlEntityPtr entity = NULL;
5423 xmlParserInputPtr input;
5424
5425 if (RAW == '%') {
5426 NEXT;
5427 name = xmlParseName(ctxt);
5428 if (name == NULL) {
5429 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5430 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5431 ctxt->sax->error(ctxt->userData,
5432 "xmlParsePEReference: no name\n");
5433 ctxt->wellFormed = 0;
5434 ctxt->disableSAX = 1;
5435 } else {
5436 if (RAW == ';') {
5437 NEXT;
5438 if ((ctxt->sax != NULL) &&
5439 (ctxt->sax->getParameterEntity != NULL))
5440 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5441 name);
5442 if (entity == NULL) {
5443 /*
5444 * [ WFC: Entity Declared ]
5445 * In a document without any DTD, a document with only an
5446 * internal DTD subset which contains no parameter entity
5447 * references, or a document with "standalone='yes'", ...
5448 * ... The declaration of a parameter entity must precede
5449 * any reference to it...
5450 */
5451 if ((ctxt->standalone == 1) ||
5452 ((ctxt->hasExternalSubset == 0) &&
5453 (ctxt->hasPErefs == 0))) {
5454 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5455 if ((!ctxt->disableSAX) &&
5456 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5457 ctxt->sax->error(ctxt->userData,
5458 "PEReference: %%%s; not found\n", name);
5459 ctxt->wellFormed = 0;
5460 ctxt->disableSAX = 1;
5461 } else {
5462 /*
5463 * [ VC: Entity Declared ]
5464 * In a document with an external subset or external
5465 * parameter entities with "standalone='no'", ...
5466 * ... The declaration of a parameter entity must precede
5467 * any reference to it...
5468 */
5469 if ((!ctxt->disableSAX) &&
5470 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5471 ctxt->sax->warning(ctxt->userData,
5472 "PEReference: %%%s; not found\n", name);
5473 ctxt->valid = 0;
5474 }
5475 } else {
5476 /*
5477 * Internal checking in case the entity quest barfed
5478 */
5479 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5480 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5481 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5482 ctxt->sax->warning(ctxt->userData,
5483 "Internal: %%%s; is not a parameter entity\n", name);
5484 } else {
5485 /*
5486 * TODO !!!
5487 * handle the extra spaces added before and after
5488 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5489 */
5490 input = xmlNewEntityInputStream(ctxt, entity);
5491 xmlPushInput(ctxt, input);
5492 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5493 (RAW == '<') && (NXT(1) == '?') &&
5494 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5495 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5496 xmlParseTextDecl(ctxt);
5497 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5498 /*
5499 * The XML REC instructs us to stop parsing
5500 * right here
5501 */
5502 ctxt->instate = XML_PARSER_EOF;
5503 xmlFree(name);
5504 return;
5505 }
5506 }
5507 if (ctxt->token == 0)
5508 ctxt->token = ' ';
5509 }
5510 }
5511 ctxt->hasPErefs = 1;
5512 } else {
5513 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5514 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5515 ctxt->sax->error(ctxt->userData,
5516 "xmlParsePEReference: expecting ';'\n");
5517 ctxt->wellFormed = 0;
5518 ctxt->disableSAX = 1;
5519 }
5520 xmlFree(name);
5521 }
5522 }
5523}
5524
5525/**
5526 * xmlParseStringPEReference:
5527 * @ctxt: an XML parser context
5528 * @str: a pointer to an index in the string
5529 *
5530 * parse PEReference declarations
5531 *
5532 * [69] PEReference ::= '%' Name ';'
5533 *
5534 * [ WFC: No Recursion ]
5535 * A parsed entity must not contain a recursive
5536 * reference to itself, either directly or indirectly.
5537 *
5538 * [ WFC: Entity Declared ]
5539 * In a document without any DTD, a document with only an internal DTD
5540 * subset which contains no parameter entity references, or a document
5541 * with "standalone='yes'", ... ... The declaration of a parameter
5542 * entity must precede any reference to it...
5543 *
5544 * [ VC: Entity Declared ]
5545 * In a document with an external subset or external parameter entities
5546 * with "standalone='no'", ... ... The declaration of a parameter entity
5547 * must precede any reference to it...
5548 *
5549 * [ WFC: In DTD ]
5550 * Parameter-entity references may only appear in the DTD.
5551 * NOTE: misleading but this is handled.
5552 *
5553 * Returns the string of the entity content.
5554 * str is updated to the current value of the index
5555 */
5556xmlEntityPtr
5557xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5558 const xmlChar *ptr;
5559 xmlChar cur;
5560 xmlChar *name;
5561 xmlEntityPtr entity = NULL;
5562
5563 if ((str == NULL) || (*str == NULL)) return(NULL);
5564 ptr = *str;
5565 cur = *ptr;
5566 if (cur == '%') {
5567 ptr++;
5568 cur = *ptr;
5569 name = xmlParseStringName(ctxt, &ptr);
5570 if (name == NULL) {
5571 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5572 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5573 ctxt->sax->error(ctxt->userData,
5574 "xmlParseStringPEReference: no name\n");
5575 ctxt->wellFormed = 0;
5576 ctxt->disableSAX = 1;
5577 } else {
5578 cur = *ptr;
5579 if (cur == ';') {
5580 ptr++;
5581 cur = *ptr;
5582 if ((ctxt->sax != NULL) &&
5583 (ctxt->sax->getParameterEntity != NULL))
5584 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5585 name);
5586 if (entity == NULL) {
5587 /*
5588 * [ WFC: Entity Declared ]
5589 * In a document without any DTD, a document with only an
5590 * internal DTD subset which contains no parameter entity
5591 * references, or a document with "standalone='yes'", ...
5592 * ... The declaration of a parameter entity must precede
5593 * any reference to it...
5594 */
5595 if ((ctxt->standalone == 1) ||
5596 ((ctxt->hasExternalSubset == 0) &&
5597 (ctxt->hasPErefs == 0))) {
5598 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5600 ctxt->sax->error(ctxt->userData,
5601 "PEReference: %%%s; not found\n", name);
5602 ctxt->wellFormed = 0;
5603 ctxt->disableSAX = 1;
5604 } else {
5605 /*
5606 * [ VC: Entity Declared ]
5607 * In a document with an external subset or external
5608 * parameter entities with "standalone='no'", ...
5609 * ... The declaration of a parameter entity must
5610 * precede any reference to it...
5611 */
5612 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5613 ctxt->sax->warning(ctxt->userData,
5614 "PEReference: %%%s; not found\n", name);
5615 ctxt->valid = 0;
5616 }
5617 } else {
5618 /*
5619 * Internal checking in case the entity quest barfed
5620 */
5621 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5622 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5623 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5624 ctxt->sax->warning(ctxt->userData,
5625 "Internal: %%%s; is not a parameter entity\n", name);
5626 }
5627 }
5628 ctxt->hasPErefs = 1;
5629 } else {
5630 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5631 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5632 ctxt->sax->error(ctxt->userData,
5633 "xmlParseStringPEReference: expecting ';'\n");
5634 ctxt->wellFormed = 0;
5635 ctxt->disableSAX = 1;
5636 }
5637 xmlFree(name);
5638 }
5639 }
5640 *str = ptr;
5641 return(entity);
5642}
5643
5644/**
5645 * xmlParseDocTypeDecl:
5646 * @ctxt: an XML parser context
5647 *
5648 * parse a DOCTYPE declaration
5649 *
5650 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5651 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5652 *
5653 * [ VC: Root Element Type ]
5654 * The Name in the document type declaration must match the element
5655 * type of the root element.
5656 */
5657
5658void
5659xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5660 xmlChar *name = NULL;
5661 xmlChar *ExternalID = NULL;
5662 xmlChar *URI = NULL;
5663
5664 /*
5665 * We know that '<!DOCTYPE' has been detected.
5666 */
5667 SKIP(9);
5668
5669 SKIP_BLANKS;
5670
5671 /*
5672 * Parse the DOCTYPE name.
5673 */
5674 name = xmlParseName(ctxt);
5675 if (name == NULL) {
5676 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5677 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5678 ctxt->sax->error(ctxt->userData,
5679 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5680 ctxt->wellFormed = 0;
5681 ctxt->disableSAX = 1;
5682 }
5683 ctxt->intSubName = name;
5684
5685 SKIP_BLANKS;
5686
5687 /*
5688 * Check for SystemID and ExternalID
5689 */
5690 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5691
5692 if ((URI != NULL) || (ExternalID != NULL)) {
5693 ctxt->hasExternalSubset = 1;
5694 }
5695 ctxt->extSubURI = URI;
5696 ctxt->extSubSystem = ExternalID;
5697
5698 SKIP_BLANKS;
5699
5700 /*
5701 * Create and update the internal subset.
5702 */
5703 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5704 (!ctxt->disableSAX))
5705 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5706
5707 /*
5708 * Is there any internal subset declarations ?
5709 * they are handled separately in xmlParseInternalSubset()
5710 */
5711 if (RAW == '[')
5712 return;
5713
5714 /*
5715 * We should be at the end of the DOCTYPE declaration.
5716 */
5717 if (RAW != '>') {
5718 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5719 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5720 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5721 ctxt->wellFormed = 0;
5722 ctxt->disableSAX = 1;
5723 }
5724 NEXT;
5725}
5726
5727/**
5728 * xmlParseInternalsubset:
5729 * @ctxt: an XML parser context
5730 *
5731 * parse the internal subset declaration
5732 *
5733 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5734 */
5735
5736void
5737xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5738 /*
5739 * Is there any DTD definition ?
5740 */
5741 if (RAW == '[') {
5742 ctxt->instate = XML_PARSER_DTD;
5743 NEXT;
5744 /*
5745 * Parse the succession of Markup declarations and
5746 * PEReferences.
5747 * Subsequence (markupdecl | PEReference | S)*
5748 */
5749 while (RAW != ']') {
5750 const xmlChar *check = CUR_PTR;
5751 int cons = ctxt->input->consumed;
5752
5753 SKIP_BLANKS;
5754 xmlParseMarkupDecl(ctxt);
5755 xmlParsePEReference(ctxt);
5756
5757 /*
5758 * Pop-up of finished entities.
5759 */
5760 while ((RAW == 0) && (ctxt->inputNr > 1))
5761 xmlPopInput(ctxt);
5762
5763 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5764 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5766 ctxt->sax->error(ctxt->userData,
5767 "xmlParseInternalSubset: error detected in Markup declaration\n");
5768 ctxt->wellFormed = 0;
5769 ctxt->disableSAX = 1;
5770 break;
5771 }
5772 }
5773 if (RAW == ']') {
5774 NEXT;
5775 SKIP_BLANKS;
5776 }
5777 }
5778
5779 /*
5780 * We should be at the end of the DOCTYPE declaration.
5781 */
5782 if (RAW != '>') {
5783 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5785 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5786 ctxt->wellFormed = 0;
5787 ctxt->disableSAX = 1;
5788 }
5789 NEXT;
5790}
5791
5792/**
5793 * xmlParseAttribute:
5794 * @ctxt: an XML parser context
5795 * @value: a xmlChar ** used to store the value of the attribute
5796 *
5797 * parse an attribute
5798 *
5799 * [41] Attribute ::= Name Eq AttValue
5800 *
5801 * [ WFC: No External Entity References ]
5802 * Attribute values cannot contain direct or indirect entity references
5803 * to external entities.
5804 *
5805 * [ WFC: No < in Attribute Values ]
5806 * The replacement text of any entity referred to directly or indirectly in
5807 * an attribute value (other than "&lt;") must not contain a <.
5808 *
5809 * [ VC: Attribute Value Type ]
5810 * The attribute must have been declared; the value must be of the type
5811 * declared for it.
5812 *
5813 * [25] Eq ::= S? '=' S?
5814 *
5815 * With namespace:
5816 *
5817 * [NS 11] Attribute ::= QName Eq AttValue
5818 *
5819 * Also the case QName == xmlns:??? is handled independently as a namespace
5820 * definition.
5821 *
5822 * Returns the attribute name, and the value in *value.
5823 */
5824
5825xmlChar *
5826xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5827 xmlChar *name, *val;
5828
5829 *value = NULL;
5830 name = xmlParseName(ctxt);
5831 if (name == NULL) {
5832 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5833 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5834 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5835 ctxt->wellFormed = 0;
5836 ctxt->disableSAX = 1;
5837 return(NULL);
5838 }
5839
5840 /*
5841 * read the value
5842 */
5843 SKIP_BLANKS;
5844 if (RAW == '=') {
5845 NEXT;
5846 SKIP_BLANKS;
5847 val = xmlParseAttValue(ctxt);
5848 ctxt->instate = XML_PARSER_CONTENT;
5849 } else {
5850 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5852 ctxt->sax->error(ctxt->userData,
5853 "Specification mandate value for attribute %s\n", name);
5854 ctxt->wellFormed = 0;
5855 ctxt->disableSAX = 1;
5856 xmlFree(name);
5857 return(NULL);
5858 }
5859
5860 /*
5861 * Check that xml:lang conforms to the specification
5862 * No more registered as an error, just generate a warning now
5863 * since this was deprecated in XML second edition
5864 */
5865 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5866 if (!xmlCheckLanguageID(val)) {
5867 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5868 ctxt->sax->warning(ctxt->userData,
5869 "Malformed value for xml:lang : %s\n", val);
5870 }
5871 }
5872
5873 /*
5874 * Check that xml:space conforms to the specification
5875 */
5876 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5877 if (xmlStrEqual(val, BAD_CAST "default"))
5878 *(ctxt->space) = 0;
5879 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5880 *(ctxt->space) = 1;
5881 else {
5882 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5884 ctxt->sax->error(ctxt->userData,
5885"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5886 val);
5887 ctxt->wellFormed = 0;
5888 ctxt->disableSAX = 1;
5889 }
5890 }
5891
5892 *value = val;
5893 return(name);
5894}
5895
5896/**
5897 * xmlParseStartTag:
5898 * @ctxt: an XML parser context
5899 *
5900 * parse a start of tag either for rule element or
5901 * EmptyElement. In both case we don't parse the tag closing chars.
5902 *
5903 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5904 *
5905 * [ WFC: Unique Att Spec ]
5906 * No attribute name may appear more than once in the same start-tag or
5907 * empty-element tag.
5908 *
5909 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5910 *
5911 * [ WFC: Unique Att Spec ]
5912 * No attribute name may appear more than once in the same start-tag or
5913 * empty-element tag.
5914 *
5915 * With namespace:
5916 *
5917 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5918 *
5919 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
5920 *
5921 * Returns the element name parsed
5922 */
5923
5924xmlChar *
5925xmlParseStartTag(xmlParserCtxtPtr ctxt) {
5926 xmlChar *name;
5927 xmlChar *attname;
5928 xmlChar *attvalue;
5929 const xmlChar **atts = NULL;
5930 int nbatts = 0;
5931 int maxatts = 0;
5932 int i;
5933
5934 if (RAW != '<') return(NULL);
5935 NEXT;
5936
5937 name = xmlParseName(ctxt);
5938 if (name == NULL) {
5939 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5940 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5941 ctxt->sax->error(ctxt->userData,
5942 "xmlParseStartTag: invalid element name\n");
5943 ctxt->wellFormed = 0;
5944 ctxt->disableSAX = 1;
5945 return(NULL);
5946 }
5947
5948 /*
5949 * Now parse the attributes, it ends up with the ending
5950 *
5951 * (S Attribute)* S?
5952 */
5953 SKIP_BLANKS;
5954 GROW;
5955
5956 while ((IS_CHAR(RAW)) &&
5957 (RAW != '>') &&
5958 ((RAW != '/') || (NXT(1) != '>'))) {
5959 const xmlChar *q = CUR_PTR;
5960 int cons = ctxt->input->consumed;
5961
5962 attname = xmlParseAttribute(ctxt, &attvalue);
5963 if ((attname != NULL) && (attvalue != NULL)) {
5964 /*
5965 * [ WFC: Unique Att Spec ]
5966 * No attribute name may appear more than once in the same
5967 * start-tag or empty-element tag.
5968 */
5969 for (i = 0; i < nbatts;i += 2) {
5970 if (xmlStrEqual(atts[i], attname)) {
5971 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
5972 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5973 ctxt->sax->error(ctxt->userData,
5974 "Attribute %s redefined\n",
5975 attname);
5976 ctxt->wellFormed = 0;
5977 ctxt->disableSAX = 1;
5978 xmlFree(attname);
5979 xmlFree(attvalue);
5980 goto failed;
5981 }
5982 }
5983
5984 /*
5985 * Add the pair to atts
5986 */
5987 if (atts == NULL) {
5988 maxatts = 10;
5989 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
5990 if (atts == NULL) {
5991 xmlGenericError(xmlGenericErrorContext,
5992 "malloc of %ld byte failed\n",
5993 maxatts * (long)sizeof(xmlChar *));
5994 return(NULL);
5995 }
5996 } else if (nbatts + 4 > maxatts) {
5997 maxatts *= 2;
5998 atts = (const xmlChar **) xmlRealloc((void *) atts,
5999 maxatts * sizeof(xmlChar *));
6000 if (atts == NULL) {
6001 xmlGenericError(xmlGenericErrorContext,
6002 "realloc of %ld byte failed\n",
6003 maxatts * (long)sizeof(xmlChar *));
6004 return(NULL);
6005 }
6006 }
6007 atts[nbatts++] = attname;
6008 atts[nbatts++] = attvalue;
6009 atts[nbatts] = NULL;
6010 atts[nbatts + 1] = NULL;
6011 } else {
6012 if (attname != NULL)
6013 xmlFree(attname);
6014 if (attvalue != NULL)
6015 xmlFree(attvalue);
6016 }
6017
6018failed:
6019
6020 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6021 break;
6022 if (!IS_BLANK(RAW)) {
6023 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6024 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6025 ctxt->sax->error(ctxt->userData,
6026 "attributes construct error\n");
6027 ctxt->wellFormed = 0;
6028 ctxt->disableSAX = 1;
6029 }
6030 SKIP_BLANKS;
6031 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6032 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6034 ctxt->sax->error(ctxt->userData,
6035 "xmlParseStartTag: problem parsing attributes\n");
6036 ctxt->wellFormed = 0;
6037 ctxt->disableSAX = 1;
6038 break;
6039 }
6040 GROW;
6041 }
6042
6043 /*
6044 * SAX: Start of Element !
6045 */
6046 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6047 (!ctxt->disableSAX))
6048 ctxt->sax->startElement(ctxt->userData, name, atts);
6049
6050 if (atts != NULL) {
6051 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6052 xmlFree((void *) atts);
6053 }
6054 return(name);
6055}
6056
6057/**
6058 * xmlParseEndTag:
6059 * @ctxt: an XML parser context
6060 *
6061 * parse an end of tag
6062 *
6063 * [42] ETag ::= '</' Name S? '>'
6064 *
6065 * With namespace
6066 *
6067 * [NS 9] ETag ::= '</' QName S? '>'
6068 */
6069
6070void
6071xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6072 xmlChar *name;
6073 xmlChar *oldname;
6074
6075 GROW;
6076 if ((RAW != '<') || (NXT(1) != '/')) {
6077 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6078 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6079 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6080 ctxt->wellFormed = 0;
6081 ctxt->disableSAX = 1;
6082 return;
6083 }
6084 SKIP(2);
6085
6086 name = xmlParseName(ctxt);
6087
6088 /*
6089 * We should definitely be at the ending "S? '>'" part
6090 */
6091 GROW;
6092 SKIP_BLANKS;
6093 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6094 ctxt->errNo = XML_ERR_GT_REQUIRED;
6095 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6096 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6097 ctxt->wellFormed = 0;
6098 ctxt->disableSAX = 1;
6099 } else
6100 NEXT;
6101
6102 /*
6103 * [ WFC: Element Type Match ]
6104 * The Name in an element's end-tag must match the element type in the
6105 * start-tag.
6106 *
6107 */
6108 if ((name == NULL) || (ctxt->name == NULL) ||
6109 (!xmlStrEqual(name, ctxt->name))) {
6110 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6111 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6112 if ((name != NULL) && (ctxt->name != NULL)) {
6113 ctxt->sax->error(ctxt->userData,
6114 "Opening and ending tag mismatch: %s and %s\n",
6115 ctxt->name, name);
6116 } else if (ctxt->name != NULL) {
6117 ctxt->sax->error(ctxt->userData,
6118 "Ending tag eror for: %s\n", ctxt->name);
6119 } else {
6120 ctxt->sax->error(ctxt->userData,
6121 "Ending tag error: internal error ???\n");
6122 }
6123
6124 }
6125 ctxt->wellFormed = 0;
6126 ctxt->disableSAX = 1;
6127 }
6128
6129 /*
6130 * SAX: End of Tag
6131 */
6132 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6133 (!ctxt->disableSAX))
6134 ctxt->sax->endElement(ctxt->userData, name);
6135
6136 if (name != NULL)
6137 xmlFree(name);
6138 oldname = namePop(ctxt);
6139 spacePop(ctxt);
6140 if (oldname != NULL) {
6141#ifdef DEBUG_STACK
6142 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6143#endif
6144 xmlFree(oldname);
6145 }
6146 return;
6147}
6148
6149/**
6150 * xmlParseCDSect:
6151 * @ctxt: an XML parser context
6152 *
6153 * Parse escaped pure raw content.
6154 *
6155 * [18] CDSect ::= CDStart CData CDEnd
6156 *
6157 * [19] CDStart ::= '<![CDATA['
6158 *
6159 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6160 *
6161 * [21] CDEnd ::= ']]>'
6162 */
6163void
6164xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6165 xmlChar *buf = NULL;
6166 int len = 0;
6167 int size = XML_PARSER_BUFFER_SIZE;
6168 int r, rl;
6169 int s, sl;
6170 int cur, l;
6171 int count = 0;
6172
6173 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6174 (NXT(2) == '[') && (NXT(3) == 'C') &&
6175 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6176 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6177 (NXT(8) == '[')) {
6178 SKIP(9);
6179 } else
6180 return;
6181
6182 ctxt->instate = XML_PARSER_CDATA_SECTION;
6183 r = CUR_CHAR(rl);
6184 if (!IS_CHAR(r)) {
6185 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6186 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6187 ctxt->sax->error(ctxt->userData,
6188 "CData section not finished\n");
6189 ctxt->wellFormed = 0;
6190 ctxt->disableSAX = 1;
6191 ctxt->instate = XML_PARSER_CONTENT;
6192 return;
6193 }
6194 NEXTL(rl);
6195 s = CUR_CHAR(sl);
6196 if (!IS_CHAR(s)) {
6197 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6198 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6199 ctxt->sax->error(ctxt->userData,
6200 "CData section not finished\n");
6201 ctxt->wellFormed = 0;
6202 ctxt->disableSAX = 1;
6203 ctxt->instate = XML_PARSER_CONTENT;
6204 return;
6205 }
6206 NEXTL(sl);
6207 cur = CUR_CHAR(l);
6208 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6209 if (buf == NULL) {
6210 xmlGenericError(xmlGenericErrorContext,
6211 "malloc of %d byte failed\n", size);
6212 return;
6213 }
6214 while (IS_CHAR(cur) &&
6215 ((r != ']') || (s != ']') || (cur != '>'))) {
6216 if (len + 5 >= size) {
6217 size *= 2;
6218 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6219 if (buf == NULL) {
6220 xmlGenericError(xmlGenericErrorContext,
6221 "realloc of %d byte failed\n", size);
6222 return;
6223 }
6224 }
6225 COPY_BUF(rl,buf,len,r);
6226 r = s;
6227 rl = sl;
6228 s = cur;
6229 sl = l;
6230 count++;
6231 if (count > 50) {
6232 GROW;
6233 count = 0;
6234 }
6235 NEXTL(l);
6236 cur = CUR_CHAR(l);
6237 }
6238 buf[len] = 0;
6239 ctxt->instate = XML_PARSER_CONTENT;
6240 if (cur != '>') {
6241 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6242 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6243 ctxt->sax->error(ctxt->userData,
6244 "CData section not finished\n%.50s\n", buf);
6245 ctxt->wellFormed = 0;
6246 ctxt->disableSAX = 1;
6247 xmlFree(buf);
6248 return;
6249 }
6250 NEXTL(l);
6251
6252 /*
6253 * Ok the buffer is to be consumed as cdata.
6254 */
6255 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6256 if (ctxt->sax->cdataBlock != NULL)
6257 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6258 }
6259 xmlFree(buf);
6260}
6261
6262/**
6263 * xmlParseContent:
6264 * @ctxt: an XML parser context
6265 *
6266 * Parse a content:
6267 *
6268 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6269 */
6270
6271void
6272xmlParseContent(xmlParserCtxtPtr ctxt) {
6273 GROW;
6274 while (((RAW != 0) || (ctxt->token != 0)) &&
6275 ((RAW != '<') || (NXT(1) != '/'))) {
6276 const xmlChar *test = CUR_PTR;
6277 int cons = ctxt->input->consumed;
6278 xmlChar tok = ctxt->token;
6279
6280 /*
6281 * Handle possible processed charrefs.
6282 */
6283 if (ctxt->token != 0) {
6284 xmlParseCharData(ctxt, 0);
6285 }
6286 /*
6287 * First case : a Processing Instruction.
6288 */
6289 else if ((RAW == '<') && (NXT(1) == '?')) {
6290 xmlParsePI(ctxt);
6291 }
6292
6293 /*
6294 * Second case : a CDSection
6295 */
6296 else if ((RAW == '<') && (NXT(1) == '!') &&
6297 (NXT(2) == '[') && (NXT(3) == 'C') &&
6298 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6299 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6300 (NXT(8) == '[')) {
6301 xmlParseCDSect(ctxt);
6302 }
6303
6304 /*
6305 * Third case : a comment
6306 */
6307 else if ((RAW == '<') && (NXT(1) == '!') &&
6308 (NXT(2) == '-') && (NXT(3) == '-')) {
6309 xmlParseComment(ctxt);
6310 ctxt->instate = XML_PARSER_CONTENT;
6311 }
6312
6313 /*
6314 * Fourth case : a sub-element.
6315 */
6316 else if (RAW == '<') {
6317 xmlParseElement(ctxt);
6318 }
6319
6320 /*
6321 * Fifth case : a reference. If if has not been resolved,
6322 * parsing returns it's Name, create the node
6323 */
6324
6325 else if (RAW == '&') {
6326 xmlParseReference(ctxt);
6327 }
6328
6329 /*
6330 * Last case, text. Note that References are handled directly.
6331 */
6332 else {
6333 xmlParseCharData(ctxt, 0);
6334 }
6335
6336 GROW;
6337 /*
6338 * Pop-up of finished entities.
6339 */
6340 while ((RAW == 0) && (ctxt->inputNr > 1))
6341 xmlPopInput(ctxt);
6342 SHRINK;
6343
6344 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6345 (tok == ctxt->token)) {
6346 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6347 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6348 ctxt->sax->error(ctxt->userData,
6349 "detected an error in element content\n");
6350 ctxt->wellFormed = 0;
6351 ctxt->disableSAX = 1;
6352 ctxt->instate = XML_PARSER_EOF;
6353 break;
6354 }
6355 }
6356}
6357
6358/**
6359 * xmlParseElement:
6360 * @ctxt: an XML parser context
6361 *
6362 * parse an XML element, this is highly recursive
6363 *
6364 * [39] element ::= EmptyElemTag | STag content ETag
6365 *
6366 * [ WFC: Element Type Match ]
6367 * The Name in an element's end-tag must match the element type in the
6368 * start-tag.
6369 *
6370 * [ VC: Element Valid ]
6371 * An element is valid if there is a declaration matching elementdecl
6372 * where the Name matches the element type and one of the following holds:
6373 * - The declaration matches EMPTY and the element has no content.
6374 * - The declaration matches children and the sequence of child elements
6375 * belongs to the language generated by the regular expression in the
6376 * content model, with optional white space (characters matching the
6377 * nonterminal S) between each pair of child elements.
6378 * - The declaration matches Mixed and the content consists of character
6379 * data and child elements whose types match names in the content model.
6380 * - The declaration matches ANY, and the types of any child elements have
6381 * been declared.
6382 */
6383
6384void
6385xmlParseElement(xmlParserCtxtPtr ctxt) {
6386 const xmlChar *openTag = CUR_PTR;
6387 xmlChar *name;
6388 xmlChar *oldname;
6389 xmlParserNodeInfo node_info;
6390 xmlNodePtr ret;
6391
6392 /* Capture start position */
6393 if (ctxt->record_info) {
6394 node_info.begin_pos = ctxt->input->consumed +
6395 (CUR_PTR - ctxt->input->base);
6396 node_info.begin_line = ctxt->input->line;
6397 }
6398
6399 if (ctxt->spaceNr == 0)
6400 spacePush(ctxt, -1);
6401 else
6402 spacePush(ctxt, *ctxt->space);
6403
6404 name = xmlParseStartTag(ctxt);
6405 if (name == NULL) {
6406 spacePop(ctxt);
6407 return;
6408 }
6409 namePush(ctxt, name);
6410 ret = ctxt->node;
6411
6412 /*
6413 * [ VC: Root Element Type ]
6414 * The Name in the document type declaration must match the element
6415 * type of the root element.
6416 */
6417 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6418 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6419 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6420
6421 /*
6422 * Check for an Empty Element.
6423 */
6424 if ((RAW == '/') && (NXT(1) == '>')) {
6425 SKIP(2);
6426 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6427 (!ctxt->disableSAX))
6428 ctxt->sax->endElement(ctxt->userData, name);
6429 oldname = namePop(ctxt);
6430 spacePop(ctxt);
6431 if (oldname != NULL) {
6432#ifdef DEBUG_STACK
6433 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6434#endif
6435 xmlFree(oldname);
6436 }
6437 if ( ret != NULL && ctxt->record_info ) {
6438 node_info.end_pos = ctxt->input->consumed +
6439 (CUR_PTR - ctxt->input->base);
6440 node_info.end_line = ctxt->input->line;
6441 node_info.node = ret;
6442 xmlParserAddNodeInfo(ctxt, &node_info);
6443 }
6444 return;
6445 }
6446 if (RAW == '>') {
6447 NEXT;
6448 } else {
6449 ctxt->errNo = XML_ERR_GT_REQUIRED;
6450 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6451 ctxt->sax->error(ctxt->userData,
6452 "Couldn't find end of Start Tag\n%.30s\n",
6453 openTag);
6454 ctxt->wellFormed = 0;
6455 ctxt->disableSAX = 1;
6456
6457 /*
6458 * end of parsing of this node.
6459 */
6460 nodePop(ctxt);
6461 oldname = namePop(ctxt);
6462 spacePop(ctxt);
6463 if (oldname != NULL) {
6464#ifdef DEBUG_STACK
6465 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6466#endif
6467 xmlFree(oldname);
6468 }
6469
6470 /*
6471 * Capture end position and add node
6472 */
6473 if ( ret != NULL && ctxt->record_info ) {
6474 node_info.end_pos = ctxt->input->consumed +
6475 (CUR_PTR - ctxt->input->base);
6476 node_info.end_line = ctxt->input->line;
6477 node_info.node = ret;
6478 xmlParserAddNodeInfo(ctxt, &node_info);
6479 }
6480 return;
6481 }
6482
6483 /*
6484 * Parse the content of the element:
6485 */
6486 xmlParseContent(ctxt);
6487 if (!IS_CHAR(RAW)) {
6488 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6489 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6490 ctxt->sax->error(ctxt->userData,
6491 "Premature end of data in tag %.30s\n", openTag);
6492 ctxt->wellFormed = 0;
6493 ctxt->disableSAX = 1;
6494
6495 /*
6496 * end of parsing of this node.
6497 */
6498 nodePop(ctxt);
6499 oldname = namePop(ctxt);
6500 spacePop(ctxt);
6501 if (oldname != NULL) {
6502#ifdef DEBUG_STACK
6503 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6504#endif
6505 xmlFree(oldname);
6506 }
6507 return;
6508 }
6509
6510 /*
6511 * parse the end of tag: '</' should be here.
6512 */
6513 xmlParseEndTag(ctxt);
6514
6515 /*
6516 * Capture end position and add node
6517 */
6518 if ( ret != NULL && ctxt->record_info ) {
6519 node_info.end_pos = ctxt->input->consumed +
6520 (CUR_PTR - ctxt->input->base);
6521 node_info.end_line = ctxt->input->line;
6522 node_info.node = ret;
6523 xmlParserAddNodeInfo(ctxt, &node_info);
6524 }
6525}
6526
6527/**
6528 * xmlParseVersionNum:
6529 * @ctxt: an XML parser context
6530 *
6531 * parse the XML version value.
6532 *
6533 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6534 *
6535 * Returns the string giving the XML version number, or NULL
6536 */
6537xmlChar *
6538xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6539 xmlChar *buf = NULL;
6540 int len = 0;
6541 int size = 10;
6542 xmlChar cur;
6543
6544 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6545 if (buf == NULL) {
6546 xmlGenericError(xmlGenericErrorContext,
6547 "malloc of %d byte failed\n", size);
6548 return(NULL);
6549 }
6550 cur = CUR;
6551 while (((cur >= 'a') && (cur <= 'z')) ||
6552 ((cur >= 'A') && (cur <= 'Z')) ||
6553 ((cur >= '0') && (cur <= '9')) ||
6554 (cur == '_') || (cur == '.') ||
6555 (cur == ':') || (cur == '-')) {
6556 if (len + 1 >= size) {
6557 size *= 2;
6558 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6559 if (buf == NULL) {
6560 xmlGenericError(xmlGenericErrorContext,
6561 "realloc of %d byte failed\n", size);
6562 return(NULL);
6563 }
6564 }
6565 buf[len++] = cur;
6566 NEXT;
6567 cur=CUR;
6568 }
6569 buf[len] = 0;
6570 return(buf);
6571}
6572
6573/**
6574 * xmlParseVersionInfo:
6575 * @ctxt: an XML parser context
6576 *
6577 * parse the XML version.
6578 *
6579 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6580 *
6581 * [25] Eq ::= S? '=' S?
6582 *
6583 * Returns the version string, e.g. "1.0"
6584 */
6585
6586xmlChar *
6587xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6588 xmlChar *version = NULL;
6589 const xmlChar *q;
6590
6591 if ((RAW == 'v') && (NXT(1) == 'e') &&
6592 (NXT(2) == 'r') && (NXT(3) == 's') &&
6593 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6594 (NXT(6) == 'n')) {
6595 SKIP(7);
6596 SKIP_BLANKS;
6597 if (RAW != '=') {
6598 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6599 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6600 ctxt->sax->error(ctxt->userData,
6601 "xmlParseVersionInfo : expected '='\n");
6602 ctxt->wellFormed = 0;
6603 ctxt->disableSAX = 1;
6604 return(NULL);
6605 }
6606 NEXT;
6607 SKIP_BLANKS;
6608 if (RAW == '"') {
6609 NEXT;
6610 q = CUR_PTR;
6611 version = xmlParseVersionNum(ctxt);
6612 if (RAW != '"') {
6613 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6614 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6615 ctxt->sax->error(ctxt->userData,
6616 "String not closed\n%.50s\n", q);
6617 ctxt->wellFormed = 0;
6618 ctxt->disableSAX = 1;
6619 } else
6620 NEXT;
6621 } else if (RAW == '\''){
6622 NEXT;
6623 q = CUR_PTR;
6624 version = xmlParseVersionNum(ctxt);
6625 if (RAW != '\'') {
6626 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6627 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6628 ctxt->sax->error(ctxt->userData,
6629 "String not closed\n%.50s\n", q);
6630 ctxt->wellFormed = 0;
6631 ctxt->disableSAX = 1;
6632 } else
6633 NEXT;
6634 } else {
6635 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6636 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6637 ctxt->sax->error(ctxt->userData,
6638 "xmlParseVersionInfo : expected ' or \"\n");
6639 ctxt->wellFormed = 0;
6640 ctxt->disableSAX = 1;
6641 }
6642 }
6643 return(version);
6644}
6645
6646/**
6647 * xmlParseEncName:
6648 * @ctxt: an XML parser context
6649 *
6650 * parse the XML encoding name
6651 *
6652 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6653 *
6654 * Returns the encoding name value or NULL
6655 */
6656xmlChar *
6657xmlParseEncName(xmlParserCtxtPtr ctxt) {
6658 xmlChar *buf = NULL;
6659 int len = 0;
6660 int size = 10;
6661 xmlChar cur;
6662
6663 cur = CUR;
6664 if (((cur >= 'a') && (cur <= 'z')) ||
6665 ((cur >= 'A') && (cur <= 'Z'))) {
6666 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6667 if (buf == NULL) {
6668 xmlGenericError(xmlGenericErrorContext,
6669 "malloc of %d byte failed\n", size);
6670 return(NULL);
6671 }
6672
6673 buf[len++] = cur;
6674 NEXT;
6675 cur = CUR;
6676 while (((cur >= 'a') && (cur <= 'z')) ||
6677 ((cur >= 'A') && (cur <= 'Z')) ||
6678 ((cur >= '0') && (cur <= '9')) ||
6679 (cur == '.') || (cur == '_') ||
6680 (cur == '-')) {
6681 if (len + 1 >= size) {
6682 size *= 2;
6683 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6684 if (buf == NULL) {
6685 xmlGenericError(xmlGenericErrorContext,
6686 "realloc of %d byte failed\n", size);
6687 return(NULL);
6688 }
6689 }
6690 buf[len++] = cur;
6691 NEXT;
6692 cur = CUR;
6693 if (cur == 0) {
6694 SHRINK;
6695 GROW;
6696 cur = CUR;
6697 }
6698 }
6699 buf[len] = 0;
6700 } else {
6701 ctxt->errNo = XML_ERR_ENCODING_NAME;
6702 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6703 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6704 ctxt->wellFormed = 0;
6705 ctxt->disableSAX = 1;
6706 }
6707 return(buf);
6708}
6709
6710/**
6711 * xmlParseEncodingDecl:
6712 * @ctxt: an XML parser context
6713 *
6714 * parse the XML encoding declaration
6715 *
6716 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6717 *
6718 * this setups the conversion filters.
6719 *
6720 * Returns the encoding value or NULL
6721 */
6722
6723xmlChar *
6724xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6725 xmlChar *encoding = NULL;
6726 const xmlChar *q;
6727
6728 SKIP_BLANKS;
6729 if ((RAW == 'e') && (NXT(1) == 'n') &&
6730 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6731 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6732 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6733 SKIP(8);
6734 SKIP_BLANKS;
6735 if (RAW != '=') {
6736 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6738 ctxt->sax->error(ctxt->userData,
6739 "xmlParseEncodingDecl : expected '='\n");
6740 ctxt->wellFormed = 0;
6741 ctxt->disableSAX = 1;
6742 return(NULL);
6743 }
6744 NEXT;
6745 SKIP_BLANKS;
6746 if (RAW == '"') {
6747 NEXT;
6748 q = CUR_PTR;
6749 encoding = xmlParseEncName(ctxt);
6750 if (RAW != '"') {
6751 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6753 ctxt->sax->error(ctxt->userData,
6754 "String not closed\n%.50s\n", q);
6755 ctxt->wellFormed = 0;
6756 ctxt->disableSAX = 1;
6757 } else
6758 NEXT;
6759 } else if (RAW == '\''){
6760 NEXT;
6761 q = CUR_PTR;
6762 encoding = xmlParseEncName(ctxt);
6763 if (RAW != '\'') {
6764 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6765 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6766 ctxt->sax->error(ctxt->userData,
6767 "String not closed\n%.50s\n", q);
6768 ctxt->wellFormed = 0;
6769 ctxt->disableSAX = 1;
6770 } else
6771 NEXT;
6772 } else if (RAW == '"'){
6773 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6775 ctxt->sax->error(ctxt->userData,
6776 "xmlParseEncodingDecl : expected ' or \"\n");
6777 ctxt->wellFormed = 0;
6778 ctxt->disableSAX = 1;
6779 }
6780 if (encoding != NULL) {
6781 xmlCharEncoding enc;
6782 xmlCharEncodingHandlerPtr handler;
6783
6784 if (ctxt->input->encoding != NULL)
6785 xmlFree((xmlChar *) ctxt->input->encoding);
6786 ctxt->input->encoding = encoding;
6787
6788 enc = xmlParseCharEncoding((const char *) encoding);
6789 /*
6790 * registered set of known encodings
6791 */
6792 if (enc != XML_CHAR_ENCODING_ERROR) {
6793 xmlSwitchEncoding(ctxt, enc);
6794 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6795 xmlFree(encoding);
6796 return(NULL);
6797 }
6798 } else {
6799 /*
6800 * fallback for unknown encodings
6801 */
6802 handler = xmlFindCharEncodingHandler((const char *) encoding);
6803 if (handler != NULL) {
6804 xmlSwitchToEncoding(ctxt, handler);
6805 } else {
6806 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6807 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6808 ctxt->sax->error(ctxt->userData,
6809 "Unsupported encoding %s\n", encoding);
6810 return(NULL);
6811 }
6812 }
6813 }
6814 }
6815 return(encoding);
6816}
6817
6818/**
6819 * xmlParseSDDecl:
6820 * @ctxt: an XML parser context
6821 *
6822 * parse the XML standalone declaration
6823 *
6824 * [32] SDDecl ::= S 'standalone' Eq
6825 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6826 *
6827 * [ VC: Standalone Document Declaration ]
6828 * TODO The standalone document declaration must have the value "no"
6829 * if any external markup declarations contain declarations of:
6830 * - attributes with default values, if elements to which these
6831 * attributes apply appear in the document without specifications
6832 * of values for these attributes, or
6833 * - entities (other than amp, lt, gt, apos, quot), if references
6834 * to those entities appear in the document, or
6835 * - attributes with values subject to normalization, where the
6836 * attribute appears in the document with a value which will change
6837 * as a result of normalization, or
6838 * - element types with element content, if white space occurs directly
6839 * within any instance of those types.
6840 *
6841 * Returns 1 if standalone, 0 otherwise
6842 */
6843
6844int
6845xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6846 int standalone = -1;
6847
6848 SKIP_BLANKS;
6849 if ((RAW == 's') && (NXT(1) == 't') &&
6850 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6851 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6852 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6853 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6854 SKIP(10);
6855 SKIP_BLANKS;
6856 if (RAW != '=') {
6857 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6858 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6859 ctxt->sax->error(ctxt->userData,
6860 "XML standalone declaration : expected '='\n");
6861 ctxt->wellFormed = 0;
6862 ctxt->disableSAX = 1;
6863 return(standalone);
6864 }
6865 NEXT;
6866 SKIP_BLANKS;
6867 if (RAW == '\''){
6868 NEXT;
6869 if ((RAW == 'n') && (NXT(1) == 'o')) {
6870 standalone = 0;
6871 SKIP(2);
6872 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6873 (NXT(2) == 's')) {
6874 standalone = 1;
6875 SKIP(3);
6876 } else {
6877 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6878 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6879 ctxt->sax->error(ctxt->userData,
6880 "standalone accepts only 'yes' or 'no'\n");
6881 ctxt->wellFormed = 0;
6882 ctxt->disableSAX = 1;
6883 }
6884 if (RAW != '\'') {
6885 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6886 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6887 ctxt->sax->error(ctxt->userData, "String not closed\n");
6888 ctxt->wellFormed = 0;
6889 ctxt->disableSAX = 1;
6890 } else
6891 NEXT;
6892 } else if (RAW == '"'){
6893 NEXT;
6894 if ((RAW == 'n') && (NXT(1) == 'o')) {
6895 standalone = 0;
6896 SKIP(2);
6897 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6898 (NXT(2) == 's')) {
6899 standalone = 1;
6900 SKIP(3);
6901 } else {
6902 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6904 ctxt->sax->error(ctxt->userData,
6905 "standalone accepts only 'yes' or 'no'\n");
6906 ctxt->wellFormed = 0;
6907 ctxt->disableSAX = 1;
6908 }
6909 if (RAW != '"') {
6910 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6911 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6912 ctxt->sax->error(ctxt->userData, "String not closed\n");
6913 ctxt->wellFormed = 0;
6914 ctxt->disableSAX = 1;
6915 } else
6916 NEXT;
6917 } else {
6918 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6919 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6920 ctxt->sax->error(ctxt->userData,
6921 "Standalone value not found\n");
6922 ctxt->wellFormed = 0;
6923 ctxt->disableSAX = 1;
6924 }
6925 }
6926 return(standalone);
6927}
6928
6929/**
6930 * xmlParseXMLDecl:
6931 * @ctxt: an XML parser context
6932 *
6933 * parse an XML declaration header
6934 *
6935 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
6936 */
6937
6938void
6939xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
6940 xmlChar *version;
6941
6942 /*
6943 * We know that '<?xml' is here.
6944 */
6945 SKIP(5);
6946
6947 if (!IS_BLANK(RAW)) {
6948 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6949 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6950 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
6951 ctxt->wellFormed = 0;
6952 ctxt->disableSAX = 1;
6953 }
6954 SKIP_BLANKS;
6955
6956 /*
6957 * We should have the VersionInfo here.
6958 */
6959 version = xmlParseVersionInfo(ctxt);
6960 if (version == NULL)
6961 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6962 ctxt->version = xmlStrdup(version);
6963 xmlFree(version);
6964
6965 /*
6966 * We may have the encoding declaration
6967 */
6968 if (!IS_BLANK(RAW)) {
6969 if ((RAW == '?') && (NXT(1) == '>')) {
6970 SKIP(2);
6971 return;
6972 }
6973 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6974 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6975 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
6976 ctxt->wellFormed = 0;
6977 ctxt->disableSAX = 1;
6978 }
6979 xmlParseEncodingDecl(ctxt);
6980 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6981 /*
6982 * The XML REC instructs us to stop parsing right here
6983 */
6984 return;
6985 }
6986
6987 /*
6988 * We may have the standalone status.
6989 */
6990 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
6991 if ((RAW == '?') && (NXT(1) == '>')) {
6992 SKIP(2);
6993 return;
6994 }
6995 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6996 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6997 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
6998 ctxt->wellFormed = 0;
6999 ctxt->disableSAX = 1;
7000 }
7001 SKIP_BLANKS;
7002 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7003
7004 SKIP_BLANKS;
7005 if ((RAW == '?') && (NXT(1) == '>')) {
7006 SKIP(2);
7007 } else if (RAW == '>') {
7008 /* Deprecated old WD ... */
7009 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7010 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7011 ctxt->sax->error(ctxt->userData,
7012 "XML declaration must end-up with '?>'\n");
7013 ctxt->wellFormed = 0;
7014 ctxt->disableSAX = 1;
7015 NEXT;
7016 } else {
7017 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7018 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7019 ctxt->sax->error(ctxt->userData,
7020 "parsing XML declaration: '?>' expected\n");
7021 ctxt->wellFormed = 0;
7022 ctxt->disableSAX = 1;
7023 MOVETO_ENDTAG(CUR_PTR);
7024 NEXT;
7025 }
7026}
7027
7028/**
7029 * xmlParseMisc:
7030 * @ctxt: an XML parser context
7031 *
7032 * parse an XML Misc* optionnal field.
7033 *
7034 * [27] Misc ::= Comment | PI | S
7035 */
7036
7037void
7038xmlParseMisc(xmlParserCtxtPtr ctxt) {
7039 while (((RAW == '<') && (NXT(1) == '?')) ||
7040 ((RAW == '<') && (NXT(1) == '!') &&
7041 (NXT(2) == '-') && (NXT(3) == '-')) ||
7042 IS_BLANK(CUR)) {
7043 if ((RAW == '<') && (NXT(1) == '?')) {
7044 xmlParsePI(ctxt);
7045 } else if (IS_BLANK(CUR)) {
7046 NEXT;
7047 } else
7048 xmlParseComment(ctxt);
7049 }
7050}
7051
7052/**
7053 * xmlParseDocument:
7054 * @ctxt: an XML parser context
7055 *
7056 * parse an XML document (and build a tree if using the standard SAX
7057 * interface).
7058 *
7059 * [1] document ::= prolog element Misc*
7060 *
7061 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
7062 *
7063 * Returns 0, -1 in case of error. the parser context is augmented
7064 * as a result of the parsing.
7065 */
7066
7067int
7068xmlParseDocument(xmlParserCtxtPtr ctxt) {
7069 xmlChar start[4];
7070 xmlCharEncoding enc;
7071
7072 xmlInitParser();
7073
7074 GROW;
7075
7076 /*
7077 * SAX: beginning of the document processing.
7078 */
7079 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7080 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7081
7082 /*
7083 * Get the 4 first bytes and decode the charset
7084 * if enc != XML_CHAR_ENCODING_NONE
7085 * plug some encoding conversion routines.
7086 */
7087 start[0] = RAW;
7088 start[1] = NXT(1);
7089 start[2] = NXT(2);
7090 start[3] = NXT(3);
7091 enc = xmlDetectCharEncoding(start, 4);
7092 if (enc != XML_CHAR_ENCODING_NONE) {
7093 xmlSwitchEncoding(ctxt, enc);
7094 }
7095
7096
7097 if (CUR == 0) {
7098 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7099 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7100 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7101 ctxt->wellFormed = 0;
7102 ctxt->disableSAX = 1;
7103 }
7104
7105 /*
7106 * Check for the XMLDecl in the Prolog.
7107 */
7108 GROW;
7109 if ((RAW == '<') && (NXT(1) == '?') &&
7110 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7111 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7112
7113 /*
7114 * Note that we will switch encoding on the fly.
7115 */
7116 xmlParseXMLDecl(ctxt);
7117 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7118 /*
7119 * The XML REC instructs us to stop parsing right here
7120 */
7121 return(-1);
7122 }
7123 ctxt->standalone = ctxt->input->standalone;
7124 SKIP_BLANKS;
7125 } else {
7126 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7127 }
7128 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7129 ctxt->sax->startDocument(ctxt->userData);
7130
7131 /*
7132 * The Misc part of the Prolog
7133 */
7134 GROW;
7135 xmlParseMisc(ctxt);
7136
7137 /*
7138 * Then possibly doc type declaration(s) and more Misc
7139 * (doctypedecl Misc*)?
7140 */
7141 GROW;
7142 if ((RAW == '<') && (NXT(1) == '!') &&
7143 (NXT(2) == 'D') && (NXT(3) == 'O') &&
7144 (NXT(4) == 'C') && (NXT(5) == 'T') &&
7145 (NXT(6) == 'Y') && (NXT(7) == 'P') &&
7146 (NXT(8) == 'E')) {
7147
7148 ctxt->inSubset = 1;
7149 xmlParseDocTypeDecl(ctxt);
7150 if (RAW == '[') {
7151 ctxt->instate = XML_PARSER_DTD;
7152 xmlParseInternalSubset(ctxt);
7153 }
7154
7155 /*
7156 * Create and update the external subset.
7157 */
7158 ctxt->inSubset = 2;
7159 if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
7160 (!ctxt->disableSAX))
7161 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
7162 ctxt->extSubSystem, ctxt->extSubURI);
7163 ctxt->inSubset = 0;
7164
7165
7166 ctxt->instate = XML_PARSER_PROLOG;
7167 xmlParseMisc(ctxt);
7168 }
7169
7170 /*
7171 * Time to start parsing the tree itself
7172 */
7173 GROW;
7174 if (RAW != '<') {
7175 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7176 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7177 ctxt->sax->error(ctxt->userData,
7178 "Start tag expected, '<' not found\n");
7179 ctxt->wellFormed = 0;
7180 ctxt->disableSAX = 1;
7181 ctxt->instate = XML_PARSER_EOF;
7182 } else {
7183 ctxt->instate = XML_PARSER_CONTENT;
7184 xmlParseElement(ctxt);
7185 ctxt->instate = XML_PARSER_EPILOG;
7186
7187
7188 /*
7189 * The Misc part at the end
7190 */
7191 xmlParseMisc(ctxt);
7192
7193 if (RAW != 0) {
7194 ctxt->errNo = XML_ERR_DOCUMENT_END;
7195 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7196 ctxt->sax->error(ctxt->userData,
7197 "Extra content at the end of the document\n");
7198 ctxt->wellFormed = 0;
7199 ctxt->disableSAX = 1;
7200 }
7201 ctxt->instate = XML_PARSER_EOF;
7202 }
7203
7204 /*
7205 * SAX: end of the document processing.
7206 */
7207 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7208 (!ctxt->disableSAX))
7209 ctxt->sax->endDocument(ctxt->userData);
7210
7211 if (! ctxt->wellFormed) return(-1);
7212 return(0);
7213}
7214
7215/**
7216 * xmlParseExtParsedEnt:
7217 * @ctxt: an XML parser context
7218 *
7219 * parse a genreral parsed entity
7220 * An external general parsed entity is well-formed if it matches the
7221 * production labeled extParsedEnt.
7222 *
7223 * [78] extParsedEnt ::= TextDecl? content
7224 *
7225 * Returns 0, -1 in case of error. the parser context is augmented
7226 * as a result of the parsing.
7227 */
7228
7229int
7230xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
7231 xmlChar start[4];
7232 xmlCharEncoding enc;
7233
7234 xmlDefaultSAXHandlerInit();
7235
7236 GROW;
7237
7238 /*
7239 * SAX: beginning of the document processing.
7240 */
7241 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7242 ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
7243
7244 /*
7245 * Get the 4 first bytes and decode the charset
7246 * if enc != XML_CHAR_ENCODING_NONE
7247 * plug some encoding conversion routines.
7248 */
7249 start[0] = RAW;
7250 start[1] = NXT(1);
7251 start[2] = NXT(2);
7252 start[3] = NXT(3);
7253 enc = xmlDetectCharEncoding(start, 4);
7254 if (enc != XML_CHAR_ENCODING_NONE) {
7255 xmlSwitchEncoding(ctxt, enc);
7256 }
7257
7258
7259 if (CUR == 0) {
7260 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7261 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7262 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7263 ctxt->wellFormed = 0;
7264 ctxt->disableSAX = 1;
7265 }
7266
7267 /*
7268 * Check for the XMLDecl in the Prolog.
7269 */
7270 GROW;
7271 if ((RAW == '<') && (NXT(1) == '?') &&
7272 (NXT(2) == 'x') && (NXT(3) == 'm') &&
7273 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
7274
7275 /*
7276 * Note that we will switch encoding on the fly.
7277 */
7278 xmlParseXMLDecl(ctxt);
7279 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7280 /*
7281 * The XML REC instructs us to stop parsing right here
7282 */
7283 return(-1);
7284 }
7285 SKIP_BLANKS;
7286 } else {
7287 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7288 }
7289 if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
7290 ctxt->sax->startDocument(ctxt->userData);
7291
7292 /*
7293 * Doing validity checking on chunk doesn't make sense
7294 */
7295 ctxt->instate = XML_PARSER_CONTENT;
7296 ctxt->validate = 0;
7297 ctxt->loadsubset = 0;
7298 ctxt->depth = 0;
7299
7300 xmlParseContent(ctxt);
7301
7302 if ((RAW == '<') && (NXT(1) == '/')) {
7303 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
7304 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7305 ctxt->sax->error(ctxt->userData,
7306 "chunk is not well balanced\n");
7307 ctxt->wellFormed = 0;
7308 ctxt->disableSAX = 1;
7309 } else if (RAW != 0) {
7310 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
7311 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7312 ctxt->sax->error(ctxt->userData,
7313 "extra content at the end of well balanced chunk\n");
7314 ctxt->wellFormed = 0;
7315 ctxt->disableSAX = 1;
7316 }
7317
7318 /*
7319 * SAX: end of the document processing.
7320 */
7321 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7322 (!ctxt->disableSAX))
7323 ctxt->sax->endDocument(ctxt->userData);
7324
7325 if (! ctxt->wellFormed) return(-1);
7326 return(0);
7327}
7328
7329/************************************************************************
7330 * *
7331 * Progressive parsing interfaces *
7332 * *
7333 ************************************************************************/
7334
7335/**
7336 * xmlParseLookupSequence:
7337 * @ctxt: an XML parser context
7338 * @first: the first char to lookup
7339 * @next: the next char to lookup or zero
7340 * @third: the next char to lookup or zero
7341 *
7342 * Try to find if a sequence (first, next, third) or just (first next) or
7343 * (first) is available in the input stream.
7344 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7345 * to avoid rescanning sequences of bytes, it DOES change the state of the
7346 * parser, do not use liberally.
7347 *
7348 * Returns the index to the current parsing point if the full sequence
7349 * is available, -1 otherwise.
7350 */
7351int
7352xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7353 xmlChar next, xmlChar third) {
7354 int base, len;
7355 xmlParserInputPtr in;
7356 const xmlChar *buf;
7357
7358 in = ctxt->input;
7359 if (in == NULL) return(-1);
7360 base = in->cur - in->base;
7361 if (base < 0) return(-1);
7362 if (ctxt->checkIndex > base)
7363 base = ctxt->checkIndex;
7364 if (in->buf == NULL) {
7365 buf = in->base;
7366 len = in->length;
7367 } else {
7368 buf = in->buf->buffer->content;
7369 len = in->buf->buffer->use;
7370 }
7371 /* take into account the sequence length */
7372 if (third) len -= 2;
7373 else if (next) len --;
7374 for (;base < len;base++) {
7375 if (buf[base] == first) {
7376 if (third != 0) {
7377 if ((buf[base + 1] != next) ||
7378 (buf[base + 2] != third)) continue;
7379 } else if (next != 0) {
7380 if (buf[base + 1] != next) continue;
7381 }
7382 ctxt->checkIndex = 0;
7383#ifdef DEBUG_PUSH
7384 if (next == 0)
7385 xmlGenericError(xmlGenericErrorContext,
7386 "PP: lookup '%c' found at %d\n",
7387 first, base);
7388 else if (third == 0)
7389 xmlGenericError(xmlGenericErrorContext,
7390 "PP: lookup '%c%c' found at %d\n",
7391 first, next, base);
7392 else
7393 xmlGenericError(xmlGenericErrorContext,
7394 "PP: lookup '%c%c%c' found at %d\n",
7395 first, next, third, base);
7396#endif
7397 return(base - (in->cur - in->base));
7398 }
7399 }
7400 ctxt->checkIndex = base;
7401#ifdef DEBUG_PUSH
7402 if (next == 0)
7403 xmlGenericError(xmlGenericErrorContext,
7404 "PP: lookup '%c' failed\n", first);
7405 else if (third == 0)
7406 xmlGenericError(xmlGenericErrorContext,
7407 "PP: lookup '%c%c' failed\n", first, next);
7408 else
7409 xmlGenericError(xmlGenericErrorContext,
7410 "PP: lookup '%c%c%c' failed\n", first, next, third);
7411#endif
7412 return(-1);
7413}
7414
7415/**
7416 * xmlParseTryOrFinish:
7417 * @ctxt: an XML parser context
7418 * @terminate: last chunk indicator
7419 *
7420 * Try to progress on parsing
7421 *
7422 * Returns zero if no parsing was possible
7423 */
7424int
7425xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7426 int ret = 0;
7427 int avail;
7428 xmlChar cur, next;
7429
7430#ifdef DEBUG_PUSH
7431 switch (ctxt->instate) {
7432 case XML_PARSER_EOF:
7433 xmlGenericError(xmlGenericErrorContext,
7434 "PP: try EOF\n"); break;
7435 case XML_PARSER_START:
7436 xmlGenericError(xmlGenericErrorContext,
7437 "PP: try START\n"); break;
7438 case XML_PARSER_MISC:
7439 xmlGenericError(xmlGenericErrorContext,
7440 "PP: try MISC\n");break;
7441 case XML_PARSER_COMMENT:
7442 xmlGenericError(xmlGenericErrorContext,
7443 "PP: try COMMENT\n");break;
7444 case XML_PARSER_PROLOG:
7445 xmlGenericError(xmlGenericErrorContext,
7446 "PP: try PROLOG\n");break;
7447 case XML_PARSER_START_TAG:
7448 xmlGenericError(xmlGenericErrorContext,
7449 "PP: try START_TAG\n");break;
7450 case XML_PARSER_CONTENT:
7451 xmlGenericError(xmlGenericErrorContext,
7452 "PP: try CONTENT\n");break;
7453 case XML_PARSER_CDATA_SECTION:
7454 xmlGenericError(xmlGenericErrorContext,
7455 "PP: try CDATA_SECTION\n");break;
7456 case XML_PARSER_END_TAG:
7457 xmlGenericError(xmlGenericErrorContext,
7458 "PP: try END_TAG\n");break;
7459 case XML_PARSER_ENTITY_DECL:
7460 xmlGenericError(xmlGenericErrorContext,
7461 "PP: try ENTITY_DECL\n");break;
7462 case XML_PARSER_ENTITY_VALUE:
7463 xmlGenericError(xmlGenericErrorContext,
7464 "PP: try ENTITY_VALUE\n");break;
7465 case XML_PARSER_ATTRIBUTE_VALUE:
7466 xmlGenericError(xmlGenericErrorContext,
7467 "PP: try ATTRIBUTE_VALUE\n");break;
7468 case XML_PARSER_DTD:
7469 xmlGenericError(xmlGenericErrorContext,
7470 "PP: try DTD\n");break;
7471 case XML_PARSER_EPILOG:
7472 xmlGenericError(xmlGenericErrorContext,
7473 "PP: try EPILOG\n");break;
7474 case XML_PARSER_PI:
7475 xmlGenericError(xmlGenericErrorContext,
7476 "PP: try PI\n");break;
7477 case XML_PARSER_IGNORE:
7478 xmlGenericError(xmlGenericErrorContext,
7479 "PP: try IGNORE\n");break;
7480 }
7481#endif
7482
7483 while (1) {
7484 /*
7485 * Pop-up of finished entities.
7486 */
7487 while ((RAW == 0) && (ctxt->inputNr > 1))
7488 xmlPopInput(ctxt);
7489
7490 if (ctxt->input ==NULL) break;
7491 if (ctxt->input->buf == NULL)
7492 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7493 else
7494 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7495 if (avail < 1)
7496 goto done;
7497 switch (ctxt->instate) {
7498 case XML_PARSER_EOF:
7499 /*
7500 * Document parsing is done !
7501 */
7502 goto done;
7503 case XML_PARSER_START:
7504 /*
7505 * Very first chars read from the document flow.
7506 */
7507 cur = ctxt->input->cur[0];
7508 if (IS_BLANK(cur)) {
7509 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7510 ctxt->sax->setDocumentLocator(ctxt->userData,
7511 &xmlDefaultSAXLocator);
7512 ctxt->errNo = XML_ERR_DOCUMENT_START;
7513 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7514 ctxt->sax->error(ctxt->userData,
7515 "Extra spaces at the beginning of the document are not allowed\n");
7516 ctxt->wellFormed = 0;
7517 ctxt->disableSAX = 1;
7518 SKIP_BLANKS;
7519 ret++;
7520 if (ctxt->input->buf == NULL)
7521 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7522 else
7523 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7524 }
7525 if (avail < 2)
7526 goto done;
7527
7528 cur = ctxt->input->cur[0];
7529 next = ctxt->input->cur[1];
7530 if (cur == 0) {
7531 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7532 ctxt->sax->setDocumentLocator(ctxt->userData,
7533 &xmlDefaultSAXLocator);
7534 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7535 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7536 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7537 ctxt->wellFormed = 0;
7538 ctxt->disableSAX = 1;
7539 ctxt->instate = XML_PARSER_EOF;
7540#ifdef DEBUG_PUSH
7541 xmlGenericError(xmlGenericErrorContext,
7542 "PP: entering EOF\n");
7543#endif
7544 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7545 ctxt->sax->endDocument(ctxt->userData);
7546 goto done;
7547 }
7548 if ((cur == '<') && (next == '?')) {
7549 /* PI or XML decl */
7550 if (avail < 5) return(ret);
7551 if ((!terminate) &&
7552 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7553 return(ret);
7554 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7555 ctxt->sax->setDocumentLocator(ctxt->userData,
7556 &xmlDefaultSAXLocator);
7557 if ((ctxt->input->cur[2] == 'x') &&
7558 (ctxt->input->cur[3] == 'm') &&
7559 (ctxt->input->cur[4] == 'l') &&
7560 (IS_BLANK(ctxt->input->cur[5]))) {
7561 ret += 5;
7562#ifdef DEBUG_PUSH
7563 xmlGenericError(xmlGenericErrorContext,
7564 "PP: Parsing XML Decl\n");
7565#endif
7566 xmlParseXMLDecl(ctxt);
7567 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7568 /*
7569 * The XML REC instructs us to stop parsing right
7570 * here
7571 */
7572 ctxt->instate = XML_PARSER_EOF;
7573 return(0);
7574 }
7575 ctxt->standalone = ctxt->input->standalone;
7576 if ((ctxt->encoding == NULL) &&
7577 (ctxt->input->encoding != NULL))
7578 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7579 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7580 (!ctxt->disableSAX))
7581 ctxt->sax->startDocument(ctxt->userData);
7582 ctxt->instate = XML_PARSER_MISC;
7583#ifdef DEBUG_PUSH
7584 xmlGenericError(xmlGenericErrorContext,
7585 "PP: entering MISC\n");
7586#endif
7587 } else {
7588 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7589 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7590 (!ctxt->disableSAX))
7591 ctxt->sax->startDocument(ctxt->userData);
7592 ctxt->instate = XML_PARSER_MISC;
7593#ifdef DEBUG_PUSH
7594 xmlGenericError(xmlGenericErrorContext,
7595 "PP: entering MISC\n");
7596#endif
7597 }
7598 } else {
7599 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7600 ctxt->sax->setDocumentLocator(ctxt->userData,
7601 &xmlDefaultSAXLocator);
7602 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7603 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7604 (!ctxt->disableSAX))
7605 ctxt->sax->startDocument(ctxt->userData);
7606 ctxt->instate = XML_PARSER_MISC;
7607#ifdef DEBUG_PUSH
7608 xmlGenericError(xmlGenericErrorContext,
7609 "PP: entering MISC\n");
7610#endif
7611 }
7612 break;
7613 case XML_PARSER_MISC:
7614 SKIP_BLANKS;
7615 if (ctxt->input->buf == NULL)
7616 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7617 else
7618 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7619 if (avail < 2)
7620 goto done;
7621 cur = ctxt->input->cur[0];
7622 next = ctxt->input->cur[1];
7623 if ((cur == '<') && (next == '?')) {
7624 if ((!terminate) &&
7625 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7626 goto done;
7627#ifdef DEBUG_PUSH
7628 xmlGenericError(xmlGenericErrorContext,
7629 "PP: Parsing PI\n");
7630#endif
7631 xmlParsePI(ctxt);
7632 } else if ((cur == '<') && (next == '!') &&
7633 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7634 if ((!terminate) &&
7635 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7636 goto done;
7637#ifdef DEBUG_PUSH
7638 xmlGenericError(xmlGenericErrorContext,
7639 "PP: Parsing Comment\n");
7640#endif
7641 xmlParseComment(ctxt);
7642 ctxt->instate = XML_PARSER_MISC;
7643 } else if ((cur == '<') && (next == '!') &&
7644 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7645 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7646 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7647 (ctxt->input->cur[8] == 'E')) {
7648 if ((!terminate) &&
7649 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7650 goto done;
7651#ifdef DEBUG_PUSH
7652 xmlGenericError(xmlGenericErrorContext,
7653 "PP: Parsing internal subset\n");
7654#endif
7655 ctxt->inSubset = 1;
7656 xmlParseDocTypeDecl(ctxt);
7657 if (RAW == '[') {
7658 ctxt->instate = XML_PARSER_DTD;
7659#ifdef DEBUG_PUSH
7660 xmlGenericError(xmlGenericErrorContext,
7661 "PP: entering DTD\n");
7662#endif
7663 } else {
7664 /*
7665 * Create and update the external subset.
7666 */
7667 ctxt->inSubset = 2;
7668 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7669 (ctxt->sax->externalSubset != NULL))
7670 ctxt->sax->externalSubset(ctxt->userData,
7671 ctxt->intSubName, ctxt->extSubSystem,
7672 ctxt->extSubURI);
7673 ctxt->inSubset = 0;
7674 ctxt->instate = XML_PARSER_PROLOG;
7675#ifdef DEBUG_PUSH
7676 xmlGenericError(xmlGenericErrorContext,
7677 "PP: entering PROLOG\n");
7678#endif
7679 }
7680 } else if ((cur == '<') && (next == '!') &&
7681 (avail < 9)) {
7682 goto done;
7683 } else {
7684 ctxt->instate = XML_PARSER_START_TAG;
7685#ifdef DEBUG_PUSH
7686 xmlGenericError(xmlGenericErrorContext,
7687 "PP: entering START_TAG\n");
7688#endif
7689 }
7690 break;
7691 case XML_PARSER_IGNORE:
7692 xmlGenericError(xmlGenericErrorContext,
7693 "PP: internal error, state == IGNORE");
7694 ctxt->instate = XML_PARSER_DTD;
7695#ifdef DEBUG_PUSH
7696 xmlGenericError(xmlGenericErrorContext,
7697 "PP: entering DTD\n");
7698#endif
7699 break;
7700 case XML_PARSER_PROLOG:
7701 SKIP_BLANKS;
7702 if (ctxt->input->buf == NULL)
7703 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7704 else
7705 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7706 if (avail < 2)
7707 goto done;
7708 cur = ctxt->input->cur[0];
7709 next = ctxt->input->cur[1];
7710 if ((cur == '<') && (next == '?')) {
7711 if ((!terminate) &&
7712 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7713 goto done;
7714#ifdef DEBUG_PUSH
7715 xmlGenericError(xmlGenericErrorContext,
7716 "PP: Parsing PI\n");
7717#endif
7718 xmlParsePI(ctxt);
7719 } else if ((cur == '<') && (next == '!') &&
7720 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7721 if ((!terminate) &&
7722 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7723 goto done;
7724#ifdef DEBUG_PUSH
7725 xmlGenericError(xmlGenericErrorContext,
7726 "PP: Parsing Comment\n");
7727#endif
7728 xmlParseComment(ctxt);
7729 ctxt->instate = XML_PARSER_PROLOG;
7730 } else if ((cur == '<') && (next == '!') &&
7731 (avail < 4)) {
7732 goto done;
7733 } else {
7734 ctxt->instate = XML_PARSER_START_TAG;
7735#ifdef DEBUG_PUSH
7736 xmlGenericError(xmlGenericErrorContext,
7737 "PP: entering START_TAG\n");
7738#endif
7739 }
7740 break;
7741 case XML_PARSER_EPILOG:
7742 SKIP_BLANKS;
7743 if (ctxt->input->buf == NULL)
7744 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7745 else
7746 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7747 if (avail < 2)
7748 goto done;
7749 cur = ctxt->input->cur[0];
7750 next = ctxt->input->cur[1];
7751 if ((cur == '<') && (next == '?')) {
7752 if ((!terminate) &&
7753 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7754 goto done;
7755#ifdef DEBUG_PUSH
7756 xmlGenericError(xmlGenericErrorContext,
7757 "PP: Parsing PI\n");
7758#endif
7759 xmlParsePI(ctxt);
7760 ctxt->instate = XML_PARSER_EPILOG;
7761 } else if ((cur == '<') && (next == '!') &&
7762 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7763 if ((!terminate) &&
7764 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7765 goto done;
7766#ifdef DEBUG_PUSH
7767 xmlGenericError(xmlGenericErrorContext,
7768 "PP: Parsing Comment\n");
7769#endif
7770 xmlParseComment(ctxt);
7771 ctxt->instate = XML_PARSER_EPILOG;
7772 } else if ((cur == '<') && (next == '!') &&
7773 (avail < 4)) {
7774 goto done;
7775 } else {
7776 ctxt->errNo = XML_ERR_DOCUMENT_END;
7777 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7778 ctxt->sax->error(ctxt->userData,
7779 "Extra content at the end of the document\n");
7780 ctxt->wellFormed = 0;
7781 ctxt->disableSAX = 1;
7782 ctxt->instate = XML_PARSER_EOF;
7783#ifdef DEBUG_PUSH
7784 xmlGenericError(xmlGenericErrorContext,
7785 "PP: entering EOF\n");
7786#endif
7787 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7788 (!ctxt->disableSAX))
7789 ctxt->sax->endDocument(ctxt->userData);
7790 goto done;
7791 }
7792 break;
7793 case XML_PARSER_START_TAG: {
7794 xmlChar *name, *oldname;
7795
7796 if ((avail < 2) && (ctxt->inputNr == 1))
7797 goto done;
7798 cur = ctxt->input->cur[0];
7799 if (cur != '<') {
7800 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7801 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7802 ctxt->sax->error(ctxt->userData,
7803 "Start tag expect, '<' not found\n");
7804 ctxt->wellFormed = 0;
7805 ctxt->disableSAX = 1;
7806 ctxt->instate = XML_PARSER_EOF;
7807#ifdef DEBUG_PUSH
7808 xmlGenericError(xmlGenericErrorContext,
7809 "PP: entering EOF\n");
7810#endif
7811 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7812 (!ctxt->disableSAX))
7813 ctxt->sax->endDocument(ctxt->userData);
7814 goto done;
7815 }
7816 if ((!terminate) &&
7817 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7818 goto done;
7819 if (ctxt->spaceNr == 0)
7820 spacePush(ctxt, -1);
7821 else
7822 spacePush(ctxt, *ctxt->space);
7823 name = xmlParseStartTag(ctxt);
7824 if (name == NULL) {
7825 spacePop(ctxt);
7826 ctxt->instate = XML_PARSER_EOF;
7827#ifdef DEBUG_PUSH
7828 xmlGenericError(xmlGenericErrorContext,
7829 "PP: entering EOF\n");
7830#endif
7831 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7832 (!ctxt->disableSAX))
7833 ctxt->sax->endDocument(ctxt->userData);
7834 goto done;
7835 }
7836 namePush(ctxt, xmlStrdup(name));
7837
7838 /*
7839 * [ VC: Root Element Type ]
7840 * The Name in the document type declaration must match
7841 * the element type of the root element.
7842 */
7843 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7844 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7845 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7846
7847 /*
7848 * Check for an Empty Element.
7849 */
7850 if ((RAW == '/') && (NXT(1) == '>')) {
7851 SKIP(2);
7852 if ((ctxt->sax != NULL) &&
7853 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7854 ctxt->sax->endElement(ctxt->userData, name);
7855 xmlFree(name);
7856 oldname = namePop(ctxt);
7857 spacePop(ctxt);
7858 if (oldname != NULL) {
7859#ifdef DEBUG_STACK
7860 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7861#endif
7862 xmlFree(oldname);
7863 }
7864 if (ctxt->name == NULL) {
7865 ctxt->instate = XML_PARSER_EPILOG;
7866#ifdef DEBUG_PUSH
7867 xmlGenericError(xmlGenericErrorContext,
7868 "PP: entering EPILOG\n");
7869#endif
7870 } else {
7871 ctxt->instate = XML_PARSER_CONTENT;
7872#ifdef DEBUG_PUSH
7873 xmlGenericError(xmlGenericErrorContext,
7874 "PP: entering CONTENT\n");
7875#endif
7876 }
7877 break;
7878 }
7879 if (RAW == '>') {
7880 NEXT;
7881 } else {
7882 ctxt->errNo = XML_ERR_GT_REQUIRED;
7883 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7884 ctxt->sax->error(ctxt->userData,
7885 "Couldn't find end of Start Tag %s\n",
7886 name);
7887 ctxt->wellFormed = 0;
7888 ctxt->disableSAX = 1;
7889
7890 /*
7891 * end of parsing of this node.
7892 */
7893 nodePop(ctxt);
7894 oldname = namePop(ctxt);
7895 spacePop(ctxt);
7896 if (oldname != NULL) {
7897#ifdef DEBUG_STACK
7898 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7899#endif
7900 xmlFree(oldname);
7901 }
7902 }
7903 xmlFree(name);
7904 ctxt->instate = XML_PARSER_CONTENT;
7905#ifdef DEBUG_PUSH
7906 xmlGenericError(xmlGenericErrorContext,
7907 "PP: entering CONTENT\n");
7908#endif
7909 break;
7910 }
7911 case XML_PARSER_CONTENT: {
7912 const xmlChar *test;
7913 int cons;
7914 xmlChar tok;
7915
7916 /*
7917 * Handle preparsed entities and charRef
7918 */
7919 if (ctxt->token != 0) {
7920 xmlChar cur[2] = { 0 , 0 } ;
7921
7922 cur[0] = (xmlChar) ctxt->token;
7923 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7924 (ctxt->sax->characters != NULL))
7925 ctxt->sax->characters(ctxt->userData, cur, 1);
7926 ctxt->token = 0;
7927 }
7928 if ((avail < 2) && (ctxt->inputNr == 1))
7929 goto done;
7930 cur = ctxt->input->cur[0];
7931 next = ctxt->input->cur[1];
7932
7933 test = CUR_PTR;
7934 cons = ctxt->input->consumed;
7935 tok = ctxt->token;
7936 if ((cur == '<') && (next == '?')) {
7937 if ((!terminate) &&
7938 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7939 goto done;
7940#ifdef DEBUG_PUSH
7941 xmlGenericError(xmlGenericErrorContext,
7942 "PP: Parsing PI\n");
7943#endif
7944 xmlParsePI(ctxt);
7945 } else if ((cur == '<') && (next == '!') &&
7946 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7947 if ((!terminate) &&
7948 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7949 goto done;
7950#ifdef DEBUG_PUSH
7951 xmlGenericError(xmlGenericErrorContext,
7952 "PP: Parsing Comment\n");
7953#endif
7954 xmlParseComment(ctxt);
7955 ctxt->instate = XML_PARSER_CONTENT;
7956 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7957 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7958 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7959 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7960 (ctxt->input->cur[8] == '[')) {
7961 SKIP(9);
7962 ctxt->instate = XML_PARSER_CDATA_SECTION;
7963#ifdef DEBUG_PUSH
7964 xmlGenericError(xmlGenericErrorContext,
7965 "PP: entering CDATA_SECTION\n");
7966#endif
7967 break;
7968 } else if ((cur == '<') && (next == '!') &&
7969 (avail < 9)) {
7970 goto done;
7971 } else if ((cur == '<') && (next == '/')) {
7972 ctxt->instate = XML_PARSER_END_TAG;
7973#ifdef DEBUG_PUSH
7974 xmlGenericError(xmlGenericErrorContext,
7975 "PP: entering END_TAG\n");
7976#endif
7977 break;
7978 } else if (cur == '<') {
7979 ctxt->instate = XML_PARSER_START_TAG;
7980#ifdef DEBUG_PUSH
7981 xmlGenericError(xmlGenericErrorContext,
7982 "PP: entering START_TAG\n");
7983#endif
7984 break;
7985 } else if (cur == '&') {
7986 if ((!terminate) &&
7987 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
7988 goto done;
7989#ifdef DEBUG_PUSH
7990 xmlGenericError(xmlGenericErrorContext,
7991 "PP: Parsing Reference\n");
7992#endif
7993 xmlParseReference(ctxt);
7994 } else {
7995 /* TODO Avoid the extra copy, handle directly !!! */
7996 /*
7997 * Goal of the following test is:
7998 * - minimize calls to the SAX 'character' callback
7999 * when they are mergeable
8000 * - handle an problem for isBlank when we only parse
8001 * a sequence of blank chars and the next one is
8002 * not available to check against '<' presence.
8003 * - tries to homogenize the differences in SAX
8004 * callbacks beween the push and pull versions
8005 * of the parser.
8006 */
8007 if ((ctxt->inputNr == 1) &&
8008 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8009 if ((!terminate) &&
8010 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8011 goto done;
8012 }
8013 ctxt->checkIndex = 0;
8014#ifdef DEBUG_PUSH
8015 xmlGenericError(xmlGenericErrorContext,
8016 "PP: Parsing char data\n");
8017#endif
8018 xmlParseCharData(ctxt, 0);
8019 }
8020 /*
8021 * Pop-up of finished entities.
8022 */
8023 while ((RAW == 0) && (ctxt->inputNr > 1))
8024 xmlPopInput(ctxt);
8025 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8026 (tok == ctxt->token)) {
8027 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8028 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8029 ctxt->sax->error(ctxt->userData,
8030 "detected an error in element content\n");
8031 ctxt->wellFormed = 0;
8032 ctxt->disableSAX = 1;
8033 ctxt->instate = XML_PARSER_EOF;
8034 break;
8035 }
8036 break;
8037 }
8038 case XML_PARSER_CDATA_SECTION: {
8039 /*
8040 * The Push mode need to have the SAX callback for
8041 * cdataBlock merge back contiguous callbacks.
8042 */
8043 int base;
8044
8045 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8046 if (base < 0) {
8047 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8048 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8049 if (ctxt->sax->cdataBlock != NULL)
8050 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8051 XML_PARSER_BIG_BUFFER_SIZE);
8052 }
8053 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8054 ctxt->checkIndex = 0;
8055 }
8056 goto done;
8057 } else {
8058 if ((ctxt->sax != NULL) && (base > 0) &&
8059 (!ctxt->disableSAX)) {
8060 if (ctxt->sax->cdataBlock != NULL)
8061 ctxt->sax->cdataBlock(ctxt->userData,
8062 ctxt->input->cur, base);
8063 }
8064 SKIP(base + 3);
8065 ctxt->checkIndex = 0;
8066 ctxt->instate = XML_PARSER_CONTENT;
8067#ifdef DEBUG_PUSH
8068 xmlGenericError(xmlGenericErrorContext,
8069 "PP: entering CONTENT\n");
8070#endif
8071 }
8072 break;
8073 }
8074 case XML_PARSER_END_TAG:
8075 if (avail < 2)
8076 goto done;
8077 if ((!terminate) &&
8078 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8079 goto done;
8080 xmlParseEndTag(ctxt);
8081 if (ctxt->name == NULL) {
8082 ctxt->instate = XML_PARSER_EPILOG;
8083#ifdef DEBUG_PUSH
8084 xmlGenericError(xmlGenericErrorContext,
8085 "PP: entering EPILOG\n");
8086#endif
8087 } else {
8088 ctxt->instate = XML_PARSER_CONTENT;
8089#ifdef DEBUG_PUSH
8090 xmlGenericError(xmlGenericErrorContext,
8091 "PP: entering CONTENT\n");
8092#endif
8093 }
8094 break;
8095 case XML_PARSER_DTD: {
8096 /*
8097 * Sorry but progressive parsing of the internal subset
8098 * is not expected to be supported. We first check that
8099 * the full content of the internal subset is available and
8100 * the parsing is launched only at that point.
8101 * Internal subset ends up with "']' S? '>'" in an unescaped
8102 * section and not in a ']]>' sequence which are conditional
8103 * sections (whoever argued to keep that crap in XML deserve
8104 * a place in hell !).
8105 */
8106 int base, i;
8107 xmlChar *buf;
8108 xmlChar quote = 0;
8109
8110 base = ctxt->input->cur - ctxt->input->base;
8111 if (base < 0) return(0);
8112 if (ctxt->checkIndex > base)
8113 base = ctxt->checkIndex;
8114 buf = ctxt->input->buf->buffer->content;
8115 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8116 base++) {
8117 if (quote != 0) {
8118 if (buf[base] == quote)
8119 quote = 0;
8120 continue;
8121 }
8122 if (buf[base] == '"') {
8123 quote = '"';
8124 continue;
8125 }
8126 if (buf[base] == '\'') {
8127 quote = '\'';
8128 continue;
8129 }
8130 if (buf[base] == ']') {
8131 if ((unsigned int) base +1 >=
8132 ctxt->input->buf->buffer->use)
8133 break;
8134 if (buf[base + 1] == ']') {
8135 /* conditional crap, skip both ']' ! */
8136 base++;
8137 continue;
8138 }
8139 for (i = 0;
8140 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8141 i++) {
8142 if (buf[base + i] == '>')
8143 goto found_end_int_subset;
8144 }
8145 break;
8146 }
8147 }
8148 /*
8149 * We didn't found the end of the Internal subset
8150 */
8151 if (quote == 0)
8152 ctxt->checkIndex = base;
8153#ifdef DEBUG_PUSH
8154 if (next == 0)
8155 xmlGenericError(xmlGenericErrorContext,
8156 "PP: lookup of int subset end filed\n");
8157#endif
8158 goto done;
8159
8160found_end_int_subset:
8161 xmlParseInternalSubset(ctxt);
8162 ctxt->inSubset = 2;
8163 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8164 (ctxt->sax->externalSubset != NULL))
8165 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8166 ctxt->extSubSystem, ctxt->extSubURI);
8167 ctxt->inSubset = 0;
8168 ctxt->instate = XML_PARSER_PROLOG;
8169 ctxt->checkIndex = 0;
8170#ifdef DEBUG_PUSH
8171 xmlGenericError(xmlGenericErrorContext,
8172 "PP: entering PROLOG\n");
8173#endif
8174 break;
8175 }
8176 case XML_PARSER_COMMENT:
8177 xmlGenericError(xmlGenericErrorContext,
8178 "PP: internal error, state == COMMENT\n");
8179 ctxt->instate = XML_PARSER_CONTENT;
8180#ifdef DEBUG_PUSH
8181 xmlGenericError(xmlGenericErrorContext,
8182 "PP: entering CONTENT\n");
8183#endif
8184 break;
8185 case XML_PARSER_PI:
8186 xmlGenericError(xmlGenericErrorContext,
8187 "PP: internal error, state == PI\n");
8188 ctxt->instate = XML_PARSER_CONTENT;
8189#ifdef DEBUG_PUSH
8190 xmlGenericError(xmlGenericErrorContext,
8191 "PP: entering CONTENT\n");
8192#endif
8193 break;
8194 case XML_PARSER_ENTITY_DECL:
8195 xmlGenericError(xmlGenericErrorContext,
8196 "PP: internal error, state == ENTITY_DECL\n");
8197 ctxt->instate = XML_PARSER_DTD;
8198#ifdef DEBUG_PUSH
8199 xmlGenericError(xmlGenericErrorContext,
8200 "PP: entering DTD\n");
8201#endif
8202 break;
8203 case XML_PARSER_ENTITY_VALUE:
8204 xmlGenericError(xmlGenericErrorContext,
8205 "PP: internal error, state == ENTITY_VALUE\n");
8206 ctxt->instate = XML_PARSER_CONTENT;
8207#ifdef DEBUG_PUSH
8208 xmlGenericError(xmlGenericErrorContext,
8209 "PP: entering DTD\n");
8210#endif
8211 break;
8212 case XML_PARSER_ATTRIBUTE_VALUE:
8213 xmlGenericError(xmlGenericErrorContext,
8214 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8215 ctxt->instate = XML_PARSER_START_TAG;
8216#ifdef DEBUG_PUSH
8217 xmlGenericError(xmlGenericErrorContext,
8218 "PP: entering START_TAG\n");
8219#endif
8220 break;
8221 case XML_PARSER_SYSTEM_LITERAL:
8222 xmlGenericError(xmlGenericErrorContext,
8223 "PP: internal error, state == SYSTEM_LITERAL\n");
8224 ctxt->instate = XML_PARSER_START_TAG;
8225#ifdef DEBUG_PUSH
8226 xmlGenericError(xmlGenericErrorContext,
8227 "PP: entering START_TAG\n");
8228#endif
8229 break;
8230 }
8231 }
8232done:
8233#ifdef DEBUG_PUSH
8234 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8235#endif
8236 return(ret);
8237}
8238
8239/**
8240 * xmlParseTry:
8241 * @ctxt: an XML parser context
8242 *
8243 * Try to progress on parsing
8244 *
8245 * Returns zero if no parsing was possible
8246 */
8247int
8248xmlParseTry(xmlParserCtxtPtr ctxt) {
8249 return(xmlParseTryOrFinish(ctxt, 0));
8250}
8251
8252/**
8253 * xmlParseChunk:
8254 * @ctxt: an XML parser context
8255 * @chunk: an char array
8256 * @size: the size in byte of the chunk
8257 * @terminate: last chunk indicator
8258 *
8259 * Parse a Chunk of memory
8260 *
8261 * Returns zero if no error, the xmlParserErrors otherwise.
8262 */
8263int
8264xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8265 int terminate) {
8266 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8267 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8268 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8269 int cur = ctxt->input->cur - ctxt->input->base;
8270
8271 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8272 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8273 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008274 ctxt->input->end =
8275 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008276#ifdef DEBUG_PUSH
8277 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8278#endif
8279
8280 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8281 xmlParseTryOrFinish(ctxt, terminate);
8282 } else if (ctxt->instate != XML_PARSER_EOF) {
8283 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8284 xmlParserInputBufferPtr in = ctxt->input->buf;
8285 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8286 (in->raw != NULL)) {
8287 int nbchars;
8288
8289 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8290 if (nbchars < 0) {
8291 xmlGenericError(xmlGenericErrorContext,
8292 "xmlParseChunk: encoder error\n");
8293 return(XML_ERR_INVALID_ENCODING);
8294 }
8295 }
8296 }
8297 }
8298 xmlParseTryOrFinish(ctxt, terminate);
8299 if (terminate) {
8300 /*
8301 * Check for termination
8302 */
8303 if ((ctxt->instate != XML_PARSER_EOF) &&
8304 (ctxt->instate != XML_PARSER_EPILOG)) {
8305 ctxt->errNo = XML_ERR_DOCUMENT_END;
8306 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8307 ctxt->sax->error(ctxt->userData,
8308 "Extra content at the end of the document\n");
8309 ctxt->wellFormed = 0;
8310 ctxt->disableSAX = 1;
8311 }
8312 if (ctxt->instate != XML_PARSER_EOF) {
8313 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8314 (!ctxt->disableSAX))
8315 ctxt->sax->endDocument(ctxt->userData);
8316 }
8317 ctxt->instate = XML_PARSER_EOF;
8318 }
8319 return((xmlParserErrors) ctxt->errNo);
8320}
8321
8322/************************************************************************
8323 * *
8324 * I/O front end functions to the parser *
8325 * *
8326 ************************************************************************/
8327
8328/**
8329 * xmlStopParser:
8330 * @ctxt: an XML parser context
8331 *
8332 * Blocks further parser processing
8333 */
8334void
8335xmlStopParser(xmlParserCtxtPtr ctxt) {
8336 ctxt->instate = XML_PARSER_EOF;
8337 if (ctxt->input != NULL)
8338 ctxt->input->cur = BAD_CAST"";
8339}
8340
8341/**
8342 * xmlCreatePushParserCtxt:
8343 * @sax: a SAX handler
8344 * @user_data: The user data returned on SAX callbacks
8345 * @chunk: a pointer to an array of chars
8346 * @size: number of chars in the array
8347 * @filename: an optional file name or URI
8348 *
8349 * Create a parser context for using the XML parser in push mode
8350 * To allow content encoding detection, @size should be >= 4
8351 * The value of @filename is used for fetching external entities
8352 * and error/warning reports.
8353 *
8354 * Returns the new parser context or NULL
8355 */
8356xmlParserCtxtPtr
8357xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8358 const char *chunk, int size, const char *filename) {
8359 xmlParserCtxtPtr ctxt;
8360 xmlParserInputPtr inputStream;
8361 xmlParserInputBufferPtr buf;
8362 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8363
8364 /*
8365 * plug some encoding conversion routines
8366 */
8367 if ((chunk != NULL) && (size >= 4))
8368 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8369
8370 buf = xmlAllocParserInputBuffer(enc);
8371 if (buf == NULL) return(NULL);
8372
8373 ctxt = xmlNewParserCtxt();
8374 if (ctxt == NULL) {
8375 xmlFree(buf);
8376 return(NULL);
8377 }
8378 if (sax != NULL) {
8379 if (ctxt->sax != &xmlDefaultSAXHandler)
8380 xmlFree(ctxt->sax);
8381 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8382 if (ctxt->sax == NULL) {
8383 xmlFree(buf);
8384 xmlFree(ctxt);
8385 return(NULL);
8386 }
8387 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8388 if (user_data != NULL)
8389 ctxt->userData = user_data;
8390 }
8391 if (filename == NULL) {
8392 ctxt->directory = NULL;
8393 } else {
8394 ctxt->directory = xmlParserGetDirectory(filename);
8395 }
8396
8397 inputStream = xmlNewInputStream(ctxt);
8398 if (inputStream == NULL) {
8399 xmlFreeParserCtxt(ctxt);
8400 return(NULL);
8401 }
8402
8403 if (filename == NULL)
8404 inputStream->filename = NULL;
8405 else
8406 inputStream->filename = xmlMemStrdup(filename);
8407 inputStream->buf = buf;
8408 inputStream->base = inputStream->buf->buffer->content;
8409 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008410 inputStream->end =
8411 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008412 if (enc != XML_CHAR_ENCODING_NONE) {
8413 xmlSwitchEncoding(ctxt, enc);
8414 }
8415
8416 inputPush(ctxt, inputStream);
8417
8418 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8419 (ctxt->input->buf != NULL)) {
8420 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8421#ifdef DEBUG_PUSH
8422 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8423#endif
8424 }
8425
8426 return(ctxt);
8427}
8428
8429/**
8430 * xmlCreateIOParserCtxt:
8431 * @sax: a SAX handler
8432 * @user_data: The user data returned on SAX callbacks
8433 * @ioread: an I/O read function
8434 * @ioclose: an I/O close function
8435 * @ioctx: an I/O handler
8436 * @enc: the charset encoding if known
8437 *
8438 * Create a parser context for using the XML parser with an existing
8439 * I/O stream
8440 *
8441 * Returns the new parser context or NULL
8442 */
8443xmlParserCtxtPtr
8444xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8445 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8446 void *ioctx, xmlCharEncoding enc) {
8447 xmlParserCtxtPtr ctxt;
8448 xmlParserInputPtr inputStream;
8449 xmlParserInputBufferPtr buf;
8450
8451 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8452 if (buf == NULL) return(NULL);
8453
8454 ctxt = xmlNewParserCtxt();
8455 if (ctxt == NULL) {
8456 xmlFree(buf);
8457 return(NULL);
8458 }
8459 if (sax != NULL) {
8460 if (ctxt->sax != &xmlDefaultSAXHandler)
8461 xmlFree(ctxt->sax);
8462 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8463 if (ctxt->sax == NULL) {
8464 xmlFree(buf);
8465 xmlFree(ctxt);
8466 return(NULL);
8467 }
8468 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8469 if (user_data != NULL)
8470 ctxt->userData = user_data;
8471 }
8472
8473 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8474 if (inputStream == NULL) {
8475 xmlFreeParserCtxt(ctxt);
8476 return(NULL);
8477 }
8478 inputPush(ctxt, inputStream);
8479
8480 return(ctxt);
8481}
8482
8483/************************************************************************
8484 * *
8485 * Front ends when parsing a Dtd *
8486 * *
8487 ************************************************************************/
8488
8489/**
8490 * xmlIOParseDTD:
8491 * @sax: the SAX handler block or NULL
8492 * @input: an Input Buffer
8493 * @enc: the charset encoding if known
8494 *
8495 * Load and parse a DTD
8496 *
8497 * Returns the resulting xmlDtdPtr or NULL in case of error.
8498 * @input will be freed at parsing end.
8499 */
8500
8501xmlDtdPtr
8502xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8503 xmlCharEncoding enc) {
8504 xmlDtdPtr ret = NULL;
8505 xmlParserCtxtPtr ctxt;
8506 xmlParserInputPtr pinput = NULL;
8507
8508 if (input == NULL)
8509 return(NULL);
8510
8511 ctxt = xmlNewParserCtxt();
8512 if (ctxt == NULL) {
8513 return(NULL);
8514 }
8515
8516 /*
8517 * Set-up the SAX context
8518 */
8519 if (sax != NULL) {
8520 if (ctxt->sax != NULL)
8521 xmlFree(ctxt->sax);
8522 ctxt->sax = sax;
8523 ctxt->userData = NULL;
8524 }
8525
8526 /*
8527 * generate a parser input from the I/O handler
8528 */
8529
8530 pinput = xmlNewIOInputStream(ctxt, input, enc);
8531 if (pinput == NULL) {
8532 if (sax != NULL) ctxt->sax = NULL;
8533 xmlFreeParserCtxt(ctxt);
8534 return(NULL);
8535 }
8536
8537 /*
8538 * plug some encoding conversion routines here.
8539 */
8540 xmlPushInput(ctxt, pinput);
8541
8542 pinput->filename = NULL;
8543 pinput->line = 1;
8544 pinput->col = 1;
8545 pinput->base = ctxt->input->cur;
8546 pinput->cur = ctxt->input->cur;
8547 pinput->free = NULL;
8548
8549 /*
8550 * let's parse that entity knowing it's an external subset.
8551 */
8552 ctxt->inSubset = 2;
8553 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8554 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8555 BAD_CAST "none", BAD_CAST "none");
8556 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8557
8558 if (ctxt->myDoc != NULL) {
8559 if (ctxt->wellFormed) {
8560 ret = ctxt->myDoc->extSubset;
8561 ctxt->myDoc->extSubset = NULL;
8562 } else {
8563 ret = NULL;
8564 }
8565 xmlFreeDoc(ctxt->myDoc);
8566 ctxt->myDoc = NULL;
8567 }
8568 if (sax != NULL) ctxt->sax = NULL;
8569 xmlFreeParserCtxt(ctxt);
8570
8571 return(ret);
8572}
8573
8574/**
8575 * xmlSAXParseDTD:
8576 * @sax: the SAX handler block
8577 * @ExternalID: a NAME* containing the External ID of the DTD
8578 * @SystemID: a NAME* containing the URL to the DTD
8579 *
8580 * Load and parse an external subset.
8581 *
8582 * Returns the resulting xmlDtdPtr or NULL in case of error.
8583 */
8584
8585xmlDtdPtr
8586xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8587 const xmlChar *SystemID) {
8588 xmlDtdPtr ret = NULL;
8589 xmlParserCtxtPtr ctxt;
8590 xmlParserInputPtr input = NULL;
8591 xmlCharEncoding enc;
8592
8593 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8594
8595 ctxt = xmlNewParserCtxt();
8596 if (ctxt == NULL) {
8597 return(NULL);
8598 }
8599
8600 /*
8601 * Set-up the SAX context
8602 */
8603 if (sax != NULL) {
8604 if (ctxt->sax != NULL)
8605 xmlFree(ctxt->sax);
8606 ctxt->sax = sax;
8607 ctxt->userData = NULL;
8608 }
8609
8610 /*
8611 * Ask the Entity resolver to load the damn thing
8612 */
8613
8614 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8615 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8616 if (input == NULL) {
8617 if (sax != NULL) ctxt->sax = NULL;
8618 xmlFreeParserCtxt(ctxt);
8619 return(NULL);
8620 }
8621
8622 /*
8623 * plug some encoding conversion routines here.
8624 */
8625 xmlPushInput(ctxt, input);
8626 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8627 xmlSwitchEncoding(ctxt, enc);
8628
8629 if (input->filename == NULL)
8630 input->filename = (char *) xmlStrdup(SystemID);
8631 input->line = 1;
8632 input->col = 1;
8633 input->base = ctxt->input->cur;
8634 input->cur = ctxt->input->cur;
8635 input->free = NULL;
8636
8637 /*
8638 * let's parse that entity knowing it's an external subset.
8639 */
8640 ctxt->inSubset = 2;
8641 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8642 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8643 ExternalID, SystemID);
8644 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8645
8646 if (ctxt->myDoc != NULL) {
8647 if (ctxt->wellFormed) {
8648 ret = ctxt->myDoc->extSubset;
8649 ctxt->myDoc->extSubset = NULL;
8650 } else {
8651 ret = NULL;
8652 }
8653 xmlFreeDoc(ctxt->myDoc);
8654 ctxt->myDoc = NULL;
8655 }
8656 if (sax != NULL) ctxt->sax = NULL;
8657 xmlFreeParserCtxt(ctxt);
8658
8659 return(ret);
8660}
8661
8662/**
8663 * xmlParseDTD:
8664 * @ExternalID: a NAME* containing the External ID of the DTD
8665 * @SystemID: a NAME* containing the URL to the DTD
8666 *
8667 * Load and parse an external subset.
8668 *
8669 * Returns the resulting xmlDtdPtr or NULL in case of error.
8670 */
8671
8672xmlDtdPtr
8673xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8674 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8675}
8676
8677/************************************************************************
8678 * *
8679 * Front ends when parsing an Entity *
8680 * *
8681 ************************************************************************/
8682
8683/**
8684 * xmlSAXParseBalancedChunk:
8685 * @ctx: an XML parser context (possibly NULL)
8686 * @sax: the SAX handler bloc (possibly NULL)
8687 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8688 * @input: a parser input stream
8689 * @enc: the encoding
8690 *
8691 * Parse a well-balanced chunk of an XML document
8692 * The user has to provide SAX callback block whose routines will be
8693 * called by the parser
8694 * The allowed sequence for the Well Balanced Chunk is the one defined by
8695 * the content production in the XML grammar:
8696 *
8697 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8698 *
8699 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8700 * the error code otherwise
8701 */
8702
8703int
8704xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8705 void *user_data, xmlParserInputPtr input,
8706 xmlCharEncoding enc) {
8707 xmlParserCtxtPtr ctxt;
8708 int ret;
8709
8710 if (input == NULL) return(-1);
8711
8712 if (ctx != NULL)
8713 ctxt = ctx;
8714 else {
8715 ctxt = xmlNewParserCtxt();
8716 if (ctxt == NULL)
8717 return(-1);
8718 if (sax == NULL)
8719 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8720 }
8721
8722 /*
8723 * Set-up the SAX context
8724 */
8725 if (sax != NULL) {
8726 if (ctxt->sax != NULL)
8727 xmlFree(ctxt->sax);
8728 ctxt->sax = sax;
8729 ctxt->userData = user_data;
8730 }
8731
8732 /*
8733 * plug some encoding conversion routines here.
8734 */
8735 xmlPushInput(ctxt, input);
8736 if (enc != XML_CHAR_ENCODING_NONE)
8737 xmlSwitchEncoding(ctxt, enc);
8738
8739 /*
8740 * let's parse that entity knowing it's an external subset.
8741 */
8742 xmlParseContent(ctxt);
8743 ret = ctxt->errNo;
8744
8745 if (ctx == NULL) {
8746 if (sax != NULL)
8747 ctxt->sax = NULL;
8748 else
8749 xmlFreeDoc(ctxt->myDoc);
8750 xmlFreeParserCtxt(ctxt);
8751 }
8752 return(ret);
8753}
8754
8755/**
8756 * xmlParseCtxtExternalEntity:
8757 * @ctx: the existing parsing context
8758 * @URL: the URL for the entity to load
8759 * @ID: the System ID for the entity to load
8760 * @list: the return value for the set of parsed nodes
8761 *
8762 * Parse an external general entity within an existing parsing context
8763 * An external general parsed entity is well-formed if it matches the
8764 * production labeled extParsedEnt.
8765 *
8766 * [78] extParsedEnt ::= TextDecl? content
8767 *
8768 * Returns 0 if the entity is well formed, -1 in case of args problem and
8769 * the parser error code otherwise
8770 */
8771
8772int
8773xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8774 const xmlChar *ID, xmlNodePtr *list) {
8775 xmlParserCtxtPtr ctxt;
8776 xmlDocPtr newDoc;
8777 xmlSAXHandlerPtr oldsax = NULL;
8778 int ret = 0;
8779
8780 if (ctx->depth > 40) {
8781 return(XML_ERR_ENTITY_LOOP);
8782 }
8783
8784 if (list != NULL)
8785 *list = NULL;
8786 if ((URL == NULL) && (ID == NULL))
8787 return(-1);
8788 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8789 return(-1);
8790
8791
8792 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8793 if (ctxt == NULL) return(-1);
8794 ctxt->userData = ctxt;
8795 oldsax = ctxt->sax;
8796 ctxt->sax = ctx->sax;
8797 newDoc = xmlNewDoc(BAD_CAST "1.0");
8798 if (newDoc == NULL) {
8799 xmlFreeParserCtxt(ctxt);
8800 return(-1);
8801 }
8802 if (ctx->myDoc != NULL) {
8803 newDoc->intSubset = ctx->myDoc->intSubset;
8804 newDoc->extSubset = ctx->myDoc->extSubset;
8805 }
8806 if (ctx->myDoc->URL != NULL) {
8807 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8808 }
8809 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8810 if (newDoc->children == NULL) {
8811 ctxt->sax = oldsax;
8812 xmlFreeParserCtxt(ctxt);
8813 newDoc->intSubset = NULL;
8814 newDoc->extSubset = NULL;
8815 xmlFreeDoc(newDoc);
8816 return(-1);
8817 }
8818 nodePush(ctxt, newDoc->children);
8819 if (ctx->myDoc == NULL) {
8820 ctxt->myDoc = newDoc;
8821 } else {
8822 ctxt->myDoc = ctx->myDoc;
8823 newDoc->children->doc = ctx->myDoc;
8824 }
8825
8826 /*
8827 * Parse a possible text declaration first
8828 */
8829 GROW;
8830 if ((RAW == '<') && (NXT(1) == '?') &&
8831 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8832 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8833 xmlParseTextDecl(ctxt);
8834 }
8835
8836 /*
8837 * Doing validity checking on chunk doesn't make sense
8838 */
8839 ctxt->instate = XML_PARSER_CONTENT;
8840 ctxt->validate = ctx->validate;
8841 ctxt->loadsubset = ctx->loadsubset;
8842 ctxt->depth = ctx->depth + 1;
8843 ctxt->replaceEntities = ctx->replaceEntities;
8844 if (ctxt->validate) {
8845 ctxt->vctxt.error = ctx->vctxt.error;
8846 ctxt->vctxt.warning = ctx->vctxt.warning;
8847 /* Allocate the Node stack */
8848 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8849 if (ctxt->vctxt.nodeTab == NULL) {
8850 xmlGenericError(xmlGenericErrorContext,
8851 "xmlParseCtxtExternalEntity: out of memory\n");
8852 ctxt->validate = 0;
8853 ctxt->vctxt.error = NULL;
8854 ctxt->vctxt.warning = NULL;
8855 } else {
8856 ctxt->vctxt.nodeNr = 0;
8857 ctxt->vctxt.nodeMax = 4;
8858 ctxt->vctxt.node = NULL;
8859 }
8860 } else {
8861 ctxt->vctxt.error = NULL;
8862 ctxt->vctxt.warning = NULL;
8863 }
8864
8865 xmlParseContent(ctxt);
8866
8867 if ((RAW == '<') && (NXT(1) == '/')) {
8868 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8870 ctxt->sax->error(ctxt->userData,
8871 "chunk is not well balanced\n");
8872 ctxt->wellFormed = 0;
8873 ctxt->disableSAX = 1;
8874 } else if (RAW != 0) {
8875 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8876 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8877 ctxt->sax->error(ctxt->userData,
8878 "extra content at the end of well balanced chunk\n");
8879 ctxt->wellFormed = 0;
8880 ctxt->disableSAX = 1;
8881 }
8882 if (ctxt->node != newDoc->children) {
8883 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8884 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8885 ctxt->sax->error(ctxt->userData,
8886 "chunk is not well balanced\n");
8887 ctxt->wellFormed = 0;
8888 ctxt->disableSAX = 1;
8889 }
8890
8891 if (!ctxt->wellFormed) {
8892 if (ctxt->errNo == 0)
8893 ret = 1;
8894 else
8895 ret = ctxt->errNo;
8896 } else {
8897 if (list != NULL) {
8898 xmlNodePtr cur;
8899
8900 /*
8901 * Return the newly created nodeset after unlinking it from
8902 * they pseudo parent.
8903 */
8904 cur = newDoc->children->children;
8905 *list = cur;
8906 while (cur != NULL) {
8907 cur->parent = NULL;
8908 cur = cur->next;
8909 }
8910 newDoc->children->children = NULL;
8911 }
8912 ret = 0;
8913 }
8914 ctxt->sax = oldsax;
8915 xmlFreeParserCtxt(ctxt);
8916 newDoc->intSubset = NULL;
8917 newDoc->extSubset = NULL;
8918 xmlFreeDoc(newDoc);
8919
8920 return(ret);
8921}
8922
8923/**
8924 * xmlParseExternalEntity:
8925 * @doc: the document the chunk pertains to
8926 * @sax: the SAX handler bloc (possibly NULL)
8927 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8928 * @depth: Used for loop detection, use 0
8929 * @URL: the URL for the entity to load
8930 * @ID: the System ID for the entity to load
8931 * @list: the return value for the set of parsed nodes
8932 *
8933 * Parse an external general entity
8934 * An external general parsed entity is well-formed if it matches the
8935 * production labeled extParsedEnt.
8936 *
8937 * [78] extParsedEnt ::= TextDecl? content
8938 *
8939 * Returns 0 if the entity is well formed, -1 in case of args problem and
8940 * the parser error code otherwise
8941 */
8942
8943int
8944xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8945 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8946 xmlParserCtxtPtr ctxt;
8947 xmlDocPtr newDoc;
8948 xmlSAXHandlerPtr oldsax = NULL;
8949 int ret = 0;
8950
8951 if (depth > 40) {
8952 return(XML_ERR_ENTITY_LOOP);
8953 }
8954
8955
8956
8957 if (list != NULL)
8958 *list = NULL;
8959 if ((URL == NULL) && (ID == NULL))
8960 return(-1);
8961 if (doc == NULL) /* @@ relax but check for dereferences */
8962 return(-1);
8963
8964
8965 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8966 if (ctxt == NULL) return(-1);
8967 ctxt->userData = ctxt;
8968 if (sax != NULL) {
8969 oldsax = ctxt->sax;
8970 ctxt->sax = sax;
8971 if (user_data != NULL)
8972 ctxt->userData = user_data;
8973 }
8974 newDoc = xmlNewDoc(BAD_CAST "1.0");
8975 if (newDoc == NULL) {
8976 xmlFreeParserCtxt(ctxt);
8977 return(-1);
8978 }
8979 if (doc != NULL) {
8980 newDoc->intSubset = doc->intSubset;
8981 newDoc->extSubset = doc->extSubset;
8982 }
8983 if (doc->URL != NULL) {
8984 newDoc->URL = xmlStrdup(doc->URL);
8985 }
8986 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8987 if (newDoc->children == NULL) {
8988 if (sax != NULL)
8989 ctxt->sax = oldsax;
8990 xmlFreeParserCtxt(ctxt);
8991 newDoc->intSubset = NULL;
8992 newDoc->extSubset = NULL;
8993 xmlFreeDoc(newDoc);
8994 return(-1);
8995 }
8996 nodePush(ctxt, newDoc->children);
8997 if (doc == NULL) {
8998 ctxt->myDoc = newDoc;
8999 } else {
9000 ctxt->myDoc = doc;
9001 newDoc->children->doc = doc;
9002 }
9003
9004 /*
9005 * Parse a possible text declaration first
9006 */
9007 GROW;
9008 if ((RAW == '<') && (NXT(1) == '?') &&
9009 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9010 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9011 xmlParseTextDecl(ctxt);
9012 }
9013
9014 /*
9015 * Doing validity checking on chunk doesn't make sense
9016 */
9017 ctxt->instate = XML_PARSER_CONTENT;
9018 ctxt->validate = 0;
9019 ctxt->loadsubset = 0;
9020 ctxt->depth = depth;
9021
9022 xmlParseContent(ctxt);
9023
9024 if ((RAW == '<') && (NXT(1) == '/')) {
9025 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9026 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9027 ctxt->sax->error(ctxt->userData,
9028 "chunk is not well balanced\n");
9029 ctxt->wellFormed = 0;
9030 ctxt->disableSAX = 1;
9031 } else if (RAW != 0) {
9032 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9033 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9034 ctxt->sax->error(ctxt->userData,
9035 "extra content at the end of well balanced chunk\n");
9036 ctxt->wellFormed = 0;
9037 ctxt->disableSAX = 1;
9038 }
9039 if (ctxt->node != newDoc->children) {
9040 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9041 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9042 ctxt->sax->error(ctxt->userData,
9043 "chunk is not well balanced\n");
9044 ctxt->wellFormed = 0;
9045 ctxt->disableSAX = 1;
9046 }
9047
9048 if (!ctxt->wellFormed) {
9049 if (ctxt->errNo == 0)
9050 ret = 1;
9051 else
9052 ret = ctxt->errNo;
9053 } else {
9054 if (list != NULL) {
9055 xmlNodePtr cur;
9056
9057 /*
9058 * Return the newly created nodeset after unlinking it from
9059 * they pseudo parent.
9060 */
9061 cur = newDoc->children->children;
9062 *list = cur;
9063 while (cur != NULL) {
9064 cur->parent = NULL;
9065 cur = cur->next;
9066 }
9067 newDoc->children->children = NULL;
9068 }
9069 ret = 0;
9070 }
9071 if (sax != NULL)
9072 ctxt->sax = oldsax;
9073 xmlFreeParserCtxt(ctxt);
9074 newDoc->intSubset = NULL;
9075 newDoc->extSubset = NULL;
9076 xmlFreeDoc(newDoc);
9077
9078 return(ret);
9079}
9080
9081/**
9082 * xmlParseBalancedChunk:
9083 * @doc: the document the chunk pertains to
9084 * @sax: the SAX handler bloc (possibly NULL)
9085 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9086 * @depth: Used for loop detection, use 0
9087 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9088 * @list: the return value for the set of parsed nodes
9089 *
9090 * Parse a well-balanced chunk of an XML document
9091 * called by the parser
9092 * The allowed sequence for the Well Balanced Chunk is the one defined by
9093 * the content production in the XML grammar:
9094 *
9095 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9096 *
9097 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9098 * the parser error code otherwise
9099 */
9100
9101int
9102xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9103 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9104 xmlParserCtxtPtr ctxt;
9105 xmlDocPtr newDoc;
9106 xmlSAXHandlerPtr oldsax = NULL;
9107 int size;
9108 int ret = 0;
9109
9110 if (depth > 40) {
9111 return(XML_ERR_ENTITY_LOOP);
9112 }
9113
9114
9115 if (list != NULL)
9116 *list = NULL;
9117 if (string == NULL)
9118 return(-1);
9119
9120 size = xmlStrlen(string);
9121
9122 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9123 if (ctxt == NULL) return(-1);
9124 ctxt->userData = ctxt;
9125 if (sax != NULL) {
9126 oldsax = ctxt->sax;
9127 ctxt->sax = sax;
9128 if (user_data != NULL)
9129 ctxt->userData = user_data;
9130 }
9131 newDoc = xmlNewDoc(BAD_CAST "1.0");
9132 if (newDoc == NULL) {
9133 xmlFreeParserCtxt(ctxt);
9134 return(-1);
9135 }
9136 if (doc != NULL) {
9137 newDoc->intSubset = doc->intSubset;
9138 newDoc->extSubset = doc->extSubset;
9139 }
9140 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9141 if (newDoc->children == NULL) {
9142 if (sax != NULL)
9143 ctxt->sax = oldsax;
9144 xmlFreeParserCtxt(ctxt);
9145 newDoc->intSubset = NULL;
9146 newDoc->extSubset = NULL;
9147 xmlFreeDoc(newDoc);
9148 return(-1);
9149 }
9150 nodePush(ctxt, newDoc->children);
9151 if (doc == NULL) {
9152 ctxt->myDoc = newDoc;
9153 } else {
9154 ctxt->myDoc = doc;
9155 newDoc->children->doc = doc;
9156 }
9157 ctxt->instate = XML_PARSER_CONTENT;
9158 ctxt->depth = depth;
9159
9160 /*
9161 * Doing validity checking on chunk doesn't make sense
9162 */
9163 ctxt->validate = 0;
9164 ctxt->loadsubset = 0;
9165
9166 xmlParseContent(ctxt);
9167
9168 if ((RAW == '<') && (NXT(1) == '/')) {
9169 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9170 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9171 ctxt->sax->error(ctxt->userData,
9172 "chunk is not well balanced\n");
9173 ctxt->wellFormed = 0;
9174 ctxt->disableSAX = 1;
9175 } else if (RAW != 0) {
9176 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9177 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9178 ctxt->sax->error(ctxt->userData,
9179 "extra content at the end of well balanced chunk\n");
9180 ctxt->wellFormed = 0;
9181 ctxt->disableSAX = 1;
9182 }
9183 if (ctxt->node != newDoc->children) {
9184 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9185 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9186 ctxt->sax->error(ctxt->userData,
9187 "chunk is not well balanced\n");
9188 ctxt->wellFormed = 0;
9189 ctxt->disableSAX = 1;
9190 }
9191
9192 if (!ctxt->wellFormed) {
9193 if (ctxt->errNo == 0)
9194 ret = 1;
9195 else
9196 ret = ctxt->errNo;
9197 } else {
9198 if (list != NULL) {
9199 xmlNodePtr cur;
9200
9201 /*
9202 * Return the newly created nodeset after unlinking it from
9203 * they pseudo parent.
9204 */
9205 cur = newDoc->children->children;
9206 *list = cur;
9207 while (cur != NULL) {
9208 cur->parent = NULL;
9209 cur = cur->next;
9210 }
9211 newDoc->children->children = NULL;
9212 }
9213 ret = 0;
9214 }
9215 if (sax != NULL)
9216 ctxt->sax = oldsax;
9217 xmlFreeParserCtxt(ctxt);
9218 newDoc->intSubset = NULL;
9219 newDoc->extSubset = NULL;
9220 xmlFreeDoc(newDoc);
9221
9222 return(ret);
9223}
9224
9225/**
9226 * xmlSAXParseEntity:
9227 * @sax: the SAX handler block
9228 * @filename: the filename
9229 *
9230 * parse an XML external entity out of context and build a tree.
9231 * It use the given SAX function block to handle the parsing callback.
9232 * If sax is NULL, fallback to the default DOM tree building routines.
9233 *
9234 * [78] extParsedEnt ::= TextDecl? content
9235 *
9236 * This correspond to a "Well Balanced" chunk
9237 *
9238 * Returns the resulting document tree
9239 */
9240
9241xmlDocPtr
9242xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9243 xmlDocPtr ret;
9244 xmlParserCtxtPtr ctxt;
9245 char *directory = NULL;
9246
9247 ctxt = xmlCreateFileParserCtxt(filename);
9248 if (ctxt == NULL) {
9249 return(NULL);
9250 }
9251 if (sax != NULL) {
9252 if (ctxt->sax != NULL)
9253 xmlFree(ctxt->sax);
9254 ctxt->sax = sax;
9255 ctxt->userData = NULL;
9256 }
9257
9258 if ((ctxt->directory == NULL) && (directory == NULL))
9259 directory = xmlParserGetDirectory(filename);
9260
9261 xmlParseExtParsedEnt(ctxt);
9262
9263 if (ctxt->wellFormed)
9264 ret = ctxt->myDoc;
9265 else {
9266 ret = NULL;
9267 xmlFreeDoc(ctxt->myDoc);
9268 ctxt->myDoc = NULL;
9269 }
9270 if (sax != NULL)
9271 ctxt->sax = NULL;
9272 xmlFreeParserCtxt(ctxt);
9273
9274 return(ret);
9275}
9276
9277/**
9278 * xmlParseEntity:
9279 * @filename: the filename
9280 *
9281 * parse an XML external entity out of context and build a tree.
9282 *
9283 * [78] extParsedEnt ::= TextDecl? content
9284 *
9285 * This correspond to a "Well Balanced" chunk
9286 *
9287 * Returns the resulting document tree
9288 */
9289
9290xmlDocPtr
9291xmlParseEntity(const char *filename) {
9292 return(xmlSAXParseEntity(NULL, filename));
9293}
9294
9295/**
9296 * xmlCreateEntityParserCtxt:
9297 * @URL: the entity URL
9298 * @ID: the entity PUBLIC ID
9299 * @base: a posible base for the target URI
9300 *
9301 * Create a parser context for an external entity
9302 * Automatic support for ZLIB/Compress compressed document is provided
9303 * by default if found at compile-time.
9304 *
9305 * Returns the new parser context or NULL
9306 */
9307xmlParserCtxtPtr
9308xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9309 const xmlChar *base) {
9310 xmlParserCtxtPtr ctxt;
9311 xmlParserInputPtr inputStream;
9312 char *directory = NULL;
9313 xmlChar *uri;
9314
9315 ctxt = xmlNewParserCtxt();
9316 if (ctxt == NULL) {
9317 return(NULL);
9318 }
9319
9320 uri = xmlBuildURI(URL, base);
9321
9322 if (uri == NULL) {
9323 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9324 if (inputStream == NULL) {
9325 xmlFreeParserCtxt(ctxt);
9326 return(NULL);
9327 }
9328
9329 inputPush(ctxt, inputStream);
9330
9331 if ((ctxt->directory == NULL) && (directory == NULL))
9332 directory = xmlParserGetDirectory((char *)URL);
9333 if ((ctxt->directory == NULL) && (directory != NULL))
9334 ctxt->directory = directory;
9335 } else {
9336 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9337 if (inputStream == NULL) {
9338 xmlFree(uri);
9339 xmlFreeParserCtxt(ctxt);
9340 return(NULL);
9341 }
9342
9343 inputPush(ctxt, inputStream);
9344
9345 if ((ctxt->directory == NULL) && (directory == NULL))
9346 directory = xmlParserGetDirectory((char *)uri);
9347 if ((ctxt->directory == NULL) && (directory != NULL))
9348 ctxt->directory = directory;
9349 xmlFree(uri);
9350 }
9351
9352 return(ctxt);
9353}
9354
9355/************************************************************************
9356 * *
9357 * Front ends when parsing from a file *
9358 * *
9359 ************************************************************************/
9360
9361/**
9362 * xmlCreateFileParserCtxt:
9363 * @filename: the filename
9364 *
9365 * Create a parser context for a file content.
9366 * Automatic support for ZLIB/Compress compressed document is provided
9367 * by default if found at compile-time.
9368 *
9369 * Returns the new parser context or NULL
9370 */
9371xmlParserCtxtPtr
9372xmlCreateFileParserCtxt(const char *filename)
9373{
9374 xmlParserCtxtPtr ctxt;
9375 xmlParserInputPtr inputStream;
9376 xmlParserInputBufferPtr buf;
9377 char *directory = NULL;
9378
9379 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9380 if (buf == NULL) {
9381 return(NULL);
9382 }
9383
9384 ctxt = xmlNewParserCtxt();
9385 if (ctxt == NULL) {
9386 if (xmlDefaultSAXHandler.error != NULL) {
9387 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9388 }
9389 return(NULL);
9390 }
9391
9392 inputStream = xmlNewInputStream(ctxt);
9393 if (inputStream == NULL) {
9394 xmlFreeParserCtxt(ctxt);
9395 return(NULL);
9396 }
9397
9398 inputStream->filename = xmlMemStrdup(filename);
9399 inputStream->buf = buf;
9400 inputStream->base = inputStream->buf->buffer->content;
9401 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009402 inputStream->end =
9403 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009404
9405 inputPush(ctxt, inputStream);
9406 if ((ctxt->directory == NULL) && (directory == NULL))
9407 directory = xmlParserGetDirectory(filename);
9408 if ((ctxt->directory == NULL) && (directory != NULL))
9409 ctxt->directory = directory;
9410
9411 return(ctxt);
9412}
9413
9414/**
9415 * xmlSAXParseFile:
9416 * @sax: the SAX handler block
9417 * @filename: the filename
9418 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9419 * documents
9420 *
9421 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9422 * compressed document is provided by default if found at compile-time.
9423 * It use the given SAX function block to handle the parsing callback.
9424 * If sax is NULL, fallback to the default DOM tree building routines.
9425 *
9426 * Returns the resulting document tree
9427 */
9428
9429xmlDocPtr
9430xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9431 int recovery) {
9432 xmlDocPtr ret;
9433 xmlParserCtxtPtr ctxt;
9434 char *directory = NULL;
9435
9436 ctxt = xmlCreateFileParserCtxt(filename);
9437 if (ctxt == NULL) {
9438 return(NULL);
9439 }
9440 if (sax != NULL) {
9441 if (ctxt->sax != NULL)
9442 xmlFree(ctxt->sax);
9443 ctxt->sax = sax;
9444 ctxt->userData = NULL;
9445 }
9446
9447 if ((ctxt->directory == NULL) && (directory == NULL))
9448 directory = xmlParserGetDirectory(filename);
9449 if ((ctxt->directory == NULL) && (directory != NULL))
9450 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9451
9452 xmlParseDocument(ctxt);
9453
9454 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9455 else {
9456 ret = NULL;
9457 xmlFreeDoc(ctxt->myDoc);
9458 ctxt->myDoc = NULL;
9459 }
9460 if (sax != NULL)
9461 ctxt->sax = NULL;
9462 xmlFreeParserCtxt(ctxt);
9463
9464 return(ret);
9465}
9466
9467/**
9468 * xmlRecoverDoc:
9469 * @cur: a pointer to an array of xmlChar
9470 *
9471 * parse an XML in-memory document and build a tree.
9472 * In the case the document is not Well Formed, a tree is built anyway
9473 *
9474 * Returns the resulting document tree
9475 */
9476
9477xmlDocPtr
9478xmlRecoverDoc(xmlChar *cur) {
9479 return(xmlSAXParseDoc(NULL, cur, 1));
9480}
9481
9482/**
9483 * xmlParseFile:
9484 * @filename: the filename
9485 *
9486 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9487 * compressed document is provided by default if found at compile-time.
9488 *
9489 * Returns the resulting document tree
9490 */
9491
9492xmlDocPtr
9493xmlParseFile(const char *filename) {
9494 return(xmlSAXParseFile(NULL, filename, 0));
9495}
9496
9497/**
9498 * xmlRecoverFile:
9499 * @filename: the filename
9500 *
9501 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9502 * compressed document is provided by default if found at compile-time.
9503 * In the case the document is not Well Formed, a tree is built anyway
9504 *
9505 * Returns the resulting document tree
9506 */
9507
9508xmlDocPtr
9509xmlRecoverFile(const char *filename) {
9510 return(xmlSAXParseFile(NULL, filename, 1));
9511}
9512
9513
9514/**
9515 * xmlSetupParserForBuffer:
9516 * @ctxt: an XML parser context
9517 * @buffer: a xmlChar * buffer
9518 * @filename: a file name
9519 *
9520 * Setup the parser context to parse a new buffer; Clears any prior
9521 * contents from the parser context. The buffer parameter must not be
9522 * NULL, but the filename parameter can be
9523 */
9524void
9525xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9526 const char* filename)
9527{
9528 xmlParserInputPtr input;
9529
9530 input = xmlNewInputStream(ctxt);
9531 if (input == NULL) {
9532 perror("malloc");
9533 xmlFree(ctxt);
9534 return;
9535 }
9536
9537 xmlClearParserCtxt(ctxt);
9538 if (filename != NULL)
9539 input->filename = xmlMemStrdup(filename);
9540 input->base = buffer;
9541 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009542 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009543 inputPush(ctxt, input);
9544}
9545
9546/**
9547 * xmlSAXUserParseFile:
9548 * @sax: a SAX handler
9549 * @user_data: The user data returned on SAX callbacks
9550 * @filename: a file name
9551 *
9552 * parse an XML file and call the given SAX handler routines.
9553 * Automatic support for ZLIB/Compress compressed document is provided
9554 *
9555 * Returns 0 in case of success or a error number otherwise
9556 */
9557int
9558xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9559 const char *filename) {
9560 int ret = 0;
9561 xmlParserCtxtPtr ctxt;
9562
9563 ctxt = xmlCreateFileParserCtxt(filename);
9564 if (ctxt == NULL) return -1;
9565 if (ctxt->sax != &xmlDefaultSAXHandler)
9566 xmlFree(ctxt->sax);
9567 ctxt->sax = sax;
9568 if (user_data != NULL)
9569 ctxt->userData = user_data;
9570
9571 xmlParseDocument(ctxt);
9572
9573 if (ctxt->wellFormed)
9574 ret = 0;
9575 else {
9576 if (ctxt->errNo != 0)
9577 ret = ctxt->errNo;
9578 else
9579 ret = -1;
9580 }
9581 if (sax != NULL)
9582 ctxt->sax = NULL;
9583 xmlFreeParserCtxt(ctxt);
9584
9585 return ret;
9586}
9587
9588/************************************************************************
9589 * *
9590 * Front ends when parsing from memory *
9591 * *
9592 ************************************************************************/
9593
9594/**
9595 * xmlCreateMemoryParserCtxt:
9596 * @buffer: a pointer to a char array
9597 * @size: the size of the array
9598 *
9599 * Create a parser context for an XML in-memory document.
9600 *
9601 * Returns the new parser context or NULL
9602 */
9603xmlParserCtxtPtr
9604xmlCreateMemoryParserCtxt(char *buffer, int size) {
9605 xmlParserCtxtPtr ctxt;
9606 xmlParserInputPtr input;
9607 xmlParserInputBufferPtr buf;
9608
9609 if (buffer == NULL)
9610 return(NULL);
9611 if (size <= 0)
9612 return(NULL);
9613
9614 ctxt = xmlNewParserCtxt();
9615 if (ctxt == NULL)
9616 return(NULL);
9617
9618 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9619 if (buf == NULL) return(NULL);
9620
9621 input = xmlNewInputStream(ctxt);
9622 if (input == NULL) {
9623 xmlFreeParserCtxt(ctxt);
9624 return(NULL);
9625 }
9626
9627 input->filename = NULL;
9628 input->buf = buf;
9629 input->base = input->buf->buffer->content;
9630 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009631 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009632
9633 inputPush(ctxt, input);
9634 return(ctxt);
9635}
9636
9637/**
9638 * xmlSAXParseMemory:
9639 * @sax: the SAX handler block
9640 * @buffer: an pointer to a char array
9641 * @size: the size of the array
9642 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9643 * documents
9644 *
9645 * parse an XML in-memory block and use the given SAX function block
9646 * to handle the parsing callback. If sax is NULL, fallback to the default
9647 * DOM tree building routines.
9648 *
9649 * Returns the resulting document tree
9650 */
9651xmlDocPtr
9652xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9653 xmlDocPtr ret;
9654 xmlParserCtxtPtr ctxt;
9655
9656 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9657 if (ctxt == NULL) return(NULL);
9658 if (sax != NULL) {
9659 ctxt->sax = sax;
9660 ctxt->userData = NULL;
9661 }
9662
9663 xmlParseDocument(ctxt);
9664
9665 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9666 else {
9667 ret = NULL;
9668 xmlFreeDoc(ctxt->myDoc);
9669 ctxt->myDoc = NULL;
9670 }
9671 if (sax != NULL)
9672 ctxt->sax = NULL;
9673 xmlFreeParserCtxt(ctxt);
9674
9675 return(ret);
9676}
9677
9678/**
9679 * xmlParseMemory:
9680 * @buffer: an pointer to a char array
9681 * @size: the size of the array
9682 *
9683 * parse an XML in-memory block and build a tree.
9684 *
9685 * Returns the resulting document tree
9686 */
9687
9688xmlDocPtr xmlParseMemory(char *buffer, int size) {
9689 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9690}
9691
9692/**
9693 * xmlRecoverMemory:
9694 * @buffer: an pointer to a char array
9695 * @size: the size of the array
9696 *
9697 * parse an XML in-memory block and build a tree.
9698 * In the case the document is not Well Formed, a tree is built anyway
9699 *
9700 * Returns the resulting document tree
9701 */
9702
9703xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9704 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9705}
9706
9707/**
9708 * xmlSAXUserParseMemory:
9709 * @sax: a SAX handler
9710 * @user_data: The user data returned on SAX callbacks
9711 * @buffer: an in-memory XML document input
9712 * @size: the length of the XML document in bytes
9713 *
9714 * A better SAX parsing routine.
9715 * parse an XML in-memory buffer and call the given SAX handler routines.
9716 *
9717 * Returns 0 in case of success or a error number otherwise
9718 */
9719int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9720 char *buffer, int size) {
9721 int ret = 0;
9722 xmlParserCtxtPtr ctxt;
9723 xmlSAXHandlerPtr oldsax = NULL;
9724
9725 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9726 if (ctxt == NULL) return -1;
9727 if (sax != NULL) {
9728 oldsax = ctxt->sax;
9729 ctxt->sax = sax;
9730 }
9731 ctxt->userData = user_data;
9732
9733 xmlParseDocument(ctxt);
9734
9735 if (ctxt->wellFormed)
9736 ret = 0;
9737 else {
9738 if (ctxt->errNo != 0)
9739 ret = ctxt->errNo;
9740 else
9741 ret = -1;
9742 }
9743 if (sax != NULL) {
9744 ctxt->sax = oldsax;
9745 }
9746 xmlFreeParserCtxt(ctxt);
9747
9748 return ret;
9749}
9750
9751/**
9752 * xmlCreateDocParserCtxt:
9753 * @cur: a pointer to an array of xmlChar
9754 *
9755 * Creates a parser context for an XML in-memory document.
9756 *
9757 * Returns the new parser context or NULL
9758 */
9759xmlParserCtxtPtr
9760xmlCreateDocParserCtxt(xmlChar *cur) {
9761 int len;
9762
9763 if (cur == NULL)
9764 return(NULL);
9765 len = xmlStrlen(cur);
9766 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9767}
9768
9769/**
9770 * xmlSAXParseDoc:
9771 * @sax: the SAX handler block
9772 * @cur: a pointer to an array of xmlChar
9773 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9774 * documents
9775 *
9776 * parse an XML in-memory document and build a tree.
9777 * It use the given SAX function block to handle the parsing callback.
9778 * If sax is NULL, fallback to the default DOM tree building routines.
9779 *
9780 * Returns the resulting document tree
9781 */
9782
9783xmlDocPtr
9784xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9785 xmlDocPtr ret;
9786 xmlParserCtxtPtr ctxt;
9787
9788 if (cur == NULL) return(NULL);
9789
9790
9791 ctxt = xmlCreateDocParserCtxt(cur);
9792 if (ctxt == NULL) return(NULL);
9793 if (sax != NULL) {
9794 ctxt->sax = sax;
9795 ctxt->userData = NULL;
9796 }
9797
9798 xmlParseDocument(ctxt);
9799 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9800 else {
9801 ret = NULL;
9802 xmlFreeDoc(ctxt->myDoc);
9803 ctxt->myDoc = NULL;
9804 }
9805 if (sax != NULL)
9806 ctxt->sax = NULL;
9807 xmlFreeParserCtxt(ctxt);
9808
9809 return(ret);
9810}
9811
9812/**
9813 * xmlParseDoc:
9814 * @cur: a pointer to an array of xmlChar
9815 *
9816 * parse an XML in-memory document and build a tree.
9817 *
9818 * Returns the resulting document tree
9819 */
9820
9821xmlDocPtr
9822xmlParseDoc(xmlChar *cur) {
9823 return(xmlSAXParseDoc(NULL, cur, 0));
9824}
9825
9826
9827/************************************************************************
9828 * *
9829 * Miscellaneous *
9830 * *
9831 ************************************************************************/
9832
9833#ifdef LIBXML_XPATH_ENABLED
9834#include <libxml/xpath.h>
9835#endif
9836
/* Set once xmlInitParser() has run; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser: sets up the encoding
 * handlers, the predefined entities, the default SAX handler, the default
 * I/O callbacks and, when compiled in, the HTML and XPath modules.
 * Repeated calls are no-ops while the initialized flag is set.
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */

void
xmlInitParser(void)
{
    if (xmlParserInitialized == 0) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        xmlParserInitialized = 1;
    }
}
9865
9866/**
9867 * xmlCleanupParser:
9868 *
9869 * Cleanup function for the XML parser. It tries to reclaim all
9870 * parsing related global memory allocated for the parser processing.
9871 * It doesn't deallocate any document related memory. Calling this
9872 * function should not prevent reusing the parser.
9873 */
9874
9875void
9876xmlCleanupParser(void) {
9877 xmlParserInitialized = 0;
9878 xmlCleanupCharEncodingHandlers();
9879 xmlCleanupPredefinedEntities();
9880}
9881
9882/**
9883 * xmlPedanticParserDefault:
9884 * @val: int 0 or 1
9885 *
9886 * Set and return the previous value for enabling pedantic warnings.
9887 *
9888 * Returns the last value for 0 for no substitution, 1 for substitution.
9889 */
9890
9891int
9892xmlPedanticParserDefault(int val) {
9893 int old = xmlPedanticParserDefaultValue;
9894
9895 xmlPedanticParserDefaultValue = val;
9896 return(old);
9897}
9898
9899/**
9900 * xmlSubstituteEntitiesDefault:
9901 * @val: int 0 or 1
9902 *
9903 * Set and return the previous value for default entity support.
9904 * Initially the parser always keep entity references instead of substituting
9905 * entity values in the output. This function has to be used to change the
9906 * default parser behaviour
9907 * SAX::subtituteEntities() has to be used for changing that on a file by
9908 * file basis.
9909 *
9910 * Returns the last value for 0 for no substitution, 1 for substitution.
9911 */
9912
9913int
9914xmlSubstituteEntitiesDefault(int val) {
9915 int old = xmlSubstituteEntitiesDefaultValue;
9916
9917 xmlSubstituteEntitiesDefaultValue = val;
9918 return(old);
9919}
9920
9921/**
9922 * xmlKeepBlanksDefault:
9923 * @val: int 0 or 1
9924 *
9925 * Set and return the previous value for default blanks text nodes support.
9926 * The 1.x version of the parser used an heuristic to try to detect
9927 * ignorable white spaces. As a result the SAX callback was generating
9928 * ignorableWhitespace() callbacks instead of characters() one, and when
9929 * using the DOM output text nodes containing those blanks were not generated.
9930 * The 2.x and later version will switch to the XML standard way and
9931 * ignorableWhitespace() are only generated when running the parser in
9932 * validating mode and when the current element doesn't allow CDATA or
9933 * mixed content.
9934 * This function is provided as a way to force the standard behaviour
9935 * on 1.X libs and to switch back to the old mode for compatibility when
9936 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9937 * by using xmlIsBlankNode() commodity function to detect the "empty"
9938 * nodes generated.
9939 * This value also affect autogeneration of indentation when saving code
9940 * if blanks sections are kept, indentation is not generated.
9941 *
9942 * Returns the last value for 0 for no substitution, 1 for substitution.
9943 */
9944
9945int
9946xmlKeepBlanksDefault(int val) {
9947 int old = xmlKeepBlanksDefaultValue;
9948
9949 xmlKeepBlanksDefaultValue = val;
9950 xmlIndentTreeOutput = !val;
9951 return(old);
9952}
9953