blob: b6f6144960fe23ef7de36199cd50ef4d7464a5e5 [file] [log] [blame]
/*
 * parser.c : an XML 1.0 parser, namespaces and validity support are mostly
 *            implemented on top of the SAX interfaces
 *
 * References:
 *   The XML specification:
 *     http://www.w3.org/TR/REC-xml
 *   Original 1.0 version:
 *     http://www.w3.org/TR/1998/REC-xml-19980210
 *   XML second edition working draft
 *     http://www.w3.org/TR/2000/WD-xml-2e-20000814
 *
 * Okay this is a big file, the parser core is around 7000 lines, then it
 * is followed by the progressive parser top routines, then the various
 * high level APIs to call the parser and a few miscellaneous functions.
 * A number of helper functions and deprecated ones have been moved to
 * parserInternals.c to reduce this file size.
 * As much as possible the functions are associated with their relative
 * production in the XML specification. A few productions defining the
 * different ranges of character are actually implemented either in
 * parserInternals.h or parserInternals.c
 * The DOM tree build is realized from the default SAX callbacks in
 * the module SAX.c.
 * The routines doing the validation checks are in valid.c and called either
 * from the SAX callbacks or as standalone functions using a preparsed
 * document.
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 *
 * 14 Nov 2000 ht - truncated definitions of xmlSubstituteEntitiesDefaultValue
 * and xmlDoValidityCheckingDefaultValue for VMS
 */
35
36#ifdef WIN32
37#include "win32config.h"
38#define XML_DIR_SEP '\\'
39#else
40#include "config.h"
41#define XML_DIR_SEP '/'
42#endif
43
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <libxml/xmlmemory.h>
48#include <libxml/tree.h>
49#include <libxml/parser.h>
50#include <libxml/parserInternals.h>
51#include <libxml/valid.h>
52#include <libxml/entities.h>
53#include <libxml/xmlerror.h>
54#include <libxml/encoding.h>
55#include <libxml/xmlIO.h>
56#include <libxml/uri.h>
57
58#ifdef HAVE_CTYPE_H
59#include <ctype.h>
60#endif
61#ifdef HAVE_STDLIB_H
62#include <stdlib.h>
63#endif
64#ifdef HAVE_SYS_STAT_H
65#include <sys/stat.h>
66#endif
67#ifdef HAVE_FCNTL_H
68#include <fcntl.h>
69#endif
70#ifdef HAVE_UNISTD_H
71#include <unistd.h>
72#endif
73#ifdef HAVE_ZLIB_H
74#include <zlib.h>
75#endif
76
77
/*
 * Sizes, counted in xmlChars, of the parser's dynamically grown buffers.
 * In this file XML_PARSER_BIG_BUFFER_SIZE is the initial allocation made
 * by xmlStringDecodeEntities() and XML_PARSER_BUFFER_SIZE is the head-room
 * that must remain free before such a buffer gets doubled.
 */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100

/*
 * Various global defaults for parsing
 *
 * NOTE(review): going by their names these are the initial values given to
 * the corresponding per-context settings; confirm against the parser
 * context initialisation code.
 */
int xmlGetWarningsDefaultValue = 1;

/* When non-zero, input push/pop and entity decoding are traced through
 * xmlGenericError(). */
int xmlParserDebugEntities = 0;

#ifdef VMS
/*
 * VMS builds define the two variables under truncated names and map the
 * regular names onto them (presumably because of identifier length
 * limits on that platform).
 */
int xmlSubstituteEntitiesDefaultVal = 0;
#define xmlSubstituteEntitiesDefaultValue xmlSubstituteEntitiesDefaultVal
int xmlDoValidityCheckingDefaultVal = 0;
#define xmlDoValidityCheckingDefaultValue xmlDoValidityCheckingDefaultVal
#else
int xmlSubstituteEntitiesDefaultValue = 0;
int xmlDoValidityCheckingDefaultValue = 0;
#endif

int xmlLoadExtDtdDefaultValue = 0;
int xmlPedanticParserDefaultValue = 0;
int xmlKeepBlanksDefaultValue = 1;
98
/*
 * List of XML prefixed PI allowed by W3C specs.
 * The array is terminated by a NULL entry.
 */
const char *xmlW3CPIs[] = {
    "xml-stylesheet",
    NULL
};
107
108/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
109void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);
110xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
111 const xmlChar **str);
112
113
114/************************************************************************
115 * *
116 * Parser stacks related functions and macros *
117 * *
118 ************************************************************************/
119
120xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
121 const xmlChar ** str);
122
123/*
124 * Generic function for accessing stacks in the Parser Context
125 */
126
127#define PUSH_AND_POP(scope, type, name) \
128scope int name##Push(xmlParserCtxtPtr ctxt, type value) { \
129 if (ctxt->name##Nr >= ctxt->name##Max) { \
130 ctxt->name##Max *= 2; \
131 ctxt->name##Tab = (type *) xmlRealloc(ctxt->name##Tab, \
132 ctxt->name##Max * sizeof(ctxt->name##Tab[0])); \
133 if (ctxt->name##Tab == NULL) { \
134 xmlGenericError(xmlGenericErrorContext, \
135 "realloc failed !\n"); \
136 return(0); \
137 } \
138 } \
139 ctxt->name##Tab[ctxt->name##Nr] = value; \
140 ctxt->name = value; \
141 return(ctxt->name##Nr++); \
142} \
143scope type name##Pop(xmlParserCtxtPtr ctxt) { \
144 type ret; \
145 if (ctxt->name##Nr <= 0) return(0); \
146 ctxt->name##Nr--; \
147 if (ctxt->name##Nr > 0) \
148 ctxt->name = ctxt->name##Tab[ctxt->name##Nr - 1]; \
149 else \
150 ctxt->name = NULL; \
151 ret = ctxt->name##Tab[ctxt->name##Nr]; \
152 ctxt->name##Tab[ctxt->name##Nr] = 0; \
153 return(ret); \
154} \
155
156/*
157 * Those macros actually generate the functions
158 */
159PUSH_AND_POP(extern, xmlParserInputPtr, input)
160PUSH_AND_POP(extern, xmlNodePtr, node)
161PUSH_AND_POP(extern, xmlChar*, name)
162
163int spacePush(xmlParserCtxtPtr ctxt, int val) {
164 if (ctxt->spaceNr >= ctxt->spaceMax) {
165 ctxt->spaceMax *= 2;
166 ctxt->spaceTab = (int *) xmlRealloc(ctxt->spaceTab,
167 ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
168 if (ctxt->spaceTab == NULL) {
169 xmlGenericError(xmlGenericErrorContext,
170 "realloc failed !\n");
171 return(0);
172 }
173 }
174 ctxt->spaceTab[ctxt->spaceNr] = val;
175 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
176 return(ctxt->spaceNr++);
177}
178
179int spacePop(xmlParserCtxtPtr ctxt) {
180 int ret;
181 if (ctxt->spaceNr <= 0) return(0);
182 ctxt->spaceNr--;
183 if (ctxt->spaceNr > 0)
184 ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
185 else
186 ctxt->space = NULL;
187 ret = ctxt->spaceTab[ctxt->spaceNr];
188 ctxt->spaceTab[ctxt->spaceNr] = -1;
189 return(ret);
190}
191
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * All of them expect a variable named `ctxt' (the parser context) to be in
 * scope where they are expanded.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR  the current pointer to the xmlChar to be parsed.
 *            To be used with extreme caution since operations consuming
 *            characters may move the input buffer to a different location !
 *   CUR      the pending ctxt->token if there is one, otherwise the current
 *            xmlChar of the input buffer.
 *            This should be used internally by the parser
 *            only to compare to ASCII values otherwise it would break when
 *            running with UTF-8 encoding.
 *   RAW      the current xmlChar of the input buffer, or -1 while a token
 *            is pending in ctxt->token.
 *   NXT(n)   the n'th next xmlChar. Same as CUR it should be used only
 *            to compare on ASCII based substring.
 *   SKIP(n)  Skip n xmlChar, and must also be used only to skip ASCII defined
 *            strings within the parser. Handles a following '%' and pops
 *            the input when it is exhausted.
 *
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT     Skip to the next character, this does the proper decoding
 *            in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXT1    Skip exactly one xmlChar and grow the input if its end is
 *            reached; no line/column accounting, no entity handling.
 *   NEXTL(l) Skip l xmlChars in the input buffer, updating line/col and
 *            clearing any pending token.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *            to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 *
 * CAUTION: SHRINK and GROW expand to a bare `if (...) { ... }', NEXT1 to a
 * bare block and COPY_BUF to an `if ... else ...' lacking its final ';'.
 * Never use them as the unbraced body of an `if' that has an `else' branch.
 */

#define RAW (ctxt->token ? -1 : (*ctxt->input->cur))
#define CUR (ctxt->token ? ctxt->token : (*ctxt->input->cur))
#define NXT(val) (ctxt->input->cur[(val)])
#define CUR_PTR ctxt->input->cur

#define SKIP(val) do {                                                  \
    ctxt->nbChars += (val),ctxt->input->cur += (val);                   \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  } while (0)

#define SHRINK if (ctxt->input->cur - ctxt->input->base > INPUT_CHUNK) {\
    xmlParserInputShrink(ctxt->input);                                  \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) {   \
    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);                       \
    if ((*ctxt->input->cur == 0) &&                                     \
        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))            \
            xmlPopInput(ctxt);                                          \
  }

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

#define NEXT1 {                                                         \
        ctxt->input->cur++;                                             \
        ctxt->nbChars++;                                                \
        if (*ctxt->input->cur == 0)                                     \
            xmlParserInputGrow(ctxt->input, INPUT_CHUNK);               \
    }

#define NEXTL(l) do {                                                   \
    if (*(ctxt->input->cur) == '\n') {                                  \
        ctxt->input->line++; ctxt->input->col = 1;                      \
    } else ctxt->input->col++;                                          \
    ctxt->token = 0; ctxt->input->cur += (l);                           \
    if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);     \
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

#define COPY_BUF(l,b,i,v)                                               \
    if ((l) == 1) (b)[(i)++] = (xmlChar) (v);                           \
    else (i) += xmlCopyChar((l),&(b)[(i)],(v))
278
279/**
280 * xmlSkipBlankChars:
281 * @ctxt: the XML parser context
282 *
283 * skip all blanks character found at that point in the input streams.
284 * It pops up finished entities in the process if allowable at that point.
285 *
286 * Returns the number of space chars skipped
287 */
288
289int
290xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
291 int cur, res = 0;
292
293 /*
294 * It's Okay to use CUR/NEXT here since all the blanks are on
295 * the ASCII range.
296 */
297 do {
298 cur = CUR;
299 while (IS_BLANK(cur)) { /* CHECKED tstblanks.xml */
300 NEXT;
301 cur = CUR;
302 res++;
303 }
304 while ((cur == 0) && (ctxt->inputNr > 1) &&
305 (ctxt->instate != XML_PARSER_COMMENT)) {
306 xmlPopInput(ctxt);
307 cur = CUR;
308 }
309 /*
310 * Need to handle support of entities branching here
311 */
312 if (*ctxt->input->cur == '%') xmlParserHandlePEReference(ctxt);
313 /* DEPR if (*ctxt->input->cur == '&') xmlParserHandleReference(ctxt); */
314 } while (IS_BLANK(cur)); /* CHECKED tstblanks.xml */
315 return(res);
316}
317
318/************************************************************************
319 * *
320 * Commodity functions to handle entities *
321 * *
322 ************************************************************************/
323
324/**
325 * xmlPopInput:
326 * @ctxt: an XML parser context
327 *
328 * xmlPopInput: the current input pointed by ctxt->input came to an end
329 * pop it and return the next char.
330 *
331 * Returns the current xmlChar in the parser context
332 */
333xmlChar
334xmlPopInput(xmlParserCtxtPtr ctxt) {
335 if (ctxt->inputNr == 1) return(0); /* End of main Input */
336 if (xmlParserDebugEntities)
337 xmlGenericError(xmlGenericErrorContext,
338 "Popping input %d\n", ctxt->inputNr);
339 xmlFreeInputStream(inputPop(ctxt));
340 if ((*ctxt->input->cur == 0) &&
341 (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0))
342 return(xmlPopInput(ctxt));
343 return(CUR);
344}
345
346/**
347 * xmlPushInput:
348 * @ctxt: an XML parser context
349 * @input: an XML parser input fragment (entity, XML fragment ...).
350 *
351 * xmlPushInput: switch to a new input stream which is stacked on top
352 * of the previous one(s).
353 */
354void
355xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
356 if (input == NULL) return;
357
358 if (xmlParserDebugEntities) {
359 if ((ctxt->input != NULL) && (ctxt->input->filename))
360 xmlGenericError(xmlGenericErrorContext,
361 "%s(%d): ", ctxt->input->filename,
362 ctxt->input->line);
363 xmlGenericError(xmlGenericErrorContext,
364 "Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
365 }
366 inputPush(ctxt, input);
367 GROW;
368}
369
370/**
371 * xmlParseCharRef:
372 * @ctxt: an XML parser context
373 *
374 * parse Reference declarations
375 *
376 * [66] CharRef ::= '&#' [0-9]+ ';' |
377 * '&#x' [0-9a-fA-F]+ ';'
378 *
379 * [ WFC: Legal Character ]
380 * Characters referred to using character references must match the
381 * production for Char.
382 *
383 * Returns the value parsed (as an int), 0 in case of error
384 */
385int
386xmlParseCharRef(xmlParserCtxtPtr ctxt) {
387 int val = 0;
388 int count = 0;
389
390 if (ctxt->token != 0) {
391 val = ctxt->token;
392 ctxt->token = 0;
393 return(val);
394 }
395 /*
396 * Using RAW/CUR/NEXT is okay since we are working on ASCII range here
397 */
398 if ((RAW == '&') && (NXT(1) == '#') &&
399 (NXT(2) == 'x')) {
400 SKIP(3);
401 GROW;
402 while (RAW != ';') { /* loop blocked by count */
403 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
404 val = val * 16 + (CUR - '0');
405 else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
406 val = val * 16 + (CUR - 'a') + 10;
407 else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
408 val = val * 16 + (CUR - 'A') + 10;
409 else {
410 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
411 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
412 ctxt->sax->error(ctxt->userData,
413 "xmlParseCharRef: invalid hexadecimal value\n");
414 ctxt->wellFormed = 0;
415 ctxt->disableSAX = 1;
416 val = 0;
417 break;
418 }
419 NEXT;
420 count++;
421 }
422 if (RAW == ';') {
423 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
424 ctxt->nbChars ++;
425 ctxt->input->cur++;
426 }
427 } else if ((RAW == '&') && (NXT(1) == '#')) {
428 SKIP(2);
429 GROW;
430 while (RAW != ';') { /* loop blocked by count */
431 if ((RAW >= '0') && (RAW <= '9') && (count < 20))
432 val = val * 10 + (CUR - '0');
433 else {
434 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
435 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
436 ctxt->sax->error(ctxt->userData,
437 "xmlParseCharRef: invalid decimal value\n");
438 ctxt->wellFormed = 0;
439 ctxt->disableSAX = 1;
440 val = 0;
441 break;
442 }
443 NEXT;
444 count++;
445 }
446 if (RAW == ';') {
447 /* on purpose to avoid reentrancy problems with NEXT and SKIP */
448 ctxt->nbChars ++;
449 ctxt->input->cur++;
450 }
451 } else {
452 ctxt->errNo = XML_ERR_INVALID_CHARREF;
453 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
454 ctxt->sax->error(ctxt->userData,
455 "xmlParseCharRef: invalid value\n");
456 ctxt->wellFormed = 0;
457 ctxt->disableSAX = 1;
458 }
459
460 /*
461 * [ WFC: Legal Character ]
462 * Characters referred to using character references must match the
463 * production for Char.
464 */
465 if (IS_CHAR(val)) {
466 return(val);
467 } else {
468 ctxt->errNo = XML_ERR_INVALID_CHAR;
469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
470 ctxt->sax->error(ctxt->userData, "CharRef: invalid xmlChar value %d\n",
471 val);
472 ctxt->wellFormed = 0;
473 ctxt->disableSAX = 1;
474 }
475 return(0);
476}
477
478/**
479 * xmlParseStringCharRef:
480 * @ctxt: an XML parser context
481 * @str: a pointer to an index in the string
482 *
483 * parse Reference declarations, variant parsing from a string rather
484 * than an an input flow.
485 *
486 * [66] CharRef ::= '&#' [0-9]+ ';' |
487 * '&#x' [0-9a-fA-F]+ ';'
488 *
489 * [ WFC: Legal Character ]
490 * Characters referred to using character references must match the
491 * production for Char.
492 *
493 * Returns the value parsed (as an int), 0 in case of error, str will be
494 * updated to the current value of the index
495 */
496int
497xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
498 const xmlChar *ptr;
499 xmlChar cur;
500 int val = 0;
501
502 if ((str == NULL) || (*str == NULL)) return(0);
503 ptr = *str;
504 cur = *ptr;
505 if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
506 ptr += 3;
507 cur = *ptr;
508 while (cur != ';') { /* Non input consuming loop */
509 if ((cur >= '0') && (cur <= '9'))
510 val = val * 16 + (cur - '0');
511 else if ((cur >= 'a') && (cur <= 'f'))
512 val = val * 16 + (cur - 'a') + 10;
513 else if ((cur >= 'A') && (cur <= 'F'))
514 val = val * 16 + (cur - 'A') + 10;
515 else {
516 ctxt->errNo = XML_ERR_INVALID_HEX_CHARREF;
517 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
518 ctxt->sax->error(ctxt->userData,
519 "xmlParseStringCharRef: invalid hexadecimal value\n");
520 ctxt->wellFormed = 0;
521 ctxt->disableSAX = 1;
522 val = 0;
523 break;
524 }
525 ptr++;
526 cur = *ptr;
527 }
528 if (cur == ';')
529 ptr++;
530 } else if ((cur == '&') && (ptr[1] == '#')){
531 ptr += 2;
532 cur = *ptr;
533 while (cur != ';') { /* Non input consuming loops */
534 if ((cur >= '0') && (cur <= '9'))
535 val = val * 10 + (cur - '0');
536 else {
537 ctxt->errNo = XML_ERR_INVALID_DEC_CHARREF;
538 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
539 ctxt->sax->error(ctxt->userData,
540 "xmlParseStringCharRef: invalid decimal value\n");
541 ctxt->wellFormed = 0;
542 ctxt->disableSAX = 1;
543 val = 0;
544 break;
545 }
546 ptr++;
547 cur = *ptr;
548 }
549 if (cur == ';')
550 ptr++;
551 } else {
552 ctxt->errNo = XML_ERR_INVALID_CHARREF;
553 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
554 ctxt->sax->error(ctxt->userData,
555 "xmlParseCharRef: invalid value\n");
556 ctxt->wellFormed = 0;
557 ctxt->disableSAX = 1;
558 return(0);
559 }
560 *str = ptr;
561
562 /*
563 * [ WFC: Legal Character ]
564 * Characters referred to using character references must match the
565 * production for Char.
566 */
567 if (IS_CHAR(val)) {
568 return(val);
569 } else {
570 ctxt->errNo = XML_ERR_INVALID_CHAR;
571 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
572 ctxt->sax->error(ctxt->userData,
573 "CharRef: invalid xmlChar value %d\n", val);
574 ctxt->wellFormed = 0;
575 ctxt->disableSAX = 1;
576 }
577 return(0);
578}
579
580/**
581 * xmlParserHandlePEReference:
582 * @ctxt: the parser context
583 *
584 * [69] PEReference ::= '%' Name ';'
585 *
586 * [ WFC: No Recursion ]
587 * A parsed entity must not contain a recursive
588 * reference to itself, either directly or indirectly.
589 *
590 * [ WFC: Entity Declared ]
591 * In a document without any DTD, a document with only an internal DTD
592 * subset which contains no parameter entity references, or a document
593 * with "standalone='yes'", ... ... The declaration of a parameter
594 * entity must precede any reference to it...
595 *
596 * [ VC: Entity Declared ]
597 * In a document with an external subset or external parameter entities
598 * with "standalone='no'", ... ... The declaration of a parameter entity
599 * must precede any reference to it...
600 *
601 * [ WFC: In DTD ]
602 * Parameter-entity references may only appear in the DTD.
603 * NOTE: misleading but this is handled.
604 *
605 * A PEReference may have been detected in the current input stream
606 * the handling is done accordingly to
607 * http://www.w3.org/TR/REC-xml#entproc
608 * i.e.
609 * - Included in literal in entity values
610 * - Included as Paraemeter Entity reference within DTDs
611 */
612void
613xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
614 xmlChar *name;
615 xmlEntityPtr entity = NULL;
616 xmlParserInputPtr input;
617
618 if (ctxt->token != 0) {
619 return;
620 }
621 if (RAW != '%') return;
622 switch(ctxt->instate) {
623 case XML_PARSER_CDATA_SECTION:
624 return;
625 case XML_PARSER_COMMENT:
626 return;
627 case XML_PARSER_START_TAG:
628 return;
629 case XML_PARSER_END_TAG:
630 return;
631 case XML_PARSER_EOF:
632 ctxt->errNo = XML_ERR_PEREF_AT_EOF;
633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
634 ctxt->sax->error(ctxt->userData, "PEReference at EOF\n");
635 ctxt->wellFormed = 0;
636 ctxt->disableSAX = 1;
637 return;
638 case XML_PARSER_PROLOG:
639 case XML_PARSER_START:
640 case XML_PARSER_MISC:
641 ctxt->errNo = XML_ERR_PEREF_IN_PROLOG;
642 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
643 ctxt->sax->error(ctxt->userData, "PEReference in prolog!\n");
644 ctxt->wellFormed = 0;
645 ctxt->disableSAX = 1;
646 return;
647 case XML_PARSER_ENTITY_DECL:
648 case XML_PARSER_CONTENT:
649 case XML_PARSER_ATTRIBUTE_VALUE:
650 case XML_PARSER_PI:
651 case XML_PARSER_SYSTEM_LITERAL:
652 /* we just ignore it there */
653 return;
654 case XML_PARSER_EPILOG:
655 ctxt->errNo = XML_ERR_PEREF_IN_EPILOG;
656 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
657 ctxt->sax->error(ctxt->userData, "PEReference in epilog!\n");
658 ctxt->wellFormed = 0;
659 ctxt->disableSAX = 1;
660 return;
661 case XML_PARSER_ENTITY_VALUE:
662 /*
663 * NOTE: in the case of entity values, we don't do the
664 * substitution here since we need the literal
665 * entity value to be able to save the internal
666 * subset of the document.
667 * This will be handled by xmlStringDecodeEntities
668 */
669 return;
670 case XML_PARSER_DTD:
671 /*
672 * [WFC: Well-Formedness Constraint: PEs in Internal Subset]
673 * In the internal DTD subset, parameter-entity references
674 * can occur only where markup declarations can occur, not
675 * within markup declarations.
676 * In that case this is handled in xmlParseMarkupDecl
677 */
678 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
679 return;
680 break;
681 case XML_PARSER_IGNORE:
682 return;
683 }
684
685 NEXT;
686 name = xmlParseName(ctxt);
687 if (xmlParserDebugEntities)
688 xmlGenericError(xmlGenericErrorContext,
689 "PE Reference: %s\n", name);
690 if (name == NULL) {
691 ctxt->errNo = XML_ERR_PEREF_NO_NAME;
692 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
693 ctxt->sax->error(ctxt->userData, "xmlHandlePEReference: no name\n");
694 ctxt->wellFormed = 0;
695 ctxt->disableSAX = 1;
696 } else {
697 if (RAW == ';') {
698 NEXT;
699 if ((ctxt->sax != NULL) && (ctxt->sax->getParameterEntity != NULL))
700 entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
701 if (entity == NULL) {
702
703 /*
704 * [ WFC: Entity Declared ]
705 * In a document without any DTD, a document with only an
706 * internal DTD subset which contains no parameter entity
707 * references, or a document with "standalone='yes'", ...
708 * ... The declaration of a parameter entity must precede
709 * any reference to it...
710 */
711 if ((ctxt->standalone == 1) ||
712 ((ctxt->hasExternalSubset == 0) &&
713 (ctxt->hasPErefs == 0))) {
714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
715 ctxt->sax->error(ctxt->userData,
716 "PEReference: %%%s; not found\n", name);
717 ctxt->wellFormed = 0;
718 ctxt->disableSAX = 1;
719 } else {
720 /*
721 * [ VC: Entity Declared ]
722 * In a document with an external subset or external
723 * parameter entities with "standalone='no'", ...
724 * ... The declaration of a parameter entity must precede
725 * any reference to it...
726 */
727 if ((!ctxt->disableSAX) &&
728 (ctxt->validate) && (ctxt->vctxt.error != NULL)) {
729 ctxt->vctxt.error(ctxt->vctxt.userData,
730 "PEReference: %%%s; not found\n", name);
731 } else if ((!ctxt->disableSAX) &&
732 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
733 ctxt->sax->warning(ctxt->userData,
734 "PEReference: %%%s; not found\n", name);
735 ctxt->valid = 0;
736 }
737 } else {
738 if ((entity->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
739 (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY)) {
740 /*
741 * handle the extra spaces added before and after
742 * c.f. http://www.w3.org/TR/REC-xml#as-PE
743 * this is done independantly.
744 */
745 input = xmlNewEntityInputStream(ctxt, entity);
746 xmlPushInput(ctxt, input);
747 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
748 (RAW == '<') && (NXT(1) == '?') &&
749 (NXT(2) == 'x') && (NXT(3) == 'm') &&
750 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
751 xmlParseTextDecl(ctxt);
752 }
753 if (ctxt->token == 0)
754 ctxt->token = ' ';
755 } else {
756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
757 ctxt->sax->error(ctxt->userData,
758 "xmlHandlePEReference: %s is not a parameter entity\n",
759 name);
760 ctxt->wellFormed = 0;
761 ctxt->disableSAX = 1;
762 }
763 }
764 } else {
765 ctxt->errNo = XML_ERR_PEREF_SEMICOL_MISSING;
766 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
767 ctxt->sax->error(ctxt->userData,
768 "xmlHandlePEReference: expecting ';'\n");
769 ctxt->wellFormed = 0;
770 ctxt->disableSAX = 1;
771 }
772 xmlFree(name);
773 }
774}
775
/*
 * Macro used to grow the current buffer.
 *
 * growBuffer(buf) doubles buf_size and reallocates buf to that size; both
 * variables must be visible where the macro is expanded.  The result of
 * the reallocation goes through a temporary so the old block is not lost:
 * if the reallocation fails "realloc failed" is reported with perror(),
 * the old buffer is released and the ENCLOSING FUNCTION returns NULL.
 * The macro can thus only be used in functions returning a pointer, and
 * on a buffer that the function owns.
 */
#define growBuffer(buffer) {                                            \
    xmlChar *xmlGrownBuf;                                               \
    buffer##_size *= 2;                                                 \
    xmlGrownBuf = (xmlChar *)                                           \
                xmlRealloc(buffer, buffer##_size * sizeof(xmlChar));    \
    if (xmlGrownBuf == NULL) {                                          \
        perror("realloc failed");                                       \
        xmlFree(buffer);                                                \
        return(NULL);                                                   \
    }                                                                   \
    buffer = xmlGrownBuf;                                               \
}
788
789/**
790 * xmlStringDecodeEntities:
791 * @ctxt: the parser context
792 * @str: the input string
793 * @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
794 * @end: an end marker xmlChar, 0 if none
795 * @end2: an end marker xmlChar, 0 if none
796 * @end3: an end marker xmlChar, 0 if none
797 *
798 * Takes a entity string content and process to do the adequate subtitutions.
799 *
800 * [67] Reference ::= EntityRef | CharRef
801 *
802 * [69] PEReference ::= '%' Name ';'
803 *
804 * Returns A newly allocated string with the substitution done. The caller
805 * must deallocate it !
806 */
807xmlChar *
808xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
809 xmlChar end, xmlChar end2, xmlChar end3) {
810 xmlChar *buffer = NULL;
811 int buffer_size = 0;
812
813 xmlChar *current = NULL;
814 xmlEntityPtr ent;
815 int c,l;
816 int nbchars = 0;
817
818 if (str == NULL)
819 return(NULL);
820
821 if (ctxt->depth > 40) {
822 ctxt->errNo = XML_ERR_ENTITY_LOOP;
823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
824 ctxt->sax->error(ctxt->userData,
825 "Detected entity reference loop\n");
826 ctxt->wellFormed = 0;
827 ctxt->disableSAX = 1;
828 return(NULL);
829 }
830
831 /*
832 * allocate a translation buffer.
833 */
834 buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
835 buffer = (xmlChar *) xmlMalloc(buffer_size * sizeof(xmlChar));
836 if (buffer == NULL) {
837 perror("xmlDecodeEntities: malloc failed");
838 return(NULL);
839 }
840
841 /*
842 * Ok loop until we reach one of the ending char or a size limit.
843 * we are operating on already parsed values.
844 */
845 c = CUR_SCHAR(str, l);
846 while ((c != 0) && (c != end) && /* non input consuming loop */
847 (c != end2) && (c != end3)) {
848
849 if (c == 0) break;
850 if ((c == '&') && (str[1] == '#')) {
851 int val = xmlParseStringCharRef(ctxt, &str);
852 if (val != 0) {
853 COPY_BUF(0,buffer,nbchars,val);
854 }
855 } else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
856 if (xmlParserDebugEntities)
857 xmlGenericError(xmlGenericErrorContext,
858 "String decoding Entity Reference: %.30s\n",
859 str);
860 ent = xmlParseStringEntityRef(ctxt, &str);
861 if ((ent != NULL) &&
862 (ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
863 if (ent->content != NULL) {
864 COPY_BUF(0,buffer,nbchars,ent->content[0]);
865 } else {
866 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
867 ctxt->sax->error(ctxt->userData,
868 "internal error entity has no content\n");
869 }
870 } else if ((ent != NULL) && (ent->content != NULL)) {
871 xmlChar *rep;
872
873 ctxt->depth++;
874 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
875 0, 0, 0);
876 ctxt->depth--;
877 if (rep != NULL) {
878 current = rep;
879 while (*current != 0) { /* non input consuming loop */
880 buffer[nbchars++] = *current++;
881 if (nbchars >
882 buffer_size - XML_PARSER_BUFFER_SIZE) {
883 growBuffer(buffer);
884 }
885 }
886 xmlFree(rep);
887 }
888 } else if (ent != NULL) {
889 int i = xmlStrlen(ent->name);
890 const xmlChar *cur = ent->name;
891
892 buffer[nbchars++] = '&';
893 if (nbchars > buffer_size - i - XML_PARSER_BUFFER_SIZE) {
894 growBuffer(buffer);
895 }
896 for (;i > 0;i--)
897 buffer[nbchars++] = *cur++;
898 buffer[nbchars++] = ';';
899 }
900 } else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
901 if (xmlParserDebugEntities)
902 xmlGenericError(xmlGenericErrorContext,
903 "String decoding PE Reference: %.30s\n", str);
904 ent = xmlParseStringPEReference(ctxt, &str);
905 if (ent != NULL) {
906 xmlChar *rep;
907
908 ctxt->depth++;
909 rep = xmlStringDecodeEntities(ctxt, ent->content, what,
910 0, 0, 0);
911 ctxt->depth--;
912 if (rep != NULL) {
913 current = rep;
914 while (*current != 0) { /* non input consuming loop */
915 buffer[nbchars++] = *current++;
916 if (nbchars >
917 buffer_size - XML_PARSER_BUFFER_SIZE) {
918 growBuffer(buffer);
919 }
920 }
921 xmlFree(rep);
922 }
923 }
924 } else {
925 COPY_BUF(l,buffer,nbchars,c);
926 str += l;
927 if (nbchars > buffer_size - XML_PARSER_BUFFER_SIZE) {
928 growBuffer(buffer);
929 }
930 }
931 c = CUR_SCHAR(str, l);
932 }
933 buffer[nbchars++] = 0;
934 return(buffer);
935}
936
937
938/************************************************************************
939 * *
940 * Commodity functions to handle xmlChars *
941 * *
942 ************************************************************************/
943
944/**
945 * xmlStrndup:
946 * @cur: the input xmlChar *
947 * @len: the len of @cur
948 *
949 * a strndup for array of xmlChar's
950 *
951 * Returns a new xmlChar * or NULL
952 */
953xmlChar *
954xmlStrndup(const xmlChar *cur, int len) {
955 xmlChar *ret;
956
957 if ((cur == NULL) || (len < 0)) return(NULL);
958 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
959 if (ret == NULL) {
960 xmlGenericError(xmlGenericErrorContext,
961 "malloc of %ld byte failed\n",
962 (len + 1) * (long)sizeof(xmlChar));
963 return(NULL);
964 }
965 memcpy(ret, cur, len * sizeof(xmlChar));
966 ret[len] = 0;
967 return(ret);
968}
969
970/**
971 * xmlStrdup:
972 * @cur: the input xmlChar *
973 *
974 * a strdup for array of xmlChar's. Since they are supposed to be
975 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
976 * a termination mark of '0'.
977 *
978 * Returns a new xmlChar * or NULL
979 */
980xmlChar *
981xmlStrdup(const xmlChar *cur) {
982 const xmlChar *p = cur;
983
984 if (cur == NULL) return(NULL);
985 while (*p != 0) p++; /* non input consuming */
986 return(xmlStrndup(cur, p - cur));
987}
988
989/**
990 * xmlCharStrndup:
991 * @cur: the input char *
992 * @len: the len of @cur
993 *
994 * a strndup for char's to xmlChar's
995 *
996 * Returns a new xmlChar * or NULL
997 */
998
999xmlChar *
1000xmlCharStrndup(const char *cur, int len) {
1001 int i;
1002 xmlChar *ret;
1003
1004 if ((cur == NULL) || (len < 0)) return(NULL);
1005 ret = (xmlChar *) xmlMalloc((len + 1) * sizeof(xmlChar));
1006 if (ret == NULL) {
1007 xmlGenericError(xmlGenericErrorContext, "malloc of %ld byte failed\n",
1008 (len + 1) * (long)sizeof(xmlChar));
1009 return(NULL);
1010 }
1011 for (i = 0;i < len;i++)
1012 ret[i] = (xmlChar) cur[i];
1013 ret[len] = 0;
1014 return(ret);
1015}
1016
1017/**
1018 * xmlCharStrdup:
1019 * @cur: the input char *
1020 * @len: the len of @cur
1021 *
1022 * a strdup for char's to xmlChar's
1023 *
1024 * Returns a new xmlChar * or NULL
1025 */
1026
1027xmlChar *
1028xmlCharStrdup(const char *cur) {
1029 const char *p = cur;
1030
1031 if (cur == NULL) return(NULL);
1032 while (*p != '\0') p++; /* non input consuming */
1033 return(xmlCharStrndup(cur, p - cur));
1034}
1035
1036/**
1037 * xmlStrcmp:
1038 * @str1: the first xmlChar *
1039 * @str2: the second xmlChar *
1040 *
1041 * a strcmp for xmlChar's
1042 *
1043 * Returns the integer result of the comparison
1044 */
1045
1046int
1047xmlStrcmp(const xmlChar *str1, const xmlChar *str2) {
1048 register int tmp;
1049
1050 if (str1 == str2) return(0);
1051 if (str1 == NULL) return(-1);
1052 if (str2 == NULL) return(1);
1053 do {
1054 tmp = *str1++ - *str2;
1055 if (tmp != 0) return(tmp);
1056 } while (*str2++ != 0);
1057 return 0;
1058}
1059
1060/**
1061 * xmlStrEqual:
1062 * @str1: the first xmlChar *
1063 * @str2: the second xmlChar *
1064 *
1065 * Check if both string are equal of have same content
1066 * Should be a bit more readable and faster than xmlStrEqual()
1067 *
1068 * Returns 1 if they are equal, 0 if they are different
1069 */
1070
1071int
1072xmlStrEqual(const xmlChar *str1, const xmlChar *str2) {
1073 if (str1 == str2) return(1);
1074 if (str1 == NULL) return(0);
1075 if (str2 == NULL) return(0);
1076 do {
1077 if (*str1++ != *str2) return(0);
1078 } while (*str2++);
1079 return(1);
1080}
1081
1082/**
1083 * xmlStrncmp:
1084 * @str1: the first xmlChar *
1085 * @str2: the second xmlChar *
1086 * @len: the max comparison length
1087 *
1088 * a strncmp for xmlChar's
1089 *
1090 * Returns the integer result of the comparison
1091 */
1092
1093int
1094xmlStrncmp(const xmlChar *str1, const xmlChar *str2, int len) {
1095 register int tmp;
1096
1097 if (len <= 0) return(0);
1098 if (str1 == str2) return(0);
1099 if (str1 == NULL) return(-1);
1100 if (str2 == NULL) return(1);
1101 do {
1102 tmp = *str1++ - *str2;
1103 if (tmp != 0 || --len == 0) return(tmp);
1104 } while (*str2++ != 0);
1105 return 0;
1106}
1107
1108static xmlChar casemap[256] = {
1109 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
1110 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
1111 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
1112 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
1113 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
1114 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
1115 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
1116 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
1117 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1118 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1119 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1120 0x78,0x79,0x7A,0x7B,0x5C,0x5D,0x5E,0x5F,
1121 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
1122 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
1123 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
1124 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
1125 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
1126 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
1127 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
1128 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
1129 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
1130 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
1131 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
1132 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
1133 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
1134 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
1135 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
1136 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
1137 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
1138 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
1139 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
1140 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
1141};
1142
1143/**
1144 * xmlStrcasecmp:
1145 * @str1: the first xmlChar *
1146 * @str2: the second xmlChar *
1147 *
1148 * a strcasecmp for xmlChar's
1149 *
1150 * Returns the integer result of the comparison
1151 */
1152
1153int
1154xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) {
1155 register int tmp;
1156
1157 if (str1 == str2) return(0);
1158 if (str1 == NULL) return(-1);
1159 if (str2 == NULL) return(1);
1160 do {
1161 tmp = casemap[*str1++] - casemap[*str2];
1162 if (tmp != 0) return(tmp);
1163 } while (*str2++ != 0);
1164 return 0;
1165}
1166
1167/**
1168 * xmlStrncasecmp:
1169 * @str1: the first xmlChar *
1170 * @str2: the second xmlChar *
1171 * @len: the max comparison length
1172 *
1173 * a strncasecmp for xmlChar's
1174 *
1175 * Returns the integer result of the comparison
1176 */
1177
1178int
1179xmlStrncasecmp(const xmlChar *str1, const xmlChar *str2, int len) {
1180 register int tmp;
1181
1182 if (len <= 0) return(0);
1183 if (str1 == str2) return(0);
1184 if (str1 == NULL) return(-1);
1185 if (str2 == NULL) return(1);
1186 do {
1187 tmp = casemap[*str1++] - casemap[*str2];
1188 if (tmp != 0 || --len == 0) return(tmp);
1189 } while (*str2++ != 0);
1190 return 0;
1191}
1192
1193/**
1194 * xmlStrchr:
1195 * @str: the xmlChar * array
1196 * @val: the xmlChar to search
1197 *
1198 * a strchr for xmlChar's
1199 *
1200 * Returns the xmlChar * for the first occurence or NULL.
1201 */
1202
1203const xmlChar *
1204xmlStrchr(const xmlChar *str, xmlChar val) {
1205 if (str == NULL) return(NULL);
1206 while (*str != 0) { /* non input consuming */
1207 if (*str == val) return((xmlChar *) str);
1208 str++;
1209 }
1210 return(NULL);
1211}
1212
1213/**
1214 * xmlStrstr:
1215 * @str: the xmlChar * array (haystack)
1216 * @val: the xmlChar to search (needle)
1217 *
1218 * a strstr for xmlChar's
1219 *
1220 * Returns the xmlChar * for the first occurence or NULL.
1221 */
1222
1223const xmlChar *
1224xmlStrstr(const xmlChar *str, xmlChar *val) {
1225 int n;
1226
1227 if (str == NULL) return(NULL);
1228 if (val == NULL) return(NULL);
1229 n = xmlStrlen(val);
1230
1231 if (n == 0) return(str);
1232 while (*str != 0) { /* non input consuming */
1233 if (*str == *val) {
1234 if (!xmlStrncmp(str, val, n)) return((const xmlChar *) str);
1235 }
1236 str++;
1237 }
1238 return(NULL);
1239}
1240
1241/**
1242 * xmlStrcasestr:
1243 * @str: the xmlChar * array (haystack)
1244 * @val: the xmlChar to search (needle)
1245 *
1246 * a case-ignoring strstr for xmlChar's
1247 *
1248 * Returns the xmlChar * for the first occurence or NULL.
1249 */
1250
1251const xmlChar *
1252xmlStrcasestr(const xmlChar *str, xmlChar *val) {
1253 int n;
1254
1255 if (str == NULL) return(NULL);
1256 if (val == NULL) return(NULL);
1257 n = xmlStrlen(val);
1258
1259 if (n == 0) return(str);
1260 while (*str != 0) { /* non input consuming */
1261 if (casemap[*str] == casemap[*val])
1262 if (!xmlStrncasecmp(str, val, n)) return(str);
1263 str++;
1264 }
1265 return(NULL);
1266}
1267
1268/**
1269 * xmlStrsub:
1270 * @str: the xmlChar * array (haystack)
1271 * @start: the index of the first char (zero based)
1272 * @len: the length of the substring
1273 *
1274 * Extract a substring of a given string
1275 *
1276 * Returns the xmlChar * for the first occurence or NULL.
1277 */
1278
1279xmlChar *
1280xmlStrsub(const xmlChar *str, int start, int len) {
1281 int i;
1282
1283 if (str == NULL) return(NULL);
1284 if (start < 0) return(NULL);
1285 if (len < 0) return(NULL);
1286
1287 for (i = 0;i < start;i++) {
1288 if (*str == 0) return(NULL);
1289 str++;
1290 }
1291 if (*str == 0) return(NULL);
1292 return(xmlStrndup(str, len));
1293}
1294
1295/**
1296 * xmlStrlen:
1297 * @str: the xmlChar * array
1298 *
1299 * length of a xmlChar's string
1300 *
1301 * Returns the number of xmlChar contained in the ARRAY.
1302 */
1303
1304int
1305xmlStrlen(const xmlChar *str) {
1306 int len = 0;
1307
1308 if (str == NULL) return(0);
1309 while (*str != 0) { /* non input consuming */
1310 str++;
1311 len++;
1312 }
1313 return(len);
1314}
1315
1316/**
1317 * xmlStrncat:
1318 * @cur: the original xmlChar * array
1319 * @add: the xmlChar * array added
1320 * @len: the length of @add
1321 *
1322 * a strncat for array of xmlChar's, it will extend cur with the len
1323 * first bytes of @add.
1324 *
1325 * Returns a new xmlChar *, the original @cur is reallocated if needed
1326 * and should not be freed
1327 */
1328
1329xmlChar *
1330xmlStrncat(xmlChar *cur, const xmlChar *add, int len) {
1331 int size;
1332 xmlChar *ret;
1333
1334 if ((add == NULL) || (len == 0))
1335 return(cur);
1336 if (cur == NULL)
1337 return(xmlStrndup(add, len));
1338
1339 size = xmlStrlen(cur);
1340 ret = (xmlChar *) xmlRealloc(cur, (size + len + 1) * sizeof(xmlChar));
1341 if (ret == NULL) {
1342 xmlGenericError(xmlGenericErrorContext,
1343 "xmlStrncat: realloc of %ld byte failed\n",
1344 (size + len + 1) * (long)sizeof(xmlChar));
1345 return(cur);
1346 }
1347 memcpy(&ret[size], add, len * sizeof(xmlChar));
1348 ret[size + len] = 0;
1349 return(ret);
1350}
1351
1352/**
1353 * xmlStrcat:
1354 * @cur: the original xmlChar * array
1355 * @add: the xmlChar * array added
1356 *
1357 * a strcat for array of xmlChar's. Since they are supposed to be
1358 * encoded in UTF-8 or an encoding with 8bit based chars, we assume
1359 * a termination mark of '0'.
1360 *
1361 * Returns a new xmlChar * containing the concatenated string.
1362 */
1363xmlChar *
1364xmlStrcat(xmlChar *cur, const xmlChar *add) {
1365 const xmlChar *p = add;
1366
1367 if (add == NULL) return(cur);
1368 if (cur == NULL)
1369 return(xmlStrdup(add));
1370
1371 while (*p != 0) p++; /* non input consuming */
1372 return(xmlStrncat(cur, add, p - add));
1373}
1374
1375/************************************************************************
1376 * *
1377 * Commodity functions, cleanup needed ? *
1378 * *
1379 ************************************************************************/
1380
1381/**
1382 * areBlanks:
1383 * @ctxt: an XML parser context
1384 * @str: a xmlChar *
1385 * @len: the size of @str
1386 *
1387 * Is this a sequence of blank chars that one can ignore ?
1388 *
1389 * Returns 1 if ignorable 0 otherwise.
1390 */
1391
1392static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len) {
1393 int i, ret;
1394 xmlNodePtr lastChild;
1395
1396 /*
1397 * Check for xml:space value.
1398 */
1399 if (*(ctxt->space) == 1)
1400 return(0);
1401
1402 /*
1403 * Check that the string is made of blanks
1404 */
1405 for (i = 0;i < len;i++)
1406 if (!(IS_BLANK(str[i]))) return(0);
1407
1408 /*
1409 * Look if the element is mixed content in the Dtd if available
1410 */
1411 if (ctxt->myDoc != NULL) {
1412 ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
1413 if (ret == 0) return(1);
1414 if (ret == 1) return(0);
1415 }
1416
1417 /*
1418 * Otherwise, heuristic :-\
1419 */
1420 if (ctxt->keepBlanks)
1421 return(0);
1422 if (RAW != '<') return(0);
1423 if (ctxt->node == NULL) return(0);
1424 if ((ctxt->node->children == NULL) &&
1425 (RAW == '<') && (NXT(1) == '/')) return(0);
1426
1427 lastChild = xmlGetLastChild(ctxt->node);
1428 if (lastChild == NULL) {
1429 if (ctxt->node->content != NULL) return(0);
1430 } else if (xmlNodeIsText(lastChild))
1431 return(0);
1432 else if ((ctxt->node->children != NULL) &&
1433 (xmlNodeIsText(ctxt->node->children)))
1434 return(0);
1435 return(1);
1436}
1437
/*
 * Forward declarations needed for the recursive behaviour of the
 * reference parsing routines.
 */
void xmlParsePEReference(xmlParserCtxtPtr ctxt);
void xmlParseReference(xmlParserCtxtPtr ctxt);

1443
1444/************************************************************************
1445 * *
1446 * Extra stuff for namespace support *
1447 * Relates to http://www.w3.org/TR/WD-xml-names *
1448 * *
1449 ************************************************************************/
1450
1451/**
1452 * xmlSplitQName:
1453 * @ctxt: an XML parser context
1454 * @name: an XML parser context
1455 * @prefix: a xmlChar **
1456 *
1457 * parse an UTF8 encoded XML qualified name string
1458 *
1459 * [NS 5] QName ::= (Prefix ':')? LocalPart
1460 *
1461 * [NS 6] Prefix ::= NCName
1462 *
1463 * [NS 7] LocalPart ::= NCName
1464 *
1465 * Returns the local part, and prefix is updated
1466 * to get the Prefix if any.
1467 */
1468
1469xmlChar *
1470xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
1471 xmlChar buf[XML_MAX_NAMELEN + 5];
1472 xmlChar *buffer = NULL;
1473 int len = 0;
1474 int max = XML_MAX_NAMELEN;
1475 xmlChar *ret = NULL;
1476 const xmlChar *cur = name;
1477 int c;
1478
1479 *prefix = NULL;
1480
1481#ifndef XML_XML_NAMESPACE
1482 /* xml: prefix is not really a namespace */
1483 if ((cur[0] == 'x') && (cur[1] == 'm') &&
1484 (cur[2] == 'l') && (cur[3] == ':'))
1485 return(xmlStrdup(name));
1486#endif
1487
1488 /* nasty but valid */
1489 if (cur[0] == ':')
1490 return(xmlStrdup(name));
1491
1492 c = *cur++;
1493 while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
1494 buf[len++] = c;
1495 c = *cur++;
1496 }
1497 if (len >= max) {
1498 /*
1499 * Okay someone managed to make a huge name, so he's ready to pay
1500 * for the processing speed.
1501 */
1502 max = len * 2;
1503
1504 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1505 if (buffer == NULL) {
1506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1507 ctxt->sax->error(ctxt->userData,
1508 "xmlSplitQName: out of memory\n");
1509 return(NULL);
1510 }
1511 memcpy(buffer, buf, len);
1512 while ((c != 0) && (c != ':')) { /* tested bigname.xml */
1513 if (len + 10 > max) {
1514 max *= 2;
1515 buffer = (xmlChar *) xmlRealloc(buffer,
1516 max * sizeof(xmlChar));
1517 if (buffer == NULL) {
1518 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1519 ctxt->sax->error(ctxt->userData,
1520 "xmlSplitQName: out of memory\n");
1521 return(NULL);
1522 }
1523 }
1524 buffer[len++] = c;
1525 c = *cur++;
1526 }
1527 buffer[len] = 0;
1528 }
1529
1530 if (buffer == NULL)
1531 ret = xmlStrndup(buf, len);
1532 else {
1533 ret = buffer;
1534 buffer = NULL;
1535 max = XML_MAX_NAMELEN;
1536 }
1537
1538
1539 if (c == ':') {
1540 c = *cur++;
1541 if (c == 0) return(ret);
1542 *prefix = ret;
1543 len = 0;
1544
1545 while ((c != 0) && (len < max)) { /* tested bigname2.xml */
1546 buf[len++] = c;
1547 c = *cur++;
1548 }
1549 if (len >= max) {
1550 /*
1551 * Okay someone managed to make a huge name, so he's ready to pay
1552 * for the processing speed.
1553 */
1554 max = len * 2;
1555
1556 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1557 if (buffer == NULL) {
1558 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1559 ctxt->sax->error(ctxt->userData,
1560 "xmlSplitQName: out of memory\n");
1561 return(NULL);
1562 }
1563 memcpy(buffer, buf, len);
1564 while (c != 0) { /* tested bigname2.xml */
1565 if (len + 10 > max) {
1566 max *= 2;
1567 buffer = (xmlChar *) xmlRealloc(buffer,
1568 max * sizeof(xmlChar));
1569 if (buffer == NULL) {
1570 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1571 ctxt->sax->error(ctxt->userData,
1572 "xmlSplitQName: out of memory\n");
1573 return(NULL);
1574 }
1575 }
1576 buffer[len++] = c;
1577 c = *cur++;
1578 }
1579 buffer[len] = 0;
1580 }
1581
1582 if (buffer == NULL)
1583 ret = xmlStrndup(buf, len);
1584 else {
1585 ret = buffer;
1586 }
1587 }
1588
1589 return(ret);
1590}
1591
1592/************************************************************************
1593 * *
1594 * The parser itself *
1595 * Relates to http://www.w3.org/TR/REC-xml *
1596 * *
1597 ************************************************************************/
1598
Daniel Veillard21a0f912001-02-25 19:54:14 +00001599xmlChar *xmlParseNameComplex(xmlParserCtxtPtr ctxt);
Owen Taylor3473f882001-02-23 17:55:21 +00001600/**
1601 * xmlParseName:
1602 * @ctxt: an XML parser context
1603 *
1604 * parse an XML name.
1605 *
1606 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1607 * CombiningChar | Extender
1608 *
1609 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1610 *
1611 * [6] Names ::= Name (S Name)*
1612 *
1613 * Returns the Name parsed or NULL
1614 */
1615
1616xmlChar *
1617xmlParseName(xmlParserCtxtPtr ctxt) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00001618 const xmlChar *in;
1619 xmlChar *ret;
Owen Taylor3473f882001-02-23 17:55:21 +00001620 int count = 0;
1621
1622 GROW;
Daniel Veillard48b2f892001-02-25 16:11:03 +00001623
1624 /*
1625 * Accelerator for simple ASCII names
1626 */
1627 in = ctxt->input->cur;
1628 if (((*in >= 0x61) && (*in <= 0x7A)) ||
1629 ((*in >= 0x41) && (*in <= 0x5A)) ||
1630 (*in == '_') || (*in == ':')) {
1631 in++;
1632 while (((*in >= 0x61) && (*in <= 0x7A)) ||
1633 ((*in >= 0x41) && (*in <= 0x5A)) ||
1634 ((*in >= 0x30) && (*in <= 0x39)) ||
1635 (*in == '_') || (*in == ':'))
1636 in++;
1637 if ((*in == ' ') || (*in == '>') || (*in == '/')) {
1638 count = in - ctxt->input->cur;
1639 ret = xmlStrndup(ctxt->input->cur, count);
1640 ctxt->input->cur = in;
1641 return(ret);
1642 }
1643 }
Daniel Veillard21a0f912001-02-25 19:54:14 +00001644 xmlParseNameComplex(ctxt);
1645}
Daniel Veillard48b2f892001-02-25 16:11:03 +00001646
Daniel Veillard21a0f912001-02-25 19:54:14 +00001647xmlChar *
1648xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
1649 xmlChar buf[XML_MAX_NAMELEN + 5];
1650 int len = 0, l;
1651 int c;
1652 int count = 0;
1653
1654 /*
1655 * Handler for more complex cases
1656 */
1657 GROW;
Owen Taylor3473f882001-02-23 17:55:21 +00001658 c = CUR_CHAR(l);
1659 if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
1660 (!IS_LETTER(c) && (c != '_') &&
1661 (c != ':'))) {
1662 return(NULL);
1663 }
1664
1665 while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
1666 ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
1667 (c == '.') || (c == '-') ||
1668 (c == '_') || (c == ':') ||
1669 (IS_COMBINING(c)) ||
1670 (IS_EXTENDER(c)))) {
1671 if (count++ > 100) {
1672 count = 0;
1673 GROW;
1674 }
1675 COPY_BUF(l,buf,len,c);
1676 NEXTL(l);
1677 c = CUR_CHAR(l);
1678 if (len >= XML_MAX_NAMELEN) {
1679 /*
1680 * Okay someone managed to make a huge name, so he's ready to pay
1681 * for the processing speed.
1682 */
1683 xmlChar *buffer;
1684 int max = len * 2;
1685
1686 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1687 if (buffer == NULL) {
1688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1689 ctxt->sax->error(ctxt->userData,
1690 "xmlParseName: out of memory\n");
1691 return(NULL);
1692 }
1693 memcpy(buffer, buf, len);
1694 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigname.xml */
1695 (c == '.') || (c == '-') ||
1696 (c == '_') || (c == ':') ||
1697 (IS_COMBINING(c)) ||
1698 (IS_EXTENDER(c))) {
1699 if (count++ > 100) {
1700 count = 0;
1701 GROW;
1702 }
1703 if (len + 10 > max) {
1704 max *= 2;
1705 buffer = (xmlChar *) xmlRealloc(buffer,
1706 max * sizeof(xmlChar));
1707 if (buffer == NULL) {
1708 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1709 ctxt->sax->error(ctxt->userData,
1710 "xmlParseName: out of memory\n");
1711 return(NULL);
1712 }
1713 }
1714 COPY_BUF(l,buffer,len,c);
1715 NEXTL(l);
1716 c = CUR_CHAR(l);
1717 }
1718 buffer[len] = 0;
1719 return(buffer);
1720 }
1721 }
1722 return(xmlStrndup(buf, len));
1723}
1724
1725/**
1726 * xmlParseStringName:
1727 * @ctxt: an XML parser context
1728 * @str: a pointer to the string pointer (IN/OUT)
1729 *
1730 * parse an XML name.
1731 *
1732 * [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
1733 * CombiningChar | Extender
1734 *
1735 * [5] Name ::= (Letter | '_' | ':') (NameChar)*
1736 *
1737 * [6] Names ::= Name (S Name)*
1738 *
1739 * Returns the Name parsed or NULL. The str pointer
1740 * is updated to the current location in the string.
1741 */
1742
1743xmlChar *
1744xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
1745 xmlChar buf[XML_MAX_NAMELEN + 5];
1746 const xmlChar *cur = *str;
1747 int len = 0, l;
1748 int c;
1749
1750 c = CUR_SCHAR(cur, l);
1751 if (!IS_LETTER(c) && (c != '_') &&
1752 (c != ':')) {
1753 return(NULL);
1754 }
1755
1756 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1757 (c == '.') || (c == '-') ||
1758 (c == '_') || (c == ':') ||
1759 (IS_COMBINING(c)) ||
1760 (IS_EXTENDER(c))) {
1761 COPY_BUF(l,buf,len,c);
1762 cur += l;
1763 c = CUR_SCHAR(cur, l);
1764 if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
1765 /*
1766 * Okay someone managed to make a huge name, so he's ready to pay
1767 * for the processing speed.
1768 */
1769 xmlChar *buffer;
1770 int max = len * 2;
1771
1772 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1773 if (buffer == NULL) {
1774 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1775 ctxt->sax->error(ctxt->userData,
1776 "xmlParseStringName: out of memory\n");
1777 return(NULL);
1778 }
1779 memcpy(buffer, buf, len);
1780 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigentname.xml */
1781 (c == '.') || (c == '-') ||
1782 (c == '_') || (c == ':') ||
1783 (IS_COMBINING(c)) ||
1784 (IS_EXTENDER(c))) {
1785 if (len + 10 > max) {
1786 max *= 2;
1787 buffer = (xmlChar *) xmlRealloc(buffer,
1788 max * sizeof(xmlChar));
1789 if (buffer == NULL) {
1790 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1791 ctxt->sax->error(ctxt->userData,
1792 "xmlParseStringName: out of memory\n");
1793 return(NULL);
1794 }
1795 }
1796 COPY_BUF(l,buffer,len,c);
1797 cur += l;
1798 c = CUR_SCHAR(cur, l);
1799 }
1800 buffer[len] = 0;
1801 *str = cur;
1802 return(buffer);
1803 }
1804 }
1805 *str = cur;
1806 return(xmlStrndup(buf, len));
1807}
1808
1809/**
1810 * xmlParseNmtoken:
1811 * @ctxt: an XML parser context
1812 *
1813 * parse an XML Nmtoken.
1814 *
1815 * [7] Nmtoken ::= (NameChar)+
1816 *
1817 * [8] Nmtokens ::= Nmtoken (S Nmtoken)*
1818 *
1819 * Returns the Nmtoken parsed or NULL
1820 */
1821
1822xmlChar *
1823xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
1824 xmlChar buf[XML_MAX_NAMELEN + 5];
1825 int len = 0, l;
1826 int c;
1827 int count = 0;
1828
1829 GROW;
1830 c = CUR_CHAR(l);
1831
1832 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1833 (c == '.') || (c == '-') ||
1834 (c == '_') || (c == ':') ||
1835 (IS_COMBINING(c)) ||
1836 (IS_EXTENDER(c))) {
1837 if (count++ > 100) {
1838 count = 0;
1839 GROW;
1840 }
1841 COPY_BUF(l,buf,len,c);
1842 NEXTL(l);
1843 c = CUR_CHAR(l);
1844 if (len >= XML_MAX_NAMELEN) {
1845 /*
1846 * Okay someone managed to make a huge token, so he's ready to pay
1847 * for the processing speed.
1848 */
1849 xmlChar *buffer;
1850 int max = len * 2;
1851
1852 buffer = (xmlChar *) xmlMalloc(max * sizeof(xmlChar));
1853 if (buffer == NULL) {
1854 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1855 ctxt->sax->error(ctxt->userData,
1856 "xmlParseNmtoken: out of memory\n");
1857 return(NULL);
1858 }
1859 memcpy(buffer, buf, len);
1860 while ((IS_LETTER(c)) || (IS_DIGIT(c)) || /* test bigtoken.xml */
1861 (c == '.') || (c == '-') ||
1862 (c == '_') || (c == ':') ||
1863 (IS_COMBINING(c)) ||
1864 (IS_EXTENDER(c))) {
1865 if (count++ > 100) {
1866 count = 0;
1867 GROW;
1868 }
1869 if (len + 10 > max) {
1870 max *= 2;
1871 buffer = (xmlChar *) xmlRealloc(buffer,
1872 max * sizeof(xmlChar));
1873 if (buffer == NULL) {
1874 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1875 ctxt->sax->error(ctxt->userData,
1876 "xmlParseName: out of memory\n");
1877 return(NULL);
1878 }
1879 }
1880 COPY_BUF(l,buffer,len,c);
1881 NEXTL(l);
1882 c = CUR_CHAR(l);
1883 }
1884 buffer[len] = 0;
1885 return(buffer);
1886 }
1887 }
1888 if (len == 0)
1889 return(NULL);
1890 return(xmlStrndup(buf, len));
1891}
1892
1893/**
1894 * xmlParseEntityValue:
1895 * @ctxt: an XML parser context
1896 * @orig: if non-NULL store a copy of the original entity value
1897 *
1898 * parse a value for ENTITY declarations
1899 *
1900 * [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
1901 * "'" ([^%&'] | PEReference | Reference)* "'"
1902 *
1903 * Returns the EntityValue parsed with reference substitued or NULL
1904 */
1905
1906xmlChar *
1907xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
1908 xmlChar *buf = NULL;
1909 int len = 0;
1910 int size = XML_PARSER_BUFFER_SIZE;
1911 int c, l;
1912 xmlChar stop;
1913 xmlChar *ret = NULL;
1914 const xmlChar *cur = NULL;
1915 xmlParserInputPtr input;
1916
1917 if (RAW == '"') stop = '"';
1918 else if (RAW == '\'') stop = '\'';
1919 else {
1920 ctxt->errNo = XML_ERR_ENTITY_NOT_STARTED;
1921 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1922 ctxt->sax->error(ctxt->userData, "EntityValue: \" or ' expected\n");
1923 ctxt->wellFormed = 0;
1924 ctxt->disableSAX = 1;
1925 return(NULL);
1926 }
1927 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
1928 if (buf == NULL) {
1929 xmlGenericError(xmlGenericErrorContext,
1930 "malloc of %d byte failed\n", size);
1931 return(NULL);
1932 }
1933
1934 /*
1935 * The content of the entity definition is copied in a buffer.
1936 */
1937
1938 ctxt->instate = XML_PARSER_ENTITY_VALUE;
1939 input = ctxt->input;
1940 GROW;
1941 NEXT;
1942 c = CUR_CHAR(l);
1943 /*
1944 * NOTE: 4.4.5 Included in Literal
1945 * When a parameter entity reference appears in a literal entity
1946 * value, ... a single or double quote character in the replacement
1947 * text is always treated as a normal data character and will not
1948 * terminate the literal.
1949 * In practice it means we stop the loop only when back at parsing
1950 * the initial entity and the quote is found
1951 */
1952 while ((IS_CHAR(c)) && ((c != stop) || /* checked */
1953 (ctxt->input != input))) {
1954 if (len + 5 >= size) {
1955 size *= 2;
1956 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
1957 if (buf == NULL) {
1958 xmlGenericError(xmlGenericErrorContext,
1959 "realloc of %d byte failed\n", size);
1960 return(NULL);
1961 }
1962 }
1963 COPY_BUF(l,buf,len,c);
1964 NEXTL(l);
1965 /*
1966 * Pop-up of finished entities.
1967 */
1968 while ((RAW == 0) && (ctxt->inputNr > 1)) /* non input consuming */
1969 xmlPopInput(ctxt);
1970
1971 GROW;
1972 c = CUR_CHAR(l);
1973 if (c == 0) {
1974 GROW;
1975 c = CUR_CHAR(l);
1976 }
1977 }
1978 buf[len] = 0;
1979
1980 /*
1981 * Raise problem w.r.t. '&' and '%' being used in non-entities
1982 * reference constructs. Note Charref will be handled in
1983 * xmlStringDecodeEntities()
1984 */
1985 cur = buf;
1986 while (*cur != 0) { /* non input consuming */
1987 if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
1988 xmlChar *name;
1989 xmlChar tmp = *cur;
1990
1991 cur++;
1992 name = xmlParseStringName(ctxt, &cur);
1993 if ((name == NULL) || (*cur != ';')) {
1994 ctxt->errNo = XML_ERR_ENTITY_CHAR_ERROR;
1995 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
1996 ctxt->sax->error(ctxt->userData,
1997 "EntityValue: '%c' forbidden except for entities references\n",
1998 tmp);
1999 ctxt->wellFormed = 0;
2000 ctxt->disableSAX = 1;
2001 }
2002 if ((ctxt->inSubset == 1) && (tmp == '%')) {
2003 ctxt->errNo = XML_ERR_ENTITY_PE_INTERNAL;
2004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2005 ctxt->sax->error(ctxt->userData,
2006 "EntityValue: PEReferences forbidden in internal subset\n",
2007 tmp);
2008 ctxt->wellFormed = 0;
2009 ctxt->disableSAX = 1;
2010 }
2011 if (name != NULL)
2012 xmlFree(name);
2013 }
2014 cur++;
2015 }
2016
2017 /*
2018 * Then PEReference entities are substituted.
2019 */
2020 if (c != stop) {
2021 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
2022 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2023 ctxt->sax->error(ctxt->userData, "EntityValue: \" expected\n");
2024 ctxt->wellFormed = 0;
2025 ctxt->disableSAX = 1;
2026 xmlFree(buf);
2027 } else {
2028 NEXT;
2029 /*
2030 * NOTE: 4.4.7 Bypassed
2031 * When a general entity reference appears in the EntityValue in
2032 * an entity declaration, it is bypassed and left as is.
2033 * so XML_SUBSTITUTE_REF is not set here.
2034 */
2035 ret = xmlStringDecodeEntities(ctxt, buf, XML_SUBSTITUTE_PEREF,
2036 0, 0, 0);
2037 if (orig != NULL)
2038 *orig = buf;
2039 else
2040 xmlFree(buf);
2041 }
2042
2043 return(ret);
2044}
2045
2046/**
2047 * xmlParseAttValue:
2048 * @ctxt: an XML parser context
2049 *
2050 * parse a value for an attribute
2051 * Note: the parser won't do substitution of entities here, this
2052 * will be handled later in xmlStringGetNodeList
2053 *
2054 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
2055 * "'" ([^<&'] | Reference)* "'"
2056 *
2057 * 3.3.3 Attribute-Value Normalization:
2058 * Before the value of an attribute is passed to the application or
2059 * checked for validity, the XML processor must normalize it as follows:
2060 * - a character reference is processed by appending the referenced
2061 * character to the attribute value
2062 * - an entity reference is processed by recursively processing the
2063 * replacement text of the entity
2064 * - a whitespace character (#x20, #xD, #xA, #x9) is processed by
2065 * appending #x20 to the normalized value, except that only a single
2066 * #x20 is appended for a "#xD#xA" sequence that is part of an external
2067 * parsed entity or the literal entity value of an internal parsed entity
2068 * - other characters are processed by appending them to the normalized value
2069 * If the declared value is not CDATA, then the XML processor must further
2070 * process the normalized attribute value by discarding any leading and
2071 * trailing space (#x20) characters, and by replacing sequences of space
2072 * (#x20) characters by a single space (#x20) character.
2073 * All attributes for which no declaration has been read should be treated
2074 * by a non-validating parser as if declared CDATA.
2075 *
2076 * Returns the AttValue parsed or NULL. The value has to be freed by the caller.
2077 */
2078
2079xmlChar *
2080xmlParseAttValue(xmlParserCtxtPtr ctxt) {
2081 xmlChar limit = 0;
2082 xmlChar *buf = NULL;
2083 int len = 0;
2084 int buf_size = 0;
2085 int c, l;
2086 xmlChar *current = NULL;
2087 xmlEntityPtr ent;
2088
2089
2090 SHRINK;
2091 if (NXT(0) == '"') {
2092 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2093 limit = '"';
2094 NEXT;
2095 } else if (NXT(0) == '\'') {
2096 limit = '\'';
2097 ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
2098 NEXT;
2099 } else {
2100 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_STARTED;
2101 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2102 ctxt->sax->error(ctxt->userData, "AttValue: \" or ' expected\n");
2103 ctxt->wellFormed = 0;
2104 ctxt->disableSAX = 1;
2105 return(NULL);
2106 }
2107
2108 /*
2109 * allocate a translation buffer.
2110 */
2111 buf_size = XML_PARSER_BUFFER_SIZE;
2112 buf = (xmlChar *) xmlMalloc(buf_size * sizeof(xmlChar));
2113 if (buf == NULL) {
2114 perror("xmlParseAttValue: malloc failed");
2115 return(NULL);
2116 }
2117
2118 /*
2119 * Ok loop until we reach one of the ending char or a size limit.
2120 */
2121 c = CUR_CHAR(l);
2122 while (((NXT(0) != limit) && /* checked */
2123 (c != '<')) || (ctxt->token != 0)) {
2124 if (c == 0) break;
2125 if (ctxt->token == '&') {
2126 /*
2127 * The reparsing will be done in xmlStringGetNodeList()
2128 * called by the attribute() function in SAX.c
2129 */
2130 static xmlChar buffer[6] = "&#38;";
2131
2132 if (len > buf_size - 10) {
2133 growBuffer(buf);
2134 }
2135 current = &buffer[0];
2136 while (*current != 0) { /* non input consuming */
2137 buf[len++] = *current++;
2138 }
2139 ctxt->token = 0;
2140 } else if (c == '&') {
2141 if (NXT(1) == '#') {
2142 int val = xmlParseCharRef(ctxt);
2143 if (val == '&') {
2144 /*
2145 * The reparsing will be done in xmlStringGetNodeList()
2146 * called by the attribute() function in SAX.c
2147 */
2148 static xmlChar buffer[6] = "&#38;";
2149
2150 if (len > buf_size - 10) {
2151 growBuffer(buf);
2152 }
2153 current = &buffer[0];
2154 while (*current != 0) { /* non input consuming */
2155 buf[len++] = *current++;
2156 }
2157 } else {
2158 len += xmlCopyChar(0, &buf[len], val);
2159 }
2160 } else {
2161 ent = xmlParseEntityRef(ctxt);
2162 if ((ent != NULL) &&
2163 (ctxt->replaceEntities != 0)) {
2164 xmlChar *rep;
2165
2166 if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
2167 rep = xmlStringDecodeEntities(ctxt, ent->content,
2168 XML_SUBSTITUTE_REF, 0, 0, 0);
2169 if (rep != NULL) {
2170 current = rep;
2171 while (*current != 0) { /* non input consuming */
2172 buf[len++] = *current++;
2173 if (len > buf_size - 10) {
2174 growBuffer(buf);
2175 }
2176 }
2177 xmlFree(rep);
2178 }
2179 } else {
2180 if (ent->content != NULL)
2181 buf[len++] = ent->content[0];
2182 }
2183 } else if (ent != NULL) {
2184 int i = xmlStrlen(ent->name);
2185 const xmlChar *cur = ent->name;
2186
2187 /*
2188 * This may look absurd but is needed to detect
2189 * entities problems
2190 */
2191 if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
2192 (ent->content != NULL)) {
2193 xmlChar *rep;
2194 rep = xmlStringDecodeEntities(ctxt, ent->content,
2195 XML_SUBSTITUTE_REF, 0, 0, 0);
2196 if (rep != NULL)
2197 xmlFree(rep);
2198 }
2199
2200 /*
2201 * Just output the reference
2202 */
2203 buf[len++] = '&';
2204 if (len > buf_size - i - 10) {
2205 growBuffer(buf);
2206 }
2207 for (;i > 0;i--)
2208 buf[len++] = *cur++;
2209 buf[len++] = ';';
2210 }
2211 }
2212 } else {
2213 if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
2214 COPY_BUF(l,buf,len,0x20);
2215 if (len > buf_size - 10) {
2216 growBuffer(buf);
2217 }
2218 } else {
2219 COPY_BUF(l,buf,len,c);
2220 if (len > buf_size - 10) {
2221 growBuffer(buf);
2222 }
2223 }
2224 NEXTL(l);
2225 }
2226 GROW;
2227 c = CUR_CHAR(l);
2228 }
2229 buf[len++] = 0;
2230 if (RAW == '<') {
2231 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
2232 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2233 ctxt->sax->error(ctxt->userData,
2234 "Unescaped '<' not allowed in attributes values\n");
2235 ctxt->wellFormed = 0;
2236 ctxt->disableSAX = 1;
2237 } else if (RAW != limit) {
2238 ctxt->errNo = XML_ERR_ATTRIBUTE_NOT_FINISHED;
2239 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2240 ctxt->sax->error(ctxt->userData, "AttValue: ' expected\n");
2241 ctxt->wellFormed = 0;
2242 ctxt->disableSAX = 1;
2243 } else
2244 NEXT;
2245 return(buf);
2246}
2247
2248/**
2249 * xmlParseSystemLiteral:
2250 * @ctxt: an XML parser context
2251 *
2252 * parse an XML Literal
2253 *
2254 * [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
2255 *
2256 * Returns the SystemLiteral parsed or NULL
2257 */
2258
2259xmlChar *
2260xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
2261 xmlChar *buf = NULL;
2262 int len = 0;
2263 int size = XML_PARSER_BUFFER_SIZE;
2264 int cur, l;
2265 xmlChar stop;
2266 int state = ctxt->instate;
2267 int count = 0;
2268
2269 SHRINK;
2270 if (RAW == '"') {
2271 NEXT;
2272 stop = '"';
2273 } else if (RAW == '\'') {
2274 NEXT;
2275 stop = '\'';
2276 } else {
2277 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2278 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2279 ctxt->sax->error(ctxt->userData,
2280 "SystemLiteral \" or ' expected\n");
2281 ctxt->wellFormed = 0;
2282 ctxt->disableSAX = 1;
2283 return(NULL);
2284 }
2285
2286 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2287 if (buf == NULL) {
2288 xmlGenericError(xmlGenericErrorContext,
2289 "malloc of %d byte failed\n", size);
2290 return(NULL);
2291 }
2292 ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
2293 cur = CUR_CHAR(l);
2294 while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
2295 if (len + 5 >= size) {
2296 size *= 2;
2297 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2298 if (buf == NULL) {
2299 xmlGenericError(xmlGenericErrorContext,
2300 "realloc of %d byte failed\n", size);
2301 ctxt->instate = (xmlParserInputState) state;
2302 return(NULL);
2303 }
2304 }
2305 count++;
2306 if (count > 50) {
2307 GROW;
2308 count = 0;
2309 }
2310 COPY_BUF(l,buf,len,cur);
2311 NEXTL(l);
2312 cur = CUR_CHAR(l);
2313 if (cur == 0) {
2314 GROW;
2315 SHRINK;
2316 cur = CUR_CHAR(l);
2317 }
2318 }
2319 buf[len] = 0;
2320 ctxt->instate = (xmlParserInputState) state;
2321 if (!IS_CHAR(cur)) {
2322 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2323 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2324 ctxt->sax->error(ctxt->userData, "Unfinished SystemLiteral\n");
2325 ctxt->wellFormed = 0;
2326 ctxt->disableSAX = 1;
2327 } else {
2328 NEXT;
2329 }
2330 return(buf);
2331}
2332
2333/**
2334 * xmlParsePubidLiteral:
2335 * @ctxt: an XML parser context
2336 *
2337 * parse an XML public literal
2338 *
2339 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
2340 *
2341 * Returns the PubidLiteral parsed or NULL.
2342 */
2343
2344xmlChar *
2345xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
2346 xmlChar *buf = NULL;
2347 int len = 0;
2348 int size = XML_PARSER_BUFFER_SIZE;
2349 xmlChar cur;
2350 xmlChar stop;
2351 int count = 0;
2352
2353 SHRINK;
2354 if (RAW == '"') {
2355 NEXT;
2356 stop = '"';
2357 } else if (RAW == '\'') {
2358 NEXT;
2359 stop = '\'';
2360 } else {
2361 ctxt->errNo = XML_ERR_LITERAL_NOT_STARTED;
2362 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2363 ctxt->sax->error(ctxt->userData,
2364 "SystemLiteral \" or ' expected\n");
2365 ctxt->wellFormed = 0;
2366 ctxt->disableSAX = 1;
2367 return(NULL);
2368 }
2369 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2370 if (buf == NULL) {
2371 xmlGenericError(xmlGenericErrorContext,
2372 "malloc of %d byte failed\n", size);
2373 return(NULL);
2374 }
2375 cur = CUR;
2376 while ((IS_PUBIDCHAR(cur)) && (cur != stop)) { /* checked */
2377 if (len + 1 >= size) {
2378 size *= 2;
2379 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2380 if (buf == NULL) {
2381 xmlGenericError(xmlGenericErrorContext,
2382 "realloc of %d byte failed\n", size);
2383 return(NULL);
2384 }
2385 }
2386 buf[len++] = cur;
2387 count++;
2388 if (count > 50) {
2389 GROW;
2390 count = 0;
2391 }
2392 NEXT;
2393 cur = CUR;
2394 if (cur == 0) {
2395 GROW;
2396 SHRINK;
2397 cur = CUR;
2398 }
2399 }
2400 buf[len] = 0;
2401 if (cur != stop) {
2402 ctxt->errNo = XML_ERR_LITERAL_NOT_FINISHED;
2403 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2404 ctxt->sax->error(ctxt->userData, "Unfinished PubidLiteral\n");
2405 ctxt->wellFormed = 0;
2406 ctxt->disableSAX = 1;
2407 } else {
2408 NEXT;
2409 }
2410 return(buf);
2411}
2412
Daniel Veillard48b2f892001-02-25 16:11:03 +00002413void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata);
Owen Taylor3473f882001-02-23 17:55:21 +00002414/**
2415 * xmlParseCharData:
2416 * @ctxt: an XML parser context
2417 * @cdata: int indicating whether we are within a CDATA section
2418 *
2419 * parse a CharData section.
2420 * if we are within a CDATA section ']]>' marks an end of section.
2421 *
2422 * The right angle bracket (>) may be represented using the string "&gt;",
2423 * and must, for compatibility, be escaped using "&gt;" or a character
2424 * reference when it appears in the string "]]>" in content, when that
2425 * string is not marking the end of a CDATA section.
2426 *
2427 * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
2428 */
2429
2430void
2431xmlParseCharData(xmlParserCtxtPtr ctxt, int cdata) {
Daniel Veillard48b2f892001-02-25 16:11:03 +00002432 const xmlChar *in;
2433 int nbchar = 0;
2434
2435 SHRINK;
2436 GROW;
2437 /*
2438 * Accelerated common case where input don't need to be
2439 * modified before passing it to the handler.
2440 */
2441 if ((ctxt->token == 0) && (!cdata)) {
2442 in = ctxt->input->cur;
2443 do {
2444 while (((*in >= 0x20) && (*in != '<') &&
2445 (*in != '&') && (*in <= 0x7F)) || (*in == 0x09))
2446 in++;
2447 if (*in == 0xA) {
2448 ctxt->input->line++;
2449 continue; /* while */
2450 }
2451 nbchar = in - ctxt->input->cur;
2452 if (IS_BLANK(*ctxt->input->cur) &&
2453 areBlanks(ctxt, ctxt->input->cur, nbchar)) {
2454 if (ctxt->sax->ignorableWhitespace != NULL)
2455 ctxt->sax->ignorableWhitespace(ctxt->userData,
2456 ctxt->input->cur, nbchar);
2457 } else {
2458 if (ctxt->sax->characters != NULL)
2459 ctxt->sax->characters(ctxt->userData,
2460 ctxt->input->cur, nbchar);
2461 }
2462 ctxt->input->cur = in;
2463 if (*in == 0xD) {
2464 in++;
2465 if (*in == 0xA) {
2466 ctxt->input->cur = in;
2467 in++;
2468 ctxt->input->line++;
2469 continue; /* while */
2470 }
2471 in--;
2472 }
2473 if ((*in == '<') || (*in == '&')) {
2474 return;
2475 }
2476 SHRINK;
2477 GROW;
2478 in = ctxt->input->cur;
2479 } while ((*in >= 0x20) && (*in <= 0x7F));
2480 nbchar = 0;
2481 }
2482 xmlParseCharDataComplex(ctxt, cdata);
2483}
2484
2485void
2486xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
Owen Taylor3473f882001-02-23 17:55:21 +00002487 xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
2488 int nbchar = 0;
2489 int cur, l;
2490 int count = 0;
2491
2492 SHRINK;
2493 GROW;
2494 cur = CUR_CHAR(l);
2495 while (((cur != '<') || (ctxt->token == '<')) && /* checked */
2496 ((cur != '&') || (ctxt->token == '&')) &&
2497 (IS_CHAR(cur))) /* test also done in xmlCurrentChar() */ {
2498 if ((cur == ']') && (NXT(1) == ']') &&
2499 (NXT(2) == '>')) {
2500 if (cdata) break;
2501 else {
2502 ctxt->errNo = XML_ERR_MISPLACED_CDATA_END;
2503 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2504 ctxt->sax->error(ctxt->userData,
2505 "Sequence ']]>' not allowed in content\n");
2506 /* Should this be relaxed ??? I see a "must here */
2507 ctxt->wellFormed = 0;
2508 ctxt->disableSAX = 1;
2509 }
2510 }
2511 COPY_BUF(l,buf,nbchar,cur);
2512 if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
2513 /*
2514 * Ok the segment is to be consumed as chars.
2515 */
2516 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2517 if (areBlanks(ctxt, buf, nbchar)) {
2518 if (ctxt->sax->ignorableWhitespace != NULL)
2519 ctxt->sax->ignorableWhitespace(ctxt->userData,
2520 buf, nbchar);
2521 } else {
2522 if (ctxt->sax->characters != NULL)
2523 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2524 }
2525 }
2526 nbchar = 0;
2527 }
2528 count++;
2529 if (count > 50) {
2530 GROW;
2531 count = 0;
2532 }
2533 NEXTL(l);
2534 cur = CUR_CHAR(l);
2535 }
2536 if (nbchar != 0) {
2537 /*
2538 * Ok the segment is to be consumed as chars.
2539 */
2540 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
2541 if (areBlanks(ctxt, buf, nbchar)) {
2542 if (ctxt->sax->ignorableWhitespace != NULL)
2543 ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
2544 } else {
2545 if (ctxt->sax->characters != NULL)
2546 ctxt->sax->characters(ctxt->userData, buf, nbchar);
2547 }
2548 }
2549 }
2550}
2551
2552/**
2553 * xmlParseExternalID:
2554 * @ctxt: an XML parser context
2555 * @publicID: a xmlChar** receiving PubidLiteral
2556 * @strict: indicate whether we should restrict parsing to only
2557 * production [75], see NOTE below
2558 *
2559 * Parse an External ID or a Public ID
2560 *
2561 * NOTE: Productions [75] and [83] interract badly since [75] can generate
2562 * 'PUBLIC' S PubidLiteral S SystemLiteral
2563 *
2564 * [75] ExternalID ::= 'SYSTEM' S SystemLiteral
2565 * | 'PUBLIC' S PubidLiteral S SystemLiteral
2566 *
2567 * [83] PublicID ::= 'PUBLIC' S PubidLiteral
2568 *
2569 * Returns the function returns SystemLiteral and in the second
2570 * case publicID receives PubidLiteral, is strict is off
2571 * it is possible to return NULL and have publicID set.
2572 */
2573
2574xmlChar *
2575xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
2576 xmlChar *URI = NULL;
2577
2578 SHRINK;
2579 if ((RAW == 'S') && (NXT(1) == 'Y') &&
2580 (NXT(2) == 'S') && (NXT(3) == 'T') &&
2581 (NXT(4) == 'E') && (NXT(5) == 'M')) {
2582 SKIP(6);
2583 if (!IS_BLANK(CUR)) {
2584 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2585 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2586 ctxt->sax->error(ctxt->userData,
2587 "Space required after 'SYSTEM'\n");
2588 ctxt->wellFormed = 0;
2589 ctxt->disableSAX = 1;
2590 }
2591 SKIP_BLANKS;
2592 URI = xmlParseSystemLiteral(ctxt);
2593 if (URI == NULL) {
2594 ctxt->errNo = XML_ERR_URI_REQUIRED;
2595 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2596 ctxt->sax->error(ctxt->userData,
2597 "xmlParseExternalID: SYSTEM, no URI\n");
2598 ctxt->wellFormed = 0;
2599 ctxt->disableSAX = 1;
2600 }
2601 } else if ((RAW == 'P') && (NXT(1) == 'U') &&
2602 (NXT(2) == 'B') && (NXT(3) == 'L') &&
2603 (NXT(4) == 'I') && (NXT(5) == 'C')) {
2604 SKIP(6);
2605 if (!IS_BLANK(CUR)) {
2606 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2608 ctxt->sax->error(ctxt->userData,
2609 "Space required after 'PUBLIC'\n");
2610 ctxt->wellFormed = 0;
2611 ctxt->disableSAX = 1;
2612 }
2613 SKIP_BLANKS;
2614 *publicID = xmlParsePubidLiteral(ctxt);
2615 if (*publicID == NULL) {
2616 ctxt->errNo = XML_ERR_PUBID_REQUIRED;
2617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2618 ctxt->sax->error(ctxt->userData,
2619 "xmlParseExternalID: PUBLIC, no Public Identifier\n");
2620 ctxt->wellFormed = 0;
2621 ctxt->disableSAX = 1;
2622 }
2623 if (strict) {
2624 /*
2625 * We don't handle [83] so "S SystemLiteral" is required.
2626 */
2627 if (!IS_BLANK(CUR)) {
2628 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2629 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2630 ctxt->sax->error(ctxt->userData,
2631 "Space required after the Public Identifier\n");
2632 ctxt->wellFormed = 0;
2633 ctxt->disableSAX = 1;
2634 }
2635 } else {
2636 /*
2637 * We handle [83] so we return immediately, if
2638 * "S SystemLiteral" is not detected. From a purely parsing
2639 * point of view that's a nice mess.
2640 */
2641 const xmlChar *ptr;
2642 GROW;
2643
2644 ptr = CUR_PTR;
2645 if (!IS_BLANK(*ptr)) return(NULL);
2646
2647 while (IS_BLANK(*ptr)) ptr++; /* TODO: dangerous, fix ! */
2648 if ((*ptr != '\'') && (*ptr != '"')) return(NULL);
2649 }
2650 SKIP_BLANKS;
2651 URI = xmlParseSystemLiteral(ctxt);
2652 if (URI == NULL) {
2653 ctxt->errNo = XML_ERR_URI_REQUIRED;
2654 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2655 ctxt->sax->error(ctxt->userData,
2656 "xmlParseExternalID: PUBLIC, no URI\n");
2657 ctxt->wellFormed = 0;
2658 ctxt->disableSAX = 1;
2659 }
2660 }
2661 return(URI);
2662}
2663
2664/**
2665 * xmlParseComment:
2666 * @ctxt: an XML parser context
2667 *
2668 * Skip an XML (SGML) comment <!-- .... -->
2669 * The spec says that "For compatibility, the string "--" (double-hyphen)
2670 * must not occur within comments. "
2671 *
2672 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
2673 */
2674void
2675xmlParseComment(xmlParserCtxtPtr ctxt) {
2676 xmlChar *buf = NULL;
2677 int len;
2678 int size = XML_PARSER_BUFFER_SIZE;
2679 int q, ql;
2680 int r, rl;
2681 int cur, l;
2682 xmlParserInputState state;
2683 xmlParserInputPtr input = ctxt->input;
2684 int count = 0;
2685
2686 /*
2687 * Check that there is a comment right here.
2688 */
2689 if ((RAW != '<') || (NXT(1) != '!') ||
2690 (NXT(2) != '-') || (NXT(3) != '-')) return;
2691
2692 state = ctxt->instate;
2693 ctxt->instate = XML_PARSER_COMMENT;
2694 SHRINK;
2695 SKIP(4);
2696 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2697 if (buf == NULL) {
2698 xmlGenericError(xmlGenericErrorContext,
2699 "malloc of %d byte failed\n", size);
2700 ctxt->instate = state;
2701 return;
2702 }
2703 q = CUR_CHAR(ql);
2704 NEXTL(ql);
2705 r = CUR_CHAR(rl);
2706 NEXTL(rl);
2707 cur = CUR_CHAR(l);
2708 len = 0;
2709 while (IS_CHAR(cur) && /* checked */
2710 ((cur != '>') ||
2711 (r != '-') || (q != '-'))) {
2712 if ((r == '-') && (q == '-') && (len > 1)) {
2713 ctxt->errNo = XML_ERR_HYPHEN_IN_COMMENT;
2714 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2715 ctxt->sax->error(ctxt->userData,
2716 "Comment must not contain '--' (double-hyphen)`\n");
2717 ctxt->wellFormed = 0;
2718 ctxt->disableSAX = 1;
2719 }
2720 if (len + 5 >= size) {
2721 size *= 2;
2722 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2723 if (buf == NULL) {
2724 xmlGenericError(xmlGenericErrorContext,
2725 "realloc of %d byte failed\n", size);
2726 ctxt->instate = state;
2727 return;
2728 }
2729 }
2730 COPY_BUF(ql,buf,len,q);
2731 q = r;
2732 ql = rl;
2733 r = cur;
2734 rl = l;
2735
2736 count++;
2737 if (count > 50) {
2738 GROW;
2739 count = 0;
2740 }
2741 NEXTL(l);
2742 cur = CUR_CHAR(l);
2743 if (cur == 0) {
2744 SHRINK;
2745 GROW;
2746 cur = CUR_CHAR(l);
2747 }
2748 }
2749 buf[len] = 0;
2750 if (!IS_CHAR(cur)) {
2751 ctxt->errNo = XML_ERR_COMMENT_NOT_FINISHED;
2752 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2753 ctxt->sax->error(ctxt->userData,
2754 "Comment not terminated \n<!--%.50s\n", buf);
2755 ctxt->wellFormed = 0;
2756 ctxt->disableSAX = 1;
2757 xmlFree(buf);
2758 } else {
2759 if (input != ctxt->input) {
2760 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2761 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2762 ctxt->sax->error(ctxt->userData,
2763"Comment doesn't start and stop in the same entity\n");
2764 ctxt->wellFormed = 0;
2765 ctxt->disableSAX = 1;
2766 }
2767 NEXT;
2768 if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
2769 (!ctxt->disableSAX))
2770 ctxt->sax->comment(ctxt->userData, buf);
2771 xmlFree(buf);
2772 }
2773 ctxt->instate = state;
2774}
2775
2776/**
2777 * xmlParsePITarget:
2778 * @ctxt: an XML parser context
2779 *
2780 * parse the name of a PI
2781 *
2782 * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
2783 *
2784 * Returns the PITarget name or NULL
2785 */
2786
2787xmlChar *
2788xmlParsePITarget(xmlParserCtxtPtr ctxt) {
2789 xmlChar *name;
2790
2791 name = xmlParseName(ctxt);
2792 if ((name != NULL) &&
2793 ((name[0] == 'x') || (name[0] == 'X')) &&
2794 ((name[1] == 'm') || (name[1] == 'M')) &&
2795 ((name[2] == 'l') || (name[2] == 'L'))) {
2796 int i;
2797 if ((name[0] == 'x') && (name[1] == 'm') &&
2798 (name[2] == 'l') && (name[3] == 0)) {
2799 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2800 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2801 ctxt->sax->error(ctxt->userData,
2802 "XML declaration allowed only at the start of the document\n");
2803 ctxt->wellFormed = 0;
2804 ctxt->disableSAX = 1;
2805 return(name);
2806 } else if (name[3] == 0) {
2807 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2808 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2809 ctxt->sax->error(ctxt->userData, "Invalid PI name\n");
2810 ctxt->wellFormed = 0;
2811 ctxt->disableSAX = 1;
2812 return(name);
2813 }
2814 for (i = 0;;i++) {
2815 if (xmlW3CPIs[i] == NULL) break;
2816 if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
2817 return(name);
2818 }
2819 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) {
2820 ctxt->errNo = XML_ERR_RESERVED_XML_NAME;
2821 ctxt->sax->warning(ctxt->userData,
2822 "xmlParsePItarget: invalid name prefix 'xml'\n");
2823 }
2824 }
2825 return(name);
2826}
2827
2828/**
2829 * xmlParsePI:
2830 * @ctxt: an XML parser context
2831 *
2832 * parse an XML Processing Instruction.
2833 *
2834 * [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
2835 *
2836 * The processing is transfered to SAX once parsed.
2837 */
2838
2839void
2840xmlParsePI(xmlParserCtxtPtr ctxt) {
2841 xmlChar *buf = NULL;
2842 int len = 0;
2843 int size = XML_PARSER_BUFFER_SIZE;
2844 int cur, l;
2845 xmlChar *target;
2846 xmlParserInputState state;
2847 int count = 0;
2848
2849 if ((RAW == '<') && (NXT(1) == '?')) {
2850 xmlParserInputPtr input = ctxt->input;
2851 state = ctxt->instate;
2852 ctxt->instate = XML_PARSER_PI;
2853 /*
2854 * this is a Processing Instruction.
2855 */
2856 SKIP(2);
2857 SHRINK;
2858
2859 /*
2860 * Parse the target name and check for special support like
2861 * namespace.
2862 */
2863 target = xmlParsePITarget(ctxt);
2864 if (target != NULL) {
2865 if ((RAW == '?') && (NXT(1) == '>')) {
2866 if (input != ctxt->input) {
2867 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2868 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2869 ctxt->sax->error(ctxt->userData,
2870 "PI declaration doesn't start and stop in the same entity\n");
2871 ctxt->wellFormed = 0;
2872 ctxt->disableSAX = 1;
2873 }
2874 SKIP(2);
2875
2876 /*
2877 * SAX: PI detected.
2878 */
2879 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2880 (ctxt->sax->processingInstruction != NULL))
2881 ctxt->sax->processingInstruction(ctxt->userData,
2882 target, NULL);
2883 ctxt->instate = state;
2884 xmlFree(target);
2885 return;
2886 }
2887 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
2888 if (buf == NULL) {
2889 xmlGenericError(xmlGenericErrorContext,
2890 "malloc of %d byte failed\n", size);
2891 ctxt->instate = state;
2892 return;
2893 }
2894 cur = CUR;
2895 if (!IS_BLANK(cur)) {
2896 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
2897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2898 ctxt->sax->error(ctxt->userData,
2899 "xmlParsePI: PI %s space expected\n", target);
2900 ctxt->wellFormed = 0;
2901 ctxt->disableSAX = 1;
2902 }
2903 SKIP_BLANKS;
2904 cur = CUR_CHAR(l);
2905 while (IS_CHAR(cur) && /* checked */
2906 ((cur != '?') || (NXT(1) != '>'))) {
2907 if (len + 5 >= size) {
2908 size *= 2;
2909 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
2910 if (buf == NULL) {
2911 xmlGenericError(xmlGenericErrorContext,
2912 "realloc of %d byte failed\n", size);
2913 ctxt->instate = state;
2914 return;
2915 }
2916 }
2917 count++;
2918 if (count > 50) {
2919 GROW;
2920 count = 0;
2921 }
2922 COPY_BUF(l,buf,len,cur);
2923 NEXTL(l);
2924 cur = CUR_CHAR(l);
2925 if (cur == 0) {
2926 SHRINK;
2927 GROW;
2928 cur = CUR_CHAR(l);
2929 }
2930 }
2931 buf[len] = 0;
2932 if (cur != '?') {
2933 ctxt->errNo = XML_ERR_PI_NOT_FINISHED;
2934 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2935 ctxt->sax->error(ctxt->userData,
2936 "xmlParsePI: PI %s never end ...\n", target);
2937 ctxt->wellFormed = 0;
2938 ctxt->disableSAX = 1;
2939 } else {
2940 if (input != ctxt->input) {
2941 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
2942 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2943 ctxt->sax->error(ctxt->userData,
2944 "PI declaration doesn't start and stop in the same entity\n");
2945 ctxt->wellFormed = 0;
2946 ctxt->disableSAX = 1;
2947 }
2948 SKIP(2);
2949
2950 /*
2951 * SAX: PI detected.
2952 */
2953 if ((ctxt->sax) && (!ctxt->disableSAX) &&
2954 (ctxt->sax->processingInstruction != NULL))
2955 ctxt->sax->processingInstruction(ctxt->userData,
2956 target, buf);
2957 }
2958 xmlFree(buf);
2959 xmlFree(target);
2960 } else {
2961 ctxt->errNo = XML_ERR_PI_NOT_STARTED;
2962 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
2963 ctxt->sax->error(ctxt->userData,
2964 "xmlParsePI : no target name\n");
2965 ctxt->wellFormed = 0;
2966 ctxt->disableSAX = 1;
2967 }
2968 ctxt->instate = state;
2969 }
2970}
2971
2972/**
2973 * xmlParseNotationDecl:
2974 * @ctxt: an XML parser context
2975 *
2976 * parse a notation declaration
2977 *
2978 * [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
2979 *
2980 * Hence there is actually 3 choices:
2981 * 'PUBLIC' S PubidLiteral
2982 * 'PUBLIC' S PubidLiteral S SystemLiteral
2983 * and 'SYSTEM' S SystemLiteral
2984 *
2985 * See the NOTE on xmlParseExternalID().
2986 */
2987
2988void
2989xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
2990 xmlChar *name;
2991 xmlChar *Pubid;
2992 xmlChar *Systemid;
2993
2994 if ((RAW == '<') && (NXT(1) == '!') &&
2995 (NXT(2) == 'N') && (NXT(3) == 'O') &&
2996 (NXT(4) == 'T') && (NXT(5) == 'A') &&
2997 (NXT(6) == 'T') && (NXT(7) == 'I') &&
2998 (NXT(8) == 'O') && (NXT(9) == 'N')) {
2999 xmlParserInputPtr input = ctxt->input;
3000 SHRINK;
3001 SKIP(10);
3002 if (!IS_BLANK(CUR)) {
3003 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3004 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3005 ctxt->sax->error(ctxt->userData,
3006 "Space required after '<!NOTATION'\n");
3007 ctxt->wellFormed = 0;
3008 ctxt->disableSAX = 1;
3009 return;
3010 }
3011 SKIP_BLANKS;
3012
3013 name = xmlParseName(ctxt);
3014 if (name == NULL) {
3015 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3016 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3017 ctxt->sax->error(ctxt->userData,
3018 "NOTATION: Name expected here\n");
3019 ctxt->wellFormed = 0;
3020 ctxt->disableSAX = 1;
3021 return;
3022 }
3023 if (!IS_BLANK(CUR)) {
3024 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3025 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3026 ctxt->sax->error(ctxt->userData,
3027 "Space required after the NOTATION name'\n");
3028 ctxt->wellFormed = 0;
3029 ctxt->disableSAX = 1;
3030 return;
3031 }
3032 SKIP_BLANKS;
3033
3034 /*
3035 * Parse the IDs.
3036 */
3037 Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
3038 SKIP_BLANKS;
3039
3040 if (RAW == '>') {
3041 if (input != ctxt->input) {
3042 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3043 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3044 ctxt->sax->error(ctxt->userData,
3045"Notation declaration doesn't start and stop in the same entity\n");
3046 ctxt->wellFormed = 0;
3047 ctxt->disableSAX = 1;
3048 }
3049 NEXT;
3050 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3051 (ctxt->sax->notationDecl != NULL))
3052 ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
3053 } else {
3054 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3055 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3056 ctxt->sax->error(ctxt->userData,
3057 "'>' required to close NOTATION declaration\n");
3058 ctxt->wellFormed = 0;
3059 ctxt->disableSAX = 1;
3060 }
3061 xmlFree(name);
3062 if (Systemid != NULL) xmlFree(Systemid);
3063 if (Pubid != NULL) xmlFree(Pubid);
3064 }
3065}
3066
3067/**
3068 * xmlParseEntityDecl:
3069 * @ctxt: an XML parser context
3070 *
3071 * parse <!ENTITY declarations
3072 *
3073 * [70] EntityDecl ::= GEDecl | PEDecl
3074 *
3075 * [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
3076 *
3077 * [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
3078 *
3079 * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
3080 *
3081 * [74] PEDef ::= EntityValue | ExternalID
3082 *
3083 * [76] NDataDecl ::= S 'NDATA' S Name
3084 *
3085 * [ VC: Notation Declared ]
3086 * The Name must match the declared name of a notation.
3087 */
3088
3089void
3090xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
3091 xmlChar *name = NULL;
3092 xmlChar *value = NULL;
3093 xmlChar *URI = NULL, *literal = NULL;
3094 xmlChar *ndata = NULL;
3095 int isParameter = 0;
3096 xmlChar *orig = NULL;
3097
3098 GROW;
3099 if ((RAW == '<') && (NXT(1) == '!') &&
3100 (NXT(2) == 'E') && (NXT(3) == 'N') &&
3101 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3102 (NXT(6) == 'T') && (NXT(7) == 'Y')) {
3103 xmlParserInputPtr input = ctxt->input;
3104 ctxt->instate = XML_PARSER_ENTITY_DECL;
3105 SHRINK;
3106 SKIP(8);
3107 if (!IS_BLANK(CUR)) {
3108 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3109 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3110 ctxt->sax->error(ctxt->userData,
3111 "Space required after '<!ENTITY'\n");
3112 ctxt->wellFormed = 0;
3113 ctxt->disableSAX = 1;
3114 }
3115 SKIP_BLANKS;
3116
3117 if (RAW == '%') {
3118 NEXT;
3119 if (!IS_BLANK(CUR)) {
3120 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3121 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3122 ctxt->sax->error(ctxt->userData,
3123 "Space required after '%'\n");
3124 ctxt->wellFormed = 0;
3125 ctxt->disableSAX = 1;
3126 }
3127 SKIP_BLANKS;
3128 isParameter = 1;
3129 }
3130
3131 name = xmlParseName(ctxt);
3132 if (name == NULL) {
3133 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3134 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3135 ctxt->sax->error(ctxt->userData, "xmlParseEntityDecl: no name\n");
3136 ctxt->wellFormed = 0;
3137 ctxt->disableSAX = 1;
3138 return;
3139 }
3140 if (!IS_BLANK(CUR)) {
3141 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3142 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3143 ctxt->sax->error(ctxt->userData,
3144 "Space required after the entity name\n");
3145 ctxt->wellFormed = 0;
3146 ctxt->disableSAX = 1;
3147 }
3148 SKIP_BLANKS;
3149
3150 /*
3151 * handle the various case of definitions...
3152 */
3153 if (isParameter) {
3154 if ((RAW == '"') || (RAW == '\'')) {
3155 value = xmlParseEntityValue(ctxt, &orig);
3156 if (value) {
3157 if ((ctxt->sax != NULL) &&
3158 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3159 ctxt->sax->entityDecl(ctxt->userData, name,
3160 XML_INTERNAL_PARAMETER_ENTITY,
3161 NULL, NULL, value);
3162 }
3163 } else {
3164 URI = xmlParseExternalID(ctxt, &literal, 1);
3165 if ((URI == NULL) && (literal == NULL)) {
3166 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3167 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3168 ctxt->sax->error(ctxt->userData,
3169 "Entity value required\n");
3170 ctxt->wellFormed = 0;
3171 ctxt->disableSAX = 1;
3172 }
3173 if (URI) {
3174 xmlURIPtr uri;
3175
3176 uri = xmlParseURI((const char *) URI);
3177 if (uri == NULL) {
3178 ctxt->errNo = XML_ERR_INVALID_URI;
3179 if ((ctxt->sax != NULL) &&
3180 (!ctxt->disableSAX) &&
3181 (ctxt->sax->error != NULL))
3182 ctxt->sax->error(ctxt->userData,
3183 "Invalid URI: %s\n", URI);
3184 ctxt->wellFormed = 0;
3185 } else {
3186 if (uri->fragment != NULL) {
3187 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3188 if ((ctxt->sax != NULL) &&
3189 (!ctxt->disableSAX) &&
3190 (ctxt->sax->error != NULL))
3191 ctxt->sax->error(ctxt->userData,
3192 "Fragment not allowed: %s\n", URI);
3193 ctxt->wellFormed = 0;
3194 } else {
3195 if ((ctxt->sax != NULL) &&
3196 (!ctxt->disableSAX) &&
3197 (ctxt->sax->entityDecl != NULL))
3198 ctxt->sax->entityDecl(ctxt->userData, name,
3199 XML_EXTERNAL_PARAMETER_ENTITY,
3200 literal, URI, NULL);
3201 }
3202 xmlFreeURI(uri);
3203 }
3204 }
3205 }
3206 } else {
3207 if ((RAW == '"') || (RAW == '\'')) {
3208 value = xmlParseEntityValue(ctxt, &orig);
3209 if ((ctxt->sax != NULL) &&
3210 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3211 ctxt->sax->entityDecl(ctxt->userData, name,
3212 XML_INTERNAL_GENERAL_ENTITY,
3213 NULL, NULL, value);
3214 } else {
3215 URI = xmlParseExternalID(ctxt, &literal, 1);
3216 if ((URI == NULL) && (literal == NULL)) {
3217 ctxt->errNo = XML_ERR_VALUE_REQUIRED;
3218 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3219 ctxt->sax->error(ctxt->userData,
3220 "Entity value required\n");
3221 ctxt->wellFormed = 0;
3222 ctxt->disableSAX = 1;
3223 }
3224 if (URI) {
3225 xmlURIPtr uri;
3226
3227 uri = xmlParseURI((const char *)URI);
3228 if (uri == NULL) {
3229 ctxt->errNo = XML_ERR_INVALID_URI;
3230 if ((ctxt->sax != NULL) &&
3231 (!ctxt->disableSAX) &&
3232 (ctxt->sax->error != NULL))
3233 ctxt->sax->error(ctxt->userData,
3234 "Invalid URI: %s\n", URI);
3235 ctxt->wellFormed = 0;
3236 } else {
3237 if (uri->fragment != NULL) {
3238 ctxt->errNo = XML_ERR_URI_FRAGMENT;
3239 if ((ctxt->sax != NULL) &&
3240 (!ctxt->disableSAX) &&
3241 (ctxt->sax->error != NULL))
3242 ctxt->sax->error(ctxt->userData,
3243 "Fragment not allowed: %s\n", URI);
3244 ctxt->wellFormed = 0;
3245 }
3246 xmlFreeURI(uri);
3247 }
3248 }
3249 if ((RAW != '>') && (!IS_BLANK(CUR))) {
3250 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3251 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3252 ctxt->sax->error(ctxt->userData,
3253 "Space required before 'NDATA'\n");
3254 ctxt->wellFormed = 0;
3255 ctxt->disableSAX = 1;
3256 }
3257 SKIP_BLANKS;
3258 if ((RAW == 'N') && (NXT(1) == 'D') &&
3259 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3260 (NXT(4) == 'A')) {
3261 SKIP(5);
3262 if (!IS_BLANK(CUR)) {
3263 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3264 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3265 ctxt->sax->error(ctxt->userData,
3266 "Space required after 'NDATA'\n");
3267 ctxt->wellFormed = 0;
3268 ctxt->disableSAX = 1;
3269 }
3270 SKIP_BLANKS;
3271 ndata = xmlParseName(ctxt);
3272 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3273 (ctxt->sax->unparsedEntityDecl != NULL))
3274 ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
3275 literal, URI, ndata);
3276 } else {
3277 if ((ctxt->sax != NULL) &&
3278 (!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
3279 ctxt->sax->entityDecl(ctxt->userData, name,
3280 XML_EXTERNAL_GENERAL_PARSED_ENTITY,
3281 literal, URI, NULL);
3282 }
3283 }
3284 }
3285 SKIP_BLANKS;
3286 if (RAW != '>') {
3287 ctxt->errNo = XML_ERR_ENTITY_NOT_FINISHED;
3288 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3289 ctxt->sax->error(ctxt->userData,
3290 "xmlParseEntityDecl: entity %s not terminated\n", name);
3291 ctxt->wellFormed = 0;
3292 ctxt->disableSAX = 1;
3293 } else {
3294 if (input != ctxt->input) {
3295 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3296 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3297 ctxt->sax->error(ctxt->userData,
3298"Entity declaration doesn't start and stop in the same entity\n");
3299 ctxt->wellFormed = 0;
3300 ctxt->disableSAX = 1;
3301 }
3302 NEXT;
3303 }
3304 if (orig != NULL) {
3305 /*
3306 * Ugly mechanism to save the raw entity value.
3307 */
3308 xmlEntityPtr cur = NULL;
3309
3310 if (isParameter) {
3311 if ((ctxt->sax != NULL) &&
3312 (ctxt->sax->getParameterEntity != NULL))
3313 cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
3314 } else {
3315 if ((ctxt->sax != NULL) &&
3316 (ctxt->sax->getEntity != NULL))
3317 cur = ctxt->sax->getEntity(ctxt->userData, name);
3318 }
3319 if (cur != NULL) {
3320 if (cur->orig != NULL)
3321 xmlFree(orig);
3322 else
3323 cur->orig = orig;
3324 } else
3325 xmlFree(orig);
3326 }
3327 if (name != NULL) xmlFree(name);
3328 if (value != NULL) xmlFree(value);
3329 if (URI != NULL) xmlFree(URI);
3330 if (literal != NULL) xmlFree(literal);
3331 if (ndata != NULL) xmlFree(ndata);
3332 }
3333}
3334
3335/**
3336 * xmlParseDefaultDecl:
3337 * @ctxt: an XML parser context
3338 * @value: Receive a possible fixed default value for the attribute
3339 *
3340 * Parse an attribute default declaration
3341 *
3342 * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
3343 *
3344 * [ VC: Required Attribute ]
3345 * if the default declaration is the keyword #REQUIRED, then the
3346 * attribute must be specified for all elements of the type in the
3347 * attribute-list declaration.
3348 *
3349 * [ VC: Attribute Default Legal ]
3350 * The declared default value must meet the lexical constraints of
3351 * the declared attribute type c.f. xmlValidateAttributeDecl()
3352 *
3353 * [ VC: Fixed Attribute Default ]
3354 * if an attribute has a default value declared with the #FIXED
3355 * keyword, instances of that attribute must match the default value.
3356 *
3357 * [ WFC: No < in Attribute Values ]
3358 * handled in xmlParseAttValue()
3359 *
3360 * returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
3361 * or XML_ATTRIBUTE_FIXED.
3362 */
3363
3364int
3365xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
3366 int val;
3367 xmlChar *ret;
3368
3369 *value = NULL;
3370 if ((RAW == '#') && (NXT(1) == 'R') &&
3371 (NXT(2) == 'E') && (NXT(3) == 'Q') &&
3372 (NXT(4) == 'U') && (NXT(5) == 'I') &&
3373 (NXT(6) == 'R') && (NXT(7) == 'E') &&
3374 (NXT(8) == 'D')) {
3375 SKIP(9);
3376 return(XML_ATTRIBUTE_REQUIRED);
3377 }
3378 if ((RAW == '#') && (NXT(1) == 'I') &&
3379 (NXT(2) == 'M') && (NXT(3) == 'P') &&
3380 (NXT(4) == 'L') && (NXT(5) == 'I') &&
3381 (NXT(6) == 'E') && (NXT(7) == 'D')) {
3382 SKIP(8);
3383 return(XML_ATTRIBUTE_IMPLIED);
3384 }
3385 val = XML_ATTRIBUTE_NONE;
3386 if ((RAW == '#') && (NXT(1) == 'F') &&
3387 (NXT(2) == 'I') && (NXT(3) == 'X') &&
3388 (NXT(4) == 'E') && (NXT(5) == 'D')) {
3389 SKIP(6);
3390 val = XML_ATTRIBUTE_FIXED;
3391 if (!IS_BLANK(CUR)) {
3392 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3393 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3394 ctxt->sax->error(ctxt->userData,
3395 "Space required after '#FIXED'\n");
3396 ctxt->wellFormed = 0;
3397 ctxt->disableSAX = 1;
3398 }
3399 SKIP_BLANKS;
3400 }
3401 ret = xmlParseAttValue(ctxt);
3402 ctxt->instate = XML_PARSER_DTD;
3403 if (ret == NULL) {
3404 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3405 ctxt->sax->error(ctxt->userData,
3406 "Attribute default value declaration error\n");
3407 ctxt->wellFormed = 0;
3408 ctxt->disableSAX = 1;
3409 } else
3410 *value = ret;
3411 return(val);
3412}
3413
3414/**
3415 * xmlParseNotationType:
3416 * @ctxt: an XML parser context
3417 *
3418 * parse an Notation attribute type.
3419 *
3420 * Note: the leading 'NOTATION' S part has already being parsed...
3421 *
3422 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3423 *
3424 * [ VC: Notation Attributes ]
3425 * Values of this type must match one of the notation names included
3426 * in the declaration; all notation names in the declaration must be declared.
3427 *
3428 * Returns: the notation attribute tree built while parsing
3429 */
3430
3431xmlEnumerationPtr
3432xmlParseNotationType(xmlParserCtxtPtr ctxt) {
3433 xmlChar *name;
3434 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3435
3436 if (RAW != '(') {
3437 ctxt->errNo = XML_ERR_NOTATION_NOT_STARTED;
3438 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3439 ctxt->sax->error(ctxt->userData,
3440 "'(' required to start 'NOTATION'\n");
3441 ctxt->wellFormed = 0;
3442 ctxt->disableSAX = 1;
3443 return(NULL);
3444 }
3445 SHRINK;
3446 do {
3447 NEXT;
3448 SKIP_BLANKS;
3449 name = xmlParseName(ctxt);
3450 if (name == NULL) {
3451 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3452 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3453 ctxt->sax->error(ctxt->userData,
3454 "Name expected in NOTATION declaration\n");
3455 ctxt->wellFormed = 0;
3456 ctxt->disableSAX = 1;
3457 return(ret);
3458 }
3459 cur = xmlCreateEnumeration(name);
3460 xmlFree(name);
3461 if (cur == NULL) return(ret);
3462 if (last == NULL) ret = last = cur;
3463 else {
3464 last->next = cur;
3465 last = cur;
3466 }
3467 SKIP_BLANKS;
3468 } while (RAW == '|');
3469 if (RAW != ')') {
3470 ctxt->errNo = XML_ERR_NOTATION_NOT_FINISHED;
3471 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3472 ctxt->sax->error(ctxt->userData,
3473 "')' required to finish NOTATION declaration\n");
3474 ctxt->wellFormed = 0;
3475 ctxt->disableSAX = 1;
3476 if ((last != NULL) && (last != ret))
3477 xmlFreeEnumeration(last);
3478 return(ret);
3479 }
3480 NEXT;
3481 return(ret);
3482}
3483
3484/**
3485 * xmlParseEnumerationType:
3486 * @ctxt: an XML parser context
3487 *
3488 * parse an Enumeration attribute type.
3489 *
3490 * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
3491 *
3492 * [ VC: Enumeration ]
3493 * Values of this type must match one of the Nmtoken tokens in
3494 * the declaration
3495 *
3496 * Returns: the enumeration attribute tree built while parsing
3497 */
3498
3499xmlEnumerationPtr
3500xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
3501 xmlChar *name;
3502 xmlEnumerationPtr ret = NULL, last = NULL, cur;
3503
3504 if (RAW != '(') {
3505 ctxt->errNo = XML_ERR_ATTLIST_NOT_STARTED;
3506 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3507 ctxt->sax->error(ctxt->userData,
3508 "'(' required to start ATTLIST enumeration\n");
3509 ctxt->wellFormed = 0;
3510 ctxt->disableSAX = 1;
3511 return(NULL);
3512 }
3513 SHRINK;
3514 do {
3515 NEXT;
3516 SKIP_BLANKS;
3517 name = xmlParseNmtoken(ctxt);
3518 if (name == NULL) {
3519 ctxt->errNo = XML_ERR_NMTOKEN_REQUIRED;
3520 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3521 ctxt->sax->error(ctxt->userData,
3522 "NmToken expected in ATTLIST enumeration\n");
3523 ctxt->wellFormed = 0;
3524 ctxt->disableSAX = 1;
3525 return(ret);
3526 }
3527 cur = xmlCreateEnumeration(name);
3528 xmlFree(name);
3529 if (cur == NULL) return(ret);
3530 if (last == NULL) ret = last = cur;
3531 else {
3532 last->next = cur;
3533 last = cur;
3534 }
3535 SKIP_BLANKS;
3536 } while (RAW == '|');
3537 if (RAW != ')') {
3538 ctxt->errNo = XML_ERR_ATTLIST_NOT_FINISHED;
3539 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3540 ctxt->sax->error(ctxt->userData,
3541 "')' required to finish ATTLIST enumeration\n");
3542 ctxt->wellFormed = 0;
3543 ctxt->disableSAX = 1;
3544 return(ret);
3545 }
3546 NEXT;
3547 return(ret);
3548}
3549
3550/**
3551 * xmlParseEnumeratedType:
3552 * @ctxt: an XML parser context
3553 * @tree: the enumeration tree built while parsing
3554 *
3555 * parse an Enumerated attribute type.
3556 *
3557 * [57] EnumeratedType ::= NotationType | Enumeration
3558 *
3559 * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
3560 *
3561 *
3562 * Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
3563 */
3564
3565int
3566xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3567 if ((RAW == 'N') && (NXT(1) == 'O') &&
3568 (NXT(2) == 'T') && (NXT(3) == 'A') &&
3569 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3570 (NXT(6) == 'O') && (NXT(7) == 'N')) {
3571 SKIP(8);
3572 if (!IS_BLANK(CUR)) {
3573 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3574 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3575 ctxt->sax->error(ctxt->userData,
3576 "Space required after 'NOTATION'\n");
3577 ctxt->wellFormed = 0;
3578 ctxt->disableSAX = 1;
3579 return(0);
3580 }
3581 SKIP_BLANKS;
3582 *tree = xmlParseNotationType(ctxt);
3583 if (*tree == NULL) return(0);
3584 return(XML_ATTRIBUTE_NOTATION);
3585 }
3586 *tree = xmlParseEnumerationType(ctxt);
3587 if (*tree == NULL) return(0);
3588 return(XML_ATTRIBUTE_ENUMERATION);
3589}
3590
3591/**
3592 * xmlParseAttributeType:
3593 * @ctxt: an XML parser context
3594 * @tree: the enumeration tree built while parsing
3595 *
3596 * parse the Attribute list def for an element
3597 *
3598 * [54] AttType ::= StringType | TokenizedType | EnumeratedType
3599 *
3600 * [55] StringType ::= 'CDATA'
3601 *
3602 * [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
3603 * 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
3604 *
3605 * Validity constraints for attribute values syntax are checked in
3606 * xmlValidateAttributeValue()
3607 *
3608 * [ VC: ID ]
3609 * Values of type ID must match the Name production. A name must not
3610 * appear more than once in an XML document as a value of this type;
3611 * i.e., ID values must uniquely identify the elements which bear them.
3612 *
3613 * [ VC: One ID per Element Type ]
3614 * No element type may have more than one ID attribute specified.
3615 *
3616 * [ VC: ID Attribute Default ]
3617 * An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
3618 *
3619 * [ VC: IDREF ]
3620 * Values of type IDREF must match the Name production, and values
3621 * of type IDREFS must match Names; each IDREF Name must match the value
3622 * of an ID attribute on some element in the XML document; i.e. IDREF
3623 * values must match the value of some ID attribute.
3624 *
3625 * [ VC: Entity Name ]
3626 * Values of type ENTITY must match the Name production, values
3627 * of type ENTITIES must match Names; each Entity Name must match the
3628 * name of an unparsed entity declared in the DTD.
3629 *
3630 * [ VC: Name Token ]
3631 * Values of type NMTOKEN must match the Nmtoken production; values
3632 * of type NMTOKENS must match Nmtokens.
3633 *
3634 * Returns the attribute type
3635 */
3636int
3637xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
3638 SHRINK;
3639 if ((RAW == 'C') && (NXT(1) == 'D') &&
3640 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3641 (NXT(4) == 'A')) {
3642 SKIP(5);
3643 return(XML_ATTRIBUTE_CDATA);
3644 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3645 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3646 (NXT(4) == 'F') && (NXT(5) == 'S')) {
3647 SKIP(6);
3648 return(XML_ATTRIBUTE_IDREFS);
3649 } else if ((RAW == 'I') && (NXT(1) == 'D') &&
3650 (NXT(2) == 'R') && (NXT(3) == 'E') &&
3651 (NXT(4) == 'F')) {
3652 SKIP(5);
3653 return(XML_ATTRIBUTE_IDREF);
3654 } else if ((RAW == 'I') && (NXT(1) == 'D')) {
3655 SKIP(2);
3656 return(XML_ATTRIBUTE_ID);
3657 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3658 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3659 (NXT(4) == 'T') && (NXT(5) == 'Y')) {
3660 SKIP(6);
3661 return(XML_ATTRIBUTE_ENTITY);
3662 } else if ((RAW == 'E') && (NXT(1) == 'N') &&
3663 (NXT(2) == 'T') && (NXT(3) == 'I') &&
3664 (NXT(4) == 'T') && (NXT(5) == 'I') &&
3665 (NXT(6) == 'E') && (NXT(7) == 'S')) {
3666 SKIP(8);
3667 return(XML_ATTRIBUTE_ENTITIES);
3668 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3669 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3670 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3671 (NXT(6) == 'N') && (NXT(7) == 'S')) {
3672 SKIP(8);
3673 return(XML_ATTRIBUTE_NMTOKENS);
3674 } else if ((RAW == 'N') && (NXT(1) == 'M') &&
3675 (NXT(2) == 'T') && (NXT(3) == 'O') &&
3676 (NXT(4) == 'K') && (NXT(5) == 'E') &&
3677 (NXT(6) == 'N')) {
3678 SKIP(7);
3679 return(XML_ATTRIBUTE_NMTOKEN);
3680 }
3681 return(xmlParseEnumeratedType(ctxt, tree));
3682}
3683
3684/**
3685 * xmlParseAttributeListDecl:
3686 * @ctxt: an XML parser context
3687 *
3688 * : parse the Attribute list def for an element
3689 *
3690 * [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
3691 *
3692 * [53] AttDef ::= S Name S AttType S DefaultDecl
3693 *
3694 */
3695void
3696xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
3697 xmlChar *elemName;
3698 xmlChar *attrName;
3699 xmlEnumerationPtr tree;
3700
3701 if ((RAW == '<') && (NXT(1) == '!') &&
3702 (NXT(2) == 'A') && (NXT(3) == 'T') &&
3703 (NXT(4) == 'T') && (NXT(5) == 'L') &&
3704 (NXT(6) == 'I') && (NXT(7) == 'S') &&
3705 (NXT(8) == 'T')) {
3706 xmlParserInputPtr input = ctxt->input;
3707
3708 SKIP(9);
3709 if (!IS_BLANK(CUR)) {
3710 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3711 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3712 ctxt->sax->error(ctxt->userData,
3713 "Space required after '<!ATTLIST'\n");
3714 ctxt->wellFormed = 0;
3715 ctxt->disableSAX = 1;
3716 }
3717 SKIP_BLANKS;
3718 elemName = xmlParseName(ctxt);
3719 if (elemName == NULL) {
3720 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3722 ctxt->sax->error(ctxt->userData,
3723 "ATTLIST: no name for Element\n");
3724 ctxt->wellFormed = 0;
3725 ctxt->disableSAX = 1;
3726 return;
3727 }
3728 SKIP_BLANKS;
3729 GROW;
3730 while (RAW != '>') {
3731 const xmlChar *check = CUR_PTR;
3732 int type;
3733 int def;
3734 xmlChar *defaultValue = NULL;
3735
3736 GROW;
3737 tree = NULL;
3738 attrName = xmlParseName(ctxt);
3739 if (attrName == NULL) {
3740 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3741 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3742 ctxt->sax->error(ctxt->userData,
3743 "ATTLIST: no name for Attribute\n");
3744 ctxt->wellFormed = 0;
3745 ctxt->disableSAX = 1;
3746 break;
3747 }
3748 GROW;
3749 if (!IS_BLANK(CUR)) {
3750 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3751 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3752 ctxt->sax->error(ctxt->userData,
3753 "Space required after the attribute name\n");
3754 ctxt->wellFormed = 0;
3755 ctxt->disableSAX = 1;
3756 if (attrName != NULL)
3757 xmlFree(attrName);
3758 if (defaultValue != NULL)
3759 xmlFree(defaultValue);
3760 break;
3761 }
3762 SKIP_BLANKS;
3763
3764 type = xmlParseAttributeType(ctxt, &tree);
3765 if (type <= 0) {
3766 if (attrName != NULL)
3767 xmlFree(attrName);
3768 if (defaultValue != NULL)
3769 xmlFree(defaultValue);
3770 break;
3771 }
3772
3773 GROW;
3774 if (!IS_BLANK(CUR)) {
3775 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3776 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3777 ctxt->sax->error(ctxt->userData,
3778 "Space required after the attribute type\n");
3779 ctxt->wellFormed = 0;
3780 ctxt->disableSAX = 1;
3781 if (attrName != NULL)
3782 xmlFree(attrName);
3783 if (defaultValue != NULL)
3784 xmlFree(defaultValue);
3785 if (tree != NULL)
3786 xmlFreeEnumeration(tree);
3787 break;
3788 }
3789 SKIP_BLANKS;
3790
3791 def = xmlParseDefaultDecl(ctxt, &defaultValue);
3792 if (def <= 0) {
3793 if (attrName != NULL)
3794 xmlFree(attrName);
3795 if (defaultValue != NULL)
3796 xmlFree(defaultValue);
3797 if (tree != NULL)
3798 xmlFreeEnumeration(tree);
3799 break;
3800 }
3801
3802 GROW;
3803 if (RAW != '>') {
3804 if (!IS_BLANK(CUR)) {
3805 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
3806 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3807 ctxt->sax->error(ctxt->userData,
3808 "Space required after the attribute default value\n");
3809 ctxt->wellFormed = 0;
3810 ctxt->disableSAX = 1;
3811 if (attrName != NULL)
3812 xmlFree(attrName);
3813 if (defaultValue != NULL)
3814 xmlFree(defaultValue);
3815 if (tree != NULL)
3816 xmlFreeEnumeration(tree);
3817 break;
3818 }
3819 SKIP_BLANKS;
3820 }
3821 if (check == CUR_PTR) {
3822 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
3823 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3824 ctxt->sax->error(ctxt->userData,
3825 "xmlParseAttributeListDecl: detected internal error\n");
3826 if (attrName != NULL)
3827 xmlFree(attrName);
3828 if (defaultValue != NULL)
3829 xmlFree(defaultValue);
3830 if (tree != NULL)
3831 xmlFreeEnumeration(tree);
3832 break;
3833 }
3834 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
3835 (ctxt->sax->attributeDecl != NULL))
3836 ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
3837 type, def, defaultValue, tree);
3838 if (attrName != NULL)
3839 xmlFree(attrName);
3840 if (defaultValue != NULL)
3841 xmlFree(defaultValue);
3842 GROW;
3843 }
3844 if (RAW == '>') {
3845 if (input != ctxt->input) {
3846 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
3847 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3848 ctxt->sax->error(ctxt->userData,
3849"Attribute list declaration doesn't start and stop in the same entity\n");
3850 ctxt->wellFormed = 0;
3851 ctxt->disableSAX = 1;
3852 }
3853 NEXT;
3854 }
3855
3856 xmlFree(elemName);
3857 }
3858}
3859
3860/**
3861 * xmlParseElementMixedContentDecl:
3862 * @ctxt: an XML parser context
3863 *
3864 * parse the declaration for a Mixed Element content
3865 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3866 *
3867 * [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
3868 * '(' S? '#PCDATA' S? ')'
3869 *
3870 * [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
3871 *
3872 * [ VC: No Duplicate Types ]
3873 * The same name must not appear more than once in a single
3874 * mixed-content declaration.
3875 *
3876 * returns: the list of the xmlElementContentPtr describing the element choices
3877 */
3878xmlElementContentPtr
3879xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt) {
3880 xmlElementContentPtr ret = NULL, cur = NULL, n;
3881 xmlChar *elem = NULL;
3882
3883 GROW;
3884 if ((RAW == '#') && (NXT(1) == 'P') &&
3885 (NXT(2) == 'C') && (NXT(3) == 'D') &&
3886 (NXT(4) == 'A') && (NXT(5) == 'T') &&
3887 (NXT(6) == 'A')) {
3888 SKIP(7);
3889 SKIP_BLANKS;
3890 SHRINK;
3891 if (RAW == ')') {
3892 ctxt->entity = ctxt->input;
3893 NEXT;
3894 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3895 if (RAW == '*') {
3896 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3897 NEXT;
3898 }
3899 return(ret);
3900 }
3901 if ((RAW == '(') || (RAW == '|')) {
3902 ret = cur = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_PCDATA);
3903 if (ret == NULL) return(NULL);
3904 }
3905 while (RAW == '|') {
3906 NEXT;
3907 if (elem == NULL) {
3908 ret = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3909 if (ret == NULL) return(NULL);
3910 ret->c1 = cur;
3911 cur = ret;
3912 } else {
3913 n = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
3914 if (n == NULL) return(NULL);
3915 n->c1 = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
3916 cur->c2 = n;
3917 cur = n;
3918 xmlFree(elem);
3919 }
3920 SKIP_BLANKS;
3921 elem = xmlParseName(ctxt);
3922 if (elem == NULL) {
3923 ctxt->errNo = XML_ERR_NAME_REQUIRED;
3924 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3925 ctxt->sax->error(ctxt->userData,
3926 "xmlParseElementMixedContentDecl : Name expected\n");
3927 ctxt->wellFormed = 0;
3928 ctxt->disableSAX = 1;
3929 xmlFreeElementContent(cur);
3930 return(NULL);
3931 }
3932 SKIP_BLANKS;
3933 GROW;
3934 }
3935 if ((RAW == ')') && (NXT(1) == '*')) {
3936 if (elem != NULL) {
3937 cur->c2 = xmlNewElementContent(elem,
3938 XML_ELEMENT_CONTENT_ELEMENT);
3939 xmlFree(elem);
3940 }
3941 ret->ocur = XML_ELEMENT_CONTENT_MULT;
3942 ctxt->entity = ctxt->input;
3943 SKIP(2);
3944 } else {
3945 if (elem != NULL) xmlFree(elem);
3946 xmlFreeElementContent(ret);
3947 ctxt->errNo = XML_ERR_MIXED_NOT_STARTED;
3948 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3949 ctxt->sax->error(ctxt->userData,
3950 "xmlParseElementMixedContentDecl : '|' or ')*' expected\n");
3951 ctxt->wellFormed = 0;
3952 ctxt->disableSAX = 1;
3953 return(NULL);
3954 }
3955
3956 } else {
3957 ctxt->errNo = XML_ERR_PCDATA_REQUIRED;
3958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
3959 ctxt->sax->error(ctxt->userData,
3960 "xmlParseElementMixedContentDecl : '#PCDATA' expected\n");
3961 ctxt->wellFormed = 0;
3962 ctxt->disableSAX = 1;
3963 }
3964 return(ret);
3965}
3966
3967/**
3968 * xmlParseElementChildrenContentDecl:
3969 * @ctxt: an XML parser context
3970 *
3971 * parse the declaration for a Mixed Element content
3972 * The leading '(' and spaces have been skipped in xmlParseElementContentDecl
3973 *
3974 *
3975 * [47] children ::= (choice | seq) ('?' | '*' | '+')?
3976 *
3977 * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
3978 *
3979 * [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
3980 *
3981 * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
3982 *
3983 * [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
3984 * TODO Parameter-entity replacement text must be properly nested
3985 * with parenthetized groups. That is to say, if either of the
3986 * opening or closing parentheses in a choice, seq, or Mixed
3987 * construct is contained in the replacement text for a parameter
3988 * entity, both must be contained in the same replacement text. For
3989 * interoperability, if a parameter-entity reference appears in a
3990 * choice, seq, or Mixed construct, its replacement text should not
3991 * be empty, and neither the first nor last non-blank character of
3992 * the replacement text should be a connector (| or ,).
3993 *
3994 * returns: the tree of xmlElementContentPtr describing the element
3995 * hierarchy.
3996 */
3997xmlElementContentPtr
3998#ifdef VMS
3999xmlParseElementChildrenContentD
4000#else
4001xmlParseElementChildrenContentDecl
4002#endif
4003(xmlParserCtxtPtr ctxt) {
4004 xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
4005 xmlChar *elem;
4006 xmlChar type = 0;
4007
4008 SKIP_BLANKS;
4009 GROW;
4010 if (RAW == '(') {
4011 /* Recurse on first child */
4012 NEXT;
4013 SKIP_BLANKS;
4014 cur = ret = xmlParseElementChildrenContentDecl(ctxt);
4015 SKIP_BLANKS;
4016 GROW;
4017 } else {
4018 elem = xmlParseName(ctxt);
4019 if (elem == NULL) {
4020 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4021 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4022 ctxt->sax->error(ctxt->userData,
4023 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4024 ctxt->wellFormed = 0;
4025 ctxt->disableSAX = 1;
4026 return(NULL);
4027 }
4028 cur = ret = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4029 GROW;
4030 if (RAW == '?') {
4031 cur->ocur = XML_ELEMENT_CONTENT_OPT;
4032 NEXT;
4033 } else if (RAW == '*') {
4034 cur->ocur = XML_ELEMENT_CONTENT_MULT;
4035 NEXT;
4036 } else if (RAW == '+') {
4037 cur->ocur = XML_ELEMENT_CONTENT_PLUS;
4038 NEXT;
4039 } else {
4040 cur->ocur = XML_ELEMENT_CONTENT_ONCE;
4041 }
4042 xmlFree(elem);
4043 GROW;
4044 }
4045 SKIP_BLANKS;
4046 SHRINK;
4047 while (RAW != ')') {
4048 /*
4049 * Each loop we parse one separator and one element.
4050 */
4051 if (RAW == ',') {
4052 if (type == 0) type = CUR;
4053
4054 /*
4055 * Detect "Name | Name , Name" error
4056 */
4057 else if (type != CUR) {
4058 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4059 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4060 ctxt->sax->error(ctxt->userData,
4061 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4062 type);
4063 ctxt->wellFormed = 0;
4064 ctxt->disableSAX = 1;
4065 if ((op != NULL) && (op != ret))
4066 xmlFreeElementContent(op);
4067 if ((last != NULL) && (last != ret) &&
4068 (last != ret->c1) && (last != ret->c2))
4069 xmlFreeElementContent(last);
4070 if (ret != NULL)
4071 xmlFreeElementContent(ret);
4072 return(NULL);
4073 }
4074 NEXT;
4075
4076 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_SEQ);
4077 if (op == NULL) {
4078 xmlFreeElementContent(ret);
4079 return(NULL);
4080 }
4081 if (last == NULL) {
4082 op->c1 = ret;
4083 ret = cur = op;
4084 } else {
4085 cur->c2 = op;
4086 op->c1 = last;
4087 cur =op;
4088 last = NULL;
4089 }
4090 } else if (RAW == '|') {
4091 if (type == 0) type = CUR;
4092
4093 /*
4094 * Detect "Name , Name | Name" error
4095 */
4096 else if (type != CUR) {
4097 ctxt->errNo = XML_ERR_SEPARATOR_REQUIRED;
4098 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4099 ctxt->sax->error(ctxt->userData,
4100 "xmlParseElementChildrenContentDecl : '%c' expected\n",
4101 type);
4102 ctxt->wellFormed = 0;
4103 ctxt->disableSAX = 1;
4104 if ((op != NULL) && (op != ret) && (op != last))
4105 xmlFreeElementContent(op);
4106 if ((last != NULL) && (last != ret) &&
4107 (last != ret->c1) && (last != ret->c2))
4108 xmlFreeElementContent(last);
4109 if (ret != NULL)
4110 xmlFreeElementContent(ret);
4111 return(NULL);
4112 }
4113 NEXT;
4114
4115 op = xmlNewElementContent(NULL, XML_ELEMENT_CONTENT_OR);
4116 if (op == NULL) {
4117 if ((op != NULL) && (op != ret))
4118 xmlFreeElementContent(op);
4119 if ((last != NULL) && (last != ret) &&
4120 (last != ret->c1) && (last != ret->c2))
4121 xmlFreeElementContent(last);
4122 if (ret != NULL)
4123 xmlFreeElementContent(ret);
4124 return(NULL);
4125 }
4126 if (last == NULL) {
4127 op->c1 = ret;
4128 ret = cur = op;
4129 } else {
4130 cur->c2 = op;
4131 op->c1 = last;
4132 cur =op;
4133 last = NULL;
4134 }
4135 } else {
4136 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_FINISHED;
4137 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4138 ctxt->sax->error(ctxt->userData,
4139 "xmlParseElementChildrenContentDecl : ',' '|' or ')' expected\n");
4140 ctxt->wellFormed = 0;
4141 ctxt->disableSAX = 1;
4142 if ((op != NULL) && (op != ret))
4143 xmlFreeElementContent(op);
4144 if ((last != NULL) && (last != ret) &&
4145 (last != ret->c1) && (last != ret->c2))
4146 xmlFreeElementContent(last);
4147 if (ret != NULL)
4148 xmlFreeElementContent(ret);
4149 return(NULL);
4150 }
4151 GROW;
4152 SKIP_BLANKS;
4153 GROW;
4154 if (RAW == '(') {
4155 /* Recurse on second child */
4156 NEXT;
4157 SKIP_BLANKS;
4158 last = xmlParseElementChildrenContentDecl(ctxt);
4159 SKIP_BLANKS;
4160 } else {
4161 elem = xmlParseName(ctxt);
4162 if (elem == NULL) {
4163 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4164 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4165 ctxt->sax->error(ctxt->userData,
4166 "xmlParseElementChildrenContentDecl : Name or '(' expected\n");
4167 ctxt->wellFormed = 0;
4168 ctxt->disableSAX = 1;
4169 if ((op != NULL) && (op != ret))
4170 xmlFreeElementContent(op);
4171 if ((last != NULL) && (last != ret) &&
4172 (last != ret->c1) && (last != ret->c2))
4173 xmlFreeElementContent(last);
4174 if (ret != NULL)
4175 xmlFreeElementContent(ret);
4176 return(NULL);
4177 }
4178 last = xmlNewElementContent(elem, XML_ELEMENT_CONTENT_ELEMENT);
4179 xmlFree(elem);
4180 if (RAW == '?') {
4181 last->ocur = XML_ELEMENT_CONTENT_OPT;
4182 NEXT;
4183 } else if (RAW == '*') {
4184 last->ocur = XML_ELEMENT_CONTENT_MULT;
4185 NEXT;
4186 } else if (RAW == '+') {
4187 last->ocur = XML_ELEMENT_CONTENT_PLUS;
4188 NEXT;
4189 } else {
4190 last->ocur = XML_ELEMENT_CONTENT_ONCE;
4191 }
4192 }
4193 SKIP_BLANKS;
4194 GROW;
4195 }
4196 if ((cur != NULL) && (last != NULL)) {
4197 cur->c2 = last;
4198 }
4199 ctxt->entity = ctxt->input;
4200 NEXT;
4201 if (RAW == '?') {
4202 ret->ocur = XML_ELEMENT_CONTENT_OPT;
4203 NEXT;
4204 } else if (RAW == '*') {
4205 ret->ocur = XML_ELEMENT_CONTENT_MULT;
4206 NEXT;
4207 } else if (RAW == '+') {
4208 ret->ocur = XML_ELEMENT_CONTENT_PLUS;
4209 NEXT;
4210 }
4211 return(ret);
4212}
4213
4214/**
4215 * xmlParseElementContentDecl:
4216 * @ctxt: an XML parser context
4217 * @name: the name of the element being defined.
4218 * @result: the Element Content pointer will be stored here if any
4219 *
4220 * parse the declaration for an Element content either Mixed or Children,
4221 * the cases EMPTY and ANY are handled directly in xmlParseElementDecl
4222 *
4223 * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
4224 *
4225 * returns: the type of element content XML_ELEMENT_TYPE_xxx
4226 */
4227
4228int
4229xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, xmlChar *name,
4230 xmlElementContentPtr *result) {
4231
4232 xmlElementContentPtr tree = NULL;
4233 xmlParserInputPtr input = ctxt->input;
4234 int res;
4235
4236 *result = NULL;
4237
4238 if (RAW != '(') {
4239 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4240 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4241 ctxt->sax->error(ctxt->userData,
4242 "xmlParseElementContentDecl : '(' expected\n");
4243 ctxt->wellFormed = 0;
4244 ctxt->disableSAX = 1;
4245 return(-1);
4246 }
4247 NEXT;
4248 GROW;
4249 SKIP_BLANKS;
4250 if ((RAW == '#') && (NXT(1) == 'P') &&
4251 (NXT(2) == 'C') && (NXT(3) == 'D') &&
4252 (NXT(4) == 'A') && (NXT(5) == 'T') &&
4253 (NXT(6) == 'A')) {
4254 tree = xmlParseElementMixedContentDecl(ctxt);
4255 res = XML_ELEMENT_TYPE_MIXED;
4256 } else {
4257 tree = xmlParseElementChildrenContentDecl(ctxt);
4258 res = XML_ELEMENT_TYPE_ELEMENT;
4259 }
4260 if ((ctxt->entity != NULL) && (input != ctxt->entity)) {
4261 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4262 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4263 ctxt->sax->error(ctxt->userData,
4264"Element content declaration doesn't start and stop in the same entity\n");
4265 ctxt->wellFormed = 0;
4266 ctxt->disableSAX = 1;
4267 }
4268 SKIP_BLANKS;
4269 *result = tree;
4270 return(res);
4271}
4272
4273/**
4274 * xmlParseElementDecl:
4275 * @ctxt: an XML parser context
4276 *
4277 * parse an Element declaration.
4278 *
4279 * [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
4280 *
4281 * [ VC: Unique Element Type Declaration ]
4282 * No element type may be declared more than once
4283 *
4284 * Returns the type of the element, or -1 in case of error
4285 */
4286int
4287xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
4288 xmlChar *name;
4289 int ret = -1;
4290 xmlElementContentPtr content = NULL;
4291
4292 GROW;
4293 if ((RAW == '<') && (NXT(1) == '!') &&
4294 (NXT(2) == 'E') && (NXT(3) == 'L') &&
4295 (NXT(4) == 'E') && (NXT(5) == 'M') &&
4296 (NXT(6) == 'E') && (NXT(7) == 'N') &&
4297 (NXT(8) == 'T')) {
4298 xmlParserInputPtr input = ctxt->input;
4299
4300 SKIP(9);
4301 if (!IS_BLANK(CUR)) {
4302 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4303 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4304 ctxt->sax->error(ctxt->userData,
4305 "Space required after 'ELEMENT'\n");
4306 ctxt->wellFormed = 0;
4307 ctxt->disableSAX = 1;
4308 }
4309 SKIP_BLANKS;
4310 name = xmlParseName(ctxt);
4311 if (name == NULL) {
4312 ctxt->errNo = XML_ERR_NAME_REQUIRED;
4313 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4314 ctxt->sax->error(ctxt->userData,
4315 "xmlParseElementDecl: no name for Element\n");
4316 ctxt->wellFormed = 0;
4317 ctxt->disableSAX = 1;
4318 return(-1);
4319 }
4320 while ((RAW == 0) && (ctxt->inputNr > 1))
4321 xmlPopInput(ctxt);
4322 if (!IS_BLANK(CUR)) {
4323 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4324 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4325 ctxt->sax->error(ctxt->userData,
4326 "Space required after the element name\n");
4327 ctxt->wellFormed = 0;
4328 ctxt->disableSAX = 1;
4329 }
4330 SKIP_BLANKS;
4331 if ((RAW == 'E') && (NXT(1) == 'M') &&
4332 (NXT(2) == 'P') && (NXT(3) == 'T') &&
4333 (NXT(4) == 'Y')) {
4334 SKIP(5);
4335 /*
4336 * Element must always be empty.
4337 */
4338 ret = XML_ELEMENT_TYPE_EMPTY;
4339 } else if ((RAW == 'A') && (NXT(1) == 'N') &&
4340 (NXT(2) == 'Y')) {
4341 SKIP(3);
4342 /*
4343 * Element is a generic container.
4344 */
4345 ret = XML_ELEMENT_TYPE_ANY;
4346 } else if (RAW == '(') {
4347 ret = xmlParseElementContentDecl(ctxt, name, &content);
4348 } else {
4349 /*
4350 * [ WFC: PEs in Internal Subset ] error handling.
4351 */
4352 if ((RAW == '%') && (ctxt->external == 0) &&
4353 (ctxt->inputNr == 1)) {
4354 ctxt->errNo = XML_ERR_PEREF_IN_INT_SUBSET;
4355 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4356 ctxt->sax->error(ctxt->userData,
4357 "PEReference: forbidden within markup decl in internal subset\n");
4358 } else {
4359 ctxt->errNo = XML_ERR_ELEMCONTENT_NOT_STARTED;
4360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4361 ctxt->sax->error(ctxt->userData,
4362 "xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
4363 }
4364 ctxt->wellFormed = 0;
4365 ctxt->disableSAX = 1;
4366 if (name != NULL) xmlFree(name);
4367 return(-1);
4368 }
4369
4370 SKIP_BLANKS;
4371 /*
4372 * Pop-up of finished entities.
4373 */
4374 while ((RAW == 0) && (ctxt->inputNr > 1))
4375 xmlPopInput(ctxt);
4376 SKIP_BLANKS;
4377
4378 if (RAW != '>') {
4379 ctxt->errNo = XML_ERR_GT_REQUIRED;
4380 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4381 ctxt->sax->error(ctxt->userData,
4382 "xmlParseElementDecl: expected '>' at the end\n");
4383 ctxt->wellFormed = 0;
4384 ctxt->disableSAX = 1;
4385 } else {
4386 if (input != ctxt->input) {
4387 ctxt->errNo = XML_ERR_ENTITY_BOUNDARY;
4388 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4389 ctxt->sax->error(ctxt->userData,
4390"Element declaration doesn't start and stop in the same entity\n");
4391 ctxt->wellFormed = 0;
4392 ctxt->disableSAX = 1;
4393 }
4394
4395 NEXT;
4396 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
4397 (ctxt->sax->elementDecl != NULL))
4398 ctxt->sax->elementDecl(ctxt->userData, name, ret,
4399 content);
4400 }
4401 if (content != NULL) {
4402 xmlFreeElementContent(content);
4403 }
4404 if (name != NULL) {
4405 xmlFree(name);
4406 }
4407 }
4408 return(ret);
4409}
4410
4411/**
4412 * xmlParseMarkupDecl:
4413 * @ctxt: an XML parser context
4414 *
4415 * parse Markup declarations
4416 *
4417 * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
4418 * NotationDecl | PI | Comment
4419 *
4420 * [ VC: Proper Declaration/PE Nesting ]
4421 * Parameter-entity replacement text must be properly nested with
4422 * markup declarations. That is to say, if either the first character
4423 * or the last character of a markup declaration (markupdecl above) is
4424 * contained in the replacement text for a parameter-entity reference,
4425 * both must be contained in the same replacement text.
4426 *
4427 * [ WFC: PEs in Internal Subset ]
4428 * In the internal DTD subset, parameter-entity references can occur
4429 * only where markup declarations can occur, not within markup declarations.
4430 * (This does not apply to references that occur in external parameter
4431 * entities or to the external subset.)
4432 */
4433void
4434xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
4435 GROW;
4436 xmlParseElementDecl(ctxt);
4437 xmlParseAttributeListDecl(ctxt);
4438 xmlParseEntityDecl(ctxt);
4439 xmlParseNotationDecl(ctxt);
4440 xmlParsePI(ctxt);
4441 xmlParseComment(ctxt);
4442 /*
4443 * This is only for internal subset. On external entities,
4444 * the replacement is done before parsing stage
4445 */
4446 if ((ctxt->external == 0) && (ctxt->inputNr == 1))
4447 xmlParsePEReference(ctxt);
4448 ctxt->instate = XML_PARSER_DTD;
4449}
4450
4451/**
4452 * xmlParseTextDecl:
4453 * @ctxt: an XML parser context
4454 *
4455 * parse an XML declaration header for external entities
4456 *
4457 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
4458 *
4459 * Question: Seems that EncodingDecl is mandatory ? Is that a typo ?
4460 */
4461
4462void
4463xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
4464 xmlChar *version;
4465
4466 /*
4467 * We know that '<?xml' is here.
4468 */
4469 if ((RAW == '<') && (NXT(1) == '?') &&
4470 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4471 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4472 SKIP(5);
4473 } else {
4474 ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
4475 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4476 ctxt->sax->error(ctxt->userData,
4477 "Text declaration '<?xml' required\n");
4478 ctxt->wellFormed = 0;
4479 ctxt->disableSAX = 1;
4480
4481 return;
4482 }
4483
4484 if (!IS_BLANK(CUR)) {
4485 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4486 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4487 ctxt->sax->error(ctxt->userData,
4488 "Space needed after '<?xml'\n");
4489 ctxt->wellFormed = 0;
4490 ctxt->disableSAX = 1;
4491 }
4492 SKIP_BLANKS;
4493
4494 /*
4495 * We may have the VersionInfo here.
4496 */
4497 version = xmlParseVersionInfo(ctxt);
4498 if (version == NULL)
4499 version = xmlCharStrdup(XML_DEFAULT_VERSION);
4500 ctxt->input->version = version;
4501
4502 /*
4503 * We must have the encoding declaration
4504 */
4505 if (!IS_BLANK(CUR)) {
4506 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
4507 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4508 ctxt->sax->error(ctxt->userData, "Space needed here\n");
4509 ctxt->wellFormed = 0;
4510 ctxt->disableSAX = 1;
4511 }
4512 xmlParseEncodingDecl(ctxt);
4513 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4514 /*
4515 * The XML REC instructs us to stop parsing right here
4516 */
4517 return;
4518 }
4519
4520 SKIP_BLANKS;
4521 if ((RAW == '?') && (NXT(1) == '>')) {
4522 SKIP(2);
4523 } else if (RAW == '>') {
4524 /* Deprecated old WD ... */
4525 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4526 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4527 ctxt->sax->error(ctxt->userData,
4528 "XML declaration must end-up with '?>'\n");
4529 ctxt->wellFormed = 0;
4530 ctxt->disableSAX = 1;
4531 NEXT;
4532 } else {
4533 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
4534 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4535 ctxt->sax->error(ctxt->userData,
4536 "parsing XML declaration: '?>' expected\n");
4537 ctxt->wellFormed = 0;
4538 ctxt->disableSAX = 1;
4539 MOVETO_ENDTAG(CUR_PTR);
4540 NEXT;
4541 }
4542}
4543
4544/*
4545 * xmlParseConditionalSections
4546 * @ctxt: an XML parser context
4547 *
4548 * [61] conditionalSect ::= includeSect | ignoreSect
4549 * [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
4550 * [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
4551 * [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
4552 * [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
4553 */
4554
4555void
4556xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
4557 SKIP(3);
4558 SKIP_BLANKS;
4559 if ((RAW == 'I') && (NXT(1) == 'N') && (NXT(2) == 'C') &&
4560 (NXT(3) == 'L') && (NXT(4) == 'U') && (NXT(5) == 'D') &&
4561 (NXT(6) == 'E')) {
4562 SKIP(7);
4563 SKIP_BLANKS;
4564 if (RAW != '[') {
4565 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4566 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4567 ctxt->sax->error(ctxt->userData,
4568 "XML conditional section '[' expected\n");
4569 ctxt->wellFormed = 0;
4570 ctxt->disableSAX = 1;
4571 } else {
4572 NEXT;
4573 }
4574 if (xmlParserDebugEntities) {
4575 if ((ctxt->input != NULL) && (ctxt->input->filename))
4576 xmlGenericError(xmlGenericErrorContext,
4577 "%s(%d): ", ctxt->input->filename,
4578 ctxt->input->line);
4579 xmlGenericError(xmlGenericErrorContext,
4580 "Entering INCLUDE Conditional Section\n");
4581 }
4582
4583 while ((RAW != 0) && ((RAW != ']') || (NXT(1) != ']') ||
4584 (NXT(2) != '>'))) {
4585 const xmlChar *check = CUR_PTR;
4586 int cons = ctxt->input->consumed;
4587 int tok = ctxt->token;
4588
4589 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4590 xmlParseConditionalSections(ctxt);
4591 } else if (IS_BLANK(CUR)) {
4592 NEXT;
4593 } else if (RAW == '%') {
4594 xmlParsePEReference(ctxt);
4595 } else
4596 xmlParseMarkupDecl(ctxt);
4597
4598 /*
4599 * Pop-up of finished entities.
4600 */
4601 while ((RAW == 0) && (ctxt->inputNr > 1))
4602 xmlPopInput(ctxt);
4603
4604 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4605 (tok == ctxt->token)) {
4606 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4607 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4608 ctxt->sax->error(ctxt->userData,
4609 "Content error in the external subset\n");
4610 ctxt->wellFormed = 0;
4611 ctxt->disableSAX = 1;
4612 break;
4613 }
4614 }
4615 if (xmlParserDebugEntities) {
4616 if ((ctxt->input != NULL) && (ctxt->input->filename))
4617 xmlGenericError(xmlGenericErrorContext,
4618 "%s(%d): ", ctxt->input->filename,
4619 ctxt->input->line);
4620 xmlGenericError(xmlGenericErrorContext,
4621 "Leaving INCLUDE Conditional Section\n");
4622 }
4623
4624 } else if ((RAW == 'I') && (NXT(1) == 'G') && (NXT(2) == 'N') &&
4625 (NXT(3) == 'O') && (NXT(4) == 'R') && (NXT(5) == 'E')) {
4626 int state;
4627 int instate;
4628 int depth = 0;
4629
4630 SKIP(6);
4631 SKIP_BLANKS;
4632 if (RAW != '[') {
4633 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4634 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4635 ctxt->sax->error(ctxt->userData,
4636 "XML conditional section '[' expected\n");
4637 ctxt->wellFormed = 0;
4638 ctxt->disableSAX = 1;
4639 } else {
4640 NEXT;
4641 }
4642 if (xmlParserDebugEntities) {
4643 if ((ctxt->input != NULL) && (ctxt->input->filename))
4644 xmlGenericError(xmlGenericErrorContext,
4645 "%s(%d): ", ctxt->input->filename,
4646 ctxt->input->line);
4647 xmlGenericError(xmlGenericErrorContext,
4648 "Entering IGNORE Conditional Section\n");
4649 }
4650
4651 /*
4652 * Parse up to the end of the conditionnal section
4653 * But disable SAX event generating DTD building in the meantime
4654 */
4655 state = ctxt->disableSAX;
4656 instate = ctxt->instate;
4657 ctxt->disableSAX = 1;
4658 ctxt->instate = XML_PARSER_IGNORE;
4659
4660 while (depth >= 0) {
4661 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4662 depth++;
4663 SKIP(3);
4664 continue;
4665 }
4666 if ((RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4667 if (--depth >= 0) SKIP(3);
4668 continue;
4669 }
4670 NEXT;
4671 continue;
4672 }
4673
4674 ctxt->disableSAX = state;
4675 ctxt->instate = instate;
4676
4677 if (xmlParserDebugEntities) {
4678 if ((ctxt->input != NULL) && (ctxt->input->filename))
4679 xmlGenericError(xmlGenericErrorContext,
4680 "%s(%d): ", ctxt->input->filename,
4681 ctxt->input->line);
4682 xmlGenericError(xmlGenericErrorContext,
4683 "Leaving IGNORE Conditional Section\n");
4684 }
4685
4686 } else {
4687 ctxt->errNo = XML_ERR_CONDSEC_INVALID;
4688 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4689 ctxt->sax->error(ctxt->userData,
4690 "XML conditional section INCLUDE or IGNORE keyword expected\n");
4691 ctxt->wellFormed = 0;
4692 ctxt->disableSAX = 1;
4693 }
4694
4695 if (RAW == 0)
4696 SHRINK;
4697
4698 if (RAW == 0) {
4699 ctxt->errNo = XML_ERR_CONDSEC_NOT_FINISHED;
4700 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4701 ctxt->sax->error(ctxt->userData,
4702 "XML conditional section not closed\n");
4703 ctxt->wellFormed = 0;
4704 ctxt->disableSAX = 1;
4705 } else {
4706 SKIP(3);
4707 }
4708}
4709
4710/**
4711 * xmlParseExternalSubset:
4712 * @ctxt: an XML parser context
4713 * @ExternalID: the external identifier
4714 * @SystemID: the system identifier (or URL)
4715 *
4716 * parse Markup declarations from an external subset
4717 *
4718 * [30] extSubset ::= textDecl? extSubsetDecl
4719 *
4720 * [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
4721 */
4722void
4723xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
4724 const xmlChar *SystemID) {
4725 GROW;
4726 if ((RAW == '<') && (NXT(1) == '?') &&
4727 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4728 (NXT(4) == 'l')) {
4729 xmlParseTextDecl(ctxt);
4730 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
4731 /*
4732 * The XML REC instructs us to stop parsing right here
4733 */
4734 ctxt->instate = XML_PARSER_EOF;
4735 return;
4736 }
4737 }
4738 if (ctxt->myDoc == NULL) {
4739 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
4740 }
4741 if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
4742 xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
4743
4744 ctxt->instate = XML_PARSER_DTD;
4745 ctxt->external = 1;
4746 while (((RAW == '<') && (NXT(1) == '?')) ||
4747 ((RAW == '<') && (NXT(1) == '!')) ||
4748 IS_BLANK(CUR)) {
4749 const xmlChar *check = CUR_PTR;
4750 int cons = ctxt->input->consumed;
4751 int tok = ctxt->token;
4752
4753 GROW;
4754 if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
4755 xmlParseConditionalSections(ctxt);
4756 } else if (IS_BLANK(CUR)) {
4757 NEXT;
4758 } else if (RAW == '%') {
4759 xmlParsePEReference(ctxt);
4760 } else
4761 xmlParseMarkupDecl(ctxt);
4762
4763 /*
4764 * Pop-up of finished entities.
4765 */
4766 while ((RAW == 0) && (ctxt->inputNr > 1))
4767 xmlPopInput(ctxt);
4768
4769 if ((CUR_PTR == check) && (cons == ctxt->input->consumed) &&
4770 (tok == ctxt->token)) {
4771 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4772 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4773 ctxt->sax->error(ctxt->userData,
4774 "Content error in the external subset\n");
4775 ctxt->wellFormed = 0;
4776 ctxt->disableSAX = 1;
4777 break;
4778 }
4779 }
4780
4781 if (RAW != 0) {
4782 ctxt->errNo = XML_ERR_EXT_SUBSET_NOT_FINISHED;
4783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4784 ctxt->sax->error(ctxt->userData,
4785 "Extra content at the end of the document\n");
4786 ctxt->wellFormed = 0;
4787 ctxt->disableSAX = 1;
4788 }
4789
4790}
4791
4792/**
4793 * xmlParseReference:
4794 * @ctxt: an XML parser context
4795 *
4796 * parse and handle entity references in content, depending on the SAX
4797 * interface, this may end-up in a call to character() if this is a
4798 * CharRef, a predefined entity, if there is no reference() callback.
4799 * or if the parser was asked to switch to that mode.
4800 *
4801 * [67] Reference ::= EntityRef | CharRef
4802 */
4803void
4804xmlParseReference(xmlParserCtxtPtr ctxt) {
4805 xmlEntityPtr ent;
4806 xmlChar *val;
4807 if (RAW != '&') return;
4808
4809 if (NXT(1) == '#') {
4810 int i = 0;
4811 xmlChar out[10];
4812 int hex = NXT(2);
4813 int val = xmlParseCharRef(ctxt);
4814
4815 if (ctxt->charset != XML_CHAR_ENCODING_UTF8) {
4816 /*
4817 * So we are using non-UTF-8 buffers
4818 * Check that the char fit on 8bits, if not
4819 * generate a CharRef.
4820 */
4821 if (val <= 0xFF) {
4822 out[0] = val;
4823 out[1] = 0;
4824 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4825 (!ctxt->disableSAX))
4826 ctxt->sax->characters(ctxt->userData, out, 1);
4827 } else {
4828 if ((hex == 'x') || (hex == 'X'))
4829 sprintf((char *)out, "#x%X", val);
4830 else
4831 sprintf((char *)out, "#%d", val);
4832 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4833 (!ctxt->disableSAX))
4834 ctxt->sax->reference(ctxt->userData, out);
4835 }
4836 } else {
4837 /*
4838 * Just encode the value in UTF-8
4839 */
4840 COPY_BUF(0 ,out, i, val);
4841 out[i] = 0;
4842 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
4843 (!ctxt->disableSAX))
4844 ctxt->sax->characters(ctxt->userData, out, i);
4845 }
4846 } else {
4847 ent = xmlParseEntityRef(ctxt);
4848 if (ent == NULL) return;
4849 if ((ent->name != NULL) &&
4850 (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
4851 xmlNodePtr list = NULL;
4852 int ret;
4853
4854
4855 /*
4856 * The first reference to the entity trigger a parsing phase
4857 * where the ent->children is filled with the result from
4858 * the parsing.
4859 */
4860 if (ent->children == NULL) {
4861 xmlChar *value;
4862 value = ent->content;
4863
4864 /*
4865 * Check that this entity is well formed
4866 */
4867 if ((value != NULL) &&
4868 (value[1] == 0) && (value[0] == '<') &&
4869 (xmlStrEqual(ent->name, BAD_CAST "lt"))) {
4870 /*
4871 * DONE: get definite answer on this !!!
4872 * Lots of entity decls are used to declare a single
4873 * char
4874 * <!ENTITY lt "<">
4875 * Which seems to be valid since
4876 * 2.4: The ampersand character (&) and the left angle
4877 * bracket (<) may appear in their literal form only
4878 * when used ... They are also legal within the literal
4879 * entity value of an internal entity declaration;i
4880 * see "4.3.2 Well-Formed Parsed Entities".
4881 * IMHO 2.4 and 4.3.2 are directly in contradiction.
4882 * Looking at the OASIS test suite and James Clark
4883 * tests, this is broken. However the XML REC uses
4884 * it. Is the XML REC not well-formed ????
4885 * This is a hack to avoid this problem
4886 *
4887 * ANSWER: since lt gt amp .. are already defined,
4888 * this is a redefinition and hence the fact that the
4889 * contentis not well balanced is not a Wf error, this
4890 * is lousy but acceptable.
4891 */
4892 list = xmlNewDocText(ctxt->myDoc, value);
4893 if (list != NULL) {
4894 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4895 (ent->children == NULL)) {
4896 ent->children = list;
4897 ent->last = list;
4898 list->parent = (xmlNodePtr) ent;
4899 } else {
4900 xmlFreeNodeList(list);
4901 }
4902 } else if (list != NULL) {
4903 xmlFreeNodeList(list);
4904 }
4905 } else {
4906 /*
4907 * 4.3.2: An internal general parsed entity is well-formed
4908 * if its replacement text matches the production labeled
4909 * content.
4910 */
4911 if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
4912 ctxt->depth++;
4913 ret = xmlParseBalancedChunkMemory(ctxt->myDoc,
4914 ctxt->sax, NULL, ctxt->depth,
4915 value, &list);
4916 ctxt->depth--;
4917 } else if (ent->etype ==
4918 XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
4919 ctxt->depth++;
4920 ret = xmlParseExternalEntity(ctxt->myDoc,
4921 ctxt->sax, NULL, ctxt->depth,
4922 ent->URI, ent->ExternalID, &list);
4923 ctxt->depth--;
4924 } else {
4925 ret = -1;
4926 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4927 ctxt->sax->error(ctxt->userData,
4928 "Internal: invalid entity type\n");
4929 }
4930 if (ret == XML_ERR_ENTITY_LOOP) {
4931 ctxt->errNo = XML_ERR_ENTITY_LOOP;
4932 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4933 ctxt->sax->error(ctxt->userData,
4934 "Detected entity reference loop\n");
4935 ctxt->wellFormed = 0;
4936 ctxt->disableSAX = 1;
4937 } else if ((ret == 0) && (list != NULL)) {
4938 if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) &&
4939 (ent->children == NULL)) {
4940 ent->children = list;
4941 while (list != NULL) {
4942 list->parent = (xmlNodePtr) ent;
4943 if (list->next == NULL)
4944 ent->last = list;
4945 list = list->next;
4946 }
4947 } else {
4948 xmlFreeNodeList(list);
4949 }
4950 } else if (ret > 0) {
4951 ctxt->errNo = ret;
4952 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
4953 ctxt->sax->error(ctxt->userData,
4954 "Entity value required\n");
4955 ctxt->wellFormed = 0;
4956 ctxt->disableSAX = 1;
4957 } else if (list != NULL) {
4958 xmlFreeNodeList(list);
4959 }
4960 }
4961 }
4962 if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
4963 (ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
4964 /*
4965 * Create a node.
4966 */
4967 ctxt->sax->reference(ctxt->userData, ent->name);
4968 return;
4969 } else if (ctxt->replaceEntities) {
4970 if ((ctxt->node != NULL) && (ent->children != NULL)) {
4971 /*
4972 * Seems we are generating the DOM content, do
4973 * a simple tree copy
4974 */
4975 xmlNodePtr new;
4976 new = xmlCopyNodeList(ent->children);
4977
4978 xmlAddChildList(ctxt->node, new);
4979 /*
4980 * This is to avoid a nasty side effect, see
4981 * characters() in SAX.c
4982 */
4983 ctxt->nodemem = 0;
4984 ctxt->nodelen = 0;
4985 return;
4986 } else {
4987 /*
4988 * Probably running in SAX mode
4989 */
4990 xmlParserInputPtr input;
4991
4992 input = xmlNewEntityInputStream(ctxt, ent);
4993 xmlPushInput(ctxt, input);
4994 if ((ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) &&
4995 (RAW == '<') && (NXT(1) == '?') &&
4996 (NXT(2) == 'x') && (NXT(3) == 'm') &&
4997 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
4998 xmlParseTextDecl(ctxt);
4999 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5000 /*
5001 * The XML REC instructs us to stop parsing right here
5002 */
5003 ctxt->instate = XML_PARSER_EOF;
5004 return;
5005 }
5006 if (input->standalone == 1) {
5007 ctxt->errNo = XML_ERR_EXT_ENTITY_STANDALONE;
5008 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5009 ctxt->sax->error(ctxt->userData,
5010 "external parsed entities cannot be standalone\n");
5011 ctxt->wellFormed = 0;
5012 ctxt->disableSAX = 1;
5013 }
5014 }
5015 return;
5016 }
5017 }
5018 } else {
5019 val = ent->content;
5020 if (val == NULL) return;
5021 /*
5022 * inline the entity.
5023 */
5024 if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
5025 (!ctxt->disableSAX))
5026 ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
5027 }
5028 }
5029}
5030
5031/**
5032 * xmlParseEntityRef:
5033 * @ctxt: an XML parser context
5034 *
5035 * parse ENTITY references declarations
5036 *
5037 * [68] EntityRef ::= '&' Name ';'
5038 *
5039 * [ WFC: Entity Declared ]
5040 * In a document without any DTD, a document with only an internal DTD
5041 * subset which contains no parameter entity references, or a document
5042 * with "standalone='yes'", the Name given in the entity reference
5043 * must match that in an entity declaration, except that well-formed
5044 * documents need not declare any of the following entities: amp, lt,
5045 * gt, apos, quot. The declaration of a parameter entity must precede
5046 * any reference to it. Similarly, the declaration of a general entity
5047 * must precede any reference to it which appears in a default value in an
5048 * attribute-list declaration. Note that if entities are declared in the
5049 * external subset or in external parameter entities, a non-validating
5050 * processor is not obligated to read and process their declarations;
5051 * for such documents, the rule that an entity must be declared is a
5052 * well-formedness constraint only if standalone='yes'.
5053 *
5054 * [ WFC: Parsed Entity ]
5055 * An entity reference must not contain the name of an unparsed entity
5056 *
5057 * Returns the xmlEntityPtr if found, or NULL otherwise.
5058 */
5059xmlEntityPtr
5060xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
5061 xmlChar *name;
5062 xmlEntityPtr ent = NULL;
5063
5064 GROW;
5065
5066 if (RAW == '&') {
5067 NEXT;
5068 name = xmlParseName(ctxt);
5069 if (name == NULL) {
5070 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5071 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5072 ctxt->sax->error(ctxt->userData,
5073 "xmlParseEntityRef: no name\n");
5074 ctxt->wellFormed = 0;
5075 ctxt->disableSAX = 1;
5076 } else {
5077 if (RAW == ';') {
5078 NEXT;
5079 /*
5080 * Ask first SAX for entity resolution, otherwise try the
5081 * predefined set.
5082 */
5083 if (ctxt->sax != NULL) {
5084 if (ctxt->sax->getEntity != NULL)
5085 ent = ctxt->sax->getEntity(ctxt->userData, name);
5086 if (ent == NULL)
5087 ent = xmlGetPredefinedEntity(name);
5088 }
5089 /*
5090 * [ WFC: Entity Declared ]
5091 * In a document without any DTD, a document with only an
5092 * internal DTD subset which contains no parameter entity
5093 * references, or a document with "standalone='yes'", the
5094 * Name given in the entity reference must match that in an
5095 * entity declaration, except that well-formed documents
5096 * need not declare any of the following entities: amp, lt,
5097 * gt, apos, quot.
5098 * The declaration of a parameter entity must precede any
5099 * reference to it.
5100 * Similarly, the declaration of a general entity must
5101 * precede any reference to it which appears in a default
5102 * value in an attribute-list declaration. Note that if
5103 * entities are declared in the external subset or in
5104 * external parameter entities, a non-validating processor
5105 * is not obligated to read and process their declarations;
5106 * for such documents, the rule that an entity must be
5107 * declared is a well-formedness constraint only if
5108 * standalone='yes'.
5109 */
5110 if (ent == NULL) {
5111 if ((ctxt->standalone == 1) ||
5112 ((ctxt->hasExternalSubset == 0) &&
5113 (ctxt->hasPErefs == 0))) {
5114 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5115 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5116 ctxt->sax->error(ctxt->userData,
5117 "Entity '%s' not defined\n", name);
5118 ctxt->wellFormed = 0;
5119 ctxt->disableSAX = 1;
5120 } else {
5121 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5122 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5123 ctxt->sax->warning(ctxt->userData,
5124 "Entity '%s' not defined\n", name);
5125 }
5126 }
5127
5128 /*
5129 * [ WFC: Parsed Entity ]
5130 * An entity reference must not contain the name of an
5131 * unparsed entity
5132 */
5133 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5134 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5135 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5136 ctxt->sax->error(ctxt->userData,
5137 "Entity reference to unparsed entity %s\n", name);
5138 ctxt->wellFormed = 0;
5139 ctxt->disableSAX = 1;
5140 }
5141
5142 /*
5143 * [ WFC: No External Entity References ]
5144 * Attribute values cannot contain direct or indirect
5145 * entity references to external entities.
5146 */
5147 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5148 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5149 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5150 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5151 ctxt->sax->error(ctxt->userData,
5152 "Attribute references external entity '%s'\n", name);
5153 ctxt->wellFormed = 0;
5154 ctxt->disableSAX = 1;
5155 }
5156 /*
5157 * [ WFC: No < in Attribute Values ]
5158 * The replacement text of any entity referred to directly or
5159 * indirectly in an attribute value (other than "&lt;") must
5160 * not contain a <.
5161 */
5162 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5163 (ent != NULL) &&
5164 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5165 (ent->content != NULL) &&
5166 (xmlStrchr(ent->content, '<'))) {
5167 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5168 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5169 ctxt->sax->error(ctxt->userData,
5170 "'<' in entity '%s' is not allowed in attributes values\n", name);
5171 ctxt->wellFormed = 0;
5172 ctxt->disableSAX = 1;
5173 }
5174
5175 /*
5176 * Internal check, no parameter entities here ...
5177 */
5178 else {
5179 switch (ent->etype) {
5180 case XML_INTERNAL_PARAMETER_ENTITY:
5181 case XML_EXTERNAL_PARAMETER_ENTITY:
5182 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5183 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5184 ctxt->sax->error(ctxt->userData,
5185 "Attempt to reference the parameter entity '%s'\n", name);
5186 ctxt->wellFormed = 0;
5187 ctxt->disableSAX = 1;
5188 break;
5189 default:
5190 break;
5191 }
5192 }
5193
5194 /*
5195 * [ WFC: No Recursion ]
5196 * A parsed entity must not contain a recursive reference
5197 * to itself, either directly or indirectly.
5198 * Done somewhere else
5199 */
5200
5201 } else {
5202 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5203 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5204 ctxt->sax->error(ctxt->userData,
5205 "xmlParseEntityRef: expecting ';'\n");
5206 ctxt->wellFormed = 0;
5207 ctxt->disableSAX = 1;
5208 }
5209 xmlFree(name);
5210 }
5211 }
5212 return(ent);
5213}
5214
5215/**
5216 * xmlParseStringEntityRef:
5217 * @ctxt: an XML parser context
5218 * @str: a pointer to an index in the string
5219 *
5220 * parse ENTITY references declarations, but this version parses it from
5221 * a string value.
5222 *
5223 * [68] EntityRef ::= '&' Name ';'
5224 *
5225 * [ WFC: Entity Declared ]
5226 * In a document without any DTD, a document with only an internal DTD
5227 * subset which contains no parameter entity references, or a document
5228 * with "standalone='yes'", the Name given in the entity reference
5229 * must match that in an entity declaration, except that well-formed
5230 * documents need not declare any of the following entities: amp, lt,
5231 * gt, apos, quot. The declaration of a parameter entity must precede
5232 * any reference to it. Similarly, the declaration of a general entity
5233 * must precede any reference to it which appears in a default value in an
5234 * attribute-list declaration. Note that if entities are declared in the
5235 * external subset or in external parameter entities, a non-validating
5236 * processor is not obligated to read and process their declarations;
5237 * for such documents, the rule that an entity must be declared is a
5238 * well-formedness constraint only if standalone='yes'.
5239 *
5240 * [ WFC: Parsed Entity ]
5241 * An entity reference must not contain the name of an unparsed entity
5242 *
5243 * Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
5244 * is updated to the current location in the string.
5245 */
5246xmlEntityPtr
5247xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
5248 xmlChar *name;
5249 const xmlChar *ptr;
5250 xmlChar cur;
5251 xmlEntityPtr ent = NULL;
5252
5253 if ((str == NULL) || (*str == NULL))
5254 return(NULL);
5255 ptr = *str;
5256 cur = *ptr;
5257 if (cur == '&') {
5258 ptr++;
5259 cur = *ptr;
5260 name = xmlParseStringName(ctxt, &ptr);
5261 if (name == NULL) {
5262 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5263 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5264 ctxt->sax->error(ctxt->userData,
5265 "xmlParseEntityRef: no name\n");
5266 ctxt->wellFormed = 0;
5267 ctxt->disableSAX = 1;
5268 } else {
5269 if (*ptr == ';') {
5270 ptr++;
5271 /*
5272 * Ask first SAX for entity resolution, otherwise try the
5273 * predefined set.
5274 */
5275 if (ctxt->sax != NULL) {
5276 if (ctxt->sax->getEntity != NULL)
5277 ent = ctxt->sax->getEntity(ctxt->userData, name);
5278 if (ent == NULL)
5279 ent = xmlGetPredefinedEntity(name);
5280 }
5281 /*
5282 * [ WFC: Entity Declared ]
5283 * In a document without any DTD, a document with only an
5284 * internal DTD subset which contains no parameter entity
5285 * references, or a document with "standalone='yes'", the
5286 * Name given in the entity reference must match that in an
5287 * entity declaration, except that well-formed documents
5288 * need not declare any of the following entities: amp, lt,
5289 * gt, apos, quot.
5290 * The declaration of a parameter entity must precede any
5291 * reference to it.
5292 * Similarly, the declaration of a general entity must
5293 * precede any reference to it which appears in a default
5294 * value in an attribute-list declaration. Note that if
5295 * entities are declared in the external subset or in
5296 * external parameter entities, a non-validating processor
5297 * is not obligated to read and process their declarations;
5298 * for such documents, the rule that an entity must be
5299 * declared is a well-formedness constraint only if
5300 * standalone='yes'.
5301 */
5302 if (ent == NULL) {
5303 if ((ctxt->standalone == 1) ||
5304 ((ctxt->hasExternalSubset == 0) &&
5305 (ctxt->hasPErefs == 0))) {
5306 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5307 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5308 ctxt->sax->error(ctxt->userData,
5309 "Entity '%s' not defined\n", name);
5310 ctxt->wellFormed = 0;
5311 ctxt->disableSAX = 1;
5312 } else {
5313 ctxt->errNo = XML_WAR_UNDECLARED_ENTITY;
5314 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5315 ctxt->sax->warning(ctxt->userData,
5316 "Entity '%s' not defined\n", name);
5317 }
5318 }
5319
5320 /*
5321 * [ WFC: Parsed Entity ]
5322 * An entity reference must not contain the name of an
5323 * unparsed entity
5324 */
5325 else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
5326 ctxt->errNo = XML_ERR_UNPARSED_ENTITY;
5327 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5328 ctxt->sax->error(ctxt->userData,
5329 "Entity reference to unparsed entity %s\n", name);
5330 ctxt->wellFormed = 0;
5331 ctxt->disableSAX = 1;
5332 }
5333
5334 /*
5335 * [ WFC: No External Entity References ]
5336 * Attribute values cannot contain direct or indirect
5337 * entity references to external entities.
5338 */
5339 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5340 (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
5341 ctxt->errNo = XML_ERR_ENTITY_IS_EXTERNAL;
5342 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5343 ctxt->sax->error(ctxt->userData,
5344 "Attribute references external entity '%s'\n", name);
5345 ctxt->wellFormed = 0;
5346 ctxt->disableSAX = 1;
5347 }
5348 /*
5349 * [ WFC: No < in Attribute Values ]
5350 * The replacement text of any entity referred to directly or
5351 * indirectly in an attribute value (other than "&lt;") must
5352 * not contain a <.
5353 */
5354 else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
5355 (ent != NULL) &&
5356 (!xmlStrEqual(ent->name, BAD_CAST "lt")) &&
5357 (ent->content != NULL) &&
5358 (xmlStrchr(ent->content, '<'))) {
5359 ctxt->errNo = XML_ERR_LT_IN_ATTRIBUTE;
5360 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5361 ctxt->sax->error(ctxt->userData,
5362 "'<' in entity '%s' is not allowed in attributes values\n", name);
5363 ctxt->wellFormed = 0;
5364 ctxt->disableSAX = 1;
5365 }
5366
5367 /*
5368 * Internal check, no parameter entities here ...
5369 */
5370 else {
5371 switch (ent->etype) {
5372 case XML_INTERNAL_PARAMETER_ENTITY:
5373 case XML_EXTERNAL_PARAMETER_ENTITY:
5374 ctxt->errNo = XML_ERR_ENTITY_IS_PARAMETER;
5375 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5376 ctxt->sax->error(ctxt->userData,
5377 "Attempt to reference the parameter entity '%s'\n", name);
5378 ctxt->wellFormed = 0;
5379 ctxt->disableSAX = 1;
5380 break;
5381 default:
5382 break;
5383 }
5384 }
5385
5386 /*
5387 * [ WFC: No Recursion ]
5388 * A parsed entity must not contain a recursive reference
5389 * to itself, either directly or indirectly.
5390 * Done somewhwere else
5391 */
5392
5393 } else {
5394 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5395 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5396 ctxt->sax->error(ctxt->userData,
5397 "xmlParseEntityRef: expecting ';'\n");
5398 ctxt->wellFormed = 0;
5399 ctxt->disableSAX = 1;
5400 }
5401 xmlFree(name);
5402 }
5403 }
5404 *str = ptr;
5405 return(ent);
5406}
5407
5408/**
5409 * xmlParsePEReference:
5410 * @ctxt: an XML parser context
5411 *
5412 * parse PEReference declarations
5413 * The entity content is handled directly by pushing it's content as
5414 * a new input stream.
5415 *
5416 * [69] PEReference ::= '%' Name ';'
5417 *
5418 * [ WFC: No Recursion ]
5419 * A parsed entity must not contain a recursive
5420 * reference to itself, either directly or indirectly.
5421 *
5422 * [ WFC: Entity Declared ]
5423 * In a document without any DTD, a document with only an internal DTD
5424 * subset which contains no parameter entity references, or a document
5425 * with "standalone='yes'", ... ... The declaration of a parameter
5426 * entity must precede any reference to it...
5427 *
5428 * [ VC: Entity Declared ]
5429 * In a document with an external subset or external parameter entities
5430 * with "standalone='no'", ... ... The declaration of a parameter entity
5431 * must precede any reference to it...
5432 *
5433 * [ WFC: In DTD ]
5434 * Parameter-entity references may only appear in the DTD.
5435 * NOTE: misleading but this is handled.
5436 */
5437void
5438xmlParsePEReference(xmlParserCtxtPtr ctxt) {
5439 xmlChar *name;
5440 xmlEntityPtr entity = NULL;
5441 xmlParserInputPtr input;
5442
5443 if (RAW == '%') {
5444 NEXT;
5445 name = xmlParseName(ctxt);
5446 if (name == NULL) {
5447 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5448 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5449 ctxt->sax->error(ctxt->userData,
5450 "xmlParsePEReference: no name\n");
5451 ctxt->wellFormed = 0;
5452 ctxt->disableSAX = 1;
5453 } else {
5454 if (RAW == ';') {
5455 NEXT;
5456 if ((ctxt->sax != NULL) &&
5457 (ctxt->sax->getParameterEntity != NULL))
5458 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5459 name);
5460 if (entity == NULL) {
5461 /*
5462 * [ WFC: Entity Declared ]
5463 * In a document without any DTD, a document with only an
5464 * internal DTD subset which contains no parameter entity
5465 * references, or a document with "standalone='yes'", ...
5466 * ... The declaration of a parameter entity must precede
5467 * any reference to it...
5468 */
5469 if ((ctxt->standalone == 1) ||
5470 ((ctxt->hasExternalSubset == 0) &&
5471 (ctxt->hasPErefs == 0))) {
5472 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5473 if ((!ctxt->disableSAX) &&
5474 (ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5475 ctxt->sax->error(ctxt->userData,
5476 "PEReference: %%%s; not found\n", name);
5477 ctxt->wellFormed = 0;
5478 ctxt->disableSAX = 1;
5479 } else {
5480 /*
5481 * [ VC: Entity Declared ]
5482 * In a document with an external subset or external
5483 * parameter entities with "standalone='no'", ...
5484 * ... The declaration of a parameter entity must precede
5485 * any reference to it...
5486 */
5487 if ((!ctxt->disableSAX) &&
5488 (ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5489 ctxt->sax->warning(ctxt->userData,
5490 "PEReference: %%%s; not found\n", name);
5491 ctxt->valid = 0;
5492 }
5493 } else {
5494 /*
5495 * Internal checking in case the entity quest barfed
5496 */
5497 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5498 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5499 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5500 ctxt->sax->warning(ctxt->userData,
5501 "Internal: %%%s; is not a parameter entity\n", name);
5502 } else {
5503 /*
5504 * TODO !!!
5505 * handle the extra spaces added before and after
5506 * c.f. http://www.w3.org/TR/REC-xml#as-PE
5507 */
5508 input = xmlNewEntityInputStream(ctxt, entity);
5509 xmlPushInput(ctxt, input);
5510 if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
5511 (RAW == '<') && (NXT(1) == '?') &&
5512 (NXT(2) == 'x') && (NXT(3) == 'm') &&
5513 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
5514 xmlParseTextDecl(ctxt);
5515 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
5516 /*
5517 * The XML REC instructs us to stop parsing
5518 * right here
5519 */
5520 ctxt->instate = XML_PARSER_EOF;
5521 xmlFree(name);
5522 return;
5523 }
5524 }
5525 if (ctxt->token == 0)
5526 ctxt->token = ' ';
5527 }
5528 }
5529 ctxt->hasPErefs = 1;
5530 } else {
5531 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5533 ctxt->sax->error(ctxt->userData,
5534 "xmlParsePEReference: expecting ';'\n");
5535 ctxt->wellFormed = 0;
5536 ctxt->disableSAX = 1;
5537 }
5538 xmlFree(name);
5539 }
5540 }
5541}
5542
5543/**
5544 * xmlParseStringPEReference:
5545 * @ctxt: an XML parser context
5546 * @str: a pointer to an index in the string
5547 *
5548 * parse PEReference declarations
5549 *
5550 * [69] PEReference ::= '%' Name ';'
5551 *
5552 * [ WFC: No Recursion ]
5553 * A parsed entity must not contain a recursive
5554 * reference to itself, either directly or indirectly.
5555 *
5556 * [ WFC: Entity Declared ]
5557 * In a document without any DTD, a document with only an internal DTD
5558 * subset which contains no parameter entity references, or a document
5559 * with "standalone='yes'", ... ... The declaration of a parameter
5560 * entity must precede any reference to it...
5561 *
5562 * [ VC: Entity Declared ]
5563 * In a document with an external subset or external parameter entities
5564 * with "standalone='no'", ... ... The declaration of a parameter entity
5565 * must precede any reference to it...
5566 *
5567 * [ WFC: In DTD ]
5568 * Parameter-entity references may only appear in the DTD.
5569 * NOTE: misleading but this is handled.
5570 *
5571 * Returns the string of the entity content.
5572 * str is updated to the current value of the index
5573 */
5574xmlEntityPtr
5575xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
5576 const xmlChar *ptr;
5577 xmlChar cur;
5578 xmlChar *name;
5579 xmlEntityPtr entity = NULL;
5580
5581 if ((str == NULL) || (*str == NULL)) return(NULL);
5582 ptr = *str;
5583 cur = *ptr;
5584 if (cur == '%') {
5585 ptr++;
5586 cur = *ptr;
5587 name = xmlParseStringName(ctxt, &ptr);
5588 if (name == NULL) {
5589 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5590 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5591 ctxt->sax->error(ctxt->userData,
5592 "xmlParseStringPEReference: no name\n");
5593 ctxt->wellFormed = 0;
5594 ctxt->disableSAX = 1;
5595 } else {
5596 cur = *ptr;
5597 if (cur == ';') {
5598 ptr++;
5599 cur = *ptr;
5600 if ((ctxt->sax != NULL) &&
5601 (ctxt->sax->getParameterEntity != NULL))
5602 entity = ctxt->sax->getParameterEntity(ctxt->userData,
5603 name);
5604 if (entity == NULL) {
5605 /*
5606 * [ WFC: Entity Declared ]
5607 * In a document without any DTD, a document with only an
5608 * internal DTD subset which contains no parameter entity
5609 * references, or a document with "standalone='yes'", ...
5610 * ... The declaration of a parameter entity must precede
5611 * any reference to it...
5612 */
5613 if ((ctxt->standalone == 1) ||
5614 ((ctxt->hasExternalSubset == 0) &&
5615 (ctxt->hasPErefs == 0))) {
5616 ctxt->errNo = XML_ERR_UNDECLARED_ENTITY;
5617 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5618 ctxt->sax->error(ctxt->userData,
5619 "PEReference: %%%s; not found\n", name);
5620 ctxt->wellFormed = 0;
5621 ctxt->disableSAX = 1;
5622 } else {
5623 /*
5624 * [ VC: Entity Declared ]
5625 * In a document with an external subset or external
5626 * parameter entities with "standalone='no'", ...
5627 * ... The declaration of a parameter entity must
5628 * precede any reference to it...
5629 */
5630 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5631 ctxt->sax->warning(ctxt->userData,
5632 "PEReference: %%%s; not found\n", name);
5633 ctxt->valid = 0;
5634 }
5635 } else {
5636 /*
5637 * Internal checking in case the entity quest barfed
5638 */
5639 if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
5640 (entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
5641 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5642 ctxt->sax->warning(ctxt->userData,
5643 "Internal: %%%s; is not a parameter entity\n", name);
5644 }
5645 }
5646 ctxt->hasPErefs = 1;
5647 } else {
5648 ctxt->errNo = XML_ERR_ENTITYREF_SEMICOL_MISSING;
5649 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5650 ctxt->sax->error(ctxt->userData,
5651 "xmlParseStringPEReference: expecting ';'\n");
5652 ctxt->wellFormed = 0;
5653 ctxt->disableSAX = 1;
5654 }
5655 xmlFree(name);
5656 }
5657 }
5658 *str = ptr;
5659 return(entity);
5660}
5661
5662/**
5663 * xmlParseDocTypeDecl:
5664 * @ctxt: an XML parser context
5665 *
5666 * parse a DOCTYPE declaration
5667 *
5668 * [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
5669 * ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5670 *
5671 * [ VC: Root Element Type ]
5672 * The Name in the document type declaration must match the element
5673 * type of the root element.
5674 */
5675
5676void
5677xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
5678 xmlChar *name = NULL;
5679 xmlChar *ExternalID = NULL;
5680 xmlChar *URI = NULL;
5681
5682 /*
5683 * We know that '<!DOCTYPE' has been detected.
5684 */
5685 SKIP(9);
5686
5687 SKIP_BLANKS;
5688
5689 /*
5690 * Parse the DOCTYPE name.
5691 */
5692 name = xmlParseName(ctxt);
5693 if (name == NULL) {
5694 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5695 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5696 ctxt->sax->error(ctxt->userData,
5697 "xmlParseDocTypeDecl : no DOCTYPE name !\n");
5698 ctxt->wellFormed = 0;
5699 ctxt->disableSAX = 1;
5700 }
5701 ctxt->intSubName = name;
5702
5703 SKIP_BLANKS;
5704
5705 /*
5706 * Check for SystemID and ExternalID
5707 */
5708 URI = xmlParseExternalID(ctxt, &ExternalID, 1);
5709
5710 if ((URI != NULL) || (ExternalID != NULL)) {
5711 ctxt->hasExternalSubset = 1;
5712 }
5713 ctxt->extSubURI = URI;
5714 ctxt->extSubSystem = ExternalID;
5715
5716 SKIP_BLANKS;
5717
5718 /*
5719 * Create and update the internal subset.
5720 */
5721 if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
5722 (!ctxt->disableSAX))
5723 ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
5724
5725 /*
5726 * Is there any internal subset declarations ?
5727 * they are handled separately in xmlParseInternalSubset()
5728 */
5729 if (RAW == '[')
5730 return;
5731
5732 /*
5733 * We should be at the end of the DOCTYPE declaration.
5734 */
5735 if (RAW != '>') {
5736 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5737 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5738 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5739 ctxt->wellFormed = 0;
5740 ctxt->disableSAX = 1;
5741 }
5742 NEXT;
5743}
5744
5745/**
5746 * xmlParseInternalsubset:
5747 * @ctxt: an XML parser context
5748 *
5749 * parse the internal subset declaration
5750 *
5751 * [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
5752 */
5753
5754void
5755xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
5756 /*
5757 * Is there any DTD definition ?
5758 */
5759 if (RAW == '[') {
5760 ctxt->instate = XML_PARSER_DTD;
5761 NEXT;
5762 /*
5763 * Parse the succession of Markup declarations and
5764 * PEReferences.
5765 * Subsequence (markupdecl | PEReference | S)*
5766 */
5767 while (RAW != ']') {
5768 const xmlChar *check = CUR_PTR;
5769 int cons = ctxt->input->consumed;
5770
5771 SKIP_BLANKS;
5772 xmlParseMarkupDecl(ctxt);
5773 xmlParsePEReference(ctxt);
5774
5775 /*
5776 * Pop-up of finished entities.
5777 */
5778 while ((RAW == 0) && (ctxt->inputNr > 1))
5779 xmlPopInput(ctxt);
5780
5781 if ((CUR_PTR == check) && (cons == ctxt->input->consumed)) {
5782 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
5783 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5784 ctxt->sax->error(ctxt->userData,
5785 "xmlParseInternalSubset: error detected in Markup declaration\n");
5786 ctxt->wellFormed = 0;
5787 ctxt->disableSAX = 1;
5788 break;
5789 }
5790 }
5791 if (RAW == ']') {
5792 NEXT;
5793 SKIP_BLANKS;
5794 }
5795 }
5796
5797 /*
5798 * We should be at the end of the DOCTYPE declaration.
5799 */
5800 if (RAW != '>') {
5801 ctxt->errNo = XML_ERR_DOCTYPE_NOT_FINISHED;
5802 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5803 ctxt->sax->error(ctxt->userData, "DOCTYPE unproperly terminated\n");
5804 ctxt->wellFormed = 0;
5805 ctxt->disableSAX = 1;
5806 }
5807 NEXT;
5808}
5809
5810/**
5811 * xmlParseAttribute:
5812 * @ctxt: an XML parser context
5813 * @value: a xmlChar ** used to store the value of the attribute
5814 *
5815 * parse an attribute
5816 *
5817 * [41] Attribute ::= Name Eq AttValue
5818 *
5819 * [ WFC: No External Entity References ]
5820 * Attribute values cannot contain direct or indirect entity references
5821 * to external entities.
5822 *
5823 * [ WFC: No < in Attribute Values ]
5824 * The replacement text of any entity referred to directly or indirectly in
5825 * an attribute value (other than "&lt;") must not contain a <.
5826 *
5827 * [ VC: Attribute Value Type ]
5828 * The attribute must have been declared; the value must be of the type
5829 * declared for it.
5830 *
5831 * [25] Eq ::= S? '=' S?
5832 *
5833 * With namespace:
5834 *
5835 * [NS 11] Attribute ::= QName Eq AttValue
5836 *
5837 * Also the case QName == xmlns:??? is handled independently as a namespace
5838 * definition.
5839 *
5840 * Returns the attribute name, and the value in *value.
5841 */
5842
5843xmlChar *
5844xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
5845 xmlChar *name, *val;
5846
5847 *value = NULL;
5848 name = xmlParseName(ctxt);
5849 if (name == NULL) {
5850 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5851 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5852 ctxt->sax->error(ctxt->userData, "error parsing attribute name\n");
5853 ctxt->wellFormed = 0;
5854 ctxt->disableSAX = 1;
5855 return(NULL);
5856 }
5857
5858 /*
5859 * read the value
5860 */
5861 SKIP_BLANKS;
5862 if (RAW == '=') {
5863 NEXT;
5864 SKIP_BLANKS;
5865 val = xmlParseAttValue(ctxt);
5866 ctxt->instate = XML_PARSER_CONTENT;
5867 } else {
5868 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5869 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5870 ctxt->sax->error(ctxt->userData,
5871 "Specification mandate value for attribute %s\n", name);
5872 ctxt->wellFormed = 0;
5873 ctxt->disableSAX = 1;
5874 xmlFree(name);
5875 return(NULL);
5876 }
5877
5878 /*
5879 * Check that xml:lang conforms to the specification
5880 * No more registered as an error, just generate a warning now
5881 * since this was deprecated in XML second edition
5882 */
5883 if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
5884 if (!xmlCheckLanguageID(val)) {
5885 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL))
5886 ctxt->sax->warning(ctxt->userData,
5887 "Malformed value for xml:lang : %s\n", val);
5888 }
5889 }
5890
5891 /*
5892 * Check that xml:space conforms to the specification
5893 */
5894 if (xmlStrEqual(name, BAD_CAST "xml:space")) {
5895 if (xmlStrEqual(val, BAD_CAST "default"))
5896 *(ctxt->space) = 0;
5897 else if (xmlStrEqual(val, BAD_CAST "preserve"))
5898 *(ctxt->space) = 1;
5899 else {
5900 ctxt->errNo = XML_ERR_ATTRIBUTE_WITHOUT_VALUE;
5901 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5902 ctxt->sax->error(ctxt->userData,
5903"Invalid value for xml:space : \"%s\", \"default\" or \"preserve\" expected\n",
5904 val);
5905 ctxt->wellFormed = 0;
5906 ctxt->disableSAX = 1;
5907 }
5908 }
5909
5910 *value = val;
5911 return(name);
5912}
5913
5914/**
5915 * xmlParseStartTag:
5916 * @ctxt: an XML parser context
5917 *
5918 * parse a start of tag either for rule element or
5919 * EmptyElement. In both case we don't parse the tag closing chars.
5920 *
5921 * [40] STag ::= '<' Name (S Attribute)* S? '>'
5922 *
5923 * [ WFC: Unique Att Spec ]
5924 * No attribute name may appear more than once in the same start-tag or
5925 * empty-element tag.
5926 *
5927 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
5928 *
5929 * [ WFC: Unique Att Spec ]
5930 * No attribute name may appear more than once in the same start-tag or
5931 * empty-element tag.
5932 *
5933 * With namespace:
5934 *
5935 * [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
5936 *
5937 * [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
5938 *
5939 * Returns the element name parsed
5940 */
5941
5942xmlChar *
5943xmlParseStartTag(xmlParserCtxtPtr ctxt) {
5944 xmlChar *name;
5945 xmlChar *attname;
5946 xmlChar *attvalue;
5947 const xmlChar **atts = NULL;
5948 int nbatts = 0;
5949 int maxatts = 0;
5950 int i;
5951
5952 if (RAW != '<') return(NULL);
Daniel Veillard21a0f912001-02-25 19:54:14 +00005953 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00005954
5955 name = xmlParseName(ctxt);
5956 if (name == NULL) {
5957 ctxt->errNo = XML_ERR_NAME_REQUIRED;
5958 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5959 ctxt->sax->error(ctxt->userData,
5960 "xmlParseStartTag: invalid element name\n");
5961 ctxt->wellFormed = 0;
5962 ctxt->disableSAX = 1;
5963 return(NULL);
5964 }
5965
5966 /*
5967 * Now parse the attributes, it ends up with the ending
5968 *
5969 * (S Attribute)* S?
5970 */
5971 SKIP_BLANKS;
5972 GROW;
5973
Daniel Veillard21a0f912001-02-25 19:54:14 +00005974 while ((RAW != '>') &&
5975 ((RAW != '/') || (NXT(1) != '>')) &&
5976 (IS_CHAR(RAW))) {
Owen Taylor3473f882001-02-23 17:55:21 +00005977 const xmlChar *q = CUR_PTR;
5978 int cons = ctxt->input->consumed;
5979
5980 attname = xmlParseAttribute(ctxt, &attvalue);
5981 if ((attname != NULL) && (attvalue != NULL)) {
5982 /*
5983 * [ WFC: Unique Att Spec ]
5984 * No attribute name may appear more than once in the same
5985 * start-tag or empty-element tag.
5986 */
5987 for (i = 0; i < nbatts;i += 2) {
5988 if (xmlStrEqual(atts[i], attname)) {
5989 ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
5990 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
5991 ctxt->sax->error(ctxt->userData,
5992 "Attribute %s redefined\n",
5993 attname);
5994 ctxt->wellFormed = 0;
5995 ctxt->disableSAX = 1;
5996 xmlFree(attname);
5997 xmlFree(attvalue);
5998 goto failed;
5999 }
6000 }
6001
6002 /*
6003 * Add the pair to atts
6004 */
6005 if (atts == NULL) {
6006 maxatts = 10;
6007 atts = (const xmlChar **) xmlMalloc(maxatts * sizeof(xmlChar *));
6008 if (atts == NULL) {
6009 xmlGenericError(xmlGenericErrorContext,
6010 "malloc of %ld byte failed\n",
6011 maxatts * (long)sizeof(xmlChar *));
6012 return(NULL);
6013 }
6014 } else if (nbatts + 4 > maxatts) {
6015 maxatts *= 2;
6016 atts = (const xmlChar **) xmlRealloc((void *) atts,
6017 maxatts * sizeof(xmlChar *));
6018 if (atts == NULL) {
6019 xmlGenericError(xmlGenericErrorContext,
6020 "realloc of %ld byte failed\n",
6021 maxatts * (long)sizeof(xmlChar *));
6022 return(NULL);
6023 }
6024 }
6025 atts[nbatts++] = attname;
6026 atts[nbatts++] = attvalue;
6027 atts[nbatts] = NULL;
6028 atts[nbatts + 1] = NULL;
6029 } else {
6030 if (attname != NULL)
6031 xmlFree(attname);
6032 if (attvalue != NULL)
6033 xmlFree(attvalue);
6034 }
6035
6036failed:
6037
6038 if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
6039 break;
6040 if (!IS_BLANK(RAW)) {
6041 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6042 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6043 ctxt->sax->error(ctxt->userData,
6044 "attributes construct error\n");
6045 ctxt->wellFormed = 0;
6046 ctxt->disableSAX = 1;
6047 }
6048 SKIP_BLANKS;
6049 if ((cons == ctxt->input->consumed) && (q == CUR_PTR)) {
6050 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6051 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6052 ctxt->sax->error(ctxt->userData,
6053 "xmlParseStartTag: problem parsing attributes\n");
6054 ctxt->wellFormed = 0;
6055 ctxt->disableSAX = 1;
6056 break;
6057 }
6058 GROW;
6059 }
6060
6061 /*
6062 * SAX: Start of Element !
6063 */
6064 if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
6065 (!ctxt->disableSAX))
6066 ctxt->sax->startElement(ctxt->userData, name, atts);
6067
6068 if (atts != NULL) {
6069 for (i = 0;i < nbatts;i++) xmlFree((xmlChar *) atts[i]);
6070 xmlFree((void *) atts);
6071 }
6072 return(name);
6073}
6074
6075/**
6076 * xmlParseEndTag:
6077 * @ctxt: an XML parser context
6078 *
6079 * parse an end of tag
6080 *
6081 * [42] ETag ::= '</' Name S? '>'
6082 *
6083 * With namespace
6084 *
6085 * [NS 9] ETag ::= '</' QName S? '>'
6086 */
6087
6088void
6089xmlParseEndTag(xmlParserCtxtPtr ctxt) {
6090 xmlChar *name;
6091 xmlChar *oldname;
6092
6093 GROW;
6094 if ((RAW != '<') || (NXT(1) != '/')) {
6095 ctxt->errNo = XML_ERR_LTSLASH_REQUIRED;
6096 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6097 ctxt->sax->error(ctxt->userData, "xmlParseEndTag: '</' not found\n");
6098 ctxt->wellFormed = 0;
6099 ctxt->disableSAX = 1;
6100 return;
6101 }
6102 SKIP(2);
6103
6104 name = xmlParseName(ctxt);
6105
6106 /*
6107 * We should definitely be at the ending "S? '>'" part
6108 */
6109 GROW;
6110 SKIP_BLANKS;
6111 if ((!IS_CHAR(RAW)) || (RAW != '>')) {
6112 ctxt->errNo = XML_ERR_GT_REQUIRED;
6113 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6114 ctxt->sax->error(ctxt->userData, "End tag : expected '>'\n");
6115 ctxt->wellFormed = 0;
6116 ctxt->disableSAX = 1;
6117 } else
Daniel Veillard21a0f912001-02-25 19:54:14 +00006118 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006119
6120 /*
6121 * [ WFC: Element Type Match ]
6122 * The Name in an element's end-tag must match the element type in the
6123 * start-tag.
6124 *
6125 */
6126 if ((name == NULL) || (ctxt->name == NULL) ||
6127 (!xmlStrEqual(name, ctxt->name))) {
6128 ctxt->errNo = XML_ERR_TAG_NAME_MISMATCH;
6129 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
6130 if ((name != NULL) && (ctxt->name != NULL)) {
6131 ctxt->sax->error(ctxt->userData,
6132 "Opening and ending tag mismatch: %s and %s\n",
6133 ctxt->name, name);
6134 } else if (ctxt->name != NULL) {
6135 ctxt->sax->error(ctxt->userData,
6136 "Ending tag eror for: %s\n", ctxt->name);
6137 } else {
6138 ctxt->sax->error(ctxt->userData,
6139 "Ending tag error: internal error ???\n");
6140 }
6141
6142 }
6143 ctxt->wellFormed = 0;
6144 ctxt->disableSAX = 1;
6145 }
6146
6147 /*
6148 * SAX: End of Tag
6149 */
6150 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6151 (!ctxt->disableSAX))
6152 ctxt->sax->endElement(ctxt->userData, name);
6153
6154 if (name != NULL)
6155 xmlFree(name);
6156 oldname = namePop(ctxt);
6157 spacePop(ctxt);
6158 if (oldname != NULL) {
6159#ifdef DEBUG_STACK
6160 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6161#endif
6162 xmlFree(oldname);
6163 }
6164 return;
6165}
6166
6167/**
6168 * xmlParseCDSect:
6169 * @ctxt: an XML parser context
6170 *
6171 * Parse escaped pure raw content.
6172 *
6173 * [18] CDSect ::= CDStart CData CDEnd
6174 *
6175 * [19] CDStart ::= '<![CDATA['
6176 *
6177 * [20] Data ::= (Char* - (Char* ']]>' Char*))
6178 *
6179 * [21] CDEnd ::= ']]>'
6180 */
6181void
6182xmlParseCDSect(xmlParserCtxtPtr ctxt) {
6183 xmlChar *buf = NULL;
6184 int len = 0;
6185 int size = XML_PARSER_BUFFER_SIZE;
6186 int r, rl;
6187 int s, sl;
6188 int cur, l;
6189 int count = 0;
6190
6191 if ((NXT(0) == '<') && (NXT(1) == '!') &&
6192 (NXT(2) == '[') && (NXT(3) == 'C') &&
6193 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6194 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6195 (NXT(8) == '[')) {
6196 SKIP(9);
6197 } else
6198 return;
6199
6200 ctxt->instate = XML_PARSER_CDATA_SECTION;
6201 r = CUR_CHAR(rl);
6202 if (!IS_CHAR(r)) {
6203 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6205 ctxt->sax->error(ctxt->userData,
6206 "CData section not finished\n");
6207 ctxt->wellFormed = 0;
6208 ctxt->disableSAX = 1;
6209 ctxt->instate = XML_PARSER_CONTENT;
6210 return;
6211 }
6212 NEXTL(rl);
6213 s = CUR_CHAR(sl);
6214 if (!IS_CHAR(s)) {
6215 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6216 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6217 ctxt->sax->error(ctxt->userData,
6218 "CData section not finished\n");
6219 ctxt->wellFormed = 0;
6220 ctxt->disableSAX = 1;
6221 ctxt->instate = XML_PARSER_CONTENT;
6222 return;
6223 }
6224 NEXTL(sl);
6225 cur = CUR_CHAR(l);
6226 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6227 if (buf == NULL) {
6228 xmlGenericError(xmlGenericErrorContext,
6229 "malloc of %d byte failed\n", size);
6230 return;
6231 }
6232 while (IS_CHAR(cur) &&
6233 ((r != ']') || (s != ']') || (cur != '>'))) {
6234 if (len + 5 >= size) {
6235 size *= 2;
6236 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6237 if (buf == NULL) {
6238 xmlGenericError(xmlGenericErrorContext,
6239 "realloc of %d byte failed\n", size);
6240 return;
6241 }
6242 }
6243 COPY_BUF(rl,buf,len,r);
6244 r = s;
6245 rl = sl;
6246 s = cur;
6247 sl = l;
6248 count++;
6249 if (count > 50) {
6250 GROW;
6251 count = 0;
6252 }
6253 NEXTL(l);
6254 cur = CUR_CHAR(l);
6255 }
6256 buf[len] = 0;
6257 ctxt->instate = XML_PARSER_CONTENT;
6258 if (cur != '>') {
6259 ctxt->errNo = XML_ERR_CDATA_NOT_FINISHED;
6260 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6261 ctxt->sax->error(ctxt->userData,
6262 "CData section not finished\n%.50s\n", buf);
6263 ctxt->wellFormed = 0;
6264 ctxt->disableSAX = 1;
6265 xmlFree(buf);
6266 return;
6267 }
6268 NEXTL(l);
6269
6270 /*
6271 * Ok the buffer is to be consumed as cdata.
6272 */
6273 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
6274 if (ctxt->sax->cdataBlock != NULL)
6275 ctxt->sax->cdataBlock(ctxt->userData, buf, len);
6276 }
6277 xmlFree(buf);
6278}
6279
6280/**
6281 * xmlParseContent:
6282 * @ctxt: an XML parser context
6283 *
6284 * Parse a content:
6285 *
6286 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
6287 */
6288
6289void
6290xmlParseContent(xmlParserCtxtPtr ctxt) {
6291 GROW;
6292 while (((RAW != 0) || (ctxt->token != 0)) &&
6293 ((RAW != '<') || (NXT(1) != '/'))) {
6294 const xmlChar *test = CUR_PTR;
6295 int cons = ctxt->input->consumed;
6296 xmlChar tok = ctxt->token;
Daniel Veillard21a0f912001-02-25 19:54:14 +00006297 const xmlChar *cur = ctxt->input->cur;
Owen Taylor3473f882001-02-23 17:55:21 +00006298
6299 /*
6300 * Handle possible processed charrefs.
6301 */
6302 if (ctxt->token != 0) {
6303 xmlParseCharData(ctxt, 0);
6304 }
6305 /*
6306 * First case : a Processing Instruction.
6307 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006308 else if ((*cur == '<') && (cur[1] == '?')) {
Owen Taylor3473f882001-02-23 17:55:21 +00006309 xmlParsePI(ctxt);
6310 }
6311
6312 /*
6313 * Second case : a CDSection
6314 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006315 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006316 (NXT(2) == '[') && (NXT(3) == 'C') &&
6317 (NXT(4) == 'D') && (NXT(5) == 'A') &&
6318 (NXT(6) == 'T') && (NXT(7) == 'A') &&
6319 (NXT(8) == '[')) {
6320 xmlParseCDSect(ctxt);
6321 }
6322
6323 /*
6324 * Third case : a comment
6325 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006326 else if ((*cur == '<') && (NXT(1) == '!') &&
Owen Taylor3473f882001-02-23 17:55:21 +00006327 (NXT(2) == '-') && (NXT(3) == '-')) {
6328 xmlParseComment(ctxt);
6329 ctxt->instate = XML_PARSER_CONTENT;
6330 }
6331
6332 /*
6333 * Fourth case : a sub-element.
6334 */
Daniel Veillard21a0f912001-02-25 19:54:14 +00006335 else if (*cur == '<') {
Owen Taylor3473f882001-02-23 17:55:21 +00006336 xmlParseElement(ctxt);
6337 }
6338
6339 /*
6340 * Fifth case : a reference. If if has not been resolved,
6341 * parsing returns it's Name, create the node
6342 */
6343
Daniel Veillard21a0f912001-02-25 19:54:14 +00006344 else if (*cur == '&') {
Owen Taylor3473f882001-02-23 17:55:21 +00006345 xmlParseReference(ctxt);
6346 }
6347
6348 /*
6349 * Last case, text. Note that References are handled directly.
6350 */
6351 else {
6352 xmlParseCharData(ctxt, 0);
6353 }
6354
6355 GROW;
6356 /*
6357 * Pop-up of finished entities.
6358 */
6359 while ((RAW == 0) && (ctxt->inputNr > 1))
6360 xmlPopInput(ctxt);
6361 SHRINK;
6362
6363 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
6364 (tok == ctxt->token)) {
6365 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
6366 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6367 ctxt->sax->error(ctxt->userData,
6368 "detected an error in element content\n");
6369 ctxt->wellFormed = 0;
6370 ctxt->disableSAX = 1;
6371 ctxt->instate = XML_PARSER_EOF;
6372 break;
6373 }
6374 }
6375}
6376
6377/**
6378 * xmlParseElement:
6379 * @ctxt: an XML parser context
6380 *
6381 * parse an XML element, this is highly recursive
6382 *
6383 * [39] element ::= EmptyElemTag | STag content ETag
6384 *
6385 * [ WFC: Element Type Match ]
6386 * The Name in an element's end-tag must match the element type in the
6387 * start-tag.
6388 *
6389 * [ VC: Element Valid ]
6390 * An element is valid if there is a declaration matching elementdecl
6391 * where the Name matches the element type and one of the following holds:
6392 * - The declaration matches EMPTY and the element has no content.
6393 * - The declaration matches children and the sequence of child elements
6394 * belongs to the language generated by the regular expression in the
6395 * content model, with optional white space (characters matching the
6396 * nonterminal S) between each pair of child elements.
6397 * - The declaration matches Mixed and the content consists of character
6398 * data and child elements whose types match names in the content model.
6399 * - The declaration matches ANY, and the types of any child elements have
6400 * been declared.
6401 */
6402
6403void
6404xmlParseElement(xmlParserCtxtPtr ctxt) {
6405 const xmlChar *openTag = CUR_PTR;
6406 xmlChar *name;
6407 xmlChar *oldname;
6408 xmlParserNodeInfo node_info;
6409 xmlNodePtr ret;
6410
6411 /* Capture start position */
6412 if (ctxt->record_info) {
6413 node_info.begin_pos = ctxt->input->consumed +
6414 (CUR_PTR - ctxt->input->base);
6415 node_info.begin_line = ctxt->input->line;
6416 }
6417
6418 if (ctxt->spaceNr == 0)
6419 spacePush(ctxt, -1);
6420 else
6421 spacePush(ctxt, *ctxt->space);
6422
6423 name = xmlParseStartTag(ctxt);
6424 if (name == NULL) {
6425 spacePop(ctxt);
6426 return;
6427 }
6428 namePush(ctxt, name);
6429 ret = ctxt->node;
6430
6431 /*
6432 * [ VC: Root Element Type ]
6433 * The Name in the document type declaration must match the element
6434 * type of the root element.
6435 */
6436 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
6437 ctxt->node && (ctxt->node == ctxt->myDoc->children))
6438 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
6439
6440 /*
6441 * Check for an Empty Element.
6442 */
6443 if ((RAW == '/') && (NXT(1) == '>')) {
6444 SKIP(2);
6445 if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
6446 (!ctxt->disableSAX))
6447 ctxt->sax->endElement(ctxt->userData, name);
6448 oldname = namePop(ctxt);
6449 spacePop(ctxt);
6450 if (oldname != NULL) {
6451#ifdef DEBUG_STACK
6452 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6453#endif
6454 xmlFree(oldname);
6455 }
6456 if ( ret != NULL && ctxt->record_info ) {
6457 node_info.end_pos = ctxt->input->consumed +
6458 (CUR_PTR - ctxt->input->base);
6459 node_info.end_line = ctxt->input->line;
6460 node_info.node = ret;
6461 xmlParserAddNodeInfo(ctxt, &node_info);
6462 }
6463 return;
6464 }
6465 if (RAW == '>') {
Daniel Veillard21a0f912001-02-25 19:54:14 +00006466 NEXT1;
Owen Taylor3473f882001-02-23 17:55:21 +00006467 } else {
6468 ctxt->errNo = XML_ERR_GT_REQUIRED;
6469 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6470 ctxt->sax->error(ctxt->userData,
6471 "Couldn't find end of Start Tag\n%.30s\n",
6472 openTag);
6473 ctxt->wellFormed = 0;
6474 ctxt->disableSAX = 1;
6475
6476 /*
6477 * end of parsing of this node.
6478 */
6479 nodePop(ctxt);
6480 oldname = namePop(ctxt);
6481 spacePop(ctxt);
6482 if (oldname != NULL) {
6483#ifdef DEBUG_STACK
6484 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6485#endif
6486 xmlFree(oldname);
6487 }
6488
6489 /*
6490 * Capture end position and add node
6491 */
6492 if ( ret != NULL && ctxt->record_info ) {
6493 node_info.end_pos = ctxt->input->consumed +
6494 (CUR_PTR - ctxt->input->base);
6495 node_info.end_line = ctxt->input->line;
6496 node_info.node = ret;
6497 xmlParserAddNodeInfo(ctxt, &node_info);
6498 }
6499 return;
6500 }
6501
6502 /*
6503 * Parse the content of the element:
6504 */
6505 xmlParseContent(ctxt);
6506 if (!IS_CHAR(RAW)) {
6507 ctxt->errNo = XML_ERR_TAG_NOT_FINISED;
6508 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6509 ctxt->sax->error(ctxt->userData,
6510 "Premature end of data in tag %.30s\n", openTag);
6511 ctxt->wellFormed = 0;
6512 ctxt->disableSAX = 1;
6513
6514 /*
6515 * end of parsing of this node.
6516 */
6517 nodePop(ctxt);
6518 oldname = namePop(ctxt);
6519 spacePop(ctxt);
6520 if (oldname != NULL) {
6521#ifdef DEBUG_STACK
6522 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
6523#endif
6524 xmlFree(oldname);
6525 }
6526 return;
6527 }
6528
6529 /*
6530 * parse the end of tag: '</' should be here.
6531 */
6532 xmlParseEndTag(ctxt);
6533
6534 /*
6535 * Capture end position and add node
6536 */
6537 if ( ret != NULL && ctxt->record_info ) {
6538 node_info.end_pos = ctxt->input->consumed +
6539 (CUR_PTR - ctxt->input->base);
6540 node_info.end_line = ctxt->input->line;
6541 node_info.node = ret;
6542 xmlParserAddNodeInfo(ctxt, &node_info);
6543 }
6544}
6545
6546/**
6547 * xmlParseVersionNum:
6548 * @ctxt: an XML parser context
6549 *
6550 * parse the XML version value.
6551 *
6552 * [26] VersionNum ::= ([a-zA-Z0-9_.:] | '-')+
6553 *
6554 * Returns the string giving the XML version number, or NULL
6555 */
6556xmlChar *
6557xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
6558 xmlChar *buf = NULL;
6559 int len = 0;
6560 int size = 10;
6561 xmlChar cur;
6562
6563 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6564 if (buf == NULL) {
6565 xmlGenericError(xmlGenericErrorContext,
6566 "malloc of %d byte failed\n", size);
6567 return(NULL);
6568 }
6569 cur = CUR;
6570 while (((cur >= 'a') && (cur <= 'z')) ||
6571 ((cur >= 'A') && (cur <= 'Z')) ||
6572 ((cur >= '0') && (cur <= '9')) ||
6573 (cur == '_') || (cur == '.') ||
6574 (cur == ':') || (cur == '-')) {
6575 if (len + 1 >= size) {
6576 size *= 2;
6577 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6578 if (buf == NULL) {
6579 xmlGenericError(xmlGenericErrorContext,
6580 "realloc of %d byte failed\n", size);
6581 return(NULL);
6582 }
6583 }
6584 buf[len++] = cur;
6585 NEXT;
6586 cur=CUR;
6587 }
6588 buf[len] = 0;
6589 return(buf);
6590}
6591
6592/**
6593 * xmlParseVersionInfo:
6594 * @ctxt: an XML parser context
6595 *
6596 * parse the XML version.
6597 *
6598 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
6599 *
6600 * [25] Eq ::= S? '=' S?
6601 *
6602 * Returns the version string, e.g. "1.0"
6603 */
6604
6605xmlChar *
6606xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
6607 xmlChar *version = NULL;
6608 const xmlChar *q;
6609
6610 if ((RAW == 'v') && (NXT(1) == 'e') &&
6611 (NXT(2) == 'r') && (NXT(3) == 's') &&
6612 (NXT(4) == 'i') && (NXT(5) == 'o') &&
6613 (NXT(6) == 'n')) {
6614 SKIP(7);
6615 SKIP_BLANKS;
6616 if (RAW != '=') {
6617 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6618 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6619 ctxt->sax->error(ctxt->userData,
6620 "xmlParseVersionInfo : expected '='\n");
6621 ctxt->wellFormed = 0;
6622 ctxt->disableSAX = 1;
6623 return(NULL);
6624 }
6625 NEXT;
6626 SKIP_BLANKS;
6627 if (RAW == '"') {
6628 NEXT;
6629 q = CUR_PTR;
6630 version = xmlParseVersionNum(ctxt);
6631 if (RAW != '"') {
6632 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6633 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6634 ctxt->sax->error(ctxt->userData,
6635 "String not closed\n%.50s\n", q);
6636 ctxt->wellFormed = 0;
6637 ctxt->disableSAX = 1;
6638 } else
6639 NEXT;
6640 } else if (RAW == '\''){
6641 NEXT;
6642 q = CUR_PTR;
6643 version = xmlParseVersionNum(ctxt);
6644 if (RAW != '\'') {
6645 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6646 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6647 ctxt->sax->error(ctxt->userData,
6648 "String not closed\n%.50s\n", q);
6649 ctxt->wellFormed = 0;
6650 ctxt->disableSAX = 1;
6651 } else
6652 NEXT;
6653 } else {
6654 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6655 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6656 ctxt->sax->error(ctxt->userData,
6657 "xmlParseVersionInfo : expected ' or \"\n");
6658 ctxt->wellFormed = 0;
6659 ctxt->disableSAX = 1;
6660 }
6661 }
6662 return(version);
6663}
6664
6665/**
6666 * xmlParseEncName:
6667 * @ctxt: an XML parser context
6668 *
6669 * parse the XML encoding name
6670 *
6671 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
6672 *
6673 * Returns the encoding name value or NULL
6674 */
6675xmlChar *
6676xmlParseEncName(xmlParserCtxtPtr ctxt) {
6677 xmlChar *buf = NULL;
6678 int len = 0;
6679 int size = 10;
6680 xmlChar cur;
6681
6682 cur = CUR;
6683 if (((cur >= 'a') && (cur <= 'z')) ||
6684 ((cur >= 'A') && (cur <= 'Z'))) {
6685 buf = (xmlChar *) xmlMalloc(size * sizeof(xmlChar));
6686 if (buf == NULL) {
6687 xmlGenericError(xmlGenericErrorContext,
6688 "malloc of %d byte failed\n", size);
6689 return(NULL);
6690 }
6691
6692 buf[len++] = cur;
6693 NEXT;
6694 cur = CUR;
6695 while (((cur >= 'a') && (cur <= 'z')) ||
6696 ((cur >= 'A') && (cur <= 'Z')) ||
6697 ((cur >= '0') && (cur <= '9')) ||
6698 (cur == '.') || (cur == '_') ||
6699 (cur == '-')) {
6700 if (len + 1 >= size) {
6701 size *= 2;
6702 buf = (xmlChar *) xmlRealloc(buf, size * sizeof(xmlChar));
6703 if (buf == NULL) {
6704 xmlGenericError(xmlGenericErrorContext,
6705 "realloc of %d byte failed\n", size);
6706 return(NULL);
6707 }
6708 }
6709 buf[len++] = cur;
6710 NEXT;
6711 cur = CUR;
6712 if (cur == 0) {
6713 SHRINK;
6714 GROW;
6715 cur = CUR;
6716 }
6717 }
6718 buf[len] = 0;
6719 } else {
6720 ctxt->errNo = XML_ERR_ENCODING_NAME;
6721 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6722 ctxt->sax->error(ctxt->userData, "Invalid XML encoding name\n");
6723 ctxt->wellFormed = 0;
6724 ctxt->disableSAX = 1;
6725 }
6726 return(buf);
6727}
6728
6729/**
6730 * xmlParseEncodingDecl:
6731 * @ctxt: an XML parser context
6732 *
6733 * parse the XML encoding declaration
6734 *
6735 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
6736 *
6737 * this setups the conversion filters.
6738 *
6739 * Returns the encoding value or NULL
6740 */
6741
6742xmlChar *
6743xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
6744 xmlChar *encoding = NULL;
6745 const xmlChar *q;
6746
6747 SKIP_BLANKS;
6748 if ((RAW == 'e') && (NXT(1) == 'n') &&
6749 (NXT(2) == 'c') && (NXT(3) == 'o') &&
6750 (NXT(4) == 'd') && (NXT(5) == 'i') &&
6751 (NXT(6) == 'n') && (NXT(7) == 'g')) {
6752 SKIP(8);
6753 SKIP_BLANKS;
6754 if (RAW != '=') {
6755 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6756 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6757 ctxt->sax->error(ctxt->userData,
6758 "xmlParseEncodingDecl : expected '='\n");
6759 ctxt->wellFormed = 0;
6760 ctxt->disableSAX = 1;
6761 return(NULL);
6762 }
6763 NEXT;
6764 SKIP_BLANKS;
6765 if (RAW == '"') {
6766 NEXT;
6767 q = CUR_PTR;
6768 encoding = xmlParseEncName(ctxt);
6769 if (RAW != '"') {
6770 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6771 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6772 ctxt->sax->error(ctxt->userData,
6773 "String not closed\n%.50s\n", q);
6774 ctxt->wellFormed = 0;
6775 ctxt->disableSAX = 1;
6776 } else
6777 NEXT;
6778 } else if (RAW == '\''){
6779 NEXT;
6780 q = CUR_PTR;
6781 encoding = xmlParseEncName(ctxt);
6782 if (RAW != '\'') {
6783 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6784 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6785 ctxt->sax->error(ctxt->userData,
6786 "String not closed\n%.50s\n", q);
6787 ctxt->wellFormed = 0;
6788 ctxt->disableSAX = 1;
6789 } else
6790 NEXT;
6791 } else if (RAW == '"'){
6792 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6793 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6794 ctxt->sax->error(ctxt->userData,
6795 "xmlParseEncodingDecl : expected ' or \"\n");
6796 ctxt->wellFormed = 0;
6797 ctxt->disableSAX = 1;
6798 }
6799 if (encoding != NULL) {
6800 xmlCharEncoding enc;
6801 xmlCharEncodingHandlerPtr handler;
6802
6803 if (ctxt->input->encoding != NULL)
6804 xmlFree((xmlChar *) ctxt->input->encoding);
6805 ctxt->input->encoding = encoding;
6806
6807 enc = xmlParseCharEncoding((const char *) encoding);
6808 /*
6809 * registered set of known encodings
6810 */
6811 if (enc != XML_CHAR_ENCODING_ERROR) {
6812 xmlSwitchEncoding(ctxt, enc);
6813 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
6814 xmlFree(encoding);
6815 return(NULL);
6816 }
6817 } else {
6818 /*
6819 * fallback for unknown encodings
6820 */
6821 handler = xmlFindCharEncodingHandler((const char *) encoding);
6822 if (handler != NULL) {
6823 xmlSwitchToEncoding(ctxt, handler);
6824 } else {
6825 ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
6826 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6827 ctxt->sax->error(ctxt->userData,
6828 "Unsupported encoding %s\n", encoding);
6829 return(NULL);
6830 }
6831 }
6832 }
6833 }
6834 return(encoding);
6835}
6836
6837/**
6838 * xmlParseSDDecl:
6839 * @ctxt: an XML parser context
6840 *
6841 * parse the XML standalone declaration
6842 *
6843 * [32] SDDecl ::= S 'standalone' Eq
6844 * (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
6845 *
6846 * [ VC: Standalone Document Declaration ]
6847 * TODO The standalone document declaration must have the value "no"
6848 * if any external markup declarations contain declarations of:
6849 * - attributes with default values, if elements to which these
6850 * attributes apply appear in the document without specifications
6851 * of values for these attributes, or
6852 * - entities (other than amp, lt, gt, apos, quot), if references
6853 * to those entities appear in the document, or
6854 * - attributes with values subject to normalization, where the
6855 * attribute appears in the document with a value which will change
6856 * as a result of normalization, or
6857 * - element types with element content, if white space occurs directly
6858 * within any instance of those types.
6859 *
6860 * Returns 1 if standalone, 0 otherwise
6861 */
6862
6863int
6864xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
6865 int standalone = -1;
6866
6867 SKIP_BLANKS;
6868 if ((RAW == 's') && (NXT(1) == 't') &&
6869 (NXT(2) == 'a') && (NXT(3) == 'n') &&
6870 (NXT(4) == 'd') && (NXT(5) == 'a') &&
6871 (NXT(6) == 'l') && (NXT(7) == 'o') &&
6872 (NXT(8) == 'n') && (NXT(9) == 'e')) {
6873 SKIP(10);
6874 SKIP_BLANKS;
6875 if (RAW != '=') {
6876 ctxt->errNo = XML_ERR_EQUAL_REQUIRED;
6877 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6878 ctxt->sax->error(ctxt->userData,
6879 "XML standalone declaration : expected '='\n");
6880 ctxt->wellFormed = 0;
6881 ctxt->disableSAX = 1;
6882 return(standalone);
6883 }
6884 NEXT;
6885 SKIP_BLANKS;
6886 if (RAW == '\''){
6887 NEXT;
6888 if ((RAW == 'n') && (NXT(1) == 'o')) {
6889 standalone = 0;
6890 SKIP(2);
6891 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6892 (NXT(2) == 's')) {
6893 standalone = 1;
6894 SKIP(3);
6895 } else {
6896 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6897 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6898 ctxt->sax->error(ctxt->userData,
6899 "standalone accepts only 'yes' or 'no'\n");
6900 ctxt->wellFormed = 0;
6901 ctxt->disableSAX = 1;
6902 }
6903 if (RAW != '\'') {
6904 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6905 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6906 ctxt->sax->error(ctxt->userData, "String not closed\n");
6907 ctxt->wellFormed = 0;
6908 ctxt->disableSAX = 1;
6909 } else
6910 NEXT;
6911 } else if (RAW == '"'){
6912 NEXT;
6913 if ((RAW == 'n') && (NXT(1) == 'o')) {
6914 standalone = 0;
6915 SKIP(2);
6916 } else if ((RAW == 'y') && (NXT(1) == 'e') &&
6917 (NXT(2) == 's')) {
6918 standalone = 1;
6919 SKIP(3);
6920 } else {
6921 ctxt->errNo = XML_ERR_STANDALONE_VALUE;
6922 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6923 ctxt->sax->error(ctxt->userData,
6924 "standalone accepts only 'yes' or 'no'\n");
6925 ctxt->wellFormed = 0;
6926 ctxt->disableSAX = 1;
6927 }
6928 if (RAW != '"') {
6929 ctxt->errNo = XML_ERR_STRING_NOT_CLOSED;
6930 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6931 ctxt->sax->error(ctxt->userData, "String not closed\n");
6932 ctxt->wellFormed = 0;
6933 ctxt->disableSAX = 1;
6934 } else
6935 NEXT;
6936 } else {
6937 ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
6938 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6939 ctxt->sax->error(ctxt->userData,
6940 "Standalone value not found\n");
6941 ctxt->wellFormed = 0;
6942 ctxt->disableSAX = 1;
6943 }
6944 }
6945 return(standalone);
6946}
6947
6948/**
6949 * xmlParseXMLDecl:
6950 * @ctxt: an XML parser context
6951 *
6952 * parse an XML declaration header
6953 *
6954 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
6955 */
6956
6957void
6958xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
6959 xmlChar *version;
6960
6961 /*
6962 * We know that '<?xml' is here.
6963 */
6964 SKIP(5);
6965
6966 if (!IS_BLANK(RAW)) {
6967 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6968 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6969 ctxt->sax->error(ctxt->userData, "Blank needed after '<?xml'\n");
6970 ctxt->wellFormed = 0;
6971 ctxt->disableSAX = 1;
6972 }
6973 SKIP_BLANKS;
6974
6975 /*
6976 * We should have the VersionInfo here.
6977 */
6978 version = xmlParseVersionInfo(ctxt);
6979 if (version == NULL)
6980 version = xmlCharStrdup(XML_DEFAULT_VERSION);
6981 ctxt->version = xmlStrdup(version);
6982 xmlFree(version);
6983
6984 /*
6985 * We may have the encoding declaration
6986 */
6987 if (!IS_BLANK(RAW)) {
6988 if ((RAW == '?') && (NXT(1) == '>')) {
6989 SKIP(2);
6990 return;
6991 }
6992 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
6993 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
6994 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
6995 ctxt->wellFormed = 0;
6996 ctxt->disableSAX = 1;
6997 }
6998 xmlParseEncodingDecl(ctxt);
6999 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7000 /*
7001 * The XML REC instructs us to stop parsing right here
7002 */
7003 return;
7004 }
7005
7006 /*
7007 * We may have the standalone status.
7008 */
7009 if ((ctxt->input->encoding != NULL) && (!IS_BLANK(RAW))) {
7010 if ((RAW == '?') && (NXT(1) == '>')) {
7011 SKIP(2);
7012 return;
7013 }
7014 ctxt->errNo = XML_ERR_SPACE_REQUIRED;
7015 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7016 ctxt->sax->error(ctxt->userData, "Blank needed here\n");
7017 ctxt->wellFormed = 0;
7018 ctxt->disableSAX = 1;
7019 }
7020 SKIP_BLANKS;
7021 ctxt->input->standalone = xmlParseSDDecl(ctxt);
7022
7023 SKIP_BLANKS;
7024 if ((RAW == '?') && (NXT(1) == '>')) {
7025 SKIP(2);
7026 } else if (RAW == '>') {
7027 /* Deprecated old WD ... */
7028 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7029 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7030 ctxt->sax->error(ctxt->userData,
7031 "XML declaration must end-up with '?>'\n");
7032 ctxt->wellFormed = 0;
7033 ctxt->disableSAX = 1;
7034 NEXT;
7035 } else {
7036 ctxt->errNo = XML_ERR_XMLDECL_NOT_FINISHED;
7037 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7038 ctxt->sax->error(ctxt->userData,
7039 "parsing XML declaration: '?>' expected\n");
7040 ctxt->wellFormed = 0;
7041 ctxt->disableSAX = 1;
7042 MOVETO_ENDTAG(CUR_PTR);
7043 NEXT;
7044 }
7045}
7046
7047/**
7048 * xmlParseMisc:
7049 * @ctxt: an XML parser context
7050 *
7051 * parse an XML Misc* optionnal field.
7052 *
7053 * [27] Misc ::= Comment | PI | S
7054 */
7055
7056void
7057xmlParseMisc(xmlParserCtxtPtr ctxt) {
7058 while (((RAW == '<') && (NXT(1) == '?')) ||
7059 ((RAW == '<') && (NXT(1) == '!') &&
7060 (NXT(2) == '-') && (NXT(3) == '-')) ||
7061 IS_BLANK(CUR)) {
7062 if ((RAW == '<') && (NXT(1) == '?')) {
7063 xmlParsePI(ctxt);
7064 } else if (IS_BLANK(CUR)) {
7065 NEXT;
7066 } else
7067 xmlParseComment(ctxt);
7068 }
7069}
7070
/**
 * xmlParseDocument:
 * @ctxt:  an XML parser context
 *
 * parse an XML document (and build a tree if using the standard SAX
 * interface).
 *
 * [1] document ::= prolog element Misc*
 *
 * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
 *
 * The steps are, in order: encoding detection from the first four bytes,
 * optional XML declaration, startDocument, Misc, optional DOCTYPE with
 * its internal subset followed by the externalSubset callback, Misc,
 * the root element, trailing Misc and finally endDocument.
 * startDocument, externalSubset and endDocument are only invoked while
 * ctxt->disableSAX is not set.
 *
 * Returns 0, -1 in case of error. The parser context is augmented
 *                as a result of the parsing. When the XML declaration
 *                leaves ctxt->errNo at XML_ERR_UNSUPPORTED_ENCODING the
 *                function returns -1 at once, before startDocument or
 *                endDocument are called.
 */

int
xmlParseDocument(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    xmlInitParser();

    GROW;

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * Get the first 4 bytes and detect the charset from them;
     * if enc != XML_CHAR_ENCODING_NONE
     * plug some encoding conversion routines.
     */
    start[0] = RAW;
    start[1] = NXT(1);
    start[2] = NXT(2);
    start[3] = NXT(3);
    enc = xmlDetectCharEncoding(start, 4);
    if (enc != XML_CHAR_ENCODING_NONE) {
        xmlSwitchEncoding(ctxt, enc);
    }


    /*
     * An empty input is reported here, but parsing still goes on below.
     */
    if (CUR == 0) {
        ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData, "Document is empty\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    }

    /*
     * Check for the XMLDecl in the Prolog: '<?xml' followed by a blank.
     */
    GROW;
    if ((RAW == '<') && (NXT(1) == '?') &&
        (NXT(2) == 'x') && (NXT(3) == 'm') &&
        (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        /* propagate the standalone status read from the declaration */
        ctxt->standalone = ctxt->input->standalone;
        SKIP_BLANKS;
    } else {
        /* no XML declaration: assume the default version */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);

    /*
     * The Misc part of the Prolog
     */
    GROW;
    xmlParseMisc(ctxt);

    /*
     * Then possibly doc type declaration(s) and more Misc
     * (doctypedecl Misc*)?
     */
    GROW;
    if ((RAW == '<') && (NXT(1) == '!') &&
        (NXT(2) == 'D') && (NXT(3) == 'O') &&
        (NXT(4) == 'C') && (NXT(5) == 'T') &&
        (NXT(6) == 'Y') && (NXT(7) == 'P') &&
        (NXT(8) == 'E')) {

        /*
         * NOTE(review): inSubset presumably encodes 1 = internal subset,
         * 2 = external subset, 0 = outside of the DTD - confirm against
         * the field's declaration.
         */
        ctxt->inSubset = 1;
        xmlParseDocTypeDecl(ctxt);
        if (RAW == '[') {
            ctxt->instate = XML_PARSER_DTD;
            xmlParseInternalSubset(ctxt);
        }

        /*
         * Create and update the external subset.
         */
        ctxt->inSubset = 2;
        if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
            (!ctxt->disableSAX))
            ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
                                      ctxt->extSubSystem, ctxt->extSubURI);
        ctxt->inSubset = 0;


        ctxt->instate = XML_PARSER_PROLOG;
        xmlParseMisc(ctxt);
    }

    /*
     * Time to start parsing the tree itself
     */
    GROW;
    if (RAW != '<') {
        ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                    "Start tag expected, '<' not found\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
        ctxt->instate = XML_PARSER_EOF;
    } else {
        ctxt->instate = XML_PARSER_CONTENT;
        xmlParseElement(ctxt);
        ctxt->instate = XML_PARSER_EPILOG;


        /*
         * The Misc part at the end
         */
        xmlParseMisc(ctxt);

        /* anything left after the trailing Misc is an error */
        if (RAW != 0) {
            ctxt->errNo = XML_ERR_DOCUMENT_END;
            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
                    "Extra content at the end of the document\n");
            ctxt->wellFormed = 0;
            ctxt->disableSAX = 1;
        }
        ctxt->instate = XML_PARSER_EOF;
    }

    /*
     * SAX: end of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
        (!ctxt->disableSAX))
        ctxt->sax->endDocument(ctxt->userData);

    if (! ctxt->wellFormed) return(-1);
    return(0);
}
7233
/**
 * xmlParseExtParsedEnt:
 * @ctxt:  an XML parser context
 *
 * parse a general parsed entity
 * An external general parsed entity is well-formed if it matches the
 * production labeled extParsedEnt.
 *
 * [78] extParsedEnt ::= TextDecl? content
 *
 * Validation and external subset loading are switched off on the
 * context (ctxt->validate and ctxt->loadsubset are set to 0) and
 * ctxt->depth is reset to 0 before the content is parsed.
 * startDocument and endDocument are only invoked while
 * ctxt->disableSAX is not set.
 *
 * Returns 0, -1 in case of error. The parser context is augmented
 *                as a result of the parsing. When the leading
 *                declaration leaves ctxt->errNo at
 *                XML_ERR_UNSUPPORTED_ENCODING the function returns -1
 *                at once, before startDocument or endDocument are called.
 */

int
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
    xmlChar start[4];
    xmlCharEncoding enc;

    xmlDefaultSAXHandlerInit();

    GROW;

    /*
     * SAX: beginning of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
        ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);

    /*
     * Get the first 4 bytes and detect the charset from them;
     * if enc != XML_CHAR_ENCODING_NONE
     * plug some encoding conversion routines.
     */
    start[0] = RAW;
    start[1] = NXT(1);
    start[2] = NXT(2);
    start[3] = NXT(3);
    enc = xmlDetectCharEncoding(start, 4);
    if (enc != XML_CHAR_ENCODING_NONE) {
        xmlSwitchEncoding(ctxt, enc);
    }


    /*
     * An empty input is reported here, but parsing still goes on below.
     */
    if (CUR == 0) {
        ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData, "Document is empty\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    }

    /*
     * Check for a leading '<?xml' declaration followed by a blank; it is
     * parsed with the XMLDecl routine.
     */
    GROW;
    if ((RAW == '<') && (NXT(1) == '?') &&
        (NXT(2) == 'x') && (NXT(3) == 'm') &&
        (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {

        /*
         * Note that we will switch encoding on the fly.
         */
        xmlParseXMLDecl(ctxt);
        if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
            /*
             * The XML REC instructs us to stop parsing right here
             */
            return(-1);
        }
        SKIP_BLANKS;
    } else {
        /* no declaration: assume the default version */
        ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
    if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
        ctxt->sax->startDocument(ctxt->userData);

    /*
     * Doing validity checking on chunk doesn't make sense
     */
    ctxt->instate = XML_PARSER_CONTENT;
    ctxt->validate = 0;
    ctxt->loadsubset = 0;
    ctxt->depth = 0;

    xmlParseContent(ctxt);

    /*
     * After the content: a pending '</' means an unbalanced chunk,
     * any other remaining character is extra content.
     */
    if ((RAW == '<') && (NXT(1) == '/')) {
        ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                "chunk is not well balanced\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    } else if (RAW != 0) {
        ctxt->errNo = XML_ERR_EXTRA_CONTENT;
        if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
            ctxt->sax->error(ctxt->userData,
                "extra content at the end of well balanced chunk\n");
        ctxt->wellFormed = 0;
        ctxt->disableSAX = 1;
    }

    /*
     * SAX: end of the document processing.
     */
    if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
        (!ctxt->disableSAX))
        ctxt->sax->endDocument(ctxt->userData);

    if (! ctxt->wellFormed) return(-1);
    return(0);
}
7347
7348/************************************************************************
7349 * *
7350 * Progressive parsing interfaces *
7351 * *
7352 ************************************************************************/
7353
7354/**
7355 * xmlParseLookupSequence:
7356 * @ctxt: an XML parser context
7357 * @first: the first char to lookup
7358 * @next: the next char to lookup or zero
7359 * @third: the next char to lookup or zero
7360 *
7361 * Try to find if a sequence (first, next, third) or just (first next) or
7362 * (first) is available in the input stream.
7363 * This function has a side effect of (possibly) incrementing ctxt->checkIndex
7364 * to avoid rescanning sequences of bytes, it DOES change the state of the
7365 * parser, do not use liberally.
7366 *
7367 * Returns the index to the current parsing point if the full sequence
7368 * is available, -1 otherwise.
7369 */
7370int
7371xmlParseLookupSequence(xmlParserCtxtPtr ctxt, xmlChar first,
7372 xmlChar next, xmlChar third) {
7373 int base, len;
7374 xmlParserInputPtr in;
7375 const xmlChar *buf;
7376
7377 in = ctxt->input;
7378 if (in == NULL) return(-1);
7379 base = in->cur - in->base;
7380 if (base < 0) return(-1);
7381 if (ctxt->checkIndex > base)
7382 base = ctxt->checkIndex;
7383 if (in->buf == NULL) {
7384 buf = in->base;
7385 len = in->length;
7386 } else {
7387 buf = in->buf->buffer->content;
7388 len = in->buf->buffer->use;
7389 }
7390 /* take into account the sequence length */
7391 if (third) len -= 2;
7392 else if (next) len --;
7393 for (;base < len;base++) {
7394 if (buf[base] == first) {
7395 if (third != 0) {
7396 if ((buf[base + 1] != next) ||
7397 (buf[base + 2] != third)) continue;
7398 } else if (next != 0) {
7399 if (buf[base + 1] != next) continue;
7400 }
7401 ctxt->checkIndex = 0;
7402#ifdef DEBUG_PUSH
7403 if (next == 0)
7404 xmlGenericError(xmlGenericErrorContext,
7405 "PP: lookup '%c' found at %d\n",
7406 first, base);
7407 else if (third == 0)
7408 xmlGenericError(xmlGenericErrorContext,
7409 "PP: lookup '%c%c' found at %d\n",
7410 first, next, base);
7411 else
7412 xmlGenericError(xmlGenericErrorContext,
7413 "PP: lookup '%c%c%c' found at %d\n",
7414 first, next, third, base);
7415#endif
7416 return(base - (in->cur - in->base));
7417 }
7418 }
7419 ctxt->checkIndex = base;
7420#ifdef DEBUG_PUSH
7421 if (next == 0)
7422 xmlGenericError(xmlGenericErrorContext,
7423 "PP: lookup '%c' failed\n", first);
7424 else if (third == 0)
7425 xmlGenericError(xmlGenericErrorContext,
7426 "PP: lookup '%c%c' failed\n", first, next);
7427 else
7428 xmlGenericError(xmlGenericErrorContext,
7429 "PP: lookup '%c%c%c' failed\n", first, next, third);
7430#endif
7431 return(-1);
7432}
7433
7434/**
7435 * xmlParseTryOrFinish:
7436 * @ctxt: an XML parser context
7437 * @terminate: last chunk indicator
7438 *
7439 * Try to progress on parsing
7440 *
7441 * Returns zero if no parsing was possible
7442 */
7443int
7444xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
7445 int ret = 0;
7446 int avail;
7447 xmlChar cur, next;
7448
7449#ifdef DEBUG_PUSH
7450 switch (ctxt->instate) {
7451 case XML_PARSER_EOF:
7452 xmlGenericError(xmlGenericErrorContext,
7453 "PP: try EOF\n"); break;
7454 case XML_PARSER_START:
7455 xmlGenericError(xmlGenericErrorContext,
7456 "PP: try START\n"); break;
7457 case XML_PARSER_MISC:
7458 xmlGenericError(xmlGenericErrorContext,
7459 "PP: try MISC\n");break;
7460 case XML_PARSER_COMMENT:
7461 xmlGenericError(xmlGenericErrorContext,
7462 "PP: try COMMENT\n");break;
7463 case XML_PARSER_PROLOG:
7464 xmlGenericError(xmlGenericErrorContext,
7465 "PP: try PROLOG\n");break;
7466 case XML_PARSER_START_TAG:
7467 xmlGenericError(xmlGenericErrorContext,
7468 "PP: try START_TAG\n");break;
7469 case XML_PARSER_CONTENT:
7470 xmlGenericError(xmlGenericErrorContext,
7471 "PP: try CONTENT\n");break;
7472 case XML_PARSER_CDATA_SECTION:
7473 xmlGenericError(xmlGenericErrorContext,
7474 "PP: try CDATA_SECTION\n");break;
7475 case XML_PARSER_END_TAG:
7476 xmlGenericError(xmlGenericErrorContext,
7477 "PP: try END_TAG\n");break;
7478 case XML_PARSER_ENTITY_DECL:
7479 xmlGenericError(xmlGenericErrorContext,
7480 "PP: try ENTITY_DECL\n");break;
7481 case XML_PARSER_ENTITY_VALUE:
7482 xmlGenericError(xmlGenericErrorContext,
7483 "PP: try ENTITY_VALUE\n");break;
7484 case XML_PARSER_ATTRIBUTE_VALUE:
7485 xmlGenericError(xmlGenericErrorContext,
7486 "PP: try ATTRIBUTE_VALUE\n");break;
7487 case XML_PARSER_DTD:
7488 xmlGenericError(xmlGenericErrorContext,
7489 "PP: try DTD\n");break;
7490 case XML_PARSER_EPILOG:
7491 xmlGenericError(xmlGenericErrorContext,
7492 "PP: try EPILOG\n");break;
7493 case XML_PARSER_PI:
7494 xmlGenericError(xmlGenericErrorContext,
7495 "PP: try PI\n");break;
7496 case XML_PARSER_IGNORE:
7497 xmlGenericError(xmlGenericErrorContext,
7498 "PP: try IGNORE\n");break;
7499 }
7500#endif
7501
7502 while (1) {
7503 /*
7504 * Pop-up of finished entities.
7505 */
7506 while ((RAW == 0) && (ctxt->inputNr > 1))
7507 xmlPopInput(ctxt);
7508
7509 if (ctxt->input ==NULL) break;
7510 if (ctxt->input->buf == NULL)
7511 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7512 else
7513 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7514 if (avail < 1)
7515 goto done;
7516 switch (ctxt->instate) {
7517 case XML_PARSER_EOF:
7518 /*
7519 * Document parsing is done !
7520 */
7521 goto done;
7522 case XML_PARSER_START:
7523 /*
7524 * Very first chars read from the document flow.
7525 */
7526 cur = ctxt->input->cur[0];
7527 if (IS_BLANK(cur)) {
7528 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7529 ctxt->sax->setDocumentLocator(ctxt->userData,
7530 &xmlDefaultSAXLocator);
7531 ctxt->errNo = XML_ERR_DOCUMENT_START;
7532 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7533 ctxt->sax->error(ctxt->userData,
7534 "Extra spaces at the beginning of the document are not allowed\n");
7535 ctxt->wellFormed = 0;
7536 ctxt->disableSAX = 1;
7537 SKIP_BLANKS;
7538 ret++;
7539 if (ctxt->input->buf == NULL)
7540 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7541 else
7542 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7543 }
7544 if (avail < 2)
7545 goto done;
7546
7547 cur = ctxt->input->cur[0];
7548 next = ctxt->input->cur[1];
7549 if (cur == 0) {
7550 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7551 ctxt->sax->setDocumentLocator(ctxt->userData,
7552 &xmlDefaultSAXLocator);
7553 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7554 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7555 ctxt->sax->error(ctxt->userData, "Document is empty\n");
7556 ctxt->wellFormed = 0;
7557 ctxt->disableSAX = 1;
7558 ctxt->instate = XML_PARSER_EOF;
7559#ifdef DEBUG_PUSH
7560 xmlGenericError(xmlGenericErrorContext,
7561 "PP: entering EOF\n");
7562#endif
7563 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
7564 ctxt->sax->endDocument(ctxt->userData);
7565 goto done;
7566 }
7567 if ((cur == '<') && (next == '?')) {
7568 /* PI or XML decl */
7569 if (avail < 5) return(ret);
7570 if ((!terminate) &&
7571 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7572 return(ret);
7573 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7574 ctxt->sax->setDocumentLocator(ctxt->userData,
7575 &xmlDefaultSAXLocator);
7576 if ((ctxt->input->cur[2] == 'x') &&
7577 (ctxt->input->cur[3] == 'm') &&
7578 (ctxt->input->cur[4] == 'l') &&
7579 (IS_BLANK(ctxt->input->cur[5]))) {
7580 ret += 5;
7581#ifdef DEBUG_PUSH
7582 xmlGenericError(xmlGenericErrorContext,
7583 "PP: Parsing XML Decl\n");
7584#endif
7585 xmlParseXMLDecl(ctxt);
7586 if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7587 /*
7588 * The XML REC instructs us to stop parsing right
7589 * here
7590 */
7591 ctxt->instate = XML_PARSER_EOF;
7592 return(0);
7593 }
7594 ctxt->standalone = ctxt->input->standalone;
7595 if ((ctxt->encoding == NULL) &&
7596 (ctxt->input->encoding != NULL))
7597 ctxt->encoding = xmlStrdup(ctxt->input->encoding);
7598 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7599 (!ctxt->disableSAX))
7600 ctxt->sax->startDocument(ctxt->userData);
7601 ctxt->instate = XML_PARSER_MISC;
7602#ifdef DEBUG_PUSH
7603 xmlGenericError(xmlGenericErrorContext,
7604 "PP: entering MISC\n");
7605#endif
7606 } else {
7607 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7608 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7609 (!ctxt->disableSAX))
7610 ctxt->sax->startDocument(ctxt->userData);
7611 ctxt->instate = XML_PARSER_MISC;
7612#ifdef DEBUG_PUSH
7613 xmlGenericError(xmlGenericErrorContext,
7614 "PP: entering MISC\n");
7615#endif
7616 }
7617 } else {
7618 if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
7619 ctxt->sax->setDocumentLocator(ctxt->userData,
7620 &xmlDefaultSAXLocator);
7621 ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
7622 if ((ctxt->sax) && (ctxt->sax->startDocument) &&
7623 (!ctxt->disableSAX))
7624 ctxt->sax->startDocument(ctxt->userData);
7625 ctxt->instate = XML_PARSER_MISC;
7626#ifdef DEBUG_PUSH
7627 xmlGenericError(xmlGenericErrorContext,
7628 "PP: entering MISC\n");
7629#endif
7630 }
7631 break;
7632 case XML_PARSER_MISC:
7633 SKIP_BLANKS;
7634 if (ctxt->input->buf == NULL)
7635 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7636 else
7637 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7638 if (avail < 2)
7639 goto done;
7640 cur = ctxt->input->cur[0];
7641 next = ctxt->input->cur[1];
7642 if ((cur == '<') && (next == '?')) {
7643 if ((!terminate) &&
7644 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7645 goto done;
7646#ifdef DEBUG_PUSH
7647 xmlGenericError(xmlGenericErrorContext,
7648 "PP: Parsing PI\n");
7649#endif
7650 xmlParsePI(ctxt);
7651 } else if ((cur == '<') && (next == '!') &&
7652 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7653 if ((!terminate) &&
7654 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7655 goto done;
7656#ifdef DEBUG_PUSH
7657 xmlGenericError(xmlGenericErrorContext,
7658 "PP: Parsing Comment\n");
7659#endif
7660 xmlParseComment(ctxt);
7661 ctxt->instate = XML_PARSER_MISC;
7662 } else if ((cur == '<') && (next == '!') &&
7663 (ctxt->input->cur[2] == 'D') && (ctxt->input->cur[3] == 'O') &&
7664 (ctxt->input->cur[4] == 'C') && (ctxt->input->cur[5] == 'T') &&
7665 (ctxt->input->cur[6] == 'Y') && (ctxt->input->cur[7] == 'P') &&
7666 (ctxt->input->cur[8] == 'E')) {
7667 if ((!terminate) &&
7668 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7669 goto done;
7670#ifdef DEBUG_PUSH
7671 xmlGenericError(xmlGenericErrorContext,
7672 "PP: Parsing internal subset\n");
7673#endif
7674 ctxt->inSubset = 1;
7675 xmlParseDocTypeDecl(ctxt);
7676 if (RAW == '[') {
7677 ctxt->instate = XML_PARSER_DTD;
7678#ifdef DEBUG_PUSH
7679 xmlGenericError(xmlGenericErrorContext,
7680 "PP: entering DTD\n");
7681#endif
7682 } else {
7683 /*
7684 * Create and update the external subset.
7685 */
7686 ctxt->inSubset = 2;
7687 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7688 (ctxt->sax->externalSubset != NULL))
7689 ctxt->sax->externalSubset(ctxt->userData,
7690 ctxt->intSubName, ctxt->extSubSystem,
7691 ctxt->extSubURI);
7692 ctxt->inSubset = 0;
7693 ctxt->instate = XML_PARSER_PROLOG;
7694#ifdef DEBUG_PUSH
7695 xmlGenericError(xmlGenericErrorContext,
7696 "PP: entering PROLOG\n");
7697#endif
7698 }
7699 } else if ((cur == '<') && (next == '!') &&
7700 (avail < 9)) {
7701 goto done;
7702 } else {
7703 ctxt->instate = XML_PARSER_START_TAG;
7704#ifdef DEBUG_PUSH
7705 xmlGenericError(xmlGenericErrorContext,
7706 "PP: entering START_TAG\n");
7707#endif
7708 }
7709 break;
7710 case XML_PARSER_IGNORE:
7711 xmlGenericError(xmlGenericErrorContext,
7712 "PP: internal error, state == IGNORE");
7713 ctxt->instate = XML_PARSER_DTD;
7714#ifdef DEBUG_PUSH
7715 xmlGenericError(xmlGenericErrorContext,
7716 "PP: entering DTD\n");
7717#endif
7718 break;
7719 case XML_PARSER_PROLOG:
7720 SKIP_BLANKS;
7721 if (ctxt->input->buf == NULL)
7722 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7723 else
7724 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7725 if (avail < 2)
7726 goto done;
7727 cur = ctxt->input->cur[0];
7728 next = ctxt->input->cur[1];
7729 if ((cur == '<') && (next == '?')) {
7730 if ((!terminate) &&
7731 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7732 goto done;
7733#ifdef DEBUG_PUSH
7734 xmlGenericError(xmlGenericErrorContext,
7735 "PP: Parsing PI\n");
7736#endif
7737 xmlParsePI(ctxt);
7738 } else if ((cur == '<') && (next == '!') &&
7739 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7740 if ((!terminate) &&
7741 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7742 goto done;
7743#ifdef DEBUG_PUSH
7744 xmlGenericError(xmlGenericErrorContext,
7745 "PP: Parsing Comment\n");
7746#endif
7747 xmlParseComment(ctxt);
7748 ctxt->instate = XML_PARSER_PROLOG;
7749 } else if ((cur == '<') && (next == '!') &&
7750 (avail < 4)) {
7751 goto done;
7752 } else {
7753 ctxt->instate = XML_PARSER_START_TAG;
7754#ifdef DEBUG_PUSH
7755 xmlGenericError(xmlGenericErrorContext,
7756 "PP: entering START_TAG\n");
7757#endif
7758 }
7759 break;
7760 case XML_PARSER_EPILOG:
7761 SKIP_BLANKS;
7762 if (ctxt->input->buf == NULL)
7763 avail = ctxt->input->length - (ctxt->input->cur - ctxt->input->base);
7764 else
7765 avail = ctxt->input->buf->buffer->use - (ctxt->input->cur - ctxt->input->base);
7766 if (avail < 2)
7767 goto done;
7768 cur = ctxt->input->cur[0];
7769 next = ctxt->input->cur[1];
7770 if ((cur == '<') && (next == '?')) {
7771 if ((!terminate) &&
7772 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7773 goto done;
7774#ifdef DEBUG_PUSH
7775 xmlGenericError(xmlGenericErrorContext,
7776 "PP: Parsing PI\n");
7777#endif
7778 xmlParsePI(ctxt);
7779 ctxt->instate = XML_PARSER_EPILOG;
7780 } else if ((cur == '<') && (next == '!') &&
7781 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7782 if ((!terminate) &&
7783 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7784 goto done;
7785#ifdef DEBUG_PUSH
7786 xmlGenericError(xmlGenericErrorContext,
7787 "PP: Parsing Comment\n");
7788#endif
7789 xmlParseComment(ctxt);
7790 ctxt->instate = XML_PARSER_EPILOG;
7791 } else if ((cur == '<') && (next == '!') &&
7792 (avail < 4)) {
7793 goto done;
7794 } else {
7795 ctxt->errNo = XML_ERR_DOCUMENT_END;
7796 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7797 ctxt->sax->error(ctxt->userData,
7798 "Extra content at the end of the document\n");
7799 ctxt->wellFormed = 0;
7800 ctxt->disableSAX = 1;
7801 ctxt->instate = XML_PARSER_EOF;
7802#ifdef DEBUG_PUSH
7803 xmlGenericError(xmlGenericErrorContext,
7804 "PP: entering EOF\n");
7805#endif
7806 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7807 (!ctxt->disableSAX))
7808 ctxt->sax->endDocument(ctxt->userData);
7809 goto done;
7810 }
7811 break;
7812 case XML_PARSER_START_TAG: {
7813 xmlChar *name, *oldname;
7814
7815 if ((avail < 2) && (ctxt->inputNr == 1))
7816 goto done;
7817 cur = ctxt->input->cur[0];
7818 if (cur != '<') {
7819 ctxt->errNo = XML_ERR_DOCUMENT_EMPTY;
7820 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7821 ctxt->sax->error(ctxt->userData,
7822 "Start tag expect, '<' not found\n");
7823 ctxt->wellFormed = 0;
7824 ctxt->disableSAX = 1;
7825 ctxt->instate = XML_PARSER_EOF;
7826#ifdef DEBUG_PUSH
7827 xmlGenericError(xmlGenericErrorContext,
7828 "PP: entering EOF\n");
7829#endif
7830 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7831 (!ctxt->disableSAX))
7832 ctxt->sax->endDocument(ctxt->userData);
7833 goto done;
7834 }
7835 if ((!terminate) &&
7836 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
7837 goto done;
7838 if (ctxt->spaceNr == 0)
7839 spacePush(ctxt, -1);
7840 else
7841 spacePush(ctxt, *ctxt->space);
7842 name = xmlParseStartTag(ctxt);
7843 if (name == NULL) {
7844 spacePop(ctxt);
7845 ctxt->instate = XML_PARSER_EOF;
7846#ifdef DEBUG_PUSH
7847 xmlGenericError(xmlGenericErrorContext,
7848 "PP: entering EOF\n");
7849#endif
7850 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
7851 (!ctxt->disableSAX))
7852 ctxt->sax->endDocument(ctxt->userData);
7853 goto done;
7854 }
7855 namePush(ctxt, xmlStrdup(name));
7856
7857 /*
7858 * [ VC: Root Element Type ]
7859 * The Name in the document type declaration must match
7860 * the element type of the root element.
7861 */
7862 if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
7863 ctxt->node && (ctxt->node == ctxt->myDoc->children))
7864 ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
7865
7866 /*
7867 * Check for an Empty Element.
7868 */
7869 if ((RAW == '/') && (NXT(1) == '>')) {
7870 SKIP(2);
7871 if ((ctxt->sax != NULL) &&
7872 (ctxt->sax->endElement != NULL) && (!ctxt->disableSAX))
7873 ctxt->sax->endElement(ctxt->userData, name);
7874 xmlFree(name);
7875 oldname = namePop(ctxt);
7876 spacePop(ctxt);
7877 if (oldname != NULL) {
7878#ifdef DEBUG_STACK
7879 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7880#endif
7881 xmlFree(oldname);
7882 }
7883 if (ctxt->name == NULL) {
7884 ctxt->instate = XML_PARSER_EPILOG;
7885#ifdef DEBUG_PUSH
7886 xmlGenericError(xmlGenericErrorContext,
7887 "PP: entering EPILOG\n");
7888#endif
7889 } else {
7890 ctxt->instate = XML_PARSER_CONTENT;
7891#ifdef DEBUG_PUSH
7892 xmlGenericError(xmlGenericErrorContext,
7893 "PP: entering CONTENT\n");
7894#endif
7895 }
7896 break;
7897 }
7898 if (RAW == '>') {
7899 NEXT;
7900 } else {
7901 ctxt->errNo = XML_ERR_GT_REQUIRED;
7902 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
7903 ctxt->sax->error(ctxt->userData,
7904 "Couldn't find end of Start Tag %s\n",
7905 name);
7906 ctxt->wellFormed = 0;
7907 ctxt->disableSAX = 1;
7908
7909 /*
7910 * end of parsing of this node.
7911 */
7912 nodePop(ctxt);
7913 oldname = namePop(ctxt);
7914 spacePop(ctxt);
7915 if (oldname != NULL) {
7916#ifdef DEBUG_STACK
7917 xmlGenericError(xmlGenericErrorContext,"Close: popped %s\n", oldname);
7918#endif
7919 xmlFree(oldname);
7920 }
7921 }
7922 xmlFree(name);
7923 ctxt->instate = XML_PARSER_CONTENT;
7924#ifdef DEBUG_PUSH
7925 xmlGenericError(xmlGenericErrorContext,
7926 "PP: entering CONTENT\n");
7927#endif
7928 break;
7929 }
7930 case XML_PARSER_CONTENT: {
7931 const xmlChar *test;
7932 int cons;
7933 xmlChar tok;
7934
7935 /*
7936 * Handle preparsed entities and charRef
7937 */
7938 if (ctxt->token != 0) {
7939 xmlChar cur[2] = { 0 , 0 } ;
7940
7941 cur[0] = (xmlChar) ctxt->token;
7942 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
7943 (ctxt->sax->characters != NULL))
7944 ctxt->sax->characters(ctxt->userData, cur, 1);
7945 ctxt->token = 0;
7946 }
7947 if ((avail < 2) && (ctxt->inputNr == 1))
7948 goto done;
7949 cur = ctxt->input->cur[0];
7950 next = ctxt->input->cur[1];
7951
7952 test = CUR_PTR;
7953 cons = ctxt->input->consumed;
7954 tok = ctxt->token;
7955 if ((cur == '<') && (next == '?')) {
7956 if ((!terminate) &&
7957 (xmlParseLookupSequence(ctxt, '?', '>', 0) < 0))
7958 goto done;
7959#ifdef DEBUG_PUSH
7960 xmlGenericError(xmlGenericErrorContext,
7961 "PP: Parsing PI\n");
7962#endif
7963 xmlParsePI(ctxt);
7964 } else if ((cur == '<') && (next == '!') &&
7965 (ctxt->input->cur[2] == '-') && (ctxt->input->cur[3] == '-')) {
7966 if ((!terminate) &&
7967 (xmlParseLookupSequence(ctxt, '-', '-', '>') < 0))
7968 goto done;
7969#ifdef DEBUG_PUSH
7970 xmlGenericError(xmlGenericErrorContext,
7971 "PP: Parsing Comment\n");
7972#endif
7973 xmlParseComment(ctxt);
7974 ctxt->instate = XML_PARSER_CONTENT;
7975 } else if ((cur == '<') && (ctxt->input->cur[1] == '!') &&
7976 (ctxt->input->cur[2] == '[') && (NXT(3) == 'C') &&
7977 (ctxt->input->cur[4] == 'D') && (NXT(5) == 'A') &&
7978 (ctxt->input->cur[6] == 'T') && (NXT(7) == 'A') &&
7979 (ctxt->input->cur[8] == '[')) {
7980 SKIP(9);
7981 ctxt->instate = XML_PARSER_CDATA_SECTION;
7982#ifdef DEBUG_PUSH
7983 xmlGenericError(xmlGenericErrorContext,
7984 "PP: entering CDATA_SECTION\n");
7985#endif
7986 break;
7987 } else if ((cur == '<') && (next == '!') &&
7988 (avail < 9)) {
7989 goto done;
7990 } else if ((cur == '<') && (next == '/')) {
7991 ctxt->instate = XML_PARSER_END_TAG;
7992#ifdef DEBUG_PUSH
7993 xmlGenericError(xmlGenericErrorContext,
7994 "PP: entering END_TAG\n");
7995#endif
7996 break;
7997 } else if (cur == '<') {
7998 ctxt->instate = XML_PARSER_START_TAG;
7999#ifdef DEBUG_PUSH
8000 xmlGenericError(xmlGenericErrorContext,
8001 "PP: entering START_TAG\n");
8002#endif
8003 break;
8004 } else if (cur == '&') {
8005 if ((!terminate) &&
8006 (xmlParseLookupSequence(ctxt, ';', 0, 0) < 0))
8007 goto done;
8008#ifdef DEBUG_PUSH
8009 xmlGenericError(xmlGenericErrorContext,
8010 "PP: Parsing Reference\n");
8011#endif
8012 xmlParseReference(ctxt);
8013 } else {
8014 /* TODO Avoid the extra copy, handle directly !!! */
8015 /*
8016 * Goal of the following test is:
8017 * - minimize calls to the SAX 'character' callback
8018 * when they are mergeable
8019 * - handle an problem for isBlank when we only parse
8020 * a sequence of blank chars and the next one is
8021 * not available to check against '<' presence.
8022 * - tries to homogenize the differences in SAX
8023 * callbacks beween the push and pull versions
8024 * of the parser.
8025 */
8026 if ((ctxt->inputNr == 1) &&
8027 (avail < XML_PARSER_BIG_BUFFER_SIZE)) {
8028 if ((!terminate) &&
8029 (xmlParseLookupSequence(ctxt, '<', 0, 0) < 0))
8030 goto done;
8031 }
8032 ctxt->checkIndex = 0;
8033#ifdef DEBUG_PUSH
8034 xmlGenericError(xmlGenericErrorContext,
8035 "PP: Parsing char data\n");
8036#endif
8037 xmlParseCharData(ctxt, 0);
8038 }
8039 /*
8040 * Pop-up of finished entities.
8041 */
8042 while ((RAW == 0) && (ctxt->inputNr > 1))
8043 xmlPopInput(ctxt);
8044 if ((cons == ctxt->input->consumed) && (test == CUR_PTR) &&
8045 (tok == ctxt->token)) {
8046 ctxt->errNo = XML_ERR_INTERNAL_ERROR;
8047 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8048 ctxt->sax->error(ctxt->userData,
8049 "detected an error in element content\n");
8050 ctxt->wellFormed = 0;
8051 ctxt->disableSAX = 1;
8052 ctxt->instate = XML_PARSER_EOF;
8053 break;
8054 }
8055 break;
8056 }
8057 case XML_PARSER_CDATA_SECTION: {
8058 /*
8059 * The Push mode need to have the SAX callback for
8060 * cdataBlock merge back contiguous callbacks.
8061 */
8062 int base;
8063
8064 base = xmlParseLookupSequence(ctxt, ']', ']', '>');
8065 if (base < 0) {
8066 if (avail >= XML_PARSER_BIG_BUFFER_SIZE + 2) {
8067 if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
8068 if (ctxt->sax->cdataBlock != NULL)
8069 ctxt->sax->cdataBlock(ctxt->userData, ctxt->input->cur,
8070 XML_PARSER_BIG_BUFFER_SIZE);
8071 }
8072 SKIP(XML_PARSER_BIG_BUFFER_SIZE);
8073 ctxt->checkIndex = 0;
8074 }
8075 goto done;
8076 } else {
8077 if ((ctxt->sax != NULL) && (base > 0) &&
8078 (!ctxt->disableSAX)) {
8079 if (ctxt->sax->cdataBlock != NULL)
8080 ctxt->sax->cdataBlock(ctxt->userData,
8081 ctxt->input->cur, base);
8082 }
8083 SKIP(base + 3);
8084 ctxt->checkIndex = 0;
8085 ctxt->instate = XML_PARSER_CONTENT;
8086#ifdef DEBUG_PUSH
8087 xmlGenericError(xmlGenericErrorContext,
8088 "PP: entering CONTENT\n");
8089#endif
8090 }
8091 break;
8092 }
8093 case XML_PARSER_END_TAG:
8094 if (avail < 2)
8095 goto done;
8096 if ((!terminate) &&
8097 (xmlParseLookupSequence(ctxt, '>', 0, 0) < 0))
8098 goto done;
8099 xmlParseEndTag(ctxt);
8100 if (ctxt->name == NULL) {
8101 ctxt->instate = XML_PARSER_EPILOG;
8102#ifdef DEBUG_PUSH
8103 xmlGenericError(xmlGenericErrorContext,
8104 "PP: entering EPILOG\n");
8105#endif
8106 } else {
8107 ctxt->instate = XML_PARSER_CONTENT;
8108#ifdef DEBUG_PUSH
8109 xmlGenericError(xmlGenericErrorContext,
8110 "PP: entering CONTENT\n");
8111#endif
8112 }
8113 break;
8114 case XML_PARSER_DTD: {
8115 /*
8116 * Sorry but progressive parsing of the internal subset
8117 * is not expected to be supported. We first check that
8118 * the full content of the internal subset is available and
8119 * the parsing is launched only at that point.
8120 * Internal subset ends up with "']' S? '>'" in an unescaped
8121 * section and not in a ']]>' sequence which are conditional
8122 * sections (whoever argued to keep that crap in XML deserve
8123 * a place in hell !).
8124 */
8125 int base, i;
8126 xmlChar *buf;
8127 xmlChar quote = 0;
8128
8129 base = ctxt->input->cur - ctxt->input->base;
8130 if (base < 0) return(0);
8131 if (ctxt->checkIndex > base)
8132 base = ctxt->checkIndex;
8133 buf = ctxt->input->buf->buffer->content;
8134 for (;(unsigned int) base < ctxt->input->buf->buffer->use;
8135 base++) {
8136 if (quote != 0) {
8137 if (buf[base] == quote)
8138 quote = 0;
8139 continue;
8140 }
8141 if (buf[base] == '"') {
8142 quote = '"';
8143 continue;
8144 }
8145 if (buf[base] == '\'') {
8146 quote = '\'';
8147 continue;
8148 }
8149 if (buf[base] == ']') {
8150 if ((unsigned int) base +1 >=
8151 ctxt->input->buf->buffer->use)
8152 break;
8153 if (buf[base + 1] == ']') {
8154 /* conditional crap, skip both ']' ! */
8155 base++;
8156 continue;
8157 }
8158 for (i = 0;
8159 (unsigned int) base + i < ctxt->input->buf->buffer->use;
8160 i++) {
8161 if (buf[base + i] == '>')
8162 goto found_end_int_subset;
8163 }
8164 break;
8165 }
8166 }
8167 /*
8168 * We didn't found the end of the Internal subset
8169 */
8170 if (quote == 0)
8171 ctxt->checkIndex = base;
8172#ifdef DEBUG_PUSH
8173 if (next == 0)
8174 xmlGenericError(xmlGenericErrorContext,
8175 "PP: lookup of int subset end filed\n");
8176#endif
8177 goto done;
8178
8179found_end_int_subset:
8180 xmlParseInternalSubset(ctxt);
8181 ctxt->inSubset = 2;
8182 if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
8183 (ctxt->sax->externalSubset != NULL))
8184 ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
8185 ctxt->extSubSystem, ctxt->extSubURI);
8186 ctxt->inSubset = 0;
8187 ctxt->instate = XML_PARSER_PROLOG;
8188 ctxt->checkIndex = 0;
8189#ifdef DEBUG_PUSH
8190 xmlGenericError(xmlGenericErrorContext,
8191 "PP: entering PROLOG\n");
8192#endif
8193 break;
8194 }
8195 case XML_PARSER_COMMENT:
8196 xmlGenericError(xmlGenericErrorContext,
8197 "PP: internal error, state == COMMENT\n");
8198 ctxt->instate = XML_PARSER_CONTENT;
8199#ifdef DEBUG_PUSH
8200 xmlGenericError(xmlGenericErrorContext,
8201 "PP: entering CONTENT\n");
8202#endif
8203 break;
8204 case XML_PARSER_PI:
8205 xmlGenericError(xmlGenericErrorContext,
8206 "PP: internal error, state == PI\n");
8207 ctxt->instate = XML_PARSER_CONTENT;
8208#ifdef DEBUG_PUSH
8209 xmlGenericError(xmlGenericErrorContext,
8210 "PP: entering CONTENT\n");
8211#endif
8212 break;
8213 case XML_PARSER_ENTITY_DECL:
8214 xmlGenericError(xmlGenericErrorContext,
8215 "PP: internal error, state == ENTITY_DECL\n");
8216 ctxt->instate = XML_PARSER_DTD;
8217#ifdef DEBUG_PUSH
8218 xmlGenericError(xmlGenericErrorContext,
8219 "PP: entering DTD\n");
8220#endif
8221 break;
8222 case XML_PARSER_ENTITY_VALUE:
8223 xmlGenericError(xmlGenericErrorContext,
8224 "PP: internal error, state == ENTITY_VALUE\n");
8225 ctxt->instate = XML_PARSER_CONTENT;
8226#ifdef DEBUG_PUSH
8227 xmlGenericError(xmlGenericErrorContext,
8228 "PP: entering DTD\n");
8229#endif
8230 break;
8231 case XML_PARSER_ATTRIBUTE_VALUE:
8232 xmlGenericError(xmlGenericErrorContext,
8233 "PP: internal error, state == ATTRIBUTE_VALUE\n");
8234 ctxt->instate = XML_PARSER_START_TAG;
8235#ifdef DEBUG_PUSH
8236 xmlGenericError(xmlGenericErrorContext,
8237 "PP: entering START_TAG\n");
8238#endif
8239 break;
8240 case XML_PARSER_SYSTEM_LITERAL:
8241 xmlGenericError(xmlGenericErrorContext,
8242 "PP: internal error, state == SYSTEM_LITERAL\n");
8243 ctxt->instate = XML_PARSER_START_TAG;
8244#ifdef DEBUG_PUSH
8245 xmlGenericError(xmlGenericErrorContext,
8246 "PP: entering START_TAG\n");
8247#endif
8248 break;
8249 }
8250 }
8251done:
8252#ifdef DEBUG_PUSH
8253 xmlGenericError(xmlGenericErrorContext, "PP: done %d\n", ret);
8254#endif
8255 return(ret);
8256}
8257
8258/**
8259 * xmlParseTry:
8260 * @ctxt: an XML parser context
8261 *
8262 * Try to progress on parsing
8263 *
8264 * Returns zero if no parsing was possible
8265 */
8266int
8267xmlParseTry(xmlParserCtxtPtr ctxt) {
8268 return(xmlParseTryOrFinish(ctxt, 0));
8269}
8270
8271/**
8272 * xmlParseChunk:
8273 * @ctxt: an XML parser context
8274 * @chunk: an char array
8275 * @size: the size in byte of the chunk
8276 * @terminate: last chunk indicator
8277 *
8278 * Parse a Chunk of memory
8279 *
8280 * Returns zero if no error, the xmlParserErrors otherwise.
8281 */
8282int
8283xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
8284 int terminate) {
8285 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8286 (ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
8287 int base = ctxt->input->base - ctxt->input->buf->buffer->content;
8288 int cur = ctxt->input->cur - ctxt->input->base;
8289
8290 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8291 ctxt->input->base = ctxt->input->buf->buffer->content + base;
8292 ctxt->input->cur = ctxt->input->base + cur;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008293 ctxt->input->end =
8294 &ctxt->input->buf->buffer->content[ctxt->input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008295#ifdef DEBUG_PUSH
8296 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8297#endif
8298
8299 if ((terminate) || (ctxt->input->buf->buffer->use > 80))
8300 xmlParseTryOrFinish(ctxt, terminate);
8301 } else if (ctxt->instate != XML_PARSER_EOF) {
8302 if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
8303 xmlParserInputBufferPtr in = ctxt->input->buf;
8304 if ((in->encoder != NULL) && (in->buffer != NULL) &&
8305 (in->raw != NULL)) {
8306 int nbchars;
8307
8308 nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
8309 if (nbchars < 0) {
8310 xmlGenericError(xmlGenericErrorContext,
8311 "xmlParseChunk: encoder error\n");
8312 return(XML_ERR_INVALID_ENCODING);
8313 }
8314 }
8315 }
8316 }
8317 xmlParseTryOrFinish(ctxt, terminate);
8318 if (terminate) {
8319 /*
8320 * Check for termination
8321 */
8322 if ((ctxt->instate != XML_PARSER_EOF) &&
8323 (ctxt->instate != XML_PARSER_EPILOG)) {
8324 ctxt->errNo = XML_ERR_DOCUMENT_END;
8325 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8326 ctxt->sax->error(ctxt->userData,
8327 "Extra content at the end of the document\n");
8328 ctxt->wellFormed = 0;
8329 ctxt->disableSAX = 1;
8330 }
8331 if (ctxt->instate != XML_PARSER_EOF) {
8332 if ((ctxt->sax) && (ctxt->sax->endDocument != NULL) &&
8333 (!ctxt->disableSAX))
8334 ctxt->sax->endDocument(ctxt->userData);
8335 }
8336 ctxt->instate = XML_PARSER_EOF;
8337 }
8338 return((xmlParserErrors) ctxt->errNo);
8339}
8340
8341/************************************************************************
8342 * *
8343 * I/O front end functions to the parser *
8344 * *
8345 ************************************************************************/
8346
8347/**
8348 * xmlStopParser:
8349 * @ctxt: an XML parser context
8350 *
8351 * Blocks further parser processing
8352 */
8353void
8354xmlStopParser(xmlParserCtxtPtr ctxt) {
8355 ctxt->instate = XML_PARSER_EOF;
8356 if (ctxt->input != NULL)
8357 ctxt->input->cur = BAD_CAST"";
8358}
8359
8360/**
8361 * xmlCreatePushParserCtxt:
8362 * @sax: a SAX handler
8363 * @user_data: The user data returned on SAX callbacks
8364 * @chunk: a pointer to an array of chars
8365 * @size: number of chars in the array
8366 * @filename: an optional file name or URI
8367 *
8368 * Create a parser context for using the XML parser in push mode
8369 * To allow content encoding detection, @size should be >= 4
8370 * The value of @filename is used for fetching external entities
8371 * and error/warning reports.
8372 *
8373 * Returns the new parser context or NULL
8374 */
8375xmlParserCtxtPtr
8376xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8377 const char *chunk, int size, const char *filename) {
8378 xmlParserCtxtPtr ctxt;
8379 xmlParserInputPtr inputStream;
8380 xmlParserInputBufferPtr buf;
8381 xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
8382
8383 /*
8384 * plug some encoding conversion routines
8385 */
8386 if ((chunk != NULL) && (size >= 4))
8387 enc = xmlDetectCharEncoding((const xmlChar *) chunk, size);
8388
8389 buf = xmlAllocParserInputBuffer(enc);
8390 if (buf == NULL) return(NULL);
8391
8392 ctxt = xmlNewParserCtxt();
8393 if (ctxt == NULL) {
8394 xmlFree(buf);
8395 return(NULL);
8396 }
8397 if (sax != NULL) {
8398 if (ctxt->sax != &xmlDefaultSAXHandler)
8399 xmlFree(ctxt->sax);
8400 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8401 if (ctxt->sax == NULL) {
8402 xmlFree(buf);
8403 xmlFree(ctxt);
8404 return(NULL);
8405 }
8406 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8407 if (user_data != NULL)
8408 ctxt->userData = user_data;
8409 }
8410 if (filename == NULL) {
8411 ctxt->directory = NULL;
8412 } else {
8413 ctxt->directory = xmlParserGetDirectory(filename);
8414 }
8415
8416 inputStream = xmlNewInputStream(ctxt);
8417 if (inputStream == NULL) {
8418 xmlFreeParserCtxt(ctxt);
8419 return(NULL);
8420 }
8421
8422 if (filename == NULL)
8423 inputStream->filename = NULL;
8424 else
8425 inputStream->filename = xmlMemStrdup(filename);
8426 inputStream->buf = buf;
8427 inputStream->base = inputStream->buf->buffer->content;
8428 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00008429 inputStream->end =
8430 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00008431 if (enc != XML_CHAR_ENCODING_NONE) {
8432 xmlSwitchEncoding(ctxt, enc);
8433 }
8434
8435 inputPush(ctxt, inputStream);
8436
8437 if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
8438 (ctxt->input->buf != NULL)) {
8439 xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
8440#ifdef DEBUG_PUSH
8441 xmlGenericError(xmlGenericErrorContext, "PP: pushed %d\n", size);
8442#endif
8443 }
8444
8445 return(ctxt);
8446}
8447
8448/**
8449 * xmlCreateIOParserCtxt:
8450 * @sax: a SAX handler
8451 * @user_data: The user data returned on SAX callbacks
8452 * @ioread: an I/O read function
8453 * @ioclose: an I/O close function
8454 * @ioctx: an I/O handler
8455 * @enc: the charset encoding if known
8456 *
8457 * Create a parser context for using the XML parser with an existing
8458 * I/O stream
8459 *
8460 * Returns the new parser context or NULL
8461 */
8462xmlParserCtxtPtr
8463xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
8464 xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
8465 void *ioctx, xmlCharEncoding enc) {
8466 xmlParserCtxtPtr ctxt;
8467 xmlParserInputPtr inputStream;
8468 xmlParserInputBufferPtr buf;
8469
8470 buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
8471 if (buf == NULL) return(NULL);
8472
8473 ctxt = xmlNewParserCtxt();
8474 if (ctxt == NULL) {
8475 xmlFree(buf);
8476 return(NULL);
8477 }
8478 if (sax != NULL) {
8479 if (ctxt->sax != &xmlDefaultSAXHandler)
8480 xmlFree(ctxt->sax);
8481 ctxt->sax = (xmlSAXHandlerPtr) xmlMalloc(sizeof(xmlSAXHandler));
8482 if (ctxt->sax == NULL) {
8483 xmlFree(buf);
8484 xmlFree(ctxt);
8485 return(NULL);
8486 }
8487 memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
8488 if (user_data != NULL)
8489 ctxt->userData = user_data;
8490 }
8491
8492 inputStream = xmlNewIOInputStream(ctxt, buf, enc);
8493 if (inputStream == NULL) {
8494 xmlFreeParserCtxt(ctxt);
8495 return(NULL);
8496 }
8497 inputPush(ctxt, inputStream);
8498
8499 return(ctxt);
8500}
8501
8502/************************************************************************
8503 * *
8504 * Front ends when parsing a Dtd *
8505 * *
8506 ************************************************************************/
8507
8508/**
8509 * xmlIOParseDTD:
8510 * @sax: the SAX handler block or NULL
8511 * @input: an Input Buffer
8512 * @enc: the charset encoding if known
8513 *
8514 * Load and parse a DTD
8515 *
8516 * Returns the resulting xmlDtdPtr or NULL in case of error.
8517 * @input will be freed at parsing end.
8518 */
8519
8520xmlDtdPtr
8521xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
8522 xmlCharEncoding enc) {
8523 xmlDtdPtr ret = NULL;
8524 xmlParserCtxtPtr ctxt;
8525 xmlParserInputPtr pinput = NULL;
8526
8527 if (input == NULL)
8528 return(NULL);
8529
8530 ctxt = xmlNewParserCtxt();
8531 if (ctxt == NULL) {
8532 return(NULL);
8533 }
8534
8535 /*
8536 * Set-up the SAX context
8537 */
8538 if (sax != NULL) {
8539 if (ctxt->sax != NULL)
8540 xmlFree(ctxt->sax);
8541 ctxt->sax = sax;
8542 ctxt->userData = NULL;
8543 }
8544
8545 /*
8546 * generate a parser input from the I/O handler
8547 */
8548
8549 pinput = xmlNewIOInputStream(ctxt, input, enc);
8550 if (pinput == NULL) {
8551 if (sax != NULL) ctxt->sax = NULL;
8552 xmlFreeParserCtxt(ctxt);
8553 return(NULL);
8554 }
8555
8556 /*
8557 * plug some encoding conversion routines here.
8558 */
8559 xmlPushInput(ctxt, pinput);
8560
8561 pinput->filename = NULL;
8562 pinput->line = 1;
8563 pinput->col = 1;
8564 pinput->base = ctxt->input->cur;
8565 pinput->cur = ctxt->input->cur;
8566 pinput->free = NULL;
8567
8568 /*
8569 * let's parse that entity knowing it's an external subset.
8570 */
8571 ctxt->inSubset = 2;
8572 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8573 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8574 BAD_CAST "none", BAD_CAST "none");
8575 xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
8576
8577 if (ctxt->myDoc != NULL) {
8578 if (ctxt->wellFormed) {
8579 ret = ctxt->myDoc->extSubset;
8580 ctxt->myDoc->extSubset = NULL;
8581 } else {
8582 ret = NULL;
8583 }
8584 xmlFreeDoc(ctxt->myDoc);
8585 ctxt->myDoc = NULL;
8586 }
8587 if (sax != NULL) ctxt->sax = NULL;
8588 xmlFreeParserCtxt(ctxt);
8589
8590 return(ret);
8591}
8592
8593/**
8594 * xmlSAXParseDTD:
8595 * @sax: the SAX handler block
8596 * @ExternalID: a NAME* containing the External ID of the DTD
8597 * @SystemID: a NAME* containing the URL to the DTD
8598 *
8599 * Load and parse an external subset.
8600 *
8601 * Returns the resulting xmlDtdPtr or NULL in case of error.
8602 */
8603
8604xmlDtdPtr
8605xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
8606 const xmlChar *SystemID) {
8607 xmlDtdPtr ret = NULL;
8608 xmlParserCtxtPtr ctxt;
8609 xmlParserInputPtr input = NULL;
8610 xmlCharEncoding enc;
8611
8612 if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
8613
8614 ctxt = xmlNewParserCtxt();
8615 if (ctxt == NULL) {
8616 return(NULL);
8617 }
8618
8619 /*
8620 * Set-up the SAX context
8621 */
8622 if (sax != NULL) {
8623 if (ctxt->sax != NULL)
8624 xmlFree(ctxt->sax);
8625 ctxt->sax = sax;
8626 ctxt->userData = NULL;
8627 }
8628
8629 /*
8630 * Ask the Entity resolver to load the damn thing
8631 */
8632
8633 if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
8634 input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID, SystemID);
8635 if (input == NULL) {
8636 if (sax != NULL) ctxt->sax = NULL;
8637 xmlFreeParserCtxt(ctxt);
8638 return(NULL);
8639 }
8640
8641 /*
8642 * plug some encoding conversion routines here.
8643 */
8644 xmlPushInput(ctxt, input);
8645 enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
8646 xmlSwitchEncoding(ctxt, enc);
8647
8648 if (input->filename == NULL)
8649 input->filename = (char *) xmlStrdup(SystemID);
8650 input->line = 1;
8651 input->col = 1;
8652 input->base = ctxt->input->cur;
8653 input->cur = ctxt->input->cur;
8654 input->free = NULL;
8655
8656 /*
8657 * let's parse that entity knowing it's an external subset.
8658 */
8659 ctxt->inSubset = 2;
8660 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8661 ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
8662 ExternalID, SystemID);
8663 xmlParseExternalSubset(ctxt, ExternalID, SystemID);
8664
8665 if (ctxt->myDoc != NULL) {
8666 if (ctxt->wellFormed) {
8667 ret = ctxt->myDoc->extSubset;
8668 ctxt->myDoc->extSubset = NULL;
8669 } else {
8670 ret = NULL;
8671 }
8672 xmlFreeDoc(ctxt->myDoc);
8673 ctxt->myDoc = NULL;
8674 }
8675 if (sax != NULL) ctxt->sax = NULL;
8676 xmlFreeParserCtxt(ctxt);
8677
8678 return(ret);
8679}
8680
8681/**
8682 * xmlParseDTD:
8683 * @ExternalID: a NAME* containing the External ID of the DTD
8684 * @SystemID: a NAME* containing the URL to the DTD
8685 *
8686 * Load and parse an external subset.
8687 *
8688 * Returns the resulting xmlDtdPtr or NULL in case of error.
8689 */
8690
8691xmlDtdPtr
8692xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
8693 return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
8694}
8695
8696/************************************************************************
8697 * *
8698 * Front ends when parsing an Entity *
8699 * *
8700 ************************************************************************/
8701
8702/**
8703 * xmlSAXParseBalancedChunk:
8704 * @ctx: an XML parser context (possibly NULL)
8705 * @sax: the SAX handler bloc (possibly NULL)
8706 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8707 * @input: a parser input stream
8708 * @enc: the encoding
8709 *
8710 * Parse a well-balanced chunk of an XML document
8711 * The user has to provide SAX callback block whose routines will be
8712 * called by the parser
8713 * The allowed sequence for the Well Balanced Chunk is the one defined by
8714 * the content production in the XML grammar:
8715 *
8716 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
8717 *
8718 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
8719 * the error code otherwise
8720 */
8721
8722int
8723xmlSAXParseBalancedChunk(xmlParserCtxtPtr ctx, xmlSAXHandlerPtr sax,
8724 void *user_data, xmlParserInputPtr input,
8725 xmlCharEncoding enc) {
8726 xmlParserCtxtPtr ctxt;
8727 int ret;
8728
8729 if (input == NULL) return(-1);
8730
8731 if (ctx != NULL)
8732 ctxt = ctx;
8733 else {
8734 ctxt = xmlNewParserCtxt();
8735 if (ctxt == NULL)
8736 return(-1);
8737 if (sax == NULL)
8738 ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
8739 }
8740
8741 /*
8742 * Set-up the SAX context
8743 */
8744 if (sax != NULL) {
8745 if (ctxt->sax != NULL)
8746 xmlFree(ctxt->sax);
8747 ctxt->sax = sax;
8748 ctxt->userData = user_data;
8749 }
8750
8751 /*
8752 * plug some encoding conversion routines here.
8753 */
8754 xmlPushInput(ctxt, input);
8755 if (enc != XML_CHAR_ENCODING_NONE)
8756 xmlSwitchEncoding(ctxt, enc);
8757
8758 /*
8759 * let's parse that entity knowing it's an external subset.
8760 */
8761 xmlParseContent(ctxt);
8762 ret = ctxt->errNo;
8763
8764 if (ctx == NULL) {
8765 if (sax != NULL)
8766 ctxt->sax = NULL;
8767 else
8768 xmlFreeDoc(ctxt->myDoc);
8769 xmlFreeParserCtxt(ctxt);
8770 }
8771 return(ret);
8772}
8773
8774/**
8775 * xmlParseCtxtExternalEntity:
8776 * @ctx: the existing parsing context
8777 * @URL: the URL for the entity to load
8778 * @ID: the System ID for the entity to load
8779 * @list: the return value for the set of parsed nodes
8780 *
8781 * Parse an external general entity within an existing parsing context
8782 * An external general parsed entity is well-formed if it matches the
8783 * production labeled extParsedEnt.
8784 *
8785 * [78] extParsedEnt ::= TextDecl? content
8786 *
8787 * Returns 0 if the entity is well formed, -1 in case of args problem and
8788 * the parser error code otherwise
8789 */
8790
8791int
8792xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
8793 const xmlChar *ID, xmlNodePtr *list) {
8794 xmlParserCtxtPtr ctxt;
8795 xmlDocPtr newDoc;
8796 xmlSAXHandlerPtr oldsax = NULL;
8797 int ret = 0;
8798
8799 if (ctx->depth > 40) {
8800 return(XML_ERR_ENTITY_LOOP);
8801 }
8802
8803 if (list != NULL)
8804 *list = NULL;
8805 if ((URL == NULL) && (ID == NULL))
8806 return(-1);
8807 if (ctx->myDoc == NULL) /* @@ relax but check for dereferences */
8808 return(-1);
8809
8810
8811 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8812 if (ctxt == NULL) return(-1);
8813 ctxt->userData = ctxt;
8814 oldsax = ctxt->sax;
8815 ctxt->sax = ctx->sax;
8816 newDoc = xmlNewDoc(BAD_CAST "1.0");
8817 if (newDoc == NULL) {
8818 xmlFreeParserCtxt(ctxt);
8819 return(-1);
8820 }
8821 if (ctx->myDoc != NULL) {
8822 newDoc->intSubset = ctx->myDoc->intSubset;
8823 newDoc->extSubset = ctx->myDoc->extSubset;
8824 }
8825 if (ctx->myDoc->URL != NULL) {
8826 newDoc->URL = xmlStrdup(ctx->myDoc->URL);
8827 }
8828 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
8829 if (newDoc->children == NULL) {
8830 ctxt->sax = oldsax;
8831 xmlFreeParserCtxt(ctxt);
8832 newDoc->intSubset = NULL;
8833 newDoc->extSubset = NULL;
8834 xmlFreeDoc(newDoc);
8835 return(-1);
8836 }
8837 nodePush(ctxt, newDoc->children);
8838 if (ctx->myDoc == NULL) {
8839 ctxt->myDoc = newDoc;
8840 } else {
8841 ctxt->myDoc = ctx->myDoc;
8842 newDoc->children->doc = ctx->myDoc;
8843 }
8844
8845 /*
8846 * Parse a possible text declaration first
8847 */
8848 GROW;
8849 if ((RAW == '<') && (NXT(1) == '?') &&
8850 (NXT(2) == 'x') && (NXT(3) == 'm') &&
8851 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
8852 xmlParseTextDecl(ctxt);
8853 }
8854
8855 /*
8856 * Doing validity checking on chunk doesn't make sense
8857 */
8858 ctxt->instate = XML_PARSER_CONTENT;
8859 ctxt->validate = ctx->validate;
8860 ctxt->loadsubset = ctx->loadsubset;
8861 ctxt->depth = ctx->depth + 1;
8862 ctxt->replaceEntities = ctx->replaceEntities;
8863 if (ctxt->validate) {
8864 ctxt->vctxt.error = ctx->vctxt.error;
8865 ctxt->vctxt.warning = ctx->vctxt.warning;
8866 /* Allocate the Node stack */
8867 ctxt->vctxt.nodeTab = (xmlNodePtr *) xmlMalloc(4 * sizeof(xmlNodePtr));
8868 if (ctxt->vctxt.nodeTab == NULL) {
8869 xmlGenericError(xmlGenericErrorContext,
8870 "xmlParseCtxtExternalEntity: out of memory\n");
8871 ctxt->validate = 0;
8872 ctxt->vctxt.error = NULL;
8873 ctxt->vctxt.warning = NULL;
8874 } else {
8875 ctxt->vctxt.nodeNr = 0;
8876 ctxt->vctxt.nodeMax = 4;
8877 ctxt->vctxt.node = NULL;
8878 }
8879 } else {
8880 ctxt->vctxt.error = NULL;
8881 ctxt->vctxt.warning = NULL;
8882 }
8883
8884 xmlParseContent(ctxt);
8885
8886 if ((RAW == '<') && (NXT(1) == '/')) {
8887 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8888 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8889 ctxt->sax->error(ctxt->userData,
8890 "chunk is not well balanced\n");
8891 ctxt->wellFormed = 0;
8892 ctxt->disableSAX = 1;
8893 } else if (RAW != 0) {
8894 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
8895 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8896 ctxt->sax->error(ctxt->userData,
8897 "extra content at the end of well balanced chunk\n");
8898 ctxt->wellFormed = 0;
8899 ctxt->disableSAX = 1;
8900 }
8901 if (ctxt->node != newDoc->children) {
8902 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
8903 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
8904 ctxt->sax->error(ctxt->userData,
8905 "chunk is not well balanced\n");
8906 ctxt->wellFormed = 0;
8907 ctxt->disableSAX = 1;
8908 }
8909
8910 if (!ctxt->wellFormed) {
8911 if (ctxt->errNo == 0)
8912 ret = 1;
8913 else
8914 ret = ctxt->errNo;
8915 } else {
8916 if (list != NULL) {
8917 xmlNodePtr cur;
8918
8919 /*
8920 * Return the newly created nodeset after unlinking it from
8921 * they pseudo parent.
8922 */
8923 cur = newDoc->children->children;
8924 *list = cur;
8925 while (cur != NULL) {
8926 cur->parent = NULL;
8927 cur = cur->next;
8928 }
8929 newDoc->children->children = NULL;
8930 }
8931 ret = 0;
8932 }
8933 ctxt->sax = oldsax;
8934 xmlFreeParserCtxt(ctxt);
8935 newDoc->intSubset = NULL;
8936 newDoc->extSubset = NULL;
8937 xmlFreeDoc(newDoc);
8938
8939 return(ret);
8940}
8941
8942/**
8943 * xmlParseExternalEntity:
8944 * @doc: the document the chunk pertains to
8945 * @sax: the SAX handler bloc (possibly NULL)
8946 * @user_data: The user data returned on SAX callbacks (possibly NULL)
8947 * @depth: Used for loop detection, use 0
8948 * @URL: the URL for the entity to load
8949 * @ID: the System ID for the entity to load
8950 * @list: the return value for the set of parsed nodes
8951 *
8952 * Parse an external general entity
8953 * An external general parsed entity is well-formed if it matches the
8954 * production labeled extParsedEnt.
8955 *
8956 * [78] extParsedEnt ::= TextDecl? content
8957 *
8958 * Returns 0 if the entity is well formed, -1 in case of args problem and
8959 * the parser error code otherwise
8960 */
8961
8962int
8963xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
8964 int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *list) {
8965 xmlParserCtxtPtr ctxt;
8966 xmlDocPtr newDoc;
8967 xmlSAXHandlerPtr oldsax = NULL;
8968 int ret = 0;
8969
8970 if (depth > 40) {
8971 return(XML_ERR_ENTITY_LOOP);
8972 }
8973
8974
8975
8976 if (list != NULL)
8977 *list = NULL;
8978 if ((URL == NULL) && (ID == NULL))
8979 return(-1);
8980 if (doc == NULL) /* @@ relax but check for dereferences */
8981 return(-1);
8982
8983
8984 ctxt = xmlCreateEntityParserCtxt(URL, ID, NULL);
8985 if (ctxt == NULL) return(-1);
8986 ctxt->userData = ctxt;
8987 if (sax != NULL) {
8988 oldsax = ctxt->sax;
8989 ctxt->sax = sax;
8990 if (user_data != NULL)
8991 ctxt->userData = user_data;
8992 }
8993 newDoc = xmlNewDoc(BAD_CAST "1.0");
8994 if (newDoc == NULL) {
8995 xmlFreeParserCtxt(ctxt);
8996 return(-1);
8997 }
8998 if (doc != NULL) {
8999 newDoc->intSubset = doc->intSubset;
9000 newDoc->extSubset = doc->extSubset;
9001 }
9002 if (doc->URL != NULL) {
9003 newDoc->URL = xmlStrdup(doc->URL);
9004 }
9005 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9006 if (newDoc->children == NULL) {
9007 if (sax != NULL)
9008 ctxt->sax = oldsax;
9009 xmlFreeParserCtxt(ctxt);
9010 newDoc->intSubset = NULL;
9011 newDoc->extSubset = NULL;
9012 xmlFreeDoc(newDoc);
9013 return(-1);
9014 }
9015 nodePush(ctxt, newDoc->children);
9016 if (doc == NULL) {
9017 ctxt->myDoc = newDoc;
9018 } else {
9019 ctxt->myDoc = doc;
9020 newDoc->children->doc = doc;
9021 }
9022
9023 /*
9024 * Parse a possible text declaration first
9025 */
9026 GROW;
9027 if ((RAW == '<') && (NXT(1) == '?') &&
9028 (NXT(2) == 'x') && (NXT(3) == 'm') &&
9029 (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
9030 xmlParseTextDecl(ctxt);
9031 }
9032
9033 /*
9034 * Doing validity checking on chunk doesn't make sense
9035 */
9036 ctxt->instate = XML_PARSER_CONTENT;
9037 ctxt->validate = 0;
9038 ctxt->loadsubset = 0;
9039 ctxt->depth = depth;
9040
9041 xmlParseContent(ctxt);
9042
9043 if ((RAW == '<') && (NXT(1) == '/')) {
9044 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9045 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9046 ctxt->sax->error(ctxt->userData,
9047 "chunk is not well balanced\n");
9048 ctxt->wellFormed = 0;
9049 ctxt->disableSAX = 1;
9050 } else if (RAW != 0) {
9051 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9052 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9053 ctxt->sax->error(ctxt->userData,
9054 "extra content at the end of well balanced chunk\n");
9055 ctxt->wellFormed = 0;
9056 ctxt->disableSAX = 1;
9057 }
9058 if (ctxt->node != newDoc->children) {
9059 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9060 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9061 ctxt->sax->error(ctxt->userData,
9062 "chunk is not well balanced\n");
9063 ctxt->wellFormed = 0;
9064 ctxt->disableSAX = 1;
9065 }
9066
9067 if (!ctxt->wellFormed) {
9068 if (ctxt->errNo == 0)
9069 ret = 1;
9070 else
9071 ret = ctxt->errNo;
9072 } else {
9073 if (list != NULL) {
9074 xmlNodePtr cur;
9075
9076 /*
9077 * Return the newly created nodeset after unlinking it from
9078 * they pseudo parent.
9079 */
9080 cur = newDoc->children->children;
9081 *list = cur;
9082 while (cur != NULL) {
9083 cur->parent = NULL;
9084 cur = cur->next;
9085 }
9086 newDoc->children->children = NULL;
9087 }
9088 ret = 0;
9089 }
9090 if (sax != NULL)
9091 ctxt->sax = oldsax;
9092 xmlFreeParserCtxt(ctxt);
9093 newDoc->intSubset = NULL;
9094 newDoc->extSubset = NULL;
9095 xmlFreeDoc(newDoc);
9096
9097 return(ret);
9098}
9099
9100/**
9101 * xmlParseBalancedChunk:
9102 * @doc: the document the chunk pertains to
9103 * @sax: the SAX handler bloc (possibly NULL)
9104 * @user_data: The user data returned on SAX callbacks (possibly NULL)
9105 * @depth: Used for loop detection, use 0
9106 * @string: the input string in UTF8 or ISO-Latin (zero terminated)
9107 * @list: the return value for the set of parsed nodes
9108 *
9109 * Parse a well-balanced chunk of an XML document
9110 * called by the parser
9111 * The allowed sequence for the Well Balanced Chunk is the one defined by
9112 * the content production in the XML grammar:
9113 *
9114 * [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
9115 *
9116 * Returns 0 if the chunk is well balanced, -1 in case of args problem and
9117 * the parser error code otherwise
9118 */
9119
9120int
9121xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
9122 void *user_data, int depth, const xmlChar *string, xmlNodePtr *list) {
9123 xmlParserCtxtPtr ctxt;
9124 xmlDocPtr newDoc;
9125 xmlSAXHandlerPtr oldsax = NULL;
9126 int size;
9127 int ret = 0;
9128
9129 if (depth > 40) {
9130 return(XML_ERR_ENTITY_LOOP);
9131 }
9132
9133
9134 if (list != NULL)
9135 *list = NULL;
9136 if (string == NULL)
9137 return(-1);
9138
9139 size = xmlStrlen(string);
9140
9141 ctxt = xmlCreateMemoryParserCtxt((char *) string, size);
9142 if (ctxt == NULL) return(-1);
9143 ctxt->userData = ctxt;
9144 if (sax != NULL) {
9145 oldsax = ctxt->sax;
9146 ctxt->sax = sax;
9147 if (user_data != NULL)
9148 ctxt->userData = user_data;
9149 }
9150 newDoc = xmlNewDoc(BAD_CAST "1.0");
9151 if (newDoc == NULL) {
9152 xmlFreeParserCtxt(ctxt);
9153 return(-1);
9154 }
9155 if (doc != NULL) {
9156 newDoc->intSubset = doc->intSubset;
9157 newDoc->extSubset = doc->extSubset;
9158 }
9159 newDoc->children = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
9160 if (newDoc->children == NULL) {
9161 if (sax != NULL)
9162 ctxt->sax = oldsax;
9163 xmlFreeParserCtxt(ctxt);
9164 newDoc->intSubset = NULL;
9165 newDoc->extSubset = NULL;
9166 xmlFreeDoc(newDoc);
9167 return(-1);
9168 }
9169 nodePush(ctxt, newDoc->children);
9170 if (doc == NULL) {
9171 ctxt->myDoc = newDoc;
9172 } else {
9173 ctxt->myDoc = doc;
9174 newDoc->children->doc = doc;
9175 }
9176 ctxt->instate = XML_PARSER_CONTENT;
9177 ctxt->depth = depth;
9178
9179 /*
9180 * Doing validity checking on chunk doesn't make sense
9181 */
9182 ctxt->validate = 0;
9183 ctxt->loadsubset = 0;
9184
9185 xmlParseContent(ctxt);
9186
9187 if ((RAW == '<') && (NXT(1) == '/')) {
9188 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9189 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9190 ctxt->sax->error(ctxt->userData,
9191 "chunk is not well balanced\n");
9192 ctxt->wellFormed = 0;
9193 ctxt->disableSAX = 1;
9194 } else if (RAW != 0) {
9195 ctxt->errNo = XML_ERR_EXTRA_CONTENT;
9196 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9197 ctxt->sax->error(ctxt->userData,
9198 "extra content at the end of well balanced chunk\n");
9199 ctxt->wellFormed = 0;
9200 ctxt->disableSAX = 1;
9201 }
9202 if (ctxt->node != newDoc->children) {
9203 ctxt->errNo = XML_ERR_NOT_WELL_BALANCED;
9204 if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
9205 ctxt->sax->error(ctxt->userData,
9206 "chunk is not well balanced\n");
9207 ctxt->wellFormed = 0;
9208 ctxt->disableSAX = 1;
9209 }
9210
9211 if (!ctxt->wellFormed) {
9212 if (ctxt->errNo == 0)
9213 ret = 1;
9214 else
9215 ret = ctxt->errNo;
9216 } else {
9217 if (list != NULL) {
9218 xmlNodePtr cur;
9219
9220 /*
9221 * Return the newly created nodeset after unlinking it from
9222 * they pseudo parent.
9223 */
9224 cur = newDoc->children->children;
9225 *list = cur;
9226 while (cur != NULL) {
9227 cur->parent = NULL;
9228 cur = cur->next;
9229 }
9230 newDoc->children->children = NULL;
9231 }
9232 ret = 0;
9233 }
9234 if (sax != NULL)
9235 ctxt->sax = oldsax;
9236 xmlFreeParserCtxt(ctxt);
9237 newDoc->intSubset = NULL;
9238 newDoc->extSubset = NULL;
9239 xmlFreeDoc(newDoc);
9240
9241 return(ret);
9242}
9243
9244/**
9245 * xmlSAXParseEntity:
9246 * @sax: the SAX handler block
9247 * @filename: the filename
9248 *
9249 * parse an XML external entity out of context and build a tree.
9250 * It use the given SAX function block to handle the parsing callback.
9251 * If sax is NULL, fallback to the default DOM tree building routines.
9252 *
9253 * [78] extParsedEnt ::= TextDecl? content
9254 *
9255 * This correspond to a "Well Balanced" chunk
9256 *
9257 * Returns the resulting document tree
9258 */
9259
9260xmlDocPtr
9261xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
9262 xmlDocPtr ret;
9263 xmlParserCtxtPtr ctxt;
9264 char *directory = NULL;
9265
9266 ctxt = xmlCreateFileParserCtxt(filename);
9267 if (ctxt == NULL) {
9268 return(NULL);
9269 }
9270 if (sax != NULL) {
9271 if (ctxt->sax != NULL)
9272 xmlFree(ctxt->sax);
9273 ctxt->sax = sax;
9274 ctxt->userData = NULL;
9275 }
9276
9277 if ((ctxt->directory == NULL) && (directory == NULL))
9278 directory = xmlParserGetDirectory(filename);
9279
9280 xmlParseExtParsedEnt(ctxt);
9281
9282 if (ctxt->wellFormed)
9283 ret = ctxt->myDoc;
9284 else {
9285 ret = NULL;
9286 xmlFreeDoc(ctxt->myDoc);
9287 ctxt->myDoc = NULL;
9288 }
9289 if (sax != NULL)
9290 ctxt->sax = NULL;
9291 xmlFreeParserCtxt(ctxt);
9292
9293 return(ret);
9294}
9295
9296/**
9297 * xmlParseEntity:
9298 * @filename: the filename
9299 *
9300 * parse an XML external entity out of context and build a tree.
9301 *
9302 * [78] extParsedEnt ::= TextDecl? content
9303 *
9304 * This correspond to a "Well Balanced" chunk
9305 *
9306 * Returns the resulting document tree
9307 */
9308
9309xmlDocPtr
9310xmlParseEntity(const char *filename) {
9311 return(xmlSAXParseEntity(NULL, filename));
9312}
9313
9314/**
9315 * xmlCreateEntityParserCtxt:
9316 * @URL: the entity URL
9317 * @ID: the entity PUBLIC ID
9318 * @base: a posible base for the target URI
9319 *
9320 * Create a parser context for an external entity
9321 * Automatic support for ZLIB/Compress compressed document is provided
9322 * by default if found at compile-time.
9323 *
9324 * Returns the new parser context or NULL
9325 */
9326xmlParserCtxtPtr
9327xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
9328 const xmlChar *base) {
9329 xmlParserCtxtPtr ctxt;
9330 xmlParserInputPtr inputStream;
9331 char *directory = NULL;
9332 xmlChar *uri;
9333
9334 ctxt = xmlNewParserCtxt();
9335 if (ctxt == NULL) {
9336 return(NULL);
9337 }
9338
9339 uri = xmlBuildURI(URL, base);
9340
9341 if (uri == NULL) {
9342 inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
9343 if (inputStream == NULL) {
9344 xmlFreeParserCtxt(ctxt);
9345 return(NULL);
9346 }
9347
9348 inputPush(ctxt, inputStream);
9349
9350 if ((ctxt->directory == NULL) && (directory == NULL))
9351 directory = xmlParserGetDirectory((char *)URL);
9352 if ((ctxt->directory == NULL) && (directory != NULL))
9353 ctxt->directory = directory;
9354 } else {
9355 inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
9356 if (inputStream == NULL) {
9357 xmlFree(uri);
9358 xmlFreeParserCtxt(ctxt);
9359 return(NULL);
9360 }
9361
9362 inputPush(ctxt, inputStream);
9363
9364 if ((ctxt->directory == NULL) && (directory == NULL))
9365 directory = xmlParserGetDirectory((char *)uri);
9366 if ((ctxt->directory == NULL) && (directory != NULL))
9367 ctxt->directory = directory;
9368 xmlFree(uri);
9369 }
9370
9371 return(ctxt);
9372}
9373
9374/************************************************************************
9375 * *
9376 * Front ends when parsing from a file *
9377 * *
9378 ************************************************************************/
9379
9380/**
9381 * xmlCreateFileParserCtxt:
9382 * @filename: the filename
9383 *
9384 * Create a parser context for a file content.
9385 * Automatic support for ZLIB/Compress compressed document is provided
9386 * by default if found at compile-time.
9387 *
9388 * Returns the new parser context or NULL
9389 */
9390xmlParserCtxtPtr
9391xmlCreateFileParserCtxt(const char *filename)
9392{
9393 xmlParserCtxtPtr ctxt;
9394 xmlParserInputPtr inputStream;
9395 xmlParserInputBufferPtr buf;
9396 char *directory = NULL;
9397
9398 buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
9399 if (buf == NULL) {
9400 return(NULL);
9401 }
9402
9403 ctxt = xmlNewParserCtxt();
9404 if (ctxt == NULL) {
9405 if (xmlDefaultSAXHandler.error != NULL) {
9406 xmlDefaultSAXHandler.error(NULL, "out of memory\n");
9407 }
9408 return(NULL);
9409 }
9410
9411 inputStream = xmlNewInputStream(ctxt);
9412 if (inputStream == NULL) {
9413 xmlFreeParserCtxt(ctxt);
9414 return(NULL);
9415 }
9416
9417 inputStream->filename = xmlMemStrdup(filename);
9418 inputStream->buf = buf;
9419 inputStream->base = inputStream->buf->buffer->content;
9420 inputStream->cur = inputStream->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009421 inputStream->end =
9422 &inputStream->buf->buffer->content[inputStream->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009423
9424 inputPush(ctxt, inputStream);
9425 if ((ctxt->directory == NULL) && (directory == NULL))
9426 directory = xmlParserGetDirectory(filename);
9427 if ((ctxt->directory == NULL) && (directory != NULL))
9428 ctxt->directory = directory;
9429
9430 return(ctxt);
9431}
9432
9433/**
9434 * xmlSAXParseFile:
9435 * @sax: the SAX handler block
9436 * @filename: the filename
9437 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9438 * documents
9439 *
9440 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9441 * compressed document is provided by default if found at compile-time.
9442 * It use the given SAX function block to handle the parsing callback.
9443 * If sax is NULL, fallback to the default DOM tree building routines.
9444 *
9445 * Returns the resulting document tree
9446 */
9447
9448xmlDocPtr
9449xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
9450 int recovery) {
9451 xmlDocPtr ret;
9452 xmlParserCtxtPtr ctxt;
9453 char *directory = NULL;
9454
9455 ctxt = xmlCreateFileParserCtxt(filename);
9456 if (ctxt == NULL) {
9457 return(NULL);
9458 }
9459 if (sax != NULL) {
9460 if (ctxt->sax != NULL)
9461 xmlFree(ctxt->sax);
9462 ctxt->sax = sax;
9463 ctxt->userData = NULL;
9464 }
9465
9466 if ((ctxt->directory == NULL) && (directory == NULL))
9467 directory = xmlParserGetDirectory(filename);
9468 if ((ctxt->directory == NULL) && (directory != NULL))
9469 ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
9470
9471 xmlParseDocument(ctxt);
9472
9473 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9474 else {
9475 ret = NULL;
9476 xmlFreeDoc(ctxt->myDoc);
9477 ctxt->myDoc = NULL;
9478 }
9479 if (sax != NULL)
9480 ctxt->sax = NULL;
9481 xmlFreeParserCtxt(ctxt);
9482
9483 return(ret);
9484}
9485
9486/**
9487 * xmlRecoverDoc:
9488 * @cur: a pointer to an array of xmlChar
9489 *
9490 * parse an XML in-memory document and build a tree.
9491 * In the case the document is not Well Formed, a tree is built anyway
9492 *
9493 * Returns the resulting document tree
9494 */
9495
9496xmlDocPtr
9497xmlRecoverDoc(xmlChar *cur) {
9498 return(xmlSAXParseDoc(NULL, cur, 1));
9499}
9500
9501/**
9502 * xmlParseFile:
9503 * @filename: the filename
9504 *
9505 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9506 * compressed document is provided by default if found at compile-time.
9507 *
9508 * Returns the resulting document tree
9509 */
9510
9511xmlDocPtr
9512xmlParseFile(const char *filename) {
9513 return(xmlSAXParseFile(NULL, filename, 0));
9514}
9515
9516/**
9517 * xmlRecoverFile:
9518 * @filename: the filename
9519 *
9520 * parse an XML file and build a tree. Automatic support for ZLIB/Compress
9521 * compressed document is provided by default if found at compile-time.
9522 * In the case the document is not Well Formed, a tree is built anyway
9523 *
9524 * Returns the resulting document tree
9525 */
9526
9527xmlDocPtr
9528xmlRecoverFile(const char *filename) {
9529 return(xmlSAXParseFile(NULL, filename, 1));
9530}
9531
9532
9533/**
9534 * xmlSetupParserForBuffer:
9535 * @ctxt: an XML parser context
9536 * @buffer: a xmlChar * buffer
9537 * @filename: a file name
9538 *
9539 * Setup the parser context to parse a new buffer; Clears any prior
9540 * contents from the parser context. The buffer parameter must not be
9541 * NULL, but the filename parameter can be
9542 */
9543void
9544xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
9545 const char* filename)
9546{
9547 xmlParserInputPtr input;
9548
9549 input = xmlNewInputStream(ctxt);
9550 if (input == NULL) {
9551 perror("malloc");
9552 xmlFree(ctxt);
9553 return;
9554 }
9555
9556 xmlClearParserCtxt(ctxt);
9557 if (filename != NULL)
9558 input->filename = xmlMemStrdup(filename);
9559 input->base = buffer;
9560 input->cur = buffer;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009561 input->end = &buffer[xmlStrlen(buffer)];
Owen Taylor3473f882001-02-23 17:55:21 +00009562 inputPush(ctxt, input);
9563}
9564
9565/**
9566 * xmlSAXUserParseFile:
9567 * @sax: a SAX handler
9568 * @user_data: The user data returned on SAX callbacks
9569 * @filename: a file name
9570 *
9571 * parse an XML file and call the given SAX handler routines.
9572 * Automatic support for ZLIB/Compress compressed document is provided
9573 *
9574 * Returns 0 in case of success or a error number otherwise
9575 */
9576int
9577xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
9578 const char *filename) {
9579 int ret = 0;
9580 xmlParserCtxtPtr ctxt;
9581
9582 ctxt = xmlCreateFileParserCtxt(filename);
9583 if (ctxt == NULL) return -1;
9584 if (ctxt->sax != &xmlDefaultSAXHandler)
9585 xmlFree(ctxt->sax);
9586 ctxt->sax = sax;
9587 if (user_data != NULL)
9588 ctxt->userData = user_data;
9589
9590 xmlParseDocument(ctxt);
9591
9592 if (ctxt->wellFormed)
9593 ret = 0;
9594 else {
9595 if (ctxt->errNo != 0)
9596 ret = ctxt->errNo;
9597 else
9598 ret = -1;
9599 }
9600 if (sax != NULL)
9601 ctxt->sax = NULL;
9602 xmlFreeParserCtxt(ctxt);
9603
9604 return ret;
9605}
9606
9607/************************************************************************
9608 * *
9609 * Front ends when parsing from memory *
9610 * *
9611 ************************************************************************/
9612
9613/**
9614 * xmlCreateMemoryParserCtxt:
9615 * @buffer: a pointer to a char array
9616 * @size: the size of the array
9617 *
9618 * Create a parser context for an XML in-memory document.
9619 *
9620 * Returns the new parser context or NULL
9621 */
9622xmlParserCtxtPtr
9623xmlCreateMemoryParserCtxt(char *buffer, int size) {
9624 xmlParserCtxtPtr ctxt;
9625 xmlParserInputPtr input;
9626 xmlParserInputBufferPtr buf;
9627
9628 if (buffer == NULL)
9629 return(NULL);
9630 if (size <= 0)
9631 return(NULL);
9632
9633 ctxt = xmlNewParserCtxt();
9634 if (ctxt == NULL)
9635 return(NULL);
9636
9637 buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
9638 if (buf == NULL) return(NULL);
9639
9640 input = xmlNewInputStream(ctxt);
9641 if (input == NULL) {
9642 xmlFreeParserCtxt(ctxt);
9643 return(NULL);
9644 }
9645
9646 input->filename = NULL;
9647 input->buf = buf;
9648 input->base = input->buf->buffer->content;
9649 input->cur = input->buf->buffer->content;
Daniel Veillard48b2f892001-02-25 16:11:03 +00009650 input->end = &input->buf->buffer->content[input->buf->buffer->use];
Owen Taylor3473f882001-02-23 17:55:21 +00009651
9652 inputPush(ctxt, input);
9653 return(ctxt);
9654}
9655
9656/**
9657 * xmlSAXParseMemory:
9658 * @sax: the SAX handler block
9659 * @buffer: an pointer to a char array
9660 * @size: the size of the array
9661 * @recovery: work in recovery mode, i.e. tries to read not Well Formed
9662 * documents
9663 *
9664 * parse an XML in-memory block and use the given SAX function block
9665 * to handle the parsing callback. If sax is NULL, fallback to the default
9666 * DOM tree building routines.
9667 *
9668 * Returns the resulting document tree
9669 */
9670xmlDocPtr
9671xmlSAXParseMemory(xmlSAXHandlerPtr sax, char *buffer, int size, int recovery) {
9672 xmlDocPtr ret;
9673 xmlParserCtxtPtr ctxt;
9674
9675 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9676 if (ctxt == NULL) return(NULL);
9677 if (sax != NULL) {
9678 ctxt->sax = sax;
9679 ctxt->userData = NULL;
9680 }
9681
9682 xmlParseDocument(ctxt);
9683
9684 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9685 else {
9686 ret = NULL;
9687 xmlFreeDoc(ctxt->myDoc);
9688 ctxt->myDoc = NULL;
9689 }
9690 if (sax != NULL)
9691 ctxt->sax = NULL;
9692 xmlFreeParserCtxt(ctxt);
9693
9694 return(ret);
9695}
9696
9697/**
9698 * xmlParseMemory:
9699 * @buffer: an pointer to a char array
9700 * @size: the size of the array
9701 *
9702 * parse an XML in-memory block and build a tree.
9703 *
9704 * Returns the resulting document tree
9705 */
9706
9707xmlDocPtr xmlParseMemory(char *buffer, int size) {
9708 return(xmlSAXParseMemory(NULL, buffer, size, 0));
9709}
9710
9711/**
9712 * xmlRecoverMemory:
9713 * @buffer: an pointer to a char array
9714 * @size: the size of the array
9715 *
9716 * parse an XML in-memory block and build a tree.
9717 * In the case the document is not Well Formed, a tree is built anyway
9718 *
9719 * Returns the resulting document tree
9720 */
9721
9722xmlDocPtr xmlRecoverMemory(char *buffer, int size) {
9723 return(xmlSAXParseMemory(NULL, buffer, size, 1));
9724}
9725
9726/**
9727 * xmlSAXUserParseMemory:
9728 * @sax: a SAX handler
9729 * @user_data: The user data returned on SAX callbacks
9730 * @buffer: an in-memory XML document input
9731 * @size: the length of the XML document in bytes
9732 *
9733 * A better SAX parsing routine.
9734 * parse an XML in-memory buffer and call the given SAX handler routines.
9735 *
9736 * Returns 0 in case of success or a error number otherwise
9737 */
9738int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
9739 char *buffer, int size) {
9740 int ret = 0;
9741 xmlParserCtxtPtr ctxt;
9742 xmlSAXHandlerPtr oldsax = NULL;
9743
9744 ctxt = xmlCreateMemoryParserCtxt(buffer, size);
9745 if (ctxt == NULL) return -1;
9746 if (sax != NULL) {
9747 oldsax = ctxt->sax;
9748 ctxt->sax = sax;
9749 }
9750 ctxt->userData = user_data;
9751
9752 xmlParseDocument(ctxt);
9753
9754 if (ctxt->wellFormed)
9755 ret = 0;
9756 else {
9757 if (ctxt->errNo != 0)
9758 ret = ctxt->errNo;
9759 else
9760 ret = -1;
9761 }
9762 if (sax != NULL) {
9763 ctxt->sax = oldsax;
9764 }
9765 xmlFreeParserCtxt(ctxt);
9766
9767 return ret;
9768}
9769
9770/**
9771 * xmlCreateDocParserCtxt:
9772 * @cur: a pointer to an array of xmlChar
9773 *
9774 * Creates a parser context for an XML in-memory document.
9775 *
9776 * Returns the new parser context or NULL
9777 */
9778xmlParserCtxtPtr
9779xmlCreateDocParserCtxt(xmlChar *cur) {
9780 int len;
9781
9782 if (cur == NULL)
9783 return(NULL);
9784 len = xmlStrlen(cur);
9785 return(xmlCreateMemoryParserCtxt((char *)cur, len));
9786}
9787
9788/**
9789 * xmlSAXParseDoc:
9790 * @sax: the SAX handler block
9791 * @cur: a pointer to an array of xmlChar
9792 * @recovery: work in recovery mode, i.e. tries to read no Well Formed
9793 * documents
9794 *
9795 * parse an XML in-memory document and build a tree.
9796 * It use the given SAX function block to handle the parsing callback.
9797 * If sax is NULL, fallback to the default DOM tree building routines.
9798 *
9799 * Returns the resulting document tree
9800 */
9801
9802xmlDocPtr
9803xmlSAXParseDoc(xmlSAXHandlerPtr sax, xmlChar *cur, int recovery) {
9804 xmlDocPtr ret;
9805 xmlParserCtxtPtr ctxt;
9806
9807 if (cur == NULL) return(NULL);
9808
9809
9810 ctxt = xmlCreateDocParserCtxt(cur);
9811 if (ctxt == NULL) return(NULL);
9812 if (sax != NULL) {
9813 ctxt->sax = sax;
9814 ctxt->userData = NULL;
9815 }
9816
9817 xmlParseDocument(ctxt);
9818 if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
9819 else {
9820 ret = NULL;
9821 xmlFreeDoc(ctxt->myDoc);
9822 ctxt->myDoc = NULL;
9823 }
9824 if (sax != NULL)
9825 ctxt->sax = NULL;
9826 xmlFreeParserCtxt(ctxt);
9827
9828 return(ret);
9829}
9830
9831/**
9832 * xmlParseDoc:
9833 * @cur: a pointer to an array of xmlChar
9834 *
9835 * parse an XML in-memory document and build a tree.
9836 *
9837 * Returns the resulting document tree
9838 */
9839
9840xmlDocPtr
9841xmlParseDoc(xmlChar *cur) {
9842 return(xmlSAXParseDoc(NULL, cur, 0));
9843}
9844
9845
9846/************************************************************************
9847 * *
9848 * Miscellaneous *
9849 * *
9850 ************************************************************************/
9851
9852#ifdef LIBXML_XPATH_ENABLED
9853#include <libxml/xpath.h>
9854#endif
9855
/* Non-zero once xmlInitParser() has completed; reset by xmlCleanupParser(). */
static int xmlParserInitialized = 0;

/**
 * xmlInitParser:
 *
 * Initialization function for the XML parser: runs the initializers of
 * the encoding handlers, predefined entities, default SAX handler and
 * default I/O callbacks, plus the HTML and XPath ones when those modules
 * are compiled in. Calls made after the first successful one are no-ops.
 *
 * This is not reentrant. Call once before processing in case of
 * use in multithreaded programs.
 */

void
xmlInitParser(void) {
    if (!xmlParserInitialized) {
        xmlInitCharEncodingHandlers();
        xmlInitializePredefinedEntities();
        xmlDefaultSAXHandlerInit();
        xmlRegisterDefaultInputCallbacks();
        xmlRegisterDefaultOutputCallbacks();
#ifdef LIBXML_HTML_ENABLED
        htmlInitAutoClose();
        htmlDefaultSAXHandlerInit();
#endif
#ifdef LIBXML_XPATH_ENABLED
        xmlXPathInit();
#endif
        /* only flagged as done once every initializer has run */
        xmlParserInitialized = 1;
    }
}
9884
9885/**
9886 * xmlCleanupParser:
9887 *
9888 * Cleanup function for the XML parser. It tries to reclaim all
9889 * parsing related global memory allocated for the parser processing.
9890 * It doesn't deallocate any document related memory. Calling this
9891 * function should not prevent reusing the parser.
9892 */
9893
9894void
9895xmlCleanupParser(void) {
9896 xmlParserInitialized = 0;
9897 xmlCleanupCharEncodingHandlers();
9898 xmlCleanupPredefinedEntities();
9899}
9900
9901/**
9902 * xmlPedanticParserDefault:
9903 * @val: int 0 or 1
9904 *
9905 * Set and return the previous value for enabling pedantic warnings.
9906 *
9907 * Returns the last value for 0 for no substitution, 1 for substitution.
9908 */
9909
9910int
9911xmlPedanticParserDefault(int val) {
9912 int old = xmlPedanticParserDefaultValue;
9913
9914 xmlPedanticParserDefaultValue = val;
9915 return(old);
9916}
9917
9918/**
9919 * xmlSubstituteEntitiesDefault:
9920 * @val: int 0 or 1
9921 *
9922 * Set and return the previous value for default entity support.
9923 * Initially the parser always keep entity references instead of substituting
9924 * entity values in the output. This function has to be used to change the
9925 * default parser behaviour
9926 * SAX::subtituteEntities() has to be used for changing that on a file by
9927 * file basis.
9928 *
9929 * Returns the last value for 0 for no substitution, 1 for substitution.
9930 */
9931
9932int
9933xmlSubstituteEntitiesDefault(int val) {
9934 int old = xmlSubstituteEntitiesDefaultValue;
9935
9936 xmlSubstituteEntitiesDefaultValue = val;
9937 return(old);
9938}
9939
9940/**
9941 * xmlKeepBlanksDefault:
9942 * @val: int 0 or 1
9943 *
9944 * Set and return the previous value for default blanks text nodes support.
9945 * The 1.x version of the parser used an heuristic to try to detect
9946 * ignorable white spaces. As a result the SAX callback was generating
9947 * ignorableWhitespace() callbacks instead of characters() one, and when
9948 * using the DOM output text nodes containing those blanks were not generated.
9949 * The 2.x and later version will switch to the XML standard way and
9950 * ignorableWhitespace() are only generated when running the parser in
9951 * validating mode and when the current element doesn't allow CDATA or
9952 * mixed content.
9953 * This function is provided as a way to force the standard behaviour
9954 * on 1.X libs and to switch back to the old mode for compatibility when
9955 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
9956 * by using xmlIsBlankNode() commodity function to detect the "empty"
9957 * nodes generated.
9958 * This value also affect autogeneration of indentation when saving code
9959 * if blanks sections are kept, indentation is not generated.
9960 *
9961 * Returns the last value for 0 for no substitution, 1 for substitution.
9962 */
9963
9964int
9965xmlKeepBlanksDefault(int val) {
9966 int old = xmlKeepBlanksDefaultValue;
9967
9968 xmlKeepBlanksDefaultValue = val;
9969 xmlIndentTreeOutput = !val;
9970 return(old);
9971}
9972